1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 #if defined(PETSC_HAVE_HYPRE) 8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 9 #endif 10 11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) 12 { 13 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 14 PetscInt i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs; 15 PetscScalar *va, *vv; 16 Vec vB, vA; 17 const PetscScalar *vb; 18 19 PetscFunctionBegin; 20 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 21 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 22 23 PetscCall(VecGetArrayWrite(vA, &va)); 24 if (idx) { 25 for (i = 0; i < m; i++) { 26 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 27 } 28 } 29 30 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 31 PetscCall(PetscMalloc1(m, &idxb)); 32 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 33 34 PetscCall(VecGetArrayWrite(v, &vv)); 35 PetscCall(VecGetArrayRead(vB, &vb)); 36 for (i = 0; i < m; i++) { 37 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 38 vv[i] = vb[i]; 39 if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 40 } else { 41 vv[i] = va[i]; 42 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 43 } 44 } 45 PetscCall(VecRestoreArrayWrite(vA, &vv)); 46 PetscCall(VecRestoreArrayWrite(vA, &va)); 47 PetscCall(VecRestoreArrayRead(vB, &vb)); 48 PetscCall(PetscFree(idxb)); 49 PetscCall(VecDestroy(&vA)); 50 PetscCall(VecDestroy(&vB)); 51 PetscFunctionReturn(0); 52 } 53 54 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 55 { 56 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 57 58 PetscFunctionBegin; 59 PetscCall(MatStoreValues(aij->A)); 60 PetscCall(MatStoreValues(aij->B)); 61 PetscFunctionReturn(0); 62 } 63 64 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat 
mat) 65 { 66 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 67 68 PetscFunctionBegin; 69 PetscCall(MatRetrieveValues(aij->A)); 70 PetscCall(MatRetrieveValues(aij->B)); 71 PetscFunctionReturn(0); 72 } 73 74 /* 75 Local utility routine that creates a mapping from the global column 76 number to the local number in the off-diagonal part of the local 77 storage of the matrix. This is done in a non scalable way since the 78 length of colmap equals the global matrix length. 79 */ 80 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 81 { 82 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 83 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 84 PetscInt nbs = B->nbs, i, bs = mat->rmap->bs; 85 86 PetscFunctionBegin; 87 #if defined(PETSC_USE_CTABLE) 88 PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap)); 89 for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1)); 90 #else 91 PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap)); 92 for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1; 93 #endif 94 PetscFunctionReturn(0); 95 } 96 97 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 98 { \ 99 brow = row / bs; \ 100 rp = aj + ai[brow]; \ 101 ap = aa + bs2 * ai[brow]; \ 102 rmax = aimax[brow]; \ 103 nrow = ailen[brow]; \ 104 bcol = col / bs; \ 105 ridx = row % bs; \ 106 cidx = col % bs; \ 107 low = 0; \ 108 high = nrow; \ 109 while (high - low > 3) { \ 110 t = (low + high) / 2; \ 111 if (rp[t] > bcol) high = t; \ 112 else low = t; \ 113 } \ 114 for (_i = low; _i < high; _i++) { \ 115 if (rp[_i] > bcol) break; \ 116 if (rp[_i] == bcol) { \ 117 bap = ap + bs2 * _i + bs * cidx + ridx; \ 118 if (addv == ADD_VALUES) *bap += value; \ 119 else *bap = value; \ 120 goto a_noinsert; \ 121 } \ 122 } \ 123 if (a->nonew == 1) goto a_noinsert; \ 124 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") 
into matrix", orow, ocol); \ 125 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 126 N = nrow++ - 1; \ 127 /* shift up all the later entries in this row */ \ 128 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 129 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 130 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 131 rp[_i] = bcol; \ 132 ap[bs2 * _i + bs * cidx + ridx] = value; \ 133 a_noinsert:; \ 134 ailen[brow] = nrow; \ 135 } 136 137 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 138 { \ 139 brow = row / bs; \ 140 rp = bj + bi[brow]; \ 141 ap = ba + bs2 * bi[brow]; \ 142 rmax = bimax[brow]; \ 143 nrow = bilen[brow]; \ 144 bcol = col / bs; \ 145 ridx = row % bs; \ 146 cidx = col % bs; \ 147 low = 0; \ 148 high = nrow; \ 149 while (high - low > 3) { \ 150 t = (low + high) / 2; \ 151 if (rp[t] > bcol) high = t; \ 152 else low = t; \ 153 } \ 154 for (_i = low; _i < high; _i++) { \ 155 if (rp[_i] > bcol) break; \ 156 if (rp[_i] == bcol) { \ 157 bap = ap + bs2 * _i + bs * cidx + ridx; \ 158 if (addv == ADD_VALUES) *bap += value; \ 159 else *bap = value; \ 160 goto b_noinsert; \ 161 } \ 162 } \ 163 if (b->nonew == 1) goto b_noinsert; \ 164 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 165 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 166 N = nrow++ - 1; \ 167 /* shift up all the later entries in this row */ \ 168 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 169 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 170 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 171 rp[_i] = bcol; \ 172 ap[bs2 * _i + bs * cidx + ridx] = value; \ 173 b_noinsert:; \ 174 bilen[brow] = 
nrow; \ 175 } 176 177 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 178 { 179 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 180 MatScalar value; 181 PetscBool roworiented = baij->roworiented; 182 PetscInt i, j, row, col; 183 PetscInt rstart_orig = mat->rmap->rstart; 184 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 185 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 186 187 /* Some Variables required in the macro */ 188 Mat A = baij->A; 189 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)(A)->data; 190 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 191 MatScalar *aa = a->a; 192 193 Mat B = baij->B; 194 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data; 195 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 196 MatScalar *ba = b->a; 197 198 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 199 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 200 MatScalar *ap, *bap; 201 202 PetscFunctionBegin; 203 for (i = 0; i < m; i++) { 204 if (im[i] < 0) continue; 205 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 206 if (im[i] >= rstart_orig && im[i] < rend_orig) { 207 row = im[i] - rstart_orig; 208 for (j = 0; j < n; j++) { 209 if (in[j] >= cstart_orig && in[j] < cend_orig) { 210 col = in[j] - cstart_orig; 211 if (roworiented) value = v[i * n + j]; 212 else value = v[i + j * m]; 213 MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 214 } else if (in[j] < 0) { 215 continue; 216 } else { 217 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 218 if (mat->was_assembled) { 219 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 220 #if defined(PETSC_USE_CTABLE) 221 
PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 222 col = col - 1; 223 #else 224 col = baij->colmap[in[j] / bs] - 1; 225 #endif 226 if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) { 227 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 228 col = in[j]; 229 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 230 B = baij->B; 231 b = (Mat_SeqBAIJ *)(B)->data; 232 bimax = b->imax; 233 bi = b->i; 234 bilen = b->ilen; 235 bj = b->j; 236 ba = b->a; 237 } else { 238 PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 239 col += in[j] % bs; 240 } 241 } else col = in[j]; 242 if (roworiented) value = v[i * n + j]; 243 else value = v[i + j * m]; 244 MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 245 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 246 } 247 } 248 } else { 249 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 250 if (!baij->donotstash) { 251 mat->assembled = PETSC_FALSE; 252 if (roworiented) { 253 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 254 } else { 255 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 256 } 257 } 258 } 259 } 260 PetscFunctionReturn(0); 261 } 262 263 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 264 { 265 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 266 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 267 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 268 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 269 PetscBool roworiented = a->roworiented; 270 const 
PetscScalar *value = v; 271 MatScalar *ap, *aa = a->a, *bap; 272 273 PetscFunctionBegin; 274 rp = aj + ai[row]; 275 ap = aa + bs2 * ai[row]; 276 rmax = imax[row]; 277 nrow = ailen[row]; 278 value = v; 279 low = 0; 280 high = nrow; 281 while (high - low > 7) { 282 t = (low + high) / 2; 283 if (rp[t] > col) high = t; 284 else low = t; 285 } 286 for (i = low; i < high; i++) { 287 if (rp[i] > col) break; 288 if (rp[i] == col) { 289 bap = ap + bs2 * i; 290 if (roworiented) { 291 if (is == ADD_VALUES) { 292 for (ii = 0; ii < bs; ii++) { 293 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 294 } 295 } else { 296 for (ii = 0; ii < bs; ii++) { 297 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 298 } 299 } 300 } else { 301 if (is == ADD_VALUES) { 302 for (ii = 0; ii < bs; ii++, value += bs) { 303 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 304 bap += bs; 305 } 306 } else { 307 for (ii = 0; ii < bs; ii++, value += bs) { 308 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 309 bap += bs; 310 } 311 } 312 } 313 goto noinsert2; 314 } 315 } 316 if (nonew == 1) goto noinsert2; 317 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 318 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 319 N = nrow++ - 1; 320 high++; 321 /* shift up all the later entries in this row */ 322 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 323 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 324 rp[i] = col; 325 bap = ap + bs2 * i; 326 if (roworiented) { 327 for (ii = 0; ii < bs; ii++) { 328 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 329 } 330 } else { 331 for (ii = 0; ii < bs; ii++) { 332 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 333 } 334 } 335 noinsert2:; 336 ailen[row] = nrow; 337 PetscFunctionReturn(0); 338 } 339 340 /* 341 This 
routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
   by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
/*
   MatSetValuesBlocked_MPIBAIJ - Block-indexed MatSetValuesBlocked(). Locally owned
   block rows go straight into the A (diagonal) or B (off-diagonal) sequential block;
   off-process block rows are stashed for assembly-time communication.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* barray is a persistent bs2-sized scratch buffer for one block of values */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive block rows/columns of the input array v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block rows are ignored by convention */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned block row */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) { /* diagonal block */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else { /* off-diagonal block */
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
#else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* new off-diagonal block column: fall back to unassembled (global) state */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else { /* off-process block row: stash for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Multiplicative (Fibonacci) hashing: HASH_KEY is the golden-ratio fraction */
#define HASH_KEY 0.6180339887
#define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
   MatSetValues_MPIBAIJ_HT - Scalar-indexed MatSetValues() using the precomputed
   hash table (see MatCreateHashTable_MPIBAIJ_Private()); only existing nonzero
   locations can be set, new entries are an error.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) { /* locally owned row */
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the Hash Table; key is the 1-based global block-location id */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          /* debug variant also counts probes for the assembly-time statistics
             NOTE(review): total_ct/insert_ct start from the stored counters and are
             later added back with +=, which looks like double counting — verify */
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            /* linear probing, wrapping around the table once */
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) { /* off-process row: stash for communication */
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(0);
}

/*
   MatSetValuesBlocked_MPIBAIJ_HT - Block-indexed MatSetValuesBlocked() using the
   precomputed hash table; only existing nonzero blocks can be set.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, ii, jj, row, col;
  PetscInt     rstart = baij->rstartbs;
  /* NOTE(review): rstart is a block index but rend is the non-blocked point-row end
     (mat->rmap->rend, not baij->rendbs) — looks inconsistent, confirm against callers */
  PetscInt     rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt     h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal    tmp;
  MatScalar  **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  /* stride (in scalars) between consecutive block rows/columns of the input array v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2;
    if (row >= rstart && row < rend) { /* locally owned block row */
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the Hash Table; key is the 1-based global block-location id */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            /* linear probing, wrapping around the table once */
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          /* input is row-major, storage is column-major: transpose while merging */
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else { /* off-process block row: stash for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(0);
}

/*
   MatGetValues_MPIBAIJ - Fetch individual entries; only locally owned rows are
   supported. Entries absent from the local off-diagonal block are returned as 0.0.
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) { /* diagonal block */
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else { /* off-diagonal block: translate global column via colmap */
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

/*
   MatNorm_MPIBAIJ - Compute Frobenius, 1- (max column sum of |a_ij|) or infinity
   (max row sum) norm; partial sums are combined across ranks with an Allreduce.
*/
PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) { /* uniprocess: defer to the sequential norm */
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j;
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      /* accumulate the bs point-row sums of each block row, then take the max */
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
/*
   MatCreateHashTable_MPIBAIJ_Private - Build an open-addressing (linear probing)
   hash table mapping the 1-based global block-location key row*Nbs+col+1 to the
   address of the corresponding bs2 block of values in A or B. factor scales the
   table size relative to the number of nonzero blocks.
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0); /* table already built */

  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd;
  HT = baij->ht;

  /* Loop Over A */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      /* linear probing: advance from h1 to the first empty slot (key 0 = empty) */
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]]; /* translate local off-diagonal column to global */
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
#endif
  PetscFunctionReturn(0);
}

/*
   MatAssemblyBegin_MPIBAIJ - Start communicating the stashed off-process scalar and
   block entries to their owning ranks; completed in MatAssemblyEnd_MPIBAIJ().
*/
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}

/*
   MatAssemblyEnd_MPIBAIJ - Receive and insert the stashed entries, assemble the
   local A and B blocks, coordinate disassembly across ranks, set up the multiply
   scatter on first final assembly, and optionally switch to the hash-table
   MatSetValues implementations.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the scalar stash: insert received values row by row */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* on request (-mat_use_hash_table), build the hash table and switch to the
     hash-table-based MatSetValues implementations for subsequent insertions */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank summary: local sizes, nonzero counts for whole/diag/off-diag parts */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part: walk the local block rows; for each stored block insert
       its bs columns one at a time ('a' advances bs scalars per inserted column) */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      /* rvals holds the bs consecutive global point rows of block row i */
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part: off-diagonal block columns are mapped back to global
       block columns through garray */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      /* rank 0 now owns the whole matrix in its diagonal block; view that sequentially */
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
/*
  MatView_MPIBAIJ_Binary - Writes the matrix to a binary viewer in the standard PETSc
  binary matrix format: header (classid, M, N, global nz), per-row lengths, column
  indices, then values, with each rank's rows written in global order.
*/
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, nz, cnt, i, j, ja, jb, k, l;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz); /* local scalar nonzeros (blocks expanded) */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  header[3] = nz;
  /* header[3] becomes the global nonzero count on rank 0, which does the write */
  PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i
       = 0; i < A->mbs; i++)
    /* every point row of block row i has the same length: bs scalars per stored block */
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices: for each point row emit B's columns left of the
     diagonal block, then A's (diagonal-block) columns, then B's remaining columns,
     so indices come out in increasing global order */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values, in the same lower/diag/upper order as the
     column indices; blocks are stored column-major, hence the bs*(bs*j + l) + k indexing */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(0);
}

/*
  MatView_MPIBAIJ - Top-level viewer dispatch: routes ASCII/draw/socket viewers to the
  gather-and-view kernel and binary viewers to the binary writer; other viewer types
  are silently ignored.
*/
PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(0);
}

/*
  MatDestroy_MPIBAIJ - Releases everything owned by the MPIBAIJ implementation: the
  stashes, the diag/off-diag sequential matrices, the column map, the communication
  scatter, work arrays, the hash table, and all composed method slots.
*/
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
  /* colmap is a hash map or a plain array depending on configuration */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  /* clear composed functions so a subsequent MatSetType() starts from a clean slate */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL)); 1209 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL)); 1210 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 1211 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL)); 1212 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL)); 1213 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL)); 1214 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL)); 1215 #if defined(PETSC_HAVE_HYPRE) 1216 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL)); 1217 #endif 1218 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL)); 1219 PetscFunctionReturn(0); 1220 } 1221 1222 PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy) 1223 { 1224 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1225 PetscInt nt; 1226 1227 PetscFunctionBegin; 1228 PetscCall(VecGetLocalSize(xx, &nt)); 1229 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx"); 1230 PetscCall(VecGetLocalSize(yy, &nt)); 1231 PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible parition of A and yy"); 1232 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1233 PetscCall((*a->A->ops->mult)(a->A, xx, yy)); 1234 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1235 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy)); 1236 PetscFunctionReturn(0); 1237 } 1238 1239 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1240 { 1241 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1242 1243 PetscFunctionBegin; 1244 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 
1245 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1246 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1247 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1248 PetscFunctionReturn(0); 1249 } 1250 1251 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy) 1252 { 1253 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1254 1255 PetscFunctionBegin; 1256 /* do nondiagonal part */ 1257 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1258 /* do local part */ 1259 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1260 /* add partial results together */ 1261 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1262 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1263 PetscFunctionReturn(0); 1264 } 1265 1266 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1267 { 1268 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1269 1270 PetscFunctionBegin; 1271 /* do nondiagonal part */ 1272 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1273 /* do local part */ 1274 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1275 /* add partial results together */ 1276 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1277 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1278 PetscFunctionReturn(0); 1279 } 1280 1281 /* 1282 This only works correctly for square matrices where the subblock A->A is the 1283 diagonal block 1284 */ 1285 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v) 1286 { 1287 PetscFunctionBegin; 1288 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1289 PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v)); 1290 PetscFunctionReturn(0); 1291 } 1292 1293 PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa) 1294 { 1295 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1296 1297 
  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(0);
}

/*
  MatGetRow_MPIBAIJ - Returns one locally owned (global) point row as sorted global
  column indices and values, merging the diagonal-block (A) and off-diagonal-block (B)
  contributions. The returned arrays are internal work buffers; callers must pair this
  with MatRestoreRow and only one row may be active at a time.
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart;

  /* only request from A/B the arrays the caller actually asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global block column precedes the diagonal
         block; layout is [B-left | A | B-right] */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}

/* MatRestoreRow_MPIBAIJ - Ends a MatGetRow access; the work buffers are reused, not freed. */
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* MatZeroEntries_MPIBAIJ - Zeros all stored values, keeping the nonzero pattern. */
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

/*
  MatGetInfo_MPIBAIJ - Accumulates the diag (A) and off-diag (B) statistics locally,
  then reduces across the communicator with MAX or SUM depending on flag.
*/
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
  Mat            A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  /* stage the A-part statistics ... */
  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  /* ... and add the B-part statistics */
  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/*
  MatSetOption_MPIBAIJ - Handles options for the parallel matrix; pattern-related
  options are forwarded to both sequential sub-matrices, stash/hash-table options are
  recorded locally, and symmetry-type options are accepted without action.
*/
PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    /* table is built lazily at final assembly; 1.39 is the default over-allocation factor */
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(0);
}

/*
  MatTranspose_MPIBAIJ - Builds the transpose by inserting each local block, with row
  and column roles swapped, into a new (or reused) matrix; preallocation for the
  result is unknown, so only the block size is set up front.
*/
PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /*
     copy over the A part: each stored block is inserted transposed — the block's
     point columns become rows of B (note the swapped 1,&col / bs,rvals arguments) */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  PetscCall(PetscMalloc1(bs, &rvals));

  for (i = 0; i < mbs; i++) {
    /* rvals holds the bs consecutive global point rows of block row i */
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part (off-diagonal block columns mapped through garray) */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = baij->garray[aj[j]] * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B));
  PetscFunctionReturn(0);
}

/*
  MatDiagonalScale_MPIBAIJ - Computes mat = diag(ll) * mat * diag(rr). The right
  scaling of the off-diagonal block needs the ghosted entries of rr, so that scatter
  is started first and completed only after the other scalings are done.
*/
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          a = baij->A, b = baij->B;
  PetscInt     s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale  the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  }
  PetscFunctionReturn(0);
}

/*
  MatZeroRows_MPIBAIJ - Zeros the locally owned rows listed in rows[] (global
  indices), optionally placing diag on the diagonal and fixing the right-hand side b
  from the solution x so the zeroed equations remain consistent.
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
  PetscInt    *lrows;
  PetscInt     r, len;
  PetscBool    cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* actually zap the local rows */
  /*
    Zero the required rows. If the "diagonal block" of the matrix
    is square and the user wishes to set the diagonal we use separate
    code so that MatSetValues() is not called for each diagonal allocating
    new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  if ((diag != 0.0) && cong) {
    /* square layout: the sequential routine can place diag directly */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
  } else if (diag != 0.0) {
    /* non-congruent layouts: the "diagonal" entry may live off the diagonal block,
       so insert it via MatSetValues (requires new nonzeros to be allowed) */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
    PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

/*
  MatZeroRowsColumns_MPIBAIJ - Zeros both the rows and the columns listed in rows[]
  (global indices, possibly non-local). A PetscSF maps the requested rows to their
  owners; the diagonal part is handled by MatZeroRowsColumns on l->A, while the
  off-diagonal part is zeroed in place through the raw SeqBAIJ arrays, using a
  scattered 0/1 mask to find the ghosted columns to eliminate and (optionally)
  updating b from x for consistency.
*/
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  /* lmask[col] != 0 marks ghosted columns that must be eliminated */
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    /* ghosted x values are needed to keep b consistent when columns are removed */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix: stride through the point row inside
     each bs x bs block (blocks are stored column-major, hence the += bs stride) */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
    aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs;
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
          if (x) bb[i] -= aa[0] * xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

/* MatSetUnfactored_MPIBAIJ - Clears the factored flag on the diagonal block. */
PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);

/*
  MatEqual_MPIBAIJ - Compares the local diag and off-diag parts, then reduces with
  logical AND so all ranks agree on the result.
*/
PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
  Mat          a, b, c, d;
  PetscBool    flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  /* compare diagonal blocks first; only compare off-diagonal blocks when those matched */
  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  /* all ranks must agree for the parallel matrices to be equal */
  PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    /* fast path: copy the diagonal and off-diagonal sequential blocks directly */
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}

/* Default MatSetUp: preallocate with default nonzero counts using the row block size */
PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(0);
}

/* Compute per-block-row nonzero counts needed so Y can hold the union of X's and Y's patterns */
PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
  Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
  Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(0);
}

/* Y = a*X + Y. Fast BLAS path when the nonzero patterns are identical; otherwise
   falls back to pattern-merging implementations. */
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
  PetscBLASInt bnz, one = 1;
  Mat_SeqBAIJ *x, *y;
  PetscInt     bs2 =
Y->rmap->bs * Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* identical patterns: Y += a*X is a flat axpy over the stored block values */
    x = (Mat_SeqBAIJ *)xx->A->data;
    y = (Mat_SeqBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    /* unrelated patterns: preallocate a new matrix for the union pattern, merge, then
       replace Y's guts with it */
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPIBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(0);
}

/* In-place complex conjugation of all stored values; no-op for real builds */
PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;

    PetscCall(MatConjugate_SeqBAIJ(a->A));
    PetscCall(MatConjugate_SeqBAIJ(a->B));
  }
  PetscFunctionReturn(0);
}

/* Replace every stored entry by its real part */
PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(0);
}

/* Replace every stored entry by its imaginary part */
PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS       iscol_local;
  PetscInt csize;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol, &csize));
  if (call == MAT_REUSE_MATRIX) {
    /* the gathered column IS was composed on the matrix on the MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscCall(ISAllGather(iscol, &iscol_local));
  }
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
  if (call == MAT_INITIAL_MATRIX) {
    /* stash the gathered IS for later MAT_REUSE_MATRIX calls */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}

/*
    Not great since it makes two copies of the submatrix, first an
SeqBAIJ
   in local and then by concatenating the local matrices the end result.
   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt  rank, size;
  PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
  PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat          M, Mreuse;
  MatScalar   *vwork, *aa;
  MPI_Comm     comm;
  IS           isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggy */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
  PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));

  if (call == MAT_REUSE_MATRIX) {
    /* the local submatrix was composed on the result on the MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
    m - number of local rows
    n - number of columns (same on all processors)
    rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetSize(Mreuse, &m, &n));
  m = m / bs; /* convert point counts to block counts */
  n = n / bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n * bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize / bs;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    /* both preallocations are attempted because this routine serves BAIJ and SBAIJ;
       the call that does not match M's type is a no-op */
    PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(PetscFree2(dlens, olens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqBAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  /* insert the local submatrix row by row; jj/aa are walked in step with ii */
  for (i = 0; i < m; i++) {
    row   = rstart / bs + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz * bs * bs;
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(0);
}

/* Permute rows and columns of A; builds collective/gathered versions of the
   permutation index sets and extracts the permuted matrix as a submatrix. */
PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  MPI_Comm        comm, pcomm;
  PetscInt        clocal_size, nrows;
  const PetscInt *rows;
  PetscMPIInt     size;
  IS              crowp, lcolp;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* make a collective version of 'rowp' */
  PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
  if (pcomm == comm) {
    crowp = rowp;
  } else {
    PetscCall(ISGetSize(rowp, &nrows));
    PetscCall(ISGetIndices(rowp, &rows));
    PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
    PetscCall(ISRestoreIndices(rowp, &rows));
  }
  PetscCall(ISSetPermutation(crowp));
  /* make a local version of 'colp' */
  PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
  PetscCallMPI(MPI_Comm_size(pcomm, &size));
  if (size == 1) {
    lcolp =
colp;
  } else {
    PetscCall(ISAllGather(colp, &lcolp));
  }
  PetscCall(ISSetPermutation(lcolp));
  /* now we just get the submatrix */
  PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B));
  /* clean up */
  if (pcomm != comm) PetscCall(ISDestroy(&crowp));
  if (size > 1) PetscCall(ISDestroy(&lcolp));
  PetscFunctionReturn(0);
}

/* Return the number of ghost (off-process) block columns and a pointer to their
   global block indices (the garray map of the off-diagonal part). */
PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;

  PetscFunctionBegin;
  if (nghosts) *nghosts = B->nbs;
  if (ghosts) *ghosts = baij->garray;
  PetscFunctionReturn(0);
}

/* Gather the full (block) nonzero structure of A onto every process as a
   sequential AIJ matrix (numerical values are not set). */
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
{
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /* ----------------------------------------------------------------
       Tell every processor the number of nonzeros per row
  */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  /* one allocation serves both recvcounts and displs */
  PetscCall(PetscMalloc1(2 * size, &recvcounts));
  displs = recvcounts + size;
  for (i = 0;
i < size; i++) {
    recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
    displs[i]     = A->rmap->range[i] / bs;
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* ---------------------------------------------------------------
       Create the sequential matrix of the same type as the local block diagonal
  */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
  b = (Mat_SeqAIJ *)B->data;

  /*--------------------------------------------------------------------
       Copy my part of matrix column indices over
  */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank] / bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend / bs - A->rmap->rstart / bs;
  cnt        = 0;
  /* merge each row's indices in globally sorted order: off-diagonal entries below
     the diagonal block, then the diagonal block, then the rest of the off-diagonal */
  for (i = 0; i < n; i++) {
    /* put in lower diagonal portion */
    m = bd->i[i + 1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

    /* put in upper diagonal portion */
    while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
  }
  PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

  /*--------------------------------------------------------------------
       Gather all column indices to all processors
  */
  for (i = 0; i < size; i++) {
    recvcounts[i] = 0;
    for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
  }
  displs[0] = 0;
  for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /*--------------------------------------------------------------------
       Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  PetscCall(MatPropagateSymmetryOptions(A, B));
  *newmat = B;
  PetscFunctionReturn(0);
}

/* Parallel SOR: repeatedly scatter the current solution to ghost entries, fold the
   off-diagonal contribution into the right-hand side, then run the local sweep. */
PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  Vec          bb1 = NULL;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(0);
  }

  /* bb1 is only needed when more than a single zero-initial-guess sweep will run */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update
rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");

  PetscCall(VecDestroy(&bb1));
  PetscFunctionReturn(0);
}

/* Compute a per-column reduction (norm, sum, or mean of real/imaginary parts)
   over the whole parallel matrix; 'reductions' has global column length. */
PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
PetscInt m, N, i, *garray = aij->garray; 2263 PetscInt ib, jb, bs = A->rmap->bs; 2264 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2265 MatScalar *a_val = a_aij->a; 2266 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2267 MatScalar *b_val = b_aij->a; 2268 PetscReal *work; 2269 2270 PetscFunctionBegin; 2271 PetscCall(MatGetSize(A, &m, &N)); 2272 PetscCall(PetscCalloc1(N, &work)); 2273 if (type == NORM_2) { 2274 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2275 for (jb = 0; jb < bs; jb++) { 2276 for (ib = 0; ib < bs; ib++) { 2277 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2278 a_val++; 2279 } 2280 } 2281 } 2282 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2283 for (jb = 0; jb < bs; jb++) { 2284 for (ib = 0; ib < bs; ib++) { 2285 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2286 b_val++; 2287 } 2288 } 2289 } 2290 } else if (type == NORM_1) { 2291 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2292 for (jb = 0; jb < bs; jb++) { 2293 for (ib = 0; ib < bs; ib++) { 2294 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2295 a_val++; 2296 } 2297 } 2298 } 2299 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2300 for (jb = 0; jb < bs; jb++) { 2301 for (ib = 0; ib < bs; ib++) { 2302 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2303 b_val++; 2304 } 2305 } 2306 } 2307 } else if (type == NORM_INFINITY) { 2308 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2309 for (jb = 0; jb < bs; jb++) { 2310 for (ib = 0; ib < bs; ib++) { 2311 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2312 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2313 a_val++; 2314 } 2315 } 2316 } 2317 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2318 for (jb = 0; jb < bs; jb++) { 2319 for (ib = 0; ib < bs; ib++) { 2320 int col = garray[b_aij->j[i]] * bs + jb; 2321 work[col] = 
PetscMax(PetscAbsScalar(*b_val), work[col]); 2322 b_val++; 2323 } 2324 } 2325 } 2326 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2327 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2328 for (jb = 0; jb < bs; jb++) { 2329 for (ib = 0; ib < bs; ib++) { 2330 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2331 a_val++; 2332 } 2333 } 2334 } 2335 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2336 for (jb = 0; jb < bs; jb++) { 2337 for (ib = 0; ib < bs; ib++) { 2338 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2339 b_val++; 2340 } 2341 } 2342 } 2343 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2344 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2345 for (jb = 0; jb < bs; jb++) { 2346 for (ib = 0; ib < bs; ib++) { 2347 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2348 a_val++; 2349 } 2350 } 2351 } 2352 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2353 for (jb = 0; jb < bs; jb++) { 2354 for (ib = 0; ib < bs; ib++) { 2355 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2356 b_val++; 2357 } 2358 } 2359 } 2360 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2361 if (type == NORM_INFINITY) { 2362 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2363 } else { 2364 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2365 } 2366 PetscCall(PetscFree(work)); 2367 if (type == NORM_2) { 2368 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2369 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2370 for (i = 0; i < N; i++) reductions[i] /= m; 2371 } 2372 PetscFunctionReturn(0); 2373 } 2374 2375 PetscErrorCode 
MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* invert the point-block diagonal of the local diagonal part, then propagate any
     zero-pivot/factorization error info up to the parallel matrix */
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  A->factorerrortype             = a->A->factorerrortype;
  A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
  A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
  PetscFunctionReturn(0);
}

/* Y = Y + a*I. Ensures the diagonal block has space for diagonal entries first. */
PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
  Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
  } else if (!aij->nz) {
    /* diagonal block has no nonzeros yet: reserve one block per row while
       preserving the user's new-nonzero policy */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart / A->rmap->bs; /* convert local block row to global block row */
  }
  PetscFunctionReturn(0);
}

/* Return the local ("diagonal") sequential block of the parallel matrix */
PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPIBAIJ *)A->data)->A;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Operation table for MATMPIBAIJ; positions are fixed by struct _MatOps, see the
   numbered markers. NULL means the operation is not implemented for this type. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                       /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                       /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                       /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                       /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                       /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                       /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                       /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*74*/ NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                       /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL};

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);

/* Set the nonzero structure (and optionally values) of B from block CSR arrays;
   counts diagonal/off-diagonal blocks per row first so preallocation is exact. */
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ = NULL;
  PetscScalar    *values = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  /* all sizes below are in block units */
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cstart = B->cmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    nz_max = PetscMax(nz_max, nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j = 0; j < nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  values = (PetscScalar *)V;
  /* if no values were supplied, insert zeros so the nonzero locations get created */
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* all entries above are locally owned, so skip the off-process communication path */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}

/*@C
   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values

   Collective

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero) these must be sorted for each row
-  v - optional values in the matrix

   Level: advanced

   Notes:
   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs
   may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
   over rows within a block and the last index is over columns within a block row. Fortran programs will likely set
   `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
   block column and the second index is over columns within a block.

   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ`
@*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(0);
}

/* Type-specific preallocation: sets block-size-derived bookkeeping and
   (re)creates the sequential diagonal (A) and off-diagonal (B) blocks. */
PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
{
  Mat_MPIBAIJ *b;
  PetscInt     i;
  PetscMPIInt  size;

  PetscFunctionBegin;
  PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
PetscCall(PetscLayoutSetUp(B->rmap)); 2696 PetscCall(PetscLayoutSetUp(B->cmap)); 2697 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2698 2699 if (d_nnz) { 2700 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]); 2701 } 2702 if (o_nnz) { 2703 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]); 2704 } 2705 2706 b = (Mat_MPIBAIJ *)B->data; 2707 b->bs2 = bs * bs; 2708 b->mbs = B->rmap->n / bs; 2709 b->nbs = B->cmap->n / bs; 2710 b->Mbs = B->rmap->N / bs; 2711 b->Nbs = B->cmap->N / bs; 2712 2713 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2714 b->rstartbs = B->rmap->rstart / bs; 2715 b->rendbs = B->rmap->rend / bs; 2716 b->cstartbs = B->cmap->rstart / bs; 2717 b->cendbs = B->cmap->rend / bs; 2718 2719 #if defined(PETSC_USE_CTABLE) 2720 PetscCall(PetscHMapIDestroy(&b->colmap)); 2721 #else 2722 PetscCall(PetscFree(b->colmap)); 2723 #endif 2724 PetscCall(PetscFree(b->garray)); 2725 PetscCall(VecDestroy(&b->lvec)); 2726 PetscCall(VecScatterDestroy(&b->Mvctx)); 2727 2728 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2729 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2730 PetscCall(MatDestroy(&b->B)); 2731 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2732 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0));
  PetscCall(MatSetType(b->B, MATSEQBAIJ));

  /* the diagonal block b->A is created only once; later preallocation calls just resize it */
  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetType(b->A, MATSEQBAIJ));
    PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
  }

  PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
  PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);

/* Convert a MATMPIBAIJ matrix into a MATMPIADJ adjacency structure over block rows/columns, dropping diagonal entries */
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
  PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* count entries per block row: diagonal-block + off-diagonal-block entries, minus the diagonal entry if present */
  PetscCall(PetscMalloc1(M + 1, &ii));
  ii[0] = 0;
  for (i = 0; i < M; i++) {
    PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
    PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
    ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
    /* remove one from the count if this block row of the matrix has a diagonal entry */
    for (j = id[i]; j < id[i + 1]; j++) {
      if (jd[j] == i) {
        ii[i + 1]--;
        break;
      }
    }
  }
  PetscCall(PetscMalloc1(ii[M], &jj));
  cnt = 0;
  for (i = 0; i < M; i++) {
    /* off-diagonal columns that precede this rank's diagonal block (keeps jj sorted) */
    for (j = io[i]; j < io[i + 1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    /* diagonal-block columns, skipping the diagonal entry itself */
    for (k = id[i]; k < id[i + 1]; k++) {
      if (jd[k] != i) jj[cnt++] = rstart + jd[k];
    }
    /* remaining off-diagonal columns */
    for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
  }
  /* MatCreateMPIAdj takes ownership of ii and jj; do not free them here */
  PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
  PetscFunctionReturn(0);
}

#include <../src/mat/impls/aij/mpi/mpiaij.h>

PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIAIJ  *b;
  Mat          B;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPIAIJ));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
  }
  b = (Mat_MPIAIJ *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    /* convert in place into the existing sequential pieces of B */
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    /* disassemble A so its off-diagonal part uses global column numbering; reassembled below */
    PetscCall(MatDisAssemble_MPIBAIJ(A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  }
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}

/*MC
   MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

   Options Database Keys:
+ -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
. -mat_block_size <bs> - set the blocksize used to store the matrix
. -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
- -mat_use_hash_table <fact> - set hash table factor

   Level: beginner

   Note:
    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ()`
M*/

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);

/* Type constructor for MATMPIBAIJ: allocates the implementation struct and registers all type-specific methods */
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ *b;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data = (void *)b;

  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* register type-specific method implementations on this object */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C",
MatSetHashTableFactor_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));

  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    /* factors <= 1.0 are not meaningful; fall back to the default */
    if (fact <= 1.0) fact = 1.39;
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}

/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
   and `MATMPIBAIJ` otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
M*/

/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+ B - the matrix
. 
bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
       blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
. d_nz - number of block nonzeros per block row in diagonal portion of local
         submatrix (same for all local rows)
. d_nnz - array containing the number of block nonzeros in the various block rows
          in the diagonal portion of the local submatrix (possibly different for each block
          row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and
          set it even if it is zero.
. o_nz - number of block nonzeros per block row in the off-diagonal portion of local
         submatrix (same for all local rows).
- o_nnz - array containing the number of nonzeros in the various block rows of the
          off-diagonal portion of the local submatrix (possibly different for
          each block row) or NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+ -mat_block_size - size of the blocks to use
- -mat_use_hash_table <fact> - set hash table factor

   Notes:
   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.
Likewise, specify preallocated storage for the 2998 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2999 3000 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3001 the figure below we depict these three local rows and all columns (0-11). 3002 3003 .vb 3004 0 1 2 3 4 5 6 7 8 9 10 11 3005 -------------------------- 3006 row 3 |o o o d d d o o o o o o 3007 row 4 |o o o d d d o o o o o o 3008 row 5 |o o o d d d o o o o o o 3009 -------------------------- 3010 .ve 3011 3012 Thus, any entries in the d locations are stored in the d (diagonal) 3013 submatrix, and any entries in the o locations are stored in the 3014 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3015 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3016 3017 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3018 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3019 In general, for PDE problems in which most nonzeros are near the diagonal, 3020 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3021 or you will get TERRIBLE performance; see the users' manual chapter on 3022 matrices. 3023 3024 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3025 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3026 You can also run with the option -info and look for messages with the string 3027 malloc in them to see if additional memory allocation was needed. 
   Level: intermediate

.seealso: `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatches to the type-specific implementation registered as "MatMPIBAIJSetPreallocation_C"; a no-op for other types */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(0);
}

/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+ comm - MPI communicator
. bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
       blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
. m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
      This value should be the same as the local size used in creating the
      y vector for the matrix-vector product y = Ax.
. n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
      This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax.
. M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. 
N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. d_nz - number of nonzero blocks per block row in diagonal portion of local
         submatrix (same for all local rows)
. d_nnz - array containing the number of nonzero blocks in the various block rows
          in the diagonal portion of the local submatrix (possibly different for each block
          row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
          and set it even if it is zero.
. o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
         submatrix (same for all local rows).
- o_nnz - array containing the number of nonzero blocks in the various block rows of the
          off-diagonal portion of the local submatrix (possibly different for
          each block row) or NULL.

   Output Parameter:
. A - the matrix

   Options Database Keys:
+ -mat_block_size - size of the blocks to use
- -mat_use_hash_table <fact> - set hash table factor

   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3097 3098 Storage Information: 3099 For a square global matrix we define each processor's diagonal portion 3100 to be its local rows and the corresponding columns (a square submatrix); 3101 each processor's off-diagonal portion encompasses the remainder of the 3102 local matrix (a rectangular submatrix). 3103 3104 The user can specify preallocated storage for the diagonal part of 3105 the local submatrix with either d_nz or d_nnz (not both). Set 3106 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3107 memory allocation. Likewise, specify preallocated storage for the 3108 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3109 3110 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3111 the figure below we depict these three local rows and all columns (0-11). 3112 3113 .vb 3114 0 1 2 3 4 5 6 7 8 9 10 11 3115 -------------------------- 3116 row 3 |o o o d d d o o o o o o 3117 row 4 |o o o d d d o o o o o o 3118 row 5 |o o o d d d o o o o o o 3119 -------------------------- 3120 .ve 3121 3122 Thus, any entries in the d locations are stored in the d (diagonal) 3123 submatrix, and any entries in the o locations are stored in the 3124 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3125 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3126 3127 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3128 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3129 In general, for PDE problems in which most nonzeros are near the diagonal, 3130 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3131 or you will get TERRIBLE performance; see the users' manual chapter on 3132 matrices. 
   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
@*/
PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* a single-process communicator gets the sequential type; otherwise the parallel type */
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQBAIJ));
    PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
  }
  PetscFunctionReturn(0);
}

/* Duplicate a MATMPIBAIJ matrix; cpvalues controls whether numerical values are copied */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy the cached block-size-derived dimensions */
  a             = (Mat_MPIBAIJ *)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  a->size       = oldmat->size;
  a->rank       = oldmat->rank;
  a->donotstash = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-use scratch state starts fresh in the duplicate */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
  } else a->colmap = NULL;

  /* copy the off-diagonal global-column map when present (len = number of off-diagonal block columns) */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len, &a->garray));
    PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
  PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
  PetscInt    *rowidxs, *colidxs, rs, cs, ce;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix
header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* a negative nz marks a special on-disk format this loader does not handle */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set local sizes if not set already */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
  mbs = m / bs;
  nbs = n / bs;

  /* read in row lengths and build row indices */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer,
rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  /* convert per-row lengths into a CSR-style prefix sum */
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));

  { /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij, done;
    PetscInt  *d_nnz, *o_nnz;

    PetscCall(PetscBTCreate(nbs, &bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
    for (i = 0; i < mbs; i++) {
      PetscCall(PetscBTMemzero(nbs, bt));
      PetscCall(PetscHSetIClear(ht));
      for (k = 0; k < bs; k++) {
        PetscInt row = bs * i + k;
        for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
          PetscInt col = colidxs[j];
          /* for SBAIJ only the upper triangle (col >= row) is counted */
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              /* count each diagonal-portion block at most once per block row */
              if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
            } else {
              PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* both calls are type-dispatched; only the one matching the matrix type takes effect */
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0,
d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

/* MatLoad for MATMPIBAIJ: only binary viewers are supported */
PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
  PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(0);
}

/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table

   Input Parameters:
+ mat - the matrix
- fact - factor

   Options Database Key:
. 
-mat_use_hash_table <fact> - provide the factor

   Level: advanced

.seealso: `MATMPIBAIJ`, `MatSetOption()`
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
{
  PetscFunctionBegin;
  /* dispatches to the type-specific implementation; a no-op for non-MPIBAIJ types */
  PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
  PetscFunctionReturn(0);
}

/* Type-specific implementation: store the hash table factor in the MPIBAIJ data structure */
PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ *)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}

/* Give access to the internal sequential diagonal (Ad) and off-diagonal (Ao) pieces and the off-diagonal column map */
PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscBool    flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

/*@C
   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`

   Collective on Mat

   Input Parameters:
+ mat - the matrix
. min - number of input rows
. im - input rows
. nin - number of input columns
. in - input columns
. v - numerical values input
- addvin - `INSERT_VALUES` or `ADD_VALUES`

   Developer Note:
   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
   Level: advanced

.seealso: `MatSetValuesBlocked()`
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij        = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  /* lazily allocate the bs x bs scratch block used when values must be repacked */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows of a block within v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative row index means skip this row */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else
if ((!roworiented) && (m == 1)) { 3464 barray = (MatScalar *)v + j * bs2; 3465 } else { /* Here a copy is required */ 3466 if (roworiented) { 3467 value = v + i * (stepval + bs) * bs + j * bs; 3468 } else { 3469 value = v + j * (stepval + bs) * bs + i * bs; 3470 } 3471 for (ii = 0; ii < bs; ii++, value += stepval) { 3472 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 3473 } 3474 barray -= bs2; 3475 } 3476 3477 if (in[j] >= cstart && in[j] < cend) { 3478 col = in[j] - cstart; 3479 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 3480 } else if (in[j] < 0) { 3481 continue; 3482 } else { 3483 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 3484 if (mat->was_assembled) { 3485 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 3486 3487 #if defined(PETSC_USE_DEBUG) 3488 #if defined(PETSC_USE_CTABLE) 3489 { 3490 PetscInt data; 3491 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 3492 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3493 } 3494 #else 3495 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3496 #endif 3497 #endif 3498 #if defined(PETSC_USE_CTABLE) 3499 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 3500 col = (col - 1) / bs; 3501 #else 3502 col = (baij->colmap[in[j]] - 1) / bs; 3503 #endif 3504 if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 3505 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 3506 col = in[j]; 3507 } 3508 } else col = in[j]; 3509 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 3510 } 3511 } 3512 } else { 3513 if (!baij->donotstash) { 3514 if (roworiented) { 3515 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3516 } else { 3517 
PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3518 } 3519 } 3520 } 3521 } 3522 3523 /* task normally handled by MatSetValuesBlocked() */ 3524 PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0)); 3525 PetscFunctionReturn(0); 3526 } 3527 3528 /*@ 3529 MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block 3530 CSR format the local rows. 3531 3532 Collective 3533 3534 Input Parameters: 3535 + comm - MPI communicator 3536 . bs - the block size, only a block size of 1 is supported 3537 . m - number of local rows (Cannot be `PETSC_DECIDE`) 3538 . n - This value should be the same as the local size used in creating the 3539 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 3540 calculated if N is given) For square matrices n is almost always m. 3541 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 3542 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 3543 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix 3544 . j - column indices 3545 - a - matrix values 3546 3547 Output Parameter: 3548 . mat - the matrix 3549 3550 Level: intermediate 3551 3552 Notes: 3553 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3554 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3555 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3556 3557 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3558 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3559 block, followed by the second column of the first block etc etc. 
That is, the blocks are contiguous in memory 3560 with column-major ordering within blocks. 3561 3562 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3563 3564 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3565 `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3566 @*/ 3567 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 3568 { 3569 PetscFunctionBegin; 3570 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3571 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3572 PetscCall(MatCreate(comm, mat)); 3573 PetscCall(MatSetSizes(*mat, m, n, M, N)); 3574 PetscCall(MatSetType(*mat, MATMPIBAIJ)); 3575 PetscCall(MatSetBlockSize(*mat, bs)); 3576 PetscCall(MatSetUp(*mat)); 3577 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 3578 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 3579 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 3580 PetscFunctionReturn(0); 3581 } 3582 3583 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3584 { 3585 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 3586 PetscInt *indx; 3587 PetscScalar *values; 3588 3589 PetscFunctionBegin; 3590 PetscCall(MatGetSize(inmat, &m, &N)); 3591 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3592 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 3593 PetscInt *dnz, *onz, mbs, Nbs, nbs; 3594 PetscInt *bindx, rmax = a->rmax, j; 3595 PetscMPIInt rank, size; 3596 3597 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3598 mbs = m / bs; 3599 Nbs = N / cbs; 3600 if (n == PETSC_DECIDE) 
PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3601 nbs = n / cbs; 3602 3603 PetscCall(PetscMalloc1(rmax, &bindx)); 3604 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 3605 3606 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3607 PetscCallMPI(MPI_Comm_rank(comm, &size)); 3608 if (rank == size - 1) { 3609 /* Check sum(nbs) = Nbs */ 3610 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 3611 } 3612 3613 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3614 for (i = 0; i < mbs; i++) { 3615 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3616 nnz = nnz / bs; 3617 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 3618 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 3619 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3620 } 3621 PetscCall(PetscFree(bindx)); 3622 3623 PetscCall(MatCreate(comm, outmat)); 3624 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 3625 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 3626 PetscCall(MatSetType(*outmat, MATBAIJ)); 3627 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 3628 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3629 MatPreallocateEnd(dnz, onz); 3630 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3631 } 3632 3633 /* numeric phase */ 3634 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3635 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3636 3637 for (i = 0; i < m; i++) { 3638 PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3639 Ii = i + rstart; 3640 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3641 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3642 } 3643 
PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3644 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3645 PetscFunctionReturn(0); 3646 } 3647