1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 #if defined(PETSC_HAVE_HYPRE) 8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 9 #endif 10 11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) 12 { 13 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 14 PetscInt i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs; 15 PetscScalar *va, *vv; 16 Vec vB, vA; 17 const PetscScalar *vb; 18 19 PetscFunctionBegin; 20 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 21 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 22 23 PetscCall(VecGetArrayWrite(vA, &va)); 24 if (idx) { 25 for (i = 0; i < m; i++) { 26 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 27 } 28 } 29 30 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 31 PetscCall(PetscMalloc1(m, &idxb)); 32 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 33 34 PetscCall(VecGetArrayWrite(v, &vv)); 35 PetscCall(VecGetArrayRead(vB, &vb)); 36 for (i = 0; i < m; i++) { 37 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 38 vv[i] = vb[i]; 39 if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 40 } else { 41 vv[i] = va[i]; 42 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 43 } 44 } 45 PetscCall(VecRestoreArrayWrite(vA, &vv)); 46 PetscCall(VecRestoreArrayWrite(vA, &va)); 47 PetscCall(VecRestoreArrayRead(vB, &vb)); 48 PetscCall(PetscFree(idxb)); 49 PetscCall(VecDestroy(&vA)); 50 PetscCall(VecDestroy(&vB)); 51 PetscFunctionReturn(0); 52 } 53 54 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 55 { 56 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 57 58 PetscFunctionBegin; 59 PetscCall(MatStoreValues(aij->A)); 60 PetscCall(MatStoreValues(aij->B)); 61 PetscFunctionReturn(0); 62 } 63 64 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat 
mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  /* restore the values previously saved by MatStoreValues() on both blocks */
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non scalable way since the
   length of colmap equals the global matrix length.
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* values are stored 1-based so that a lookup result of 0 means "not present" */
  PetscCall(PetscTableCreate(baij->nbs, baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscTableAdd(baij->colmap, baij->garray[i] + 1, i * bs + 1, INSERT_VALUES));
#else
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(0);
}

/* Insert a single scalar entry (row,col) into the diagonal block A: binary search within
   the block row, in-place update if the block exists, otherwise shift-and-insert.
   Relies on the local variables declared in MatSetValues_MPIBAIJ(). */
#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp = aj + ai[brow]; \
    ap = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i] = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  }

/* Same as MatSetValues_SeqBAIJ_A_Private() but for the off-diagonal block B */
#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp = bj + bi[brow]; \
    ap = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i] = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  }

/* Scalar-indexed MatSetValues(): locally owned rows go straight into A (diagonal block)
   or B (off-diagonal block, via colmap); off-process rows are stashed for assembly. */
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)(A)->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa = a->a;

  Mat          B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently ignored */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          /* column falls in the diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* translate global column to the off-diagonal block's local numbering */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] / bs + 1, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* column unknown to B: fall back to global numbering by disassembling */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs;
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else {
      /* off-process row: stash for communication during assembly */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Insert one bs-by-bs block at block position (row,col) of the sequential BAIJ matrix A;
   binary search within the block row, in-place update or shift-and-insert. */
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
  PetscInt    *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt    *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt    *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool    roworiented = a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  /* binary search down to a window of <= 7 block columns, then scan linearly */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block exists: update in place; BAIJ stores blocks column-major */
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* new nonzeros silently ignored */
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

/*
    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the scratch buffer used to gather one block contiguously */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block rows are silently ignored */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the gathered block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column in the diagonal part */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* unknown column: switch B back to global numbering */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      /* off-process block row: stash for communication during assembly */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/* multiplicative (Fibonacci-style) hash: fractional part of key*HASH_KEY scaled to table size */
#define HASH_KEY 0.6180339887
#define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* Scalar-indexed MatSetValues() using the precomputed hash table (no new nonzeros allowed);
   each key maps directly to the address of its stored block. */
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up PetscInto the Hash Table */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          /* debug path additionally counts probe lengths for diagnostics */
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            /* linear probing with wrap-around from h1 */
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) &&
(HT[idx] != key); idx++)
            ;
          if (idx == size) {
            /* wrap around and continue probing below h1 */
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      /* off-process row: stash for communication during assembly */
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(0);
}

/* Block-indexed MatSetValuesBlocked() using the precomputed hash table; the key is the
   global block position, the stored value is the address of the bs2-sized block. */
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2;
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          /* debug path counts probe lengths for diagnostics */
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          /* linear probing with wrap-around from h1 */
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else {
      /* off-process block row: stash for communication during assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(0);
}

/* Retrieves entries (local rows only) from either the diagonal block A or, via the
   colmap translation, the off-diagonal block B; absent entries read as 0.0. */
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(baij->colmap, idxn[j] / bs + 1, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          /* a missing or mismatched colmap entry means the entry is structurally zero */
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

/* Parallel matrix norms: Frobenius, 1-norm (max column sum), infinity-norm (max row sum);
   local contributions from A and B are combined with an Allreduce. */
PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single process: defer to the sequential implementation */
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j;
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] >
*nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      for (j = 0; j < amat->mbs; j++) {
        /* accumulate per-scalar-row sums across one block row of A and B */
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entried need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0); /* table already built */

  /* table size = factor * total number of nonzero blocks */
  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd;
  HT = baij->ht;

  /* Loop Over A */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      /* linear probing: find the first free slot at or after h1 (mod ht_size) */
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]]; /* translate to the global block column */
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
#endif
  PetscFunctionReturn(0);
}

/* Starts communication of stashed off-process entries (scalar and block stashes) */
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}

/* Finishes assembly: drains both stashes into local blocks, assembles A and B,
   coordinates (dis)assembly across ranks, and optionally builds the hash table. */
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* if requested, switch the SetValues kernels to the hash-table fast path */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank local sizes, nonzero counts and memory; synchronized output keeps ranks ordered */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      /* rank 0 owns the entire gathered matrix; all other ranks own no rows */
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    /* rvals holds the bs consecutive global (scalar) row indices of block-row i */
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        /* local block-column aj[j] is shifted by cstartbs to get the global block column */
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          /* insert one column (bs scalars) of the bs x bs block; blocks are stored column-major */
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        /* garray maps compressed off-diagonal block columns to global block columns */
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      /* on rank 0 the gathered matrix lives entirely in its diagonal block */
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
/*
   MatView_MPIBAIJ_Binary - Writes the matrix to a binary viewer in the standard
   PETSc binary format: header, per-row lengths, column indices, then values.
   For each scalar row the off-diagonal entries left of the diagonal block come
   first, then the diagonal-block entries, then the remaining off-diagonal
   entries, so columns are emitted in globally increasing order.
*/
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ       *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ       *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ       *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray;
  PetscInt           header[4], M, N, m, rs, cs, bs, nz, cnt, i, j, ja, jb, k, l;
  PetscInt          *rowlens, *colidxs;
  PetscScalar       *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  /* local scalar nonzeros: each block nonzero holds bs*bs scalars */
  nz = bs * bs * (A->nz + B->nz);

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  header[3] = nz;
  /* header[3] becomes the global nonzero count on rank 0 (the writer) */
  PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      /* off-diagonal blocks whose global block column precedes this rank's diagonal range */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      /* diagonal-block columns, shifted to global numbering by cs */
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      /* remaining off-diagonal blocks to the right of the diagonal range */
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      /* same traversal order as the column indices above; blocks are bs2 column-major,
         so scalar (k,l) of block j lives at a[bs*(bs*j + l) + k] */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(0);
}

/*
   MatView_MPIBAIJ - Top-level viewer dispatch: ASCII/draw/socket viewers go to
   the gather-to-rank-0 path, binary viewers to the parallel binary writer.
   Unrecognized viewer types are silently ignored.
*/
PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(0);
}

/*
   MatDestroy_MPIBAIJ - Frees all storage owned by the MPIBAIJ implementation:
   stashes, the A/B sequential blocks, the column map, scatter context, scratch
   arrays and hash table, then clears composed methods.  (Continues below.)
*/
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  /* detach every composed method so a subsequent type change starts clean */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(0);
}

/*
   MatMult_MPIBAIJ - y = A*x.

   Overlaps communication with computation: the scatter of needed ghost values
   of x into a->lvec is started, the on-diagonal product is computed, the
   scatter is completed, and the off-diagonal contribution is added.
*/
PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscInt     nt;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
  PetscCall(VecGetLocalSize(yy, &nt));
  PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  /* diagonal-block product while the ghost values are in flight */
  PetscCall((*a->A->ops->mult)(a->A, xx, yy));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
  PetscFunctionReturn(0);
}

/*
   MatMultAdd_MPIBAIJ - z = y + A*x, with the same communication overlap
   as MatMult_MPIBAIJ.
*/
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(0);
}

/*
   MatMultTranspose_MPIBAIJ - y = A^T*x.

   The off-diagonal transpose product produces contributions destined for other
   ranks in a->lvec; these are summed into y by a reverse scatter with ADD_VALUES.
*/
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/*
   MatMultTransposeAdd_MPIBAIJ - z = y + A^T*x, structured like
   MatMultTranspose_MPIBAIJ.
*/
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
{
  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
  PetscFunctionReturn(0);
}

/*
   MatScale_MPIBAIJ - Scales both local blocks by aa.  (Body continues below.)
*/
PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(0);
}

/*
   MatGetRow_MPIBAIJ - Returns one locally owned (scalar) row with globally
   increasing column indices, merging the diagonal (A) and off-diagonal (B)
   parts.  Uses the rowvalues/rowindices scratch arrays, allocated on first use
   and sized for the longest local block row.  Only one row may be active at a
   time (guarded by getrowactive); pair with MatRestoreRow_MPIBAIJ.
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart;

  /* only request from A/B the outputs the caller actually asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global block column precedes the diagonal range */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          /* v was not requested, so imark has not been computed yet */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}

/*
   MatRestoreRow_MPIBAIJ - Releases the row obtained with MatGetRow_MPIBAIJ;
   only clears the active flag since the data lives in reusable scratch arrays.
*/
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*
   MatZeroEntries_MPIBAIJ - Zeros all entries of both local blocks,
   keeping the nonzero structure.
*/
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

/*
   MatGetInfo_MPIBAIJ - Gathers matrix statistics.  Local totals are the sum
   over the A and B blocks; MAT_GLOBAL_MAX/MAT_GLOBAL_SUM reduce those totals
   across the communicator.  (Body continues below.)
*/
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
  Mat            A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  /* accumulate A-block statistics ... */
  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  /* ... and add the B-block statistics */
  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/*
   MatSetOption_MPIBAIJ - Handles matrix options, forwarding structural options
   to both local blocks, recording stash/hash-table flags locally, and ignoring
   or accepting the remaining recognized options.
*/
PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    /* structural options apply to both local blocks */
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(0);
}

/*
   MatTranspose_MPIBAIJ - Forms B = A^T by inserting each local block column-slice
   of A into the transposed (col, row-range) position of a new (or reused) matrix,
   then assembling.  MAT_INPLACE_MATRIX merges the result back into A.
*/
PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  /* rvals holds the bs global scalar row indices of block-row i of A;
     they become column indices of B */
  PetscCall(PetscMalloc1(bs, &rvals));

  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        /* one column of the bs x bs block of A becomes one row of B */
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = baij->garray[aj[j]] * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B));
  PetscFunctionReturn(0);
}

/*
   MatDiagonalScale_MPIBAIJ - Computes diag(ll)*A*diag(rr).  The right-scaling
   vector must be scattered to ghost positions for the off-diagonal block; that
   communication is overlapped with the left scaling and the diagonal-block work.
*/
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          a = baij->A, b = baij->B;
  PetscInt     s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  }
  PetscFunctionReturn(0);
}

/*
   MatZeroRows_MPIBAIJ - Zeros the locally owned rows listed (global indices in
   rows[]), optionally placing diag on the diagonal and fixing b so that
   b = diag*x in those rows.  Collective: the nonzero-state reduction at the
   end must be reached by all ranks.
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
  PetscInt    *lrows;
  PetscInt     r, len;
  PetscBool    cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  if ((diag != 0.0) && cong) {
    /* square/congruent layout: the diagonal entry lives in l->A, set it directly */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
  } else if (diag != 0.0) {
    /* rectangular: diagonal entries may be new nonzero locations, insert via MatSetValues */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
    PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

/*
   MatZeroRowsColumns_MPIBAIJ - Zeros both the rows and the columns listed
   (global indices), optionally placing diag on the diagonal and updating b for
   the eliminated columns.  Row ownership is resolved with a PetscSF reduction;
   a 0/1 mask scattered to ghost positions marks which ghost columns of the
   off-diagonal block must be eliminated.
*/
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  /* scatter the 0/1 mask so ghost columns know whether they were eliminated */
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
    /* walk scalar row (row % bs) of each bs x bs block in this block row; stride bs
       because blocks are stored column-major */
    aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs;
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
          /* move the eliminated column's contribution to the right-hand side */
          if (x) bb[i] -= aa[0] * xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

/*
   MatSetUnfactored_MPIBAIJ - Clears the factored state of the diagonal block.
*/
PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);

/*
   MatEqual_MPIBAIJ - Collective equality test: compares the local A and B
   blocks and LANDs the result across the communicator.  (Body continues below.)
*/
PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
{
Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
  Mat          a, b, c, d;
  PetscBool    flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  /* skip comparing B blocks if the A blocks already differ */
  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

/* Copy A into B, blockwise when the nonzero patterns match, otherwise via the
   generic entry-by-entry path */
PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}

/* Default setup: preallocate with default (heuristic) row lengths */
PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(0);
}

/* Compute per-block-row nonzero counts for Y+X; called with the sequential
   (local) blocks, so rmap->N here is the local block-row extent */
PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
  Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
  Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(0);
}

/* Y = a*X + Y; fast BLAS path when the patterns coincide, otherwise rebuild Y
   with a merged pattern */
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
  PetscBLASInt bnz, one = 1;
  Mat_SeqBAIJ *x, *y;
  PetscInt     bs2 =
Y->rmap->bs * Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* identical layouts: axpy directly on the stored scalar arrays of both blocks */
    x = (Mat_SeqBAIJ *)xx->A->data;
    y = (Mat_SeqBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    /* patterns differ: preallocate a fresh matrix for the union pattern and
       replace Y's internals with it */
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPIBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(0);
}

/* Complex-conjugate all entries; no-op for real builds */
PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;

    PetscCall(MatConjugate_SeqBAIJ(a->A));
    PetscCall(MatConjugate_SeqBAIJ(a->B));
  }
  PetscFunctionReturn(0);
}

/* Replace every entry with its real part */
PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(0);
}

/* Replace every entry with its imaginary part */
PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(0);
}

/* Extract the submatrix mat[isrow, iscol]; the gathered column IS is cached on
   the result so MAT_REUSE_MATRIX calls avoid repeating the allgather */
PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS       iscol_local;
  PetscInt csize;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol, &csize));
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscCall(ISAllGather(iscol, &iscol_local));
  }
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}

/*
    Not great since it makes two copies of the submatrix, first an
SeqBAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
  This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt  rank, size;
  PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
  PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat          M, Mreuse;
  MatScalar   *vwork, *aa;
  MPI_Comm     comm;
  IS           isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggy */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
  PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));

  /* gather the local submatrix, reusing the cached one when requested */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetSize(Mreuse, &m, &n));
  m = m / bs; /* convert scalar sizes to block counts */
  n = n / bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n * bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize / bs;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    /* both calls issued because this path serves BAIJ and SBAIJ; only the one
       matching M's type takes effect */
    PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(PetscFree2(dlens, olens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqBAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  /* insert the local submatrix row-by-row (blocked) into the result */
  for (i = 0; i < m; i++) {
    row = rstart / bs + i;
    nz  = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz * bs * bs;
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(0);
}

/* Permute A by row permutation rowp and column permutation colp; implemented
   as a submatrix extraction with permuted index sets */
PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  MPI_Comm        comm, pcomm;
  PetscInt        clocal_size, nrows;
  const PetscInt *rows;
  PetscMPIInt     size;
  IS              crowp, lcolp;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* make a collective version of 'rowp' */
  PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
  if (pcomm == comm) {
    crowp = rowp;
  } else {
    PetscCall(ISGetSize(rowp, &nrows));
    PetscCall(ISGetIndices(rowp, &rows));
    PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
    PetscCall(ISRestoreIndices(rowp, &rows));
  }
  PetscCall(ISSetPermutation(crowp));
  /* make a local version of 'colp' */
  PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
  PetscCallMPI(MPI_Comm_size(pcomm, &size));
  if (size == 1) {
    lcolp =
colp;
  } else {
    PetscCall(ISAllGather(colp, &lcolp));
  }
  PetscCall(ISSetPermutation(lcolp));
  /* now we just get the submatrix */
  PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B));
  /* clean up */
  if (pcomm != comm) PetscCall(ISDestroy(&crowp));
  if (size > 1) PetscCall(ISDestroy(&lcolp));
  PetscFunctionReturn(0);
}

/* Return the ghost (off-process) block-column indices referenced by this rank;
   garray maps compressed off-diagonal columns to global block columns */
PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;

  PetscFunctionBegin;
  if (nghosts) *nghosts = B->nbs;
  if (ghosts) *ghosts = baij->garray;
  PetscFunctionReturn(0);
}

/* Build, on every rank, a sequential AIJ matrix holding the full (block-level)
   nonzero structure of A; numerical values are not filled in */
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
{
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  PetscCall(PetscMalloc1(2 * size, &recvcounts));
  displs = recvcounts + size;
  for (i = 0;
i < size; i++) {
    recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
    displs[i]     = A->rmap->range[i] / bs;
  }
  /* in-place allgatherv: each rank contributes its own slice of lens */
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
  b = (Mat_SeqAIJ *)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank] / bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend / bs - A->rmap->rstart / bs;
  cnt        = 0;
  /* merge the diagonal and off-diagonal column lists of each block row so the
     output row is globally sorted: B-columns left of the diagonal block, then
     the A-columns, then the remaining B-columns */
  for (i = 0; i < n; i++) {
    /* put in lower diagonal portion */
    m = bd->i[i + 1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

    /* put in upper diagonal portion */
    while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
  }
  PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  for (i = 0; i < size; i++) {
    recvcounts[i] = 0;
    for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
  }
  displs[0] = 0;
  for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /*--------------------------------------------------------------------
    Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  PetscCall(MatPropagateSymmetryOptions(A, B));
  *newmat = B;
  PetscFunctionReturn(0);
}

/* Parallel SOR: only processor-local variants are supported; each outer
   iteration refreshes the ghost values and folds the off-diagonal coupling
   into a modified right-hand side before sweeping the local block */
PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  Vec          bb1 = NULL;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(0);
  }

  /* bb1 is needed unless a single zero-initial-guess sweep suffices */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update
rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");

  PetscCall(VecDestroy(&bb1));
  PetscFunctionReturn(0);
}

/* Compute a per-column reduction (norm, sum, or mean) over all rows of A;
   results are accumulated locally into a length-N work array and combined
   with an allreduce */
PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
PetscInt m, N, i, *garray = aij->garray; 2263 PetscInt ib, jb, bs = A->rmap->bs; 2264 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2265 MatScalar *a_val = a_aij->a; 2266 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2267 MatScalar *b_val = b_aij->a; 2268 PetscReal *work; 2269 2270 PetscFunctionBegin; 2271 PetscCall(MatGetSize(A, &m, &N)); 2272 PetscCall(PetscCalloc1(N, &work)); 2273 if (type == NORM_2) { 2274 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2275 for (jb = 0; jb < bs; jb++) { 2276 for (ib = 0; ib < bs; ib++) { 2277 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2278 a_val++; 2279 } 2280 } 2281 } 2282 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2283 for (jb = 0; jb < bs; jb++) { 2284 for (ib = 0; ib < bs; ib++) { 2285 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2286 b_val++; 2287 } 2288 } 2289 } 2290 } else if (type == NORM_1) { 2291 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2292 for (jb = 0; jb < bs; jb++) { 2293 for (ib = 0; ib < bs; ib++) { 2294 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2295 a_val++; 2296 } 2297 } 2298 } 2299 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2300 for (jb = 0; jb < bs; jb++) { 2301 for (ib = 0; ib < bs; ib++) { 2302 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2303 b_val++; 2304 } 2305 } 2306 } 2307 } else if (type == NORM_INFINITY) { 2308 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2309 for (jb = 0; jb < bs; jb++) { 2310 for (ib = 0; ib < bs; ib++) { 2311 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2312 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2313 a_val++; 2314 } 2315 } 2316 } 2317 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2318 for (jb = 0; jb < bs; jb++) { 2319 for (ib = 0; ib < bs; ib++) { 2320 int col = garray[b_aij->j[i]] * bs + jb; 2321 work[col] = 
PetscMax(PetscAbsScalar(*b_val), work[col]); 2322 b_val++; 2323 } 2324 } 2325 } 2326 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2327 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2328 for (jb = 0; jb < bs; jb++) { 2329 for (ib = 0; ib < bs; ib++) { 2330 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2331 a_val++; 2332 } 2333 } 2334 } 2335 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2336 for (jb = 0; jb < bs; jb++) { 2337 for (ib = 0; ib < bs; ib++) { 2338 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2339 b_val++; 2340 } 2341 } 2342 } 2343 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2344 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2345 for (jb = 0; jb < bs; jb++) { 2346 for (ib = 0; ib < bs; ib++) { 2347 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2348 a_val++; 2349 } 2350 } 2351 } 2352 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2353 for (jb = 0; jb < bs; jb++) { 2354 for (ib = 0; ib < bs; ib++) { 2355 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2356 b_val++; 2357 } 2358 } 2359 } 2360 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2361 if (type == NORM_INFINITY) { 2362 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2363 } else { 2364 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2365 } 2366 PetscCall(PetscFree(work)); 2367 if (type == NORM_2) { 2368 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2369 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2370 for (i = 0; i < N; i++) reductions[i] /= m; 2371 } 2372 PetscFunctionReturn(0); 2373 } 2374 2375 PetscErrorCode 
MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* the block diagonal lives entirely in the local diagonal block a->A */
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  /* propagate any zero-pivot diagnostics up to the parallel matrix */
  A->factorerrortype             = a->A->factorerrortype;
  A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
  A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
  PetscFunctionReturn(0);
}

/* Y = Y + a*I; ensures the diagonal locations exist before shifting */
PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
  Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
  } else if (!aij->nz) {
    /* preserve the nonew flag across the re-preallocation of the diagonal block */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(0);
}

/* Report whether any diagonal entry is structurally missing; d (if requested)
   is returned as a global block-row index */
PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart / A->rmap->bs; /* convert local block row to global */
  }
  PetscFunctionReturn(0);
}

/* Return the sequential diagonal block (no copy, no reference taken) */
PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPIBAIJ *)A->data)->A;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Dispatch table for MATMPIBAIJ; slot indices follow struct _MatOps */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                       /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                       /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                       /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                       /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                       /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                       /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                       /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*74*/ NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                       /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL};

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);

/* Set the exact nonzero structure (and optionally values) of B from a
   block-compressed-row description: ii/jj are local block-row pointers and
   block-column indices, V (optional) holds bs*bs values per block */
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ = NULL;
  PetscScalar    *values = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cstart = B->cmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  /* count diagonal- vs off-diagonal-part blocks per row for preallocation */
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    nz_max = PetscMax(nz_max, nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j = 0; j < nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  values = (PetscScalar *)V;
  /* when no values supplied, insert zeros so the structure is still created */
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* all inserted entries are locally owned, so skip off-process communication */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}

/*@C
  MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values

  Collective

  Input Parameters:
+ B - the matrix
. bs - the block size
.
i - the indices into j for the start of each local row (starts with zero)
. j - the column indices for each local row (starts with zero) these must be sorted for each row
- v - optional values in the matrix

  Level: advanced

  Notes:
  The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs
  may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
  over rows within a block and the last index is over columns within a block row. Fortran programs will likely set
  `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
  block column and the second index is over columns within a block.

  Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ`
@*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(0);
}

/* Preallocate the diagonal (A) and off-diagonal (B) sequential blocks and
   (re)initialize all block-size-derived bookkeeping in the Mat_MPIBAIJ */
PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
{
  Mat_MPIBAIJ *b;
  PetscInt     i;
  PetscMPIInt  size;

  PetscFunctionBegin;
  PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));

  if (d_nnz) {
    for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
  }
  if (o_nnz) {
    for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
  }

  /* cache block-scaled sizes and ownership ranges */
  b       = (Mat_MPIBAIJ *)B->data;
  b->bs2  = bs * bs;
  b->mbs  = B->rmap->n / bs;
  b->nbs  = B->cmap->n / bs;
  b->Mbs  = B->rmap->N / bs;
  b->Nbs  = B->cmap->N / bs;

  for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
  b->rstartbs = B->rmap->rstart / bs;
  b->rendbs   = B->rmap->rend / bs;
  b->cstartbs = B->cmap->rstart / bs;
  b->cendbs   = B->cmap->rend / bs;

  /* drop any stale column map / communication structures from a previous preallocation */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetType(b->B, MATSEQBAIJ));

  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetType(b->A, MATSEQBAIJ));
    PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
  }

  PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
  PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
  PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  PetscCall(PetscMalloc1(M + 1, &ii));
  ii[0] = 0;
  for (i = 0; i < M; i++) {
    PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
    PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
    ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j = id[i]; j < id[i + 1]; j++) {
      if (jd[j] == i) {
        ii[i + 1]--;
        break;
      }
    }
  }
  PetscCall(PetscMalloc1(ii[M], &jj));
  cnt = 0;
  for (i
= 0; i < M; i++) { 2777 for (j = io[i]; j < io[i + 1]; j++) { 2778 if (garray[jo[j]] > rstart) break; 2779 jj[cnt++] = garray[jo[j]]; 2780 } 2781 for (k = id[i]; k < id[i + 1]; k++) { 2782 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2783 } 2784 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2785 } 2786 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2787 PetscFunctionReturn(0); 2788 } 2789 2790 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2791 2792 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2793 2794 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 2795 { 2796 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2797 Mat_MPIAIJ *b; 2798 Mat B; 2799 2800 PetscFunctionBegin; 2801 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2802 2803 if (reuse == MAT_REUSE_MATRIX) { 2804 B = *newmat; 2805 } else { 2806 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2807 PetscCall(MatSetType(B, MATMPIAIJ)); 2808 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2809 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2810 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2811 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2812 } 2813 b = (Mat_MPIAIJ *)B->data; 2814 2815 if (reuse == MAT_REUSE_MATRIX) { 2816 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2817 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2818 } else { 2819 PetscBool3 sym = A->symmetric, hermitian = A->hermitian, structurally_symmetric = A->structurally_symmetric, spd = A->spd; 2820 PetscCall(MatDestroy(&b->A)); 2821 PetscCall(MatDestroy(&b->B)); 2822 PetscCall(MatDisAssemble_MPIBAIJ(A)); 2823 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2824 
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
    /* reassemble A (disassembled above) and restore the saved symmetry flags */
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    A->symmetric              = sym;
    A->hermitian              = hermitian;
    A->structurally_symmetric = structurally_symmetric;
    A->spd                    = spd;
  }
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}

/*MC
   MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

   Options Database Keys:
+ -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
. -mat_block_size <bs> - set the blocksize used to store the matrix
. -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
- -mat_use_hash_table <fact> - set hash table factor

  Level: beginner

   Note:
    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type.
In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ`
M*/

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);

/* Constructor for the MATMPIBAIJ type: allocates Mat_MPIBAIJ, installs the operations table, and registers conversions and composed methods */
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ *b;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data = (void *)b;

  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* register type conversions and composed (type-dispatched) methods */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));

  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    /* factors <= 1.0 would make the hash table too small; fall back to the default */
    if (fact <= 1.0) fact = 1.39;
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}

/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used
for block sparse matrices.

   This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
   and `MATMPIBAIJ` otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
M*/

/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  B - the matrix
.  bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  d_nz - number of block nonzeros per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local matrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - set hash table factor

   Notes:
   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the `MATSEQBAIJ` format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call `MatGetInfo()` to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Level: intermediate

.seealso: `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatch to the type-specific implementation, e.g. MatMPIBAIJSetPreallocation_MPIBAIJ() */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(0);
}

/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks.
The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
.  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
.  d_nz - number of nonzero blocks per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local matrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - set hash table factor

   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the `MATSEQBAIJ` format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
@*/
PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* sequential communicator gets the sequential type directly */
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQBAIJ));
    PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
  }
  PetscFunctionReturn(0);
}

/* Duplicate a MATMPIBAIJ matrix: copies layouts, block metadata, column map, and (per cpvalues) the sequential blocks */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a             = (Mat_MPIBAIJ *)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: counters start fresh; only configuration is copied */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
  } else a->colmap = NULL;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len, &a->garray));
    PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
  PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
PetscFunctionReturn(0); 3234 } 3235 3236 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3237 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 3238 { 3239 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3240 PetscInt *rowidxs, *colidxs, rs, cs, ce; 3241 PetscScalar *matvals; 3242 3243 PetscFunctionBegin; 3244 PetscCall(PetscViewerSetUp(viewer)); 3245 3246 /* read in matrix header */ 3247 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3248 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3249 M = header[1]; 3250 N = header[2]; 3251 nz = header[3]; 3252 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3253 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3254 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3255 3256 /* set block sizes from the viewer's .info file */ 3257 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3258 /* set local sizes if not set already */ 3259 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3260 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3261 /* set global sizes if not set already */ 3262 if (mat->rmap->N < 0) mat->rmap->N = M; 3263 if (mat->cmap->N < 0) mat->cmap->N = N; 3264 PetscCall(PetscLayoutSetUp(mat->rmap)); 3265 PetscCall(PetscLayoutSetUp(mat->cmap)); 3266 3267 /* check if the matrix sizes are correct */ 3268 PetscCall(MatGetSize(mat, &rows, &cols)); 3269 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT 
")", M, N, rows, cols); 3270 PetscCall(MatGetBlockSize(mat, &bs)); 3271 PetscCall(MatGetLocalSize(mat, &m, &n)); 3272 PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL)); 3273 PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce)); 3274 mbs = m / bs; 3275 nbs = n / bs; 3276 3277 /* read in row lengths and build row indices */ 3278 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3279 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3280 rowidxs[0] = 0; 3281 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3282 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3283 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3284 3285 /* read in column indices and matrix values */ 3286 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3287 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3288 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3289 3290 { /* preallocate matrix storage */ 3291 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3292 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3293 PetscBool sbaij, done; 3294 PetscInt *d_nnz, *o_nnz; 3295 3296 PetscCall(PetscBTCreate(nbs, &bt)); 3297 PetscCall(PetscHSetICreate(&ht)); 3298 PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz)); 3299 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij)); 3300 for (i = 0; i < mbs; i++) { 3301 PetscCall(PetscBTMemzero(nbs, bt)); 3302 PetscCall(PetscHSetIClear(ht)); 3303 for (k = 0; k < bs; k++) { 3304 PetscInt row = bs * i + k; 3305 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3306 PetscInt col = colidxs[j]; 3307 if (!sbaij || col >= row) { 3308 if (col >= 
cs && col < ce) { 3309 if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++; 3310 } else { 3311 PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done)); 3312 if (done) o_nnz[i]++; 3313 } 3314 } 3315 } 3316 } 3317 } 3318 PetscCall(PetscBTDestroy(&bt)); 3319 PetscCall(PetscHSetIDestroy(&ht)); 3320 PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3321 PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3322 PetscCall(PetscFree2(d_nnz, o_nnz)); 3323 } 3324 3325 /* store matrix values */ 3326 for (i = 0; i < m; i++) { 3327 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1]; 3328 PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES)); 3329 } 3330 3331 PetscCall(PetscFree(rowidxs)); 3332 PetscCall(PetscFree2(colidxs, matvals)); 3333 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3334 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3335 PetscFunctionReturn(0); 3336 } 3337 3338 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer) 3339 { 3340 PetscBool isbinary; 3341 3342 PetscFunctionBegin; 3343 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3344 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3345 PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer)); 3346 PetscFunctionReturn(0); 3347 } 3348 3349 /*@ 3350 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table 3351 3352 Input Parameters: 3353 + mat - the matrix 3354 - fact - factor 3355 3356 Options Database Key: 3357 . 
-mat_use_hash_table <fact> - provide the factor

   Level: advanced

.seealso: `MATMPIBAIJ`, `MatSetOption()`
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation (MatSetHashTableFactor_MPIBAIJ for MATMPIBAIJ) */
  PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
  PetscFunctionReturn(0);
}

/* Type-specific setter: stores the hash table sizing factor in the MATMPIBAIJ data structure */
PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ *)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}

/* Access the diagonal (Ad) and off-diagonal (Ao) sequential blocks of a MATMPIBAIJ matrix and its local-to-global column map */
PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscBool    flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

/*@C
  MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`

  Collective

  Input Parameters:
+ mat - the matrix
. min - number of input rows
. im - input rows
. nin - number of input columns
. in - input columns
. v - numerical values input
- addvin - `INSERT_VALUES` or `ADD_VALUES`

  Developer Note:
  This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
  Level: advanced

.seealso: `MatSetValuesBlocked()`
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
{
  /* convert input arguments to C version (Fortran passes everything by reference) */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  /* lazily allocate the bs x bs scratch block used when a copy of the input values is needed */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows (row-oriented) or columns of the input array v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative row indices are ignored by convention */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* block row is owned by this process */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required: point barray directly into v */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required: gather the (i,j) block of v into the scratch buffer */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column in the diagonal portion */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue; /* negative column indices are ignored by convention */
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* translate the global block column to the off-diagonal part's local numbering */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            /* column not present in the assembled off-diagonal part: fall back to
               disassembled (global-numbering) form unless new nonzeros are forbidden */
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      /* block row owned by another process: stash the values for communication at assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@
  MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block
  CSR format the local rows.

  Collective

  Input Parameters:
+ comm - MPI communicator
. bs - the block size, only a block size of 1 is supported
. m - number of local rows (Cannot be `PETSC_DECIDE`)
. n - This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
      calculated if N is given) For square matrices n is almost always m.
. M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
. j - column indices
- a - matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of a[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
  the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
  block, followed by the second column of the first block etc etc.
That is, the blocks are contiguous in memory 3564 with column-major ordering within blocks. 3565 3566 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3567 3568 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3569 `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3570 @*/ 3571 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 3572 { 3573 PetscFunctionBegin; 3574 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3575 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3576 PetscCall(MatCreate(comm, mat)); 3577 PetscCall(MatSetSizes(*mat, m, n, M, N)); 3578 PetscCall(MatSetType(*mat, MATMPIBAIJ)); 3579 PetscCall(MatSetBlockSize(*mat, bs)); 3580 PetscCall(MatSetUp(*mat)); 3581 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 3582 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 3583 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 3584 PetscFunctionReturn(0); 3585 } 3586 3587 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3588 { 3589 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 3590 PetscInt *indx; 3591 PetscScalar *values; 3592 3593 PetscFunctionBegin; 3594 PetscCall(MatGetSize(inmat, &m, &N)); 3595 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3596 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 3597 PetscInt *dnz, *onz, mbs, Nbs, nbs; 3598 PetscInt *bindx, rmax = a->rmax, j; 3599 PetscMPIInt rank, size; 3600 3601 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3602 mbs = m / bs; 3603 Nbs = N / cbs; 3604 if (n == PETSC_DECIDE) 
PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3605 nbs = n / cbs; 3606 3607 PetscCall(PetscMalloc1(rmax, &bindx)); 3608 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 3609 3610 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3611 PetscCallMPI(MPI_Comm_rank(comm, &size)); 3612 if (rank == size - 1) { 3613 /* Check sum(nbs) = Nbs */ 3614 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 3615 } 3616 3617 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3618 for (i = 0; i < mbs; i++) { 3619 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3620 nnz = nnz / bs; 3621 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 3622 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 3623 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3624 } 3625 PetscCall(PetscFree(bindx)); 3626 3627 PetscCall(MatCreate(comm, outmat)); 3628 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 3629 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 3630 PetscCall(MatSetType(*outmat, MATBAIJ)); 3631 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 3632 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3633 MatPreallocateEnd(dnz, onz); 3634 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3635 } 3636 3637 /* numeric phase */ 3638 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3639 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3640 3641 for (i = 0; i < m; i++) { 3642 PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3643 Ii = i + rstart; 3644 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3645 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3646 } 3647 
PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3648 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3649 PetscFunctionReturn(0); 3650 } 3651