1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/mat/impls/sbaij/seq/sbaij.h>
3 #include <petscbt.h>
4 #include <../src/mat/utils/freespace.h>
5
MatFactorGetSolverType_petsc(Mat A,MatSolverType * type)6 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
7 {
8 PetscFunctionBegin;
9 *type = MATSOLVERPETSC;
10 PetscFunctionReturn(PETSC_SUCCESS);
11 }
12
MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat * B)13 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
14 {
15 PetscInt n = A->rmap->n;
16
17 PetscFunctionBegin;
18 if (PetscDefined(USE_COMPLEX) && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
19 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n"));
20 *B = NULL;
21 PetscFunctionReturn(PETSC_SUCCESS);
22 }
23
24 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
25 PetscCall(MatSetSizes(*B, n, n, n, n));
26 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
27 PetscCall(MatSetType(*B, MATSEQAIJ));
28
29 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
30 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
31
32 PetscCall(MatSetBlockSizesFromMats(*B, A, A));
33 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
34 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
35 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
36 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
37 PetscCall(MatSetType(*B, MATSEQSBAIJ));
38 PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
39
40 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ;
41 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
42 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
43 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
44 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
45 (*B)->factortype = ftype;
46
47 PetscCall(PetscFree((*B)->solvertype));
48 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
49 (*B)->canuseordering = PETSC_TRUE;
50 PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
51 PetscFunctionReturn(PETSC_SUCCESS);
52 }
53
MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo * info)54 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
55 {
56 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
57 IS isicol;
58 const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
59 PetscInt i, n = A->rmap->n;
60 PetscInt *bi, *bj;
61 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
62 PetscReal f;
63 PetscInt nlnk, *lnk, k, **bi_ptr;
64 PetscFreeSpaceList free_space = NULL, current_space = NULL;
65 PetscBT lnkbt;
66 PetscBool diagDense;
67
68 PetscFunctionBegin;
69 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
70 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
71 PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
72
73 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
74 PetscCall(ISGetIndices(isrow, &r));
75 PetscCall(ISGetIndices(isicol, &ic));
76
77 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
78 PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
79 PetscCall(PetscMalloc1(n + 1, &bdiag));
80 bi[0] = bdiag[0] = 0;
81
82 /* linked list for storing column indices of the active row */
83 nlnk = n + 1;
84 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
85
86 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
87
88 /* initial FreeSpace size is f*(ai[n]+1) */
89 f = info->fill;
90 if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
91 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
92 current_space = free_space;
93
94 for (i = 0; i < n; i++) {
95 /* copy previous fill into linked list */
96 nzi = 0;
97 nnz = ai[r[i] + 1] - ai[r[i]];
98 ajtmp = aj + ai[r[i]];
99 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
100 nzi += nlnk;
101
102 /* add pivot rows into linked list */
103 row = lnk[n];
104 while (row < i) {
105 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
106 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
107 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
108 nzi += nlnk;
109 row = lnk[row];
110 }
111 bi[i + 1] = bi[i] + nzi;
112 im[i] = nzi;
113
114 /* mark bdiag */
115 nzbd = 0;
116 nnz = nzi;
117 k = lnk[n];
118 while (nnz-- && k < i) {
119 nzbd++;
120 k = lnk[k];
121 }
122 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
123
124 /* if free space is not available, make more free space */
125 if (current_space->local_remaining < nzi) {
126 /* estimated additional space needed */
127 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
128 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
129 reallocs++;
130 }
131
132 /* copy data into free space, then initialize lnk */
133 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
134
135 bi_ptr[i] = current_space->array;
136 current_space->array += nzi;
137 current_space->local_used += nzi;
138 current_space->local_remaining -= nzi;
139 }
140
141 PetscCall(ISRestoreIndices(isrow, &r));
142 PetscCall(ISRestoreIndices(isicol, &ic));
143
144 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
145 PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj));
146 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
147 PetscCall(PetscLLDestroy(lnk, lnkbt));
148 PetscCall(PetscFree2(bi_ptr, im));
149
150 /* put together the new matrix */
151 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
152 b = (Mat_SeqAIJ *)B->data;
153 b->free_ij = PETSC_TRUE;
154 PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
155 b->free_a = PETSC_TRUE;
156 b->j = bj;
157 b->i = bi;
158 b->diag = bdiag;
159 b->ilen = NULL;
160 b->imax = NULL;
161 b->row = isrow;
162 b->col = iscol;
163 PetscCall(PetscObjectReference((PetscObject)isrow));
164 PetscCall(PetscObjectReference((PetscObject)iscol));
165 b->icol = isicol;
166 PetscCall(PetscMalloc1(n, &b->solve_work));
167
168 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
169 b->maxnz = b->nz = bdiag[0] + 1;
170
171 B->factortype = MAT_FACTOR_LU;
172 B->info.factor_mallocs = reallocs;
173 B->info.fill_ratio_given = f;
174
175 if (ai[n]) {
176 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
177 } else {
178 B->info.fill_ratio_needed = 0.0;
179 }
180 #if defined(PETSC_USE_INFO)
181 if (ai[n] != 0) {
182 PetscReal af = B->info.fill_ratio_needed;
183 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
184 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
185 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
186 PetscCall(PetscInfo(A, "for best performance.\n"));
187 } else {
188 PetscCall(PetscInfo(A, "Empty matrix\n"));
189 }
190 #endif
191 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
192 if (a->inode.size_csr) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
193 PetscCall(MatSeqAIJCheckInode_FactorLU(B));
194 PetscFunctionReturn(PETSC_SUCCESS);
195 }
196
197 /*
198 Trouble in factorization, should we dump the original matrix?
199 */
MatFactorDumpMatrix(Mat A)200 PetscErrorCode MatFactorDumpMatrix(Mat A)
201 {
202 PetscBool flg = PETSC_FALSE;
203
204 PetscFunctionBegin;
205 PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
206 if (flg) {
207 PetscViewer viewer;
208 char filename[PETSC_MAX_PATH_LEN];
209
210 PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
211 PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
212 PetscCall(MatView(A, viewer));
213 PetscCall(PetscViewerDestroy(&viewer));
214 }
215 PetscFunctionReturn(PETSC_SUCCESS);
216 }
217
MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo * info)218 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
219 {
220 Mat C = B;
221 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
222 IS isrow = b->row, isicol = b->icol;
223 const PetscInt *r, *ic, *ics;
224 const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
225 PetscInt i, j, k, nz, nzL, row, *pj;
226 const PetscInt *ajtmp, *bjtmp;
227 MatScalar *rtmp, *pc, multiplier, *pv;
228 const MatScalar *aa, *v;
229 MatScalar *ba;
230 PetscBool row_identity, col_identity;
231 FactorShiftCtx sctx;
232 const PetscInt *ddiag;
233 PetscReal rs;
234 MatScalar d;
235
236 PetscFunctionBegin;
237 PetscCall(MatSeqAIJGetArrayRead(A, &aa));
238 PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
239 /* MatPivotSetUp(): initialize shift context sctx */
240 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
241
242 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
243 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
244 sctx.shift_top = info->zeropivot;
245 for (i = 0; i < n; i++) {
246 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
247 d = aa[ddiag[i]];
248 rs = -PetscAbsScalar(d) - PetscRealPart(d);
249 v = aa + ai[i];
250 nz = ai[i + 1] - ai[i];
251 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
252 if (rs > sctx.shift_top) sctx.shift_top = rs;
253 }
254 sctx.shift_top *= 1.1;
255 sctx.nshift_max = 5;
256 sctx.shift_lo = 0.;
257 sctx.shift_hi = 1.;
258 }
259
260 PetscCall(ISGetIndices(isrow, &r));
261 PetscCall(ISGetIndices(isicol, &ic));
262 PetscCall(PetscMalloc1(n + 1, &rtmp));
263 ics = ic;
264
265 do {
266 sctx.newshift = PETSC_FALSE;
267 for (i = 0; i < n; i++) {
268 /* zero rtmp */
269 /* L part */
270 nz = bi[i + 1] - bi[i];
271 bjtmp = bj + bi[i];
272 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
273
274 /* U part */
275 nz = bdiag[i] - bdiag[i + 1];
276 bjtmp = bj + bdiag[i + 1] + 1;
277 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
278
279 /* load in initial (unfactored row) */
280 nz = ai[r[i] + 1] - ai[r[i]];
281 ajtmp = aj + ai[r[i]];
282 v = aa + ai[r[i]];
283 for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
284 /* ZeropivotApply() */
285 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
286
287 /* elimination */
288 bjtmp = bj + bi[i];
289 row = *bjtmp++;
290 nzL = bi[i + 1] - bi[i];
291 for (k = 0; k < nzL; k++) {
292 pc = rtmp + row;
293 if (*pc != 0.0) {
294 pv = ba + bdiag[row];
295 multiplier = *pc * (*pv);
296 *pc = multiplier;
297
298 pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
299 pv = ba + bdiag[row + 1] + 1;
300 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
301
302 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
303 PetscCall(PetscLogFlops(1 + 2.0 * nz));
304 }
305 row = *bjtmp++;
306 }
307
308 /* finished row so stick it into b->a */
309 rs = 0.0;
310 /* L part */
311 pv = ba + bi[i];
312 pj = b->j + bi[i];
313 nz = bi[i + 1] - bi[i];
314 for (j = 0; j < nz; j++) {
315 pv[j] = rtmp[pj[j]];
316 rs += PetscAbsScalar(pv[j]);
317 }
318
319 /* U part */
320 pv = ba + bdiag[i + 1] + 1;
321 pj = b->j + bdiag[i + 1] + 1;
322 nz = bdiag[i] - bdiag[i + 1] - 1;
323 for (j = 0; j < nz; j++) {
324 pv[j] = rtmp[pj[j]];
325 rs += PetscAbsScalar(pv[j]);
326 }
327
328 sctx.rs = rs;
329 sctx.pv = rtmp[i];
330 PetscCall(MatPivotCheck(B, A, info, &sctx, i));
331 if (sctx.newshift) break; /* break for-loop */
332 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
333
334 /* Mark diagonal and invert diagonal for simpler triangular solves */
335 pv = ba + bdiag[i];
336 *pv = 1.0 / rtmp[i];
337
338 } /* endof for (i=0; i<n; i++) { */
339
340 /* MatPivotRefine() */
341 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
342 /*
343 * if no shift in this attempt & shifting & started shifting & can refine,
344 * then try lower shift
345 */
346 sctx.shift_hi = sctx.shift_fraction;
347 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
348 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
349 sctx.newshift = PETSC_TRUE;
350 sctx.nshift++;
351 }
352 } while (sctx.newshift);
353
354 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
355 PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));
356
357 PetscCall(PetscFree(rtmp));
358 PetscCall(ISRestoreIndices(isicol, &ic));
359 PetscCall(ISRestoreIndices(isrow, &r));
360
361 PetscCall(ISIdentity(isrow, &row_identity));
362 PetscCall(ISIdentity(isicol, &col_identity));
363 if (b->inode.size_csr) {
364 C->ops->solve = MatSolve_SeqAIJ_Inode;
365 } else if (row_identity && col_identity) {
366 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
367 } else {
368 C->ops->solve = MatSolve_SeqAIJ;
369 }
370 C->ops->solveadd = MatSolveAdd_SeqAIJ;
371 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
372 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
373 C->ops->matsolve = MatMatSolve_SeqAIJ;
374 C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ;
375 C->assembled = PETSC_TRUE;
376 C->preallocated = PETSC_TRUE;
377
378 PetscCall(PetscLogFlops(C->cmap->n));
379
380 /* MatShiftView(A,info,&sctx) */
381 if (sctx.nshift) {
382 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
383 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
384 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
385 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
386 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
387 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
388 }
389 }
390 PetscFunctionReturn(PETSC_SUCCESS);
391 }
392
393 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
394 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
395 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);
396
MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo * info)397 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
398 {
399 Mat C = B;
400 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
401 IS isrow = b->row, isicol = b->icol;
402 const PetscInt *r, *ic, *ics;
403 PetscInt nz, row, i, j, n = A->rmap->n, diag;
404 const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
405 const PetscInt *ajtmp, *bjtmp, *ddiag, *pj;
406 MatScalar *pv, *rtmp, *pc, multiplier, d;
407 const MatScalar *v, *aa;
408 MatScalar *ba;
409 PetscReal rs = 0.0;
410 FactorShiftCtx sctx;
411 PetscBool row_identity, col_identity;
412
413 PetscFunctionBegin;
414 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
415
416 PetscCall(MatSeqAIJGetArrayRead(A, &aa));
417 PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
418 /* MatPivotSetUp(): initialize shift context sctx */
419 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
420
421 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
422 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
423 sctx.shift_top = info->zeropivot;
424 for (i = 0; i < n; i++) {
425 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
426 d = aa[ddiag[i]];
427 rs = -PetscAbsScalar(d) - PetscRealPart(d);
428 v = aa + ai[i];
429 nz = ai[i + 1] - ai[i];
430 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
431 if (rs > sctx.shift_top) sctx.shift_top = rs;
432 }
433 sctx.shift_top *= 1.1;
434 sctx.nshift_max = 5;
435 sctx.shift_lo = 0.;
436 sctx.shift_hi = 1.;
437 }
438
439 PetscCall(ISGetIndices(isrow, &r));
440 PetscCall(ISGetIndices(isicol, &ic));
441 PetscCall(PetscMalloc1(n + 1, &rtmp));
442 ics = ic;
443
444 do {
445 sctx.newshift = PETSC_FALSE;
446 for (i = 0; i < n; i++) {
447 nz = bi[i + 1] - bi[i];
448 bjtmp = bj + bi[i];
449 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
450
451 /* load in initial (unfactored row) */
452 nz = ai[r[i] + 1] - ai[r[i]];
453 ajtmp = aj + ai[r[i]];
454 v = aa + ai[r[i]];
455 for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
456 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
457
458 row = *bjtmp++;
459 while (row < i) {
460 pc = rtmp + row;
461 if (*pc != 0.0) {
462 pv = ba + ddiag[row];
463 pj = b->j + ddiag[row] + 1;
464 multiplier = *pc / *pv++;
465 *pc = multiplier;
466 nz = bi[row + 1] - ddiag[row] - 1;
467 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
468 PetscCall(PetscLogFlops(1 + 2.0 * nz));
469 }
470 row = *bjtmp++;
471 }
472 /* finished row so stick it into b->a */
473 pv = ba + bi[i];
474 pj = b->j + bi[i];
475 nz = bi[i + 1] - bi[i];
476 diag = ddiag[i] - bi[i];
477 rs = 0.0;
478 for (j = 0; j < nz; j++) {
479 pv[j] = rtmp[pj[j]];
480 rs += PetscAbsScalar(pv[j]);
481 }
482 rs -= PetscAbsScalar(pv[diag]);
483
484 sctx.rs = rs;
485 sctx.pv = pv[diag];
486 PetscCall(MatPivotCheck(B, A, info, &sctx, i));
487 if (sctx.newshift) break;
488 pv[diag] = sctx.pv;
489 }
490
491 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
492 /*
493 * if no shift in this attempt & shifting & started shifting & can refine,
494 * then try lower shift
495 */
496 sctx.shift_hi = sctx.shift_fraction;
497 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
498 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
499 sctx.newshift = PETSC_TRUE;
500 sctx.nshift++;
501 }
502 } while (sctx.newshift);
503
504 /* invert diagonal entries for simpler triangular solves */
505 for (i = 0; i < n; i++) ba[ddiag[i]] = 1.0 / ba[ddiag[i]];
506 PetscCall(PetscFree(rtmp));
507 PetscCall(ISRestoreIndices(isicol, &ic));
508 PetscCall(ISRestoreIndices(isrow, &r));
509 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
510 PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));
511
512 PetscCall(ISIdentity(isrow, &row_identity));
513 PetscCall(ISIdentity(isicol, &col_identity));
514 if (row_identity && col_identity) {
515 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
516 } else {
517 C->ops->solve = MatSolve_SeqAIJ_inplace;
518 }
519 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
520 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
521 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
522 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace;
523 C->ops->matsolvetranspose = NULL;
524
525 C->assembled = PETSC_TRUE;
526 C->preallocated = PETSC_TRUE;
527
528 PetscCall(PetscLogFlops(C->cmap->n));
529 if (sctx.nshift) {
530 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
531 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
532 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
533 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
534 }
535 }
536 C->ops->solve = MatSolve_SeqAIJ_inplace;
537 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
538
539 PetscCall(MatSeqAIJCheckInode(C));
540 PetscFunctionReturn(PETSC_SUCCESS);
541 }
542
543 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);
544
545 /*
546 This routine implements inplace ILU(0) with row or/and column permutations.
547 Input:
548 A - original matrix
549 Output;
550 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
551 a->j (col index) is permuted by the inverse of colperm, then sorted
552 a->a reordered accordingly with a->j
553 a->diag (ptr to diagonal elements) is updated.
554 */
MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo * info)555 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
556 {
557 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
558 IS isrow = a->row, isicol = a->icol;
559 const PetscInt *r, *ic, *ics;
560 PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
561 PetscInt *ajtmp, nz, row;
562 PetscInt nbdiag, *pj;
563 PetscScalar *rtmp, *pc, multiplier, d;
564 MatScalar *pv, *v;
565 PetscReal rs;
566 FactorShiftCtx sctx;
567 MatScalar *aa, *vtmp;
568 PetscInt *diag;
569
570 PetscFunctionBegin;
571 PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
572
573 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, (const PetscInt **)&diag, NULL));
574 PetscCall(MatSeqAIJGetArray(A, &aa));
575 /* MatPivotSetUp(): initialize shift context sctx */
576 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
577
578 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
579 const PetscInt *ddiag;
580
581 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
582 sctx.shift_top = info->zeropivot;
583 for (i = 0; i < n; i++) {
584 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
585 d = aa[ddiag[i]];
586 rs = -PetscAbsScalar(d) - PetscRealPart(d);
587 vtmp = aa + ai[i];
588 nz = ai[i + 1] - ai[i];
589 for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
590 if (rs > sctx.shift_top) sctx.shift_top = rs;
591 }
592 sctx.shift_top *= 1.1;
593 sctx.nshift_max = 5;
594 sctx.shift_lo = 0.;
595 sctx.shift_hi = 1.;
596 }
597
598 PetscCall(ISGetIndices(isrow, &r));
599 PetscCall(ISGetIndices(isicol, &ic));
600 PetscCall(PetscMalloc1(n + 1, &rtmp));
601 PetscCall(PetscArrayzero(rtmp, n + 1));
602 ics = ic;
603
604 #if defined(MV)
605 sctx.shift_top = 0.;
606 sctx.nshift_max = 0;
607 sctx.shift_lo = 0.;
608 sctx.shift_hi = 0.;
609 sctx.shift_fraction = 0.;
610
611 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
612 sctx.shift_top = 0.;
613 for (i = 0; i < n; i++) {
614 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
615 d = aa[diag[i]];
616 rs = -PetscAbsScalar(d) - PetscRealPart(d);
617 v = aa + ai[i];
618 nz = ai[i + 1] - ai[i];
619 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
620 if (rs > sctx.shift_top) sctx.shift_top = rs;
621 }
622 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
623 sctx.shift_top *= 1.1;
624 sctx.nshift_max = 5;
625 sctx.shift_lo = 0.;
626 sctx.shift_hi = 1.;
627 }
628
629 sctx.shift_amount = 0.;
630 sctx.nshift = 0;
631 #endif
632
633 do {
634 sctx.newshift = PETSC_FALSE;
635 for (i = 0; i < n; i++) {
636 /* load in initial unfactored row */
637 nz = ai[r[i] + 1] - ai[r[i]];
638 ajtmp = aj + ai[r[i]];
639 v = aa + ai[r[i]];
640 /* sort permuted ajtmp and values v accordingly */
641 for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
642 PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
643
644 diag[r[i]] = ai[r[i]];
645 for (j = 0; j < nz; j++) {
646 rtmp[ajtmp[j]] = v[j];
647 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
648 }
649 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
650
651 row = *ajtmp++;
652 while (row < i) {
653 pc = rtmp + row;
654 if (*pc != 0.0) {
655 pv = aa + diag[r[row]];
656 pj = aj + diag[r[row]] + 1;
657
658 multiplier = *pc / *pv++;
659 *pc = multiplier;
660 nz = ai[r[row] + 1] - diag[r[row]] - 1;
661 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
662 PetscCall(PetscLogFlops(1 + 2.0 * nz));
663 }
664 row = *ajtmp++;
665 }
666 /* finished row so overwrite it onto aa */
667 pv = aa + ai[r[i]];
668 pj = aj + ai[r[i]];
669 nz = ai[r[i] + 1] - ai[r[i]];
670 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
671
672 rs = 0.0;
673 for (j = 0; j < nz; j++) {
674 pv[j] = rtmp[pj[j]];
675 if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
676 }
677
678 sctx.rs = rs;
679 sctx.pv = pv[nbdiag];
680 PetscCall(MatPivotCheck(B, A, info, &sctx, i));
681 if (sctx.newshift) break;
682 pv[nbdiag] = sctx.pv;
683 }
684
685 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
686 /*
687 * if no shift in this attempt & shifting & started shifting & can refine,
688 * then try lower shift
689 */
690 sctx.shift_hi = sctx.shift_fraction;
691 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
692 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
693 sctx.newshift = PETSC_TRUE;
694 sctx.nshift++;
695 }
696 } while (sctx.newshift);
697
698 /* invert diagonal entries for simpler triangular solves */
699 for (i = 0; i < n; i++) aa[diag[r[i]]] = 1.0 / aa[diag[r[i]]];
700
701 PetscCall(MatSeqAIJRestoreArray(A, &aa));
702 PetscCall(PetscFree(rtmp));
703 PetscCall(ISRestoreIndices(isicol, &ic));
704 PetscCall(ISRestoreIndices(isrow, &r));
705
706 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
707 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
708 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
709 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
710
711 A->assembled = PETSC_TRUE;
712 A->preallocated = PETSC_TRUE;
713
714 PetscCall(PetscLogFlops(A->cmap->n));
715 if (sctx.nshift) {
716 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
717 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
718 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
719 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
720 }
721 }
722 PetscFunctionReturn(PETSC_SUCCESS);
723 }
724
MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo * info)725 PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
726 {
727 Mat C;
728
729 PetscFunctionBegin;
730 PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
731 PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
732 PetscCall(MatLUFactorNumeric(C, A, info));
733
734 A->ops->solve = C->ops->solve;
735 A->ops->solvetranspose = C->ops->solvetranspose;
736
737 PetscCall(MatHeaderMerge(A, &C));
738 PetscFunctionReturn(PETSC_SUCCESS);
739 }
740
MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)741 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
742 {
743 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
744 IS iscol = a->col, isrow = a->row;
745 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
746 PetscInt nz;
747 const PetscInt *rout, *cout, *r, *c;
748 PetscScalar *x, *tmp, *tmps, sum;
749 const PetscScalar *b;
750 const MatScalar *aa, *v;
751 const PetscInt *adiag;
752
753 PetscFunctionBegin;
754 if (!n) PetscFunctionReturn(PETSC_SUCCESS);
755
756 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
757 PetscCall(MatSeqAIJGetArrayRead(A, &aa));
758 PetscCall(VecGetArrayRead(bb, &b));
759 PetscCall(VecGetArrayWrite(xx, &x));
760 tmp = a->solve_work;
761
762 PetscCall(ISGetIndices(isrow, &rout));
763 r = rout;
764 PetscCall(ISGetIndices(iscol, &cout));
765 c = cout + (n - 1);
766
767 /* forward solve the lower triangular */
768 tmp[0] = b[*r++];
769 tmps = tmp;
770 for (i = 1; i < n; i++) {
771 v = aa + ai[i];
772 vi = aj + ai[i];
773 nz = adiag[i] - ai[i];
774 sum = b[*r++];
775 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
776 tmp[i] = sum;
777 }
778
779 /* backward solve the upper triangular */
780 for (i = n - 1; i >= 0; i--) {
781 v = aa + adiag[i] + 1;
782 vi = aj + adiag[i] + 1;
783 nz = ai[i + 1] - adiag[i] - 1;
784 sum = tmp[i];
785 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
786 x[*c--] = tmp[i] = sum * aa[adiag[i]];
787 }
788 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
789 PetscCall(ISRestoreIndices(isrow, &rout));
790 PetscCall(ISRestoreIndices(iscol, &cout));
791 PetscCall(VecRestoreArrayRead(bb, &b));
792 PetscCall(VecRestoreArrayWrite(xx, &x));
793 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
794 PetscFunctionReturn(PETSC_SUCCESS);
795 }
796
MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)797 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
798 {
799 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
800 IS iscol = a->col, isrow = a->row;
801 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
802 PetscInt nz, neq, ldb, ldx;
803 const PetscInt *rout, *cout, *r, *c;
804 PetscScalar *x, *tmp = a->solve_work, *tmps, sum;
805 const PetscScalar *b, *aa, *v;
806 PetscBool isdense;
807 const PetscInt *adiag;
808
809 PetscFunctionBegin;
810 if (!n) PetscFunctionReturn(PETSC_SUCCESS);
811 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
812 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
813 if (X != B) {
814 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
815 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
816 }
817 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
818 PetscCall(MatSeqAIJGetArrayRead(A, &aa));
819 PetscCall(MatDenseGetArrayRead(B, &b));
820 PetscCall(MatDenseGetLDA(B, &ldb));
821 PetscCall(MatDenseGetArray(X, &x));
822 PetscCall(MatDenseGetLDA(X, &ldx));
823 PetscCall(ISGetIndices(isrow, &rout));
824 r = rout;
825 PetscCall(ISGetIndices(iscol, &cout));
826 c = cout;
827 for (neq = 0; neq < B->cmap->n; neq++) {
828 /* forward solve the lower triangular */
829 tmp[0] = b[r[0]];
830 tmps = tmp;
831 for (i = 1; i < n; i++) {
832 v = aa + ai[i];
833 vi = aj + ai[i];
834 nz = adiag[i] - ai[i];
835 sum = b[r[i]];
836 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
837 tmp[i] = sum;
838 }
839 /* backward solve the upper triangular */
840 for (i = n - 1; i >= 0; i--) {
841 v = aa + adiag[i] + 1;
842 vi = aj + adiag[i] + 1;
843 nz = ai[i + 1] - adiag[i] - 1;
844 sum = tmp[i];
845 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
846 x[c[i]] = tmp[i] = sum * aa[adiag[i]];
847 }
848 b += ldb;
849 x += ldx;
850 }
851 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
852 PetscCall(ISRestoreIndices(isrow, &rout));
853 PetscCall(ISRestoreIndices(iscol, &cout));
854 PetscCall(MatDenseRestoreArrayRead(B, &b));
855 PetscCall(MatDenseRestoreArray(X, &x));
856 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
857 PetscFunctionReturn(PETSC_SUCCESS);
858 }
859
MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)860 PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
861 {
862 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
863 IS iscol = a->col, isrow = a->row;
864 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
865 const PetscInt *adiag;
866 PetscInt nz, neq, ldb, ldx;
867 const PetscInt *rout, *cout, *r, *c;
868 PetscScalar *x, *tmp = a->solve_work, sum;
869 const PetscScalar *b, *aa, *v;
870 PetscBool isdense;
871
872 PetscFunctionBegin;
873 if (!n) PetscFunctionReturn(PETSC_SUCCESS);
874 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
875 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
876 if (X != B) {
877 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
878 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
879 }
880 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
881 PetscCall(MatSeqAIJGetArrayRead(A, &aa));
882 PetscCall(MatDenseGetArrayRead(B, &b));
883 PetscCall(MatDenseGetLDA(B, &ldb));
884 PetscCall(MatDenseGetArray(X, &x));
885 PetscCall(MatDenseGetLDA(X, &ldx));
886 PetscCall(ISGetIndices(isrow, &rout));
887 r = rout;
888 PetscCall(ISGetIndices(iscol, &cout));
889 c = cout;
890 for (neq = 0; neq < B->cmap->n; neq++) {
891 /* forward solve the lower triangular */
892 tmp[0] = b[r[0]];
893 v = aa;
894 vi = aj;
895 for (i = 1; i < n; i++) {
896 nz = ai[i + 1] - ai[i];
897 sum = b[r[i]];
898 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
899 tmp[i] = sum;
900 v += nz;
901 vi += nz;
902 }
903 /* backward solve the upper triangular */
904 for (i = n - 1; i >= 0; i--) {
905 v = aa + adiag[i + 1] + 1;
906 vi = aj + adiag[i + 1] + 1;
907 nz = adiag[i] - adiag[i + 1] - 1;
908 sum = tmp[i];
909 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
910 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
911 }
912 b += ldb;
913 x += ldx;
914 }
915 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
916 PetscCall(ISRestoreIndices(isrow, &rout));
917 PetscCall(ISRestoreIndices(iscol, &cout));
918 PetscCall(MatDenseRestoreArrayRead(B, &b));
919 PetscCall(MatDenseRestoreArray(X, &x));
920 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
921 PetscFunctionReturn(PETSC_SUCCESS);
922 }
923
MatMatSolveTranspose_SeqAIJ(Mat A,Mat B,Mat X)924 PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X)
925 {
926 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
927 IS iscol = a->col, isrow = a->row;
928 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, j;
929 const PetscInt *adiag = a->diag;
930 PetscInt nz, neq, ldb, ldx;
931 const PetscInt *rout, *cout, *r, *c;
932 PetscScalar *x, *tmp = a->solve_work, s1;
933 const PetscScalar *b, *aa, *v;
934 PetscBool isdense;
935
936 PetscFunctionBegin;
937 if (!n) PetscFunctionReturn(PETSC_SUCCESS);
938 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
939 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
940 if (X != B) {
941 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
942 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
943 }
944 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
945 PetscCall(MatSeqAIJGetArrayRead(A, &aa));
946 PetscCall(MatDenseGetArrayRead(B, &b));
947 PetscCall(MatDenseGetLDA(B, &ldb));
948 PetscCall(MatDenseGetArray(X, &x));
949 PetscCall(MatDenseGetLDA(X, &ldx));
950 PetscCall(ISGetIndices(isrow, &rout));
951 r = rout;
952 PetscCall(ISGetIndices(iscol, &cout));
953 c = cout;
954 for (neq = 0; neq < B->cmap->n; neq++) {
955 /* copy the b into temp work space according to permutation */
956 for (i = 0; i < n; i++) tmp[i] = b[c[i]];
957
958 /* forward solve the U^T */
959 for (i = 0; i < n; i++) {
960 v = aa + adiag[i + 1] + 1;
961 vi = aj + adiag[i + 1] + 1;
962 nz = adiag[i] - adiag[i + 1] - 1;
963 s1 = tmp[i];
964 s1 *= v[nz]; /* multiply by inverse of diagonal entry */
965 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
966 tmp[i] = s1;
967 }
968
969 /* backward solve the L^T */
970 for (i = n - 1; i >= 0; i--) {
971 v = aa + ai[i];
972 vi = aj + ai[i];
973 nz = ai[i + 1] - ai[i];
974 s1 = tmp[i];
975 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
976 }
977
978 /* copy tmp into x according to permutation */
979 for (i = 0; i < n; i++) x[r[i]] = tmp[i];
980 b += ldb;
981 x += ldx;
982 }
983 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
984 PetscCall(ISRestoreIndices(isrow, &rout));
985 PetscCall(ISRestoreIndices(iscol, &cout));
986 PetscCall(MatDenseRestoreArrayRead(B, &b));
987 PetscCall(MatDenseRestoreArray(X, &x));
988 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
989 PetscFunctionReturn(PETSC_SUCCESS);
990 }
991
MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)992 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
993 {
994 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
995 IS iscol = a->col, isrow = a->row;
996 const PetscInt *r, *c, *rout, *cout, *adiag;
997 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
998 PetscInt nz, row;
999 PetscScalar *x, *tmp, *tmps, sum;
1000 const PetscScalar *b;
1001 const MatScalar *aa, *v;
1002
1003 PetscFunctionBegin;
1004 if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1005
1006 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1007 PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1008 PetscCall(VecGetArrayRead(bb, &b));
1009 PetscCall(VecGetArrayWrite(xx, &x));
1010 tmp = a->solve_work;
1011
1012 PetscCall(ISGetIndices(isrow, &rout));
1013 r = rout;
1014 PetscCall(ISGetIndices(iscol, &cout));
1015 c = cout + (n - 1);
1016
1017 /* forward solve the lower triangular */
1018 tmp[0] = b[*r++];
1019 tmps = tmp;
1020 for (row = 1; row < n; row++) {
1021 i = rout[row]; /* permuted row */
1022 v = aa + ai[i];
1023 vi = aj + ai[i];
1024 nz = adiag[i] - ai[i];
1025 sum = b[*r++];
1026 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1027 tmp[row] = sum;
1028 }
1029
1030 /* backward solve the upper triangular */
1031 for (row = n - 1; row >= 0; row--) {
1032 i = rout[row]; /* permuted row */
1033 v = aa + adiag[i] + 1;
1034 vi = aj + adiag[i] + 1;
1035 nz = ai[i + 1] - adiag[i] - 1;
1036 sum = tmp[row];
1037 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1038 x[*c--] = tmp[row] = sum * aa[adiag[i]];
1039 }
1040 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1041 PetscCall(ISRestoreIndices(isrow, &rout));
1042 PetscCall(ISRestoreIndices(iscol, &cout));
1043 PetscCall(VecRestoreArrayRead(bb, &b));
1044 PetscCall(VecRestoreArrayWrite(xx, &x));
1045 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1046 PetscFunctionReturn(PETSC_SUCCESS);
1047 }
1048
1049 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)1050 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1051 {
1052 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1053 PetscInt n = A->rmap->n;
1054 const PetscInt *ai = a->i, *aj = a->j, *adiag;
1055 PetscScalar *x;
1056 const PetscScalar *b;
1057 const MatScalar *aa;
1058 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1059 PetscInt adiag_i, i, nz, ai_i;
1060 const PetscInt *vi;
1061 const MatScalar *v;
1062 PetscScalar sum;
1063 #endif
1064
1065 PetscFunctionBegin;
1066 if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1067 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1068 PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1069 PetscCall(VecGetArrayRead(bb, &b));
1070 PetscCall(VecGetArrayWrite(xx, &x));
1071
1072 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1073 fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1074 #else
1075 /* forward solve the lower triangular */
1076 x[0] = b[0];
1077 for (i = 1; i < n; i++) {
1078 ai_i = ai[i];
1079 v = aa + ai_i;
1080 vi = aj + ai_i;
1081 nz = adiag[i] - ai_i;
1082 sum = b[i];
1083 PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1084 x[i] = sum;
1085 }
1086
1087 /* backward solve the upper triangular */
1088 for (i = n - 1; i >= 0; i--) {
1089 adiag_i = adiag[i];
1090 v = aa + adiag_i + 1;
1091 vi = aj + adiag_i + 1;
1092 nz = ai[i + 1] - adiag_i - 1;
1093 sum = x[i];
1094 PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1095 x[i] = sum * aa[adiag_i];
1096 }
1097 #endif
1098 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1099 PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1100 PetscCall(VecRestoreArrayRead(bb, &b));
1101 PetscCall(VecRestoreArrayWrite(xx, &x));
1102 PetscFunctionReturn(PETSC_SUCCESS);
1103 }
1104
MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)1105 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1106 {
1107 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1108 IS iscol = a->col, isrow = a->row;
1109 PetscInt i, n = A->rmap->n, j;
1110 PetscInt nz;
1111 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag;
1112 PetscScalar *x, *tmp, sum;
1113 const PetscScalar *b;
1114 const MatScalar *aa, *v;
1115
1116 PetscFunctionBegin;
1117 if (yy != xx) PetscCall(VecCopy(yy, xx));
1118
1119 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1120 PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1121 PetscCall(VecGetArrayRead(bb, &b));
1122 PetscCall(VecGetArray(xx, &x));
1123 tmp = a->solve_work;
1124
1125 PetscCall(ISGetIndices(isrow, &rout));
1126 r = rout;
1127 PetscCall(ISGetIndices(iscol, &cout));
1128 c = cout + (n - 1);
1129
1130 /* forward solve the lower triangular */
1131 tmp[0] = b[*r++];
1132 for (i = 1; i < n; i++) {
1133 v = aa + ai[i];
1134 vi = aj + ai[i];
1135 nz = adiag[i] - ai[i];
1136 sum = b[*r++];
1137 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1138 tmp[i] = sum;
1139 }
1140
1141 /* backward solve the upper triangular */
1142 for (i = n - 1; i >= 0; i--) {
1143 v = aa + adiag[i] + 1;
1144 vi = aj + adiag[i] + 1;
1145 nz = ai[i + 1] - adiag[i] - 1;
1146 sum = tmp[i];
1147 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1148 tmp[i] = sum * aa[adiag[i]];
1149 x[*c--] += tmp[i];
1150 }
1151
1152 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1153 PetscCall(ISRestoreIndices(isrow, &rout));
1154 PetscCall(ISRestoreIndices(iscol, &cout));
1155 PetscCall(VecRestoreArrayRead(bb, &b));
1156 PetscCall(VecRestoreArray(xx, &x));
1157 PetscCall(PetscLogFlops(2.0 * a->nz));
1158 PetscFunctionReturn(PETSC_SUCCESS);
1159 }
1160
MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)1161 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1162 {
1163 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1164 IS iscol = a->col, isrow = a->row;
1165 PetscInt i, n = A->rmap->n, j;
1166 PetscInt nz;
1167 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag;
1168 PetscScalar *x, *tmp, sum;
1169 const PetscScalar *b;
1170 const MatScalar *aa, *v;
1171
1172 PetscFunctionBegin;
1173 if (yy != xx) PetscCall(VecCopy(yy, xx));
1174
1175 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1176 PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1177 PetscCall(VecGetArrayRead(bb, &b));
1178 PetscCall(VecGetArray(xx, &x));
1179 tmp = a->solve_work;
1180
1181 PetscCall(ISGetIndices(isrow, &rout));
1182 r = rout;
1183 PetscCall(ISGetIndices(iscol, &cout));
1184 c = cout;
1185
1186 /* forward solve the lower triangular */
1187 tmp[0] = b[r[0]];
1188 v = aa;
1189 vi = aj;
1190 for (i = 1; i < n; i++) {
1191 nz = ai[i + 1] - ai[i];
1192 sum = b[r[i]];
1193 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1194 tmp[i] = sum;
1195 v += nz;
1196 vi += nz;
1197 }
1198
1199 /* backward solve the upper triangular */
1200 v = aa + adiag[n - 1];
1201 vi = aj + adiag[n - 1];
1202 for (i = n - 1; i >= 0; i--) {
1203 nz = adiag[i] - adiag[i + 1] - 1;
1204 sum = tmp[i];
1205 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1206 tmp[i] = sum * v[nz];
1207 x[c[i]] += tmp[i];
1208 v += nz + 1;
1209 vi += nz + 1;
1210 }
1211
1212 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1213 PetscCall(ISRestoreIndices(isrow, &rout));
1214 PetscCall(ISRestoreIndices(iscol, &cout));
1215 PetscCall(VecRestoreArrayRead(bb, &b));
1216 PetscCall(VecRestoreArray(xx, &x));
1217 PetscCall(PetscLogFlops(2.0 * a->nz));
1218 PetscFunctionReturn(PETSC_SUCCESS);
1219 }
1220
MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)1221 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1222 {
1223 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1224 IS iscol = a->col, isrow = a->row;
1225 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1226 PetscInt i, n = A->rmap->n, j;
1227 PetscInt nz;
1228 PetscScalar *x, *tmp, s1;
1229 const MatScalar *aa, *v;
1230 const PetscScalar *b;
1231
1232 PetscFunctionBegin;
1233 PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1234 PetscCall(VecGetArrayRead(bb, &b));
1235 PetscCall(VecGetArrayWrite(xx, &x));
1236 tmp = a->solve_work;
1237
1238 PetscCall(ISGetIndices(isrow, &rout));
1239 r = rout;
1240 PetscCall(ISGetIndices(iscol, &cout));
1241 c = cout;
1242
1243 /* copy the b into temp work space according to permutation */
1244 for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1245
1246 /* forward solve the U^T */
1247 for (i = 0; i < n; i++) {
1248 v = aa + diag[i];
1249 vi = aj + diag[i] + 1;
1250 nz = ai[i + 1] - diag[i] - 1;
1251 s1 = tmp[i];
1252 s1 *= (*v++); /* multiply by inverse of diagonal entry */
1253 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1254 tmp[i] = s1;
1255 }
1256
1257 /* backward solve the L^T */
1258 for (i = n - 1; i >= 0; i--) {
1259 v = aa + diag[i] - 1;
1260 vi = aj + diag[i] - 1;
1261 nz = diag[i] - ai[i];
1262 s1 = tmp[i];
1263 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1264 }
1265
1266 /* copy tmp into x according to permutation */
1267 for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1268
1269 PetscCall(ISRestoreIndices(isrow, &rout));
1270 PetscCall(ISRestoreIndices(iscol, &cout));
1271 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1272 PetscCall(VecRestoreArrayRead(bb, &b));
1273 PetscCall(VecRestoreArrayWrite(xx, &x));
1274
1275 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1276 PetscFunctionReturn(PETSC_SUCCESS);
1277 }
1278
MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)1279 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1280 {
1281 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1282 IS iscol = a->col, isrow = a->row;
1283 const PetscInt *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi;
1284 PetscInt i, n = A->rmap->n, j;
1285 PetscInt nz;
1286 PetscScalar *x, *tmp, s1;
1287 const MatScalar *aa, *v;
1288 const PetscScalar *b;
1289
1290 PetscFunctionBegin;
1291 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1292 PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1293 PetscCall(VecGetArrayRead(bb, &b));
1294 PetscCall(VecGetArrayWrite(xx, &x));
1295 tmp = a->solve_work;
1296
1297 PetscCall(ISGetIndices(isrow, &rout));
1298 r = rout;
1299 PetscCall(ISGetIndices(iscol, &cout));
1300 c = cout;
1301
1302 /* copy the b into temp work space according to permutation */
1303 for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1304
1305 /* forward solve the U^T */
1306 for (i = 0; i < n; i++) {
1307 v = aa + adiag[i + 1] + 1;
1308 vi = aj + adiag[i + 1] + 1;
1309 nz = adiag[i] - adiag[i + 1] - 1;
1310 s1 = tmp[i];
1311 s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1312 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1313 tmp[i] = s1;
1314 }
1315
1316 /* backward solve the L^T */
1317 for (i = n - 1; i >= 0; i--) {
1318 v = aa + ai[i];
1319 vi = aj + ai[i];
1320 nz = ai[i + 1] - ai[i];
1321 s1 = tmp[i];
1322 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1323 }
1324
1325 /* copy tmp into x according to permutation */
1326 for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1327
1328 PetscCall(ISRestoreIndices(isrow, &rout));
1329 PetscCall(ISRestoreIndices(iscol, &cout));
1330 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1331 PetscCall(VecRestoreArrayRead(bb, &b));
1332 PetscCall(VecRestoreArrayWrite(xx, &x));
1333
1334 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1335 PetscFunctionReturn(PETSC_SUCCESS);
1336 }
1337
MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)1338 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1339 {
1340 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1341 IS iscol = a->col, isrow = a->row;
1342 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1343 PetscInt i, n = A->rmap->n, j;
1344 PetscInt nz;
1345 PetscScalar *x, *tmp, s1;
1346 const MatScalar *aa, *v;
1347 const PetscScalar *b;
1348
1349 PetscFunctionBegin;
1350 if (zz != xx) PetscCall(VecCopy(zz, xx));
1351 PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1352 PetscCall(VecGetArrayRead(bb, &b));
1353 PetscCall(VecGetArray(xx, &x));
1354 tmp = a->solve_work;
1355
1356 PetscCall(ISGetIndices(isrow, &rout));
1357 r = rout;
1358 PetscCall(ISGetIndices(iscol, &cout));
1359 c = cout;
1360
1361 /* copy the b into temp work space according to permutation */
1362 for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1363
1364 /* forward solve the U^T */
1365 for (i = 0; i < n; i++) {
1366 v = aa + diag[i];
1367 vi = aj + diag[i] + 1;
1368 nz = ai[i + 1] - diag[i] - 1;
1369 s1 = tmp[i];
1370 s1 *= (*v++); /* multiply by inverse of diagonal entry */
1371 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1372 tmp[i] = s1;
1373 }
1374
1375 /* backward solve the L^T */
1376 for (i = n - 1; i >= 0; i--) {
1377 v = aa + diag[i] - 1;
1378 vi = aj + diag[i] - 1;
1379 nz = diag[i] - ai[i];
1380 s1 = tmp[i];
1381 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1382 }
1383
1384 /* copy tmp into x according to permutation */
1385 for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1386
1387 PetscCall(ISRestoreIndices(isrow, &rout));
1388 PetscCall(ISRestoreIndices(iscol, &cout));
1389 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1390 PetscCall(VecRestoreArrayRead(bb, &b));
1391 PetscCall(VecRestoreArray(xx, &x));
1392
1393 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1394 PetscFunctionReturn(PETSC_SUCCESS);
1395 }
1396
MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)1397 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1398 {
1399 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1400 IS iscol = a->col, isrow = a->row;
1401 const PetscInt *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi;
1402 PetscInt i, n = A->rmap->n, j;
1403 PetscInt nz;
1404 PetscScalar *x, *tmp, s1;
1405 const MatScalar *aa, *v;
1406 const PetscScalar *b;
1407
1408 PetscFunctionBegin;
1409 if (zz != xx) PetscCall(VecCopy(zz, xx));
1410 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1411 PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1412 PetscCall(VecGetArrayRead(bb, &b));
1413 PetscCall(VecGetArray(xx, &x));
1414 tmp = a->solve_work;
1415
1416 PetscCall(ISGetIndices(isrow, &rout));
1417 r = rout;
1418 PetscCall(ISGetIndices(iscol, &cout));
1419 c = cout;
1420
1421 /* copy the b into temp work space according to permutation */
1422 for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1423
1424 /* forward solve the U^T */
1425 for (i = 0; i < n; i++) {
1426 v = aa + adiag[i + 1] + 1;
1427 vi = aj + adiag[i + 1] + 1;
1428 nz = adiag[i] - adiag[i + 1] - 1;
1429 s1 = tmp[i];
1430 s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1431 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1432 tmp[i] = s1;
1433 }
1434
1435 /* backward solve the L^T */
1436 for (i = n - 1; i >= 0; i--) {
1437 v = aa + ai[i];
1438 vi = aj + ai[i];
1439 nz = ai[i + 1] - ai[i];
1440 s1 = tmp[i];
1441 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1442 }
1443
1444 /* copy tmp into x according to permutation */
1445 for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1446
1447 PetscCall(ISRestoreIndices(isrow, &rout));
1448 PetscCall(ISRestoreIndices(iscol, &cout));
1449 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1450 PetscCall(VecRestoreArrayRead(bb, &b));
1451 PetscCall(VecRestoreArray(xx, &x));
1452
1453 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1454 PetscFunctionReturn(PETSC_SUCCESS);
1455 }
1456
1457 /*
1458 ilu() under revised new data structure.
1459 Factored arrays bj and ba are stored as
1460 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1461
1462 bi=fact->i is an array of size n+1, in which
1463 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1
1464 bi[n]: points to L(n-1,n-1)+1
1465
1466 bdiag=fact->diag is an array of size n+1,in which
1467 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1468 bdiag[n]: points to entry of U(n-1,0)-1
1469
1470 U(i,:) contains bdiag[i] as its last entry, i.e.,
1471 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1472 */
MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo * info)1473 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1474 {
1475 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
1476 const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag;
1477 PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag;
1478 IS isicol;
1479
1480 PetscFunctionBegin;
1481 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1482 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1483 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1484 b = (Mat_SeqAIJ *)fact->data;
1485
1486 /* allocate matrix arrays for new data structure */
1487 PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscScalar), (void **)&b->a));
1488 PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscInt), (void **)&b->j));
1489 PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i));
1490 if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1491 b->free_a = PETSC_TRUE;
1492 b->free_ij = PETSC_TRUE;
1493
1494 if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1495 bdiag = b->diag;
1496
1497 /* set bi and bj with new data structure */
1498 bi = b->i;
1499 bj = b->j;
1500
1501 /* L part */
1502 bi[0] = 0;
1503 for (i = 0; i < n; i++) {
1504 nz = adiag[i] - ai[i];
1505 bi[i + 1] = bi[i] + nz;
1506 aj = a->j + ai[i];
1507 for (j = 0; j < nz; j++) bj[k++] = aj[j];
1508 }
1509
1510 /* U part */
1511 bdiag[n] = bi[n] - 1;
1512 for (i = n - 1; i >= 0; i--) {
1513 nz = ai[i + 1] - adiag[i] - 1;
1514 aj = a->j + adiag[i] + 1;
1515 for (j = 0; j < nz; j++) bj[k++] = aj[j];
1516 /* diag[i] */
1517 bj[k++] = i;
1518 bdiag[i] = bdiag[i + 1] + nz + 1;
1519 }
1520
1521 fact->factortype = MAT_FACTOR_ILU;
1522 fact->info.factor_mallocs = 0;
1523 fact->info.fill_ratio_given = info->fill;
1524 fact->info.fill_ratio_needed = 1.0;
1525 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1526 PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1527
1528 b = (Mat_SeqAIJ *)fact->data;
1529 b->row = isrow;
1530 b->col = iscol;
1531 b->icol = isicol;
1532 PetscCall(PetscMalloc1(fact->rmap->n, &b->solve_work));
1533 PetscCall(PetscObjectReference((PetscObject)isrow));
1534 PetscCall(PetscObjectReference((PetscObject)iscol));
1535 PetscFunctionReturn(PETSC_SUCCESS);
1536 }
1537
MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo * info)1538 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1539 {
1540 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
1541 IS isicol;
1542 const PetscInt *r, *ic;
1543 PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j;
1544 PetscInt *bi, *cols, nnz, *cols_lvl;
1545 PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1546 PetscInt i, levels, diagonal_fill;
1547 PetscBool col_identity, row_identity;
1548 PetscReal f;
1549 PetscInt nlnk, *lnk, *lnk_lvl = NULL;
1550 PetscBT lnkbt;
1551 PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr;
1552 PetscFreeSpaceList free_space = NULL, current_space = NULL;
1553 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1554 PetscBool diagDense;
1555
1556 PetscFunctionBegin;
1557 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1558 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
1559 PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
1560
1561 levels = (PetscInt)info->levels;
1562 PetscCall(ISIdentity(isrow, &row_identity));
1563 PetscCall(ISIdentity(iscol, &col_identity));
1564 if (!levels && row_identity && col_identity) {
1565 /* special case: ilu(0) with natural ordering */
1566 PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1567 if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1568 PetscFunctionReturn(PETSC_SUCCESS);
1569 }
1570
1571 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1572 PetscCall(ISGetIndices(isrow, &r));
1573 PetscCall(ISGetIndices(isicol, &ic));
1574
1575 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1576 PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
1577 PetscCall(PetscMalloc1(n + 1, &bdiag));
1578 bi[0] = bdiag[0] = 0;
1579 PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1580
1581 /* create a linked list for storing column indices of the active row */
1582 nlnk = n + 1;
1583 PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1584
1585 /* initial FreeSpace size is f*(ai[n]+1) */
1586 f = info->fill;
1587 diagonal_fill = (PetscInt)info->diagonal_fill;
1588 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1589 current_space = free_space;
1590 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1591 current_space_lvl = free_space_lvl;
1592 for (i = 0; i < n; i++) {
1593 nzi = 0;
1594 /* copy current row into linked list */
1595 nnz = ai[r[i] + 1] - ai[r[i]];
1596 PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1597 cols = aj + ai[r[i]];
1598 lnk[i] = -1; /* marker to indicate if diagonal exists */
1599 PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1600 nzi += nlnk;
1601
1602 /* make sure diagonal entry is included */
1603 if (diagonal_fill && lnk[i] == -1) {
1604 fm = n;
1605 while (lnk[fm] < i) fm = lnk[fm];
1606 lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1607 lnk[fm] = i;
1608 lnk_lvl[i] = 0;
1609 nzi++;
1610 dcount++;
1611 }
1612
1613 /* add pivot rows into the active row */
1614 nzbd = 0;
1615 prow = lnk[n];
1616 while (prow < i) {
1617 nnz = bdiag[prow];
1618 cols = bj_ptr[prow] + nnz + 1;
1619 cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1620 nnz = bi[prow + 1] - bi[prow] - nnz - 1;
1621 PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1622 nzi += nlnk;
1623 prow = lnk[prow];
1624 nzbd++;
1625 }
1626 bdiag[i] = nzbd;
1627 bi[i + 1] = bi[i] + nzi;
1628 /* if free space is not available, make more free space */
1629 if (current_space->local_remaining < nzi) {
1630 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1631 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
1632 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl));
1633 reallocs++;
1634 }
1635
1636 /* copy data into free_space and free_space_lvl, then initialize lnk */
1637 PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1638 bj_ptr[i] = current_space->array;
1639 bjlvl_ptr[i] = current_space_lvl->array;
1640
1641 /* make sure the active row i has diagonal entry */
1642 PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1643
1644 current_space->array += nzi;
1645 current_space->local_used += nzi;
1646 current_space->local_remaining -= nzi;
1647 current_space_lvl->array += nzi;
1648 current_space_lvl->local_used += nzi;
1649 current_space_lvl->local_remaining -= nzi;
1650 }
1651
1652 PetscCall(ISRestoreIndices(isrow, &r));
1653 PetscCall(ISRestoreIndices(isicol, &ic));
1654 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1655 PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj));
1656 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1657
1658 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1659 PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1660 PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1661
1662 #if defined(PETSC_USE_INFO)
1663 {
1664 PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1665 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1666 PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1667 PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1668 PetscCall(PetscInfo(A, "for best performance.\n"));
1669 if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1670 }
1671 #endif
1672 /* put together the new matrix */
1673 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1674 b = (Mat_SeqAIJ *)fact->data;
1675 b->free_ij = PETSC_TRUE;
1676 PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
1677 b->free_a = PETSC_TRUE;
1678 b->j = bj;
1679 b->i = bi;
1680 b->diag = bdiag;
1681 b->ilen = NULL;
1682 b->imax = NULL;
1683 b->row = isrow;
1684 b->col = iscol;
1685 PetscCall(PetscObjectReference((PetscObject)isrow));
1686 PetscCall(PetscObjectReference((PetscObject)iscol));
1687 b->icol = isicol;
1688
1689 PetscCall(PetscMalloc1(n, &b->solve_work));
1690 /* In b structure: Free imax, ilen, old a, old j.
1691 Allocate bdiag, solve_work, new a, new j */
1692 b->maxnz = b->nz = bdiag[0] + 1;
1693
1694 fact->info.factor_mallocs = reallocs;
1695 fact->info.fill_ratio_given = f;
1696 fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1697 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1698 if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1699 PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1700 PetscFunctionReturn(PETSC_SUCCESS);
1701 }
1702
MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo * info)1703 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
1704 {
1705 Mat C = B;
1706 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1707 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data;
1708 IS ip = b->row, iip = b->icol;
1709 const PetscInt *rip, *riip;
1710 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1711 PetscInt *ai = a->i, *aj = a->j;
1712 PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1713 MatScalar *rtmp, *ba = b->a, *bval, dk, uikdi;
1714 PetscBool perm_identity;
1715 FactorShiftCtx sctx;
1716 PetscReal rs;
1717 const MatScalar *aa, *v;
1718 MatScalar d;
1719 const PetscInt *adiag;
1720
1721 PetscFunctionBegin;
1722 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1723 PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1724 /* MatPivotSetUp(): initialize shift context sctx */
1725 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1726
1727 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1728 sctx.shift_top = info->zeropivot;
1729 for (i = 0; i < mbs; i++) {
1730 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1731 d = aa[adiag[i]];
1732 rs = -PetscAbsScalar(d) - PetscRealPart(d);
1733 v = aa + ai[i];
1734 nz = ai[i + 1] - ai[i];
1735 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
1736 if (rs > sctx.shift_top) sctx.shift_top = rs;
1737 }
1738 sctx.shift_top *= 1.1;
1739 sctx.nshift_max = 5;
1740 sctx.shift_lo = 0.;
1741 sctx.shift_hi = 1.;
1742 }
1743
1744 PetscCall(ISGetIndices(ip, &rip));
1745 PetscCall(ISGetIndices(iip, &riip));
1746
1747 /* allocate working arrays
1748 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1749 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1750 */
1751 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
1752
1753 do {
1754 sctx.newshift = PETSC_FALSE;
1755
1756 for (i = 0; i < mbs; i++) c2r[i] = mbs;
1757 if (mbs) il[0] = 0;
1758
1759 for (k = 0; k < mbs; k++) {
1760 /* zero rtmp */
1761 nz = bi[k + 1] - bi[k];
1762 bjtmp = bj + bi[k];
1763 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
1764
1765 /* load in initial unfactored row */
1766 bval = ba + bi[k];
1767 jmin = ai[rip[k]];
1768 jmax = ai[rip[k] + 1];
1769 for (j = jmin; j < jmax; j++) {
1770 col = riip[aj[j]];
1771 if (col >= k) { /* only take upper triangular entry */
1772 rtmp[col] = aa[j];
1773 *bval++ = 0.0; /* for in-place factorization */
1774 }
1775 }
1776 /* shift the diagonal of the matrix: ZeropivotApply() */
1777 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
1778
1779 /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1780 dk = rtmp[k];
1781 i = c2r[k]; /* first row to be added to k_th row */
1782
1783 while (i < k) {
1784 nexti = c2r[i]; /* next row to be added to k_th row */
1785
1786 /* compute multiplier, update diag(k) and U(i,k) */
1787 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1788 uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
1789 dk += uikdi * ba[ili]; /* update diag[k] */
1790 ba[ili] = uikdi; /* -U(i,k) */
1791
1792 /* add multiple of row i to k-th row */
1793 jmin = ili + 1;
1794 jmax = bi[i + 1];
1795 if (jmin < jmax) {
1796 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1797 /* update il and c2r for row i */
1798 il[i] = jmin;
1799 j = bj[jmin];
1800 c2r[i] = c2r[j];
1801 c2r[j] = i;
1802 }
1803 i = nexti;
1804 }
1805
1806 /* copy data into U(k,:) */
1807 rs = 0.0;
1808 jmin = bi[k];
1809 jmax = bi[k + 1] - 1;
1810 if (jmin < jmax) {
1811 for (j = jmin; j < jmax; j++) {
1812 col = bj[j];
1813 ba[j] = rtmp[col];
1814 rs += PetscAbsScalar(ba[j]);
1815 }
1816 /* add the k-th row into il and c2r */
1817 il[k] = jmin;
1818 i = bj[jmin];
1819 c2r[k] = c2r[i];
1820 c2r[i] = k;
1821 }
1822
1823 /* MatPivotCheck() */
1824 sctx.rs = rs;
1825 sctx.pv = dk;
1826 PetscCall(MatPivotCheck(B, A, info, &sctx, i));
1827 if (sctx.newshift) break;
1828 dk = sctx.pv;
1829
1830 ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
1831 }
1832 } while (sctx.newshift);
1833
1834 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1835 PetscCall(PetscFree3(rtmp, il, c2r));
1836 PetscCall(ISRestoreIndices(ip, &rip));
1837 PetscCall(ISRestoreIndices(iip, &riip));
1838
1839 PetscCall(ISIdentity(ip, &perm_identity));
1840 if (perm_identity) {
1841 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1842 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1843 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1844 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1845 } else {
1846 B->ops->solve = MatSolve_SeqSBAIJ_1;
1847 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
1848 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
1849 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
1850 }
1851
1852 C->assembled = PETSC_TRUE;
1853 C->preallocated = PETSC_TRUE;
1854
1855 PetscCall(PetscLogFlops(C->rmap->n));
1856
1857 /* MatPivotView() */
1858 if (sctx.nshift) {
1859 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1860 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
1861 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1862 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1863 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1864 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
1865 }
1866 }
1867 PetscFunctionReturn(PETSC_SUCCESS);
1868 }
1869
MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo * info)1870 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
1871 {
1872 Mat C = B;
1873 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1874 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data;
1875 IS ip = b->row, iip = b->icol;
1876 const PetscInt *rip, *riip;
1877 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
1878 PetscInt *ai = a->i, *aj = a->j;
1879 PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
1880 MatScalar *rtmp, *ba = b->a, *bval, dk, uikdi;
1881 const MatScalar *aa, *v;
1882 PetscBool perm_identity;
1883 FactorShiftCtx sctx;
1884 PetscReal rs;
1885 MatScalar d;
1886 const PetscInt *adiag;
1887
1888 PetscFunctionBegin;
1889 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1890 PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1891 /* MatPivotSetUp(): initialize shift context sctx */
1892 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1893
1894 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1895 sctx.shift_top = info->zeropivot;
1896 for (i = 0; i < mbs; i++) {
1897 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1898 d = aa[adiag[i]];
1899 rs = -PetscAbsScalar(d) - PetscRealPart(d);
1900 v = aa + ai[i];
1901 nz = ai[i + 1] - ai[i];
1902 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
1903 if (rs > sctx.shift_top) sctx.shift_top = rs;
1904 }
1905 sctx.shift_top *= 1.1;
1906 sctx.nshift_max = 5;
1907 sctx.shift_lo = 0.;
1908 sctx.shift_hi = 1.;
1909 }
1910
1911 PetscCall(ISGetIndices(ip, &rip));
1912 PetscCall(ISGetIndices(iip, &riip));
1913
1914 /* initialization */
1915 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
1916
1917 do {
1918 sctx.newshift = PETSC_FALSE;
1919
1920 for (i = 0; i < mbs; i++) jl[i] = mbs;
1921 il[0] = 0;
1922
1923 for (k = 0; k < mbs; k++) {
1924 /* zero rtmp */
1925 nz = bi[k + 1] - bi[k];
1926 bjtmp = bj + bi[k];
1927 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
1928
1929 bval = ba + bi[k];
1930 /* initialize k-th row by the perm[k]-th row of A */
1931 jmin = ai[rip[k]];
1932 jmax = ai[rip[k] + 1];
1933 for (j = jmin; j < jmax; j++) {
1934 col = riip[aj[j]];
1935 if (col >= k) { /* only take upper triangular entry */
1936 rtmp[col] = aa[j];
1937 *bval++ = 0.0; /* for in-place factorization */
1938 }
1939 }
1940 /* shift the diagonal of the matrix */
1941 if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1942
1943 /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1944 dk = rtmp[k];
1945 i = jl[k]; /* first row to be added to k_th row */
1946
1947 while (i < k) {
1948 nexti = jl[i]; /* next row to be added to k_th row */
1949
1950 /* compute multiplier, update diag(k) and U(i,k) */
1951 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1952 uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
1953 dk += uikdi * ba[ili];
1954 ba[ili] = uikdi; /* -U(i,k) */
1955
1956 /* add multiple of row i to k-th row */
1957 jmin = ili + 1;
1958 jmax = bi[i + 1];
1959 if (jmin < jmax) {
1960 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1961 /* update il and jl for row i */
1962 il[i] = jmin;
1963 j = bj[jmin];
1964 jl[i] = jl[j];
1965 jl[j] = i;
1966 }
1967 i = nexti;
1968 }
1969
1970 /* shift the diagonals when zero pivot is detected */
1971 /* compute rs=sum of abs(off-diagonal) */
1972 rs = 0.0;
1973 jmin = bi[k] + 1;
1974 nz = bi[k + 1] - jmin;
1975 bcol = bj + jmin;
1976 for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);
1977
1978 sctx.rs = rs;
1979 sctx.pv = dk;
1980 PetscCall(MatPivotCheck(B, A, info, &sctx, k));
1981 if (sctx.newshift) break;
1982 dk = sctx.pv;
1983
1984 /* copy data into U(k,:) */
1985 ba[bi[k]] = 1.0 / dk; /* U(k,k) */
1986 jmin = bi[k] + 1;
1987 jmax = bi[k + 1];
1988 if (jmin < jmax) {
1989 for (j = jmin; j < jmax; j++) {
1990 col = bj[j];
1991 ba[j] = rtmp[col];
1992 }
1993 /* add the k-th row into il and jl */
1994 il[k] = jmin;
1995 i = bj[jmin];
1996 jl[k] = jl[i];
1997 jl[i] = k;
1998 }
1999 }
2000 } while (sctx.newshift);
2001
2002 PetscCall(PetscFree3(rtmp, il, jl));
2003 PetscCall(ISRestoreIndices(ip, &rip));
2004 PetscCall(ISRestoreIndices(iip, &riip));
2005 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2006
2007 PetscCall(ISIdentity(ip, &perm_identity));
2008 if (perm_identity) {
2009 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2010 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2011 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2012 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2013 } else {
2014 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
2015 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2016 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
2017 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
2018 }
2019
2020 C->assembled = PETSC_TRUE;
2021 C->preallocated = PETSC_TRUE;
2022
2023 PetscCall(PetscLogFlops(C->rmap->n));
2024 if (sctx.nshift) {
2025 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2026 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2027 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2028 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2029 }
2030 }
2031 PetscFunctionReturn(PETSC_SUCCESS);
2032 }
2033
2034 /*
2035 icc() under revised new data structure.
2036 Factored arrays bj and ba are stored as
2037 U(0,:),...,U(i,:),U(n-1,:)
2038
2039 ui=fact->i is an array of size n+1, in which
2040 ui+
2041 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1
2042 ui[n]: points to U(n-1,n-1)+1
2043
2044 udiag=fact->diag is an array of size n,in which
2045 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2046
2047 U(i,:) contains udiag[i] as its last entry, i.e.,
2048 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2049 */
2050
MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo * info)2051 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2052 {
2053 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2054 Mat_SeqSBAIJ *b;
2055 PetscBool perm_identity;
2056 PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2057 const PetscInt *rip, *riip, *adiag;
2058 PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2059 PetscInt nlnk, *lnk, *lnk_lvl = NULL;
2060 PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2061 PetscReal fill = info->fill, levels = info->levels;
2062 PetscFreeSpaceList free_space = NULL, current_space = NULL;
2063 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2064 PetscBT lnkbt;
2065 IS iperm;
2066 PetscBool diagDense;
2067
2068 PetscFunctionBegin;
2069 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2070 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense));
2071 PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
2072 PetscCall(ISIdentity(perm, &perm_identity));
2073 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2074
2075 PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2076 PetscCall(PetscMalloc1(am + 1, &udiag));
2077 ui[0] = 0;
2078
2079 /* ICC(0) without matrix ordering: simply rearrange column indices */
2080 if (!levels && perm_identity) {
2081 for (i = 0; i < am; i++) {
2082 ncols = ai[i + 1] - adiag[i];
2083 ui[i + 1] = ui[i] + ncols;
2084 udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2085 }
2086 PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2087 cols = uj;
2088 for (i = 0; i < am; i++) {
2089 aj = a->j + adiag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2090 ncols = ai[i + 1] - adiag[i] - 1;
2091 for (j = 0; j < ncols; j++) *cols++ = aj[j];
2092 *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2093 }
2094 } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2095 PetscCall(ISGetIndices(iperm, &riip));
2096 PetscCall(ISGetIndices(perm, &rip));
2097
2098 /* initialization */
2099 PetscCall(PetscMalloc1(am + 1, &ajtmp));
2100
2101 /* jl: linked list for storing indices of the pivot rows
2102 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2103 PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2104 for (i = 0; i < am; i++) {
2105 jl[i] = am;
2106 il[i] = 0;
2107 }
2108
2109 /* create and initialize a linked list for storing column indices of the active row k */
2110 nlnk = am + 1;
2111 PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2112
2113 /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2114 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2115 current_space = free_space;
2116 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2117 current_space_lvl = free_space_lvl;
2118
2119 for (k = 0; k < am; k++) { /* for each active row k */
2120 /* initialize lnk by the column indices of row rip[k] of A */
2121 nzk = 0;
2122 ncols = ai[rip[k] + 1] - ai[rip[k]];
2123 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2124 ncols_upper = 0;
2125 for (j = 0; j < ncols; j++) {
2126 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2127 if (riip[i] >= k) { /* only take upper triangular entry */
2128 ajtmp[ncols_upper] = i;
2129 ncols_upper++;
2130 }
2131 }
2132 PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2133 nzk += nlnk;
2134
2135 /* update lnk by computing fill-in for each pivot row to be merged in */
2136 prow = jl[k]; /* 1st pivot row */
2137
2138 while (prow < k) {
2139 nextprow = jl[prow];
2140
2141 /* merge prow into k-th row */
2142 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2143 jmax = ui[prow + 1];
2144 ncols = jmax - jmin;
2145 i = jmin - ui[prow];
2146 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2147 uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2148 j = *(uj - 1);
2149 PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2150 nzk += nlnk;
2151
2152 /* update il and jl for prow */
2153 if (jmin < jmax) {
2154 il[prow] = jmin;
2155 j = *cols;
2156 jl[prow] = jl[j];
2157 jl[j] = prow;
2158 }
2159 prow = nextprow;
2160 }
2161
2162 /* if free space is not available, make more free space */
2163 if (current_space->local_remaining < nzk) {
2164 i = am - k + 1; /* num of unfactored rows */
2165 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2166 PetscCall(PetscFreeSpaceGet(i, ¤t_space));
2167 PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl));
2168 reallocs++;
2169 }
2170
2171 /* copy data into free_space and free_space_lvl, then initialize lnk */
2172 PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2173 PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2174
2175 /* add the k-th row into il and jl */
2176 if (nzk > 1) {
2177 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2178 jl[k] = jl[i];
2179 jl[i] = k;
2180 il[k] = ui[k] + 1;
2181 }
2182 uj_ptr[k] = current_space->array;
2183 uj_lvl_ptr[k] = current_space_lvl->array;
2184
2185 current_space->array += nzk;
2186 current_space->local_used += nzk;
2187 current_space->local_remaining -= nzk;
2188
2189 current_space_lvl->array += nzk;
2190 current_space_lvl->local_used += nzk;
2191 current_space_lvl->local_remaining -= nzk;
2192
2193 ui[k + 1] = ui[k] + nzk;
2194 }
2195
2196 PetscCall(ISRestoreIndices(perm, &rip));
2197 PetscCall(ISRestoreIndices(iperm, &riip));
2198 PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2199 PetscCall(PetscFree(ajtmp));
2200
2201 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2202 PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj));
2203 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2204 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2205 PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2206
2207 } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2208
2209 /* put together the new matrix in MATSEQSBAIJ format */
2210 b = (Mat_SeqSBAIJ *)fact->data;
2211 b->free_ij = PETSC_TRUE;
2212 PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a));
2213 b->free_a = PETSC_TRUE;
2214 b->j = uj;
2215 b->i = ui;
2216 b->diag = udiag;
2217 b->ilen = NULL;
2218 b->imax = NULL;
2219 b->row = perm;
2220 b->col = perm;
2221 PetscCall(PetscObjectReference((PetscObject)perm));
2222 PetscCall(PetscObjectReference((PetscObject)perm));
2223 b->icol = iperm;
2224 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2225
2226 PetscCall(PetscMalloc1(am, &b->solve_work));
2227
2228 b->maxnz = b->nz = ui[am];
2229
2230 fact->info.factor_mallocs = reallocs;
2231 fact->info.fill_ratio_given = fill;
2232 if (ai[am] != 0) {
2233 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2234 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2235 } else {
2236 fact->info.fill_ratio_needed = 0.0;
2237 }
2238 #if defined(PETSC_USE_INFO)
2239 if (ai[am] != 0) {
2240 PetscReal af = fact->info.fill_ratio_needed;
2241 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2242 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2243 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2244 } else {
2245 PetscCall(PetscInfo(A, "Empty matrix\n"));
2246 }
2247 #endif
2248 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2249 PetscFunctionReturn(PETSC_SUCCESS);
2250 }
2251
MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo * info)2252 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2253 {
2254 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2255 Mat_SeqSBAIJ *b;
2256 PetscBool perm_identity;
2257 PetscReal fill = info->fill;
2258 const PetscInt *rip, *riip;
2259 PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2260 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2261 PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2262 PetscFreeSpaceList free_space = NULL, current_space = NULL;
2263 PetscBT lnkbt;
2264 IS iperm;
2265 PetscBool diagDense;
2266
2267 PetscFunctionBegin;
2268 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2269 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
2270 PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
2271
2272 /* check whether perm is the identity mapping */
2273 PetscCall(ISIdentity(perm, &perm_identity));
2274 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2275 PetscCall(ISGetIndices(iperm, &riip));
2276 PetscCall(ISGetIndices(perm, &rip));
2277
2278 /* initialization */
2279 PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2280 PetscCall(PetscMalloc1(am + 1, &udiag));
2281 ui[0] = 0;
2282
2283 /* jl: linked list for storing indices of the pivot rows
2284 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2285 PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2286 for (i = 0; i < am; i++) {
2287 jl[i] = am;
2288 il[i] = 0;
2289 }
2290
2291 /* create and initialize a linked list for storing column indices of the active row k */
2292 nlnk = am + 1;
2293 PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2294
2295 /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2296 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2297 current_space = free_space;
2298
2299 for (k = 0; k < am; k++) { /* for each active row k */
2300 /* initialize lnk by the column indices of row rip[k] of A */
2301 nzk = 0;
2302 ncols = ai[rip[k] + 1] - ai[rip[k]];
2303 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2304 ncols_upper = 0;
2305 for (j = 0; j < ncols; j++) {
2306 i = riip[*(aj + ai[rip[k]] + j)];
2307 if (i >= k) { /* only take upper triangular entry */
2308 cols[ncols_upper] = i;
2309 ncols_upper++;
2310 }
2311 }
2312 PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2313 nzk += nlnk;
2314
2315 /* update lnk by computing fill-in for each pivot row to be merged in */
2316 prow = jl[k]; /* 1st pivot row */
2317
2318 while (prow < k) {
2319 nextprow = jl[prow];
2320 /* merge prow into k-th row */
2321 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2322 jmax = ui[prow + 1];
2323 ncols = jmax - jmin;
2324 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2325 PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2326 nzk += nlnk;
2327
2328 /* update il and jl for prow */
2329 if (jmin < jmax) {
2330 il[prow] = jmin;
2331 j = *uj_ptr;
2332 jl[prow] = jl[j];
2333 jl[j] = prow;
2334 }
2335 prow = nextprow;
2336 }
2337
2338 /* if free space is not available, make more free space */
2339 if (current_space->local_remaining < nzk) {
2340 i = am - k + 1; /* num of unfactored rows */
2341 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2342 PetscCall(PetscFreeSpaceGet(i, ¤t_space));
2343 reallocs++;
2344 }
2345
2346 /* copy data into free space, then initialize lnk */
2347 PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
2348
2349 /* add the k-th row into il and jl */
2350 if (nzk > 1) {
2351 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2352 jl[k] = jl[i];
2353 jl[i] = k;
2354 il[k] = ui[k] + 1;
2355 }
2356 ui_ptr[k] = current_space->array;
2357
2358 current_space->array += nzk;
2359 current_space->local_used += nzk;
2360 current_space->local_remaining -= nzk;
2361
2362 ui[k + 1] = ui[k] + nzk;
2363 }
2364
2365 PetscCall(ISRestoreIndices(perm, &rip));
2366 PetscCall(ISRestoreIndices(iperm, &riip));
2367 PetscCall(PetscFree4(ui_ptr, jl, il, cols));
2368
2369 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2370 PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscInt), (void **)&uj));
2371 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2372 PetscCall(PetscLLDestroy(lnk, lnkbt));
2373
2374 /* put together the new matrix in MATSEQSBAIJ format */
2375 b = (Mat_SeqSBAIJ *)fact->data;
2376 b->free_ij = PETSC_TRUE;
2377 PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a));
2378 b->free_a = PETSC_TRUE;
2379 b->j = uj;
2380 b->i = ui;
2381 b->diag = udiag;
2382 b->ilen = NULL;
2383 b->imax = NULL;
2384 b->row = perm;
2385 b->col = perm;
2386
2387 PetscCall(PetscObjectReference((PetscObject)perm));
2388 PetscCall(PetscObjectReference((PetscObject)perm));
2389
2390 b->icol = iperm;
2391 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2392
2393 PetscCall(PetscMalloc1(am, &b->solve_work));
2394
2395 b->maxnz = b->nz = ui[am];
2396
2397 fact->info.factor_mallocs = reallocs;
2398 fact->info.fill_ratio_given = fill;
2399 if (ai[am] != 0) {
2400 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2401 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2402 } else {
2403 fact->info.fill_ratio_needed = 0.0;
2404 }
2405 #if defined(PETSC_USE_INFO)
2406 if (ai[am] != 0) {
2407 PetscReal af = fact->info.fill_ratio_needed;
2408 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2409 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2410 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2411 } else {
2412 PetscCall(PetscInfo(A, "Empty matrix\n"));
2413 }
2414 #endif
2415 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2416 PetscFunctionReturn(PETSC_SUCCESS);
2417 }
2418
MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)2419 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
2420 {
2421 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2422 PetscInt n = A->rmap->n;
2423 const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
2424 PetscScalar *x, sum;
2425 const PetscScalar *b;
2426 const MatScalar *aa, *v;
2427 PetscInt i, nz;
2428
2429 PetscFunctionBegin;
2430 if (!n) PetscFunctionReturn(PETSC_SUCCESS);
2431
2432 PetscCall(MatSeqAIJGetArrayRead(A, &aa));
2433 PetscCall(VecGetArrayRead(bb, &b));
2434 PetscCall(VecGetArrayWrite(xx, &x));
2435
2436 /* forward solve the lower triangular */
2437 x[0] = b[0];
2438 v = aa;
2439 vi = aj;
2440 for (i = 1; i < n; i++) {
2441 nz = ai[i + 1] - ai[i];
2442 sum = b[i];
2443 PetscSparseDenseMinusDot(sum, x, v, vi, nz);
2444 v += nz;
2445 vi += nz;
2446 x[i] = sum;
2447 }
2448
2449 /* backward solve the upper triangular */
2450 for (i = n - 1; i >= 0; i--) {
2451 v = aa + adiag[i + 1] + 1;
2452 vi = aj + adiag[i + 1] + 1;
2453 nz = adiag[i] - adiag[i + 1] - 1;
2454 sum = x[i];
2455 PetscSparseDenseMinusDot(sum, x, v, vi, nz);
2456 x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
2457 }
2458
2459 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
2460 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2461 PetscCall(VecRestoreArrayRead(bb, &b));
2462 PetscCall(VecRestoreArrayWrite(xx, &x));
2463 PetscFunctionReturn(PETSC_SUCCESS);
2464 }
2465
MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)2466 PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
2467 {
2468 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2469 IS iscol = a->col, isrow = a->row;
2470 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
2471 const PetscInt *rout, *cout, *r, *c;
2472 PetscScalar *x, *tmp, sum;
2473 const PetscScalar *b;
2474 const MatScalar *aa, *v;
2475
2476 PetscFunctionBegin;
2477 if (!n) PetscFunctionReturn(PETSC_SUCCESS);
2478
2479 PetscCall(MatSeqAIJGetArrayRead(A, &aa));
2480 PetscCall(VecGetArrayRead(bb, &b));
2481 PetscCall(VecGetArrayWrite(xx, &x));
2482 tmp = a->solve_work;
2483
2484 PetscCall(ISGetIndices(isrow, &rout));
2485 r = rout;
2486 PetscCall(ISGetIndices(iscol, &cout));
2487 c = cout;
2488
2489 /* forward solve the lower triangular */
2490 tmp[0] = b[r[0]];
2491 v = aa;
2492 vi = aj;
2493 for (i = 1; i < n; i++) {
2494 nz = ai[i + 1] - ai[i];
2495 sum = b[r[i]];
2496 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
2497 tmp[i] = sum;
2498 v += nz;
2499 vi += nz;
2500 }
2501
2502 /* backward solve the upper triangular */
2503 for (i = n - 1; i >= 0; i--) {
2504 v = aa + adiag[i + 1] + 1;
2505 vi = aj + adiag[i + 1] + 1;
2506 nz = adiag[i] - adiag[i + 1] - 1;
2507 sum = tmp[i];
2508 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
2509 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
2510 }
2511
2512 PetscCall(ISRestoreIndices(isrow, &rout));
2513 PetscCall(ISRestoreIndices(iscol, &cout));
2514 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2515 PetscCall(VecRestoreArrayRead(bb, &b));
2516 PetscCall(VecRestoreArrayWrite(xx, &x));
2517 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
2518 PetscFunctionReturn(PETSC_SUCCESS);
2519 }
2520