12593348eSBarry Smith /*
2b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row)
32593348eSBarry Smith matrix storage format.
42593348eSBarry Smith */
5c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/
6c6db04a5SJed Brown #include <petscblaslapack.h>
7af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h>
8af0996ceSBarry Smith #include <petsc/private/kernels/blockmatmult.h>
943516a2dSKris Buschelman
1026cec326SBarry Smith /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */
1126cec326SBarry Smith #define TYPE BAIJ
1226cec326SBarry Smith #define TYPE_BS
1326cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
1426cec326SBarry Smith #undef TYPE_BS
1526cec326SBarry Smith #define TYPE_BS _BS
1626cec326SBarry Smith #define TYPE_BS_ON
1726cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
1826cec326SBarry Smith #undef TYPE_BS
1926cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmat.h"
2026cec326SBarry Smith #undef TYPE
2126cec326SBarry Smith #undef TYPE_BS_ON
2226cec326SBarry Smith
237ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
247ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
257ea3e4caSstefano_zampini #endif
267ea3e4caSstefano_zampini
27b5b72c8aSIrina Sokolova #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
28fd9d3c67SJed Brown PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
29b5b72c8aSIrina Sokolova #endif
30c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
31b5b72c8aSIrina Sokolova
32*421480d9SBarry Smith MatGetDiagonalMarkers(SeqBAIJ, A->rmap->bs)
33*421480d9SBarry Smith
MatGetColumnReductions_SeqBAIJ(Mat A,PetscInt type,PetscReal * reductions)34ff6a9541SJacob Faibussowitsch static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions)
35d71ae5a4SJacob Faibussowitsch {
369463ebdaSPierre Jolivet Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
37ff6a9541SJacob Faibussowitsch PetscInt m, n, ib, jb, bs = A->rmap->bs;
389463ebdaSPierre Jolivet MatScalar *a_val = a_aij->a;
399463ebdaSPierre Jolivet
409463ebdaSPierre Jolivet PetscFunctionBegin;
419566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &m, &n));
42ff6a9541SJacob Faibussowitsch PetscCall(PetscArrayzero(reductions, n));
439463ebdaSPierre Jolivet if (type == NORM_2) {
44ff6a9541SJacob Faibussowitsch for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
459463ebdaSPierre Jolivet for (jb = 0; jb < bs; jb++) {
469463ebdaSPierre Jolivet for (ib = 0; ib < bs; ib++) {
47857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
489463ebdaSPierre Jolivet a_val++;
499463ebdaSPierre Jolivet }
509463ebdaSPierre Jolivet }
519463ebdaSPierre Jolivet }
529463ebdaSPierre Jolivet } else if (type == NORM_1) {
53ff6a9541SJacob Faibussowitsch for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
549463ebdaSPierre Jolivet for (jb = 0; jb < bs; jb++) {
559463ebdaSPierre Jolivet for (ib = 0; ib < bs; ib++) {
56857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
579463ebdaSPierre Jolivet a_val++;
589463ebdaSPierre Jolivet }
599463ebdaSPierre Jolivet }
609463ebdaSPierre Jolivet }
619463ebdaSPierre Jolivet } else if (type == NORM_INFINITY) {
62ff6a9541SJacob Faibussowitsch for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
639463ebdaSPierre Jolivet for (jb = 0; jb < bs; jb++) {
649463ebdaSPierre Jolivet for (ib = 0; ib < bs; ib++) {
656497c311SBarry Smith PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
66857cbf51SRichard Tran Mills reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
679463ebdaSPierre Jolivet a_val++;
689463ebdaSPierre Jolivet }
699463ebdaSPierre Jolivet }
709463ebdaSPierre Jolivet }
71857cbf51SRichard Tran Mills } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
72ff6a9541SJacob Faibussowitsch for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
73857cbf51SRichard Tran Mills for (jb = 0; jb < bs; jb++) {
74857cbf51SRichard Tran Mills for (ib = 0; ib < bs; ib++) {
75857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
76857cbf51SRichard Tran Mills a_val++;
77857cbf51SRichard Tran Mills }
78857cbf51SRichard Tran Mills }
79857cbf51SRichard Tran Mills }
80857cbf51SRichard Tran Mills } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
81ff6a9541SJacob Faibussowitsch for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
82857cbf51SRichard Tran Mills for (jb = 0; jb < bs; jb++) {
83857cbf51SRichard Tran Mills for (ib = 0; ib < bs; ib++) {
84857cbf51SRichard Tran Mills reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
85857cbf51SRichard Tran Mills a_val++;
86857cbf51SRichard Tran Mills }
87857cbf51SRichard Tran Mills }
88857cbf51SRichard Tran Mills }
89857cbf51SRichard Tran Mills } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
909463ebdaSPierre Jolivet if (type == NORM_2) {
91ff6a9541SJacob Faibussowitsch for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
92857cbf51SRichard Tran Mills } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
93ff6a9541SJacob Faibussowitsch for (PetscInt i = 0; i < n; i++) reductions[i] /= m;
949463ebdaSPierre Jolivet }
953ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
969463ebdaSPierre Jolivet }
979463ebdaSPierre Jolivet
MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar ** values)9866976f2fSJacob Faibussowitsch static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values)
99d71ae5a4SJacob Faibussowitsch {
100b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
101*421480d9SBarry Smith PetscInt i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
1027f0c90edSBarry Smith MatScalar *v = a->a, *odiag, *diag, work[25], *v_work;
10362bba022SBarry Smith PetscReal shift = 0.0;
1041a9391e3SHong Zhang PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE;
105*421480d9SBarry Smith const PetscInt *adiag;
106b01c7715SBarry Smith
107b01c7715SBarry Smith PetscFunctionBegin;
108a455e926SHong Zhang allowzeropivot = PetscNot(A->erroriffailure);
109a455e926SHong Zhang
1109797317bSBarry Smith if (a->idiagvalid) {
1119797317bSBarry Smith if (values) *values = a->idiag;
1123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1139797317bSBarry Smith }
114*421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &adiag, NULL));
1153a7d0413SPierre Jolivet if (!a->idiag) PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag));
116b01c7715SBarry Smith diag = a->idiag;
117bbead8a2SBarry Smith if (values) *values = a->idiag;
118b01c7715SBarry Smith /* factor and invert each block */
119521d7252SBarry Smith switch (bs) {
120ab040260SJed Brown case 1:
121ab040260SJed Brown for (i = 0; i < mbs; i++) {
122*421480d9SBarry Smith odiag = v + 1 * adiag[i];
123ab040260SJed Brown diag[0] = odiag[0];
124ec1892c8SHong Zhang
125ec1892c8SHong Zhang if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
126966bd95aSPierre Jolivet PetscCheck(allowzeropivot, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
1277b6c816cSBarry Smith A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1287b6c816cSBarry Smith A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
1297b6c816cSBarry Smith A->factorerror_zeropivot_row = i;
1309566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
131ec1892c8SHong Zhang }
132ec1892c8SHong Zhang
133d4a378daSJed Brown diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
134ab040260SJed Brown diag += 1;
135ab040260SJed Brown }
136ab040260SJed Brown break;
137b01c7715SBarry Smith case 2:
138b01c7715SBarry Smith for (i = 0; i < mbs; i++) {
139*421480d9SBarry Smith odiag = v + 4 * adiag[i];
1409371c9d4SSatish Balay diag[0] = odiag[0];
1419371c9d4SSatish Balay diag[1] = odiag[1];
1429371c9d4SSatish Balay diag[2] = odiag[2];
1439371c9d4SSatish Balay diag[3] = odiag[3];
1449566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1457b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
146b01c7715SBarry Smith diag += 4;
147b01c7715SBarry Smith }
148b01c7715SBarry Smith break;
149b01c7715SBarry Smith case 3:
150b01c7715SBarry Smith for (i = 0; i < mbs; i++) {
151*421480d9SBarry Smith odiag = v + 9 * adiag[i];
1529371c9d4SSatish Balay diag[0] = odiag[0];
1539371c9d4SSatish Balay diag[1] = odiag[1];
1549371c9d4SSatish Balay diag[2] = odiag[2];
1559371c9d4SSatish Balay diag[3] = odiag[3];
1569371c9d4SSatish Balay diag[4] = odiag[4];
1579371c9d4SSatish Balay diag[5] = odiag[5];
1589371c9d4SSatish Balay diag[6] = odiag[6];
1599371c9d4SSatish Balay diag[7] = odiag[7];
160b01c7715SBarry Smith diag[8] = odiag[8];
1619566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
1627b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
163b01c7715SBarry Smith diag += 9;
164b01c7715SBarry Smith }
165b01c7715SBarry Smith break;
166b01c7715SBarry Smith case 4:
167b01c7715SBarry Smith for (i = 0; i < mbs; i++) {
168*421480d9SBarry Smith odiag = v + 16 * adiag[i];
1699566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 16));
1709566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
1717b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
172b01c7715SBarry Smith diag += 16;
173b01c7715SBarry Smith }
174b01c7715SBarry Smith break;
175b01c7715SBarry Smith case 5:
176b01c7715SBarry Smith for (i = 0; i < mbs; i++) {
177*421480d9SBarry Smith odiag = v + 25 * adiag[i];
1789566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 25));
1799566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
1807b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
181b01c7715SBarry Smith diag += 25;
182b01c7715SBarry Smith }
183b01c7715SBarry Smith break;
184d49b2adcSBarry Smith case 6:
185d49b2adcSBarry Smith for (i = 0; i < mbs; i++) {
186*421480d9SBarry Smith odiag = v + 36 * adiag[i];
1879566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 36));
1889566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
1897b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
190d49b2adcSBarry Smith diag += 36;
191d49b2adcSBarry Smith }
192d49b2adcSBarry Smith break;
193de80f912SBarry Smith case 7:
194de80f912SBarry Smith for (i = 0; i < mbs; i++) {
195*421480d9SBarry Smith odiag = v + 49 * adiag[i];
1969566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, 49));
1979566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
1987b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
199de80f912SBarry Smith diag += 49;
200de80f912SBarry Smith }
201de80f912SBarry Smith break;
202b01c7715SBarry Smith default:
2039566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
204de80f912SBarry Smith for (i = 0; i < mbs; i++) {
205*421480d9SBarry Smith odiag = v + bs2 * adiag[i];
2069566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(diag, odiag, bs2));
2079566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
2087b6c816cSBarry Smith if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
209de80f912SBarry Smith diag += bs2;
210de80f912SBarry Smith }
2119566063dSJacob Faibussowitsch PetscCall(PetscFree2(v_work, v_pivots));
212b01c7715SBarry Smith }
213b01c7715SBarry Smith a->idiagvalid = PETSC_TRUE;
2143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
215b01c7715SBarry Smith }
216b01c7715SBarry Smith
MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)21766976f2fSJacob Faibussowitsch static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
218d71ae5a4SJacob Faibussowitsch {
2196d3beeddSMatthew Knepley Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
220e48d15efSToby Isaac PetscScalar *x, *work, *w, *workt, *t;
221e48d15efSToby Isaac const MatScalar *v, *aa = a->a, *idiag;
222e48d15efSToby Isaac const PetscScalar *b, *xb;
2235455b99fSToby Isaac PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
224e48d15efSToby Isaac PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
225c1ac3661SBarry Smith const PetscInt *diag, *ai = a->i, *aj = a->j, *vi;
226b01c7715SBarry Smith
227b01c7715SBarry Smith PetscFunctionBegin;
228b01c7715SBarry Smith its = its * lits;
2295f80ce2aSJacob Faibussowitsch PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
2305f80ce2aSJacob Faibussowitsch PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2315f80ce2aSJacob Faibussowitsch PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
2325f80ce2aSJacob Faibussowitsch PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
2335f80ce2aSJacob Faibussowitsch PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
234b01c7715SBarry Smith
2359566063dSJacob Faibussowitsch if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
236b01c7715SBarry Smith
2373ba16761SJacob Faibussowitsch if (!m) PetscFunctionReturn(PETSC_SUCCESS);
238b01c7715SBarry Smith diag = a->diag;
239b01c7715SBarry Smith idiag = a->idiag;
240de80f912SBarry Smith k = PetscMax(A->rmap->n, A->cmap->n);
24148a46eb9SPierre Jolivet if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
24248a46eb9SPierre Jolivet if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
24348a46eb9SPierre Jolivet if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
2443475c22fSBarry Smith work = a->mult_work;
2453475c22fSBarry Smith t = a->sor_workt;
246de80f912SBarry Smith w = a->sor_work;
247de80f912SBarry Smith
2489566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x));
2499566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b));
250de80f912SBarry Smith
251de80f912SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) {
252de80f912SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
253e48d15efSToby Isaac switch (bs) {
254e48d15efSToby Isaac case 1:
255e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(x, idiag, b);
256e48d15efSToby Isaac t[0] = b[0];
257e48d15efSToby Isaac i2 = 1;
258e48d15efSToby Isaac idiag += 1;
259e48d15efSToby Isaac for (i = 1; i < m; i++) {
260e48d15efSToby Isaac v = aa + ai[i];
261e48d15efSToby Isaac vi = aj + ai[i];
262e48d15efSToby Isaac nz = diag[i] - ai[i];
263e48d15efSToby Isaac s[0] = b[i2];
264e48d15efSToby Isaac for (j = 0; j < nz; j++) {
265e48d15efSToby Isaac xw[0] = x[vi[j]];
266e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
267e48d15efSToby Isaac }
268e48d15efSToby Isaac t[i2] = s[0];
269e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
270e48d15efSToby Isaac x[i2] = xw[0];
271e48d15efSToby Isaac idiag += 1;
272e48d15efSToby Isaac i2 += 1;
273e48d15efSToby Isaac }
274e48d15efSToby Isaac break;
275e48d15efSToby Isaac case 2:
276e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(x, idiag, b);
2779371c9d4SSatish Balay t[0] = b[0];
2789371c9d4SSatish Balay t[1] = b[1];
279e48d15efSToby Isaac i2 = 2;
280e48d15efSToby Isaac idiag += 4;
281e48d15efSToby Isaac for (i = 1; i < m; i++) {
282e48d15efSToby Isaac v = aa + 4 * ai[i];
283e48d15efSToby Isaac vi = aj + ai[i];
284e48d15efSToby Isaac nz = diag[i] - ai[i];
2859371c9d4SSatish Balay s[0] = b[i2];
2869371c9d4SSatish Balay s[1] = b[i2 + 1];
287e48d15efSToby Isaac for (j = 0; j < nz; j++) {
288e48d15efSToby Isaac idx = 2 * vi[j];
289e48d15efSToby Isaac it = 4 * j;
2909371c9d4SSatish Balay xw[0] = x[idx];
2919371c9d4SSatish Balay xw[1] = x[1 + idx];
292e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
293e48d15efSToby Isaac }
2949371c9d4SSatish Balay t[i2] = s[0];
2959371c9d4SSatish Balay t[i2 + 1] = s[1];
296e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
2979371c9d4SSatish Balay x[i2] = xw[0];
2989371c9d4SSatish Balay x[i2 + 1] = xw[1];
299e48d15efSToby Isaac idiag += 4;
300e48d15efSToby Isaac i2 += 2;
301e48d15efSToby Isaac }
302e48d15efSToby Isaac break;
303e48d15efSToby Isaac case 3:
304e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(x, idiag, b);
3059371c9d4SSatish Balay t[0] = b[0];
3069371c9d4SSatish Balay t[1] = b[1];
3079371c9d4SSatish Balay t[2] = b[2];
308e48d15efSToby Isaac i2 = 3;
309e48d15efSToby Isaac idiag += 9;
310e48d15efSToby Isaac for (i = 1; i < m; i++) {
311e48d15efSToby Isaac v = aa + 9 * ai[i];
312e48d15efSToby Isaac vi = aj + ai[i];
313e48d15efSToby Isaac nz = diag[i] - ai[i];
3149371c9d4SSatish Balay s[0] = b[i2];
3159371c9d4SSatish Balay s[1] = b[i2 + 1];
3169371c9d4SSatish Balay s[2] = b[i2 + 2];
317e48d15efSToby Isaac while (nz--) {
318e48d15efSToby Isaac idx = 3 * (*vi++);
3199371c9d4SSatish Balay xw[0] = x[idx];
3209371c9d4SSatish Balay xw[1] = x[1 + idx];
3219371c9d4SSatish Balay xw[2] = x[2 + idx];
322e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
323e48d15efSToby Isaac v += 9;
324e48d15efSToby Isaac }
3259371c9d4SSatish Balay t[i2] = s[0];
3269371c9d4SSatish Balay t[i2 + 1] = s[1];
3279371c9d4SSatish Balay t[i2 + 2] = s[2];
328e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
3299371c9d4SSatish Balay x[i2] = xw[0];
3309371c9d4SSatish Balay x[i2 + 1] = xw[1];
3319371c9d4SSatish Balay x[i2 + 2] = xw[2];
332e48d15efSToby Isaac idiag += 9;
333e48d15efSToby Isaac i2 += 3;
334e48d15efSToby Isaac }
335e48d15efSToby Isaac break;
336e48d15efSToby Isaac case 4:
337e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(x, idiag, b);
3389371c9d4SSatish Balay t[0] = b[0];
3399371c9d4SSatish Balay t[1] = b[1];
3409371c9d4SSatish Balay t[2] = b[2];
3419371c9d4SSatish Balay t[3] = b[3];
342e48d15efSToby Isaac i2 = 4;
343e48d15efSToby Isaac idiag += 16;
344e48d15efSToby Isaac for (i = 1; i < m; i++) {
345e48d15efSToby Isaac v = aa + 16 * ai[i];
346e48d15efSToby Isaac vi = aj + ai[i];
347e48d15efSToby Isaac nz = diag[i] - ai[i];
3489371c9d4SSatish Balay s[0] = b[i2];
3499371c9d4SSatish Balay s[1] = b[i2 + 1];
3509371c9d4SSatish Balay s[2] = b[i2 + 2];
3519371c9d4SSatish Balay s[3] = b[i2 + 3];
352e48d15efSToby Isaac while (nz--) {
353e48d15efSToby Isaac idx = 4 * (*vi++);
3549371c9d4SSatish Balay xw[0] = x[idx];
3559371c9d4SSatish Balay xw[1] = x[1 + idx];
3569371c9d4SSatish Balay xw[2] = x[2 + idx];
3579371c9d4SSatish Balay xw[3] = x[3 + idx];
358e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
359e48d15efSToby Isaac v += 16;
360e48d15efSToby Isaac }
3619371c9d4SSatish Balay t[i2] = s[0];
3629371c9d4SSatish Balay t[i2 + 1] = s[1];
3639371c9d4SSatish Balay t[i2 + 2] = s[2];
3649371c9d4SSatish Balay t[i2 + 3] = s[3];
365e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
3669371c9d4SSatish Balay x[i2] = xw[0];
3679371c9d4SSatish Balay x[i2 + 1] = xw[1];
3689371c9d4SSatish Balay x[i2 + 2] = xw[2];
3699371c9d4SSatish Balay x[i2 + 3] = xw[3];
370e48d15efSToby Isaac idiag += 16;
371e48d15efSToby Isaac i2 += 4;
372e48d15efSToby Isaac }
373e48d15efSToby Isaac break;
374e48d15efSToby Isaac case 5:
375e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(x, idiag, b);
3769371c9d4SSatish Balay t[0] = b[0];
3779371c9d4SSatish Balay t[1] = b[1];
3789371c9d4SSatish Balay t[2] = b[2];
3799371c9d4SSatish Balay t[3] = b[3];
3809371c9d4SSatish Balay t[4] = b[4];
381e48d15efSToby Isaac i2 = 5;
382e48d15efSToby Isaac idiag += 25;
383e48d15efSToby Isaac for (i = 1; i < m; i++) {
384e48d15efSToby Isaac v = aa + 25 * ai[i];
385e48d15efSToby Isaac vi = aj + ai[i];
386e48d15efSToby Isaac nz = diag[i] - ai[i];
3879371c9d4SSatish Balay s[0] = b[i2];
3889371c9d4SSatish Balay s[1] = b[i2 + 1];
3899371c9d4SSatish Balay s[2] = b[i2 + 2];
3909371c9d4SSatish Balay s[3] = b[i2 + 3];
3919371c9d4SSatish Balay s[4] = b[i2 + 4];
392e48d15efSToby Isaac while (nz--) {
393e48d15efSToby Isaac idx = 5 * (*vi++);
3949371c9d4SSatish Balay xw[0] = x[idx];
3959371c9d4SSatish Balay xw[1] = x[1 + idx];
3969371c9d4SSatish Balay xw[2] = x[2 + idx];
3979371c9d4SSatish Balay xw[3] = x[3 + idx];
3989371c9d4SSatish Balay xw[4] = x[4 + idx];
399e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
400e48d15efSToby Isaac v += 25;
401e48d15efSToby Isaac }
4029371c9d4SSatish Balay t[i2] = s[0];
4039371c9d4SSatish Balay t[i2 + 1] = s[1];
4049371c9d4SSatish Balay t[i2 + 2] = s[2];
4059371c9d4SSatish Balay t[i2 + 3] = s[3];
4069371c9d4SSatish Balay t[i2 + 4] = s[4];
407e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
4089371c9d4SSatish Balay x[i2] = xw[0];
4099371c9d4SSatish Balay x[i2 + 1] = xw[1];
4109371c9d4SSatish Balay x[i2 + 2] = xw[2];
4119371c9d4SSatish Balay x[i2 + 3] = xw[3];
4129371c9d4SSatish Balay x[i2 + 4] = xw[4];
413e48d15efSToby Isaac idiag += 25;
414e48d15efSToby Isaac i2 += 5;
415e48d15efSToby Isaac }
416e48d15efSToby Isaac break;
417e48d15efSToby Isaac case 6:
418e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(x, idiag, b);
4199371c9d4SSatish Balay t[0] = b[0];
4209371c9d4SSatish Balay t[1] = b[1];
4219371c9d4SSatish Balay t[2] = b[2];
4229371c9d4SSatish Balay t[3] = b[3];
4239371c9d4SSatish Balay t[4] = b[4];
4249371c9d4SSatish Balay t[5] = b[5];
425e48d15efSToby Isaac i2 = 6;
426e48d15efSToby Isaac idiag += 36;
427e48d15efSToby Isaac for (i = 1; i < m; i++) {
428e48d15efSToby Isaac v = aa + 36 * ai[i];
429e48d15efSToby Isaac vi = aj + ai[i];
430e48d15efSToby Isaac nz = diag[i] - ai[i];
4319371c9d4SSatish Balay s[0] = b[i2];
4329371c9d4SSatish Balay s[1] = b[i2 + 1];
4339371c9d4SSatish Balay s[2] = b[i2 + 2];
4349371c9d4SSatish Balay s[3] = b[i2 + 3];
4359371c9d4SSatish Balay s[4] = b[i2 + 4];
4369371c9d4SSatish Balay s[5] = b[i2 + 5];
437e48d15efSToby Isaac while (nz--) {
438e48d15efSToby Isaac idx = 6 * (*vi++);
4399371c9d4SSatish Balay xw[0] = x[idx];
4409371c9d4SSatish Balay xw[1] = x[1 + idx];
4419371c9d4SSatish Balay xw[2] = x[2 + idx];
4429371c9d4SSatish Balay xw[3] = x[3 + idx];
4439371c9d4SSatish Balay xw[4] = x[4 + idx];
4449371c9d4SSatish Balay xw[5] = x[5 + idx];
445e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
446e48d15efSToby Isaac v += 36;
447e48d15efSToby Isaac }
4489371c9d4SSatish Balay t[i2] = s[0];
4499371c9d4SSatish Balay t[i2 + 1] = s[1];
4509371c9d4SSatish Balay t[i2 + 2] = s[2];
4519371c9d4SSatish Balay t[i2 + 3] = s[3];
4529371c9d4SSatish Balay t[i2 + 4] = s[4];
4539371c9d4SSatish Balay t[i2 + 5] = s[5];
454e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
4559371c9d4SSatish Balay x[i2] = xw[0];
4569371c9d4SSatish Balay x[i2 + 1] = xw[1];
4579371c9d4SSatish Balay x[i2 + 2] = xw[2];
4589371c9d4SSatish Balay x[i2 + 3] = xw[3];
4599371c9d4SSatish Balay x[i2 + 4] = xw[4];
4609371c9d4SSatish Balay x[i2 + 5] = xw[5];
461e48d15efSToby Isaac idiag += 36;
462e48d15efSToby Isaac i2 += 6;
463e48d15efSToby Isaac }
464e48d15efSToby Isaac break;
465e48d15efSToby Isaac case 7:
466e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(x, idiag, b);
4679371c9d4SSatish Balay t[0] = b[0];
4689371c9d4SSatish Balay t[1] = b[1];
4699371c9d4SSatish Balay t[2] = b[2];
4709371c9d4SSatish Balay t[3] = b[3];
4719371c9d4SSatish Balay t[4] = b[4];
4729371c9d4SSatish Balay t[5] = b[5];
4739371c9d4SSatish Balay t[6] = b[6];
474e48d15efSToby Isaac i2 = 7;
475e48d15efSToby Isaac idiag += 49;
476e48d15efSToby Isaac for (i = 1; i < m; i++) {
477e48d15efSToby Isaac v = aa + 49 * ai[i];
478e48d15efSToby Isaac vi = aj + ai[i];
479e48d15efSToby Isaac nz = diag[i] - ai[i];
4809371c9d4SSatish Balay s[0] = b[i2];
4819371c9d4SSatish Balay s[1] = b[i2 + 1];
4829371c9d4SSatish Balay s[2] = b[i2 + 2];
4839371c9d4SSatish Balay s[3] = b[i2 + 3];
4849371c9d4SSatish Balay s[4] = b[i2 + 4];
4859371c9d4SSatish Balay s[5] = b[i2 + 5];
4869371c9d4SSatish Balay s[6] = b[i2 + 6];
487e48d15efSToby Isaac while (nz--) {
488e48d15efSToby Isaac idx = 7 * (*vi++);
4899371c9d4SSatish Balay xw[0] = x[idx];
4909371c9d4SSatish Balay xw[1] = x[1 + idx];
4919371c9d4SSatish Balay xw[2] = x[2 + idx];
4929371c9d4SSatish Balay xw[3] = x[3 + idx];
4939371c9d4SSatish Balay xw[4] = x[4 + idx];
4949371c9d4SSatish Balay xw[5] = x[5 + idx];
4959371c9d4SSatish Balay xw[6] = x[6 + idx];
496e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
497e48d15efSToby Isaac v += 49;
498e48d15efSToby Isaac }
4999371c9d4SSatish Balay t[i2] = s[0];
5009371c9d4SSatish Balay t[i2 + 1] = s[1];
5019371c9d4SSatish Balay t[i2 + 2] = s[2];
5029371c9d4SSatish Balay t[i2 + 3] = s[3];
5039371c9d4SSatish Balay t[i2 + 4] = s[4];
5049371c9d4SSatish Balay t[i2 + 5] = s[5];
5059371c9d4SSatish Balay t[i2 + 6] = s[6];
506e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
5079371c9d4SSatish Balay x[i2] = xw[0];
5089371c9d4SSatish Balay x[i2 + 1] = xw[1];
5099371c9d4SSatish Balay x[i2 + 2] = xw[2];
5109371c9d4SSatish Balay x[i2 + 3] = xw[3];
5119371c9d4SSatish Balay x[i2 + 4] = xw[4];
5129371c9d4SSatish Balay x[i2 + 5] = xw[5];
5139371c9d4SSatish Balay x[i2 + 6] = xw[6];
514e48d15efSToby Isaac idiag += 49;
515e48d15efSToby Isaac i2 += 7;
516e48d15efSToby Isaac }
517e48d15efSToby Isaac break;
518e48d15efSToby Isaac default:
51996b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
5209566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(t, b, bs));
521de80f912SBarry Smith i2 = bs;
522de80f912SBarry Smith idiag += bs2;
523de80f912SBarry Smith for (i = 1; i < m; i++) {
524de80f912SBarry Smith v = aa + bs2 * ai[i];
525de80f912SBarry Smith vi = aj + ai[i];
526de80f912SBarry Smith nz = diag[i] - ai[i];
527de80f912SBarry Smith
5289566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, b + i2, bs));
529de80f912SBarry Smith /* copy all rows of x that are needed into contiguous space */
530de80f912SBarry Smith workt = work;
531de80f912SBarry Smith for (j = 0; j < nz; j++) {
5329566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
533de80f912SBarry Smith workt += bs;
534de80f912SBarry Smith }
53596b95a6bSBarry Smith PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
5369566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(t + i2, w, bs));
53796b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
538de80f912SBarry Smith
539de80f912SBarry Smith idiag += bs2;
540de80f912SBarry Smith i2 += bs;
541de80f912SBarry Smith }
542e48d15efSToby Isaac break;
543e48d15efSToby Isaac }
544de80f912SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
5459566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
546e48d15efSToby Isaac xb = t;
5479371c9d4SSatish Balay } else xb = b;
548de80f912SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
549e48d15efSToby Isaac idiag = a->idiag + bs2 * (a->mbs - 1);
550e48d15efSToby Isaac i2 = bs * (m - 1);
551e48d15efSToby Isaac switch (bs) {
552e48d15efSToby Isaac case 1:
553e48d15efSToby Isaac s[0] = xb[i2];
554e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
555e48d15efSToby Isaac x[i2] = xw[0];
556e48d15efSToby Isaac i2 -= 1;
557e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) {
558e48d15efSToby Isaac v = aa + (diag[i] + 1);
559e48d15efSToby Isaac vi = aj + diag[i] + 1;
560e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1;
561e48d15efSToby Isaac s[0] = xb[i2];
562e48d15efSToby Isaac for (j = 0; j < nz; j++) {
563e48d15efSToby Isaac xw[0] = x[vi[j]];
564e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
565e48d15efSToby Isaac }
566e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
567e48d15efSToby Isaac x[i2] = xw[0];
568e48d15efSToby Isaac idiag -= 1;
569e48d15efSToby Isaac i2 -= 1;
570e48d15efSToby Isaac }
571e48d15efSToby Isaac break;
572e48d15efSToby Isaac case 2:
5739371c9d4SSatish Balay s[0] = xb[i2];
5749371c9d4SSatish Balay s[1] = xb[i2 + 1];
575e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
5769371c9d4SSatish Balay x[i2] = xw[0];
5779371c9d4SSatish Balay x[i2 + 1] = xw[1];
578e48d15efSToby Isaac i2 -= 2;
579e48d15efSToby Isaac idiag -= 4;
580e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) {
581e48d15efSToby Isaac v = aa + 4 * (diag[i] + 1);
582e48d15efSToby Isaac vi = aj + diag[i] + 1;
583e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1;
5849371c9d4SSatish Balay s[0] = xb[i2];
5859371c9d4SSatish Balay s[1] = xb[i2 + 1];
586e48d15efSToby Isaac for (j = 0; j < nz; j++) {
587e48d15efSToby Isaac idx = 2 * vi[j];
588e48d15efSToby Isaac it = 4 * j;
5899371c9d4SSatish Balay xw[0] = x[idx];
5909371c9d4SSatish Balay xw[1] = x[1 + idx];
591e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
592e48d15efSToby Isaac }
593e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
5949371c9d4SSatish Balay x[i2] = xw[0];
5959371c9d4SSatish Balay x[i2 + 1] = xw[1];
596e48d15efSToby Isaac idiag -= 4;
597e48d15efSToby Isaac i2 -= 2;
598e48d15efSToby Isaac }
599e48d15efSToby Isaac break;
600e48d15efSToby Isaac case 3:
6019371c9d4SSatish Balay s[0] = xb[i2];
6029371c9d4SSatish Balay s[1] = xb[i2 + 1];
6039371c9d4SSatish Balay s[2] = xb[i2 + 2];
604e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
6059371c9d4SSatish Balay x[i2] = xw[0];
6069371c9d4SSatish Balay x[i2 + 1] = xw[1];
6079371c9d4SSatish Balay x[i2 + 2] = xw[2];
608e48d15efSToby Isaac i2 -= 3;
609e48d15efSToby Isaac idiag -= 9;
610e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) {
611e48d15efSToby Isaac v = aa + 9 * (diag[i] + 1);
612e48d15efSToby Isaac vi = aj + diag[i] + 1;
613e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1;
6149371c9d4SSatish Balay s[0] = xb[i2];
6159371c9d4SSatish Balay s[1] = xb[i2 + 1];
6169371c9d4SSatish Balay s[2] = xb[i2 + 2];
617e48d15efSToby Isaac while (nz--) {
618e48d15efSToby Isaac idx = 3 * (*vi++);
6199371c9d4SSatish Balay xw[0] = x[idx];
6209371c9d4SSatish Balay xw[1] = x[1 + idx];
6219371c9d4SSatish Balay xw[2] = x[2 + idx];
622e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
623e48d15efSToby Isaac v += 9;
624e48d15efSToby Isaac }
625e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
6269371c9d4SSatish Balay x[i2] = xw[0];
6279371c9d4SSatish Balay x[i2 + 1] = xw[1];
6289371c9d4SSatish Balay x[i2 + 2] = xw[2];
629e48d15efSToby Isaac idiag -= 9;
630e48d15efSToby Isaac i2 -= 3;
631e48d15efSToby Isaac }
632e48d15efSToby Isaac break;
633e48d15efSToby Isaac case 4:
6349371c9d4SSatish Balay s[0] = xb[i2];
6359371c9d4SSatish Balay s[1] = xb[i2 + 1];
6369371c9d4SSatish Balay s[2] = xb[i2 + 2];
6379371c9d4SSatish Balay s[3] = xb[i2 + 3];
638e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
6399371c9d4SSatish Balay x[i2] = xw[0];
6409371c9d4SSatish Balay x[i2 + 1] = xw[1];
6419371c9d4SSatish Balay x[i2 + 2] = xw[2];
6429371c9d4SSatish Balay x[i2 + 3] = xw[3];
643e48d15efSToby Isaac i2 -= 4;
644e48d15efSToby Isaac idiag -= 16;
645e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) {
646e48d15efSToby Isaac v = aa + 16 * (diag[i] + 1);
647e48d15efSToby Isaac vi = aj + diag[i] + 1;
648e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1;
6499371c9d4SSatish Balay s[0] = xb[i2];
6509371c9d4SSatish Balay s[1] = xb[i2 + 1];
6519371c9d4SSatish Balay s[2] = xb[i2 + 2];
6529371c9d4SSatish Balay s[3] = xb[i2 + 3];
653e48d15efSToby Isaac while (nz--) {
654e48d15efSToby Isaac idx = 4 * (*vi++);
6559371c9d4SSatish Balay xw[0] = x[idx];
6569371c9d4SSatish Balay xw[1] = x[1 + idx];
6579371c9d4SSatish Balay xw[2] = x[2 + idx];
6589371c9d4SSatish Balay xw[3] = x[3 + idx];
659e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
660e48d15efSToby Isaac v += 16;
661e48d15efSToby Isaac }
662e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
6639371c9d4SSatish Balay x[i2] = xw[0];
6649371c9d4SSatish Balay x[i2 + 1] = xw[1];
6659371c9d4SSatish Balay x[i2 + 2] = xw[2];
6669371c9d4SSatish Balay x[i2 + 3] = xw[3];
667e48d15efSToby Isaac idiag -= 16;
668e48d15efSToby Isaac i2 -= 4;
669e48d15efSToby Isaac }
670e48d15efSToby Isaac break;
671e48d15efSToby Isaac case 5:
6729371c9d4SSatish Balay s[0] = xb[i2];
6739371c9d4SSatish Balay s[1] = xb[i2 + 1];
6749371c9d4SSatish Balay s[2] = xb[i2 + 2];
6759371c9d4SSatish Balay s[3] = xb[i2 + 3];
6769371c9d4SSatish Balay s[4] = xb[i2 + 4];
677e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
6789371c9d4SSatish Balay x[i2] = xw[0];
6799371c9d4SSatish Balay x[i2 + 1] = xw[1];
6809371c9d4SSatish Balay x[i2 + 2] = xw[2];
6819371c9d4SSatish Balay x[i2 + 3] = xw[3];
6829371c9d4SSatish Balay x[i2 + 4] = xw[4];
683e48d15efSToby Isaac i2 -= 5;
684e48d15efSToby Isaac idiag -= 25;
685e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) {
686e48d15efSToby Isaac v = aa + 25 * (diag[i] + 1);
687e48d15efSToby Isaac vi = aj + diag[i] + 1;
688e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1;
6899371c9d4SSatish Balay s[0] = xb[i2];
6909371c9d4SSatish Balay s[1] = xb[i2 + 1];
6919371c9d4SSatish Balay s[2] = xb[i2 + 2];
6929371c9d4SSatish Balay s[3] = xb[i2 + 3];
6939371c9d4SSatish Balay s[4] = xb[i2 + 4];
694e48d15efSToby Isaac while (nz--) {
695e48d15efSToby Isaac idx = 5 * (*vi++);
6969371c9d4SSatish Balay xw[0] = x[idx];
6979371c9d4SSatish Balay xw[1] = x[1 + idx];
6989371c9d4SSatish Balay xw[2] = x[2 + idx];
6999371c9d4SSatish Balay xw[3] = x[3 + idx];
7009371c9d4SSatish Balay xw[4] = x[4 + idx];
701e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
702e48d15efSToby Isaac v += 25;
703e48d15efSToby Isaac }
704e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
7059371c9d4SSatish Balay x[i2] = xw[0];
7069371c9d4SSatish Balay x[i2 + 1] = xw[1];
7079371c9d4SSatish Balay x[i2 + 2] = xw[2];
7089371c9d4SSatish Balay x[i2 + 3] = xw[3];
7099371c9d4SSatish Balay x[i2 + 4] = xw[4];
710e48d15efSToby Isaac idiag -= 25;
711e48d15efSToby Isaac i2 -= 5;
712e48d15efSToby Isaac }
713e48d15efSToby Isaac break;
714e48d15efSToby Isaac case 6:
7159371c9d4SSatish Balay s[0] = xb[i2];
7169371c9d4SSatish Balay s[1] = xb[i2 + 1];
7179371c9d4SSatish Balay s[2] = xb[i2 + 2];
7189371c9d4SSatish Balay s[3] = xb[i2 + 3];
7199371c9d4SSatish Balay s[4] = xb[i2 + 4];
7209371c9d4SSatish Balay s[5] = xb[i2 + 5];
721e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
7229371c9d4SSatish Balay x[i2] = xw[0];
7239371c9d4SSatish Balay x[i2 + 1] = xw[1];
7249371c9d4SSatish Balay x[i2 + 2] = xw[2];
7259371c9d4SSatish Balay x[i2 + 3] = xw[3];
7269371c9d4SSatish Balay x[i2 + 4] = xw[4];
7279371c9d4SSatish Balay x[i2 + 5] = xw[5];
728e48d15efSToby Isaac i2 -= 6;
729e48d15efSToby Isaac idiag -= 36;
730e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) {
731e48d15efSToby Isaac v = aa + 36 * (diag[i] + 1);
732e48d15efSToby Isaac vi = aj + diag[i] + 1;
733e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1;
7349371c9d4SSatish Balay s[0] = xb[i2];
7359371c9d4SSatish Balay s[1] = xb[i2 + 1];
7369371c9d4SSatish Balay s[2] = xb[i2 + 2];
7379371c9d4SSatish Balay s[3] = xb[i2 + 3];
7389371c9d4SSatish Balay s[4] = xb[i2 + 4];
7399371c9d4SSatish Balay s[5] = xb[i2 + 5];
740e48d15efSToby Isaac while (nz--) {
741e48d15efSToby Isaac idx = 6 * (*vi++);
7429371c9d4SSatish Balay xw[0] = x[idx];
7439371c9d4SSatish Balay xw[1] = x[1 + idx];
7449371c9d4SSatish Balay xw[2] = x[2 + idx];
7459371c9d4SSatish Balay xw[3] = x[3 + idx];
7469371c9d4SSatish Balay xw[4] = x[4 + idx];
7479371c9d4SSatish Balay xw[5] = x[5 + idx];
748e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
749e48d15efSToby Isaac v += 36;
750e48d15efSToby Isaac }
751e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
7529371c9d4SSatish Balay x[i2] = xw[0];
7539371c9d4SSatish Balay x[i2 + 1] = xw[1];
7549371c9d4SSatish Balay x[i2 + 2] = xw[2];
7559371c9d4SSatish Balay x[i2 + 3] = xw[3];
7569371c9d4SSatish Balay x[i2 + 4] = xw[4];
7579371c9d4SSatish Balay x[i2 + 5] = xw[5];
758e48d15efSToby Isaac idiag -= 36;
759e48d15efSToby Isaac i2 -= 6;
760e48d15efSToby Isaac }
761e48d15efSToby Isaac break;
762e48d15efSToby Isaac case 7:
7639371c9d4SSatish Balay s[0] = xb[i2];
7649371c9d4SSatish Balay s[1] = xb[i2 + 1];
7659371c9d4SSatish Balay s[2] = xb[i2 + 2];
7669371c9d4SSatish Balay s[3] = xb[i2 + 3];
7679371c9d4SSatish Balay s[4] = xb[i2 + 4];
7689371c9d4SSatish Balay s[5] = xb[i2 + 5];
7699371c9d4SSatish Balay s[6] = xb[i2 + 6];
770e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(x, idiag, b);
7719371c9d4SSatish Balay x[i2] = xw[0];
7729371c9d4SSatish Balay x[i2 + 1] = xw[1];
7739371c9d4SSatish Balay x[i2 + 2] = xw[2];
7749371c9d4SSatish Balay x[i2 + 3] = xw[3];
7759371c9d4SSatish Balay x[i2 + 4] = xw[4];
7769371c9d4SSatish Balay x[i2 + 5] = xw[5];
7779371c9d4SSatish Balay x[i2 + 6] = xw[6];
778e48d15efSToby Isaac i2 -= 7;
779e48d15efSToby Isaac idiag -= 49;
780e48d15efSToby Isaac for (i = m - 2; i >= 0; i--) {
781e48d15efSToby Isaac v = aa + 49 * (diag[i] + 1);
782e48d15efSToby Isaac vi = aj + diag[i] + 1;
783e48d15efSToby Isaac nz = ai[i + 1] - diag[i] - 1;
7849371c9d4SSatish Balay s[0] = xb[i2];
7859371c9d4SSatish Balay s[1] = xb[i2 + 1];
7869371c9d4SSatish Balay s[2] = xb[i2 + 2];
7879371c9d4SSatish Balay s[3] = xb[i2 + 3];
7889371c9d4SSatish Balay s[4] = xb[i2 + 4];
7899371c9d4SSatish Balay s[5] = xb[i2 + 5];
7909371c9d4SSatish Balay s[6] = xb[i2 + 6];
791e48d15efSToby Isaac while (nz--) {
792e48d15efSToby Isaac idx = 7 * (*vi++);
7939371c9d4SSatish Balay xw[0] = x[idx];
7949371c9d4SSatish Balay xw[1] = x[1 + idx];
7959371c9d4SSatish Balay xw[2] = x[2 + idx];
7969371c9d4SSatish Balay xw[3] = x[3 + idx];
7979371c9d4SSatish Balay xw[4] = x[4 + idx];
7989371c9d4SSatish Balay xw[5] = x[5 + idx];
7999371c9d4SSatish Balay xw[6] = x[6 + idx];
800e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
801e48d15efSToby Isaac v += 49;
802e48d15efSToby Isaac }
803e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
8049371c9d4SSatish Balay x[i2] = xw[0];
8059371c9d4SSatish Balay x[i2 + 1] = xw[1];
8069371c9d4SSatish Balay x[i2 + 2] = xw[2];
8079371c9d4SSatish Balay x[i2 + 3] = xw[3];
8089371c9d4SSatish Balay x[i2 + 4] = xw[4];
8099371c9d4SSatish Balay x[i2 + 5] = xw[5];
8109371c9d4SSatish Balay x[i2 + 6] = xw[6];
811e48d15efSToby Isaac idiag -= 49;
812e48d15efSToby Isaac i2 -= 7;
813e48d15efSToby Isaac }
814e48d15efSToby Isaac break;
815e48d15efSToby Isaac default:
8169566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, xb + i2, bs));
81796b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
818de80f912SBarry Smith i2 -= bs;
819e48d15efSToby Isaac idiag -= bs2;
820de80f912SBarry Smith for (i = m - 2; i >= 0; i--) {
821de80f912SBarry Smith v = aa + bs2 * (diag[i] + 1);
822de80f912SBarry Smith vi = aj + diag[i] + 1;
823de80f912SBarry Smith nz = ai[i + 1] - diag[i] - 1;
824de80f912SBarry Smith
8259566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, xb + i2, bs));
826de80f912SBarry Smith /* copy all rows of x that are needed into contiguous space */
827de80f912SBarry Smith workt = work;
828de80f912SBarry Smith for (j = 0; j < nz; j++) {
8299566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
830de80f912SBarry Smith workt += bs;
831de80f912SBarry Smith }
83296b95a6bSBarry Smith PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
83396b95a6bSBarry Smith PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
834e48d15efSToby Isaac
835de80f912SBarry Smith idiag -= bs2;
836de80f912SBarry Smith i2 -= bs;
837de80f912SBarry Smith }
838e48d15efSToby Isaac break;
839e48d15efSToby Isaac }
8409566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
841de80f912SBarry Smith }
842e48d15efSToby Isaac its--;
843e48d15efSToby Isaac }
844e48d15efSToby Isaac while (its--) {
845e48d15efSToby Isaac if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
846e48d15efSToby Isaac idiag = a->idiag;
847e48d15efSToby Isaac i2 = 0;
848e48d15efSToby Isaac switch (bs) {
849e48d15efSToby Isaac case 1:
850e48d15efSToby Isaac for (i = 0; i < m; i++) {
851e48d15efSToby Isaac v = aa + ai[i];
852e48d15efSToby Isaac vi = aj + ai[i];
853e48d15efSToby Isaac nz = ai[i + 1] - ai[i];
854e48d15efSToby Isaac s[0] = b[i2];
855e48d15efSToby Isaac for (j = 0; j < nz; j++) {
856e48d15efSToby Isaac xw[0] = x[vi[j]];
857e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
858e48d15efSToby Isaac }
859e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
860e48d15efSToby Isaac x[i2] += xw[0];
861e48d15efSToby Isaac idiag += 1;
862e48d15efSToby Isaac i2 += 1;
863e48d15efSToby Isaac }
864e48d15efSToby Isaac break;
865e48d15efSToby Isaac case 2:
866e48d15efSToby Isaac for (i = 0; i < m; i++) {
867e48d15efSToby Isaac v = aa + 4 * ai[i];
868e48d15efSToby Isaac vi = aj + ai[i];
869e48d15efSToby Isaac nz = ai[i + 1] - ai[i];
8709371c9d4SSatish Balay s[0] = b[i2];
8719371c9d4SSatish Balay s[1] = b[i2 + 1];
872e48d15efSToby Isaac for (j = 0; j < nz; j++) {
873e48d15efSToby Isaac idx = 2 * vi[j];
874e48d15efSToby Isaac it = 4 * j;
8759371c9d4SSatish Balay xw[0] = x[idx];
8769371c9d4SSatish Balay xw[1] = x[1 + idx];
877e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
878e48d15efSToby Isaac }
879e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
8809371c9d4SSatish Balay x[i2] += xw[0];
8819371c9d4SSatish Balay x[i2 + 1] += xw[1];
882e48d15efSToby Isaac idiag += 4;
883e48d15efSToby Isaac i2 += 2;
884e48d15efSToby Isaac }
885e48d15efSToby Isaac break;
886e48d15efSToby Isaac case 3:
887e48d15efSToby Isaac for (i = 0; i < m; i++) {
888e48d15efSToby Isaac v = aa + 9 * ai[i];
889e48d15efSToby Isaac vi = aj + ai[i];
890e48d15efSToby Isaac nz = ai[i + 1] - ai[i];
8919371c9d4SSatish Balay s[0] = b[i2];
8929371c9d4SSatish Balay s[1] = b[i2 + 1];
8939371c9d4SSatish Balay s[2] = b[i2 + 2];
894e48d15efSToby Isaac while (nz--) {
895e48d15efSToby Isaac idx = 3 * (*vi++);
8969371c9d4SSatish Balay xw[0] = x[idx];
8979371c9d4SSatish Balay xw[1] = x[1 + idx];
8989371c9d4SSatish Balay xw[2] = x[2 + idx];
899e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
900e48d15efSToby Isaac v += 9;
901e48d15efSToby Isaac }
902e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
9039371c9d4SSatish Balay x[i2] += xw[0];
9049371c9d4SSatish Balay x[i2 + 1] += xw[1];
9059371c9d4SSatish Balay x[i2 + 2] += xw[2];
906e48d15efSToby Isaac idiag += 9;
907e48d15efSToby Isaac i2 += 3;
908e48d15efSToby Isaac }
909e48d15efSToby Isaac break;
910e48d15efSToby Isaac case 4:
911e48d15efSToby Isaac for (i = 0; i < m; i++) {
912e48d15efSToby Isaac v = aa + 16 * ai[i];
913e48d15efSToby Isaac vi = aj + ai[i];
914e48d15efSToby Isaac nz = ai[i + 1] - ai[i];
9159371c9d4SSatish Balay s[0] = b[i2];
9169371c9d4SSatish Balay s[1] = b[i2 + 1];
9179371c9d4SSatish Balay s[2] = b[i2 + 2];
9189371c9d4SSatish Balay s[3] = b[i2 + 3];
919e48d15efSToby Isaac while (nz--) {
920e48d15efSToby Isaac idx = 4 * (*vi++);
9219371c9d4SSatish Balay xw[0] = x[idx];
9229371c9d4SSatish Balay xw[1] = x[1 + idx];
9239371c9d4SSatish Balay xw[2] = x[2 + idx];
9249371c9d4SSatish Balay xw[3] = x[3 + idx];
925e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
926e48d15efSToby Isaac v += 16;
927e48d15efSToby Isaac }
928e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
9299371c9d4SSatish Balay x[i2] += xw[0];
9309371c9d4SSatish Balay x[i2 + 1] += xw[1];
9319371c9d4SSatish Balay x[i2 + 2] += xw[2];
9329371c9d4SSatish Balay x[i2 + 3] += xw[3];
933e48d15efSToby Isaac idiag += 16;
934e48d15efSToby Isaac i2 += 4;
935e48d15efSToby Isaac }
936e48d15efSToby Isaac break;
937e48d15efSToby Isaac case 5:
938e48d15efSToby Isaac for (i = 0; i < m; i++) {
939e48d15efSToby Isaac v = aa + 25 * ai[i];
940e48d15efSToby Isaac vi = aj + ai[i];
941e48d15efSToby Isaac nz = ai[i + 1] - ai[i];
9429371c9d4SSatish Balay s[0] = b[i2];
9439371c9d4SSatish Balay s[1] = b[i2 + 1];
9449371c9d4SSatish Balay s[2] = b[i2 + 2];
9459371c9d4SSatish Balay s[3] = b[i2 + 3];
9469371c9d4SSatish Balay s[4] = b[i2 + 4];
947e48d15efSToby Isaac while (nz--) {
948e48d15efSToby Isaac idx = 5 * (*vi++);
9499371c9d4SSatish Balay xw[0] = x[idx];
9509371c9d4SSatish Balay xw[1] = x[1 + idx];
9519371c9d4SSatish Balay xw[2] = x[2 + idx];
9529371c9d4SSatish Balay xw[3] = x[3 + idx];
9539371c9d4SSatish Balay xw[4] = x[4 + idx];
954e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
955e48d15efSToby Isaac v += 25;
956e48d15efSToby Isaac }
957e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
9589371c9d4SSatish Balay x[i2] += xw[0];
9599371c9d4SSatish Balay x[i2 + 1] += xw[1];
9609371c9d4SSatish Balay x[i2 + 2] += xw[2];
9619371c9d4SSatish Balay x[i2 + 3] += xw[3];
9629371c9d4SSatish Balay x[i2 + 4] += xw[4];
963e48d15efSToby Isaac idiag += 25;
964e48d15efSToby Isaac i2 += 5;
965e48d15efSToby Isaac }
966e48d15efSToby Isaac break;
967e48d15efSToby Isaac case 6:
968e48d15efSToby Isaac for (i = 0; i < m; i++) {
969e48d15efSToby Isaac v = aa + 36 * ai[i];
970e48d15efSToby Isaac vi = aj + ai[i];
971e48d15efSToby Isaac nz = ai[i + 1] - ai[i];
9729371c9d4SSatish Balay s[0] = b[i2];
9739371c9d4SSatish Balay s[1] = b[i2 + 1];
9749371c9d4SSatish Balay s[2] = b[i2 + 2];
9759371c9d4SSatish Balay s[3] = b[i2 + 3];
9769371c9d4SSatish Balay s[4] = b[i2 + 4];
9779371c9d4SSatish Balay s[5] = b[i2 + 5];
978e48d15efSToby Isaac while (nz--) {
979e48d15efSToby Isaac idx = 6 * (*vi++);
9809371c9d4SSatish Balay xw[0] = x[idx];
9819371c9d4SSatish Balay xw[1] = x[1 + idx];
9829371c9d4SSatish Balay xw[2] = x[2 + idx];
9839371c9d4SSatish Balay xw[3] = x[3 + idx];
9849371c9d4SSatish Balay xw[4] = x[4 + idx];
9859371c9d4SSatish Balay xw[5] = x[5 + idx];
986e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
987e48d15efSToby Isaac v += 36;
988e48d15efSToby Isaac }
989e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
9909371c9d4SSatish Balay x[i2] += xw[0];
9919371c9d4SSatish Balay x[i2 + 1] += xw[1];
9929371c9d4SSatish Balay x[i2 + 2] += xw[2];
9939371c9d4SSatish Balay x[i2 + 3] += xw[3];
9949371c9d4SSatish Balay x[i2 + 4] += xw[4];
9959371c9d4SSatish Balay x[i2 + 5] += xw[5];
996e48d15efSToby Isaac idiag += 36;
997e48d15efSToby Isaac i2 += 6;
998e48d15efSToby Isaac }
999e48d15efSToby Isaac break;
1000e48d15efSToby Isaac case 7:
1001e48d15efSToby Isaac for (i = 0; i < m; i++) {
1002e48d15efSToby Isaac v = aa + 49 * ai[i];
1003e48d15efSToby Isaac vi = aj + ai[i];
1004e48d15efSToby Isaac nz = ai[i + 1] - ai[i];
10059371c9d4SSatish Balay s[0] = b[i2];
10069371c9d4SSatish Balay s[1] = b[i2 + 1];
10079371c9d4SSatish Balay s[2] = b[i2 + 2];
10089371c9d4SSatish Balay s[3] = b[i2 + 3];
10099371c9d4SSatish Balay s[4] = b[i2 + 4];
10109371c9d4SSatish Balay s[5] = b[i2 + 5];
10119371c9d4SSatish Balay s[6] = b[i2 + 6];
1012e48d15efSToby Isaac while (nz--) {
1013e48d15efSToby Isaac idx = 7 * (*vi++);
10149371c9d4SSatish Balay xw[0] = x[idx];
10159371c9d4SSatish Balay xw[1] = x[1 + idx];
10169371c9d4SSatish Balay xw[2] = x[2 + idx];
10179371c9d4SSatish Balay xw[3] = x[3 + idx];
10189371c9d4SSatish Balay xw[4] = x[4 + idx];
10199371c9d4SSatish Balay xw[5] = x[5 + idx];
10209371c9d4SSatish Balay xw[6] = x[6 + idx];
1021e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1022e48d15efSToby Isaac v += 49;
1023e48d15efSToby Isaac }
1024e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
10259371c9d4SSatish Balay x[i2] += xw[0];
10269371c9d4SSatish Balay x[i2 + 1] += xw[1];
10279371c9d4SSatish Balay x[i2 + 2] += xw[2];
10289371c9d4SSatish Balay x[i2 + 3] += xw[3];
10299371c9d4SSatish Balay x[i2 + 4] += xw[4];
10309371c9d4SSatish Balay x[i2 + 5] += xw[5];
10319371c9d4SSatish Balay x[i2 + 6] += xw[6];
1032e48d15efSToby Isaac idiag += 49;
1033e48d15efSToby Isaac i2 += 7;
1034e48d15efSToby Isaac }
1035e48d15efSToby Isaac break;
1036e48d15efSToby Isaac default:
1037e48d15efSToby Isaac for (i = 0; i < m; i++) {
1038e48d15efSToby Isaac v = aa + bs2 * ai[i];
1039e48d15efSToby Isaac vi = aj + ai[i];
1040e48d15efSToby Isaac nz = ai[i + 1] - ai[i];
1041e48d15efSToby Isaac
10429566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, b + i2, bs));
1043e48d15efSToby Isaac /* copy all rows of x that are needed into contiguous space */
1044e48d15efSToby Isaac workt = work;
1045e48d15efSToby Isaac for (j = 0; j < nz; j++) {
10469566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1047e48d15efSToby Isaac workt += bs;
1048e48d15efSToby Isaac }
1049e48d15efSToby Isaac PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1050e48d15efSToby Isaac PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1051e48d15efSToby Isaac
1052e48d15efSToby Isaac idiag += bs2;
1053e48d15efSToby Isaac i2 += bs;
1054e48d15efSToby Isaac }
1055e48d15efSToby Isaac break;
1056e48d15efSToby Isaac }
10579566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1058e48d15efSToby Isaac }
1059e48d15efSToby Isaac if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1060e48d15efSToby Isaac idiag = a->idiag + bs2 * (a->mbs - 1);
1061e48d15efSToby Isaac i2 = bs * (m - 1);
1062e48d15efSToby Isaac switch (bs) {
1063e48d15efSToby Isaac case 1:
1064e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) {
1065e48d15efSToby Isaac v = aa + ai[i];
1066e48d15efSToby Isaac vi = aj + ai[i];
1067e48d15efSToby Isaac nz = ai[i + 1] - ai[i];
1068e48d15efSToby Isaac s[0] = b[i2];
1069e48d15efSToby Isaac for (j = 0; j < nz; j++) {
1070e48d15efSToby Isaac xw[0] = x[vi[j]];
1071e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1072e48d15efSToby Isaac }
1073e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1074e48d15efSToby Isaac x[i2] += xw[0];
1075e48d15efSToby Isaac idiag -= 1;
1076e48d15efSToby Isaac i2 -= 1;
1077e48d15efSToby Isaac }
1078e48d15efSToby Isaac break;
1079e48d15efSToby Isaac case 2:
1080e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) {
1081e48d15efSToby Isaac v = aa + 4 * ai[i];
1082e48d15efSToby Isaac vi = aj + ai[i];
1083e48d15efSToby Isaac nz = ai[i + 1] - ai[i];
10849371c9d4SSatish Balay s[0] = b[i2];
10859371c9d4SSatish Balay s[1] = b[i2 + 1];
1086e48d15efSToby Isaac for (j = 0; j < nz; j++) {
1087e48d15efSToby Isaac idx = 2 * vi[j];
1088e48d15efSToby Isaac it = 4 * j;
10899371c9d4SSatish Balay xw[0] = x[idx];
10909371c9d4SSatish Balay xw[1] = x[1 + idx];
1091e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1092e48d15efSToby Isaac }
1093e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
10949371c9d4SSatish Balay x[i2] += xw[0];
10959371c9d4SSatish Balay x[i2 + 1] += xw[1];
1096e48d15efSToby Isaac idiag -= 4;
1097e48d15efSToby Isaac i2 -= 2;
1098e48d15efSToby Isaac }
1099e48d15efSToby Isaac break;
1100e48d15efSToby Isaac case 3:
1101e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) {
1102e48d15efSToby Isaac v = aa + 9 * ai[i];
1103e48d15efSToby Isaac vi = aj + ai[i];
1104e48d15efSToby Isaac nz = ai[i + 1] - ai[i];
11059371c9d4SSatish Balay s[0] = b[i2];
11069371c9d4SSatish Balay s[1] = b[i2 + 1];
11079371c9d4SSatish Balay s[2] = b[i2 + 2];
1108e48d15efSToby Isaac while (nz--) {
1109e48d15efSToby Isaac idx = 3 * (*vi++);
11109371c9d4SSatish Balay xw[0] = x[idx];
11119371c9d4SSatish Balay xw[1] = x[1 + idx];
11129371c9d4SSatish Balay xw[2] = x[2 + idx];
1113e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1114e48d15efSToby Isaac v += 9;
1115e48d15efSToby Isaac }
1116e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
11179371c9d4SSatish Balay x[i2] += xw[0];
11189371c9d4SSatish Balay x[i2 + 1] += xw[1];
11199371c9d4SSatish Balay x[i2 + 2] += xw[2];
1120e48d15efSToby Isaac idiag -= 9;
1121e48d15efSToby Isaac i2 -= 3;
1122e48d15efSToby Isaac }
1123e48d15efSToby Isaac break;
1124e48d15efSToby Isaac case 4:
1125e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) {
1126e48d15efSToby Isaac v = aa + 16 * ai[i];
1127e48d15efSToby Isaac vi = aj + ai[i];
1128e48d15efSToby Isaac nz = ai[i + 1] - ai[i];
11299371c9d4SSatish Balay s[0] = b[i2];
11309371c9d4SSatish Balay s[1] = b[i2 + 1];
11319371c9d4SSatish Balay s[2] = b[i2 + 2];
11329371c9d4SSatish Balay s[3] = b[i2 + 3];
1133e48d15efSToby Isaac while (nz--) {
1134e48d15efSToby Isaac idx = 4 * (*vi++);
11359371c9d4SSatish Balay xw[0] = x[idx];
11369371c9d4SSatish Balay xw[1] = x[1 + idx];
11379371c9d4SSatish Balay xw[2] = x[2 + idx];
11389371c9d4SSatish Balay xw[3] = x[3 + idx];
1139e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1140e48d15efSToby Isaac v += 16;
1141e48d15efSToby Isaac }
1142e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
11439371c9d4SSatish Balay x[i2] += xw[0];
11449371c9d4SSatish Balay x[i2 + 1] += xw[1];
11459371c9d4SSatish Balay x[i2 + 2] += xw[2];
11469371c9d4SSatish Balay x[i2 + 3] += xw[3];
1147e48d15efSToby Isaac idiag -= 16;
1148e48d15efSToby Isaac i2 -= 4;
1149e48d15efSToby Isaac }
1150e48d15efSToby Isaac break;
1151e48d15efSToby Isaac case 5:
1152e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) {
1153e48d15efSToby Isaac v = aa + 25 * ai[i];
1154e48d15efSToby Isaac vi = aj + ai[i];
1155e48d15efSToby Isaac nz = ai[i + 1] - ai[i];
11569371c9d4SSatish Balay s[0] = b[i2];
11579371c9d4SSatish Balay s[1] = b[i2 + 1];
11589371c9d4SSatish Balay s[2] = b[i2 + 2];
11599371c9d4SSatish Balay s[3] = b[i2 + 3];
11609371c9d4SSatish Balay s[4] = b[i2 + 4];
1161e48d15efSToby Isaac while (nz--) {
1162e48d15efSToby Isaac idx = 5 * (*vi++);
11639371c9d4SSatish Balay xw[0] = x[idx];
11649371c9d4SSatish Balay xw[1] = x[1 + idx];
11659371c9d4SSatish Balay xw[2] = x[2 + idx];
11669371c9d4SSatish Balay xw[3] = x[3 + idx];
11679371c9d4SSatish Balay xw[4] = x[4 + idx];
1168e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1169e48d15efSToby Isaac v += 25;
1170e48d15efSToby Isaac }
1171e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
11729371c9d4SSatish Balay x[i2] += xw[0];
11739371c9d4SSatish Balay x[i2 + 1] += xw[1];
11749371c9d4SSatish Balay x[i2 + 2] += xw[2];
11759371c9d4SSatish Balay x[i2 + 3] += xw[3];
11769371c9d4SSatish Balay x[i2 + 4] += xw[4];
1177e48d15efSToby Isaac idiag -= 25;
1178e48d15efSToby Isaac i2 -= 5;
1179e48d15efSToby Isaac }
1180e48d15efSToby Isaac break;
1181e48d15efSToby Isaac case 6:
1182e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) {
1183e48d15efSToby Isaac v = aa + 36 * ai[i];
1184e48d15efSToby Isaac vi = aj + ai[i];
1185e48d15efSToby Isaac nz = ai[i + 1] - ai[i];
11869371c9d4SSatish Balay s[0] = b[i2];
11879371c9d4SSatish Balay s[1] = b[i2 + 1];
11889371c9d4SSatish Balay s[2] = b[i2 + 2];
11899371c9d4SSatish Balay s[3] = b[i2 + 3];
11909371c9d4SSatish Balay s[4] = b[i2 + 4];
11919371c9d4SSatish Balay s[5] = b[i2 + 5];
1192e48d15efSToby Isaac while (nz--) {
1193e48d15efSToby Isaac idx = 6 * (*vi++);
11949371c9d4SSatish Balay xw[0] = x[idx];
11959371c9d4SSatish Balay xw[1] = x[1 + idx];
11969371c9d4SSatish Balay xw[2] = x[2 + idx];
11979371c9d4SSatish Balay xw[3] = x[3 + idx];
11989371c9d4SSatish Balay xw[4] = x[4 + idx];
11999371c9d4SSatish Balay xw[5] = x[5 + idx];
1200e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1201e48d15efSToby Isaac v += 36;
1202e48d15efSToby Isaac }
1203e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
12049371c9d4SSatish Balay x[i2] += xw[0];
12059371c9d4SSatish Balay x[i2 + 1] += xw[1];
12069371c9d4SSatish Balay x[i2 + 2] += xw[2];
12079371c9d4SSatish Balay x[i2 + 3] += xw[3];
12089371c9d4SSatish Balay x[i2 + 4] += xw[4];
12099371c9d4SSatish Balay x[i2 + 5] += xw[5];
1210e48d15efSToby Isaac idiag -= 36;
1211e48d15efSToby Isaac i2 -= 6;
1212e48d15efSToby Isaac }
1213e48d15efSToby Isaac break;
1214e48d15efSToby Isaac case 7:
1215e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) {
1216e48d15efSToby Isaac v = aa + 49 * ai[i];
1217e48d15efSToby Isaac vi = aj + ai[i];
1218e48d15efSToby Isaac nz = ai[i + 1] - ai[i];
12199371c9d4SSatish Balay s[0] = b[i2];
12209371c9d4SSatish Balay s[1] = b[i2 + 1];
12219371c9d4SSatish Balay s[2] = b[i2 + 2];
12229371c9d4SSatish Balay s[3] = b[i2 + 3];
12239371c9d4SSatish Balay s[4] = b[i2 + 4];
12249371c9d4SSatish Balay s[5] = b[i2 + 5];
12259371c9d4SSatish Balay s[6] = b[i2 + 6];
1226e48d15efSToby Isaac while (nz--) {
1227e48d15efSToby Isaac idx = 7 * (*vi++);
12289371c9d4SSatish Balay xw[0] = x[idx];
12299371c9d4SSatish Balay xw[1] = x[1 + idx];
12309371c9d4SSatish Balay xw[2] = x[2 + idx];
12319371c9d4SSatish Balay xw[3] = x[3 + idx];
12329371c9d4SSatish Balay xw[4] = x[4 + idx];
12339371c9d4SSatish Balay xw[5] = x[5 + idx];
12349371c9d4SSatish Balay xw[6] = x[6 + idx];
1235e48d15efSToby Isaac PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1236e48d15efSToby Isaac v += 49;
1237e48d15efSToby Isaac }
1238e48d15efSToby Isaac PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
12399371c9d4SSatish Balay x[i2] += xw[0];
12409371c9d4SSatish Balay x[i2 + 1] += xw[1];
12419371c9d4SSatish Balay x[i2 + 2] += xw[2];
12429371c9d4SSatish Balay x[i2 + 3] += xw[3];
12439371c9d4SSatish Balay x[i2 + 4] += xw[4];
12449371c9d4SSatish Balay x[i2 + 5] += xw[5];
12459371c9d4SSatish Balay x[i2 + 6] += xw[6];
1246e48d15efSToby Isaac idiag -= 49;
1247e48d15efSToby Isaac i2 -= 7;
1248e48d15efSToby Isaac }
1249e48d15efSToby Isaac break;
1250e48d15efSToby Isaac default:
1251e48d15efSToby Isaac for (i = m - 1; i >= 0; i--) {
1252e48d15efSToby Isaac v = aa + bs2 * ai[i];
1253e48d15efSToby Isaac vi = aj + ai[i];
1254e48d15efSToby Isaac nz = ai[i + 1] - ai[i];
1255e48d15efSToby Isaac
12569566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(w, b + i2, bs));
1257e48d15efSToby Isaac /* copy all rows of x that are needed into contiguous space */
1258e48d15efSToby Isaac workt = work;
1259e48d15efSToby Isaac for (j = 0; j < nz; j++) {
12609566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1261e48d15efSToby Isaac workt += bs;
1262e48d15efSToby Isaac }
1263e48d15efSToby Isaac PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1264e48d15efSToby Isaac PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1265e48d15efSToby Isaac
1266e48d15efSToby Isaac idiag -= bs2;
1267e48d15efSToby Isaac i2 -= bs;
1268e48d15efSToby Isaac }
1269e48d15efSToby Isaac break;
1270e48d15efSToby Isaac }
12719566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1272e48d15efSToby Isaac }
1273e48d15efSToby Isaac }
12749566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x));
12759566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b));
12763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1277de80f912SBarry Smith }
1278de80f912SBarry Smith
1279af674e45SBarry Smith /*
128081824310SBarry Smith Special version for direct calls from Fortran (Used in PETSc-fun3d)
1281af674e45SBarry Smith */
1282af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
1283af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1284af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1285af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4
1286af674e45SBarry Smith #endif
1287af674e45SBarry Smith
matsetvaluesblocked4_(Mat * AA,PetscInt * mm,const PetscInt im[],PetscInt * nn,const PetscInt in[],const PetscScalar v[])1288d71ae5a4SJacob Faibussowitsch PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[])
1289d71ae5a4SJacob Faibussowitsch {
1290af674e45SBarry Smith Mat A = *AA;
1291af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1292c1ac3661SBarry Smith PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1293c1ac3661SBarry Smith PetscInt *ai = a->i, *ailen = a->ilen;
129417ec6a02SBarry Smith PetscInt *aj = a->j, stepval, lastcol = -1;
1295f15d580aSBarry Smith const PetscScalar *value = v;
12964bb09213Spetsc MatScalar *ap, *aa = a->a, *bap;
1297af674e45SBarry Smith
1298af674e45SBarry Smith PetscFunctionBegin;
1299ce94432eSBarry Smith if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1300af674e45SBarry Smith stepval = (n - 1) * 4;
1301af674e45SBarry Smith for (k = 0; k < m; k++) { /* loop over added rows */
1302af674e45SBarry Smith row = im[k];
1303af674e45SBarry Smith rp = aj + ai[row];
1304af674e45SBarry Smith ap = aa + 16 * ai[row];
1305af674e45SBarry Smith nrow = ailen[row];
1306af674e45SBarry Smith low = 0;
130717ec6a02SBarry Smith high = nrow;
1308af674e45SBarry Smith for (l = 0; l < n; l++) { /* loop over added columns */
1309af674e45SBarry Smith col = in[l];
1310db4deed7SKarl Rupp if (col <= lastcol) low = 0;
1311db4deed7SKarl Rupp else high = nrow;
131217ec6a02SBarry Smith lastcol = col;
13131e3347e8SBarry Smith value = v + k * (stepval + 4 + l) * 4;
1314af674e45SBarry Smith while (high - low > 7) {
1315af674e45SBarry Smith t = (low + high) / 2;
1316af674e45SBarry Smith if (rp[t] > col) high = t;
1317af674e45SBarry Smith else low = t;
1318af674e45SBarry Smith }
1319af674e45SBarry Smith for (i = low; i < high; i++) {
1320af674e45SBarry Smith if (rp[i] > col) break;
1321af674e45SBarry Smith if (rp[i] == col) {
1322af674e45SBarry Smith bap = ap + 16 * i;
1323af674e45SBarry Smith for (ii = 0; ii < 4; ii++, value += stepval) {
1324ad540459SPierre Jolivet for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1325af674e45SBarry Smith }
1326af674e45SBarry Smith goto noinsert2;
1327af674e45SBarry Smith }
1328af674e45SBarry Smith }
1329af674e45SBarry Smith N = nrow++ - 1;
133017ec6a02SBarry Smith high++; /* added new column index thus must search to one higher than before */
1331af674e45SBarry Smith /* shift up all the later entries in this row */
1332af674e45SBarry Smith for (ii = N; ii >= i; ii--) {
1333af674e45SBarry Smith rp[ii + 1] = rp[ii];
13349566063dSJacob Faibussowitsch PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1335af674e45SBarry Smith }
133648a46eb9SPierre Jolivet if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1337af674e45SBarry Smith rp[i] = col;
1338af674e45SBarry Smith bap = ap + 16 * i;
1339af674e45SBarry Smith for (ii = 0; ii < 4; ii++, value += stepval) {
1340ad540459SPierre Jolivet for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1341af674e45SBarry Smith }
1342af674e45SBarry Smith noinsert2:;
1343af674e45SBarry Smith low = i;
1344af674e45SBarry Smith }
1345af674e45SBarry Smith ailen[row] = nrow;
1346af674e45SBarry Smith }
1347be1d678aSKris Buschelman PetscFunctionReturnVoid();
1348af674e45SBarry Smith }
1349af674e45SBarry Smith
1350af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
1351af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4
1352af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1353af674e45SBarry Smith #define matsetvalues4_ matsetvalues4
1354af674e45SBarry Smith #endif
1355af674e45SBarry Smith
matsetvalues4_(Mat * AA,PetscInt * mm,PetscInt * im,PetscInt * nn,PetscInt * in,PetscScalar * v)1356d71ae5a4SJacob Faibussowitsch PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v)
1357d71ae5a4SJacob Faibussowitsch {
1358af674e45SBarry Smith Mat A = *AA;
1359af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1360580bdb30SBarry Smith PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1361c1ac3661SBarry Smith PetscInt *ai = a->i, *ailen = a->ilen;
1362c1ac3661SBarry Smith PetscInt *aj = a->j, brow, bcol;
136317ec6a02SBarry Smith PetscInt ridx, cidx, lastcol = -1;
1364af674e45SBarry Smith MatScalar *ap, value, *aa = a->a, *bap;
1365af674e45SBarry Smith
1366af674e45SBarry Smith PetscFunctionBegin;
1367af674e45SBarry Smith for (k = 0; k < m; k++) { /* loop over added rows */
13689371c9d4SSatish Balay row = im[k];
13699371c9d4SSatish Balay brow = row / 4;
1370af674e45SBarry Smith rp = aj + ai[brow];
1371af674e45SBarry Smith ap = aa + 16 * ai[brow];
1372af674e45SBarry Smith nrow = ailen[brow];
1373af674e45SBarry Smith low = 0;
137417ec6a02SBarry Smith high = nrow;
1375af674e45SBarry Smith for (l = 0; l < n; l++) { /* loop over added columns */
13769371c9d4SSatish Balay col = in[l];
13779371c9d4SSatish Balay bcol = col / 4;
13789371c9d4SSatish Balay ridx = row % 4;
13799371c9d4SSatish Balay cidx = col % 4;
1380af674e45SBarry Smith value = v[l + k * n];
1381db4deed7SKarl Rupp if (col <= lastcol) low = 0;
1382db4deed7SKarl Rupp else high = nrow;
138317ec6a02SBarry Smith lastcol = col;
1384af674e45SBarry Smith while (high - low > 7) {
1385af674e45SBarry Smith t = (low + high) / 2;
1386af674e45SBarry Smith if (rp[t] > bcol) high = t;
1387af674e45SBarry Smith else low = t;
1388af674e45SBarry Smith }
1389af674e45SBarry Smith for (i = low; i < high; i++) {
1390af674e45SBarry Smith if (rp[i] > bcol) break;
1391af674e45SBarry Smith if (rp[i] == bcol) {
1392af674e45SBarry Smith bap = ap + 16 * i + 4 * cidx + ridx;
1393af674e45SBarry Smith *bap += value;
1394af674e45SBarry Smith goto noinsert1;
1395af674e45SBarry Smith }
1396af674e45SBarry Smith }
1397af674e45SBarry Smith N = nrow++ - 1;
139817ec6a02SBarry Smith high++; /* added new column thus must search to one higher than before */
1399af674e45SBarry Smith /* shift up all the later entries in this row */
14009566063dSJacob Faibussowitsch PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
14019566063dSJacob Faibussowitsch PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
14029566063dSJacob Faibussowitsch PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1403af674e45SBarry Smith rp[i] = bcol;
1404af674e45SBarry Smith ap[16 * i + 4 * cidx + ridx] = value;
1405af674e45SBarry Smith noinsert1:;
1406af674e45SBarry Smith low = i;
1407af674e45SBarry Smith }
1408af674e45SBarry Smith ailen[brow] = nrow;
1409af674e45SBarry Smith }
1410be1d678aSKris Buschelman PetscFunctionReturnVoid();
1411af674e45SBarry Smith }
1412af674e45SBarry Smith
MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt * nn,const PetscInt * inia[],const PetscInt * inja[],PetscBool * done)1413d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
1414d71ae5a4SJacob Faibussowitsch {
14153b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
14161a83f524SJed Brown PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
14171a83f524SJed Brown PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
14183b2fbd54SBarry Smith
14193a40ed3dSBarry Smith PetscFunctionBegin;
14203b2fbd54SBarry Smith *nn = n;
14213ba16761SJacob Faibussowitsch if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
14223b2fbd54SBarry Smith if (symmetric) {
14239566063dSJacob Faibussowitsch PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1424553b3c51SBarry Smith nz = tia[n];
14253b2fbd54SBarry Smith } else {
14269371c9d4SSatish Balay tia = a->i;
14279371c9d4SSatish Balay tja = a->j;
14283b2fbd54SBarry Smith }
14293b2fbd54SBarry Smith
1430ecc77c7aSBarry Smith if (!blockcompressed && bs > 1) {
1431ecc77c7aSBarry Smith (*nn) *= bs;
14328f7157efSSatish Balay /* malloc & create the natural set of indices */
14339566063dSJacob Faibussowitsch PetscCall(PetscMalloc1((n + 1) * bs, ia));
14349985e31cSBarry Smith if (n) {
14352462f5fdSStefano Zampini (*ia)[0] = oshift;
1436ad540459SPierre Jolivet for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
14379985e31cSBarry Smith }
1438ecc77c7aSBarry Smith
1439ecc77c7aSBarry Smith for (i = 1; i < n; i++) {
1440ecc77c7aSBarry Smith (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1441ad540459SPierre Jolivet for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
14428f7157efSSatish Balay }
1443ad540459SPierre Jolivet if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1444ecc77c7aSBarry Smith
14451a83f524SJed Brown if (inja) {
14469566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz * bs * bs, ja));
14479985e31cSBarry Smith cnt = 0;
14489985e31cSBarry Smith for (i = 0; i < n; i++) {
14499985e31cSBarry Smith for (j = 0; j < bs; j++) {
14509985e31cSBarry Smith for (k = tia[i]; k < tia[i + 1]; k++) {
1451ad540459SPierre Jolivet for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
14529985e31cSBarry Smith }
14539985e31cSBarry Smith }
14549985e31cSBarry Smith }
14559985e31cSBarry Smith }
14569985e31cSBarry Smith
14578f7157efSSatish Balay if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
14589566063dSJacob Faibussowitsch PetscCall(PetscFree(tia));
14599566063dSJacob Faibussowitsch PetscCall(PetscFree(tja));
14608f7157efSSatish Balay }
1461f6d58c54SBarry Smith } else if (oshift == 1) {
1462715a17b5SBarry Smith if (symmetric) {
1463a2ea699eSBarry Smith nz = tia[A->rmap->n / bs];
1464715a17b5SBarry Smith /* add 1 to i and j indices */
1465715a17b5SBarry Smith for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1466715a17b5SBarry Smith *ia = tia;
1467715a17b5SBarry Smith if (ja) {
1468715a17b5SBarry Smith for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1469715a17b5SBarry Smith *ja = tja;
1470715a17b5SBarry Smith }
1471715a17b5SBarry Smith } else {
1472a2ea699eSBarry Smith nz = a->i[A->rmap->n / bs];
1473f6d58c54SBarry Smith /* malloc space and add 1 to i and j indices */
14749566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1475f6d58c54SBarry Smith for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1476f6d58c54SBarry Smith if (ja) {
14779566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, ja));
1478f6d58c54SBarry Smith for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1479f6d58c54SBarry Smith }
1480715a17b5SBarry Smith }
14818f7157efSSatish Balay } else {
14828f7157efSSatish Balay *ia = tia;
1483ecc77c7aSBarry Smith if (ja) *ja = tja;
14848f7157efSSatish Balay }
14853ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
14863b2fbd54SBarry Smith }
14873b2fbd54SBarry Smith
MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt * nn,const PetscInt * ia[],const PetscInt * ja[],PetscBool * done)1488d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
1489d71ae5a4SJacob Faibussowitsch {
14903a40ed3dSBarry Smith PetscFunctionBegin;
14913ba16761SJacob Faibussowitsch if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1492715a17b5SBarry Smith if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
14939566063dSJacob Faibussowitsch PetscCall(PetscFree(*ia));
14949566063dSJacob Faibussowitsch if (ja) PetscCall(PetscFree(*ja));
14953b2fbd54SBarry Smith }
14963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
14973b2fbd54SBarry Smith }
14983b2fbd54SBarry Smith
MatDestroy_SeqBAIJ(Mat A)1499d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1500d71ae5a4SJacob Faibussowitsch {
15012d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
15022d61bbb3SSatish Balay
1503433994e6SBarry Smith PetscFunctionBegin;
1504b4e2f619SBarry Smith if (A->hash_active) {
1505b4e2f619SBarry Smith PetscInt bs;
1506e3c72094SPierre Jolivet A->ops[0] = a->cops;
1507b4e2f619SBarry Smith PetscCall(PetscHMapIJVDestroy(&a->ht));
1508b4e2f619SBarry Smith PetscCall(MatGetBlockSize(A, &bs));
1509b4e2f619SBarry Smith if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht));
1510b4e2f619SBarry Smith PetscCall(PetscFree(a->dnz));
1511b4e2f619SBarry Smith PetscCall(PetscFree(a->bdnz));
1512b4e2f619SBarry Smith A->hash_active = PETSC_FALSE;
1513b4e2f619SBarry Smith }
15143ba16761SJacob Faibussowitsch PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz));
15159566063dSJacob Faibussowitsch PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
15169566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->row));
15179566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->col));
1518*421480d9SBarry Smith PetscCall(PetscFree(a->diag));
15199566063dSJacob Faibussowitsch PetscCall(PetscFree(a->idiag));
15209566063dSJacob Faibussowitsch if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
15219566063dSJacob Faibussowitsch PetscCall(PetscFree(a->solve_work));
15229566063dSJacob Faibussowitsch PetscCall(PetscFree(a->mult_work));
15239566063dSJacob Faibussowitsch PetscCall(PetscFree(a->sor_workt));
15249566063dSJacob Faibussowitsch PetscCall(PetscFree(a->sor_work));
15259566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->icol));
15269566063dSJacob Faibussowitsch PetscCall(PetscFree(a->saved_values));
15279566063dSJacob Faibussowitsch PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1528c4319e64SHong Zhang
15299566063dSJacob Faibussowitsch PetscCall(MatDestroy(&a->sbaijMat));
15309566063dSJacob Faibussowitsch PetscCall(MatDestroy(&a->parent));
15319566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data));
1532901853e0SKris Buschelman
15339566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
15349566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
15359566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
15369566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
15379566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
15389566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
15399566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
15409566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
15419566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
15429566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
15439566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
15449566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
15457ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
15469566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
15477ea3e4caSstefano_zampini #endif
15489566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
15492e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
15503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
15512d61bbb3SSatish Balay }
15522d61bbb3SSatish Balay
MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)155366976f2fSJacob Faibussowitsch static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg)
1554d71ae5a4SJacob Faibussowitsch {
15552d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
15562d61bbb3SSatish Balay
15572d61bbb3SSatish Balay PetscFunctionBegin;
1558aa275fccSKris Buschelman switch (op) {
1559d71ae5a4SJacob Faibussowitsch case MAT_ROW_ORIENTED:
1560d71ae5a4SJacob Faibussowitsch a->roworiented = flg;
1561d71ae5a4SJacob Faibussowitsch break;
1562d71ae5a4SJacob Faibussowitsch case MAT_KEEP_NONZERO_PATTERN:
1563d71ae5a4SJacob Faibussowitsch a->keepnonzeropattern = flg;
1564d71ae5a4SJacob Faibussowitsch break;
1565d71ae5a4SJacob Faibussowitsch case MAT_NEW_NONZERO_LOCATIONS:
1566d71ae5a4SJacob Faibussowitsch a->nonew = (flg ? 0 : 1);
1567d71ae5a4SJacob Faibussowitsch break;
1568d71ae5a4SJacob Faibussowitsch case MAT_NEW_NONZERO_LOCATION_ERR:
1569d71ae5a4SJacob Faibussowitsch a->nonew = (flg ? -1 : 0);
1570d71ae5a4SJacob Faibussowitsch break;
1571d71ae5a4SJacob Faibussowitsch case MAT_NEW_NONZERO_ALLOCATION_ERR:
1572d71ae5a4SJacob Faibussowitsch a->nonew = (flg ? -2 : 0);
1573d71ae5a4SJacob Faibussowitsch break;
1574d71ae5a4SJacob Faibussowitsch case MAT_UNUSED_NONZERO_LOCATION_ERR:
1575d71ae5a4SJacob Faibussowitsch a->nounused = (flg ? -1 : 0);
1576d71ae5a4SJacob Faibussowitsch break;
1577d71ae5a4SJacob Faibussowitsch default:
1578888c827cSStefano Zampini break;
15792d61bbb3SSatish Balay }
15803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
15812d61bbb3SSatish Balay }
15822d61bbb3SSatish Balay
158352768537SHong Zhang /* used for both SeqBAIJ and SeqSBAIJ matrices */
MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v,PetscInt * ai,PetscInt * aj,PetscScalar * aa)1584d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa)
1585d71ae5a4SJacob Faibussowitsch {
158652768537SHong Zhang PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
158752768537SHong Zhang MatScalar *aa_i;
158887828ca2SBarry Smith PetscScalar *v_i;
15892d61bbb3SSatish Balay
15902d61bbb3SSatish Balay PetscFunctionBegin;
1591d0f46423SBarry Smith bs = A->rmap->bs;
159252768537SHong Zhang bs2 = bs * bs;
15935f80ce2aSJacob Faibussowitsch PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
15942d61bbb3SSatish Balay
15952d61bbb3SSatish Balay bn = row / bs; /* Block number */
15962d61bbb3SSatish Balay bp = row % bs; /* Block Position */
15972d61bbb3SSatish Balay M = ai[bn + 1] - ai[bn];
15982d61bbb3SSatish Balay *nz = bs * M;
15992d61bbb3SSatish Balay
16002d61bbb3SSatish Balay if (v) {
1601f4259b30SLisandro Dalcin *v = NULL;
16022d61bbb3SSatish Balay if (*nz) {
16039566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(*nz, v));
16042d61bbb3SSatish Balay for (i = 0; i < M; i++) { /* for each block in the block row */
16052d61bbb3SSatish Balay v_i = *v + i * bs;
16062d61bbb3SSatish Balay aa_i = aa + bs2 * (ai[bn] + i);
160726fbe8dcSKarl Rupp for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
16082d61bbb3SSatish Balay }
16092d61bbb3SSatish Balay }
16102d61bbb3SSatish Balay }
16112d61bbb3SSatish Balay
16122d61bbb3SSatish Balay if (idx) {
1613f4259b30SLisandro Dalcin *idx = NULL;
16142d61bbb3SSatish Balay if (*nz) {
16159566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(*nz, idx));
16162d61bbb3SSatish Balay for (i = 0; i < M; i++) { /* for each block in the block row */
16172d61bbb3SSatish Balay idx_i = *idx + i * bs;
16182d61bbb3SSatish Balay itmp = bs * aj[ai[bn] + i];
161926fbe8dcSKarl Rupp for (j = 0; j < bs; j++) idx_i[j] = itmp++;
16202d61bbb3SSatish Balay }
16212d61bbb3SSatish Balay }
16222d61bbb3SSatish Balay }
16233ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
16242d61bbb3SSatish Balay }
16252d61bbb3SSatish Balay
MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)1626d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1627d71ae5a4SJacob Faibussowitsch {
162852768537SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
162952768537SHong Zhang
163052768537SHong Zhang PetscFunctionBegin;
16319566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
16323ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
163352768537SHong Zhang }
163452768537SHong Zhang
MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)1635d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1636d71ae5a4SJacob Faibussowitsch {
16372d61bbb3SSatish Balay PetscFunctionBegin;
16389566063dSJacob Faibussowitsch if (idx) PetscCall(PetscFree(*idx));
16399566063dSJacob Faibussowitsch if (v) PetscCall(PetscFree(*v));
16403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
16412d61bbb3SSatish Balay }
16422d61bbb3SSatish Balay
MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat * B)164366976f2fSJacob Faibussowitsch static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B)
1644d71ae5a4SJacob Faibussowitsch {
164520e84f26SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
16462d61bbb3SSatish Balay Mat C;
164720e84f26SHong Zhang PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
164820e84f26SHong Zhang PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr;
164920e84f26SHong Zhang MatScalar *ata, *aa = a->a;
16502d61bbb3SSatish Balay
16512d61bbb3SSatish Balay PetscFunctionBegin;
16527fb60732SBarry Smith if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
16539566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(1 + nbs, &atfill));
1654cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
165520e84f26SHong Zhang for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
16562d61bbb3SSatish Balay
16579566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
16589566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
16599566063dSJacob Faibussowitsch PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
16609566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
166120e84f26SHong Zhang
166220e84f26SHong Zhang at = (Mat_SeqBAIJ *)C->data;
166320e84f26SHong Zhang ati = at->i;
166420e84f26SHong Zhang for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1665fc4dec0aSBarry Smith } else {
1666fc4dec0aSBarry Smith C = *B;
166720e84f26SHong Zhang at = (Mat_SeqBAIJ *)C->data;
166820e84f26SHong Zhang ati = at->i;
1669fc4dec0aSBarry Smith }
1670fc4dec0aSBarry Smith
167120e84f26SHong Zhang atj = at->j;
167220e84f26SHong Zhang ata = at->a;
167320e84f26SHong Zhang
167420e84f26SHong Zhang /* Copy ati into atfill so we have locations of the next free space in atj */
16759566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(atfill, ati, nbs));
167620e84f26SHong Zhang
167720e84f26SHong Zhang /* Walk through A row-wise and mark nonzero entries of A^T. */
16782d61bbb3SSatish Balay for (i = 0; i < mbs; i++) {
167920e84f26SHong Zhang anzj = ai[i + 1] - ai[i];
168020e84f26SHong Zhang for (j = 0; j < anzj; j++) {
168120e84f26SHong Zhang atj[atfill[*aj]] = i;
168220e84f26SHong Zhang for (kr = 0; kr < bs; kr++) {
1683ad540459SPierre Jolivet for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
16842d61bbb3SSatish Balay }
168520e84f26SHong Zhang atfill[*aj++] += 1;
168620e84f26SHong Zhang }
168720e84f26SHong Zhang }
16889566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
16899566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
16902d61bbb3SSatish Balay
169120e84f26SHong Zhang /* Clean up temporary space and complete requests. */
16929566063dSJacob Faibussowitsch PetscCall(PetscFree(atfill));
169320e84f26SHong Zhang
1694cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
169558b7e2c1SStefano Zampini PetscCall(MatSetBlockSizes(C, A->cmap->bs, A->rmap->bs));
16962d61bbb3SSatish Balay *B = C;
16972d61bbb3SSatish Balay } else {
16989566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(A, &C));
16992d61bbb3SSatish Balay }
17003ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
17012d61bbb3SSatish Balay }
17022d61bbb3SSatish Balay
MatCompare_SeqBAIJ_Private(Mat A,Mat B,PetscReal tol,PetscBool * flg)170328636b0cSPierre Jolivet static PetscErrorCode MatCompare_SeqBAIJ_Private(Mat A, Mat B, PetscReal tol, PetscBool *flg)
170428636b0cSPierre Jolivet {
170528636b0cSPierre Jolivet Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
170628636b0cSPierre Jolivet
170728636b0cSPierre Jolivet PetscFunctionBegin;
170828636b0cSPierre Jolivet /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */
170928636b0cSPierre Jolivet if (A->rmap->N != B->rmap->N || A->cmap->n != B->cmap->n || A->rmap->bs != B->rmap->bs || a->nz != b->nz) {
171028636b0cSPierre Jolivet *flg = PETSC_FALSE;
171128636b0cSPierre Jolivet PetscFunctionReturn(PETSC_SUCCESS);
171228636b0cSPierre Jolivet }
171328636b0cSPierre Jolivet
171428636b0cSPierre Jolivet /* if the a->i are the same */
171528636b0cSPierre Jolivet PetscCall(PetscArraycmp(a->i, b->i, a->mbs + 1, flg));
171628636b0cSPierre Jolivet if (!*flg) PetscFunctionReturn(PETSC_SUCCESS);
171728636b0cSPierre Jolivet
171828636b0cSPierre Jolivet /* if a->j are the same */
171928636b0cSPierre Jolivet PetscCall(PetscArraycmp(a->j, b->j, a->nz, flg));
172028636b0cSPierre Jolivet if (!*flg) PetscFunctionReturn(PETSC_SUCCESS);
172128636b0cSPierre Jolivet
172228636b0cSPierre Jolivet if (tol == 0.0) PetscCall(PetscArraycmp(a->a, b->a, a->nz * A->rmap->bs * A->rmap->bs, flg)); /* if a->a are the same */
172328636b0cSPierre Jolivet else {
172428636b0cSPierre Jolivet *flg = PETSC_TRUE;
172528636b0cSPierre Jolivet for (PetscInt i = 0; (i < a->nz * A->rmap->bs * A->rmap->bs) && *flg; ++i)
172628636b0cSPierre Jolivet if (PetscAbsScalar(a->a[i] - b->a[i]) > tol) *flg = PETSC_FALSE;
172728636b0cSPierre Jolivet }
172828636b0cSPierre Jolivet PetscFunctionReturn(PETSC_SUCCESS);
172928636b0cSPierre Jolivet }
173028636b0cSPierre Jolivet
MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool * f)1731ff6a9541SJacob Faibussowitsch static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f)
1732d71ae5a4SJacob Faibussowitsch {
1733453d3561SHong Zhang Mat Btrans;
1734453d3561SHong Zhang
1735453d3561SHong Zhang PetscFunctionBegin;
1736acd337a6SBarry Smith PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
173728636b0cSPierre Jolivet PetscCall(MatCompare_SeqBAIJ_Private(A, Btrans, tol, f));
17389566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Btrans));
17393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1740453d3561SHong Zhang }
1741453d3561SHong Zhang
MatEqual_SeqBAIJ(Mat A,Mat B,PetscBool * flg)174228636b0cSPierre Jolivet static PetscErrorCode MatEqual_SeqBAIJ(Mat A, Mat B, PetscBool *flg)
174328636b0cSPierre Jolivet {
174428636b0cSPierre Jolivet PetscFunctionBegin;
174528636b0cSPierre Jolivet PetscCall(MatCompare_SeqBAIJ_Private(A, B, 0.0, flg));
174628636b0cSPierre Jolivet PetscFunctionReturn(PETSC_SUCCESS);
174728636b0cSPierre Jolivet }
174828636b0cSPierre Jolivet
1749618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)1750d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
1751d71ae5a4SJacob Faibussowitsch {
1752b51a4376SLisandro Dalcin Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1753b51a4376SLisandro Dalcin PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1754b51a4376SLisandro Dalcin PetscInt *rowlens, *colidxs;
1755b51a4376SLisandro Dalcin PetscScalar *matvals;
17562593348eSBarry Smith
17573a40ed3dSBarry Smith PetscFunctionBegin;
17589566063dSJacob Faibussowitsch PetscCall(PetscViewerSetUp(viewer));
17593b2fbd54SBarry Smith
1760b51a4376SLisandro Dalcin M = mat->rmap->N;
1761b51a4376SLisandro Dalcin N = mat->cmap->N;
1762b51a4376SLisandro Dalcin m = mat->rmap->n;
1763b51a4376SLisandro Dalcin bs = mat->rmap->bs;
1764b51a4376SLisandro Dalcin nz = bs * bs * A->nz;
17652593348eSBarry Smith
1766b51a4376SLisandro Dalcin /* write matrix header */
1767b51a4376SLisandro Dalcin header[0] = MAT_FILE_CLASSID;
17689371c9d4SSatish Balay header[1] = M;
17699371c9d4SSatish Balay header[2] = N;
17709371c9d4SSatish Balay header[3] = nz;
17719566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
17722593348eSBarry Smith
1773b51a4376SLisandro Dalcin /* store row lengths */
17749566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &rowlens));
1775b51a4376SLisandro Dalcin for (cnt = 0, i = 0; i < A->mbs; i++)
17769371c9d4SSatish Balay for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
17779566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
17789566063dSJacob Faibussowitsch PetscCall(PetscFree(rowlens));
1779b51a4376SLisandro Dalcin
1780b51a4376SLisandro Dalcin /* store column indices */
17819566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &colidxs));
1782b51a4376SLisandro Dalcin for (cnt = 0, i = 0; i < A->mbs; i++)
1783b51a4376SLisandro Dalcin for (k = 0; k < bs; k++)
1784b51a4376SLisandro Dalcin for (j = A->i[i]; j < A->i[i + 1]; j++)
17859371c9d4SSatish Balay for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
17865f80ce2aSJacob Faibussowitsch PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
17879566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
17889566063dSJacob Faibussowitsch PetscCall(PetscFree(colidxs));
17892593348eSBarry Smith
17902593348eSBarry Smith /* store nonzero values */
17919566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &matvals));
1792b51a4376SLisandro Dalcin for (cnt = 0, i = 0; i < A->mbs; i++)
1793b51a4376SLisandro Dalcin for (k = 0; k < bs; k++)
1794b51a4376SLisandro Dalcin for (j = A->i[i]; j < A->i[i + 1]; j++)
17959371c9d4SSatish Balay for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
17965f80ce2aSJacob Faibussowitsch PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
17979566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
17989566063dSJacob Faibussowitsch PetscCall(PetscFree(matvals));
1799ce6f0cecSBarry Smith
1800b51a4376SLisandro Dalcin /* write block size option to the viewer's .info file */
18019566063dSJacob Faibussowitsch PetscCall(MatView_Binary_BlockSizes(mat, viewer));
18023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
18032593348eSBarry Smith }
18042593348eSBarry Smith
MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)1805d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer)
1806d71ae5a4SJacob Faibussowitsch {
18077dc0baabSHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
18087dc0baabSHong Zhang PetscInt i, bs = A->rmap->bs, k;
18097dc0baabSHong Zhang
18107dc0baabSHong Zhang PetscFunctionBegin;
18119566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
18127dc0baabSHong Zhang for (i = 0; i < a->mbs; i++) {
18139566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
181448a46eb9SPierre Jolivet for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
18159566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
18167dc0baabSHong Zhang }
18179566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
18183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
18197dc0baabSHong Zhang }
18207dc0baabSHong Zhang
MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)1821d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer)
1822d71ae5a4SJacob Faibussowitsch {
1823b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1824d0f46423SBarry Smith PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1825f3ef73ceSBarry Smith PetscViewerFormat format;
18262593348eSBarry Smith
18273a40ed3dSBarry Smith PetscFunctionBegin;
18287dc0baabSHong Zhang if (A->structure_only) {
18299566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
18303ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
18317dc0baabSHong Zhang }
18327dc0baabSHong Zhang
18339566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format));
1834456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1835fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1836ade3a672SBarry Smith const char *matname;
1837bcd9e38bSBarry Smith Mat aij;
18389566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
18399566063dSJacob Faibussowitsch PetscCall(PetscObjectGetName((PetscObject)A, &matname));
18409566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)aij, matname));
18419566063dSJacob Faibussowitsch PetscCall(MatView(aij, viewer));
18429566063dSJacob Faibussowitsch PetscCall(MatDestroy(&aij));
184304929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
18443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1845fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_COMMON) {
18469566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
184744cd7ae7SLois Curfman McInnes for (i = 0; i < a->mbs; i++) {
184844cd7ae7SLois Curfman McInnes for (j = 0; j < bs; j++) {
18499566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
185044cd7ae7SLois Curfman McInnes for (k = a->i[i]; k < a->i[i + 1]; k++) {
185144cd7ae7SLois Curfman McInnes for (l = 0; l < bs; l++) {
1852aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
18530e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18549371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18550e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18569371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18570e6d2581SBarry Smith } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18589566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
18590ef38995SBarry Smith }
186044cd7ae7SLois Curfman McInnes #else
186148a46eb9SPierre Jolivet if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
186244cd7ae7SLois Curfman McInnes #endif
186344cd7ae7SLois Curfman McInnes }
186444cd7ae7SLois Curfman McInnes }
18659566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
186644cd7ae7SLois Curfman McInnes }
186744cd7ae7SLois Curfman McInnes }
18689566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
18690ef38995SBarry Smith } else {
18709566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1871b6490206SBarry Smith for (i = 0; i < a->mbs; i++) {
1872b6490206SBarry Smith for (j = 0; j < bs; j++) {
18739566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1874b6490206SBarry Smith for (k = a->i[i]; k < a->i[i + 1]; k++) {
1875b6490206SBarry Smith for (l = 0; l < bs; l++) {
1876aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
18770e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
18789371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18790e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
18809371c9d4SSatish Balay PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18810ef38995SBarry Smith } else {
18829566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
188388685aaeSLois Curfman McInnes }
188488685aaeSLois Curfman McInnes #else
18859566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
188688685aaeSLois Curfman McInnes #endif
18872593348eSBarry Smith }
18882593348eSBarry Smith }
18899566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
18902593348eSBarry Smith }
18912593348eSBarry Smith }
18929566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1893b6490206SBarry Smith }
18949566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer));
18953ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
18962593348eSBarry Smith }
18972593348eSBarry Smith
18989804daf3SBarry Smith #include <petscdraw.h>
MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void * Aa)1899d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa)
1900d71ae5a4SJacob Faibussowitsch {
190177ed5343SBarry Smith Mat A = (Mat)Aa;
19023270192aSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
19036497c311SBarry Smith PetscInt row, i, j, k, l, mbs = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
19040e6d2581SBarry Smith PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r;
19053f1db9ecSBarry Smith MatScalar *aa;
1906b0a32e0cSBarry Smith PetscViewer viewer;
1907b3e7f47fSJed Brown PetscViewerFormat format;
19086497c311SBarry Smith int color;
19093270192aSSatish Balay
19103a40ed3dSBarry Smith PetscFunctionBegin;
19119566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
19129566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format));
19139566063dSJacob Faibussowitsch PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
191477ed5343SBarry Smith
19153270192aSSatish Balay /* loop over matrix elements drawing boxes */
1916b3e7f47fSJed Brown
1917b3e7f47fSJed Brown if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1918d0609cedSBarry Smith PetscDrawCollectiveBegin(draw);
1919383922c3SLisandro Dalcin /* Blue for negative, Cyan for zero and Red for positive */
1920b0a32e0cSBarry Smith color = PETSC_DRAW_BLUE;
19213270192aSSatish Balay for (i = 0, row = 0; i < mbs; i++, row += bs) {
19223270192aSSatish Balay for (j = a->i[i]; j < a->i[i + 1]; j++) {
19239371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0;
19249371c9d4SSatish Balay y_r = y_l + 1.0;
19259371c9d4SSatish Balay x_l = a->j[j] * bs;
19269371c9d4SSatish Balay x_r = x_l + 1.0;
19273270192aSSatish Balay aa = a->a + j * bs2;
19283270192aSSatish Balay for (k = 0; k < bs; k++) {
19293270192aSSatish Balay for (l = 0; l < bs; l++) {
19300e6d2581SBarry Smith if (PetscRealPart(*aa++) >= 0.) continue;
19319566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
19323270192aSSatish Balay }
19333270192aSSatish Balay }
19343270192aSSatish Balay }
19353270192aSSatish Balay }
1936b0a32e0cSBarry Smith color = PETSC_DRAW_CYAN;
19373270192aSSatish Balay for (i = 0, row = 0; i < mbs; i++, row += bs) {
19383270192aSSatish Balay for (j = a->i[i]; j < a->i[i + 1]; j++) {
19399371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0;
19409371c9d4SSatish Balay y_r = y_l + 1.0;
19419371c9d4SSatish Balay x_l = a->j[j] * bs;
19429371c9d4SSatish Balay x_r = x_l + 1.0;
19433270192aSSatish Balay aa = a->a + j * bs2;
19443270192aSSatish Balay for (k = 0; k < bs; k++) {
19453270192aSSatish Balay for (l = 0; l < bs; l++) {
19460e6d2581SBarry Smith if (PetscRealPart(*aa++) != 0.) continue;
19479566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
19483270192aSSatish Balay }
19493270192aSSatish Balay }
19503270192aSSatish Balay }
19513270192aSSatish Balay }
1952b0a32e0cSBarry Smith color = PETSC_DRAW_RED;
19533270192aSSatish Balay for (i = 0, row = 0; i < mbs; i++, row += bs) {
19543270192aSSatish Balay for (j = a->i[i]; j < a->i[i + 1]; j++) {
19559371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0;
19569371c9d4SSatish Balay y_r = y_l + 1.0;
19579371c9d4SSatish Balay x_l = a->j[j] * bs;
19589371c9d4SSatish Balay x_r = x_l + 1.0;
19593270192aSSatish Balay aa = a->a + j * bs2;
19603270192aSSatish Balay for (k = 0; k < bs; k++) {
19613270192aSSatish Balay for (l = 0; l < bs; l++) {
19620e6d2581SBarry Smith if (PetscRealPart(*aa++) <= 0.) continue;
19639566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
19643270192aSSatish Balay }
19653270192aSSatish Balay }
19663270192aSSatish Balay }
19673270192aSSatish Balay }
1968d0609cedSBarry Smith PetscDrawCollectiveEnd(draw);
1969b3e7f47fSJed Brown } else {
1970b3e7f47fSJed Brown /* use contour shading to indicate magnitude of values */
1971b3e7f47fSJed Brown /* first determine max of all nonzero values */
1972b05fc000SLisandro Dalcin PetscReal minv = 0.0, maxv = 0.0;
1973b3e7f47fSJed Brown PetscDraw popup;
1974b3e7f47fSJed Brown
1975b3e7f47fSJed Brown for (i = 0; i < a->nz * a->bs2; i++) {
1976b3e7f47fSJed Brown if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1977b3e7f47fSJed Brown }
1978383922c3SLisandro Dalcin if (minv >= maxv) maxv = minv + PETSC_SMALL;
19799566063dSJacob Faibussowitsch PetscCall(PetscDrawGetPopup(draw, &popup));
19809566063dSJacob Faibussowitsch PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
1981383922c3SLisandro Dalcin
1982d0609cedSBarry Smith PetscDrawCollectiveBegin(draw);
1983b3e7f47fSJed Brown for (i = 0, row = 0; i < mbs; i++, row += bs) {
1984b3e7f47fSJed Brown for (j = a->i[i]; j < a->i[i + 1]; j++) {
19859371c9d4SSatish Balay y_l = A->rmap->N - row - 1.0;
19869371c9d4SSatish Balay y_r = y_l + 1.0;
19879371c9d4SSatish Balay x_l = a->j[j] * bs;
19889371c9d4SSatish Balay x_r = x_l + 1.0;
1989b3e7f47fSJed Brown aa = a->a + j * bs2;
1990b3e7f47fSJed Brown for (k = 0; k < bs; k++) {
1991b3e7f47fSJed Brown for (l = 0; l < bs; l++) {
1992383922c3SLisandro Dalcin MatScalar v = *aa++;
1993383922c3SLisandro Dalcin color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
19949566063dSJacob Faibussowitsch PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1995b3e7f47fSJed Brown }
1996b3e7f47fSJed Brown }
1997b3e7f47fSJed Brown }
1998b3e7f47fSJed Brown }
1999d0609cedSBarry Smith PetscDrawCollectiveEnd(draw);
2000b3e7f47fSJed Brown }
20013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
200277ed5343SBarry Smith }
20033270192aSSatish Balay
MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)2004d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer)
2005d71ae5a4SJacob Faibussowitsch {
20060e6d2581SBarry Smith PetscReal xl, yl, xr, yr, w, h;
2007b0a32e0cSBarry Smith PetscDraw draw;
2008ace3abfcSBarry Smith PetscBool isnull;
20093270192aSSatish Balay
201077ed5343SBarry Smith PetscFunctionBegin;
20119566063dSJacob Faibussowitsch PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
20129566063dSJacob Faibussowitsch PetscCall(PetscDrawIsNull(draw, &isnull));
20133ba16761SJacob Faibussowitsch if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
201477ed5343SBarry Smith
20159371c9d4SSatish Balay xr = A->cmap->n;
20169371c9d4SSatish Balay yr = A->rmap->N;
20179371c9d4SSatish Balay h = yr / 10.0;
20189371c9d4SSatish Balay w = xr / 10.0;
20199371c9d4SSatish Balay xr += w;
20209371c9d4SSatish Balay yr += h;
20219371c9d4SSatish Balay xl = -w;
20229371c9d4SSatish Balay yl = -h;
20239566063dSJacob Faibussowitsch PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
20249566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
20259566063dSJacob Faibussowitsch PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
20269566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
20279566063dSJacob Faibussowitsch PetscCall(PetscDrawSave(draw));
20283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
20293270192aSSatish Balay }
20303270192aSSatish Balay
MatView_SeqBAIJ(Mat A,PetscViewer viewer)2031d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer)
2032d71ae5a4SJacob Faibussowitsch {
20339f196a02SMartin Diehl PetscBool isascii, isbinary, isdraw;
20342593348eSBarry Smith
20353a40ed3dSBarry Smith PetscFunctionBegin;
20369f196a02SMartin Diehl PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
20379566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
20389566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
20399f196a02SMartin Diehl if (isascii) {
20409566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
20410f5bd95cSBarry Smith } else if (isbinary) {
20429566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
20430f5bd95cSBarry Smith } else if (isdraw) {
20449566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
20455cd90555SBarry Smith } else {
2046a5e6ed63SBarry Smith Mat B;
20479566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
20489566063dSJacob Faibussowitsch PetscCall(MatView(B, viewer));
20499566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B));
20502593348eSBarry Smith }
20513ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
20522593348eSBarry Smith }
2053b6490206SBarry Smith
MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])2054d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
2055d71ae5a4SJacob Faibussowitsch {
2056cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2057c1ac3661SBarry Smith PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2058c1ac3661SBarry Smith PetscInt *ai = a->i, *ailen = a->ilen;
2059d0f46423SBarry Smith PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
206097e567efSBarry Smith MatScalar *ap, *aa = a->a;
2061cd0e1443SSatish Balay
20623a40ed3dSBarry Smith PetscFunctionBegin;
20632d61bbb3SSatish Balay for (k = 0; k < m; k++) { /* loop over rows */
20649371c9d4SSatish Balay row = im[k];
20659371c9d4SSatish Balay brow = row / bs;
20669371c9d4SSatish Balay if (row < 0) {
20679371c9d4SSatish Balay v += n;
20689371c9d4SSatish Balay continue;
20699371c9d4SSatish Balay } /* negative row */
207054c59aa7SJacob Faibussowitsch PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
20718e3a54c0SPierre Jolivet rp = PetscSafePointerPlusOffset(aj, ai[brow]);
20728e3a54c0SPierre Jolivet ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
20732c3acbe9SBarry Smith nrow = ailen[brow];
20742d61bbb3SSatish Balay for (l = 0; l < n; l++) { /* loop over columns */
20759371c9d4SSatish Balay if (in[l] < 0) {
20769371c9d4SSatish Balay v++;
20779371c9d4SSatish Balay continue;
20789371c9d4SSatish Balay } /* negative column */
207954c59aa7SJacob Faibussowitsch PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
20802d61bbb3SSatish Balay col = in[l];
20812d61bbb3SSatish Balay bcol = col / bs;
20822d61bbb3SSatish Balay cidx = col % bs;
20832d61bbb3SSatish Balay ridx = row % bs;
20842d61bbb3SSatish Balay high = nrow;
20852d61bbb3SSatish Balay low = 0; /* assume unsorted */
20862d61bbb3SSatish Balay while (high - low > 5) {
2087cd0e1443SSatish Balay t = (low + high) / 2;
2088cd0e1443SSatish Balay if (rp[t] > bcol) high = t;
2089cd0e1443SSatish Balay else low = t;
2090cd0e1443SSatish Balay }
2091cd0e1443SSatish Balay for (i = low; i < high; i++) {
2092cd0e1443SSatish Balay if (rp[i] > bcol) break;
2093cd0e1443SSatish Balay if (rp[i] == bcol) {
20942d61bbb3SSatish Balay *v++ = ap[bs2 * i + bs * cidx + ridx];
20952d61bbb3SSatish Balay goto finished;
2096cd0e1443SSatish Balay }
2097cd0e1443SSatish Balay }
209897e567efSBarry Smith *v++ = 0.0;
20992d61bbb3SSatish Balay finished:;
2100cd0e1443SSatish Balay }
2101cd0e1443SSatish Balay }
21023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
2103cd0e1443SSatish Balay }
2104cd0e1443SSatish Balay
/*
  MatSetValuesBlocked_SeqBAIJ - Inserts or adds an m x n array of bs x bs blocks into the matrix.

  im[] and in[] are block row / block column indices (they are checked against a->mbs and a->nbs).
  Negative block rows or columns are skipped. v holds the block values, laid out row-oriented or
  column-oriented according to a->roworiented; `is` selects ADD_VALUES or overwrite.
  When A->structure_only is set only the column indices are maintained and v is never read.

  A block that is not yet stored is inserted in sorted position unless a->nonew forbids it:
  nonew == 1 silently drops it, nonew == -1 raises an error.

  NOTE(review): MatSeqXAIJReallocateAIJ and MatSeqXAIJReallocateAIJ_structure_only are macros defined
  outside this chunk; they are passed many of this function's locals by name, so presumably they may
  update rp/ap/aa/aj/ai/rmax in place and may rely on other locals declared here - confirm before
  renaming or reordering anything.
*/
MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)2105d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2106d71ae5a4SJacob Faibussowitsch {
210792c4ed94SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2108e2ee6c50SBarry Smith PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2109c1ac3661SBarry Smith PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2110d0f46423SBarry Smith PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2111ace3abfcSBarry Smith PetscBool roworiented = a->roworiented;
2112dd6ea824SBarry Smith const PetscScalar *value = v;
21139d243f67SHong Zhang MatScalar *ap = NULL, *aa = a->a, *bap;
211492c4ed94SBarry Smith
21153a40ed3dSBarry Smith PetscFunctionBegin;
/* stepval: number of scalars in v to skip, after reading bs consecutive values of one block, to reach the next run of the same block */
21160e324ae4SSatish Balay if (roworiented) {
21170e324ae4SSatish Balay stepval = (n - 1) * bs;
21180e324ae4SSatish Balay } else {
21190e324ae4SSatish Balay stepval = (m - 1) * bs;
21200e324ae4SSatish Balay }
212192c4ed94SBarry Smith for (k = 0; k < m; k++) { /* loop over added rows */
212292c4ed94SBarry Smith row = im[k];
21235ef9f2a5SBarry Smith if (row < 0) continue;
21246bdcaf15SBarry Smith PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
/* rp/ap: start of this block row's column indices and values; rmax: allocated slots; nrow: slots in use */
212592c4ed94SBarry Smith rp = aj + ai[row];
21267dc0baabSHong Zhang if (!A->structure_only) ap = aa + bs2 * ai[row];
212792c4ed94SBarry Smith rmax = imax[row];
212892c4ed94SBarry Smith nrow = ailen[row];
212992c4ed94SBarry Smith low = 0;
2130c71e6ed7SBarry Smith high = nrow;
213192c4ed94SBarry Smith for (l = 0; l < n; l++) { /* loop over added columns */
21325ef9f2a5SBarry Smith if (in[l] < 0) continue;
21336bdcaf15SBarry Smith PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
213492c4ed94SBarry Smith col = in[l];
/* point value at the first scalar of block (k, l) inside v */
21357dc0baabSHong Zhang if (!A->structure_only) {
213692c4ed94SBarry Smith if (roworiented) {
213753ef36baSBarry Smith value = v + (k * (stepval + bs) + l) * bs;
21380e324ae4SSatish Balay } else {
213953ef36baSBarry Smith value = v + (l * (stepval + bs) + k) * bs;
214092c4ed94SBarry Smith }
21417dc0baabSHong Zhang }
/* keep the lower bound left by the previous column when columns arrive in increasing order; otherwise restart the search from the row start */
214226fbe8dcSKarl Rupp if (col <= lastcol) low = 0;
214326fbe8dcSKarl Rupp else high = nrow;
2144e2ee6c50SBarry Smith lastcol = col;
/* bisect until the window is short, then scan it linearly */
214592c4ed94SBarry Smith while (high - low > 7) {
214692c4ed94SBarry Smith t = (low + high) / 2;
214792c4ed94SBarry Smith if (rp[t] > col) high = t;
214892c4ed94SBarry Smith else low = t;
214992c4ed94SBarry Smith }
215092c4ed94SBarry Smith for (i = low; i < high; i++) {
215192c4ed94SBarry Smith if (rp[i] > col) break;
215292c4ed94SBarry Smith if (rp[i] == col) {
/* block already stored: nothing to do for structure_only, otherwise update its bs2 values in place */
21537dc0baabSHong Zhang if (A->structure_only) goto noinsert2;
21548a84c255SSatish Balay bap = ap + bs2 * i;
21550e324ae4SSatish Balay if (roworiented) {
/* each run of bs input values (one row of the block) is scattered with stride bs into the stored block */
21568a84c255SSatish Balay if (is == ADD_VALUES) {
2157dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) {
2158ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2159dd9472c6SBarry Smith }
21600e324ae4SSatish Balay } else {
2161dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) {
2162ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2163dd9472c6SBarry Smith }
2164dd9472c6SBarry Smith }
21650e324ae4SSatish Balay } else {
/* column-oriented input: each run of bs input values is copied contiguously into the stored block */
21660e324ae4SSatish Balay if (is == ADD_VALUES) {
216753ef36baSBarry Smith for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2168ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
216953ef36baSBarry Smith bap += bs;
2170dd9472c6SBarry Smith }
21710e324ae4SSatish Balay } else {
217253ef36baSBarry Smith for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2173ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
217453ef36baSBarry Smith bap += bs;
21758a84c255SSatish Balay }
2176dd9472c6SBarry Smith }
2177dd9472c6SBarry Smith }
2178f1241b54SBarry Smith goto noinsert2;
217992c4ed94SBarry Smith }
218092c4ed94SBarry Smith }
/* block not found; i is the position where it belongs. nonew == 1 drops it silently, nonew == -1 is an error */
218189280ab3SLois Curfman McInnes if (nonew == 1) goto noinsert2;
21825f80ce2aSJacob Faibussowitsch PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
/* NOTE(review): presumably grows the storage when the row has no free slot left - confirm against the macro definitions */
21837dc0baabSHong Zhang if (A->structure_only) {
21847dc0baabSHong Zhang MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
21857dc0baabSHong Zhang } else {
2186fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
21877dc0baabSHong Zhang }
/* N is the index of the last entry in use before the insertion; nrow and high grow by one */
21889371c9d4SSatish Balay N = nrow++ - 1;
21899371c9d4SSatish Balay high++;
219092c4ed94SBarry Smith /* shift up all the later entries in this row */
21919566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
219292c4ed94SBarry Smith rp[i] = col;
21937dc0baabSHong Zhang if (!A->structure_only) {
21949566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
21958a84c255SSatish Balay bap = ap + bs2 * i;
/* a freshly inserted block is always overwritten, whatever `is` says; same layouts as in the update case above */
21960e324ae4SSatish Balay if (roworiented) {
2197dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) {
2198ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2199dd9472c6SBarry Smith }
22000e324ae4SSatish Balay } else {
2201dd9472c6SBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) {
2202ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2203dd9472c6SBarry Smith }
2204dd9472c6SBarry Smith }
22057dc0baabSHong Zhang }
2206f1241b54SBarry Smith noinsert2:;
/* the next column's search starts at this position unless that column is not larger than this one */
220792c4ed94SBarry Smith low = i;
220892c4ed94SBarry Smith }
/* record the new number of blocks in use for this block row */
220992c4ed94SBarry Smith ailen[row] = nrow;
221092c4ed94SBarry Smith }
22113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
221292c4ed94SBarry Smith }
221326e093fcSHong Zhang
MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)2214d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode)
2215d71ae5a4SJacob Faibussowitsch {
2216584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2217580bdb30SBarry Smith PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2218d0f46423SBarry Smith PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen;
2219c1ac3661SBarry Smith PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0;
22203f1db9ecSBarry Smith MatScalar *aa = a->a, *ap;
22213447b6efSHong Zhang PetscReal ratio = 0.6;
2222584200bdSSatish Balay
22233a40ed3dSBarry Smith PetscFunctionBegin;
2224d32568d8SPierre Jolivet if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS);
2225584200bdSSatish Balay
222643ee02c3SBarry Smith if (m) rmax = ailen[0];
2227584200bdSSatish Balay for (i = 1; i < mbs; i++) {
2228584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/
2229584200bdSSatish Balay fshift += imax[i - 1] - ailen[i - 1];
2230d402145bSBarry Smith rmax = PetscMax(rmax, ailen[i]);
2231584200bdSSatish Balay if (fshift) {
2232580bdb30SBarry Smith ip = aj + ai[i];
2233580bdb30SBarry Smith ap = aa + bs2 * ai[i];
2234584200bdSSatish Balay N = ailen[i];
22359566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ip - fshift, ip, N));
223648a46eb9SPierre Jolivet if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2237672ba085SHong Zhang }
2238584200bdSSatish Balay ai[i] = ai[i - 1] + ailen[i - 1];
2239584200bdSSatish Balay }
2240584200bdSSatish Balay if (mbs) {
2241584200bdSSatish Balay fshift += imax[mbs - 1] - ailen[mbs - 1];
2242584200bdSSatish Balay ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2243584200bdSSatish Balay }
22447c565772SBarry Smith
2245584200bdSSatish Balay /* reset ilen and imax for each row */
22467c565772SBarry Smith a->nonzerorowcnt = 0;
2247672ba085SHong Zhang if (A->structure_only) {
22489566063dSJacob Faibussowitsch PetscCall(PetscFree2(a->imax, a->ilen));
2249672ba085SHong Zhang } else { /* !A->structure_only */
2250584200bdSSatish Balay for (i = 0; i < mbs; i++) {
2251584200bdSSatish Balay ailen[i] = imax[i] = ai[i + 1] - ai[i];
22527c565772SBarry Smith a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2253584200bdSSatish Balay }
2254672ba085SHong Zhang }
2255a7c10996SSatish Balay a->nz = ai[mbs];
2256584200bdSSatish Balay
2257584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */
2258b01c7715SBarry Smith a->idiagvalid = PETSC_FALSE;
22595f80ce2aSJacob Faibussowitsch if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
22609566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
22619566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
22629566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
226326fbe8dcSKarl Rupp
22648e58a170SBarry Smith A->info.mallocs += a->reallocs;
2265e2f3b5e9SSatish Balay a->reallocs = 0;
22660e6d2581SBarry Smith A->info.nz_unneeded = (PetscReal)fshift * bs2;
2267647a6520SHong Zhang a->rmax = rmax;
2268cf4441caSHong Zhang
226948a46eb9SPierre Jolivet if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
22703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
2271584200bdSSatish Balay }
2272584200bdSSatish Balay
2273bea157c4SSatish Balay /*
2274bea157c4SSatish Balay This function returns an array of flags which indicate the locations of contiguous
2275bea157c4SSatish Balay blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
2276a5b23f4aSJose E. Roman then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2277bea157c4SSatish Balay Assume: sizes should be long enough to hold all the values.
2278bea157c4SSatish Balay */
MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[],PetscInt * bs_max)2279d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max)
2280d71ae5a4SJacob Faibussowitsch {
2281ff6a9541SJacob Faibussowitsch PetscInt j = 0;
22823a40ed3dSBarry Smith
2283433994e6SBarry Smith PetscFunctionBegin;
2284ff6a9541SJacob Faibussowitsch for (PetscInt i = 0; i < n; j++) {
2285ff6a9541SJacob Faibussowitsch PetscInt row = idx[i];
2286a5b23f4aSJose E. Roman if (row % bs != 0) { /* Not the beginning of a block */
2287bea157c4SSatish Balay sizes[j] = 1;
2288bea157c4SSatish Balay i++;
2289e4fda26cSSatish Balay } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2290bea157c4SSatish Balay sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */
2291bea157c4SSatish Balay i++;
22926aad120cSJose E. Roman } else { /* Beginning of the block, so check if the complete block exists */
2293ff6a9541SJacob Faibussowitsch PetscBool flg = PETSC_TRUE;
2294ff6a9541SJacob Faibussowitsch for (PetscInt k = 1; k < bs; k++) {
2295bea157c4SSatish Balay if (row + k != idx[i + k]) { /* break in the block */
2296bea157c4SSatish Balay flg = PETSC_FALSE;
2297bea157c4SSatish Balay break;
2298d9b7c43dSSatish Balay }
2299bea157c4SSatish Balay }
2300abc0a331SBarry Smith if (flg) { /* No break in the bs */
2301bea157c4SSatish Balay sizes[j] = bs;
2302bea157c4SSatish Balay i += bs;
2303bea157c4SSatish Balay } else {
2304bea157c4SSatish Balay sizes[j] = 1;
2305bea157c4SSatish Balay i++;
2306bea157c4SSatish Balay }
2307bea157c4SSatish Balay }
2308bea157c4SSatish Balay }
2309bea157c4SSatish Balay *bs_max = j;
23103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
2311d9b7c43dSSatish Balay }
2312d9b7c43dSSatish Balay
MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x,Vec b)2313d71ae5a4SJacob Faibussowitsch PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2314d71ae5a4SJacob Faibussowitsch {
2315d9b7c43dSSatish Balay Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data;
2316f4df32b1SMatthew Knepley PetscInt i, j, k, count, *rows;
2317d0f46423SBarry Smith PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
231887828ca2SBarry Smith PetscScalar zero = 0.0;
23193f1db9ecSBarry Smith MatScalar *aa;
232097b48c8fSBarry Smith const PetscScalar *xx;
232197b48c8fSBarry Smith PetscScalar *bb;
2322d9b7c43dSSatish Balay
23233a40ed3dSBarry Smith PetscFunctionBegin;
2324dd8e379bSPierre Jolivet /* fix right-hand side if needed */
232597b48c8fSBarry Smith if (x && b) {
23269566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx));
23279566063dSJacob Faibussowitsch PetscCall(VecGetArray(b, &bb));
2328ad540459SPierre Jolivet for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
23299566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx));
23309566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(b, &bb));
233197b48c8fSBarry Smith }
233297b48c8fSBarry Smith
2333d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */
2334bea157c4SSatish Balay /* allocate memory for rows,sizes */
23359566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2336bea157c4SSatish Balay
2337563b5814SBarry Smith /* copy IS values to rows, and sort them */
233826fbe8dcSKarl Rupp for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
23399566063dSJacob Faibussowitsch PetscCall(PetscSortInt(is_n, rows));
234097b48c8fSBarry Smith
2341a9817697SBarry Smith if (baij->keepnonzeropattern) {
234226fbe8dcSKarl Rupp for (i = 0; i < is_n; i++) sizes[i] = 1;
2343dffd3267SBarry Smith bs_max = is_n;
2344dffd3267SBarry Smith } else {
23459566063dSJacob Faibussowitsch PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2346e56f5c9eSBarry Smith A->nonzerostate++;
2347dffd3267SBarry Smith }
2348bea157c4SSatish Balay
2349bea157c4SSatish Balay for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2350bea157c4SSatish Balay row = rows[j];
23515f80ce2aSJacob Faibussowitsch PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2352bea157c4SSatish Balay count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2353835f2295SStefano Zampini aa = baij->a + baij->i[row / bs] * bs2 + (row % bs);
2354a9817697SBarry Smith if (sizes[i] == bs && !baij->keepnonzeropattern) {
2355d4a378daSJed Brown if (diag != (PetscScalar)0.0) {
2356bea157c4SSatish Balay if (baij->ilen[row / bs] > 0) {
2357bea157c4SSatish Balay baij->ilen[row / bs] = 1;
2358bea157c4SSatish Balay baij->j[baij->i[row / bs]] = row / bs;
235926fbe8dcSKarl Rupp
23609566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(aa, count * bs));
2361a07cd24cSSatish Balay }
2362563b5814SBarry Smith /* Now insert all the diagonal values for this bs */
23639927e4dfSBarry Smith for (k = 0; k < bs; k++) PetscUseTypeMethod(A, setvalues, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES);
2364f4df32b1SMatthew Knepley } else { /* (diag == 0.0) */
2365bea157c4SSatish Balay baij->ilen[row / bs] = 0;
2366f4df32b1SMatthew Knepley } /* end (diag == 0.0) */
2367bea157c4SSatish Balay } else { /* (sizes[i] != bs) */
23686bdcaf15SBarry Smith PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2369bea157c4SSatish Balay for (k = 0; k < count; k++) {
2370d9b7c43dSSatish Balay aa[0] = zero;
2371d9b7c43dSSatish Balay aa += bs;
2372d9b7c43dSSatish Balay }
23739927e4dfSBarry Smith if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES);
2374d9b7c43dSSatish Balay }
2375bea157c4SSatish Balay }
2376bea157c4SSatish Balay
23779566063dSJacob Faibussowitsch PetscCall(PetscFree2(rows, sizes));
23789566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
23793ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
2380d9b7c43dSSatish Balay }
23811c351548SSatish Balay
MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x,Vec b)2382ff6a9541SJacob Faibussowitsch static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2383d71ae5a4SJacob Faibussowitsch {
238497b48c8fSBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data;
238597b48c8fSBarry Smith PetscInt i, j, k, count;
238697b48c8fSBarry Smith PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col;
238797b48c8fSBarry Smith PetscScalar zero = 0.0;
238897b48c8fSBarry Smith MatScalar *aa;
238997b48c8fSBarry Smith const PetscScalar *xx;
239097b48c8fSBarry Smith PetscScalar *bb;
239156777dd2SBarry Smith PetscBool *zeroed, vecs = PETSC_FALSE;
239297b48c8fSBarry Smith
239397b48c8fSBarry Smith PetscFunctionBegin;
2394dd8e379bSPierre Jolivet /* fix right-hand side if needed */
239597b48c8fSBarry Smith if (x && b) {
23969566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx));
23979566063dSJacob Faibussowitsch PetscCall(VecGetArray(b, &bb));
239856777dd2SBarry Smith vecs = PETSC_TRUE;
239997b48c8fSBarry Smith }
240097b48c8fSBarry Smith
240197b48c8fSBarry Smith /* zero the columns */
24029566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
240397b48c8fSBarry Smith for (i = 0; i < is_n; i++) {
24045f80ce2aSJacob Faibussowitsch PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
240597b48c8fSBarry Smith zeroed[is_idx[i]] = PETSC_TRUE;
240697b48c8fSBarry Smith }
240797b48c8fSBarry Smith for (i = 0; i < A->rmap->N; i++) {
240897b48c8fSBarry Smith if (!zeroed[i]) {
240997b48c8fSBarry Smith row = i / bs;
241097b48c8fSBarry Smith for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
241197b48c8fSBarry Smith for (k = 0; k < bs; k++) {
241297b48c8fSBarry Smith col = bs * baij->j[j] + k;
241397b48c8fSBarry Smith if (zeroed[col]) {
2414835f2295SStefano Zampini aa = baij->a + j * bs2 + (i % bs) + bs * k;
241556777dd2SBarry Smith if (vecs) bb[i] -= aa[0] * xx[col];
241697b48c8fSBarry Smith aa[0] = 0.0;
241797b48c8fSBarry Smith }
241897b48c8fSBarry Smith }
241997b48c8fSBarry Smith }
242056777dd2SBarry Smith } else if (vecs) bb[i] = diag * xx[i];
242197b48c8fSBarry Smith }
24229566063dSJacob Faibussowitsch PetscCall(PetscFree(zeroed));
242356777dd2SBarry Smith if (vecs) {
24249566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx));
24259566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(b, &bb));
242656777dd2SBarry Smith }
242797b48c8fSBarry Smith
242897b48c8fSBarry Smith /* zero the rows */
242997b48c8fSBarry Smith for (i = 0; i < is_n; i++) {
243097b48c8fSBarry Smith row = is_idx[i];
243197b48c8fSBarry Smith count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2432835f2295SStefano Zampini aa = baij->a + baij->i[row / bs] * bs2 + (row % bs);
243397b48c8fSBarry Smith for (k = 0; k < count; k++) {
243497b48c8fSBarry Smith aa[0] = zero;
243597b48c8fSBarry Smith aa += bs;
243697b48c8fSBarry Smith }
2437dbbe0bcdSBarry Smith if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
243897b48c8fSBarry Smith }
24399566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
24403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
244197b48c8fSBarry Smith }
244297b48c8fSBarry Smith
MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)2443d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2444d71ae5a4SJacob Faibussowitsch {
24452d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2446e2ee6c50SBarry Smith PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2447c1ac3661SBarry Smith PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2448d0f46423SBarry Smith PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2449c1ac3661SBarry Smith PetscInt ridx, cidx, bs2 = a->bs2;
2450ace3abfcSBarry Smith PetscBool roworiented = a->roworiented;
2451d8cdefa3SHong Zhang MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap;
24522d61bbb3SSatish Balay
24532d61bbb3SSatish Balay PetscFunctionBegin;
24542d61bbb3SSatish Balay for (k = 0; k < m; k++) { /* loop over added rows */
2455085a36d4SBarry Smith row = im[k];
2456085a36d4SBarry Smith brow = row / bs;
24575ef9f2a5SBarry Smith if (row < 0) continue;
24586bdcaf15SBarry Smith PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
24598e3a54c0SPierre Jolivet rp = PetscSafePointerPlusOffset(aj, ai[brow]);
24608e3a54c0SPierre Jolivet if (!A->structure_only) ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
24612d61bbb3SSatish Balay rmax = imax[brow];
24622d61bbb3SSatish Balay nrow = ailen[brow];
24632d61bbb3SSatish Balay low = 0;
2464c71e6ed7SBarry Smith high = nrow;
24652d61bbb3SSatish Balay for (l = 0; l < n; l++) { /* loop over added columns */
24665ef9f2a5SBarry Smith if (in[l] < 0) continue;
24676bdcaf15SBarry Smith PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
24689371c9d4SSatish Balay col = in[l];
24699371c9d4SSatish Balay bcol = col / bs;
24709371c9d4SSatish Balay ridx = row % bs;
24719371c9d4SSatish Balay cidx = col % bs;
2472672ba085SHong Zhang if (!A->structure_only) {
24732d61bbb3SSatish Balay if (roworiented) {
24745ef9f2a5SBarry Smith value = v[l + k * n];
24752d61bbb3SSatish Balay } else {
24762d61bbb3SSatish Balay value = v[k + l * m];
24772d61bbb3SSatish Balay }
2478672ba085SHong Zhang }
24799371c9d4SSatish Balay if (col <= lastcol) low = 0;
24809371c9d4SSatish Balay else high = nrow;
2481e2ee6c50SBarry Smith lastcol = col;
24822d61bbb3SSatish Balay while (high - low > 7) {
24832d61bbb3SSatish Balay t = (low + high) / 2;
24842d61bbb3SSatish Balay if (rp[t] > bcol) high = t;
24852d61bbb3SSatish Balay else low = t;
24862d61bbb3SSatish Balay }
24872d61bbb3SSatish Balay for (i = low; i < high; i++) {
24882d61bbb3SSatish Balay if (rp[i] > bcol) break;
24892d61bbb3SSatish Balay if (rp[i] == bcol) {
24908e3a54c0SPierre Jolivet bap = PetscSafePointerPlusOffset(ap, bs2 * i + bs * cidx + ridx);
2491672ba085SHong Zhang if (!A->structure_only) {
24922d61bbb3SSatish Balay if (is == ADD_VALUES) *bap += value;
24932d61bbb3SSatish Balay else *bap = value;
2494672ba085SHong Zhang }
24952d61bbb3SSatish Balay goto noinsert1;
24962d61bbb3SSatish Balay }
24972d61bbb3SSatish Balay }
24982d61bbb3SSatish Balay if (nonew == 1) goto noinsert1;
24995f80ce2aSJacob Faibussowitsch PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2500672ba085SHong Zhang if (A->structure_only) {
2501672ba085SHong Zhang MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2502672ba085SHong Zhang } else {
2503fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2504672ba085SHong Zhang }
25059371c9d4SSatish Balay N = nrow++ - 1;
25069371c9d4SSatish Balay high++;
25072d61bbb3SSatish Balay /* shift up all the later entries in this row */
25089566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
25092d61bbb3SSatish Balay rp[i] = bcol;
2510580bdb30SBarry Smith if (!A->structure_only) {
25119566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
25129566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2513580bdb30SBarry Smith ap[bs2 * i + bs * cidx + ridx] = value;
2514580bdb30SBarry Smith }
2515085a36d4SBarry Smith a->nz++;
25162d61bbb3SSatish Balay noinsert1:;
25172d61bbb3SSatish Balay low = i;
25182d61bbb3SSatish Balay }
25192d61bbb3SSatish Balay ailen[brow] = nrow;
25202d61bbb3SSatish Balay }
25213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
25222d61bbb3SSatish Balay }
25232d61bbb3SSatish Balay
MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo * info)2524ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info)
2525d71ae5a4SJacob Faibussowitsch {
25262d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
25272d61bbb3SSatish Balay Mat outA;
2528ace3abfcSBarry Smith PetscBool row_identity, col_identity;
25292d61bbb3SSatish Balay
25302d61bbb3SSatish Balay PetscFunctionBegin;
25315f80ce2aSJacob Faibussowitsch PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
25329566063dSJacob Faibussowitsch PetscCall(ISIdentity(row, &row_identity));
25339566063dSJacob Faibussowitsch PetscCall(ISIdentity(col, &col_identity));
25345f80ce2aSJacob Faibussowitsch PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
25352d61bbb3SSatish Balay
25362d61bbb3SSatish Balay outA = inA;
2537d5f3da31SBarry Smith inA->factortype = MAT_FACTOR_LU;
25389566063dSJacob Faibussowitsch PetscCall(PetscFree(inA->solvertype));
25399566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
25402d61bbb3SSatish Balay
25419566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)row));
25429566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->row));
2543c3122656SLisandro Dalcin a->row = row;
25449566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)col));
25459566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->col));
2546c3122656SLisandro Dalcin a->col = col;
2547c38d4ed2SBarry Smith
2548c38d4ed2SBarry Smith /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
25499566063dSJacob Faibussowitsch PetscCall(ISDestroy(&a->icol));
25509566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2551c38d4ed2SBarry Smith
25529566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2553aa624791SPierre Jolivet if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work));
25549566063dSJacob Faibussowitsch PetscCall(MatLUFactorNumeric(outA, inA, info));
25553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
25562d61bbb3SSatish Balay }
2557d9b7c43dSSatish Balay
MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,const PetscInt * indices)2558ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices)
2559d71ae5a4SJacob Faibussowitsch {
256027a8da17SBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
256127a8da17SBarry Smith
256227a8da17SBarry Smith PetscFunctionBegin;
2563ff6a9541SJacob Faibussowitsch baij->nz = baij->maxnz;
2564ff6a9541SJacob Faibussowitsch PetscCall(PetscArraycpy(baij->j, indices, baij->nz));
2565ff6a9541SJacob Faibussowitsch PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs));
25663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
256727a8da17SBarry Smith }
256827a8da17SBarry Smith
256927a8da17SBarry Smith /*@
2570d8a51d2aSBarry Smith MatSeqBAIJSetColumnIndices - Set the column indices for all the block rows in the matrix.
257127a8da17SBarry Smith
257227a8da17SBarry Smith Input Parameters:
257311a5261eSBarry Smith + mat - the `MATSEQBAIJ` matrix
2574d8a51d2aSBarry Smith - indices - the block column indices
257527a8da17SBarry Smith
257615091d37SBarry Smith Level: advanced
257715091d37SBarry Smith
257827a8da17SBarry Smith Notes:
257927a8da17SBarry Smith This can be called if you have precomputed the nonzero structure of the
258027a8da17SBarry Smith matrix and want to provide it to the matrix object to improve the performance
258111a5261eSBarry Smith of the `MatSetValues()` operation.
258227a8da17SBarry Smith
258327a8da17SBarry Smith You MUST have set the correct numbers of nonzeros per row in the call to
258411a5261eSBarry Smith `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
258527a8da17SBarry Smith
258611a5261eSBarry Smith MUST be called before any calls to `MatSetValues()`
258727a8da17SBarry Smith
25881cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()`
258927a8da17SBarry Smith @*/
MatSeqBAIJSetColumnIndices(Mat mat,PetscInt * indices)2590d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices)
2591d71ae5a4SJacob Faibussowitsch {
259227a8da17SBarry Smith PetscFunctionBegin;
25930700a824SBarry Smith PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
25944f572ea9SToby Isaac PetscAssertPointer(indices, 2);
2595810441c8SPierre Jolivet PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, const PetscInt *), (mat, (const PetscInt *)indices));
25963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
259727a8da17SBarry Smith }
259827a8da17SBarry Smith
MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])259966976f2fSJacob Faibussowitsch static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[])
2600d71ae5a4SJacob Faibussowitsch {
2601273d9f13SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2602c1ac3661SBarry Smith PetscInt i, j, n, row, bs, *ai, *aj, mbs;
2603273d9f13SBarry Smith PetscReal atmp;
260487828ca2SBarry Smith PetscScalar *x, zero = 0.0;
2605273d9f13SBarry Smith MatScalar *aa;
2606c1ac3661SBarry Smith PetscInt ncols, brow, krow, kcol;
2607273d9f13SBarry Smith
2608273d9f13SBarry Smith PetscFunctionBegin;
26095f80ce2aSJacob Faibussowitsch PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2610d0f46423SBarry Smith bs = A->rmap->bs;
2611273d9f13SBarry Smith aa = a->a;
2612273d9f13SBarry Smith ai = a->i;
2613273d9f13SBarry Smith aj = a->j;
2614273d9f13SBarry Smith mbs = a->mbs;
2615273d9f13SBarry Smith
26169566063dSJacob Faibussowitsch PetscCall(VecSet(v, zero));
26179566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &x));
26189566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(v, &n));
26195f80ce2aSJacob Faibussowitsch PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2620273d9f13SBarry Smith for (i = 0; i < mbs; i++) {
26219371c9d4SSatish Balay ncols = ai[1] - ai[0];
26229371c9d4SSatish Balay ai++;
2623273d9f13SBarry Smith brow = bs * i;
2624273d9f13SBarry Smith for (j = 0; j < ncols; j++) {
2625273d9f13SBarry Smith for (kcol = 0; kcol < bs; kcol++) {
2626273d9f13SBarry Smith for (krow = 0; krow < bs; krow++) {
26279371c9d4SSatish Balay atmp = PetscAbsScalar(*aa);
26289371c9d4SSatish Balay aa++;
2629273d9f13SBarry Smith row = brow + krow; /* row index */
26309371c9d4SSatish Balay if (PetscAbsScalar(x[row]) < atmp) {
26319371c9d4SSatish Balay x[row] = atmp;
26329371c9d4SSatish Balay if (idx) idx[row] = bs * (*aj) + kcol;
26339371c9d4SSatish Balay }
2634273d9f13SBarry Smith }
2635273d9f13SBarry Smith }
2636273d9f13SBarry Smith aj++;
2637273d9f13SBarry Smith }
2638273d9f13SBarry Smith }
26399566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &x));
26403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
2641273d9f13SBarry Smith }
2642273d9f13SBarry Smith
MatGetRowSumAbs_SeqBAIJ(Mat A,Vec v)2643eede4a3fSMark Adams static PetscErrorCode MatGetRowSumAbs_SeqBAIJ(Mat A, Vec v)
2644eede4a3fSMark Adams {
2645eede4a3fSMark Adams Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2646eede4a3fSMark Adams PetscInt i, j, n, row, bs, *ai, mbs;
2647eede4a3fSMark Adams PetscReal atmp;
2648eede4a3fSMark Adams PetscScalar *x, zero = 0.0;
2649eede4a3fSMark Adams MatScalar *aa;
2650eede4a3fSMark Adams PetscInt ncols, brow, krow, kcol;
2651eede4a3fSMark Adams
2652eede4a3fSMark Adams PetscFunctionBegin;
2653eede4a3fSMark Adams PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2654eede4a3fSMark Adams bs = A->rmap->bs;
2655eede4a3fSMark Adams aa = a->a;
2656eede4a3fSMark Adams ai = a->i;
2657eede4a3fSMark Adams mbs = a->mbs;
2658eede4a3fSMark Adams
2659eede4a3fSMark Adams PetscCall(VecSet(v, zero));
2660eede4a3fSMark Adams PetscCall(VecGetArrayWrite(v, &x));
2661eede4a3fSMark Adams PetscCall(VecGetLocalSize(v, &n));
2662eede4a3fSMark Adams PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2663eede4a3fSMark Adams for (i = 0; i < mbs; i++) {
2664eede4a3fSMark Adams ncols = ai[1] - ai[0];
2665eede4a3fSMark Adams ai++;
2666eede4a3fSMark Adams brow = bs * i;
2667eede4a3fSMark Adams for (j = 0; j < ncols; j++) {
2668eede4a3fSMark Adams for (kcol = 0; kcol < bs; kcol++) {
2669eede4a3fSMark Adams for (krow = 0; krow < bs; krow++) {
2670eede4a3fSMark Adams atmp = PetscAbsScalar(*aa);
2671eede4a3fSMark Adams aa++;
2672eede4a3fSMark Adams row = brow + krow; /* row index */
2673eede4a3fSMark Adams x[row] += atmp;
2674eede4a3fSMark Adams }
2675eede4a3fSMark Adams }
2676eede4a3fSMark Adams }
2677eede4a3fSMark Adams }
2678eede4a3fSMark Adams PetscCall(VecRestoreArrayWrite(v, &x));
2679eede4a3fSMark Adams PetscFunctionReturn(PETSC_SUCCESS);
2680eede4a3fSMark Adams }
2681eede4a3fSMark Adams
MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)268266976f2fSJacob Faibussowitsch static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str)
2683d71ae5a4SJacob Faibussowitsch {
26843c896bc6SHong Zhang PetscFunctionBegin;
26853c896bc6SHong Zhang /* If the two matrices have the same copy implementation, use fast copy. */
26863c896bc6SHong Zhang if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
26873c896bc6SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
26883c896bc6SHong Zhang Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
2689d88c0aacSHong Zhang PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
26903c896bc6SHong Zhang
26915f80ce2aSJacob Faibussowitsch PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
26925f80ce2aSJacob Faibussowitsch PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
26939566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
26949566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)B));
26953c896bc6SHong Zhang } else {
26969566063dSJacob Faibussowitsch PetscCall(MatCopy_Basic(A, B, str));
26973c896bc6SHong Zhang }
26983ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
26993c896bc6SHong Zhang }
27003c896bc6SHong Zhang
MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar * array[])2701d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[])
2702d71ae5a4SJacob Faibussowitsch {
2703f2a5309cSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
27046e111a19SKarl Rupp
2705f2a5309cSSatish Balay PetscFunctionBegin;
2706f2a5309cSSatish Balay *array = a->a;
27073ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
2708f2a5309cSSatish Balay }
2709f2a5309cSSatish Balay
MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar * array[])2710d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[])
2711d71ae5a4SJacob Faibussowitsch {
2712f2a5309cSSatish Balay PetscFunctionBegin;
2713cda14afcSprj- *array = NULL;
27143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
2715f2a5309cSSatish Balay }
2716f2a5309cSSatish Balay
MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt * nnz)2717d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz)
2718d71ae5a4SJacob Faibussowitsch {
2719b264fe52SHong Zhang PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
272052768537SHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
272152768537SHong Zhang Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
272252768537SHong Zhang
272352768537SHong Zhang PetscFunctionBegin;
272452768537SHong Zhang /* Set the number of nonzeros in the new matrix */
27259566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
27263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
272752768537SHong Zhang }
272852768537SHong Zhang
MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)2729d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2730d71ae5a4SJacob Faibussowitsch {
273142ee4b1aSHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
273231ce2d13SHong Zhang PetscInt bs = Y->rmap->bs, bs2 = bs * bs;
2733e838b9e7SJed Brown PetscBLASInt one = 1;
273442ee4b1aSHong Zhang
273542ee4b1aSHong Zhang PetscFunctionBegin;
2736134adf20SPierre Jolivet if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2737134adf20SPierre Jolivet PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2738134adf20SPierre Jolivet if (e) {
27399566063dSJacob Faibussowitsch PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2740134adf20SPierre Jolivet if (e) {
27419566063dSJacob Faibussowitsch PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2742134adf20SPierre Jolivet if (e) str = SAME_NONZERO_PATTERN;
2743134adf20SPierre Jolivet }
2744134adf20SPierre Jolivet }
274554c59aa7SJacob Faibussowitsch if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2746134adf20SPierre Jolivet }
274742ee4b1aSHong Zhang if (str == SAME_NONZERO_PATTERN) {
2748f4df32b1SMatthew Knepley PetscScalar alpha = a;
2749c5df96a5SBarry Smith PetscBLASInt bnz;
27509566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2751792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
27529566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2753ab784542SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
27549566063dSJacob Faibussowitsch PetscCall(MatAXPY_Basic(Y, a, X, str));
275542ee4b1aSHong Zhang } else {
275652768537SHong Zhang Mat B;
275752768537SHong Zhang PetscInt *nnz;
275854c59aa7SJacob Faibussowitsch PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
27599566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
27609566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
27619566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
27629566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
27639566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
27649566063dSJacob Faibussowitsch PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
27659566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
27669566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
27679566063dSJacob Faibussowitsch PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
27689566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(Y, &B));
27699566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz));
277042ee4b1aSHong Zhang }
27713ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
277242ee4b1aSHong Zhang }
277342ee4b1aSHong Zhang
MatConjugate_SeqBAIJ(Mat A)2774d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2775d71ae5a4SJacob Faibussowitsch {
27762726fb6dSPierre Jolivet Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
27772726fb6dSPierre Jolivet PetscInt i, nz = a->bs2 * a->i[a->mbs];
27782726fb6dSPierre Jolivet MatScalar *aa = a->a;
27792726fb6dSPierre Jolivet
27802726fb6dSPierre Jolivet PetscFunctionBegin;
27812726fb6dSPierre Jolivet for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
27823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
27832726fb6dSPierre Jolivet }
27842726fb6dSPierre Jolivet
MatRealPart_SeqBAIJ(Mat A)2785ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2786d71ae5a4SJacob Faibussowitsch {
2787ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX)
278899cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
278999cafbc1SBarry Smith PetscInt i, nz = a->bs2 * a->i[a->mbs];
2790dd6ea824SBarry Smith MatScalar *aa = a->a;
279199cafbc1SBarry Smith
279299cafbc1SBarry Smith PetscFunctionBegin;
279399cafbc1SBarry Smith for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
27943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
2795ff6a9541SJacob Faibussowitsch #else
2796ff6a9541SJacob Faibussowitsch (void)A;
2797ff6a9541SJacob Faibussowitsch return PETSC_SUCCESS;
2798ff6a9541SJacob Faibussowitsch #endif
279999cafbc1SBarry Smith }
280099cafbc1SBarry Smith
MatImaginaryPart_SeqBAIJ(Mat A)2801ff6a9541SJacob Faibussowitsch static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2802d71ae5a4SJacob Faibussowitsch {
2803ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX)
280499cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
280599cafbc1SBarry Smith PetscInt i, nz = a->bs2 * a->i[a->mbs];
2806dd6ea824SBarry Smith MatScalar *aa = a->a;
280799cafbc1SBarry Smith
280899cafbc1SBarry Smith PetscFunctionBegin;
280999cafbc1SBarry Smith for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
28103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
2811ff6a9541SJacob Faibussowitsch #else
2812ff6a9541SJacob Faibussowitsch (void)A;
2813ff6a9541SJacob Faibussowitsch return PETSC_SUCCESS;
2814ff6a9541SJacob Faibussowitsch #endif
281599cafbc1SBarry Smith }
281699cafbc1SBarry Smith
28173acb8795SBarry Smith /*
28182479783cSJose E. Roman Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
28193acb8795SBarry Smith */
MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt * nn,const PetscInt * ia[],const PetscInt * ja[],PetscBool * done)2820ff6a9541SJacob Faibussowitsch static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2821d71ae5a4SJacob Faibussowitsch {
28223acb8795SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
28233acb8795SBarry Smith PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
28243acb8795SBarry Smith PetscInt nz = a->i[m], row, *jj, mr, col;
28253acb8795SBarry Smith
28263acb8795SBarry Smith PetscFunctionBegin;
28273acb8795SBarry Smith *nn = n;
28283ba16761SJacob Faibussowitsch if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
28295f80ce2aSJacob Faibussowitsch PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
28309566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(n, &collengths));
28319566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &cia));
28329566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &cja));
28333acb8795SBarry Smith jj = a->j;
2834ad540459SPierre Jolivet for (i = 0; i < nz; i++) collengths[jj[i]]++;
28353acb8795SBarry Smith cia[0] = oshift;
2836ad540459SPierre Jolivet for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
28379566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(collengths, n));
28383acb8795SBarry Smith jj = a->j;
28393acb8795SBarry Smith for (row = 0; row < m; row++) {
28403acb8795SBarry Smith mr = a->i[row + 1] - a->i[row];
28413acb8795SBarry Smith for (i = 0; i < mr; i++) {
28423acb8795SBarry Smith col = *jj++;
284326fbe8dcSKarl Rupp
28443acb8795SBarry Smith cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
28453acb8795SBarry Smith }
28463acb8795SBarry Smith }
28479566063dSJacob Faibussowitsch PetscCall(PetscFree(collengths));
28489371c9d4SSatish Balay *ia = cia;
28499371c9d4SSatish Balay *ja = cja;
28503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
28513acb8795SBarry Smith }
28523acb8795SBarry Smith
MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt * n,const PetscInt * ia[],const PetscInt * ja[],PetscBool * done)2853ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2854d71ae5a4SJacob Faibussowitsch {
28553acb8795SBarry Smith PetscFunctionBegin;
28563ba16761SJacob Faibussowitsch if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
28579566063dSJacob Faibussowitsch PetscCall(PetscFree(*ia));
28589566063dSJacob Faibussowitsch PetscCall(PetscFree(*ja));
28593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
28603acb8795SBarry Smith }
28613acb8795SBarry Smith
2862525d23c0SHong Zhang /*
2863525d23c0SHong Zhang MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2864525d23c0SHong Zhang MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2865040ebd07SHong Zhang spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2866525d23c0SHong Zhang */
MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt * nn,const PetscInt * ia[],const PetscInt * ja[],PetscInt * spidx[],PetscBool * done)2867d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2868d71ae5a4SJacob Faibussowitsch {
2869525d23c0SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2870c0349474SHong Zhang PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2871525d23c0SHong Zhang PetscInt nz = a->i[m], row, *jj, mr, col;
2872525d23c0SHong Zhang PetscInt *cspidx;
2873f6d58c54SBarry Smith
2874f6d58c54SBarry Smith PetscFunctionBegin;
2875525d23c0SHong Zhang *nn = n;
28763ba16761SJacob Faibussowitsch if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2877f6d58c54SBarry Smith
28789566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(n, &collengths));
28799566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &cia));
28809566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &cja));
28819566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &cspidx));
2882525d23c0SHong Zhang jj = a->j;
2883ad540459SPierre Jolivet for (i = 0; i < nz; i++) collengths[jj[i]]++;
2884525d23c0SHong Zhang cia[0] = oshift;
2885ad540459SPierre Jolivet for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
28869566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(collengths, n));
2887525d23c0SHong Zhang jj = a->j;
2888525d23c0SHong Zhang for (row = 0; row < m; row++) {
2889525d23c0SHong Zhang mr = a->i[row + 1] - a->i[row];
2890525d23c0SHong Zhang for (i = 0; i < mr; i++) {
2891525d23c0SHong Zhang col = *jj++;
2892525d23c0SHong Zhang cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2893525d23c0SHong Zhang cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2894525d23c0SHong Zhang }
2895525d23c0SHong Zhang }
28969566063dSJacob Faibussowitsch PetscCall(PetscFree(collengths));
2897071fcb05SBarry Smith *ia = cia;
2898071fcb05SBarry Smith *ja = cja;
2899525d23c0SHong Zhang *spidx = cspidx;
29003ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
2901f6d58c54SBarry Smith }
2902f6d58c54SBarry Smith
/*
  MatRestoreColumnIJ_SeqBAIJ_Color - Gives back column index data together with the extra spidx array.

  ia, ja (and A, oshift, symmetric, inodecompressed, n, done) are handed unchanged to
  MatRestoreColumnIJ_SeqBAIJ(); afterwards the array referenced by *spidx is released with PetscFree().
*/
PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
{
  PetscFunctionBegin;
  /* ia/ja are released by the plain (non-Color) restore routine */
  PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
  /* spidx is only known to the _Color variant, so it is freed here */
  PetscCall(PetscFree(*spidx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
291099cafbc1SBarry Smith
MatShift_SeqBAIJ(Mat Y,PetscScalar a)291166976f2fSJacob Faibussowitsch static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a)
2912d71ae5a4SJacob Faibussowitsch {
29137d68702bSBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
29147d68702bSBarry Smith
29157d68702bSBarry Smith PetscFunctionBegin;
291648a46eb9SPierre Jolivet if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
29179566063dSJacob Faibussowitsch PetscCall(MatShift_Basic(Y, a));
29183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
29197d68702bSBarry Smith }
29207d68702bSBarry Smith
MatEliminateZeros_SeqBAIJ(Mat A,PetscBool keep)292117ea310bSPierre Jolivet PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep)
292217ea310bSPierre Jolivet {
292317ea310bSPierre Jolivet Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
292417ea310bSPierre Jolivet PetscInt fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k;
292517ea310bSPierre Jolivet PetscInt m = A->rmap->N, *ailen = a->ilen;
292617ea310bSPierre Jolivet PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0;
292717ea310bSPierre Jolivet MatScalar *aa = a->a, *ap;
292817ea310bSPierre Jolivet PetscBool zero;
292917ea310bSPierre Jolivet
293017ea310bSPierre Jolivet PetscFunctionBegin;
293117ea310bSPierre Jolivet PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix");
293217ea310bSPierre Jolivet if (m) rmax = ailen[0];
293317ea310bSPierre Jolivet for (i = 1; i <= mbs; i++) {
293417ea310bSPierre Jolivet for (k = ai[i - 1]; k < ai[i]; k++) {
293517ea310bSPierre Jolivet zero = PETSC_TRUE;
293617ea310bSPierre Jolivet ap = aa + bs2 * k;
293717ea310bSPierre Jolivet for (j = 0; j < bs2 && zero; j++) {
293817ea310bSPierre Jolivet if (ap[j] != 0.0) zero = PETSC_FALSE;
293917ea310bSPierre Jolivet }
294017ea310bSPierre Jolivet if (zero && (aj[k] != i - 1 || !keep)) fshift++;
294117ea310bSPierre Jolivet else {
294217ea310bSPierre Jolivet if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1));
294317ea310bSPierre Jolivet aj[k - fshift] = aj[k];
294417ea310bSPierre Jolivet PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2));
294517ea310bSPierre Jolivet }
294617ea310bSPierre Jolivet }
294717ea310bSPierre Jolivet ai[i - 1] -= fshift_prev;
294817ea310bSPierre Jolivet fshift_prev = fshift;
294917ea310bSPierre Jolivet ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1];
295017ea310bSPierre Jolivet a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0);
295117ea310bSPierre Jolivet rmax = PetscMax(rmax, ailen[i - 1]);
295217ea310bSPierre Jolivet }
295317ea310bSPierre Jolivet if (fshift) {
295417ea310bSPierre Jolivet if (mbs) {
295517ea310bSPierre Jolivet ai[mbs] -= fshift;
295617ea310bSPierre Jolivet a->nz = ai[mbs];
295717ea310bSPierre Jolivet }
295817ea310bSPierre Jolivet PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz));
295917ea310bSPierre Jolivet A->nonzerostate++;
296017ea310bSPierre Jolivet A->info.nz_unneeded += (PetscReal)fshift;
296117ea310bSPierre Jolivet a->rmax = rmax;
296217ea310bSPierre Jolivet PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
296317ea310bSPierre Jolivet PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
296417ea310bSPierre Jolivet }
296517ea310bSPierre Jolivet PetscFunctionReturn(PETSC_SUCCESS);
296617ea310bSPierre Jolivet }
296717ea310bSPierre Jolivet
/*
  Function table for the SeqBAIJ matrix type. The initializer is positional: the order of the
  entries determines which operation slot each routine lands in, so entries must never be
  reordered and unused slots must stay NULL. The numbered markers give the zero-based slot
  index of the entry they precede.

  The table is copied into B->ops[0] when a SeqBAIJ matrix is created; the mult/multadd entries
  listed here are the generic _N kernels, which MatSeqBAIJSetPreallocation_SeqBAIJ() may replace
  with block-size specific ones.
*/
static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
                                       MatGetRow_SeqBAIJ,
                                       MatRestoreRow_SeqBAIJ,
                                       MatMult_SeqBAIJ_N,
                                       /* 4*/ MatMultAdd_SeqBAIJ_N,
                                       MatMultTranspose_SeqBAIJ,
                                       MatMultTransposeAdd_SeqBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       MatLUFactor_SeqBAIJ,
                                       NULL,
                                       NULL,
                                       MatTranspose_SeqBAIJ,
                                       /* 15*/ MatGetInfo_SeqBAIJ,
                                       MatEqual_SeqBAIJ,
                                       MatGetDiagonal_SeqBAIJ,
                                       MatDiagonalScale_SeqBAIJ,
                                       MatNorm_SeqBAIJ,
                                       /* 20*/ NULL,
                                       MatAssemblyEnd_SeqBAIJ,
                                       MatSetOption_SeqBAIJ,
                                       MatZeroEntries_SeqBAIJ,
                                       /* 24*/ MatZeroRows_SeqBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_Seq_Hash,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 34*/ MatDuplicate_SeqBAIJ,
                                       NULL,
                                       NULL,
                                       MatILUFactor_SeqBAIJ,
                                       NULL,
                                       /* 39*/ MatAXPY_SeqBAIJ,
                                       MatCreateSubMatrices_SeqBAIJ,
                                       MatIncreaseOverlap_SeqBAIJ,
                                       MatGetValues_SeqBAIJ,
                                       MatCopy_SeqBAIJ,
                                       /* 44*/ NULL,
                                       MatScale_SeqBAIJ,
                                       MatShift_SeqBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_SeqBAIJ,
                                       /* 49*/ NULL,
                                       MatGetRowIJ_SeqBAIJ,
                                       MatRestoreRowIJ_SeqBAIJ,
                                       MatGetColumnIJ_SeqBAIJ,
                                       MatRestoreColumnIJ_SeqBAIJ,
                                       /* 54*/ MatFDColoringCreate_SeqXAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatSetValuesBlocked_SeqBAIJ,
                                       /* 59*/ MatCreateSubMatrix_SeqBAIJ,
                                       MatDestroy_SeqBAIJ,
                                       MatView_SeqBAIJ,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetRowMaxAbs_SeqBAIJ,
                                       /* 69*/ NULL,
                                       MatConvert_Basic,
                                       NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_SeqBAIJ,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_SeqBAIJ,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatRealPart_SeqBAIJ,
                                       MatImaginaryPart_SeqBAIJ,
                                       NULL,
                                       /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*104*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatMultHermitianTranspose_SeqBAIJ,
                                       MatMultHermitianTransposeAdd_SeqBAIJ,
                                       NULL,
                                       /*114*/ NULL,
                                       MatGetColumnReductions_SeqBAIJ,
                                       MatInvertBlockDiagonal_SeqBAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       MatSetBlockSizes_Default,
                                       NULL,
                                       MatFDColoringSetUp_SeqXAIJ,
                                       /*129*/ NULL,
                                       MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
                                       MatDestroySubMatrices_SeqBAIJ,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       MatEliminateZeros_SeqBAIJ,
                                       MatGetRowSumAbs_SeqBAIJ,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       MatCopyHashToXAIJ_Seq_Hash,
                                       NULL,
                                       NULL};
31122593348eSBarry Smith
MatStoreValues_SeqBAIJ(Mat mat)3113ff6a9541SJacob Faibussowitsch static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3114d71ae5a4SJacob Faibussowitsch {
31153e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
31168ece6314SShri Abhyankar PetscInt nz = aij->i[aij->mbs] * aij->bs2;
31173e90b805SBarry Smith
31183e90b805SBarry Smith PetscFunctionBegin;
31195f80ce2aSJacob Faibussowitsch PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
31203e90b805SBarry Smith
31213e90b805SBarry Smith /* allocate space for values if not already there */
3122ff6a9541SJacob Faibussowitsch if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values));
31233e90b805SBarry Smith
31243e90b805SBarry Smith /* copy values over */
31259566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
31263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
31273e90b805SBarry Smith }
31283e90b805SBarry Smith
MatRetrieveValues_SeqBAIJ(Mat mat)3129ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3130d71ae5a4SJacob Faibussowitsch {
31313e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
31328ece6314SShri Abhyankar PetscInt nz = aij->i[aij->mbs] * aij->bs2;
31333e90b805SBarry Smith
31343e90b805SBarry Smith PetscFunctionBegin;
31355f80ce2aSJacob Faibussowitsch PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
31365f80ce2aSJacob Faibussowitsch PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
31373e90b805SBarry Smith
31383e90b805SBarry Smith /* copy values over */
31399566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
31403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
31413e90b805SBarry Smith }
31423e90b805SBarry Smith
3143cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3144cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3145273d9f13SBarry Smith
MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])3146f9663b93SPierre Jolivet PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3147d71ae5a4SJacob Faibussowitsch {
3148ad79cf63SBarry Smith Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
3149535b19f3SBarry Smith PetscInt i, mbs, nbs, bs2;
31508afaa268SBarry Smith PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3151a23d5eceSKris Buschelman
3152a23d5eceSKris Buschelman PetscFunctionBegin;
3153ad79cf63SBarry Smith if (B->hash_active) {
3154ad79cf63SBarry Smith PetscInt bs;
3155aea10558SJacob Faibussowitsch B->ops[0] = b->cops;
3156ad79cf63SBarry Smith PetscCall(PetscHMapIJVDestroy(&b->ht));
3157ad79cf63SBarry Smith PetscCall(MatGetBlockSize(B, &bs));
3158ad79cf63SBarry Smith if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht));
3159ad79cf63SBarry Smith PetscCall(PetscFree(b->dnz));
3160ad79cf63SBarry Smith PetscCall(PetscFree(b->bdnz));
3161ad79cf63SBarry Smith B->hash_active = PETSC_FALSE;
3162ad79cf63SBarry Smith }
31632576faa2SJed Brown if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3164ab93d7beSBarry Smith if (nz == MAT_SKIP_ALLOCATION) {
3165ab93d7beSBarry Smith skipallocation = PETSC_TRUE;
3166ab93d7beSBarry Smith nz = 0;
3167ab93d7beSBarry Smith }
31688c07d4e3SBarry Smith
316958b7e2c1SStefano Zampini PetscCall(MatSetBlockSize(B, bs));
31709566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap));
31719566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap));
31729566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3173899cda47SBarry Smith
3174899cda47SBarry Smith B->preallocated = PETSC_TRUE;
3175899cda47SBarry Smith
3176d0f46423SBarry Smith mbs = B->rmap->n / bs;
3177d0f46423SBarry Smith nbs = B->cmap->n / bs;
3178a23d5eceSKris Buschelman bs2 = bs * bs;
3179a23d5eceSKris Buschelman
31805f80ce2aSJacob Faibussowitsch PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3181a23d5eceSKris Buschelman
3182a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
31835f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3184a23d5eceSKris Buschelman if (nnz) {
3185a23d5eceSKris Buschelman for (i = 0; i < mbs; i++) {
31865f80ce2aSJacob Faibussowitsch PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
31875f80ce2aSJacob Faibussowitsch PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3188a23d5eceSKris Buschelman }
3189a23d5eceSKris Buschelman }
3190a23d5eceSKris Buschelman
3191d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
31929566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3193d0609cedSBarry Smith PetscOptionsEnd();
31948c07d4e3SBarry Smith
3195a23d5eceSKris Buschelman if (!flg) {
3196a23d5eceSKris Buschelman switch (bs) {
3197a23d5eceSKris Buschelman case 1:
3198a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_1;
3199a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3200a23d5eceSKris Buschelman break;
3201a23d5eceSKris Buschelman case 2:
3202a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_2;
3203a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3204a23d5eceSKris Buschelman break;
3205a23d5eceSKris Buschelman case 3:
3206a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_3;
3207a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3208a23d5eceSKris Buschelman break;
3209a23d5eceSKris Buschelman case 4:
3210a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_4;
3211a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3212a23d5eceSKris Buschelman break;
3213a23d5eceSKris Buschelman case 5:
3214a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_5;
3215a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3216a23d5eceSKris Buschelman break;
3217a23d5eceSKris Buschelman case 6:
3218a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_6;
3219a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3220a23d5eceSKris Buschelman break;
3221a23d5eceSKris Buschelman case 7:
3222a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_7;
3223a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3224a23d5eceSKris Buschelman break;
32259371c9d4SSatish Balay case 9: {
32266679dcc1SBarry Smith PetscInt version = 1;
32279566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
32286679dcc1SBarry Smith switch (version) {
32295f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
32306679dcc1SBarry Smith case 1:
323196e086a2SDaniel Kokron B->ops->mult = MatMult_SeqBAIJ_9_AVX2;
323296e086a2SDaniel Kokron B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
3233835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
32346679dcc1SBarry Smith break;
32356679dcc1SBarry Smith #endif
32366679dcc1SBarry Smith default:
323796e086a2SDaniel Kokron B->ops->mult = MatMult_SeqBAIJ_N;
323896e086a2SDaniel Kokron B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3239835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
324096e086a2SDaniel Kokron break;
32416679dcc1SBarry Smith }
32426679dcc1SBarry Smith break;
32436679dcc1SBarry Smith }
3244ebada01fSBarry Smith case 11:
3245ebada01fSBarry Smith B->ops->mult = MatMult_SeqBAIJ_11;
3246ebada01fSBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3247ebada01fSBarry Smith break;
32489371c9d4SSatish Balay case 12: {
32496679dcc1SBarry Smith PetscInt version = 1;
32509566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
32516679dcc1SBarry Smith switch (version) {
32526679dcc1SBarry Smith case 1:
32536679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_ver1;
32546679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3255835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32568ab949d8SShri Abhyankar break;
32576679dcc1SBarry Smith case 2:
32586679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_ver2;
32596679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
3260835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32616679dcc1SBarry Smith break;
32626679dcc1SBarry Smith #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
32636679dcc1SBarry Smith case 3:
32646679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_12_AVX2;
32656679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3266835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
32676679dcc1SBarry Smith break;
32686679dcc1SBarry Smith #endif
3269a23d5eceSKris Buschelman default:
3270a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_N;
3271a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3272835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
32736679dcc1SBarry Smith break;
32746679dcc1SBarry Smith }
32756679dcc1SBarry Smith break;
32766679dcc1SBarry Smith }
32779371c9d4SSatish Balay case 15: {
32786679dcc1SBarry Smith PetscInt version = 1;
32799566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
32806679dcc1SBarry Smith switch (version) {
32816679dcc1SBarry Smith case 1:
32826679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver1;
3283835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32846679dcc1SBarry Smith break;
32856679dcc1SBarry Smith case 2:
32866679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver2;
3287835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32886679dcc1SBarry Smith break;
32896679dcc1SBarry Smith case 3:
32906679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver3;
3291835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32926679dcc1SBarry Smith break;
32936679dcc1SBarry Smith case 4:
32946679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_15_ver4;
3295835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32966679dcc1SBarry Smith break;
32976679dcc1SBarry Smith default:
32986679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_N;
3299835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
33006679dcc1SBarry Smith break;
33016679dcc1SBarry Smith }
33026679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_N;
33036679dcc1SBarry Smith break;
33046679dcc1SBarry Smith }
33056679dcc1SBarry Smith default:
33066679dcc1SBarry Smith B->ops->mult = MatMult_SeqBAIJ_N;
33076679dcc1SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3308835f2295SStefano Zampini PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3309a23d5eceSKris Buschelman break;
3310a23d5eceSKris Buschelman }
3311a23d5eceSKris Buschelman }
3312e48d15efSToby Isaac B->ops->sor = MatSOR_SeqBAIJ;
3313a23d5eceSKris Buschelman b->mbs = mbs;
3314a23d5eceSKris Buschelman b->nbs = nbs;
3315ab93d7beSBarry Smith if (!skipallocation) {
33162ee49352SLisandro Dalcin if (!b->imax) {
33179566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
331826fbe8dcSKarl Rupp
33194fd072dbSBarry Smith b->free_imax_ilen = PETSC_TRUE;
33202ee49352SLisandro Dalcin }
3321ab93d7beSBarry Smith /* b->ilen will count nonzeros in each block row so far. */
332226fbe8dcSKarl Rupp for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3323a23d5eceSKris Buschelman if (!nnz) {
3324a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3325c62bd62aSJed Brown else if (nz < 0) nz = 1;
33265d2a9ed1SStefano Zampini nz = PetscMin(nz, nbs);
3327a23d5eceSKris Buschelman for (i = 0; i < mbs; i++) b->imax[i] = nz;
33289566063dSJacob Faibussowitsch PetscCall(PetscIntMultError(nz, mbs, &nz));
3329a23d5eceSKris Buschelman } else {
3330c73702f5SBarry Smith PetscInt64 nz64 = 0;
33319371c9d4SSatish Balay for (i = 0; i < mbs; i++) {
33329371c9d4SSatish Balay b->imax[i] = nnz[i];
33339371c9d4SSatish Balay nz64 += nnz[i];
33349371c9d4SSatish Balay }
33359566063dSJacob Faibussowitsch PetscCall(PetscIntCast(nz64, &nz));
3336a23d5eceSKris Buschelman }
3337a23d5eceSKris Buschelman
3338a23d5eceSKris Buschelman /* allocate the matrix space */
33399566063dSJacob Faibussowitsch PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
33409f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&b->j));
33419f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(B->rmap->N + 1, sizeof(PetscInt), (void **)&b->i));
3342672ba085SHong Zhang if (B->structure_only) {
33439f0612e4SBarry Smith b->free_a = PETSC_FALSE;
3344672ba085SHong Zhang } else {
33456679dcc1SBarry Smith PetscInt nzbs2 = 0;
33469566063dSJacob Faibussowitsch PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
33479f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(nzbs2, sizeof(PetscScalar), (void **)&b->a));
33489f0612e4SBarry Smith b->free_a = PETSC_TRUE;
33491766d9c3SPierre Jolivet PetscCall(PetscArrayzero(b->a, nzbs2));
3350672ba085SHong Zhang }
3351672ba085SHong Zhang b->free_ij = PETSC_TRUE;
33529f0612e4SBarry Smith PetscCall(PetscArrayzero(b->j, nz));
3353672ba085SHong Zhang
3354a23d5eceSKris Buschelman b->i[0] = 0;
3355ad540459SPierre Jolivet for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3356e811da20SHong Zhang } else {
3357e6b907acSBarry Smith b->free_a = PETSC_FALSE;
3358e6b907acSBarry Smith b->free_ij = PETSC_FALSE;
3359ab93d7beSBarry Smith }
3360a23d5eceSKris Buschelman
3361a23d5eceSKris Buschelman b->bs2 = bs2;
3362a23d5eceSKris Buschelman b->mbs = mbs;
3363a23d5eceSKris Buschelman b->nz = 0;
3364b32cb4a7SJed Brown b->maxnz = nz;
3365b32cb4a7SJed Brown B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3366cb7b82ddSBarry Smith B->was_assembled = PETSC_FALSE;
3367cb7b82ddSBarry Smith B->assembled = PETSC_FALSE;
33689566063dSJacob Faibussowitsch if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
33693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
3370a23d5eceSKris Buschelman }
3371a23d5eceSKris Buschelman
MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])337266976f2fSJacob Faibussowitsch static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
3373d71ae5a4SJacob Faibussowitsch {
3374725b52f3SLisandro Dalcin PetscInt i, m, nz, nz_max = 0, *nnz;
3375f4259b30SLisandro Dalcin PetscScalar *values = NULL;
3376d47bf9aaSJed Brown PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3377725b52f3SLisandro Dalcin
3378725b52f3SLisandro Dalcin PetscFunctionBegin;
33795f80ce2aSJacob Faibussowitsch PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
33809566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
33819566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
33829566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap));
33839566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap));
33849566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3385d0f46423SBarry Smith m = B->rmap->n / bs;
3386725b52f3SLisandro Dalcin
33875f80ce2aSJacob Faibussowitsch PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
33889566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m + 1, &nnz));
3389725b52f3SLisandro Dalcin for (i = 0; i < m; i++) {
3390cf12db73SBarry Smith nz = ii[i + 1] - ii[i];
33915f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3392725b52f3SLisandro Dalcin nz_max = PetscMax(nz_max, nz);
3393725b52f3SLisandro Dalcin nnz[i] = nz;
3394725b52f3SLisandro Dalcin }
33959566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
33969566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz));
3397725b52f3SLisandro Dalcin
3398725b52f3SLisandro Dalcin values = (PetscScalar *)V;
339948a46eb9SPierre Jolivet if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3400725b52f3SLisandro Dalcin for (i = 0; i < m; i++) {
3401cf12db73SBarry Smith PetscInt ncols = ii[i + 1] - ii[i];
3402cf12db73SBarry Smith const PetscInt *icols = jj + ii[i];
3403bb80cfbbSStefano Zampini if (bs == 1 || !roworiented) {
3404cf12db73SBarry Smith const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
34059566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
34063adadaf3SJed Brown } else {
34073adadaf3SJed Brown PetscInt j;
34083adadaf3SJed Brown for (j = 0; j < ncols; j++) {
34093adadaf3SJed Brown const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
34109566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
34113adadaf3SJed Brown }
34123adadaf3SJed Brown }
3413725b52f3SLisandro Dalcin }
34149566063dSJacob Faibussowitsch if (!V) PetscCall(PetscFree(values));
34159566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
34169566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
34179566063dSJacob Faibussowitsch PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
34183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
3419725b52f3SLisandro Dalcin }
3420725b52f3SLisandro Dalcin
/*@C
  MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored

  Not Collective

  Input Parameter:
. A - a `MATSEQBAIJ` matrix

  Output Parameter:
. array - pointer to the data

  Level: intermediate

  Note:
  Each call should be paired with a call to `MatSeqBAIJRestoreArray()` once the array is no longer needed.

.seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
@*/
PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar *array[])
{
  PetscFunctionBegin;
  /* forwarded to the method attached to A under the name "MatSeqBAIJGetArray_C" */
  PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3442cda14afcSprj-
/*@C
  MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored, previously obtained with `MatSeqBAIJGetArray()`

  Not Collective

  Input Parameters:
+ A     - a `MATSEQBAIJ` matrix
- array - pointer to the data

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
@*/
PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar *array[])
{
  PetscFunctionBegin;
  /* forwarded to the method attached to A under the name "MatSeqBAIJRestoreArray_C" */
  PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3462cda14afcSprj-
/*MC
   MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
   block sparse compressed row format.

   Options Database Keys:
+ -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()`
- -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)

   Level: beginner

   Notes:
   `MatSetOption`(mat, `MAT_STRUCTURE_ONLY`, `PETSC_TRUE`) may be called for this matrix type. In this case no
   space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

   Run with `-info` to see what version of the matrix-vector product is being used

.seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()`
M*/
34810bad9183SKris Buschelman
3482cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3483b24902e0SBarry Smith
MatCreate_SeqBAIJ(Mat B)3484d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3485d71ae5a4SJacob Faibussowitsch {
3486c1ac3661SBarry Smith PetscMPIInt size;
3487b6490206SBarry Smith Mat_SeqBAIJ *b;
34883b2fbd54SBarry Smith
34893a40ed3dSBarry Smith PetscFunctionBegin;
34909566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
34915f80ce2aSJacob Faibussowitsch PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3492b6490206SBarry Smith
34934dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&b));
3494b0a32e0cSBarry Smith B->data = (void *)b;
3495aea10558SJacob Faibussowitsch B->ops[0] = MatOps_Values;
349626fbe8dcSKarl Rupp
3497f4259b30SLisandro Dalcin b->row = NULL;
3498f4259b30SLisandro Dalcin b->col = NULL;
3499f4259b30SLisandro Dalcin b->icol = NULL;
35002593348eSBarry Smith b->reallocs = 0;
3501f4259b30SLisandro Dalcin b->saved_values = NULL;
35022593348eSBarry Smith
3503c4992f7dSBarry Smith b->roworiented = PETSC_TRUE;
35042593348eSBarry Smith b->nonew = 0;
3505f4259b30SLisandro Dalcin b->diag = NULL;
3506f4259b30SLisandro Dalcin B->spptr = NULL;
3507b32cb4a7SJed Brown B->info.nz_unneeded = (PetscReal)b->maxnz * b->bs2;
3508a9817697SBarry Smith b->keepnonzeropattern = PETSC_FALSE;
35094e220ebcSLois Curfman McInnes
35109566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
35119566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
35129566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
35139566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
35149566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
35159566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
35169566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
35179566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
35189566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
35199566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
35207ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
35219566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
35227ea3e4caSstefano_zampini #endif
35239566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
35249566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
35253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
35262593348eSBarry Smith }
35272593348eSBarry Smith
MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)3528d6acfc2dSPierre Jolivet PETSC_INTERN PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
3529d71ae5a4SJacob Faibussowitsch {
3530b24902e0SBarry Smith Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3531a96a251dSBarry Smith PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3532de6a44a3SBarry Smith
35333a40ed3dSBarry Smith PetscFunctionBegin;
353431fe6a7dSBarry Smith PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix");
35355f80ce2aSJacob Faibussowitsch PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
35362593348eSBarry Smith
35374fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
35384fd072dbSBarry Smith c->imax = a->imax;
35394fd072dbSBarry Smith c->ilen = a->ilen;
35404fd072dbSBarry Smith c->free_imax_ilen = PETSC_FALSE;
35414fd072dbSBarry Smith } else {
35429566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3543b6490206SBarry Smith for (i = 0; i < mbs; i++) {
35442593348eSBarry Smith c->imax[i] = a->imax[i];
35452593348eSBarry Smith c->ilen[i] = a->ilen[i];
35462593348eSBarry Smith }
35474fd072dbSBarry Smith c->free_imax_ilen = PETSC_TRUE;
35484fd072dbSBarry Smith }
35492593348eSBarry Smith
35502593348eSBarry Smith /* allocate the matrix space */
355116a2bf60SHong Zhang if (mallocmatspace) {
35524fd072dbSBarry Smith if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
35539f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
35549f0612e4SBarry Smith PetscCall(PetscArrayzero(c->a, bs2 * nz));
35559f0612e4SBarry Smith c->free_a = PETSC_TRUE;
35564fd072dbSBarry Smith c->i = a->i;
35574fd072dbSBarry Smith c->j = a->j;
3558379be0ddSLisandro Dalcin c->free_ij = PETSC_FALSE;
35594fd072dbSBarry Smith c->parent = A;
35601e40a84eSLisandro Dalcin C->preallocated = PETSC_TRUE;
35611e40a84eSLisandro Dalcin C->assembled = PETSC_TRUE;
356226fbe8dcSKarl Rupp
35639566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)A));
35649566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
35659566063dSJacob Faibussowitsch PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
35664fd072dbSBarry Smith } else {
35679f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
35689f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&c->j));
35699f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&c->i));
3570379be0ddSLisandro Dalcin c->free_a = PETSC_TRUE;
35714fd072dbSBarry Smith c->free_ij = PETSC_TRUE;
357226fbe8dcSKarl Rupp
35739566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3574b6490206SBarry Smith if (mbs > 0) {
35759566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->j, a->j, nz));
35762e8a6d31SBarry Smith if (cpvalues == MAT_COPY_VALUES) {
35779566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
35782e8a6d31SBarry Smith } else {
35799566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(c->a, bs2 * nz));
35802593348eSBarry Smith }
35812593348eSBarry Smith }
35821e40a84eSLisandro Dalcin C->preallocated = PETSC_TRUE;
35831e40a84eSLisandro Dalcin C->assembled = PETSC_TRUE;
358416a2bf60SHong Zhang }
35854fd072dbSBarry Smith }
358616a2bf60SHong Zhang
35872593348eSBarry Smith c->roworiented = a->roworiented;
35882593348eSBarry Smith c->nonew = a->nonew;
358926fbe8dcSKarl Rupp
35909566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
35919566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
359226fbe8dcSKarl Rupp
35935c9eb25fSBarry Smith c->bs2 = a->bs2;
35945c9eb25fSBarry Smith c->mbs = a->mbs;
35955c9eb25fSBarry Smith c->nbs = a->nbs;
35962593348eSBarry Smith c->nz = a->nz;
3597f2cbd3d5SJed Brown c->maxnz = a->nz; /* Since we allocate exactly the right amount */
3598f361c04dSBarry Smith c->solve_work = NULL;
3599f361c04dSBarry Smith c->mult_work = NULL;
3600f361c04dSBarry Smith c->sor_workt = NULL;
3601f361c04dSBarry Smith c->sor_work = NULL;
360288e51ccdSHong Zhang
360388e51ccdSHong Zhang c->compressedrow.use = a->compressedrow.use;
360488e51ccdSHong Zhang c->compressedrow.nrows = a->compressedrow.nrows;
3605cd6b891eSBarry Smith if (a->compressedrow.use) {
360688e51ccdSHong Zhang i = a->compressedrow.nrows;
36079566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
36089566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
36099566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
361088e51ccdSHong Zhang } else {
361188e51ccdSHong Zhang c->compressedrow.use = PETSC_FALSE;
36120298fd71SBarry Smith c->compressedrow.i = NULL;
36130298fd71SBarry Smith c->compressedrow.rindex = NULL;
361488e51ccdSHong Zhang }
3615c05f355bSMark Adams c->nonzerorowcnt = a->nonzerorowcnt;
3616e56f5c9eSBarry Smith C->nonzerostate = A->nonzerostate;
361726fbe8dcSKarl Rupp
36189566063dSJacob Faibussowitsch PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
36193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
36202593348eSBarry Smith }
36212593348eSBarry Smith
MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat * B)3622d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
3623d71ae5a4SJacob Faibussowitsch {
3624b24902e0SBarry Smith PetscFunctionBegin;
36259566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
36269566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
36279566063dSJacob Faibussowitsch PetscCall(MatSetType(*B, MATSEQBAIJ));
36289566063dSJacob Faibussowitsch PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
36293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
3630b24902e0SBarry Smith }
3631b24902e0SBarry Smith
3632618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)3633d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
3634d71ae5a4SJacob Faibussowitsch {
3635b51a4376SLisandro Dalcin PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3636b51a4376SLisandro Dalcin PetscInt *rowidxs, *colidxs;
3637b51a4376SLisandro Dalcin PetscScalar *matvals;
3638b51a4376SLisandro Dalcin
3639b51a4376SLisandro Dalcin PetscFunctionBegin;
36409566063dSJacob Faibussowitsch PetscCall(PetscViewerSetUp(viewer));
3641b51a4376SLisandro Dalcin
3642b51a4376SLisandro Dalcin /* read matrix header */
36439566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
36445f80ce2aSJacob Faibussowitsch PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
36459371c9d4SSatish Balay M = header[1];
36469371c9d4SSatish Balay N = header[2];
36479371c9d4SSatish Balay nz = header[3];
36485f80ce2aSJacob Faibussowitsch PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
36495f80ce2aSJacob Faibussowitsch PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
36505f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3651b51a4376SLisandro Dalcin
3652b51a4376SLisandro Dalcin /* set block sizes from the viewer's .info file */
36539566063dSJacob Faibussowitsch PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3654b51a4376SLisandro Dalcin /* set local and global sizes if not set already */
3655b51a4376SLisandro Dalcin if (mat->rmap->n < 0) mat->rmap->n = M;
3656b51a4376SLisandro Dalcin if (mat->cmap->n < 0) mat->cmap->n = N;
3657b51a4376SLisandro Dalcin if (mat->rmap->N < 0) mat->rmap->N = M;
3658b51a4376SLisandro Dalcin if (mat->cmap->N < 0) mat->cmap->N = N;
36599566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->rmap));
36609566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->cmap));
3661b51a4376SLisandro Dalcin
3662b51a4376SLisandro Dalcin /* check if the matrix sizes are correct */
36639566063dSJacob Faibussowitsch PetscCall(MatGetSize(mat, &rows, &cols));
36645f80ce2aSJacob Faibussowitsch PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
36659566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(mat, &bs));
36669566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(mat, &m, &n));
36679371c9d4SSatish Balay mbs = m / bs;
36689371c9d4SSatish Balay nbs = n / bs;
3669b51a4376SLisandro Dalcin
3670b51a4376SLisandro Dalcin /* read in row lengths, column indices and nonzero values */
36719566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m + 1, &rowidxs));
36729566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
36739371c9d4SSatish Balay rowidxs[0] = 0;
36749371c9d4SSatish Balay for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3675b51a4376SLisandro Dalcin sum = rowidxs[m];
36765f80ce2aSJacob Faibussowitsch PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3677b51a4376SLisandro Dalcin
3678b51a4376SLisandro Dalcin /* read in column indices and nonzero values */
36799566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
36809566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
36819566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3682b51a4376SLisandro Dalcin
3683b51a4376SLisandro Dalcin { /* preallocate matrix storage */
3684b51a4376SLisandro Dalcin PetscBT bt; /* helper bit set to count nonzeros */
3685b51a4376SLisandro Dalcin PetscInt *nnz;
3686618cc2edSLisandro Dalcin PetscBool sbaij;
3687b51a4376SLisandro Dalcin
36889566063dSJacob Faibussowitsch PetscCall(PetscBTCreate(nbs, &bt));
36899566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(mbs, &nnz));
36909566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3691b51a4376SLisandro Dalcin for (i = 0; i < mbs; i++) {
36929566063dSJacob Faibussowitsch PetscCall(PetscBTMemzero(nbs, bt));
3693618cc2edSLisandro Dalcin for (k = 0; k < bs; k++) {
3694618cc2edSLisandro Dalcin PetscInt row = bs * i + k;
3695618cc2edSLisandro Dalcin for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3696618cc2edSLisandro Dalcin PetscInt col = colidxs[j];
3697618cc2edSLisandro Dalcin if (!sbaij || col >= row)
3698618cc2edSLisandro Dalcin if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3699618cc2edSLisandro Dalcin }
3700618cc2edSLisandro Dalcin }
3701b51a4376SLisandro Dalcin }
37029566063dSJacob Faibussowitsch PetscCall(PetscBTDestroy(&bt));
37039566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
37049566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
37059566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz));
3706b51a4376SLisandro Dalcin }
3707b51a4376SLisandro Dalcin
3708b51a4376SLisandro Dalcin /* store matrix values */
3709b51a4376SLisandro Dalcin for (i = 0; i < m; i++) {
3710b51a4376SLisandro Dalcin PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
37119927e4dfSBarry Smith PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3712b51a4376SLisandro Dalcin }
3713b51a4376SLisandro Dalcin
37149566063dSJacob Faibussowitsch PetscCall(PetscFree(rowidxs));
37159566063dSJacob Faibussowitsch PetscCall(PetscFree2(colidxs, matvals));
37169566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
37179566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
37183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
3719b51a4376SLisandro Dalcin }
3720b51a4376SLisandro Dalcin
MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)3721d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
3722d71ae5a4SJacob Faibussowitsch {
37237f489da9SVaclav Hapla PetscBool isbinary;
3724f501eaabSShri Abhyankar
3725f501eaabSShri Abhyankar PetscFunctionBegin;
37269566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
37275f80ce2aSJacob Faibussowitsch PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
37289566063dSJacob Faibussowitsch PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
37293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
3730f501eaabSShri Abhyankar }
3731f501eaabSShri Abhyankar
37325d83a8b1SBarry Smith /*@
373311a5261eSBarry Smith MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3734273d9f13SBarry Smith compressed row) format. For good matrix assembly performance the
373520f4b53cSBarry Smith user should preallocate the matrix storage by setting the parameter `nz`
373620f4b53cSBarry Smith (or the array `nnz`).
37372593348eSBarry Smith
3738d083f849SBarry Smith Collective
3739273d9f13SBarry Smith
3740273d9f13SBarry Smith Input Parameters:
374111a5261eSBarry Smith + comm - MPI communicator, set to `PETSC_COMM_SELF`
374211a5261eSBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
374311a5261eSBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3744273d9f13SBarry Smith . m - number of rows
3745273d9f13SBarry Smith . n - number of columns
374635d8aa7fSBarry Smith . nz - number of nonzero blocks per block row (same for all rows)
374735d8aa7fSBarry Smith - nnz - array containing the number of nonzero blocks in the various block rows
374820f4b53cSBarry Smith (possibly different for each block row) or `NULL`
3749273d9f13SBarry Smith
3750273d9f13SBarry Smith Output Parameter:
3751273d9f13SBarry Smith . A - the matrix
3752273d9f13SBarry Smith
3753273d9f13SBarry Smith Options Database Keys:
375411a5261eSBarry Smith + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
3755a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use
3756273d9f13SBarry Smith
3757273d9f13SBarry Smith Level: intermediate
3758273d9f13SBarry Smith
3759273d9f13SBarry Smith Notes:
376077433607SBarry Smith It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
37612ef1f0ffSBarry Smith MatXXXXSetPreallocation() paradigm instead of this routine directly.
37622ef1f0ffSBarry Smith [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
37632ef1f0ffSBarry Smith
3764d1be2dadSMatthew Knepley The number of rows and columns must be divisible by blocksize.
3765d1be2dadSMatthew Knepley
37662ef1f0ffSBarry Smith If the `nnz` parameter is given then the `nz` parameter is ignored
376749a6f317SBarry Smith
376835d8aa7fSBarry Smith A nonzero block is any block that as 1 or more nonzeros in it
376935d8aa7fSBarry Smith
37702ef1f0ffSBarry Smith The `MATSEQBAIJ` format is fully compatible with standard Fortran
3771273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at
377220f4b53cSBarry Smith either one (as in Fortran) or zero.
3773273d9f13SBarry Smith
37742ef1f0ffSBarry Smith Specify the preallocated storage with either `nz` or `nnz` (not both).
37752ef1f0ffSBarry Smith Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3776651615e1SBarry Smith allocation. See [Sparse Matrices](sec_matsparse) for details.
3777273d9f13SBarry Smith matrices.
3778273d9f13SBarry Smith
37791cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3780273d9f13SBarry Smith @*/
MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat * A)3781d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
3782d71ae5a4SJacob Faibussowitsch {
3783273d9f13SBarry Smith PetscFunctionBegin;
37849566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A));
37859566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, m, n));
37869566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSEQBAIJ));
37879566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
37883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
3789273d9f13SBarry Smith }
3790273d9f13SBarry Smith
37915d83a8b1SBarry Smith /*@
3792273d9f13SBarry Smith MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3793273d9f13SBarry Smith per row in the matrix. For good matrix assembly performance the
379420f4b53cSBarry Smith user should preallocate the matrix storage by setting the parameter `nz`
379520f4b53cSBarry Smith (or the array `nnz`).
3796273d9f13SBarry Smith
3797d083f849SBarry Smith Collective
3798273d9f13SBarry Smith
3799273d9f13SBarry Smith Input Parameters:
38001c4f3114SJed Brown + B - the matrix
380111a5261eSBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
380211a5261eSBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3803273d9f13SBarry Smith . nz - number of block nonzeros per block row (same for all rows)
3804273d9f13SBarry Smith - nnz - array containing the number of block nonzeros in the various block rows
38052ef1f0ffSBarry Smith (possibly different for each block row) or `NULL`
3806273d9f13SBarry Smith
3807273d9f13SBarry Smith Options Database Keys:
380811a5261eSBarry Smith + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
3809a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use
3810273d9f13SBarry Smith
3811273d9f13SBarry Smith Level: intermediate
3812273d9f13SBarry Smith
3813273d9f13SBarry Smith Notes:
38142ef1f0ffSBarry Smith If the `nnz` parameter is given then the `nz` parameter is ignored
381549a6f317SBarry Smith
381611a5261eSBarry Smith You can call `MatGetInfo()` to get information on how effective the preallocation was;
3817aa95bbe8SBarry Smith for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
381820f4b53cSBarry Smith You can also run with the option `-info` and look for messages with the string
3819aa95bbe8SBarry Smith malloc in them to see if additional memory allocation was needed.
3820aa95bbe8SBarry Smith
38212ef1f0ffSBarry Smith The `MATSEQBAIJ` format is fully compatible with standard Fortran
3822273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at
382320f4b53cSBarry Smith either one (as in Fortran) or zero.
3824273d9f13SBarry Smith
3825d8a51d2aSBarry Smith Specify the preallocated storage with either `nz` or `nnz` (not both).
38262ef1f0ffSBarry Smith Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3827651615e1SBarry Smith allocation. See [Sparse Matrices](sec_matsparse) for details.
3828273d9f13SBarry Smith
38291cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3830273d9f13SBarry Smith @*/
MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])3831d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3832d71ae5a4SJacob Faibussowitsch {
3833273d9f13SBarry Smith PetscFunctionBegin;
38346ba663aaSJed Brown PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
38356ba663aaSJed Brown PetscValidType(B, 1);
38366ba663aaSJed Brown PetscValidLogicalCollectiveInt(B, bs, 2);
3837cac4c232SBarry Smith PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
38383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
3839273d9f13SBarry Smith }
3840a1d92eedSBarry Smith
3841725b52f3SLisandro Dalcin /*@C
384211a5261eSBarry Smith MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3843725b52f3SLisandro Dalcin
3844d083f849SBarry Smith Collective
3845725b52f3SLisandro Dalcin
3846725b52f3SLisandro Dalcin Input Parameters:
38471c4f3114SJed Brown + B - the matrix
384820f4b53cSBarry Smith . bs - the blocksize
3849d8a51d2aSBarry Smith . i - the indices into `j` for the start of each local row (indices start with zero)
3850d8a51d2aSBarry Smith . j - the column indices for each local row (indices start with zero) these must be sorted for each row
3851d8a51d2aSBarry Smith - v - optional values in the matrix, use `NULL` if not provided
3852725b52f3SLisandro Dalcin
3853664954b6SBarry Smith Level: advanced
3854725b52f3SLisandro Dalcin
38553adadaf3SJed Brown Notes:
3856d8a51d2aSBarry Smith The `i`,`j`,`v` values are COPIED with this routine; to avoid the copy use `MatCreateSeqBAIJWithArrays()`
3857d8a51d2aSBarry Smith
385811a5261eSBarry Smith The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs
385911a5261eSBarry Smith may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
38603adadaf3SJed Brown over rows within a block and the last index is over columns within a block row. Fortran programs will likely set
386111a5261eSBarry Smith `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
38623adadaf3SJed Brown block column and the second index is over columns within a block.
38633adadaf3SJed Brown
3864664954b6SBarry Smith Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3865664954b6SBarry Smith
38661cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3867725b52f3SLisandro Dalcin @*/
MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[],const PetscScalar v[])3868d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3869d71ae5a4SJacob Faibussowitsch {
3870725b52f3SLisandro Dalcin PetscFunctionBegin;
38716ba663aaSJed Brown PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
38726ba663aaSJed Brown PetscValidType(B, 1);
38736ba663aaSJed Brown PetscValidLogicalCollectiveInt(B, bs, 2);
3874cac4c232SBarry Smith PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
38753ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
3876725b52f3SLisandro Dalcin }
3877725b52f3SLisandro Dalcin
3878c75a6043SHong Zhang /*@
387911a5261eSBarry Smith MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3880c75a6043SHong Zhang
3881d083f849SBarry Smith Collective
3882c75a6043SHong Zhang
3883c75a6043SHong Zhang Input Parameters:
3884c75a6043SHong Zhang + comm - must be an MPI communicator of size 1
3885c75a6043SHong Zhang . bs - size of block
3886c75a6043SHong Zhang . m - number of rows
3887c75a6043SHong Zhang . n - number of columns
3888483a2f95SBarry Smith . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3889c75a6043SHong Zhang . j - column indices
3890c75a6043SHong Zhang - a - matrix values
3891c75a6043SHong Zhang
3892c75a6043SHong Zhang Output Parameter:
3893c75a6043SHong Zhang . mat - the matrix
3894c75a6043SHong Zhang
3895dfb205c3SBarry Smith Level: advanced
3896c75a6043SHong Zhang
3897c75a6043SHong Zhang Notes:
38982ef1f0ffSBarry Smith The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays
3899c75a6043SHong Zhang once the matrix is destroyed
3900c75a6043SHong Zhang
3901c75a6043SHong Zhang You cannot set new nonzero locations into this matrix, that will generate an error.
3902c75a6043SHong Zhang
39032ef1f0ffSBarry Smith The `i` and `j` indices are 0 based
3904c75a6043SHong Zhang
390511a5261eSBarry Smith When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3906dfb205c3SBarry Smith
39073adadaf3SJed Brown The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
39083adadaf3SJed Brown the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
39093adadaf3SJed Brown block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory
39103adadaf3SJed Brown with column-major ordering within blocks.
3911dfb205c3SBarry Smith
39121cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3913c75a6043SHong Zhang @*/
MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat * mat)3914d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
3915d71ae5a4SJacob Faibussowitsch {
3916c75a6043SHong Zhang Mat_SeqBAIJ *baij;
3917c75a6043SHong Zhang
3918c75a6043SHong Zhang PetscFunctionBegin;
39195f80ce2aSJacob Faibussowitsch PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
39205f80ce2aSJacob Faibussowitsch if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3921c75a6043SHong Zhang
39229566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, mat));
39239566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*mat, m, n, m, n));
39249566063dSJacob Faibussowitsch PetscCall(MatSetType(*mat, MATSEQBAIJ));
39259566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3926c75a6043SHong Zhang baij = (Mat_SeqBAIJ *)(*mat)->data;
39279566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
3928c75a6043SHong Zhang
3929c75a6043SHong Zhang baij->i = i;
3930c75a6043SHong Zhang baij->j = j;
3931c75a6043SHong Zhang baij->a = a;
393226fbe8dcSKarl Rupp
3933c75a6043SHong Zhang baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3934e6b907acSBarry Smith baij->free_a = PETSC_FALSE;
3935e6b907acSBarry Smith baij->free_ij = PETSC_FALSE;
3936ceb5bf51SJacob Faibussowitsch baij->free_imax_ilen = PETSC_TRUE;
3937c75a6043SHong Zhang
3938ceb5bf51SJacob Faibussowitsch for (PetscInt ii = 0; ii < m; ii++) {
3939ceb5bf51SJacob Faibussowitsch const PetscInt row_len = i[ii + 1] - i[ii];
3940ceb5bf51SJacob Faibussowitsch
3941ceb5bf51SJacob Faibussowitsch baij->ilen[ii] = baij->imax[ii] = row_len;
3942ceb5bf51SJacob Faibussowitsch PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len);
3943c75a6043SHong Zhang }
394476bd3646SJed Brown if (PetscDefined(USE_DEBUG)) {
3945ceb5bf51SJacob Faibussowitsch for (PetscInt ii = 0; ii < baij->i[m]; ii++) {
39466bdcaf15SBarry Smith PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
39476bdcaf15SBarry Smith PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3948c75a6043SHong Zhang }
394976bd3646SJed Brown }
3950c75a6043SHong Zhang
39519566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
39529566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
39533ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
3954c75a6043SHong Zhang }
3955bdf6f3fcSHong Zhang
MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat * outmat)3956d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3957d71ae5a4SJacob Faibussowitsch {
3958bdf6f3fcSHong Zhang PetscFunctionBegin;
39599566063dSJacob Faibussowitsch PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
39603ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
3961bdf6f3fcSHong Zhang }
3962