xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 27f169480a6668db3ba90f8ef6ef68d542d113fa)
/*
    Defines the basic matrix operations for the BAIJ (compressed row)
  matrix storage format.
*/
5c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> /*I   "petscmat.h"  I*/
6c6db04a5SJed Brown #include <petscblaslapack.h>
7af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h>
8af0996ceSBarry Smith #include <petsc/private/kernels/blockmatmult.h>
943516a2dSKris Buschelman 
/* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */
1126cec326SBarry Smith #define TYPE BAIJ
1226cec326SBarry Smith #define TYPE_BS
1326cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
1426cec326SBarry Smith #undef TYPE_BS
1526cec326SBarry Smith #define TYPE_BS _BS
1626cec326SBarry Smith #define TYPE_BS_ON
1726cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
1826cec326SBarry Smith #undef TYPE_BS
1926cec326SBarry Smith #include "../src/mat/impls/aij/seq/seqhashmat.h"
2026cec326SBarry Smith #undef TYPE
2126cec326SBarry Smith #undef TYPE_BS_ON
2226cec326SBarry Smith 
237ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
247ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
257ea3e4caSstefano_zampini #endif
267ea3e4caSstefano_zampini 
27b5b72c8aSIrina Sokolova #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
28fd9d3c67SJed Brown PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
29b5b72c8aSIrina Sokolova #endif
30c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
31b5b72c8aSIrina Sokolova 
32*421480d9SBarry Smith MatGetDiagonalMarkers(SeqBAIJ, A->rmap->bs)
33*421480d9SBarry Smith 
MatGetColumnReductions_SeqBAIJ(Mat A,PetscInt type,PetscReal * reductions)34ff6a9541SJacob Faibussowitsch static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions)
35d71ae5a4SJacob Faibussowitsch {
369463ebdaSPierre Jolivet   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
37ff6a9541SJacob Faibussowitsch   PetscInt     m, n, ib, jb, bs = A->rmap->bs;
389463ebdaSPierre Jolivet   MatScalar   *a_val = a_aij->a;
399463ebdaSPierre Jolivet 
409463ebdaSPierre Jolivet   PetscFunctionBegin;
419566063dSJacob Faibussowitsch   PetscCall(MatGetSize(A, &m, &n));
42ff6a9541SJacob Faibussowitsch   PetscCall(PetscArrayzero(reductions, n));
439463ebdaSPierre Jolivet   if (type == NORM_2) {
44ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
459463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
469463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
47857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
489463ebdaSPierre Jolivet           a_val++;
499463ebdaSPierre Jolivet         }
509463ebdaSPierre Jolivet       }
519463ebdaSPierre Jolivet     }
529463ebdaSPierre Jolivet   } else if (type == NORM_1) {
53ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
549463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
559463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
56857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
579463ebdaSPierre Jolivet           a_val++;
589463ebdaSPierre Jolivet         }
599463ebdaSPierre Jolivet       }
609463ebdaSPierre Jolivet     }
619463ebdaSPierre Jolivet   } else if (type == NORM_INFINITY) {
62ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
639463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
649463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
656497c311SBarry Smith           PetscInt col    = A->cmap->rstart + a_aij->j[i] * bs + jb;
66857cbf51SRichard Tran Mills           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
679463ebdaSPierre Jolivet           a_val++;
689463ebdaSPierre Jolivet         }
699463ebdaSPierre Jolivet       }
709463ebdaSPierre Jolivet     }
71857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
72ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
73857cbf51SRichard Tran Mills       for (jb = 0; jb < bs; jb++) {
74857cbf51SRichard Tran Mills         for (ib = 0; ib < bs; ib++) {
75857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
76857cbf51SRichard Tran Mills           a_val++;
77857cbf51SRichard Tran Mills         }
78857cbf51SRichard Tran Mills       }
79857cbf51SRichard Tran Mills     }
80857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
81ff6a9541SJacob Faibussowitsch     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
82857cbf51SRichard Tran Mills       for (jb = 0; jb < bs; jb++) {
83857cbf51SRichard Tran Mills         for (ib = 0; ib < bs; ib++) {
84857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
85857cbf51SRichard Tran Mills           a_val++;
86857cbf51SRichard Tran Mills         }
87857cbf51SRichard Tran Mills       }
88857cbf51SRichard Tran Mills     }
89857cbf51SRichard Tran Mills   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
909463ebdaSPierre Jolivet   if (type == NORM_2) {
91ff6a9541SJacob Faibussowitsch     for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
92857cbf51SRichard Tran Mills   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
93ff6a9541SJacob Faibussowitsch     for (PetscInt i = 0; i < n; i++) reductions[i] /= m;
949463ebdaSPierre Jolivet   }
953ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
969463ebdaSPierre Jolivet }
979463ebdaSPierre Jolivet 
MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar ** values)9866976f2fSJacob Faibussowitsch static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values)
99d71ae5a4SJacob Faibussowitsch {
100b01c7715SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ *)A->data;
101*421480d9SBarry Smith   PetscInt        i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
1027f0c90edSBarry Smith   MatScalar      *v     = a->a, *odiag, *diag, work[25], *v_work;
10362bba022SBarry Smith   PetscReal       shift = 0.0;
1041a9391e3SHong Zhang   PetscBool       allowzeropivot, zeropivotdetected = PETSC_FALSE;
105*421480d9SBarry Smith   const PetscInt *adiag;
106b01c7715SBarry Smith 
107b01c7715SBarry Smith   PetscFunctionBegin;
108a455e926SHong Zhang   allowzeropivot = PetscNot(A->erroriffailure);
109a455e926SHong Zhang 
1109797317bSBarry Smith   if (a->idiagvalid) {
1119797317bSBarry Smith     if (values) *values = a->idiag;
1123ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
1139797317bSBarry Smith   }
114*421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &adiag, NULL));
1153a7d0413SPierre Jolivet   if (!a->idiag) PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag));
116b01c7715SBarry Smith   diag = a->idiag;
117bbead8a2SBarry Smith   if (values) *values = a->idiag;
118b01c7715SBarry Smith   /* factor and invert each block */
119521d7252SBarry Smith   switch (bs) {
120ab040260SJed Brown   case 1:
121ab040260SJed Brown     for (i = 0; i < mbs; i++) {
122*421480d9SBarry Smith       odiag   = v + 1 * adiag[i];
123ab040260SJed Brown       diag[0] = odiag[0];
124ec1892c8SHong Zhang 
125ec1892c8SHong Zhang       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
126966bd95aSPierre Jolivet         PetscCheck(allowzeropivot, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
1277b6c816cSBarry Smith         A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1287b6c816cSBarry Smith         A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
1297b6c816cSBarry Smith         A->factorerror_zeropivot_row   = i;
1309566063dSJacob Faibussowitsch         PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
131ec1892c8SHong Zhang       }
132ec1892c8SHong Zhang 
133d4a378daSJed Brown       diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
134ab040260SJed Brown       diag += 1;
135ab040260SJed Brown     }
136ab040260SJed Brown     break;
137b01c7715SBarry Smith   case 2:
138b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
139*421480d9SBarry Smith       odiag   = v + 4 * adiag[i];
1409371c9d4SSatish Balay       diag[0] = odiag[0];
1419371c9d4SSatish Balay       diag[1] = odiag[1];
1429371c9d4SSatish Balay       diag[2] = odiag[2];
1439371c9d4SSatish Balay       diag[3] = odiag[3];
1449566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1457b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
146b01c7715SBarry Smith       diag += 4;
147b01c7715SBarry Smith     }
148b01c7715SBarry Smith     break;
149b01c7715SBarry Smith   case 3:
150b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
151*421480d9SBarry Smith       odiag   = v + 9 * adiag[i];
1529371c9d4SSatish Balay       diag[0] = odiag[0];
1539371c9d4SSatish Balay       diag[1] = odiag[1];
1549371c9d4SSatish Balay       diag[2] = odiag[2];
1559371c9d4SSatish Balay       diag[3] = odiag[3];
1569371c9d4SSatish Balay       diag[4] = odiag[4];
1579371c9d4SSatish Balay       diag[5] = odiag[5];
1589371c9d4SSatish Balay       diag[6] = odiag[6];
1599371c9d4SSatish Balay       diag[7] = odiag[7];
160b01c7715SBarry Smith       diag[8] = odiag[8];
1619566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
1627b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
163b01c7715SBarry Smith       diag += 9;
164b01c7715SBarry Smith     }
165b01c7715SBarry Smith     break;
166b01c7715SBarry Smith   case 4:
167b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
168*421480d9SBarry Smith       odiag = v + 16 * adiag[i];
1699566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 16));
1709566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
1717b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
172b01c7715SBarry Smith       diag += 16;
173b01c7715SBarry Smith     }
174b01c7715SBarry Smith     break;
175b01c7715SBarry Smith   case 5:
176b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
177*421480d9SBarry Smith       odiag = v + 25 * adiag[i];
1789566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 25));
1799566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
1807b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
181b01c7715SBarry Smith       diag += 25;
182b01c7715SBarry Smith     }
183b01c7715SBarry Smith     break;
184d49b2adcSBarry Smith   case 6:
185d49b2adcSBarry Smith     for (i = 0; i < mbs; i++) {
186*421480d9SBarry Smith       odiag = v + 36 * adiag[i];
1879566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 36));
1889566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
1897b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
190d49b2adcSBarry Smith       diag += 36;
191d49b2adcSBarry Smith     }
192d49b2adcSBarry Smith     break;
193de80f912SBarry Smith   case 7:
194de80f912SBarry Smith     for (i = 0; i < mbs; i++) {
195*421480d9SBarry Smith       odiag = v + 49 * adiag[i];
1969566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 49));
1979566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
1987b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
199de80f912SBarry Smith       diag += 49;
200de80f912SBarry Smith     }
201de80f912SBarry Smith     break;
202b01c7715SBarry Smith   default:
2039566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
204de80f912SBarry Smith     for (i = 0; i < mbs; i++) {
205*421480d9SBarry Smith       odiag = v + bs2 * adiag[i];
2069566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, bs2));
2079566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
2087b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
209de80f912SBarry Smith       diag += bs2;
210de80f912SBarry Smith     }
2119566063dSJacob Faibussowitsch     PetscCall(PetscFree2(v_work, v_pivots));
212b01c7715SBarry Smith   }
213b01c7715SBarry Smith   a->idiagvalid = PETSC_TRUE;
2143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
215b01c7715SBarry Smith }
216b01c7715SBarry Smith 
MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)21766976f2fSJacob Faibussowitsch static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
218d71ae5a4SJacob Faibussowitsch {
2196d3beeddSMatthew Knepley   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
220e48d15efSToby Isaac   PetscScalar       *x, *work, *w, *workt, *t;
221e48d15efSToby Isaac   const MatScalar   *v, *aa = a->a, *idiag;
222e48d15efSToby Isaac   const PetscScalar *b, *xb;
2235455b99fSToby Isaac   PetscScalar        s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
224e48d15efSToby Isaac   PetscInt           m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
225c1ac3661SBarry Smith   const PetscInt    *diag, *ai = a->i, *aj = a->j, *vi;
226b01c7715SBarry Smith 
227b01c7715SBarry Smith   PetscFunctionBegin;
228b01c7715SBarry Smith   its = its * lits;
2295f80ce2aSJacob Faibussowitsch   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
2305f80ce2aSJacob Faibussowitsch   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2315f80ce2aSJacob Faibussowitsch   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
2325f80ce2aSJacob Faibussowitsch   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
2335f80ce2aSJacob Faibussowitsch   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
234b01c7715SBarry Smith 
2359566063dSJacob Faibussowitsch   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
236b01c7715SBarry Smith 
2373ba16761SJacob Faibussowitsch   if (!m) PetscFunctionReturn(PETSC_SUCCESS);
238b01c7715SBarry Smith   diag  = a->diag;
239b01c7715SBarry Smith   idiag = a->idiag;
240de80f912SBarry Smith   k     = PetscMax(A->rmap->n, A->cmap->n);
24148a46eb9SPierre Jolivet   if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
24248a46eb9SPierre Jolivet   if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
24348a46eb9SPierre Jolivet   if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
2443475c22fSBarry Smith   work = a->mult_work;
2453475c22fSBarry Smith   t    = a->sor_workt;
246de80f912SBarry Smith   w    = a->sor_work;
247de80f912SBarry Smith 
2489566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
2499566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
250de80f912SBarry Smith 
251de80f912SBarry Smith   if (flag & SOR_ZERO_INITIAL_GUESS) {
252de80f912SBarry Smith     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
253e48d15efSToby Isaac       switch (bs) {
254e48d15efSToby Isaac       case 1:
255e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(x, idiag, b);
256e48d15efSToby Isaac         t[0] = b[0];
257e48d15efSToby Isaac         i2   = 1;
258e48d15efSToby Isaac         idiag += 1;
259e48d15efSToby Isaac         for (i = 1; i < m; i++) {
260e48d15efSToby Isaac           v    = aa + ai[i];
261e48d15efSToby Isaac           vi   = aj + ai[i];
262e48d15efSToby Isaac           nz   = diag[i] - ai[i];
263e48d15efSToby Isaac           s[0] = b[i2];
264e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
265e48d15efSToby Isaac             xw[0] = x[vi[j]];
266e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
267e48d15efSToby Isaac           }
268e48d15efSToby Isaac           t[i2] = s[0];
269e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
270e48d15efSToby Isaac           x[i2] = xw[0];
271e48d15efSToby Isaac           idiag += 1;
272e48d15efSToby Isaac           i2 += 1;
273e48d15efSToby Isaac         }
274e48d15efSToby Isaac         break;
275e48d15efSToby Isaac       case 2:
276e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(x, idiag, b);
2779371c9d4SSatish Balay         t[0] = b[0];
2789371c9d4SSatish Balay         t[1] = b[1];
279e48d15efSToby Isaac         i2   = 2;
280e48d15efSToby Isaac         idiag += 4;
281e48d15efSToby Isaac         for (i = 1; i < m; i++) {
282e48d15efSToby Isaac           v    = aa + 4 * ai[i];
283e48d15efSToby Isaac           vi   = aj + ai[i];
284e48d15efSToby Isaac           nz   = diag[i] - ai[i];
2859371c9d4SSatish Balay           s[0] = b[i2];
2869371c9d4SSatish Balay           s[1] = b[i2 + 1];
287e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
288e48d15efSToby Isaac             idx   = 2 * vi[j];
289e48d15efSToby Isaac             it    = 4 * j;
2909371c9d4SSatish Balay             xw[0] = x[idx];
2919371c9d4SSatish Balay             xw[1] = x[1 + idx];
292e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
293e48d15efSToby Isaac           }
2949371c9d4SSatish Balay           t[i2]     = s[0];
2959371c9d4SSatish Balay           t[i2 + 1] = s[1];
296e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
2979371c9d4SSatish Balay           x[i2]     = xw[0];
2989371c9d4SSatish Balay           x[i2 + 1] = xw[1];
299e48d15efSToby Isaac           idiag += 4;
300e48d15efSToby Isaac           i2 += 2;
301e48d15efSToby Isaac         }
302e48d15efSToby Isaac         break;
303e48d15efSToby Isaac       case 3:
304e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(x, idiag, b);
3059371c9d4SSatish Balay         t[0] = b[0];
3069371c9d4SSatish Balay         t[1] = b[1];
3079371c9d4SSatish Balay         t[2] = b[2];
308e48d15efSToby Isaac         i2   = 3;
309e48d15efSToby Isaac         idiag += 9;
310e48d15efSToby Isaac         for (i = 1; i < m; i++) {
311e48d15efSToby Isaac           v    = aa + 9 * ai[i];
312e48d15efSToby Isaac           vi   = aj + ai[i];
313e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3149371c9d4SSatish Balay           s[0] = b[i2];
3159371c9d4SSatish Balay           s[1] = b[i2 + 1];
3169371c9d4SSatish Balay           s[2] = b[i2 + 2];
317e48d15efSToby Isaac           while (nz--) {
318e48d15efSToby Isaac             idx   = 3 * (*vi++);
3199371c9d4SSatish Balay             xw[0] = x[idx];
3209371c9d4SSatish Balay             xw[1] = x[1 + idx];
3219371c9d4SSatish Balay             xw[2] = x[2 + idx];
322e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
323e48d15efSToby Isaac             v += 9;
324e48d15efSToby Isaac           }
3259371c9d4SSatish Balay           t[i2]     = s[0];
3269371c9d4SSatish Balay           t[i2 + 1] = s[1];
3279371c9d4SSatish Balay           t[i2 + 2] = s[2];
328e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
3299371c9d4SSatish Balay           x[i2]     = xw[0];
3309371c9d4SSatish Balay           x[i2 + 1] = xw[1];
3319371c9d4SSatish Balay           x[i2 + 2] = xw[2];
332e48d15efSToby Isaac           idiag += 9;
333e48d15efSToby Isaac           i2 += 3;
334e48d15efSToby Isaac         }
335e48d15efSToby Isaac         break;
336e48d15efSToby Isaac       case 4:
337e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(x, idiag, b);
3389371c9d4SSatish Balay         t[0] = b[0];
3399371c9d4SSatish Balay         t[1] = b[1];
3409371c9d4SSatish Balay         t[2] = b[2];
3419371c9d4SSatish Balay         t[3] = b[3];
342e48d15efSToby Isaac         i2   = 4;
343e48d15efSToby Isaac         idiag += 16;
344e48d15efSToby Isaac         for (i = 1; i < m; i++) {
345e48d15efSToby Isaac           v    = aa + 16 * ai[i];
346e48d15efSToby Isaac           vi   = aj + ai[i];
347e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3489371c9d4SSatish Balay           s[0] = b[i2];
3499371c9d4SSatish Balay           s[1] = b[i2 + 1];
3509371c9d4SSatish Balay           s[2] = b[i2 + 2];
3519371c9d4SSatish Balay           s[3] = b[i2 + 3];
352e48d15efSToby Isaac           while (nz--) {
353e48d15efSToby Isaac             idx   = 4 * (*vi++);
3549371c9d4SSatish Balay             xw[0] = x[idx];
3559371c9d4SSatish Balay             xw[1] = x[1 + idx];
3569371c9d4SSatish Balay             xw[2] = x[2 + idx];
3579371c9d4SSatish Balay             xw[3] = x[3 + idx];
358e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
359e48d15efSToby Isaac             v += 16;
360e48d15efSToby Isaac           }
3619371c9d4SSatish Balay           t[i2]     = s[0];
3629371c9d4SSatish Balay           t[i2 + 1] = s[1];
3639371c9d4SSatish Balay           t[i2 + 2] = s[2];
3649371c9d4SSatish Balay           t[i2 + 3] = s[3];
365e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
3669371c9d4SSatish Balay           x[i2]     = xw[0];
3679371c9d4SSatish Balay           x[i2 + 1] = xw[1];
3689371c9d4SSatish Balay           x[i2 + 2] = xw[2];
3699371c9d4SSatish Balay           x[i2 + 3] = xw[3];
370e48d15efSToby Isaac           idiag += 16;
371e48d15efSToby Isaac           i2 += 4;
372e48d15efSToby Isaac         }
373e48d15efSToby Isaac         break;
374e48d15efSToby Isaac       case 5:
375e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(x, idiag, b);
3769371c9d4SSatish Balay         t[0] = b[0];
3779371c9d4SSatish Balay         t[1] = b[1];
3789371c9d4SSatish Balay         t[2] = b[2];
3799371c9d4SSatish Balay         t[3] = b[3];
3809371c9d4SSatish Balay         t[4] = b[4];
381e48d15efSToby Isaac         i2   = 5;
382e48d15efSToby Isaac         idiag += 25;
383e48d15efSToby Isaac         for (i = 1; i < m; i++) {
384e48d15efSToby Isaac           v    = aa + 25 * ai[i];
385e48d15efSToby Isaac           vi   = aj + ai[i];
386e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3879371c9d4SSatish Balay           s[0] = b[i2];
3889371c9d4SSatish Balay           s[1] = b[i2 + 1];
3899371c9d4SSatish Balay           s[2] = b[i2 + 2];
3909371c9d4SSatish Balay           s[3] = b[i2 + 3];
3919371c9d4SSatish Balay           s[4] = b[i2 + 4];
392e48d15efSToby Isaac           while (nz--) {
393e48d15efSToby Isaac             idx   = 5 * (*vi++);
3949371c9d4SSatish Balay             xw[0] = x[idx];
3959371c9d4SSatish Balay             xw[1] = x[1 + idx];
3969371c9d4SSatish Balay             xw[2] = x[2 + idx];
3979371c9d4SSatish Balay             xw[3] = x[3 + idx];
3989371c9d4SSatish Balay             xw[4] = x[4 + idx];
399e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
400e48d15efSToby Isaac             v += 25;
401e48d15efSToby Isaac           }
4029371c9d4SSatish Balay           t[i2]     = s[0];
4039371c9d4SSatish Balay           t[i2 + 1] = s[1];
4049371c9d4SSatish Balay           t[i2 + 2] = s[2];
4059371c9d4SSatish Balay           t[i2 + 3] = s[3];
4069371c9d4SSatish Balay           t[i2 + 4] = s[4];
407e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
4089371c9d4SSatish Balay           x[i2]     = xw[0];
4099371c9d4SSatish Balay           x[i2 + 1] = xw[1];
4109371c9d4SSatish Balay           x[i2 + 2] = xw[2];
4119371c9d4SSatish Balay           x[i2 + 3] = xw[3];
4129371c9d4SSatish Balay           x[i2 + 4] = xw[4];
413e48d15efSToby Isaac           idiag += 25;
414e48d15efSToby Isaac           i2 += 5;
415e48d15efSToby Isaac         }
416e48d15efSToby Isaac         break;
417e48d15efSToby Isaac       case 6:
418e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(x, idiag, b);
4199371c9d4SSatish Balay         t[0] = b[0];
4209371c9d4SSatish Balay         t[1] = b[1];
4219371c9d4SSatish Balay         t[2] = b[2];
4229371c9d4SSatish Balay         t[3] = b[3];
4239371c9d4SSatish Balay         t[4] = b[4];
4249371c9d4SSatish Balay         t[5] = b[5];
425e48d15efSToby Isaac         i2   = 6;
426e48d15efSToby Isaac         idiag += 36;
427e48d15efSToby Isaac         for (i = 1; i < m; i++) {
428e48d15efSToby Isaac           v    = aa + 36 * ai[i];
429e48d15efSToby Isaac           vi   = aj + ai[i];
430e48d15efSToby Isaac           nz   = diag[i] - ai[i];
4319371c9d4SSatish Balay           s[0] = b[i2];
4329371c9d4SSatish Balay           s[1] = b[i2 + 1];
4339371c9d4SSatish Balay           s[2] = b[i2 + 2];
4349371c9d4SSatish Balay           s[3] = b[i2 + 3];
4359371c9d4SSatish Balay           s[4] = b[i2 + 4];
4369371c9d4SSatish Balay           s[5] = b[i2 + 5];
437e48d15efSToby Isaac           while (nz--) {
438e48d15efSToby Isaac             idx   = 6 * (*vi++);
4399371c9d4SSatish Balay             xw[0] = x[idx];
4409371c9d4SSatish Balay             xw[1] = x[1 + idx];
4419371c9d4SSatish Balay             xw[2] = x[2 + idx];
4429371c9d4SSatish Balay             xw[3] = x[3 + idx];
4439371c9d4SSatish Balay             xw[4] = x[4 + idx];
4449371c9d4SSatish Balay             xw[5] = x[5 + idx];
445e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
446e48d15efSToby Isaac             v += 36;
447e48d15efSToby Isaac           }
4489371c9d4SSatish Balay           t[i2]     = s[0];
4499371c9d4SSatish Balay           t[i2 + 1] = s[1];
4509371c9d4SSatish Balay           t[i2 + 2] = s[2];
4519371c9d4SSatish Balay           t[i2 + 3] = s[3];
4529371c9d4SSatish Balay           t[i2 + 4] = s[4];
4539371c9d4SSatish Balay           t[i2 + 5] = s[5];
454e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
4559371c9d4SSatish Balay           x[i2]     = xw[0];
4569371c9d4SSatish Balay           x[i2 + 1] = xw[1];
4579371c9d4SSatish Balay           x[i2 + 2] = xw[2];
4589371c9d4SSatish Balay           x[i2 + 3] = xw[3];
4599371c9d4SSatish Balay           x[i2 + 4] = xw[4];
4609371c9d4SSatish Balay           x[i2 + 5] = xw[5];
461e48d15efSToby Isaac           idiag += 36;
462e48d15efSToby Isaac           i2 += 6;
463e48d15efSToby Isaac         }
464e48d15efSToby Isaac         break;
465e48d15efSToby Isaac       case 7:
466e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
4679371c9d4SSatish Balay         t[0] = b[0];
4689371c9d4SSatish Balay         t[1] = b[1];
4699371c9d4SSatish Balay         t[2] = b[2];
4709371c9d4SSatish Balay         t[3] = b[3];
4719371c9d4SSatish Balay         t[4] = b[4];
4729371c9d4SSatish Balay         t[5] = b[5];
4739371c9d4SSatish Balay         t[6] = b[6];
474e48d15efSToby Isaac         i2   = 7;
475e48d15efSToby Isaac         idiag += 49;
476e48d15efSToby Isaac         for (i = 1; i < m; i++) {
477e48d15efSToby Isaac           v    = aa + 49 * ai[i];
478e48d15efSToby Isaac           vi   = aj + ai[i];
479e48d15efSToby Isaac           nz   = diag[i] - ai[i];
4809371c9d4SSatish Balay           s[0] = b[i2];
4819371c9d4SSatish Balay           s[1] = b[i2 + 1];
4829371c9d4SSatish Balay           s[2] = b[i2 + 2];
4839371c9d4SSatish Balay           s[3] = b[i2 + 3];
4849371c9d4SSatish Balay           s[4] = b[i2 + 4];
4859371c9d4SSatish Balay           s[5] = b[i2 + 5];
4869371c9d4SSatish Balay           s[6] = b[i2 + 6];
487e48d15efSToby Isaac           while (nz--) {
488e48d15efSToby Isaac             idx   = 7 * (*vi++);
4899371c9d4SSatish Balay             xw[0] = x[idx];
4909371c9d4SSatish Balay             xw[1] = x[1 + idx];
4919371c9d4SSatish Balay             xw[2] = x[2 + idx];
4929371c9d4SSatish Balay             xw[3] = x[3 + idx];
4939371c9d4SSatish Balay             xw[4] = x[4 + idx];
4949371c9d4SSatish Balay             xw[5] = x[5 + idx];
4959371c9d4SSatish Balay             xw[6] = x[6 + idx];
496e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
497e48d15efSToby Isaac             v += 49;
498e48d15efSToby Isaac           }
4999371c9d4SSatish Balay           t[i2]     = s[0];
5009371c9d4SSatish Balay           t[i2 + 1] = s[1];
5019371c9d4SSatish Balay           t[i2 + 2] = s[2];
5029371c9d4SSatish Balay           t[i2 + 3] = s[3];
5039371c9d4SSatish Balay           t[i2 + 4] = s[4];
5049371c9d4SSatish Balay           t[i2 + 5] = s[5];
5059371c9d4SSatish Balay           t[i2 + 6] = s[6];
506e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
5079371c9d4SSatish Balay           x[i2]     = xw[0];
5089371c9d4SSatish Balay           x[i2 + 1] = xw[1];
5099371c9d4SSatish Balay           x[i2 + 2] = xw[2];
5109371c9d4SSatish Balay           x[i2 + 3] = xw[3];
5119371c9d4SSatish Balay           x[i2 + 4] = xw[4];
5129371c9d4SSatish Balay           x[i2 + 5] = xw[5];
5139371c9d4SSatish Balay           x[i2 + 6] = xw[6];
514e48d15efSToby Isaac           idiag += 49;
515e48d15efSToby Isaac           i2 += 7;
516e48d15efSToby Isaac         }
517e48d15efSToby Isaac         break;
518e48d15efSToby Isaac       default:
51996b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
5209566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(t, b, bs));
521de80f912SBarry Smith         i2 = bs;
522de80f912SBarry Smith         idiag += bs2;
523de80f912SBarry Smith         for (i = 1; i < m; i++) {
524de80f912SBarry Smith           v  = aa + bs2 * ai[i];
525de80f912SBarry Smith           vi = aj + ai[i];
526de80f912SBarry Smith           nz = diag[i] - ai[i];
527de80f912SBarry Smith 
5289566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
529de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
530de80f912SBarry Smith           workt = work;
531de80f912SBarry Smith           for (j = 0; j < nz; j++) {
5329566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
533de80f912SBarry Smith             workt += bs;
534de80f912SBarry Smith           }
53596b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
5369566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(t + i2, w, bs));
53796b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
538de80f912SBarry Smith 
539de80f912SBarry Smith           idiag += bs2;
540de80f912SBarry Smith           i2 += bs;
541de80f912SBarry Smith         }
542e48d15efSToby Isaac         break;
543e48d15efSToby Isaac       }
544de80f912SBarry Smith       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
5459566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
546e48d15efSToby Isaac       xb = t;
5479371c9d4SSatish Balay     } else xb = b;
548de80f912SBarry Smith     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
549e48d15efSToby Isaac       idiag = a->idiag + bs2 * (a->mbs - 1);
550e48d15efSToby Isaac       i2    = bs * (m - 1);
551e48d15efSToby Isaac       switch (bs) {
552e48d15efSToby Isaac       case 1:
553e48d15efSToby Isaac         s[0] = xb[i2];
554e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
555e48d15efSToby Isaac         x[i2] = xw[0];
556e48d15efSToby Isaac         i2 -= 1;
557e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
558e48d15efSToby Isaac           v    = aa + (diag[i] + 1);
559e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
560e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
561e48d15efSToby Isaac           s[0] = xb[i2];
562e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
563e48d15efSToby Isaac             xw[0] = x[vi[j]];
564e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
565e48d15efSToby Isaac           }
566e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
567e48d15efSToby Isaac           x[i2] = xw[0];
568e48d15efSToby Isaac           idiag -= 1;
569e48d15efSToby Isaac           i2 -= 1;
570e48d15efSToby Isaac         }
571e48d15efSToby Isaac         break;
572e48d15efSToby Isaac       case 2:
5739371c9d4SSatish Balay         s[0] = xb[i2];
5749371c9d4SSatish Balay         s[1] = xb[i2 + 1];
575e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
5769371c9d4SSatish Balay         x[i2]     = xw[0];
5779371c9d4SSatish Balay         x[i2 + 1] = xw[1];
578e48d15efSToby Isaac         i2 -= 2;
579e48d15efSToby Isaac         idiag -= 4;
580e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
581e48d15efSToby Isaac           v    = aa + 4 * (diag[i] + 1);
582e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
583e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
5849371c9d4SSatish Balay           s[0] = xb[i2];
5859371c9d4SSatish Balay           s[1] = xb[i2 + 1];
586e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
587e48d15efSToby Isaac             idx   = 2 * vi[j];
588e48d15efSToby Isaac             it    = 4 * j;
5899371c9d4SSatish Balay             xw[0] = x[idx];
5909371c9d4SSatish Balay             xw[1] = x[1 + idx];
591e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
592e48d15efSToby Isaac           }
593e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
5949371c9d4SSatish Balay           x[i2]     = xw[0];
5959371c9d4SSatish Balay           x[i2 + 1] = xw[1];
596e48d15efSToby Isaac           idiag -= 4;
597e48d15efSToby Isaac           i2 -= 2;
598e48d15efSToby Isaac         }
599e48d15efSToby Isaac         break;
600e48d15efSToby Isaac       case 3:
6019371c9d4SSatish Balay         s[0] = xb[i2];
6029371c9d4SSatish Balay         s[1] = xb[i2 + 1];
6039371c9d4SSatish Balay         s[2] = xb[i2 + 2];
604e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
6059371c9d4SSatish Balay         x[i2]     = xw[0];
6069371c9d4SSatish Balay         x[i2 + 1] = xw[1];
6079371c9d4SSatish Balay         x[i2 + 2] = xw[2];
608e48d15efSToby Isaac         i2 -= 3;
609e48d15efSToby Isaac         idiag -= 9;
610e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
611e48d15efSToby Isaac           v    = aa + 9 * (diag[i] + 1);
612e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
613e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6149371c9d4SSatish Balay           s[0] = xb[i2];
6159371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6169371c9d4SSatish Balay           s[2] = xb[i2 + 2];
617e48d15efSToby Isaac           while (nz--) {
618e48d15efSToby Isaac             idx   = 3 * (*vi++);
6199371c9d4SSatish Balay             xw[0] = x[idx];
6209371c9d4SSatish Balay             xw[1] = x[1 + idx];
6219371c9d4SSatish Balay             xw[2] = x[2 + idx];
622e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
623e48d15efSToby Isaac             v += 9;
624e48d15efSToby Isaac           }
625e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
6269371c9d4SSatish Balay           x[i2]     = xw[0];
6279371c9d4SSatish Balay           x[i2 + 1] = xw[1];
6289371c9d4SSatish Balay           x[i2 + 2] = xw[2];
629e48d15efSToby Isaac           idiag -= 9;
630e48d15efSToby Isaac           i2 -= 3;
631e48d15efSToby Isaac         }
632e48d15efSToby Isaac         break;
633e48d15efSToby Isaac       case 4:
6349371c9d4SSatish Balay         s[0] = xb[i2];
6359371c9d4SSatish Balay         s[1] = xb[i2 + 1];
6369371c9d4SSatish Balay         s[2] = xb[i2 + 2];
6379371c9d4SSatish Balay         s[3] = xb[i2 + 3];
638e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
6399371c9d4SSatish Balay         x[i2]     = xw[0];
6409371c9d4SSatish Balay         x[i2 + 1] = xw[1];
6419371c9d4SSatish Balay         x[i2 + 2] = xw[2];
6429371c9d4SSatish Balay         x[i2 + 3] = xw[3];
643e48d15efSToby Isaac         i2 -= 4;
644e48d15efSToby Isaac         idiag -= 16;
645e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
646e48d15efSToby Isaac           v    = aa + 16 * (diag[i] + 1);
647e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
648e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6499371c9d4SSatish Balay           s[0] = xb[i2];
6509371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6519371c9d4SSatish Balay           s[2] = xb[i2 + 2];
6529371c9d4SSatish Balay           s[3] = xb[i2 + 3];
653e48d15efSToby Isaac           while (nz--) {
654e48d15efSToby Isaac             idx   = 4 * (*vi++);
6559371c9d4SSatish Balay             xw[0] = x[idx];
6569371c9d4SSatish Balay             xw[1] = x[1 + idx];
6579371c9d4SSatish Balay             xw[2] = x[2 + idx];
6589371c9d4SSatish Balay             xw[3] = x[3 + idx];
659e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
660e48d15efSToby Isaac             v += 16;
661e48d15efSToby Isaac           }
662e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
6639371c9d4SSatish Balay           x[i2]     = xw[0];
6649371c9d4SSatish Balay           x[i2 + 1] = xw[1];
6659371c9d4SSatish Balay           x[i2 + 2] = xw[2];
6669371c9d4SSatish Balay           x[i2 + 3] = xw[3];
667e48d15efSToby Isaac           idiag -= 16;
668e48d15efSToby Isaac           i2 -= 4;
669e48d15efSToby Isaac         }
670e48d15efSToby Isaac         break;
671e48d15efSToby Isaac       case 5:
6729371c9d4SSatish Balay         s[0] = xb[i2];
6739371c9d4SSatish Balay         s[1] = xb[i2 + 1];
6749371c9d4SSatish Balay         s[2] = xb[i2 + 2];
6759371c9d4SSatish Balay         s[3] = xb[i2 + 3];
6769371c9d4SSatish Balay         s[4] = xb[i2 + 4];
677e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
6789371c9d4SSatish Balay         x[i2]     = xw[0];
6799371c9d4SSatish Balay         x[i2 + 1] = xw[1];
6809371c9d4SSatish Balay         x[i2 + 2] = xw[2];
6819371c9d4SSatish Balay         x[i2 + 3] = xw[3];
6829371c9d4SSatish Balay         x[i2 + 4] = xw[4];
683e48d15efSToby Isaac         i2 -= 5;
684e48d15efSToby Isaac         idiag -= 25;
685e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
686e48d15efSToby Isaac           v    = aa + 25 * (diag[i] + 1);
687e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
688e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6899371c9d4SSatish Balay           s[0] = xb[i2];
6909371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6919371c9d4SSatish Balay           s[2] = xb[i2 + 2];
6929371c9d4SSatish Balay           s[3] = xb[i2 + 3];
6939371c9d4SSatish Balay           s[4] = xb[i2 + 4];
694e48d15efSToby Isaac           while (nz--) {
695e48d15efSToby Isaac             idx   = 5 * (*vi++);
6969371c9d4SSatish Balay             xw[0] = x[idx];
6979371c9d4SSatish Balay             xw[1] = x[1 + idx];
6989371c9d4SSatish Balay             xw[2] = x[2 + idx];
6999371c9d4SSatish Balay             xw[3] = x[3 + idx];
7009371c9d4SSatish Balay             xw[4] = x[4 + idx];
701e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
702e48d15efSToby Isaac             v += 25;
703e48d15efSToby Isaac           }
704e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
7059371c9d4SSatish Balay           x[i2]     = xw[0];
7069371c9d4SSatish Balay           x[i2 + 1] = xw[1];
7079371c9d4SSatish Balay           x[i2 + 2] = xw[2];
7089371c9d4SSatish Balay           x[i2 + 3] = xw[3];
7099371c9d4SSatish Balay           x[i2 + 4] = xw[4];
710e48d15efSToby Isaac           idiag -= 25;
711e48d15efSToby Isaac           i2 -= 5;
712e48d15efSToby Isaac         }
713e48d15efSToby Isaac         break;
714e48d15efSToby Isaac       case 6:
7159371c9d4SSatish Balay         s[0] = xb[i2];
7169371c9d4SSatish Balay         s[1] = xb[i2 + 1];
7179371c9d4SSatish Balay         s[2] = xb[i2 + 2];
7189371c9d4SSatish Balay         s[3] = xb[i2 + 3];
7199371c9d4SSatish Balay         s[4] = xb[i2 + 4];
7209371c9d4SSatish Balay         s[5] = xb[i2 + 5];
721e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
7229371c9d4SSatish Balay         x[i2]     = xw[0];
7239371c9d4SSatish Balay         x[i2 + 1] = xw[1];
7249371c9d4SSatish Balay         x[i2 + 2] = xw[2];
7259371c9d4SSatish Balay         x[i2 + 3] = xw[3];
7269371c9d4SSatish Balay         x[i2 + 4] = xw[4];
7279371c9d4SSatish Balay         x[i2 + 5] = xw[5];
728e48d15efSToby Isaac         i2 -= 6;
729e48d15efSToby Isaac         idiag -= 36;
730e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
731e48d15efSToby Isaac           v    = aa + 36 * (diag[i] + 1);
732e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
733e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
7349371c9d4SSatish Balay           s[0] = xb[i2];
7359371c9d4SSatish Balay           s[1] = xb[i2 + 1];
7369371c9d4SSatish Balay           s[2] = xb[i2 + 2];
7379371c9d4SSatish Balay           s[3] = xb[i2 + 3];
7389371c9d4SSatish Balay           s[4] = xb[i2 + 4];
7399371c9d4SSatish Balay           s[5] = xb[i2 + 5];
740e48d15efSToby Isaac           while (nz--) {
741e48d15efSToby Isaac             idx   = 6 * (*vi++);
7429371c9d4SSatish Balay             xw[0] = x[idx];
7439371c9d4SSatish Balay             xw[1] = x[1 + idx];
7449371c9d4SSatish Balay             xw[2] = x[2 + idx];
7459371c9d4SSatish Balay             xw[3] = x[3 + idx];
7469371c9d4SSatish Balay             xw[4] = x[4 + idx];
7479371c9d4SSatish Balay             xw[5] = x[5 + idx];
748e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
749e48d15efSToby Isaac             v += 36;
750e48d15efSToby Isaac           }
751e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
7529371c9d4SSatish Balay           x[i2]     = xw[0];
7539371c9d4SSatish Balay           x[i2 + 1] = xw[1];
7549371c9d4SSatish Balay           x[i2 + 2] = xw[2];
7559371c9d4SSatish Balay           x[i2 + 3] = xw[3];
7569371c9d4SSatish Balay           x[i2 + 4] = xw[4];
7579371c9d4SSatish Balay           x[i2 + 5] = xw[5];
758e48d15efSToby Isaac           idiag -= 36;
759e48d15efSToby Isaac           i2 -= 6;
760e48d15efSToby Isaac         }
761e48d15efSToby Isaac         break;
762e48d15efSToby Isaac       case 7:
7639371c9d4SSatish Balay         s[0] = xb[i2];
7649371c9d4SSatish Balay         s[1] = xb[i2 + 1];
7659371c9d4SSatish Balay         s[2] = xb[i2 + 2];
7669371c9d4SSatish Balay         s[3] = xb[i2 + 3];
7679371c9d4SSatish Balay         s[4] = xb[i2 + 4];
7689371c9d4SSatish Balay         s[5] = xb[i2 + 5];
7699371c9d4SSatish Balay         s[6] = xb[i2 + 6];
770e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
7719371c9d4SSatish Balay         x[i2]     = xw[0];
7729371c9d4SSatish Balay         x[i2 + 1] = xw[1];
7739371c9d4SSatish Balay         x[i2 + 2] = xw[2];
7749371c9d4SSatish Balay         x[i2 + 3] = xw[3];
7759371c9d4SSatish Balay         x[i2 + 4] = xw[4];
7769371c9d4SSatish Balay         x[i2 + 5] = xw[5];
7779371c9d4SSatish Balay         x[i2 + 6] = xw[6];
778e48d15efSToby Isaac         i2 -= 7;
779e48d15efSToby Isaac         idiag -= 49;
780e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
781e48d15efSToby Isaac           v    = aa + 49 * (diag[i] + 1);
782e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
783e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
7849371c9d4SSatish Balay           s[0] = xb[i2];
7859371c9d4SSatish Balay           s[1] = xb[i2 + 1];
7869371c9d4SSatish Balay           s[2] = xb[i2 + 2];
7879371c9d4SSatish Balay           s[3] = xb[i2 + 3];
7889371c9d4SSatish Balay           s[4] = xb[i2 + 4];
7899371c9d4SSatish Balay           s[5] = xb[i2 + 5];
7909371c9d4SSatish Balay           s[6] = xb[i2 + 6];
791e48d15efSToby Isaac           while (nz--) {
792e48d15efSToby Isaac             idx   = 7 * (*vi++);
7939371c9d4SSatish Balay             xw[0] = x[idx];
7949371c9d4SSatish Balay             xw[1] = x[1 + idx];
7959371c9d4SSatish Balay             xw[2] = x[2 + idx];
7969371c9d4SSatish Balay             xw[3] = x[3 + idx];
7979371c9d4SSatish Balay             xw[4] = x[4 + idx];
7989371c9d4SSatish Balay             xw[5] = x[5 + idx];
7999371c9d4SSatish Balay             xw[6] = x[6 + idx];
800e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
801e48d15efSToby Isaac             v += 49;
802e48d15efSToby Isaac           }
803e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
8049371c9d4SSatish Balay           x[i2]     = xw[0];
8059371c9d4SSatish Balay           x[i2 + 1] = xw[1];
8069371c9d4SSatish Balay           x[i2 + 2] = xw[2];
8079371c9d4SSatish Balay           x[i2 + 3] = xw[3];
8089371c9d4SSatish Balay           x[i2 + 4] = xw[4];
8099371c9d4SSatish Balay           x[i2 + 5] = xw[5];
8109371c9d4SSatish Balay           x[i2 + 6] = xw[6];
811e48d15efSToby Isaac           idiag -= 49;
812e48d15efSToby Isaac           i2 -= 7;
813e48d15efSToby Isaac         }
814e48d15efSToby Isaac         break;
815e48d15efSToby Isaac       default:
8169566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(w, xb + i2, bs));
81796b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
818de80f912SBarry Smith         i2 -= bs;
819e48d15efSToby Isaac         idiag -= bs2;
820de80f912SBarry Smith         for (i = m - 2; i >= 0; i--) {
821de80f912SBarry Smith           v  = aa + bs2 * (diag[i] + 1);
822de80f912SBarry Smith           vi = aj + diag[i] + 1;
823de80f912SBarry Smith           nz = ai[i + 1] - diag[i] - 1;
824de80f912SBarry Smith 
8259566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, xb + i2, bs));
826de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
827de80f912SBarry Smith           workt = work;
828de80f912SBarry Smith           for (j = 0; j < nz; j++) {
8299566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
830de80f912SBarry Smith             workt += bs;
831de80f912SBarry Smith           }
83296b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
83396b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
834e48d15efSToby Isaac 
835de80f912SBarry Smith           idiag -= bs2;
836de80f912SBarry Smith           i2 -= bs;
837de80f912SBarry Smith         }
838e48d15efSToby Isaac         break;
839e48d15efSToby Isaac       }
8409566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
841de80f912SBarry Smith     }
842e48d15efSToby Isaac     its--;
843e48d15efSToby Isaac   }
844e48d15efSToby Isaac   while (its--) {
845e48d15efSToby Isaac     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
846e48d15efSToby Isaac       idiag = a->idiag;
847e48d15efSToby Isaac       i2    = 0;
848e48d15efSToby Isaac       switch (bs) {
849e48d15efSToby Isaac       case 1:
850e48d15efSToby Isaac         for (i = 0; i < m; i++) {
851e48d15efSToby Isaac           v    = aa + ai[i];
852e48d15efSToby Isaac           vi   = aj + ai[i];
853e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
854e48d15efSToby Isaac           s[0] = b[i2];
855e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
856e48d15efSToby Isaac             xw[0] = x[vi[j]];
857e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
858e48d15efSToby Isaac           }
859e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
860e48d15efSToby Isaac           x[i2] += xw[0];
861e48d15efSToby Isaac           idiag += 1;
862e48d15efSToby Isaac           i2 += 1;
863e48d15efSToby Isaac         }
864e48d15efSToby Isaac         break;
865e48d15efSToby Isaac       case 2:
866e48d15efSToby Isaac         for (i = 0; i < m; i++) {
867e48d15efSToby Isaac           v    = aa + 4 * ai[i];
868e48d15efSToby Isaac           vi   = aj + ai[i];
869e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
8709371c9d4SSatish Balay           s[0] = b[i2];
8719371c9d4SSatish Balay           s[1] = b[i2 + 1];
872e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
873e48d15efSToby Isaac             idx   = 2 * vi[j];
874e48d15efSToby Isaac             it    = 4 * j;
8759371c9d4SSatish Balay             xw[0] = x[idx];
8769371c9d4SSatish Balay             xw[1] = x[1 + idx];
877e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
878e48d15efSToby Isaac           }
879e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
8809371c9d4SSatish Balay           x[i2] += xw[0];
8819371c9d4SSatish Balay           x[i2 + 1] += xw[1];
882e48d15efSToby Isaac           idiag += 4;
883e48d15efSToby Isaac           i2 += 2;
884e48d15efSToby Isaac         }
885e48d15efSToby Isaac         break;
886e48d15efSToby Isaac       case 3:
887e48d15efSToby Isaac         for (i = 0; i < m; i++) {
888e48d15efSToby Isaac           v    = aa + 9 * ai[i];
889e48d15efSToby Isaac           vi   = aj + ai[i];
890e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
8919371c9d4SSatish Balay           s[0] = b[i2];
8929371c9d4SSatish Balay           s[1] = b[i2 + 1];
8939371c9d4SSatish Balay           s[2] = b[i2 + 2];
894e48d15efSToby Isaac           while (nz--) {
895e48d15efSToby Isaac             idx   = 3 * (*vi++);
8969371c9d4SSatish Balay             xw[0] = x[idx];
8979371c9d4SSatish Balay             xw[1] = x[1 + idx];
8989371c9d4SSatish Balay             xw[2] = x[2 + idx];
899e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
900e48d15efSToby Isaac             v += 9;
901e48d15efSToby Isaac           }
902e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
9039371c9d4SSatish Balay           x[i2] += xw[0];
9049371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9059371c9d4SSatish Balay           x[i2 + 2] += xw[2];
906e48d15efSToby Isaac           idiag += 9;
907e48d15efSToby Isaac           i2 += 3;
908e48d15efSToby Isaac         }
909e48d15efSToby Isaac         break;
910e48d15efSToby Isaac       case 4:
911e48d15efSToby Isaac         for (i = 0; i < m; i++) {
912e48d15efSToby Isaac           v    = aa + 16 * ai[i];
913e48d15efSToby Isaac           vi   = aj + ai[i];
914e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9159371c9d4SSatish Balay           s[0] = b[i2];
9169371c9d4SSatish Balay           s[1] = b[i2 + 1];
9179371c9d4SSatish Balay           s[2] = b[i2 + 2];
9189371c9d4SSatish Balay           s[3] = b[i2 + 3];
919e48d15efSToby Isaac           while (nz--) {
920e48d15efSToby Isaac             idx   = 4 * (*vi++);
9219371c9d4SSatish Balay             xw[0] = x[idx];
9229371c9d4SSatish Balay             xw[1] = x[1 + idx];
9239371c9d4SSatish Balay             xw[2] = x[2 + idx];
9249371c9d4SSatish Balay             xw[3] = x[3 + idx];
925e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
926e48d15efSToby Isaac             v += 16;
927e48d15efSToby Isaac           }
928e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
9299371c9d4SSatish Balay           x[i2] += xw[0];
9309371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9319371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9329371c9d4SSatish Balay           x[i2 + 3] += xw[3];
933e48d15efSToby Isaac           idiag += 16;
934e48d15efSToby Isaac           i2 += 4;
935e48d15efSToby Isaac         }
936e48d15efSToby Isaac         break;
937e48d15efSToby Isaac       case 5:
938e48d15efSToby Isaac         for (i = 0; i < m; i++) {
939e48d15efSToby Isaac           v    = aa + 25 * ai[i];
940e48d15efSToby Isaac           vi   = aj + ai[i];
941e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9429371c9d4SSatish Balay           s[0] = b[i2];
9439371c9d4SSatish Balay           s[1] = b[i2 + 1];
9449371c9d4SSatish Balay           s[2] = b[i2 + 2];
9459371c9d4SSatish Balay           s[3] = b[i2 + 3];
9469371c9d4SSatish Balay           s[4] = b[i2 + 4];
947e48d15efSToby Isaac           while (nz--) {
948e48d15efSToby Isaac             idx   = 5 * (*vi++);
9499371c9d4SSatish Balay             xw[0] = x[idx];
9509371c9d4SSatish Balay             xw[1] = x[1 + idx];
9519371c9d4SSatish Balay             xw[2] = x[2 + idx];
9529371c9d4SSatish Balay             xw[3] = x[3 + idx];
9539371c9d4SSatish Balay             xw[4] = x[4 + idx];
954e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
955e48d15efSToby Isaac             v += 25;
956e48d15efSToby Isaac           }
957e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
9589371c9d4SSatish Balay           x[i2] += xw[0];
9599371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9609371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9619371c9d4SSatish Balay           x[i2 + 3] += xw[3];
9629371c9d4SSatish Balay           x[i2 + 4] += xw[4];
963e48d15efSToby Isaac           idiag += 25;
964e48d15efSToby Isaac           i2 += 5;
965e48d15efSToby Isaac         }
966e48d15efSToby Isaac         break;
967e48d15efSToby Isaac       case 6:
968e48d15efSToby Isaac         for (i = 0; i < m; i++) {
969e48d15efSToby Isaac           v    = aa + 36 * ai[i];
970e48d15efSToby Isaac           vi   = aj + ai[i];
971e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9729371c9d4SSatish Balay           s[0] = b[i2];
9739371c9d4SSatish Balay           s[1] = b[i2 + 1];
9749371c9d4SSatish Balay           s[2] = b[i2 + 2];
9759371c9d4SSatish Balay           s[3] = b[i2 + 3];
9769371c9d4SSatish Balay           s[4] = b[i2 + 4];
9779371c9d4SSatish Balay           s[5] = b[i2 + 5];
978e48d15efSToby Isaac           while (nz--) {
979e48d15efSToby Isaac             idx   = 6 * (*vi++);
9809371c9d4SSatish Balay             xw[0] = x[idx];
9819371c9d4SSatish Balay             xw[1] = x[1 + idx];
9829371c9d4SSatish Balay             xw[2] = x[2 + idx];
9839371c9d4SSatish Balay             xw[3] = x[3 + idx];
9849371c9d4SSatish Balay             xw[4] = x[4 + idx];
9859371c9d4SSatish Balay             xw[5] = x[5 + idx];
986e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
987e48d15efSToby Isaac             v += 36;
988e48d15efSToby Isaac           }
989e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
9909371c9d4SSatish Balay           x[i2] += xw[0];
9919371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9929371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9939371c9d4SSatish Balay           x[i2 + 3] += xw[3];
9949371c9d4SSatish Balay           x[i2 + 4] += xw[4];
9959371c9d4SSatish Balay           x[i2 + 5] += xw[5];
996e48d15efSToby Isaac           idiag += 36;
997e48d15efSToby Isaac           i2 += 6;
998e48d15efSToby Isaac         }
999e48d15efSToby Isaac         break;
1000e48d15efSToby Isaac       case 7:
1001e48d15efSToby Isaac         for (i = 0; i < m; i++) {
1002e48d15efSToby Isaac           v    = aa + 49 * ai[i];
1003e48d15efSToby Isaac           vi   = aj + ai[i];
1004e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
10059371c9d4SSatish Balay           s[0] = b[i2];
10069371c9d4SSatish Balay           s[1] = b[i2 + 1];
10079371c9d4SSatish Balay           s[2] = b[i2 + 2];
10089371c9d4SSatish Balay           s[3] = b[i2 + 3];
10099371c9d4SSatish Balay           s[4] = b[i2 + 4];
10109371c9d4SSatish Balay           s[5] = b[i2 + 5];
10119371c9d4SSatish Balay           s[6] = b[i2 + 6];
1012e48d15efSToby Isaac           while (nz--) {
1013e48d15efSToby Isaac             idx   = 7 * (*vi++);
10149371c9d4SSatish Balay             xw[0] = x[idx];
10159371c9d4SSatish Balay             xw[1] = x[1 + idx];
10169371c9d4SSatish Balay             xw[2] = x[2 + idx];
10179371c9d4SSatish Balay             xw[3] = x[3 + idx];
10189371c9d4SSatish Balay             xw[4] = x[4 + idx];
10199371c9d4SSatish Balay             xw[5] = x[5 + idx];
10209371c9d4SSatish Balay             xw[6] = x[6 + idx];
1021e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1022e48d15efSToby Isaac             v += 49;
1023e48d15efSToby Isaac           }
1024e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
10259371c9d4SSatish Balay           x[i2] += xw[0];
10269371c9d4SSatish Balay           x[i2 + 1] += xw[1];
10279371c9d4SSatish Balay           x[i2 + 2] += xw[2];
10289371c9d4SSatish Balay           x[i2 + 3] += xw[3];
10299371c9d4SSatish Balay           x[i2 + 4] += xw[4];
10309371c9d4SSatish Balay           x[i2 + 5] += xw[5];
10319371c9d4SSatish Balay           x[i2 + 6] += xw[6];
1032e48d15efSToby Isaac           idiag += 49;
1033e48d15efSToby Isaac           i2 += 7;
1034e48d15efSToby Isaac         }
1035e48d15efSToby Isaac         break;
1036e48d15efSToby Isaac       default:
1037e48d15efSToby Isaac         for (i = 0; i < m; i++) {
1038e48d15efSToby Isaac           v  = aa + bs2 * ai[i];
1039e48d15efSToby Isaac           vi = aj + ai[i];
1040e48d15efSToby Isaac           nz = ai[i + 1] - ai[i];
1041e48d15efSToby Isaac 
10429566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
1043e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
1044e48d15efSToby Isaac           workt = work;
1045e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
10469566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1047e48d15efSToby Isaac             workt += bs;
1048e48d15efSToby Isaac           }
1049e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1050e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1051e48d15efSToby Isaac 
1052e48d15efSToby Isaac           idiag += bs2;
1053e48d15efSToby Isaac           i2 += bs;
1054e48d15efSToby Isaac         }
1055e48d15efSToby Isaac         break;
1056e48d15efSToby Isaac       }
10579566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1058e48d15efSToby Isaac     }
1059e48d15efSToby Isaac     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1060e48d15efSToby Isaac       idiag = a->idiag + bs2 * (a->mbs - 1);
1061e48d15efSToby Isaac       i2    = bs * (m - 1);
1062e48d15efSToby Isaac       switch (bs) {
1063e48d15efSToby Isaac       case 1:
1064e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1065e48d15efSToby Isaac           v    = aa + ai[i];
1066e48d15efSToby Isaac           vi   = aj + ai[i];
1067e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
1068e48d15efSToby Isaac           s[0] = b[i2];
1069e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
1070e48d15efSToby Isaac             xw[0] = x[vi[j]];
1071e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1072e48d15efSToby Isaac           }
1073e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1074e48d15efSToby Isaac           x[i2] += xw[0];
1075e48d15efSToby Isaac           idiag -= 1;
1076e48d15efSToby Isaac           i2 -= 1;
1077e48d15efSToby Isaac         }
1078e48d15efSToby Isaac         break;
1079e48d15efSToby Isaac       case 2:
1080e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1081e48d15efSToby Isaac           v    = aa + 4 * ai[i];
1082e48d15efSToby Isaac           vi   = aj + ai[i];
1083e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
10849371c9d4SSatish Balay           s[0] = b[i2];
10859371c9d4SSatish Balay           s[1] = b[i2 + 1];
1086e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
1087e48d15efSToby Isaac             idx   = 2 * vi[j];
1088e48d15efSToby Isaac             it    = 4 * j;
10899371c9d4SSatish Balay             xw[0] = x[idx];
10909371c9d4SSatish Balay             xw[1] = x[1 + idx];
1091e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1092e48d15efSToby Isaac           }
1093e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
10949371c9d4SSatish Balay           x[i2] += xw[0];
10959371c9d4SSatish Balay           x[i2 + 1] += xw[1];
1096e48d15efSToby Isaac           idiag -= 4;
1097e48d15efSToby Isaac           i2 -= 2;
1098e48d15efSToby Isaac         }
1099e48d15efSToby Isaac         break;
1100e48d15efSToby Isaac       case 3:
1101e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1102e48d15efSToby Isaac           v    = aa + 9 * ai[i];
1103e48d15efSToby Isaac           vi   = aj + ai[i];
1104e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11059371c9d4SSatish Balay           s[0] = b[i2];
11069371c9d4SSatish Balay           s[1] = b[i2 + 1];
11079371c9d4SSatish Balay           s[2] = b[i2 + 2];
1108e48d15efSToby Isaac           while (nz--) {
1109e48d15efSToby Isaac             idx   = 3 * (*vi++);
11109371c9d4SSatish Balay             xw[0] = x[idx];
11119371c9d4SSatish Balay             xw[1] = x[1 + idx];
11129371c9d4SSatish Balay             xw[2] = x[2 + idx];
1113e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1114e48d15efSToby Isaac             v += 9;
1115e48d15efSToby Isaac           }
1116e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
11179371c9d4SSatish Balay           x[i2] += xw[0];
11189371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11199371c9d4SSatish Balay           x[i2 + 2] += xw[2];
1120e48d15efSToby Isaac           idiag -= 9;
1121e48d15efSToby Isaac           i2 -= 3;
1122e48d15efSToby Isaac         }
1123e48d15efSToby Isaac         break;
1124e48d15efSToby Isaac       case 4:
1125e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1126e48d15efSToby Isaac           v    = aa + 16 * ai[i];
1127e48d15efSToby Isaac           vi   = aj + ai[i];
1128e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11299371c9d4SSatish Balay           s[0] = b[i2];
11309371c9d4SSatish Balay           s[1] = b[i2 + 1];
11319371c9d4SSatish Balay           s[2] = b[i2 + 2];
11329371c9d4SSatish Balay           s[3] = b[i2 + 3];
1133e48d15efSToby Isaac           while (nz--) {
1134e48d15efSToby Isaac             idx   = 4 * (*vi++);
11359371c9d4SSatish Balay             xw[0] = x[idx];
11369371c9d4SSatish Balay             xw[1] = x[1 + idx];
11379371c9d4SSatish Balay             xw[2] = x[2 + idx];
11389371c9d4SSatish Balay             xw[3] = x[3 + idx];
1139e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1140e48d15efSToby Isaac             v += 16;
1141e48d15efSToby Isaac           }
1142e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
11439371c9d4SSatish Balay           x[i2] += xw[0];
11449371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11459371c9d4SSatish Balay           x[i2 + 2] += xw[2];
11469371c9d4SSatish Balay           x[i2 + 3] += xw[3];
1147e48d15efSToby Isaac           idiag -= 16;
1148e48d15efSToby Isaac           i2 -= 4;
1149e48d15efSToby Isaac         }
1150e48d15efSToby Isaac         break;
1151e48d15efSToby Isaac       case 5:
1152e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1153e48d15efSToby Isaac           v    = aa + 25 * ai[i];
1154e48d15efSToby Isaac           vi   = aj + ai[i];
1155e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11569371c9d4SSatish Balay           s[0] = b[i2];
11579371c9d4SSatish Balay           s[1] = b[i2 + 1];
11589371c9d4SSatish Balay           s[2] = b[i2 + 2];
11599371c9d4SSatish Balay           s[3] = b[i2 + 3];
11609371c9d4SSatish Balay           s[4] = b[i2 + 4];
1161e48d15efSToby Isaac           while (nz--) {
1162e48d15efSToby Isaac             idx   = 5 * (*vi++);
11639371c9d4SSatish Balay             xw[0] = x[idx];
11649371c9d4SSatish Balay             xw[1] = x[1 + idx];
11659371c9d4SSatish Balay             xw[2] = x[2 + idx];
11669371c9d4SSatish Balay             xw[3] = x[3 + idx];
11679371c9d4SSatish Balay             xw[4] = x[4 + idx];
1168e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1169e48d15efSToby Isaac             v += 25;
1170e48d15efSToby Isaac           }
1171e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
11729371c9d4SSatish Balay           x[i2] += xw[0];
11739371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11749371c9d4SSatish Balay           x[i2 + 2] += xw[2];
11759371c9d4SSatish Balay           x[i2 + 3] += xw[3];
11769371c9d4SSatish Balay           x[i2 + 4] += xw[4];
1177e48d15efSToby Isaac           idiag -= 25;
1178e48d15efSToby Isaac           i2 -= 5;
1179e48d15efSToby Isaac         }
1180e48d15efSToby Isaac         break;
1181e48d15efSToby Isaac       case 6:
1182e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1183e48d15efSToby Isaac           v    = aa + 36 * ai[i];
1184e48d15efSToby Isaac           vi   = aj + ai[i];
1185e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11869371c9d4SSatish Balay           s[0] = b[i2];
11879371c9d4SSatish Balay           s[1] = b[i2 + 1];
11889371c9d4SSatish Balay           s[2] = b[i2 + 2];
11899371c9d4SSatish Balay           s[3] = b[i2 + 3];
11909371c9d4SSatish Balay           s[4] = b[i2 + 4];
11919371c9d4SSatish Balay           s[5] = b[i2 + 5];
1192e48d15efSToby Isaac           while (nz--) {
1193e48d15efSToby Isaac             idx   = 6 * (*vi++);
11949371c9d4SSatish Balay             xw[0] = x[idx];
11959371c9d4SSatish Balay             xw[1] = x[1 + idx];
11969371c9d4SSatish Balay             xw[2] = x[2 + idx];
11979371c9d4SSatish Balay             xw[3] = x[3 + idx];
11989371c9d4SSatish Balay             xw[4] = x[4 + idx];
11999371c9d4SSatish Balay             xw[5] = x[5 + idx];
1200e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1201e48d15efSToby Isaac             v += 36;
1202e48d15efSToby Isaac           }
1203e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
12049371c9d4SSatish Balay           x[i2] += xw[0];
12059371c9d4SSatish Balay           x[i2 + 1] += xw[1];
12069371c9d4SSatish Balay           x[i2 + 2] += xw[2];
12079371c9d4SSatish Balay           x[i2 + 3] += xw[3];
12089371c9d4SSatish Balay           x[i2 + 4] += xw[4];
12099371c9d4SSatish Balay           x[i2 + 5] += xw[5];
1210e48d15efSToby Isaac           idiag -= 36;
1211e48d15efSToby Isaac           i2 -= 6;
1212e48d15efSToby Isaac         }
1213e48d15efSToby Isaac         break;
1214e48d15efSToby Isaac       case 7:
1215e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1216e48d15efSToby Isaac           v    = aa + 49 * ai[i];
1217e48d15efSToby Isaac           vi   = aj + ai[i];
1218e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
12199371c9d4SSatish Balay           s[0] = b[i2];
12209371c9d4SSatish Balay           s[1] = b[i2 + 1];
12219371c9d4SSatish Balay           s[2] = b[i2 + 2];
12229371c9d4SSatish Balay           s[3] = b[i2 + 3];
12239371c9d4SSatish Balay           s[4] = b[i2 + 4];
12249371c9d4SSatish Balay           s[5] = b[i2 + 5];
12259371c9d4SSatish Balay           s[6] = b[i2 + 6];
1226e48d15efSToby Isaac           while (nz--) {
1227e48d15efSToby Isaac             idx   = 7 * (*vi++);
12289371c9d4SSatish Balay             xw[0] = x[idx];
12299371c9d4SSatish Balay             xw[1] = x[1 + idx];
12309371c9d4SSatish Balay             xw[2] = x[2 + idx];
12319371c9d4SSatish Balay             xw[3] = x[3 + idx];
12329371c9d4SSatish Balay             xw[4] = x[4 + idx];
12339371c9d4SSatish Balay             xw[5] = x[5 + idx];
12349371c9d4SSatish Balay             xw[6] = x[6 + idx];
1235e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1236e48d15efSToby Isaac             v += 49;
1237e48d15efSToby Isaac           }
1238e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
12399371c9d4SSatish Balay           x[i2] += xw[0];
12409371c9d4SSatish Balay           x[i2 + 1] += xw[1];
12419371c9d4SSatish Balay           x[i2 + 2] += xw[2];
12429371c9d4SSatish Balay           x[i2 + 3] += xw[3];
12439371c9d4SSatish Balay           x[i2 + 4] += xw[4];
12449371c9d4SSatish Balay           x[i2 + 5] += xw[5];
12459371c9d4SSatish Balay           x[i2 + 6] += xw[6];
1246e48d15efSToby Isaac           idiag -= 49;
1247e48d15efSToby Isaac           i2 -= 7;
1248e48d15efSToby Isaac         }
1249e48d15efSToby Isaac         break;
1250e48d15efSToby Isaac       default:
1251e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1252e48d15efSToby Isaac           v  = aa + bs2 * ai[i];
1253e48d15efSToby Isaac           vi = aj + ai[i];
1254e48d15efSToby Isaac           nz = ai[i + 1] - ai[i];
1255e48d15efSToby Isaac 
12569566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
1257e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
1258e48d15efSToby Isaac           workt = work;
1259e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
12609566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1261e48d15efSToby Isaac             workt += bs;
1262e48d15efSToby Isaac           }
1263e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1264e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1265e48d15efSToby Isaac 
1266e48d15efSToby Isaac           idiag -= bs2;
1267e48d15efSToby Isaac           i2 -= bs;
1268e48d15efSToby Isaac         }
1269e48d15efSToby Isaac         break;
1270e48d15efSToby Isaac       }
12719566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1272e48d15efSToby Isaac     }
1273e48d15efSToby Isaac   }
12749566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
12759566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
12763ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1277de80f912SBarry Smith }
1278de80f912SBarry Smith 
1279af674e45SBarry Smith /*
128081824310SBarry Smith     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1281af674e45SBarry Smith */
1282af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
1283af674e45SBarry Smith   #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1284af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1285af674e45SBarry Smith   #define matsetvaluesblocked4_ matsetvaluesblocked4
1286af674e45SBarry Smith #endif
1287af674e45SBarry Smith 
matsetvaluesblocked4_(Mat * AA,PetscInt * mm,const PetscInt im[],PetscInt * nn,const PetscInt in[],const PetscScalar v[])1288d71ae5a4SJacob Faibussowitsch PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[])
1289d71ae5a4SJacob Faibussowitsch {
1290af674e45SBarry Smith   Mat                A = *AA;
1291af674e45SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1292c1ac3661SBarry Smith   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1293c1ac3661SBarry Smith   PetscInt          *ai = a->i, *ailen = a->ilen;
129417ec6a02SBarry Smith   PetscInt          *aj = a->j, stepval, lastcol = -1;
1295f15d580aSBarry Smith   const PetscScalar *value = v;
12964bb09213Spetsc   MatScalar         *ap, *aa = a->a, *bap;
1297af674e45SBarry Smith 
1298af674e45SBarry Smith   PetscFunctionBegin;
1299ce94432eSBarry Smith   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1300af674e45SBarry Smith   stepval = (n - 1) * 4;
1301af674e45SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
1302af674e45SBarry Smith     row  = im[k];
1303af674e45SBarry Smith     rp   = aj + ai[row];
1304af674e45SBarry Smith     ap   = aa + 16 * ai[row];
1305af674e45SBarry Smith     nrow = ailen[row];
1306af674e45SBarry Smith     low  = 0;
130717ec6a02SBarry Smith     high = nrow;
1308af674e45SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
1309af674e45SBarry Smith       col = in[l];
1310db4deed7SKarl Rupp       if (col <= lastcol) low = 0;
1311db4deed7SKarl Rupp       else high = nrow;
131217ec6a02SBarry Smith       lastcol = col;
13131e3347e8SBarry Smith       value   = v + k * (stepval + 4 + l) * 4;
1314af674e45SBarry Smith       while (high - low > 7) {
1315af674e45SBarry Smith         t = (low + high) / 2;
1316af674e45SBarry Smith         if (rp[t] > col) high = t;
1317af674e45SBarry Smith         else low = t;
1318af674e45SBarry Smith       }
1319af674e45SBarry Smith       for (i = low; i < high; i++) {
1320af674e45SBarry Smith         if (rp[i] > col) break;
1321af674e45SBarry Smith         if (rp[i] == col) {
1322af674e45SBarry Smith           bap = ap + 16 * i;
1323af674e45SBarry Smith           for (ii = 0; ii < 4; ii++, value += stepval) {
1324ad540459SPierre Jolivet             for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1325af674e45SBarry Smith           }
1326af674e45SBarry Smith           goto noinsert2;
1327af674e45SBarry Smith         }
1328af674e45SBarry Smith       }
1329af674e45SBarry Smith       N = nrow++ - 1;
133017ec6a02SBarry Smith       high++; /* added new column index thus must search to one higher than before */
1331af674e45SBarry Smith       /* shift up all the later entries in this row */
1332af674e45SBarry Smith       for (ii = N; ii >= i; ii--) {
1333af674e45SBarry Smith         rp[ii + 1] = rp[ii];
13349566063dSJacob Faibussowitsch         PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1335af674e45SBarry Smith       }
133648a46eb9SPierre Jolivet       if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1337af674e45SBarry Smith       rp[i] = col;
1338af674e45SBarry Smith       bap   = ap + 16 * i;
1339af674e45SBarry Smith       for (ii = 0; ii < 4; ii++, value += stepval) {
1340ad540459SPierre Jolivet         for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1341af674e45SBarry Smith       }
1342af674e45SBarry Smith     noinsert2:;
1343af674e45SBarry Smith       low = i;
1344af674e45SBarry Smith     }
1345af674e45SBarry Smith     ailen[row] = nrow;
1346af674e45SBarry Smith   }
1347be1d678aSKris Buschelman   PetscFunctionReturnVoid();
1348af674e45SBarry Smith }
1349af674e45SBarry Smith 
1350af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
1351af674e45SBarry Smith   #define matsetvalues4_ MATSETVALUES4
1352af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1353af674e45SBarry Smith   #define matsetvalues4_ matsetvalues4
1354af674e45SBarry Smith #endif
1355af674e45SBarry Smith 
matsetvalues4_(Mat * AA,PetscInt * mm,PetscInt * im,PetscInt * nn,PetscInt * in,PetscScalar * v)1356d71ae5a4SJacob Faibussowitsch PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v)
1357d71ae5a4SJacob Faibussowitsch {
1358af674e45SBarry Smith   Mat          A = *AA;
1359af674e45SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1360580bdb30SBarry Smith   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1361c1ac3661SBarry Smith   PetscInt    *ai = a->i, *ailen = a->ilen;
1362c1ac3661SBarry Smith   PetscInt    *aj = a->j, brow, bcol;
136317ec6a02SBarry Smith   PetscInt     ridx, cidx, lastcol = -1;
1364af674e45SBarry Smith   MatScalar   *ap, value, *aa      = a->a, *bap;
1365af674e45SBarry Smith 
1366af674e45SBarry Smith   PetscFunctionBegin;
1367af674e45SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
13689371c9d4SSatish Balay     row  = im[k];
13699371c9d4SSatish Balay     brow = row / 4;
1370af674e45SBarry Smith     rp   = aj + ai[brow];
1371af674e45SBarry Smith     ap   = aa + 16 * ai[brow];
1372af674e45SBarry Smith     nrow = ailen[brow];
1373af674e45SBarry Smith     low  = 0;
137417ec6a02SBarry Smith     high = nrow;
1375af674e45SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
13769371c9d4SSatish Balay       col   = in[l];
13779371c9d4SSatish Balay       bcol  = col / 4;
13789371c9d4SSatish Balay       ridx  = row % 4;
13799371c9d4SSatish Balay       cidx  = col % 4;
1380af674e45SBarry Smith       value = v[l + k * n];
1381db4deed7SKarl Rupp       if (col <= lastcol) low = 0;
1382db4deed7SKarl Rupp       else high = nrow;
138317ec6a02SBarry Smith       lastcol = col;
1384af674e45SBarry Smith       while (high - low > 7) {
1385af674e45SBarry Smith         t = (low + high) / 2;
1386af674e45SBarry Smith         if (rp[t] > bcol) high = t;
1387af674e45SBarry Smith         else low = t;
1388af674e45SBarry Smith       }
1389af674e45SBarry Smith       for (i = low; i < high; i++) {
1390af674e45SBarry Smith         if (rp[i] > bcol) break;
1391af674e45SBarry Smith         if (rp[i] == bcol) {
1392af674e45SBarry Smith           bap = ap + 16 * i + 4 * cidx + ridx;
1393af674e45SBarry Smith           *bap += value;
1394af674e45SBarry Smith           goto noinsert1;
1395af674e45SBarry Smith         }
1396af674e45SBarry Smith       }
1397af674e45SBarry Smith       N = nrow++ - 1;
139817ec6a02SBarry Smith       high++; /* added new column thus must search to one higher than before */
1399af674e45SBarry Smith       /* shift up all the later entries in this row */
14009566063dSJacob Faibussowitsch       PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
14019566063dSJacob Faibussowitsch       PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
14029566063dSJacob Faibussowitsch       PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1403af674e45SBarry Smith       rp[i]                        = bcol;
1404af674e45SBarry Smith       ap[16 * i + 4 * cidx + ridx] = value;
1405af674e45SBarry Smith     noinsert1:;
1406af674e45SBarry Smith       low = i;
1407af674e45SBarry Smith     }
1408af674e45SBarry Smith     ailen[brow] = nrow;
1409af674e45SBarry Smith   }
1410be1d678aSKris Buschelman   PetscFunctionReturnVoid();
1411af674e45SBarry Smith }
1412af674e45SBarry Smith 
MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt * nn,const PetscInt * inia[],const PetscInt * inja[],PetscBool * done)1413d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
1414d71ae5a4SJacob Faibussowitsch {
14153b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
14161a83f524SJed Brown   PetscInt     i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
14171a83f524SJed Brown   PetscInt   **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
14183b2fbd54SBarry Smith 
14193a40ed3dSBarry Smith   PetscFunctionBegin;
14203b2fbd54SBarry Smith   *nn = n;
14213ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
14223b2fbd54SBarry Smith   if (symmetric) {
14239566063dSJacob Faibussowitsch     PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1424553b3c51SBarry Smith     nz = tia[n];
14253b2fbd54SBarry Smith   } else {
14269371c9d4SSatish Balay     tia = a->i;
14279371c9d4SSatish Balay     tja = a->j;
14283b2fbd54SBarry Smith   }
14293b2fbd54SBarry Smith 
1430ecc77c7aSBarry Smith   if (!blockcompressed && bs > 1) {
1431ecc77c7aSBarry Smith     (*nn) *= bs;
14328f7157efSSatish Balay     /* malloc & create the natural set of indices */
14339566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1((n + 1) * bs, ia));
14349985e31cSBarry Smith     if (n) {
14352462f5fdSStefano Zampini       (*ia)[0] = oshift;
1436ad540459SPierre Jolivet       for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
14379985e31cSBarry Smith     }
1438ecc77c7aSBarry Smith 
1439ecc77c7aSBarry Smith     for (i = 1; i < n; i++) {
1440ecc77c7aSBarry Smith       (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1441ad540459SPierre Jolivet       for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
14428f7157efSSatish Balay     }
1443ad540459SPierre Jolivet     if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1444ecc77c7aSBarry Smith 
14451a83f524SJed Brown     if (inja) {
14469566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nz * bs * bs, ja));
14479985e31cSBarry Smith       cnt = 0;
14489985e31cSBarry Smith       for (i = 0; i < n; i++) {
14499985e31cSBarry Smith         for (j = 0; j < bs; j++) {
14509985e31cSBarry Smith           for (k = tia[i]; k < tia[i + 1]; k++) {
1451ad540459SPierre Jolivet             for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
14529985e31cSBarry Smith           }
14539985e31cSBarry Smith         }
14549985e31cSBarry Smith       }
14559985e31cSBarry Smith     }
14569985e31cSBarry Smith 
14578f7157efSSatish Balay     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
14589566063dSJacob Faibussowitsch       PetscCall(PetscFree(tia));
14599566063dSJacob Faibussowitsch       PetscCall(PetscFree(tja));
14608f7157efSSatish Balay     }
1461f6d58c54SBarry Smith   } else if (oshift == 1) {
1462715a17b5SBarry Smith     if (symmetric) {
1463a2ea699eSBarry Smith       nz = tia[A->rmap->n / bs];
1464715a17b5SBarry Smith       /*  add 1 to i and j indices */
1465715a17b5SBarry Smith       for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1466715a17b5SBarry Smith       *ia = tia;
1467715a17b5SBarry Smith       if (ja) {
1468715a17b5SBarry Smith         for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1469715a17b5SBarry Smith         *ja = tja;
1470715a17b5SBarry Smith       }
1471715a17b5SBarry Smith     } else {
1472a2ea699eSBarry Smith       nz = a->i[A->rmap->n / bs];
1473f6d58c54SBarry Smith       /* malloc space and  add 1 to i and j indices */
14749566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1475f6d58c54SBarry Smith       for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1476f6d58c54SBarry Smith       if (ja) {
14779566063dSJacob Faibussowitsch         PetscCall(PetscMalloc1(nz, ja));
1478f6d58c54SBarry Smith         for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1479f6d58c54SBarry Smith       }
1480715a17b5SBarry Smith     }
14818f7157efSSatish Balay   } else {
14828f7157efSSatish Balay     *ia = tia;
1483ecc77c7aSBarry Smith     if (ja) *ja = tja;
14848f7157efSSatish Balay   }
14853ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14863b2fbd54SBarry Smith }
14873b2fbd54SBarry Smith 
MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt * nn,const PetscInt * ia[],const PetscInt * ja[],PetscBool * done)1488d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
1489d71ae5a4SJacob Faibussowitsch {
14903a40ed3dSBarry Smith   PetscFunctionBegin;
14913ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1492715a17b5SBarry Smith   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
14939566063dSJacob Faibussowitsch     PetscCall(PetscFree(*ia));
14949566063dSJacob Faibussowitsch     if (ja) PetscCall(PetscFree(*ja));
14953b2fbd54SBarry Smith   }
14963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14973b2fbd54SBarry Smith }
14983b2fbd54SBarry Smith 
MatDestroy_SeqBAIJ(Mat A)1499d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1500d71ae5a4SJacob Faibussowitsch {
15012d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
15022d61bbb3SSatish Balay 
1503433994e6SBarry Smith   PetscFunctionBegin;
1504b4e2f619SBarry Smith   if (A->hash_active) {
1505b4e2f619SBarry Smith     PetscInt bs;
1506e3c72094SPierre Jolivet     A->ops[0] = a->cops;
1507b4e2f619SBarry Smith     PetscCall(PetscHMapIJVDestroy(&a->ht));
1508b4e2f619SBarry Smith     PetscCall(MatGetBlockSize(A, &bs));
1509b4e2f619SBarry Smith     if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht));
1510b4e2f619SBarry Smith     PetscCall(PetscFree(a->dnz));
1511b4e2f619SBarry Smith     PetscCall(PetscFree(a->bdnz));
1512b4e2f619SBarry Smith     A->hash_active = PETSC_FALSE;
1513b4e2f619SBarry Smith   }
15143ba16761SJacob Faibussowitsch   PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz));
15159566063dSJacob Faibussowitsch   PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
15169566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->row));
15179566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->col));
1518*421480d9SBarry Smith   PetscCall(PetscFree(a->diag));
15199566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->idiag));
15209566063dSJacob Faibussowitsch   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
15219566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->solve_work));
15229566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->mult_work));
15239566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->sor_workt));
15249566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->sor_work));
15259566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->icol));
15269566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->saved_values));
15279566063dSJacob Faibussowitsch   PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1528c4319e64SHong Zhang 
15299566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&a->sbaijMat));
15309566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&a->parent));
15319566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
1532901853e0SKris Buschelman 
15339566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
15349566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
15359566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
15369566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
15379566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
15389566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
15399566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
15409566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
15419566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
15429566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
15439566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
15449566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
15457ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
15469566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
15477ea3e4caSstefano_zampini #endif
15489566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
15492e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
15503ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15512d61bbb3SSatish Balay }
15522d61bbb3SSatish Balay 
MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)155366976f2fSJacob Faibussowitsch static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg)
1554d71ae5a4SJacob Faibussowitsch {
15552d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
15562d61bbb3SSatish Balay 
15572d61bbb3SSatish Balay   PetscFunctionBegin;
1558aa275fccSKris Buschelman   switch (op) {
1559d71ae5a4SJacob Faibussowitsch   case MAT_ROW_ORIENTED:
1560d71ae5a4SJacob Faibussowitsch     a->roworiented = flg;
1561d71ae5a4SJacob Faibussowitsch     break;
1562d71ae5a4SJacob Faibussowitsch   case MAT_KEEP_NONZERO_PATTERN:
1563d71ae5a4SJacob Faibussowitsch     a->keepnonzeropattern = flg;
1564d71ae5a4SJacob Faibussowitsch     break;
1565d71ae5a4SJacob Faibussowitsch   case MAT_NEW_NONZERO_LOCATIONS:
1566d71ae5a4SJacob Faibussowitsch     a->nonew = (flg ? 0 : 1);
1567d71ae5a4SJacob Faibussowitsch     break;
1568d71ae5a4SJacob Faibussowitsch   case MAT_NEW_NONZERO_LOCATION_ERR:
1569d71ae5a4SJacob Faibussowitsch     a->nonew = (flg ? -1 : 0);
1570d71ae5a4SJacob Faibussowitsch     break;
1571d71ae5a4SJacob Faibussowitsch   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1572d71ae5a4SJacob Faibussowitsch     a->nonew = (flg ? -2 : 0);
1573d71ae5a4SJacob Faibussowitsch     break;
1574d71ae5a4SJacob Faibussowitsch   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1575d71ae5a4SJacob Faibussowitsch     a->nounused = (flg ? -1 : 0);
1576d71ae5a4SJacob Faibussowitsch     break;
1577d71ae5a4SJacob Faibussowitsch   default:
1578888c827cSStefano Zampini     break;
15792d61bbb3SSatish Balay   }
15803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15812d61bbb3SSatish Balay }
15822d61bbb3SSatish Balay 
158352768537SHong Zhang /* used for both SeqBAIJ and SeqSBAIJ matrices */
MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v,PetscInt * ai,PetscInt * aj,PetscScalar * aa)1584d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa)
1585d71ae5a4SJacob Faibussowitsch {
158652768537SHong Zhang   PetscInt     itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
158752768537SHong Zhang   MatScalar   *aa_i;
158887828ca2SBarry Smith   PetscScalar *v_i;
15892d61bbb3SSatish Balay 
15902d61bbb3SSatish Balay   PetscFunctionBegin;
1591d0f46423SBarry Smith   bs  = A->rmap->bs;
159252768537SHong Zhang   bs2 = bs * bs;
15935f80ce2aSJacob Faibussowitsch   PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
15942d61bbb3SSatish Balay 
15952d61bbb3SSatish Balay   bn  = row / bs; /* Block number */
15962d61bbb3SSatish Balay   bp  = row % bs; /* Block Position */
15972d61bbb3SSatish Balay   M   = ai[bn + 1] - ai[bn];
15982d61bbb3SSatish Balay   *nz = bs * M;
15992d61bbb3SSatish Balay 
16002d61bbb3SSatish Balay   if (v) {
1601f4259b30SLisandro Dalcin     *v = NULL;
16022d61bbb3SSatish Balay     if (*nz) {
16039566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(*nz, v));
16042d61bbb3SSatish Balay       for (i = 0; i < M; i++) { /* for each block in the block row */
16052d61bbb3SSatish Balay         v_i  = *v + i * bs;
16062d61bbb3SSatish Balay         aa_i = aa + bs2 * (ai[bn] + i);
160726fbe8dcSKarl Rupp         for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
16082d61bbb3SSatish Balay       }
16092d61bbb3SSatish Balay     }
16102d61bbb3SSatish Balay   }
16112d61bbb3SSatish Balay 
16122d61bbb3SSatish Balay   if (idx) {
1613f4259b30SLisandro Dalcin     *idx = NULL;
16142d61bbb3SSatish Balay     if (*nz) {
16159566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(*nz, idx));
16162d61bbb3SSatish Balay       for (i = 0; i < M; i++) { /* for each block in the block row */
16172d61bbb3SSatish Balay         idx_i = *idx + i * bs;
16182d61bbb3SSatish Balay         itmp  = bs * aj[ai[bn] + i];
161926fbe8dcSKarl Rupp         for (j = 0; j < bs; j++) idx_i[j] = itmp++;
16202d61bbb3SSatish Balay       }
16212d61bbb3SSatish Balay     }
16222d61bbb3SSatish Balay   }
16233ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16242d61bbb3SSatish Balay }
16252d61bbb3SSatish Balay 
MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)1626d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1627d71ae5a4SJacob Faibussowitsch {
162852768537SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
162952768537SHong Zhang 
163052768537SHong Zhang   PetscFunctionBegin;
16319566063dSJacob Faibussowitsch   PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
16323ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
163352768537SHong Zhang }
163452768537SHong Zhang 
MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)1635d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1636d71ae5a4SJacob Faibussowitsch {
16372d61bbb3SSatish Balay   PetscFunctionBegin;
16389566063dSJacob Faibussowitsch   if (idx) PetscCall(PetscFree(*idx));
16399566063dSJacob Faibussowitsch   if (v) PetscCall(PetscFree(*v));
16403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16412d61bbb3SSatish Balay }
16422d61bbb3SSatish Balay 
MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat * B)164366976f2fSJacob Faibussowitsch static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B)
1644d71ae5a4SJacob Faibussowitsch {
164520e84f26SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
16462d61bbb3SSatish Balay   Mat          C;
164720e84f26SHong Zhang   PetscInt     i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
164820e84f26SHong Zhang   PetscInt     bs2 = a->bs2, *ati, *atj, anzj, kr;
164920e84f26SHong Zhang   MatScalar   *ata, *aa = a->a;
16502d61bbb3SSatish Balay 
16512d61bbb3SSatish Balay   PetscFunctionBegin;
16527fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
16539566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(1 + nbs, &atfill));
1654cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
165520e84f26SHong Zhang     for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
16562d61bbb3SSatish Balay 
16579566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
16589566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
16599566063dSJacob Faibussowitsch     PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
16609566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
166120e84f26SHong Zhang 
166220e84f26SHong Zhang     at  = (Mat_SeqBAIJ *)C->data;
166320e84f26SHong Zhang     ati = at->i;
166420e84f26SHong Zhang     for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1665fc4dec0aSBarry Smith   } else {
1666fc4dec0aSBarry Smith     C   = *B;
166720e84f26SHong Zhang     at  = (Mat_SeqBAIJ *)C->data;
166820e84f26SHong Zhang     ati = at->i;
1669fc4dec0aSBarry Smith   }
1670fc4dec0aSBarry Smith 
167120e84f26SHong Zhang   atj = at->j;
167220e84f26SHong Zhang   ata = at->a;
167320e84f26SHong Zhang 
167420e84f26SHong Zhang   /* Copy ati into atfill so we have locations of the next free space in atj */
16759566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(atfill, ati, nbs));
167620e84f26SHong Zhang 
167720e84f26SHong Zhang   /* Walk through A row-wise and mark nonzero entries of A^T. */
16782d61bbb3SSatish Balay   for (i = 0; i < mbs; i++) {
167920e84f26SHong Zhang     anzj = ai[i + 1] - ai[i];
168020e84f26SHong Zhang     for (j = 0; j < anzj; j++) {
168120e84f26SHong Zhang       atj[atfill[*aj]] = i;
168220e84f26SHong Zhang       for (kr = 0; kr < bs; kr++) {
1683ad540459SPierre Jolivet         for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
16842d61bbb3SSatish Balay       }
168520e84f26SHong Zhang       atfill[*aj++] += 1;
168620e84f26SHong Zhang     }
168720e84f26SHong Zhang   }
16889566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
16899566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
16902d61bbb3SSatish Balay 
169120e84f26SHong Zhang   /* Clean up temporary space and complete requests. */
16929566063dSJacob Faibussowitsch   PetscCall(PetscFree(atfill));
169320e84f26SHong Zhang 
1694cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
169558b7e2c1SStefano Zampini     PetscCall(MatSetBlockSizes(C, A->cmap->bs, A->rmap->bs));
16962d61bbb3SSatish Balay     *B = C;
16972d61bbb3SSatish Balay   } else {
16989566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(A, &C));
16992d61bbb3SSatish Balay   }
17003ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
17012d61bbb3SSatish Balay }
17022d61bbb3SSatish Balay 
MatCompare_SeqBAIJ_Private(Mat A,Mat B,PetscReal tol,PetscBool * flg)170328636b0cSPierre Jolivet static PetscErrorCode MatCompare_SeqBAIJ_Private(Mat A, Mat B, PetscReal tol, PetscBool *flg)
170428636b0cSPierre Jolivet {
170528636b0cSPierre Jolivet   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
170628636b0cSPierre Jolivet 
170728636b0cSPierre Jolivet   PetscFunctionBegin;
170828636b0cSPierre Jolivet   /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */
170928636b0cSPierre Jolivet   if (A->rmap->N != B->rmap->N || A->cmap->n != B->cmap->n || A->rmap->bs != B->rmap->bs || a->nz != b->nz) {
171028636b0cSPierre Jolivet     *flg = PETSC_FALSE;
171128636b0cSPierre Jolivet     PetscFunctionReturn(PETSC_SUCCESS);
171228636b0cSPierre Jolivet   }
171328636b0cSPierre Jolivet 
171428636b0cSPierre Jolivet   /* if the a->i are the same */
171528636b0cSPierre Jolivet   PetscCall(PetscArraycmp(a->i, b->i, a->mbs + 1, flg));
171628636b0cSPierre Jolivet   if (!*flg) PetscFunctionReturn(PETSC_SUCCESS);
171728636b0cSPierre Jolivet 
171828636b0cSPierre Jolivet   /* if a->j are the same */
171928636b0cSPierre Jolivet   PetscCall(PetscArraycmp(a->j, b->j, a->nz, flg));
172028636b0cSPierre Jolivet   if (!*flg) PetscFunctionReturn(PETSC_SUCCESS);
172128636b0cSPierre Jolivet 
172228636b0cSPierre Jolivet   if (tol == 0.0) PetscCall(PetscArraycmp(a->a, b->a, a->nz * A->rmap->bs * A->rmap->bs, flg)); /* if a->a are the same */
172328636b0cSPierre Jolivet   else {
172428636b0cSPierre Jolivet     *flg = PETSC_TRUE;
172528636b0cSPierre Jolivet     for (PetscInt i = 0; (i < a->nz * A->rmap->bs * A->rmap->bs) && *flg; ++i)
172628636b0cSPierre Jolivet       if (PetscAbsScalar(a->a[i] - b->a[i]) > tol) *flg = PETSC_FALSE;
172728636b0cSPierre Jolivet   }
172828636b0cSPierre Jolivet   PetscFunctionReturn(PETSC_SUCCESS);
172928636b0cSPierre Jolivet }
173028636b0cSPierre Jolivet 
MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool * f)1731ff6a9541SJacob Faibussowitsch static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f)
1732d71ae5a4SJacob Faibussowitsch {
1733453d3561SHong Zhang   Mat Btrans;
1734453d3561SHong Zhang 
1735453d3561SHong Zhang   PetscFunctionBegin;
1736acd337a6SBarry Smith   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
173728636b0cSPierre Jolivet   PetscCall(MatCompare_SeqBAIJ_Private(A, Btrans, tol, f));
17389566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Btrans));
17393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1740453d3561SHong Zhang }
1741453d3561SHong Zhang 
MatEqual_SeqBAIJ(Mat A,Mat B,PetscBool * flg)174228636b0cSPierre Jolivet static PetscErrorCode MatEqual_SeqBAIJ(Mat A, Mat B, PetscBool *flg)
174328636b0cSPierre Jolivet {
174428636b0cSPierre Jolivet   PetscFunctionBegin;
174528636b0cSPierre Jolivet   PetscCall(MatCompare_SeqBAIJ_Private(A, B, 0.0, flg));
174628636b0cSPierre Jolivet   PetscFunctionReturn(PETSC_SUCCESS);
174728636b0cSPierre Jolivet }
174828636b0cSPierre Jolivet 
1749618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)1750d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
1751d71ae5a4SJacob Faibussowitsch {
1752b51a4376SLisandro Dalcin   Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1753b51a4376SLisandro Dalcin   PetscInt     header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1754b51a4376SLisandro Dalcin   PetscInt    *rowlens, *colidxs;
1755b51a4376SLisandro Dalcin   PetscScalar *matvals;
17562593348eSBarry Smith 
17573a40ed3dSBarry Smith   PetscFunctionBegin;
17589566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
17593b2fbd54SBarry Smith 
1760b51a4376SLisandro Dalcin   M  = mat->rmap->N;
1761b51a4376SLisandro Dalcin   N  = mat->cmap->N;
1762b51a4376SLisandro Dalcin   m  = mat->rmap->n;
1763b51a4376SLisandro Dalcin   bs = mat->rmap->bs;
1764b51a4376SLisandro Dalcin   nz = bs * bs * A->nz;
17652593348eSBarry Smith 
1766b51a4376SLisandro Dalcin   /* write matrix header */
1767b51a4376SLisandro Dalcin   header[0] = MAT_FILE_CLASSID;
17689371c9d4SSatish Balay   header[1] = M;
17699371c9d4SSatish Balay   header[2] = N;
17709371c9d4SSatish Balay   header[3] = nz;
17719566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
17722593348eSBarry Smith 
1773b51a4376SLisandro Dalcin   /* store row lengths */
17749566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &rowlens));
1775b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
17769371c9d4SSatish Balay     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
17779566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
17789566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowlens));
1779b51a4376SLisandro Dalcin 
1780b51a4376SLisandro Dalcin   /* store column indices  */
17819566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &colidxs));
1782b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
1783b51a4376SLisandro Dalcin     for (k = 0; k < bs; k++)
1784b51a4376SLisandro Dalcin       for (j = A->i[i]; j < A->i[i + 1]; j++)
17859371c9d4SSatish Balay         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
17865f80ce2aSJacob Faibussowitsch   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
17879566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
17889566063dSJacob Faibussowitsch   PetscCall(PetscFree(colidxs));
17892593348eSBarry Smith 
17902593348eSBarry Smith   /* store nonzero values */
17919566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &matvals));
1792b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
1793b51a4376SLisandro Dalcin     for (k = 0; k < bs; k++)
1794b51a4376SLisandro Dalcin       for (j = A->i[i]; j < A->i[i + 1]; j++)
17959371c9d4SSatish Balay         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
17965f80ce2aSJacob Faibussowitsch   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
17979566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
17989566063dSJacob Faibussowitsch   PetscCall(PetscFree(matvals));
1799ce6f0cecSBarry Smith 
1800b51a4376SLisandro Dalcin   /* write block size option to the viewer's .info file */
18019566063dSJacob Faibussowitsch   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
18023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
18032593348eSBarry Smith }
18042593348eSBarry Smith 
MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)1805d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer)
1806d71ae5a4SJacob Faibussowitsch {
18077dc0baabSHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
18087dc0baabSHong Zhang   PetscInt     i, bs = A->rmap->bs, k;
18097dc0baabSHong Zhang 
18107dc0baabSHong Zhang   PetscFunctionBegin;
18119566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
18127dc0baabSHong Zhang   for (i = 0; i < a->mbs; i++) {
18139566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
181448a46eb9SPierre Jolivet     for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
18159566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
18167dc0baabSHong Zhang   }
18179566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
18183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
18197dc0baabSHong Zhang }
18207dc0baabSHong Zhang 
MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)1821d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer)
1822d71ae5a4SJacob Faibussowitsch {
1823b6490206SBarry Smith   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1824d0f46423SBarry Smith   PetscInt          i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1825f3ef73ceSBarry Smith   PetscViewerFormat format;
18262593348eSBarry Smith 
18273a40ed3dSBarry Smith   PetscFunctionBegin;
18287dc0baabSHong Zhang   if (A->structure_only) {
18299566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
18303ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
18317dc0baabSHong Zhang   }
18327dc0baabSHong Zhang 
18339566063dSJacob Faibussowitsch   PetscCall(PetscViewerGetFormat(viewer, &format));
1834456192e2SBarry Smith   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1835fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1836ade3a672SBarry Smith     const char *matname;
1837bcd9e38bSBarry Smith     Mat         aij;
18389566063dSJacob Faibussowitsch     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
18399566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetName((PetscObject)A, &matname));
18409566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)aij, matname));
18419566063dSJacob Faibussowitsch     PetscCall(MatView(aij, viewer));
18429566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&aij));
184304929863SHong Zhang   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
18443ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
1845fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
18469566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
184744cd7ae7SLois Curfman McInnes     for (i = 0; i < a->mbs; i++) {
184844cd7ae7SLois Curfman McInnes       for (j = 0; j < bs; j++) {
18499566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
185044cd7ae7SLois Curfman McInnes         for (k = a->i[i]; k < a->i[i + 1]; k++) {
185144cd7ae7SLois Curfman McInnes           for (l = 0; l < bs; l++) {
1852aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
18530e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18549371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18550e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18569371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18570e6d2581SBarry Smith             } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18589566063dSJacob Faibussowitsch               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
18590ef38995SBarry Smith             }
186044cd7ae7SLois Curfman McInnes #else
186148a46eb9SPierre Jolivet             if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
186244cd7ae7SLois Curfman McInnes #endif
186344cd7ae7SLois Curfman McInnes           }
186444cd7ae7SLois Curfman McInnes         }
18659566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
186644cd7ae7SLois Curfman McInnes       }
186744cd7ae7SLois Curfman McInnes     }
18689566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
18690ef38995SBarry Smith   } else {
18709566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1871b6490206SBarry Smith     for (i = 0; i < a->mbs; i++) {
1872b6490206SBarry Smith       for (j = 0; j < bs; j++) {
18739566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1874b6490206SBarry Smith         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1875b6490206SBarry Smith           for (l = 0; l < bs; l++) {
1876aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
18770e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
18789371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18790e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
18809371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18810ef38995SBarry Smith             } else {
18829566063dSJacob Faibussowitsch               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
188388685aaeSLois Curfman McInnes             }
188488685aaeSLois Curfman McInnes #else
18859566063dSJacob Faibussowitsch             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
188688685aaeSLois Curfman McInnes #endif
18872593348eSBarry Smith           }
18882593348eSBarry Smith         }
18899566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
18902593348eSBarry Smith       }
18912593348eSBarry Smith     }
18929566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1893b6490206SBarry Smith   }
18949566063dSJacob Faibussowitsch   PetscCall(PetscViewerFlush(viewer));
18953ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
18962593348eSBarry Smith }
18972593348eSBarry Smith 
18989804daf3SBarry Smith #include <petscdraw.h>
MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void * Aa)1899d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa)
1900d71ae5a4SJacob Faibussowitsch {
190177ed5343SBarry Smith   Mat               A = (Mat)Aa;
19023270192aSSatish Balay   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
19036497c311SBarry Smith   PetscInt          row, i, j, k, l, mbs = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
19040e6d2581SBarry Smith   PetscReal         xl, yl, xr, yr, x_l, x_r, y_l, y_r;
19053f1db9ecSBarry Smith   MatScalar        *aa;
1906b0a32e0cSBarry Smith   PetscViewer       viewer;
1907b3e7f47fSJed Brown   PetscViewerFormat format;
19086497c311SBarry Smith   int               color;
19093270192aSSatish Balay 
19103a40ed3dSBarry Smith   PetscFunctionBegin;
19119566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
19129566063dSJacob Faibussowitsch   PetscCall(PetscViewerGetFormat(viewer, &format));
19139566063dSJacob Faibussowitsch   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
191477ed5343SBarry Smith 
19153270192aSSatish Balay   /* loop over matrix elements drawing boxes */
1916b3e7f47fSJed Brown 
1917b3e7f47fSJed Brown   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1918d0609cedSBarry Smith     PetscDrawCollectiveBegin(draw);
1919383922c3SLisandro Dalcin     /* Blue for negative, Cyan for zero and  Red for positive */
1920b0a32e0cSBarry Smith     color = PETSC_DRAW_BLUE;
19213270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
19223270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19239371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19249371c9d4SSatish Balay         y_r = y_l + 1.0;
19259371c9d4SSatish Balay         x_l = a->j[j] * bs;
19269371c9d4SSatish Balay         x_r = x_l + 1.0;
19273270192aSSatish Balay         aa  = a->a + j * bs2;
19283270192aSSatish Balay         for (k = 0; k < bs; k++) {
19293270192aSSatish Balay           for (l = 0; l < bs; l++) {
19300e6d2581SBarry Smith             if (PetscRealPart(*aa++) >= 0.) continue;
19319566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
19323270192aSSatish Balay           }
19333270192aSSatish Balay         }
19343270192aSSatish Balay       }
19353270192aSSatish Balay     }
1936b0a32e0cSBarry Smith     color = PETSC_DRAW_CYAN;
19373270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
19383270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19399371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19409371c9d4SSatish Balay         y_r = y_l + 1.0;
19419371c9d4SSatish Balay         x_l = a->j[j] * bs;
19429371c9d4SSatish Balay         x_r = x_l + 1.0;
19433270192aSSatish Balay         aa  = a->a + j * bs2;
19443270192aSSatish Balay         for (k = 0; k < bs; k++) {
19453270192aSSatish Balay           for (l = 0; l < bs; l++) {
19460e6d2581SBarry Smith             if (PetscRealPart(*aa++) != 0.) continue;
19479566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
19483270192aSSatish Balay           }
19493270192aSSatish Balay         }
19503270192aSSatish Balay       }
19513270192aSSatish Balay     }
1952b0a32e0cSBarry Smith     color = PETSC_DRAW_RED;
19533270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
19543270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19559371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19569371c9d4SSatish Balay         y_r = y_l + 1.0;
19579371c9d4SSatish Balay         x_l = a->j[j] * bs;
19589371c9d4SSatish Balay         x_r = x_l + 1.0;
19593270192aSSatish Balay         aa  = a->a + j * bs2;
19603270192aSSatish Balay         for (k = 0; k < bs; k++) {
19613270192aSSatish Balay           for (l = 0; l < bs; l++) {
19620e6d2581SBarry Smith             if (PetscRealPart(*aa++) <= 0.) continue;
19639566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
19643270192aSSatish Balay           }
19653270192aSSatish Balay         }
19663270192aSSatish Balay       }
19673270192aSSatish Balay     }
1968d0609cedSBarry Smith     PetscDrawCollectiveEnd(draw);
1969b3e7f47fSJed Brown   } else {
1970b3e7f47fSJed Brown     /* use contour shading to indicate magnitude of values */
1971b3e7f47fSJed Brown     /* first determine max of all nonzero values */
1972b05fc000SLisandro Dalcin     PetscReal minv = 0.0, maxv = 0.0;
1973b3e7f47fSJed Brown     PetscDraw popup;
1974b3e7f47fSJed Brown 
1975b3e7f47fSJed Brown     for (i = 0; i < a->nz * a->bs2; i++) {
1976b3e7f47fSJed Brown       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1977b3e7f47fSJed Brown     }
1978383922c3SLisandro Dalcin     if (minv >= maxv) maxv = minv + PETSC_SMALL;
19799566063dSJacob Faibussowitsch     PetscCall(PetscDrawGetPopup(draw, &popup));
19809566063dSJacob Faibussowitsch     PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
1981383922c3SLisandro Dalcin 
1982d0609cedSBarry Smith     PetscDrawCollectiveBegin(draw);
1983b3e7f47fSJed Brown     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1984b3e7f47fSJed Brown       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19859371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19869371c9d4SSatish Balay         y_r = y_l + 1.0;
19879371c9d4SSatish Balay         x_l = a->j[j] * bs;
19889371c9d4SSatish Balay         x_r = x_l + 1.0;
1989b3e7f47fSJed Brown         aa  = a->a + j * bs2;
1990b3e7f47fSJed Brown         for (k = 0; k < bs; k++) {
1991b3e7f47fSJed Brown           for (l = 0; l < bs; l++) {
1992383922c3SLisandro Dalcin             MatScalar v = *aa++;
1993383922c3SLisandro Dalcin             color       = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
19949566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1995b3e7f47fSJed Brown           }
1996b3e7f47fSJed Brown         }
1997b3e7f47fSJed Brown       }
1998b3e7f47fSJed Brown     }
1999d0609cedSBarry Smith     PetscDrawCollectiveEnd(draw);
2000b3e7f47fSJed Brown   }
20013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
200277ed5343SBarry Smith }
20033270192aSSatish Balay 
MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)2004d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer)
2005d71ae5a4SJacob Faibussowitsch {
20060e6d2581SBarry Smith   PetscReal xl, yl, xr, yr, w, h;
2007b0a32e0cSBarry Smith   PetscDraw draw;
2008ace3abfcSBarry Smith   PetscBool isnull;
20093270192aSSatish Balay 
201077ed5343SBarry Smith   PetscFunctionBegin;
20119566063dSJacob Faibussowitsch   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
20129566063dSJacob Faibussowitsch   PetscCall(PetscDrawIsNull(draw, &isnull));
20133ba16761SJacob Faibussowitsch   if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
201477ed5343SBarry Smith 
20159371c9d4SSatish Balay   xr = A->cmap->n;
20169371c9d4SSatish Balay   yr = A->rmap->N;
20179371c9d4SSatish Balay   h  = yr / 10.0;
20189371c9d4SSatish Balay   w  = xr / 10.0;
20199371c9d4SSatish Balay   xr += w;
20209371c9d4SSatish Balay   yr += h;
20219371c9d4SSatish Balay   xl = -w;
20229371c9d4SSatish Balay   yl = -h;
20239566063dSJacob Faibussowitsch   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
20249566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
20259566063dSJacob Faibussowitsch   PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
20269566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
20279566063dSJacob Faibussowitsch   PetscCall(PetscDrawSave(draw));
20283ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
20293270192aSSatish Balay }
20303270192aSSatish Balay 
MatView_SeqBAIJ(Mat A,PetscViewer viewer)2031d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer)
2032d71ae5a4SJacob Faibussowitsch {
20339f196a02SMartin Diehl   PetscBool isascii, isbinary, isdraw;
20342593348eSBarry Smith 
20353a40ed3dSBarry Smith   PetscFunctionBegin;
20369f196a02SMartin Diehl   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
20379566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
20389566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
20399f196a02SMartin Diehl   if (isascii) {
20409566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
20410f5bd95cSBarry Smith   } else if (isbinary) {
20429566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
20430f5bd95cSBarry Smith   } else if (isdraw) {
20449566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
20455cd90555SBarry Smith   } else {
2046a5e6ed63SBarry Smith     Mat B;
20479566063dSJacob Faibussowitsch     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
20489566063dSJacob Faibussowitsch     PetscCall(MatView(B, viewer));
20499566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&B));
20502593348eSBarry Smith   }
20513ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
20522593348eSBarry Smith }
2053b6490206SBarry Smith 
MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])2054d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
2055d71ae5a4SJacob Faibussowitsch {
2056cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2057c1ac3661SBarry Smith   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2058c1ac3661SBarry Smith   PetscInt    *ai = a->i, *ailen = a->ilen;
2059d0f46423SBarry Smith   PetscInt     brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
206097e567efSBarry Smith   MatScalar   *ap, *aa = a->a;
2061cd0e1443SSatish Balay 
20623a40ed3dSBarry Smith   PetscFunctionBegin;
20632d61bbb3SSatish Balay   for (k = 0; k < m; k++) { /* loop over rows */
20649371c9d4SSatish Balay     row  = im[k];
20659371c9d4SSatish Balay     brow = row / bs;
20669371c9d4SSatish Balay     if (row < 0) {
20679371c9d4SSatish Balay       v += n;
20689371c9d4SSatish Balay       continue;
20699371c9d4SSatish Balay     } /* negative row */
207054c59aa7SJacob Faibussowitsch     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
20718e3a54c0SPierre Jolivet     rp   = PetscSafePointerPlusOffset(aj, ai[brow]);
20728e3a54c0SPierre Jolivet     ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
20732c3acbe9SBarry Smith     nrow = ailen[brow];
20742d61bbb3SSatish Balay     for (l = 0; l < n; l++) { /* loop over columns */
20759371c9d4SSatish Balay       if (in[l] < 0) {
20769371c9d4SSatish Balay         v++;
20779371c9d4SSatish Balay         continue;
20789371c9d4SSatish Balay       } /* negative column */
207954c59aa7SJacob Faibussowitsch       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
20802d61bbb3SSatish Balay       col  = in[l];
20812d61bbb3SSatish Balay       bcol = col / bs;
20822d61bbb3SSatish Balay       cidx = col % bs;
20832d61bbb3SSatish Balay       ridx = row % bs;
20842d61bbb3SSatish Balay       high = nrow;
20852d61bbb3SSatish Balay       low  = 0; /* assume unsorted */
20862d61bbb3SSatish Balay       while (high - low > 5) {
2087cd0e1443SSatish Balay         t = (low + high) / 2;
2088cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
2089cd0e1443SSatish Balay         else low = t;
2090cd0e1443SSatish Balay       }
2091cd0e1443SSatish Balay       for (i = low; i < high; i++) {
2092cd0e1443SSatish Balay         if (rp[i] > bcol) break;
2093cd0e1443SSatish Balay         if (rp[i] == bcol) {
20942d61bbb3SSatish Balay           *v++ = ap[bs2 * i + bs * cidx + ridx];
20952d61bbb3SSatish Balay           goto finished;
2096cd0e1443SSatish Balay         }
2097cd0e1443SSatish Balay       }
209897e567efSBarry Smith       *v++ = 0.0;
20992d61bbb3SSatish Balay     finished:;
2100cd0e1443SSatish Balay     }
2101cd0e1443SSatish Balay   }
21023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2103cd0e1443SSatish Balay }
2104cd0e1443SSatish Balay 
MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)2105d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2106d71ae5a4SJacob Faibussowitsch {
210792c4ed94SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
2108e2ee6c50SBarry Smith   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2109c1ac3661SBarry Smith   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2110d0f46423SBarry Smith   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2111ace3abfcSBarry Smith   PetscBool          roworiented = a->roworiented;
2112dd6ea824SBarry Smith   const PetscScalar *value       = v;
21139d243f67SHong Zhang   MatScalar         *ap = NULL, *aa = a->a, *bap;
211492c4ed94SBarry Smith 
21153a40ed3dSBarry Smith   PetscFunctionBegin;
21160e324ae4SSatish Balay   if (roworiented) {
21170e324ae4SSatish Balay     stepval = (n - 1) * bs;
21180e324ae4SSatish Balay   } else {
21190e324ae4SSatish Balay     stepval = (m - 1) * bs;
21200e324ae4SSatish Balay   }
212192c4ed94SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
212292c4ed94SBarry Smith     row = im[k];
21235ef9f2a5SBarry Smith     if (row < 0) continue;
21246bdcaf15SBarry Smith     PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
212592c4ed94SBarry Smith     rp = aj + ai[row];
21267dc0baabSHong Zhang     if (!A->structure_only) ap = aa + bs2 * ai[row];
212792c4ed94SBarry Smith     rmax = imax[row];
212892c4ed94SBarry Smith     nrow = ailen[row];
212992c4ed94SBarry Smith     low  = 0;
2130c71e6ed7SBarry Smith     high = nrow;
213192c4ed94SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
21325ef9f2a5SBarry Smith       if (in[l] < 0) continue;
21336bdcaf15SBarry Smith       PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
213492c4ed94SBarry Smith       col = in[l];
21357dc0baabSHong Zhang       if (!A->structure_only) {
213692c4ed94SBarry Smith         if (roworiented) {
213753ef36baSBarry Smith           value = v + (k * (stepval + bs) + l) * bs;
21380e324ae4SSatish Balay         } else {
213953ef36baSBarry Smith           value = v + (l * (stepval + bs) + k) * bs;
214092c4ed94SBarry Smith         }
21417dc0baabSHong Zhang       }
214226fbe8dcSKarl Rupp       if (col <= lastcol) low = 0;
214326fbe8dcSKarl Rupp       else high = nrow;
2144e2ee6c50SBarry Smith       lastcol = col;
214592c4ed94SBarry Smith       while (high - low > 7) {
214692c4ed94SBarry Smith         t = (low + high) / 2;
214792c4ed94SBarry Smith         if (rp[t] > col) high = t;
214892c4ed94SBarry Smith         else low = t;
214992c4ed94SBarry Smith       }
215092c4ed94SBarry Smith       for (i = low; i < high; i++) {
215192c4ed94SBarry Smith         if (rp[i] > col) break;
215292c4ed94SBarry Smith         if (rp[i] == col) {
21537dc0baabSHong Zhang           if (A->structure_only) goto noinsert2;
21548a84c255SSatish Balay           bap = ap + bs2 * i;
21550e324ae4SSatish Balay           if (roworiented) {
21568a84c255SSatish Balay             if (is == ADD_VALUES) {
2157dd9472c6SBarry Smith               for (ii = 0; ii < bs; ii++, value += stepval) {
2158ad540459SPierre Jolivet                 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2159dd9472c6SBarry Smith               }
21600e324ae4SSatish Balay             } else {
2161dd9472c6SBarry Smith               for (ii = 0; ii < bs; ii++, value += stepval) {
2162ad540459SPierre Jolivet                 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2163dd9472c6SBarry Smith               }
2164dd9472c6SBarry Smith             }
21650e324ae4SSatish Balay           } else {
21660e324ae4SSatish Balay             if (is == ADD_VALUES) {
216753ef36baSBarry Smith               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2168ad540459SPierre Jolivet                 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
216953ef36baSBarry Smith                 bap += bs;
2170dd9472c6SBarry Smith               }
21710e324ae4SSatish Balay             } else {
217253ef36baSBarry Smith               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2173ad540459SPierre Jolivet                 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
217453ef36baSBarry Smith                 bap += bs;
21758a84c255SSatish Balay               }
2176dd9472c6SBarry Smith             }
2177dd9472c6SBarry Smith           }
2178f1241b54SBarry Smith           goto noinsert2;
217992c4ed94SBarry Smith         }
218092c4ed94SBarry Smith       }
218189280ab3SLois Curfman McInnes       if (nonew == 1) goto noinsert2;
21825f80ce2aSJacob Faibussowitsch       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
21837dc0baabSHong Zhang       if (A->structure_only) {
21847dc0baabSHong Zhang         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
21857dc0baabSHong Zhang       } else {
2186fef13f97SBarry Smith         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
21877dc0baabSHong Zhang       }
21889371c9d4SSatish Balay       N = nrow++ - 1;
21899371c9d4SSatish Balay       high++;
219092c4ed94SBarry Smith       /* shift up all the later entries in this row */
21919566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
219292c4ed94SBarry Smith       rp[i] = col;
21937dc0baabSHong Zhang       if (!A->structure_only) {
21949566063dSJacob Faibussowitsch         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
21958a84c255SSatish Balay         bap = ap + bs2 * i;
21960e324ae4SSatish Balay         if (roworiented) {
2197dd9472c6SBarry Smith           for (ii = 0; ii < bs; ii++, value += stepval) {
2198ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2199dd9472c6SBarry Smith           }
22000e324ae4SSatish Balay         } else {
2201dd9472c6SBarry Smith           for (ii = 0; ii < bs; ii++, value += stepval) {
2202ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2203dd9472c6SBarry Smith           }
2204dd9472c6SBarry Smith         }
22057dc0baabSHong Zhang       }
2206f1241b54SBarry Smith     noinsert2:;
220792c4ed94SBarry Smith       low = i;
220892c4ed94SBarry Smith     }
220992c4ed94SBarry Smith     ailen[row] = nrow;
221092c4ed94SBarry Smith   }
22113ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
221292c4ed94SBarry Smith }
221326e093fcSHong Zhang 
MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)2214d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode)
2215d71ae5a4SJacob Faibussowitsch {
2216584200bdSSatish Balay   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2217580bdb30SBarry Smith   PetscInt     fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2218d0f46423SBarry Smith   PetscInt     m = A->rmap->N, *ip, N, *ailen = a->ilen;
2219c1ac3661SBarry Smith   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
22203f1db9ecSBarry Smith   MatScalar   *aa    = a->a, *ap;
22213447b6efSHong Zhang   PetscReal    ratio = 0.6;
2222584200bdSSatish Balay 
22233a40ed3dSBarry Smith   PetscFunctionBegin;
2224d32568d8SPierre Jolivet   if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS);
2225584200bdSSatish Balay 
222643ee02c3SBarry Smith   if (m) rmax = ailen[0];
2227584200bdSSatish Balay   for (i = 1; i < mbs; i++) {
2228584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
2229584200bdSSatish Balay     fshift += imax[i - 1] - ailen[i - 1];
2230d402145bSBarry Smith     rmax = PetscMax(rmax, ailen[i]);
2231584200bdSSatish Balay     if (fshift) {
2232580bdb30SBarry Smith       ip = aj + ai[i];
2233580bdb30SBarry Smith       ap = aa + bs2 * ai[i];
2234584200bdSSatish Balay       N  = ailen[i];
22359566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(ip - fshift, ip, N));
223648a46eb9SPierre Jolivet       if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2237672ba085SHong Zhang     }
2238584200bdSSatish Balay     ai[i] = ai[i - 1] + ailen[i - 1];
2239584200bdSSatish Balay   }
2240584200bdSSatish Balay   if (mbs) {
2241584200bdSSatish Balay     fshift += imax[mbs - 1] - ailen[mbs - 1];
2242584200bdSSatish Balay     ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2243584200bdSSatish Balay   }
22447c565772SBarry Smith 
2245584200bdSSatish Balay   /* reset ilen and imax for each row */
22467c565772SBarry Smith   a->nonzerorowcnt = 0;
2247672ba085SHong Zhang   if (A->structure_only) {
22489566063dSJacob Faibussowitsch     PetscCall(PetscFree2(a->imax, a->ilen));
2249672ba085SHong Zhang   } else { /* !A->structure_only */
2250584200bdSSatish Balay     for (i = 0; i < mbs; i++) {
2251584200bdSSatish Balay       ailen[i] = imax[i] = ai[i + 1] - ai[i];
22527c565772SBarry Smith       a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2253584200bdSSatish Balay     }
2254672ba085SHong Zhang   }
2255a7c10996SSatish Balay   a->nz = ai[mbs];
2256584200bdSSatish Balay 
2257584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
2258b01c7715SBarry Smith   a->idiagvalid = PETSC_FALSE;
22595f80ce2aSJacob Faibussowitsch   if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
22609566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
22619566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
22629566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
226326fbe8dcSKarl Rupp 
22648e58a170SBarry Smith   A->info.mallocs += a->reallocs;
2265e2f3b5e9SSatish Balay   a->reallocs         = 0;
22660e6d2581SBarry Smith   A->info.nz_unneeded = (PetscReal)fshift * bs2;
2267647a6520SHong Zhang   a->rmax             = rmax;
2268cf4441caSHong Zhang 
226948a46eb9SPierre Jolivet   if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
22703ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2271584200bdSSatish Balay }
2272584200bdSSatish Balay 
2273bea157c4SSatish Balay /*
2274bea157c4SSatish Balay    This function returns an array of flags which indicate the locations of contiguous
2275bea157c4SSatish Balay    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2276a5b23f4aSJose E. Roman    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2277bea157c4SSatish Balay    Assume: sizes should be long enough to hold all the values.
2278bea157c4SSatish Balay */
MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[],PetscInt * bs_max)2279d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max)
2280d71ae5a4SJacob Faibussowitsch {
2281ff6a9541SJacob Faibussowitsch   PetscInt j = 0;
22823a40ed3dSBarry Smith 
2283433994e6SBarry Smith   PetscFunctionBegin;
2284ff6a9541SJacob Faibussowitsch   for (PetscInt i = 0; i < n; j++) {
2285ff6a9541SJacob Faibussowitsch     PetscInt row = idx[i];
2286a5b23f4aSJose E. Roman     if (row % bs != 0) { /* Not the beginning of a block */
2287bea157c4SSatish Balay       sizes[j] = 1;
2288bea157c4SSatish Balay       i++;
2289e4fda26cSSatish Balay     } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2290bea157c4SSatish Balay       sizes[j] = 1;          /* Also makes sure at least 'bs' values exist for next else */
2291bea157c4SSatish Balay       i++;
22926aad120cSJose E. Roman     } else { /* Beginning of the block, so check if the complete block exists */
2293ff6a9541SJacob Faibussowitsch       PetscBool flg = PETSC_TRUE;
2294ff6a9541SJacob Faibussowitsch       for (PetscInt k = 1; k < bs; k++) {
2295bea157c4SSatish Balay         if (row + k != idx[i + k]) { /* break in the block */
2296bea157c4SSatish Balay           flg = PETSC_FALSE;
2297bea157c4SSatish Balay           break;
2298d9b7c43dSSatish Balay         }
2299bea157c4SSatish Balay       }
2300abc0a331SBarry Smith       if (flg) { /* No break in the bs */
2301bea157c4SSatish Balay         sizes[j] = bs;
2302bea157c4SSatish Balay         i += bs;
2303bea157c4SSatish Balay       } else {
2304bea157c4SSatish Balay         sizes[j] = 1;
2305bea157c4SSatish Balay         i++;
2306bea157c4SSatish Balay       }
2307bea157c4SSatish Balay     }
2308bea157c4SSatish Balay   }
2309bea157c4SSatish Balay   *bs_max = j;
23103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2311d9b7c43dSSatish Balay }
2312d9b7c43dSSatish Balay 
MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x,Vec b)2313d71ae5a4SJacob Faibussowitsch PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2314d71ae5a4SJacob Faibussowitsch {
2315d9b7c43dSSatish Balay   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2316f4df32b1SMatthew Knepley   PetscInt           i, j, k, count, *rows;
2317d0f46423SBarry Smith   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
231887828ca2SBarry Smith   PetscScalar        zero = 0.0;
23193f1db9ecSBarry Smith   MatScalar         *aa;
232097b48c8fSBarry Smith   const PetscScalar *xx;
232197b48c8fSBarry Smith   PetscScalar       *bb;
2322d9b7c43dSSatish Balay 
23233a40ed3dSBarry Smith   PetscFunctionBegin;
2324dd8e379bSPierre Jolivet   /* fix right-hand side if needed */
232597b48c8fSBarry Smith   if (x && b) {
23269566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
23279566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b, &bb));
2328ad540459SPierre Jolivet     for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
23299566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
23309566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b, &bb));
233197b48c8fSBarry Smith   }
233297b48c8fSBarry Smith 
2333d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
2334bea157c4SSatish Balay   /* allocate memory for rows,sizes */
23359566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2336bea157c4SSatish Balay 
2337563b5814SBarry Smith   /* copy IS values to rows, and sort them */
233826fbe8dcSKarl Rupp   for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
23399566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(is_n, rows));
234097b48c8fSBarry Smith 
2341a9817697SBarry Smith   if (baij->keepnonzeropattern) {
234226fbe8dcSKarl Rupp     for (i = 0; i < is_n; i++) sizes[i] = 1;
2343dffd3267SBarry Smith     bs_max = is_n;
2344dffd3267SBarry Smith   } else {
23459566063dSJacob Faibussowitsch     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2346e56f5c9eSBarry Smith     A->nonzerostate++;
2347dffd3267SBarry Smith   }
2348bea157c4SSatish Balay 
2349bea157c4SSatish Balay   for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2350bea157c4SSatish Balay     row = rows[j];
23515f80ce2aSJacob Faibussowitsch     PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2352bea157c4SSatish Balay     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2353835f2295SStefano Zampini     aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
2354a9817697SBarry Smith     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2355d4a378daSJed Brown       if (diag != (PetscScalar)0.0) {
2356bea157c4SSatish Balay         if (baij->ilen[row / bs] > 0) {
2357bea157c4SSatish Balay           baij->ilen[row / bs]       = 1;
2358bea157c4SSatish Balay           baij->j[baij->i[row / bs]] = row / bs;
235926fbe8dcSKarl Rupp 
23609566063dSJacob Faibussowitsch           PetscCall(PetscArrayzero(aa, count * bs));
2361a07cd24cSSatish Balay         }
2362563b5814SBarry Smith         /* Now insert all the diagonal values for this bs */
23639927e4dfSBarry Smith         for (k = 0; k < bs; k++) PetscUseTypeMethod(A, setvalues, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES);
2364f4df32b1SMatthew Knepley       } else { /* (diag == 0.0) */
2365bea157c4SSatish Balay         baij->ilen[row / bs] = 0;
2366f4df32b1SMatthew Knepley       } /* end (diag == 0.0) */
2367bea157c4SSatish Balay     } else { /* (sizes[i] != bs) */
23686bdcaf15SBarry Smith       PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2369bea157c4SSatish Balay       for (k = 0; k < count; k++) {
2370d9b7c43dSSatish Balay         aa[0] = zero;
2371d9b7c43dSSatish Balay         aa += bs;
2372d9b7c43dSSatish Balay       }
23739927e4dfSBarry Smith       if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES);
2374d9b7c43dSSatish Balay     }
2375bea157c4SSatish Balay   }
2376bea157c4SSatish Balay 
23779566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rows, sizes));
23789566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
23793ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2380d9b7c43dSSatish Balay }
23811c351548SSatish Balay 
MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x,Vec b)2382ff6a9541SJacob Faibussowitsch static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2383d71ae5a4SJacob Faibussowitsch {
238497b48c8fSBarry Smith   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
238597b48c8fSBarry Smith   PetscInt           i, j, k, count;
238697b48c8fSBarry Smith   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, row, col;
238797b48c8fSBarry Smith   PetscScalar        zero = 0.0;
238897b48c8fSBarry Smith   MatScalar         *aa;
238997b48c8fSBarry Smith   const PetscScalar *xx;
239097b48c8fSBarry Smith   PetscScalar       *bb;
239156777dd2SBarry Smith   PetscBool         *zeroed, vecs = PETSC_FALSE;
239297b48c8fSBarry Smith 
239397b48c8fSBarry Smith   PetscFunctionBegin;
2394dd8e379bSPierre Jolivet   /* fix right-hand side if needed */
239597b48c8fSBarry Smith   if (x && b) {
23969566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
23979566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b, &bb));
239856777dd2SBarry Smith     vecs = PETSC_TRUE;
239997b48c8fSBarry Smith   }
240097b48c8fSBarry Smith 
240197b48c8fSBarry Smith   /* zero the columns */
24029566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
240397b48c8fSBarry Smith   for (i = 0; i < is_n; i++) {
24045f80ce2aSJacob Faibussowitsch     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
240597b48c8fSBarry Smith     zeroed[is_idx[i]] = PETSC_TRUE;
240697b48c8fSBarry Smith   }
240797b48c8fSBarry Smith   for (i = 0; i < A->rmap->N; i++) {
240897b48c8fSBarry Smith     if (!zeroed[i]) {
240997b48c8fSBarry Smith       row = i / bs;
241097b48c8fSBarry Smith       for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
241197b48c8fSBarry Smith         for (k = 0; k < bs; k++) {
241297b48c8fSBarry Smith           col = bs * baij->j[j] + k;
241397b48c8fSBarry Smith           if (zeroed[col]) {
2414835f2295SStefano Zampini             aa = baij->a + j * bs2 + (i % bs) + bs * k;
241556777dd2SBarry Smith             if (vecs) bb[i] -= aa[0] * xx[col];
241697b48c8fSBarry Smith             aa[0] = 0.0;
241797b48c8fSBarry Smith           }
241897b48c8fSBarry Smith         }
241997b48c8fSBarry Smith       }
242056777dd2SBarry Smith     } else if (vecs) bb[i] = diag * xx[i];
242197b48c8fSBarry Smith   }
24229566063dSJacob Faibussowitsch   PetscCall(PetscFree(zeroed));
242356777dd2SBarry Smith   if (vecs) {
24249566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
24259566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b, &bb));
242656777dd2SBarry Smith   }
242797b48c8fSBarry Smith 
242897b48c8fSBarry Smith   /* zero the rows */
242997b48c8fSBarry Smith   for (i = 0; i < is_n; i++) {
243097b48c8fSBarry Smith     row   = is_idx[i];
243197b48c8fSBarry Smith     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2432835f2295SStefano Zampini     aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
243397b48c8fSBarry Smith     for (k = 0; k < count; k++) {
243497b48c8fSBarry Smith       aa[0] = zero;
243597b48c8fSBarry Smith       aa += bs;
243697b48c8fSBarry Smith     }
2437dbbe0bcdSBarry Smith     if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
243897b48c8fSBarry Smith   }
24399566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
24403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
244197b48c8fSBarry Smith }
244297b48c8fSBarry Smith 
MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)2443d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2444d71ae5a4SJacob Faibussowitsch {
24452d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2446e2ee6c50SBarry Smith   PetscInt    *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2447c1ac3661SBarry Smith   PetscInt    *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2448d0f46423SBarry Smith   PetscInt    *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2449c1ac3661SBarry Smith   PetscInt     ridx, cidx, bs2                 = a->bs2;
2450ace3abfcSBarry Smith   PetscBool    roworiented = a->roworiented;
2451d8cdefa3SHong Zhang   MatScalar   *ap = NULL, value = 0.0, *aa = a->a, *bap;
24522d61bbb3SSatish Balay 
24532d61bbb3SSatish Balay   PetscFunctionBegin;
24542d61bbb3SSatish Balay   for (k = 0; k < m; k++) { /* loop over added rows */
2455085a36d4SBarry Smith     row  = im[k];
2456085a36d4SBarry Smith     brow = row / bs;
24575ef9f2a5SBarry Smith     if (row < 0) continue;
24586bdcaf15SBarry Smith     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
24598e3a54c0SPierre Jolivet     rp = PetscSafePointerPlusOffset(aj, ai[brow]);
24608e3a54c0SPierre Jolivet     if (!A->structure_only) ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
24612d61bbb3SSatish Balay     rmax = imax[brow];
24622d61bbb3SSatish Balay     nrow = ailen[brow];
24632d61bbb3SSatish Balay     low  = 0;
2464c71e6ed7SBarry Smith     high = nrow;
24652d61bbb3SSatish Balay     for (l = 0; l < n; l++) { /* loop over added columns */
24665ef9f2a5SBarry Smith       if (in[l] < 0) continue;
24676bdcaf15SBarry Smith       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
24689371c9d4SSatish Balay       col  = in[l];
24699371c9d4SSatish Balay       bcol = col / bs;
24709371c9d4SSatish Balay       ridx = row % bs;
24719371c9d4SSatish Balay       cidx = col % bs;
2472672ba085SHong Zhang       if (!A->structure_only) {
24732d61bbb3SSatish Balay         if (roworiented) {
24745ef9f2a5SBarry Smith           value = v[l + k * n];
24752d61bbb3SSatish Balay         } else {
24762d61bbb3SSatish Balay           value = v[k + l * m];
24772d61bbb3SSatish Balay         }
2478672ba085SHong Zhang       }
24799371c9d4SSatish Balay       if (col <= lastcol) low = 0;
24809371c9d4SSatish Balay       else high = nrow;
2481e2ee6c50SBarry Smith       lastcol = col;
24822d61bbb3SSatish Balay       while (high - low > 7) {
24832d61bbb3SSatish Balay         t = (low + high) / 2;
24842d61bbb3SSatish Balay         if (rp[t] > bcol) high = t;
24852d61bbb3SSatish Balay         else low = t;
24862d61bbb3SSatish Balay       }
24872d61bbb3SSatish Balay       for (i = low; i < high; i++) {
24882d61bbb3SSatish Balay         if (rp[i] > bcol) break;
24892d61bbb3SSatish Balay         if (rp[i] == bcol) {
24908e3a54c0SPierre Jolivet           bap = PetscSafePointerPlusOffset(ap, bs2 * i + bs * cidx + ridx);
2491672ba085SHong Zhang           if (!A->structure_only) {
24922d61bbb3SSatish Balay             if (is == ADD_VALUES) *bap += value;
24932d61bbb3SSatish Balay             else *bap = value;
2494672ba085SHong Zhang           }
24952d61bbb3SSatish Balay           goto noinsert1;
24962d61bbb3SSatish Balay         }
24972d61bbb3SSatish Balay       }
24982d61bbb3SSatish Balay       if (nonew == 1) goto noinsert1;
24995f80ce2aSJacob Faibussowitsch       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2500672ba085SHong Zhang       if (A->structure_only) {
2501672ba085SHong Zhang         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2502672ba085SHong Zhang       } else {
2503fef13f97SBarry Smith         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2504672ba085SHong Zhang       }
25059371c9d4SSatish Balay       N = nrow++ - 1;
25069371c9d4SSatish Balay       high++;
25072d61bbb3SSatish Balay       /* shift up all the later entries in this row */
25089566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
25092d61bbb3SSatish Balay       rp[i] = bcol;
2510580bdb30SBarry Smith       if (!A->structure_only) {
25119566063dSJacob Faibussowitsch         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
25129566063dSJacob Faibussowitsch         PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2513580bdb30SBarry Smith         ap[bs2 * i + bs * cidx + ridx] = value;
2514580bdb30SBarry Smith       }
2515085a36d4SBarry Smith       a->nz++;
25162d61bbb3SSatish Balay     noinsert1:;
25172d61bbb3SSatish Balay       low = i;
25182d61bbb3SSatish Balay     }
25192d61bbb3SSatish Balay     ailen[brow] = nrow;
25202d61bbb3SSatish Balay   }
25213ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25222d61bbb3SSatish Balay }
25232d61bbb3SSatish Balay 
MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo * info)2524ff6a9541SJacob Faibussowitsch static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info)
2525d71ae5a4SJacob Faibussowitsch {
25262d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
25272d61bbb3SSatish Balay   Mat          outA;
2528ace3abfcSBarry Smith   PetscBool    row_identity, col_identity;
25292d61bbb3SSatish Balay 
25302d61bbb3SSatish Balay   PetscFunctionBegin;
25315f80ce2aSJacob Faibussowitsch   PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
25329566063dSJacob Faibussowitsch   PetscCall(ISIdentity(row, &row_identity));
25339566063dSJacob Faibussowitsch   PetscCall(ISIdentity(col, &col_identity));
25345f80ce2aSJacob Faibussowitsch   PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
25352d61bbb3SSatish Balay 
25362d61bbb3SSatish Balay   outA            = inA;
2537d5f3da31SBarry Smith   inA->factortype = MAT_FACTOR_LU;
25389566063dSJacob Faibussowitsch   PetscCall(PetscFree(inA->solvertype));
25399566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
25402d61bbb3SSatish Balay 
25419566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)row));
25429566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->row));
2543c3122656SLisandro Dalcin   a->row = row;
25449566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)col));
25459566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->col));
2546c3122656SLisandro Dalcin   a->col = col;
2547c38d4ed2SBarry Smith 
2548c38d4ed2SBarry Smith   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
25499566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->icol));
25509566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2551c38d4ed2SBarry Smith 
25529566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2553aa624791SPierre Jolivet   if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work));
25549566063dSJacob Faibussowitsch   PetscCall(MatLUFactorNumeric(outA, inA, info));
25553ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25562d61bbb3SSatish Balay }
2557d9b7c43dSSatish Balay 
MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,const PetscInt * indices)2558ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices)
2559d71ae5a4SJacob Faibussowitsch {
256027a8da17SBarry Smith   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
256127a8da17SBarry Smith 
256227a8da17SBarry Smith   PetscFunctionBegin;
2563ff6a9541SJacob Faibussowitsch   baij->nz = baij->maxnz;
2564ff6a9541SJacob Faibussowitsch   PetscCall(PetscArraycpy(baij->j, indices, baij->nz));
2565ff6a9541SJacob Faibussowitsch   PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs));
25663ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
256727a8da17SBarry Smith }
256827a8da17SBarry Smith 
256927a8da17SBarry Smith /*@
2570d8a51d2aSBarry Smith   MatSeqBAIJSetColumnIndices - Set the column indices for all the block rows in the matrix.
257127a8da17SBarry Smith 
257227a8da17SBarry Smith   Input Parameters:
257311a5261eSBarry Smith + mat     - the `MATSEQBAIJ` matrix
2574d8a51d2aSBarry Smith - indices - the block column indices
257527a8da17SBarry Smith 
257615091d37SBarry Smith   Level: advanced
257715091d37SBarry Smith 
257827a8da17SBarry Smith   Notes:
257927a8da17SBarry Smith   This can be called if you have precomputed the nonzero structure of the
258027a8da17SBarry Smith   matrix and want to provide it to the matrix object to improve the performance
258111a5261eSBarry Smith   of the `MatSetValues()` operation.
258227a8da17SBarry Smith 
258327a8da17SBarry Smith   You MUST have set the correct numbers of nonzeros per row in the call to
258411a5261eSBarry Smith   `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
258527a8da17SBarry Smith 
258611a5261eSBarry Smith   MUST be called before any calls to `MatSetValues()`
258727a8da17SBarry Smith 
25881cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()`
258927a8da17SBarry Smith @*/
MatSeqBAIJSetColumnIndices(Mat mat,PetscInt * indices)2590d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices)
2591d71ae5a4SJacob Faibussowitsch {
259227a8da17SBarry Smith   PetscFunctionBegin;
25930700a824SBarry Smith   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
25944f572ea9SToby Isaac   PetscAssertPointer(indices, 2);
2595810441c8SPierre Jolivet   PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, const PetscInt *), (mat, (const PetscInt *)indices));
25963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
259727a8da17SBarry Smith }
259827a8da17SBarry Smith 
MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])259966976f2fSJacob Faibussowitsch static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[])
2600d71ae5a4SJacob Faibussowitsch {
2601273d9f13SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2602c1ac3661SBarry Smith   PetscInt     i, j, n, row, bs, *ai, *aj, mbs;
2603273d9f13SBarry Smith   PetscReal    atmp;
260487828ca2SBarry Smith   PetscScalar *x, zero = 0.0;
2605273d9f13SBarry Smith   MatScalar   *aa;
2606c1ac3661SBarry Smith   PetscInt     ncols, brow, krow, kcol;
2607273d9f13SBarry Smith 
2608273d9f13SBarry Smith   PetscFunctionBegin;
26095f80ce2aSJacob Faibussowitsch   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2610d0f46423SBarry Smith   bs  = A->rmap->bs;
2611273d9f13SBarry Smith   aa  = a->a;
2612273d9f13SBarry Smith   ai  = a->i;
2613273d9f13SBarry Smith   aj  = a->j;
2614273d9f13SBarry Smith   mbs = a->mbs;
2615273d9f13SBarry Smith 
26169566063dSJacob Faibussowitsch   PetscCall(VecSet(v, zero));
26179566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &x));
26189566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(v, &n));
26195f80ce2aSJacob Faibussowitsch   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2620273d9f13SBarry Smith   for (i = 0; i < mbs; i++) {
26219371c9d4SSatish Balay     ncols = ai[1] - ai[0];
26229371c9d4SSatish Balay     ai++;
2623273d9f13SBarry Smith     brow = bs * i;
2624273d9f13SBarry Smith     for (j = 0; j < ncols; j++) {
2625273d9f13SBarry Smith       for (kcol = 0; kcol < bs; kcol++) {
2626273d9f13SBarry Smith         for (krow = 0; krow < bs; krow++) {
26279371c9d4SSatish Balay           atmp = PetscAbsScalar(*aa);
26289371c9d4SSatish Balay           aa++;
2629273d9f13SBarry Smith           row = brow + krow; /* row index */
26309371c9d4SSatish Balay           if (PetscAbsScalar(x[row]) < atmp) {
26319371c9d4SSatish Balay             x[row] = atmp;
26329371c9d4SSatish Balay             if (idx) idx[row] = bs * (*aj) + kcol;
26339371c9d4SSatish Balay           }
2634273d9f13SBarry Smith         }
2635273d9f13SBarry Smith       }
2636273d9f13SBarry Smith       aj++;
2637273d9f13SBarry Smith     }
2638273d9f13SBarry Smith   }
26399566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &x));
26403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2641273d9f13SBarry Smith }
2642273d9f13SBarry Smith 
MatGetRowSumAbs_SeqBAIJ(Mat A,Vec v)2643eede4a3fSMark Adams static PetscErrorCode MatGetRowSumAbs_SeqBAIJ(Mat A, Vec v)
2644eede4a3fSMark Adams {
2645eede4a3fSMark Adams   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2646eede4a3fSMark Adams   PetscInt     i, j, n, row, bs, *ai, mbs;
2647eede4a3fSMark Adams   PetscReal    atmp;
2648eede4a3fSMark Adams   PetscScalar *x, zero = 0.0;
2649eede4a3fSMark Adams   MatScalar   *aa;
2650eede4a3fSMark Adams   PetscInt     ncols, brow, krow, kcol;
2651eede4a3fSMark Adams 
2652eede4a3fSMark Adams   PetscFunctionBegin;
2653eede4a3fSMark Adams   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2654eede4a3fSMark Adams   bs  = A->rmap->bs;
2655eede4a3fSMark Adams   aa  = a->a;
2656eede4a3fSMark Adams   ai  = a->i;
2657eede4a3fSMark Adams   mbs = a->mbs;
2658eede4a3fSMark Adams 
2659eede4a3fSMark Adams   PetscCall(VecSet(v, zero));
2660eede4a3fSMark Adams   PetscCall(VecGetArrayWrite(v, &x));
2661eede4a3fSMark Adams   PetscCall(VecGetLocalSize(v, &n));
2662eede4a3fSMark Adams   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2663eede4a3fSMark Adams   for (i = 0; i < mbs; i++) {
2664eede4a3fSMark Adams     ncols = ai[1] - ai[0];
2665eede4a3fSMark Adams     ai++;
2666eede4a3fSMark Adams     brow = bs * i;
2667eede4a3fSMark Adams     for (j = 0; j < ncols; j++) {
2668eede4a3fSMark Adams       for (kcol = 0; kcol < bs; kcol++) {
2669eede4a3fSMark Adams         for (krow = 0; krow < bs; krow++) {
2670eede4a3fSMark Adams           atmp = PetscAbsScalar(*aa);
2671eede4a3fSMark Adams           aa++;
2672eede4a3fSMark Adams           row = brow + krow; /* row index */
2673eede4a3fSMark Adams           x[row] += atmp;
2674eede4a3fSMark Adams         }
2675eede4a3fSMark Adams       }
2676eede4a3fSMark Adams     }
2677eede4a3fSMark Adams   }
2678eede4a3fSMark Adams   PetscCall(VecRestoreArrayWrite(v, &x));
2679eede4a3fSMark Adams   PetscFunctionReturn(PETSC_SUCCESS);
2680eede4a3fSMark Adams }
2681eede4a3fSMark Adams 
MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)268266976f2fSJacob Faibussowitsch static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str)
2683d71ae5a4SJacob Faibussowitsch {
26843c896bc6SHong Zhang   PetscFunctionBegin;
26853c896bc6SHong Zhang   /* If the two matrices have the same copy implementation, use fast copy. */
26863c896bc6SHong Zhang   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
26873c896bc6SHong Zhang     Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)A->data;
26883c896bc6SHong Zhang     Mat_SeqBAIJ *b    = (Mat_SeqBAIJ *)B->data;
2689d88c0aacSHong Zhang     PetscInt     ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
26903c896bc6SHong Zhang 
26915f80ce2aSJacob Faibussowitsch     PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
26925f80ce2aSJacob Faibussowitsch     PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
26939566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
26949566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateIncrease((PetscObject)B));
26953c896bc6SHong Zhang   } else {
26969566063dSJacob Faibussowitsch     PetscCall(MatCopy_Basic(A, B, str));
26973c896bc6SHong Zhang   }
26983ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
26993c896bc6SHong Zhang }
27003c896bc6SHong Zhang 
MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar * array[])2701d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[])
2702d71ae5a4SJacob Faibussowitsch {
2703f2a5309cSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
27046e111a19SKarl Rupp 
2705f2a5309cSSatish Balay   PetscFunctionBegin;
2706f2a5309cSSatish Balay   *array = a->a;
27073ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2708f2a5309cSSatish Balay }
2709f2a5309cSSatish Balay 
MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar * array[])2710d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[])
2711d71ae5a4SJacob Faibussowitsch {
2712f2a5309cSSatish Balay   PetscFunctionBegin;
2713cda14afcSprj-   *array = NULL;
27143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2715f2a5309cSSatish Balay }
2716f2a5309cSSatish Balay 
MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt * nnz)2717d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz)
2718d71ae5a4SJacob Faibussowitsch {
2719b264fe52SHong Zhang   PetscInt     bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
272052768537SHong Zhang   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
272152768537SHong Zhang   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
272252768537SHong Zhang 
272352768537SHong Zhang   PetscFunctionBegin;
272452768537SHong Zhang   /* Set the number of nonzeros in the new matrix */
27259566063dSJacob Faibussowitsch   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
27263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
272752768537SHong Zhang }
272852768537SHong Zhang 
MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)2729d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2730d71ae5a4SJacob Faibussowitsch {
273142ee4b1aSHong Zhang   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
273231ce2d13SHong Zhang   PetscInt     bs = Y->rmap->bs, bs2 = bs * bs;
2733e838b9e7SJed Brown   PetscBLASInt one = 1;
273442ee4b1aSHong Zhang 
273542ee4b1aSHong Zhang   PetscFunctionBegin;
2736134adf20SPierre Jolivet   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2737134adf20SPierre Jolivet     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2738134adf20SPierre Jolivet     if (e) {
27399566063dSJacob Faibussowitsch       PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2740134adf20SPierre Jolivet       if (e) {
27419566063dSJacob Faibussowitsch         PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2742134adf20SPierre Jolivet         if (e) str = SAME_NONZERO_PATTERN;
2743134adf20SPierre Jolivet       }
2744134adf20SPierre Jolivet     }
274554c59aa7SJacob Faibussowitsch     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2746134adf20SPierre Jolivet   }
274742ee4b1aSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
2748f4df32b1SMatthew Knepley     PetscScalar  alpha = a;
2749c5df96a5SBarry Smith     PetscBLASInt bnz;
27509566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2751792fecdfSBarry Smith     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
27529566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2753ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
27549566063dSJacob Faibussowitsch     PetscCall(MatAXPY_Basic(Y, a, X, str));
275542ee4b1aSHong Zhang   } else {
275652768537SHong Zhang     Mat       B;
275752768537SHong Zhang     PetscInt *nnz;
275854c59aa7SJacob Faibussowitsch     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
27599566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
27609566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
27619566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
27629566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
27639566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
27649566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
27659566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
27669566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
27679566063dSJacob Faibussowitsch     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
27689566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(Y, &B));
27699566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz));
277042ee4b1aSHong Zhang   }
27713ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
277242ee4b1aSHong Zhang }
277342ee4b1aSHong Zhang 
MatConjugate_SeqBAIJ(Mat A)2774d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2775d71ae5a4SJacob Faibussowitsch {
27762726fb6dSPierre Jolivet   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
27772726fb6dSPierre Jolivet   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
27782726fb6dSPierre Jolivet   MatScalar   *aa = a->a;
27792726fb6dSPierre Jolivet 
27802726fb6dSPierre Jolivet   PetscFunctionBegin;
27812726fb6dSPierre Jolivet   for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
27823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
27832726fb6dSPierre Jolivet }
27842726fb6dSPierre Jolivet 
MatRealPart_SeqBAIJ(Mat A)2785ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2786d71ae5a4SJacob Faibussowitsch {
2787ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX)
278899cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
278999cafbc1SBarry Smith   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2790dd6ea824SBarry Smith   MatScalar   *aa = a->a;
279199cafbc1SBarry Smith 
279299cafbc1SBarry Smith   PetscFunctionBegin;
279399cafbc1SBarry Smith   for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
27943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2795ff6a9541SJacob Faibussowitsch #else
2796ff6a9541SJacob Faibussowitsch   (void)A;
2797ff6a9541SJacob Faibussowitsch   return PETSC_SUCCESS;
2798ff6a9541SJacob Faibussowitsch #endif
279999cafbc1SBarry Smith }
280099cafbc1SBarry Smith 
MatImaginaryPart_SeqBAIJ(Mat A)2801ff6a9541SJacob Faibussowitsch static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2802d71ae5a4SJacob Faibussowitsch {
2803ff6a9541SJacob Faibussowitsch #if PetscDefined(USE_COMPLEX)
280499cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
280599cafbc1SBarry Smith   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2806dd6ea824SBarry Smith   MatScalar   *aa = a->a;
280799cafbc1SBarry Smith 
280899cafbc1SBarry Smith   PetscFunctionBegin;
280999cafbc1SBarry Smith   for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
28103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2811ff6a9541SJacob Faibussowitsch #else
2812ff6a9541SJacob Faibussowitsch   (void)A;
2813ff6a9541SJacob Faibussowitsch   return PETSC_SUCCESS;
2814ff6a9541SJacob Faibussowitsch #endif
281599cafbc1SBarry Smith }
281699cafbc1SBarry Smith 
28173acb8795SBarry Smith /*
28182479783cSJose E. Roman     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
28193acb8795SBarry Smith */
MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt * nn,const PetscInt * ia[],const PetscInt * ja[],PetscBool * done)2820ff6a9541SJacob Faibussowitsch static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2821d71ae5a4SJacob Faibussowitsch {
28223acb8795SBarry Smith   Mat_SeqBAIJ *a  = (Mat_SeqBAIJ *)A->data;
28233acb8795SBarry Smith   PetscInt     bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
28243acb8795SBarry Smith   PetscInt     nz = a->i[m], row, *jj, mr, col;
28253acb8795SBarry Smith 
28263acb8795SBarry Smith   PetscFunctionBegin;
28273acb8795SBarry Smith   *nn = n;
28283ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
28295f80ce2aSJacob Faibussowitsch   PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
28309566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(n, &collengths));
28319566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &cia));
28329566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cja));
28333acb8795SBarry Smith   jj = a->j;
2834ad540459SPierre Jolivet   for (i = 0; i < nz; i++) collengths[jj[i]]++;
28353acb8795SBarry Smith   cia[0] = oshift;
2836ad540459SPierre Jolivet   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
28379566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(collengths, n));
28383acb8795SBarry Smith   jj = a->j;
28393acb8795SBarry Smith   for (row = 0; row < m; row++) {
28403acb8795SBarry Smith     mr = a->i[row + 1] - a->i[row];
28413acb8795SBarry Smith     for (i = 0; i < mr; i++) {
28423acb8795SBarry Smith       col = *jj++;
284326fbe8dcSKarl Rupp 
28443acb8795SBarry Smith       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
28453acb8795SBarry Smith     }
28463acb8795SBarry Smith   }
28479566063dSJacob Faibussowitsch   PetscCall(PetscFree(collengths));
28489371c9d4SSatish Balay   *ia = cia;
28499371c9d4SSatish Balay   *ja = cja;
28503ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
28513acb8795SBarry Smith }
28523acb8795SBarry Smith 
MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt * n,const PetscInt * ia[],const PetscInt * ja[],PetscBool * done)2853ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2854d71ae5a4SJacob Faibussowitsch {
28553acb8795SBarry Smith   PetscFunctionBegin;
28563ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
28579566063dSJacob Faibussowitsch   PetscCall(PetscFree(*ia));
28589566063dSJacob Faibussowitsch   PetscCall(PetscFree(*ja));
28593ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
28603acb8795SBarry Smith }
28613acb8795SBarry Smith 
2862525d23c0SHong Zhang /*
2863525d23c0SHong Zhang  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2864525d23c0SHong Zhang  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2865040ebd07SHong Zhang  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2866525d23c0SHong Zhang  */
MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt * nn,const PetscInt * ia[],const PetscInt * ja[],PetscInt * spidx[],PetscBool * done)2867d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2868d71ae5a4SJacob Faibussowitsch {
2869525d23c0SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2870c0349474SHong Zhang   PetscInt     i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2871525d23c0SHong Zhang   PetscInt     nz = a->i[m], row, *jj, mr, col;
2872525d23c0SHong Zhang   PetscInt    *cspidx;
2873f6d58c54SBarry Smith 
2874f6d58c54SBarry Smith   PetscFunctionBegin;
2875525d23c0SHong Zhang   *nn = n;
28763ba16761SJacob Faibussowitsch   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2877f6d58c54SBarry Smith 
28789566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(n, &collengths));
28799566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &cia));
28809566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cja));
28819566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cspidx));
2882525d23c0SHong Zhang   jj = a->j;
2883ad540459SPierre Jolivet   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2884525d23c0SHong Zhang   cia[0] = oshift;
2885ad540459SPierre Jolivet   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
28869566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(collengths, n));
2887525d23c0SHong Zhang   jj = a->j;
2888525d23c0SHong Zhang   for (row = 0; row < m; row++) {
2889525d23c0SHong Zhang     mr = a->i[row + 1] - a->i[row];
2890525d23c0SHong Zhang     for (i = 0; i < mr; i++) {
2891525d23c0SHong Zhang       col                                         = *jj++;
2892525d23c0SHong Zhang       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2893525d23c0SHong Zhang       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2894525d23c0SHong Zhang     }
2895525d23c0SHong Zhang   }
28969566063dSJacob Faibussowitsch   PetscCall(PetscFree(collengths));
2897071fcb05SBarry Smith   *ia    = cia;
2898071fcb05SBarry Smith   *ja    = cja;
2899525d23c0SHong Zhang   *spidx = cspidx;
29003ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2901f6d58c54SBarry Smith }
2902f6d58c54SBarry Smith 
MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt * n,const PetscInt * ia[],const PetscInt * ja[],PetscInt * spidx[],PetscBool * done)2903d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2904d71ae5a4SJacob Faibussowitsch {
2905525d23c0SHong Zhang   PetscFunctionBegin;
29069566063dSJacob Faibussowitsch   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
29079566063dSJacob Faibussowitsch   PetscCall(PetscFree(*spidx));
29083ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2909f6d58c54SBarry Smith }
291099cafbc1SBarry Smith 
MatShift_SeqBAIJ(Mat Y,PetscScalar a)291166976f2fSJacob Faibussowitsch static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a)
2912d71ae5a4SJacob Faibussowitsch {
29137d68702bSBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
29147d68702bSBarry Smith 
29157d68702bSBarry Smith   PetscFunctionBegin;
291648a46eb9SPierre Jolivet   if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
29179566063dSJacob Faibussowitsch   PetscCall(MatShift_Basic(Y, a));
29183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
29197d68702bSBarry Smith }
29207d68702bSBarry Smith 
MatEliminateZeros_SeqBAIJ(Mat A,PetscBool keep)292117ea310bSPierre Jolivet PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep)
292217ea310bSPierre Jolivet {
292317ea310bSPierre Jolivet   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
292417ea310bSPierre Jolivet   PetscInt     fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k;
292517ea310bSPierre Jolivet   PetscInt     m = A->rmap->N, *ailen = a->ilen;
292617ea310bSPierre Jolivet   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
292717ea310bSPierre Jolivet   MatScalar   *aa = a->a, *ap;
292817ea310bSPierre Jolivet   PetscBool    zero;
292917ea310bSPierre Jolivet 
293017ea310bSPierre Jolivet   PetscFunctionBegin;
293117ea310bSPierre Jolivet   PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix");
293217ea310bSPierre Jolivet   if (m) rmax = ailen[0];
293317ea310bSPierre Jolivet   for (i = 1; i <= mbs; i++) {
293417ea310bSPierre Jolivet     for (k = ai[i - 1]; k < ai[i]; k++) {
293517ea310bSPierre Jolivet       zero = PETSC_TRUE;
293617ea310bSPierre Jolivet       ap   = aa + bs2 * k;
293717ea310bSPierre Jolivet       for (j = 0; j < bs2 && zero; j++) {
293817ea310bSPierre Jolivet         if (ap[j] != 0.0) zero = PETSC_FALSE;
293917ea310bSPierre Jolivet       }
294017ea310bSPierre Jolivet       if (zero && (aj[k] != i - 1 || !keep)) fshift++;
294117ea310bSPierre Jolivet       else {
294217ea310bSPierre Jolivet         if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1));
294317ea310bSPierre Jolivet         aj[k - fshift] = aj[k];
294417ea310bSPierre Jolivet         PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2));
294517ea310bSPierre Jolivet       }
294617ea310bSPierre Jolivet     }
294717ea310bSPierre Jolivet     ai[i - 1] -= fshift_prev;
294817ea310bSPierre Jolivet     fshift_prev  = fshift;
294917ea310bSPierre Jolivet     ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1];
295017ea310bSPierre Jolivet     a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0);
295117ea310bSPierre Jolivet     rmax = PetscMax(rmax, ailen[i - 1]);
295217ea310bSPierre Jolivet   }
295317ea310bSPierre Jolivet   if (fshift) {
295417ea310bSPierre Jolivet     if (mbs) {
295517ea310bSPierre Jolivet       ai[mbs] -= fshift;
295617ea310bSPierre Jolivet       a->nz = ai[mbs];
295717ea310bSPierre Jolivet     }
295817ea310bSPierre Jolivet     PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz));
295917ea310bSPierre Jolivet     A->nonzerostate++;
296017ea310bSPierre Jolivet     A->info.nz_unneeded += (PetscReal)fshift;
296117ea310bSPierre Jolivet     a->rmax = rmax;
296217ea310bSPierre Jolivet     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
296317ea310bSPierre Jolivet     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
296417ea310bSPierre Jolivet   }
296517ea310bSPierre Jolivet   PetscFunctionReturn(PETSC_SUCCESS);
296617ea310bSPierre Jolivet }
296717ea310bSPierre Jolivet 
2968dec0b466SHong Zhang static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2969cc2dc46cSBarry Smith                                        MatGetRow_SeqBAIJ,
2970cc2dc46cSBarry Smith                                        MatRestoreRow_SeqBAIJ,
2971cc2dc46cSBarry Smith                                        MatMult_SeqBAIJ_N,
297297304618SKris Buschelman                                        /* 4*/ MatMultAdd_SeqBAIJ_N,
29737c922b88SBarry Smith                                        MatMultTranspose_SeqBAIJ,
29747c922b88SBarry Smith                                        MatMultTransposeAdd_SeqBAIJ,
2975f4259b30SLisandro Dalcin                                        NULL,
2976f4259b30SLisandro Dalcin                                        NULL,
2977f4259b30SLisandro Dalcin                                        NULL,
2978f4259b30SLisandro Dalcin                                        /* 10*/ NULL,
2979cc2dc46cSBarry Smith                                        MatLUFactor_SeqBAIJ,
2980f4259b30SLisandro Dalcin                                        NULL,
2981f4259b30SLisandro Dalcin                                        NULL,
2982f2501298SSatish Balay                                        MatTranspose_SeqBAIJ,
298397304618SKris Buschelman                                        /* 15*/ MatGetInfo_SeqBAIJ,
2984cc2dc46cSBarry Smith                                        MatEqual_SeqBAIJ,
2985cc2dc46cSBarry Smith                                        MatGetDiagonal_SeqBAIJ,
2986cc2dc46cSBarry Smith                                        MatDiagonalScale_SeqBAIJ,
2987cc2dc46cSBarry Smith                                        MatNorm_SeqBAIJ,
2988f4259b30SLisandro Dalcin                                        /* 20*/ NULL,
2989cc2dc46cSBarry Smith                                        MatAssemblyEnd_SeqBAIJ,
2990cc2dc46cSBarry Smith                                        MatSetOption_SeqBAIJ,
2991cc2dc46cSBarry Smith                                        MatZeroEntries_SeqBAIJ,
2992d519adbfSMatthew Knepley                                        /* 24*/ MatZeroRows_SeqBAIJ,
2993f4259b30SLisandro Dalcin                                        NULL,
2994f4259b30SLisandro Dalcin                                        NULL,
2995f4259b30SLisandro Dalcin                                        NULL,
2996f4259b30SLisandro Dalcin                                        NULL,
299726cec326SBarry Smith                                        /* 29*/ MatSetUp_Seq_Hash,
2998f4259b30SLisandro Dalcin                                        NULL,
2999f4259b30SLisandro Dalcin                                        NULL,
3000f4259b30SLisandro Dalcin                                        NULL,
3001f4259b30SLisandro Dalcin                                        NULL,
3002d519adbfSMatthew Knepley                                        /* 34*/ MatDuplicate_SeqBAIJ,
3003f4259b30SLisandro Dalcin                                        NULL,
3004f4259b30SLisandro Dalcin                                        NULL,
3005cc2dc46cSBarry Smith                                        MatILUFactor_SeqBAIJ,
3006f4259b30SLisandro Dalcin                                        NULL,
3007d519adbfSMatthew Knepley                                        /* 39*/ MatAXPY_SeqBAIJ,
30087dae84e0SHong Zhang                                        MatCreateSubMatrices_SeqBAIJ,
3009cc2dc46cSBarry Smith                                        MatIncreaseOverlap_SeqBAIJ,
3010cc2dc46cSBarry Smith                                        MatGetValues_SeqBAIJ,
30113c896bc6SHong Zhang                                        MatCopy_SeqBAIJ,
3012f4259b30SLisandro Dalcin                                        /* 44*/ NULL,
3013cc2dc46cSBarry Smith                                        MatScale_SeqBAIJ,
30147d68702bSBarry Smith                                        MatShift_SeqBAIJ,
3015f4259b30SLisandro Dalcin                                        NULL,
301697b48c8fSBarry Smith                                        MatZeroRowsColumns_SeqBAIJ,
3017f4259b30SLisandro Dalcin                                        /* 49*/ NULL,
30183b2fbd54SBarry Smith                                        MatGetRowIJ_SeqBAIJ,
301992c4ed94SBarry Smith                                        MatRestoreRowIJ_SeqBAIJ,
30203acb8795SBarry Smith                                        MatGetColumnIJ_SeqBAIJ,
30213acb8795SBarry Smith                                        MatRestoreColumnIJ_SeqBAIJ,
302293dfae19SHong Zhang                                        /* 54*/ MatFDColoringCreate_SeqXAIJ,
3023f4259b30SLisandro Dalcin                                        NULL,
3024f4259b30SLisandro Dalcin                                        NULL,
3025090001bdSToby Isaac                                        NULL,
3026d3825aa8SBarry Smith                                        MatSetValuesBlocked_SeqBAIJ,
30277dae84e0SHong Zhang                                        /* 59*/ MatCreateSubMatrix_SeqBAIJ,
3028b9b97703SBarry Smith                                        MatDestroy_SeqBAIJ,
3029b9b97703SBarry Smith                                        MatView_SeqBAIJ,
3030f4259b30SLisandro Dalcin                                        NULL,
3031f4259b30SLisandro Dalcin                                        NULL,
3032f4259b30SLisandro Dalcin                                        /* 64*/ NULL,
3033f4259b30SLisandro Dalcin                                        NULL,
3034f4259b30SLisandro Dalcin                                        NULL,
3035f4259b30SLisandro Dalcin                                        NULL,
30368bb0f5c6SPierre Jolivet                                        MatGetRowMaxAbs_SeqBAIJ,
30378bb0f5c6SPierre Jolivet                                        /* 69*/ NULL,
3038c87e5d42SMatthew Knepley                                        MatConvert_Basic,
3039f4259b30SLisandro Dalcin                                        NULL,
3040f6d58c54SBarry Smith                                        MatFDColoringApply_BAIJ,
3041f4259b30SLisandro Dalcin                                        NULL,
30428bb0f5c6SPierre Jolivet                                        /* 74*/ NULL,
3043f4259b30SLisandro Dalcin                                        NULL,
3044f4259b30SLisandro Dalcin                                        NULL,
3045f4259b30SLisandro Dalcin                                        NULL,
30465bba2384SShri Abhyankar                                        MatLoad_SeqBAIJ,
30478bb0f5c6SPierre Jolivet                                        /* 79*/ NULL,
30488bb0f5c6SPierre Jolivet                                        NULL,
30498bb0f5c6SPierre Jolivet                                        NULL,
30508bb0f5c6SPierre Jolivet                                        NULL,
30518bb0f5c6SPierre Jolivet                                        NULL,
3052f4259b30SLisandro Dalcin                                        /* 84*/ NULL,
3053f4259b30SLisandro Dalcin                                        NULL,
3054f4259b30SLisandro Dalcin                                        NULL,
3055f4259b30SLisandro Dalcin                                        NULL,
3056f4259b30SLisandro Dalcin                                        NULL,
3057f4259b30SLisandro Dalcin                                        /* 89*/ NULL,
3058f4259b30SLisandro Dalcin                                        NULL,
3059f4259b30SLisandro Dalcin                                        NULL,
3060f4259b30SLisandro Dalcin                                        NULL,
30618bb0f5c6SPierre Jolivet                                        MatConjugate_SeqBAIJ,
3062f4259b30SLisandro Dalcin                                        /* 94*/ NULL,
3063f4259b30SLisandro Dalcin                                        NULL,
30648bb0f5c6SPierre Jolivet                                        MatRealPart_SeqBAIJ,
30658bb0f5c6SPierre Jolivet                                        MatImaginaryPart_SeqBAIJ,
3066f4259b30SLisandro Dalcin                                        NULL,
3067f4259b30SLisandro Dalcin                                        /* 99*/ NULL,
3068f4259b30SLisandro Dalcin                                        NULL,
3069f4259b30SLisandro Dalcin                                        NULL,
3070f4259b30SLisandro Dalcin                                        NULL,
30718bb0f5c6SPierre Jolivet                                        NULL,
3072*421480d9SBarry Smith                                        /*104*/ NULL,
30738bb0f5c6SPierre Jolivet                                        NULL,
30748bb0f5c6SPierre Jolivet                                        NULL,
3075f4259b30SLisandro Dalcin                                        NULL,
3076f4259b30SLisandro Dalcin                                        NULL,
3077f4259b30SLisandro Dalcin                                        /*109*/ NULL,
3078f4259b30SLisandro Dalcin                                        NULL,
3079547795f9SHong Zhang                                        MatMultHermitianTranspose_SeqBAIJ,
3080d6037b41SHong Zhang                                        MatMultHermitianTransposeAdd_SeqBAIJ,
3081f4259b30SLisandro Dalcin                                        NULL,
3082*421480d9SBarry Smith                                        /*114*/ NULL,
3083857cbf51SRichard Tran Mills                                        MatGetColumnReductions_SeqBAIJ,
30843964eb88SJed Brown                                        MatInvertBlockDiagonal_SeqBAIJ,
3085f4259b30SLisandro Dalcin                                        NULL,
3086*421480d9SBarry Smith                                        NULL,
30878bb0f5c6SPierre Jolivet                                        /*119*/ NULL,
3088f4259b30SLisandro Dalcin                                        NULL,
3089f4259b30SLisandro Dalcin                                        NULL,
3090f4259b30SLisandro Dalcin                                        NULL,
3091f4259b30SLisandro Dalcin                                        NULL,
30928bb0f5c6SPierre Jolivet                                        /*124*/ NULL,
30938bb0f5c6SPierre Jolivet                                        NULL,
30948bb0f5c6SPierre Jolivet                                        MatSetBlockSizes_Default,
30958bb0f5c6SPierre Jolivet                                        NULL,
3096*421480d9SBarry Smith                                        MatFDColoringSetUp_SeqXAIJ,
3097*421480d9SBarry Smith                                        /*129*/ NULL,
30988bb0f5c6SPierre Jolivet                                        MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
30998bb0f5c6SPierre Jolivet                                        MatDestroySubMatrices_SeqBAIJ,
31008bb0f5c6SPierre Jolivet                                        NULL,
3101f4259b30SLisandro Dalcin                                        NULL,
3102*421480d9SBarry Smith                                        /*134*/ NULL,
3103f4259b30SLisandro Dalcin                                        NULL,
3104eede4a3fSMark Adams                                        MatEliminateZeros_SeqBAIJ,
31054cc2b5b5SPierre Jolivet                                        MatGetRowSumAbs_SeqBAIJ,
310642ce410bSJunchao Zhang                                        NULL,
3107*421480d9SBarry Smith                                        /*139*/ NULL,
310842ce410bSJunchao Zhang                                        NULL,
310903db1824SAlex Lindsay                                        MatCopyHashToXAIJ_Seq_Hash,
3110c2be7ffeSStefano Zampini                                        NULL,
311103db1824SAlex Lindsay                                        NULL};
31122593348eSBarry Smith 
MatStoreValues_SeqBAIJ(Mat mat)3113ff6a9541SJacob Faibussowitsch static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3114d71ae5a4SJacob Faibussowitsch {
31153e90b805SBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
31168ece6314SShri Abhyankar   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
31173e90b805SBarry Smith 
31183e90b805SBarry Smith   PetscFunctionBegin;
31195f80ce2aSJacob Faibussowitsch   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
31203e90b805SBarry Smith 
31213e90b805SBarry Smith   /* allocate space for values if not already there */
3122ff6a9541SJacob Faibussowitsch   if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values));
31233e90b805SBarry Smith 
31243e90b805SBarry Smith   /* copy values over */
31259566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
31263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
31273e90b805SBarry Smith }
31283e90b805SBarry Smith 
MatRetrieveValues_SeqBAIJ(Mat mat)3129ff6a9541SJacob Faibussowitsch static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3130d71ae5a4SJacob Faibussowitsch {
31313e90b805SBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
31328ece6314SShri Abhyankar   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
31333e90b805SBarry Smith 
31343e90b805SBarry Smith   PetscFunctionBegin;
31355f80ce2aSJacob Faibussowitsch   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
31365f80ce2aSJacob Faibussowitsch   PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
31373e90b805SBarry Smith 
31383e90b805SBarry Smith   /* copy values over */
31399566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
31403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
31413e90b805SBarry Smith }
31423e90b805SBarry Smith 
3143cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3144cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3145273d9f13SBarry Smith 
MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])3146f9663b93SPierre Jolivet PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3147d71ae5a4SJacob Faibussowitsch {
3148ad79cf63SBarry Smith   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
3149535b19f3SBarry Smith   PetscInt     i, mbs, nbs, bs2;
31508afaa268SBarry Smith   PetscBool    flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3151a23d5eceSKris Buschelman 
3152a23d5eceSKris Buschelman   PetscFunctionBegin;
3153ad79cf63SBarry Smith   if (B->hash_active) {
3154ad79cf63SBarry Smith     PetscInt bs;
3155aea10558SJacob Faibussowitsch     B->ops[0] = b->cops;
3156ad79cf63SBarry Smith     PetscCall(PetscHMapIJVDestroy(&b->ht));
3157ad79cf63SBarry Smith     PetscCall(MatGetBlockSize(B, &bs));
3158ad79cf63SBarry Smith     if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht));
3159ad79cf63SBarry Smith     PetscCall(PetscFree(b->dnz));
3160ad79cf63SBarry Smith     PetscCall(PetscFree(b->bdnz));
3161ad79cf63SBarry Smith     B->hash_active = PETSC_FALSE;
3162ad79cf63SBarry Smith   }
31632576faa2SJed Brown   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3164ab93d7beSBarry Smith   if (nz == MAT_SKIP_ALLOCATION) {
3165ab93d7beSBarry Smith     skipallocation = PETSC_TRUE;
3166ab93d7beSBarry Smith     nz             = 0;
3167ab93d7beSBarry Smith   }
31688c07d4e3SBarry Smith 
316958b7e2c1SStefano Zampini   PetscCall(MatSetBlockSize(B, bs));
31709566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
31719566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
31729566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3173899cda47SBarry Smith 
3174899cda47SBarry Smith   B->preallocated = PETSC_TRUE;
3175899cda47SBarry Smith 
3176d0f46423SBarry Smith   mbs = B->rmap->n / bs;
3177d0f46423SBarry Smith   nbs = B->cmap->n / bs;
3178a23d5eceSKris Buschelman   bs2 = bs * bs;
3179a23d5eceSKris Buschelman 
31805f80ce2aSJacob Faibussowitsch   PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3181a23d5eceSKris Buschelman 
3182a23d5eceSKris Buschelman   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
31835f80ce2aSJacob Faibussowitsch   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3184a23d5eceSKris Buschelman   if (nnz) {
3185a23d5eceSKris Buschelman     for (i = 0; i < mbs; i++) {
31865f80ce2aSJacob Faibussowitsch       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
31875f80ce2aSJacob Faibussowitsch       PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3188a23d5eceSKris Buschelman     }
3189a23d5eceSKris Buschelman   }
3190a23d5eceSKris Buschelman 
3191d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
31929566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3193d0609cedSBarry Smith   PetscOptionsEnd();
31948c07d4e3SBarry Smith 
3195a23d5eceSKris Buschelman   if (!flg) {
3196a23d5eceSKris Buschelman     switch (bs) {
3197a23d5eceSKris Buschelman     case 1:
3198a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_1;
3199a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3200a23d5eceSKris Buschelman       break;
3201a23d5eceSKris Buschelman     case 2:
3202a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_2;
3203a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3204a23d5eceSKris Buschelman       break;
3205a23d5eceSKris Buschelman     case 3:
3206a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_3;
3207a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3208a23d5eceSKris Buschelman       break;
3209a23d5eceSKris Buschelman     case 4:
3210a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_4;
3211a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3212a23d5eceSKris Buschelman       break;
3213a23d5eceSKris Buschelman     case 5:
3214a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_5;
3215a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3216a23d5eceSKris Buschelman       break;
3217a23d5eceSKris Buschelman     case 6:
3218a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_6;
3219a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3220a23d5eceSKris Buschelman       break;
3221a23d5eceSKris Buschelman     case 7:
3222a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_7;
3223a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3224a23d5eceSKris Buschelman       break;
32259371c9d4SSatish Balay     case 9: {
32266679dcc1SBarry Smith       PetscInt version = 1;
32279566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
32286679dcc1SBarry Smith       switch (version) {
32295f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
32306679dcc1SBarry Smith       case 1:
323196e086a2SDaniel Kokron         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
323296e086a2SDaniel Kokron         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
3233835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
32346679dcc1SBarry Smith         break;
32356679dcc1SBarry Smith #endif
32366679dcc1SBarry Smith       default:
323796e086a2SDaniel Kokron         B->ops->mult    = MatMult_SeqBAIJ_N;
323896e086a2SDaniel Kokron         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3239835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
324096e086a2SDaniel Kokron         break;
32416679dcc1SBarry Smith       }
32426679dcc1SBarry Smith       break;
32436679dcc1SBarry Smith     }
3244ebada01fSBarry Smith     case 11:
3245ebada01fSBarry Smith       B->ops->mult    = MatMult_SeqBAIJ_11;
3246ebada01fSBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3247ebada01fSBarry Smith       break;
32489371c9d4SSatish Balay     case 12: {
32496679dcc1SBarry Smith       PetscInt version = 1;
32509566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
32516679dcc1SBarry Smith       switch (version) {
32526679dcc1SBarry Smith       case 1:
32536679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
32546679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3255835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32568ab949d8SShri Abhyankar         break;
32576679dcc1SBarry Smith       case 2:
32586679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
32596679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
3260835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32616679dcc1SBarry Smith         break;
32626679dcc1SBarry Smith #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
32636679dcc1SBarry Smith       case 3:
32646679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
32656679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3266835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
32676679dcc1SBarry Smith         break;
32686679dcc1SBarry Smith #endif
3269a23d5eceSKris Buschelman       default:
3270a23d5eceSKris Buschelman         B->ops->mult    = MatMult_SeqBAIJ_N;
3271a23d5eceSKris Buschelman         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3272835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
32736679dcc1SBarry Smith         break;
32746679dcc1SBarry Smith       }
32756679dcc1SBarry Smith       break;
32766679dcc1SBarry Smith     }
32779371c9d4SSatish Balay     case 15: {
32786679dcc1SBarry Smith       PetscInt version = 1;
32799566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
32806679dcc1SBarry Smith       switch (version) {
32816679dcc1SBarry Smith       case 1:
32826679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver1;
3283835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32846679dcc1SBarry Smith         break;
32856679dcc1SBarry Smith       case 2:
32866679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver2;
3287835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32886679dcc1SBarry Smith         break;
32896679dcc1SBarry Smith       case 3:
32906679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver3;
3291835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32926679dcc1SBarry Smith         break;
32936679dcc1SBarry Smith       case 4:
32946679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver4;
3295835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32966679dcc1SBarry Smith         break;
32976679dcc1SBarry Smith       default:
32986679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_N;
3299835f2295SStefano Zampini         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
33006679dcc1SBarry Smith         break;
33016679dcc1SBarry Smith       }
33026679dcc1SBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
33036679dcc1SBarry Smith       break;
33046679dcc1SBarry Smith     }
33056679dcc1SBarry Smith     default:
33066679dcc1SBarry Smith       B->ops->mult    = MatMult_SeqBAIJ_N;
33076679dcc1SBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3308835f2295SStefano Zampini       PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3309a23d5eceSKris Buschelman       break;
3310a23d5eceSKris Buschelman     }
3311a23d5eceSKris Buschelman   }
3312e48d15efSToby Isaac   B->ops->sor = MatSOR_SeqBAIJ;
3313a23d5eceSKris Buschelman   b->mbs      = mbs;
3314a23d5eceSKris Buschelman   b->nbs      = nbs;
3315ab93d7beSBarry Smith   if (!skipallocation) {
33162ee49352SLisandro Dalcin     if (!b->imax) {
33179566063dSJacob Faibussowitsch       PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
331826fbe8dcSKarl Rupp 
33194fd072dbSBarry Smith       b->free_imax_ilen = PETSC_TRUE;
33202ee49352SLisandro Dalcin     }
3321ab93d7beSBarry Smith     /* b->ilen will count nonzeros in each block row so far. */
332226fbe8dcSKarl Rupp     for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3323a23d5eceSKris Buschelman     if (!nnz) {
3324a23d5eceSKris Buschelman       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3325c62bd62aSJed Brown       else if (nz < 0) nz = 1;
33265d2a9ed1SStefano Zampini       nz = PetscMin(nz, nbs);
3327a23d5eceSKris Buschelman       for (i = 0; i < mbs; i++) b->imax[i] = nz;
33289566063dSJacob Faibussowitsch       PetscCall(PetscIntMultError(nz, mbs, &nz));
3329a23d5eceSKris Buschelman     } else {
3330c73702f5SBarry Smith       PetscInt64 nz64 = 0;
33319371c9d4SSatish Balay       for (i = 0; i < mbs; i++) {
33329371c9d4SSatish Balay         b->imax[i] = nnz[i];
33339371c9d4SSatish Balay         nz64 += nnz[i];
33349371c9d4SSatish Balay       }
33359566063dSJacob Faibussowitsch       PetscCall(PetscIntCast(nz64, &nz));
3336a23d5eceSKris Buschelman     }
3337a23d5eceSKris Buschelman 
3338a23d5eceSKris Buschelman     /* allocate the matrix space */
33399566063dSJacob Faibussowitsch     PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
33409f0612e4SBarry Smith     PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&b->j));
33419f0612e4SBarry Smith     PetscCall(PetscShmgetAllocateArray(B->rmap->N + 1, sizeof(PetscInt), (void **)&b->i));
3342672ba085SHong Zhang     if (B->structure_only) {
33439f0612e4SBarry Smith       b->free_a = PETSC_FALSE;
3344672ba085SHong Zhang     } else {
33456679dcc1SBarry Smith       PetscInt nzbs2 = 0;
33469566063dSJacob Faibussowitsch       PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
33479f0612e4SBarry Smith       PetscCall(PetscShmgetAllocateArray(nzbs2, sizeof(PetscScalar), (void **)&b->a));
33489f0612e4SBarry Smith       b->free_a = PETSC_TRUE;
33491766d9c3SPierre Jolivet       PetscCall(PetscArrayzero(b->a, nzbs2));
3350672ba085SHong Zhang     }
3351672ba085SHong Zhang     b->free_ij = PETSC_TRUE;
33529f0612e4SBarry Smith     PetscCall(PetscArrayzero(b->j, nz));
3353672ba085SHong Zhang 
3354a23d5eceSKris Buschelman     b->i[0] = 0;
3355ad540459SPierre Jolivet     for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3356e811da20SHong Zhang   } else {
3357e6b907acSBarry Smith     b->free_a  = PETSC_FALSE;
3358e6b907acSBarry Smith     b->free_ij = PETSC_FALSE;
3359ab93d7beSBarry Smith   }
3360a23d5eceSKris Buschelman 
3361a23d5eceSKris Buschelman   b->bs2              = bs2;
3362a23d5eceSKris Buschelman   b->mbs              = mbs;
3363a23d5eceSKris Buschelman   b->nz               = 0;
3364b32cb4a7SJed Brown   b->maxnz            = nz;
3365b32cb4a7SJed Brown   B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3366cb7b82ddSBarry Smith   B->was_assembled    = PETSC_FALSE;
3367cb7b82ddSBarry Smith   B->assembled        = PETSC_FALSE;
33689566063dSJacob Faibussowitsch   if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
33693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3370a23d5eceSKris Buschelman }
3371a23d5eceSKris Buschelman 
MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])337266976f2fSJacob Faibussowitsch static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
3373d71ae5a4SJacob Faibussowitsch {
3374725b52f3SLisandro Dalcin   PetscInt     i, m, nz, nz_max = 0, *nnz;
3375f4259b30SLisandro Dalcin   PetscScalar *values      = NULL;
3376d47bf9aaSJed Brown   PetscBool    roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3377725b52f3SLisandro Dalcin 
3378725b52f3SLisandro Dalcin   PetscFunctionBegin;
33795f80ce2aSJacob Faibussowitsch   PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
33809566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
33819566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
33829566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
33839566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
33849566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3385d0f46423SBarry Smith   m = B->rmap->n / bs;
3386725b52f3SLisandro Dalcin 
33875f80ce2aSJacob Faibussowitsch   PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
33889566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &nnz));
3389725b52f3SLisandro Dalcin   for (i = 0; i < m; i++) {
3390cf12db73SBarry Smith     nz = ii[i + 1] - ii[i];
33915f80ce2aSJacob Faibussowitsch     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3392725b52f3SLisandro Dalcin     nz_max = PetscMax(nz_max, nz);
3393725b52f3SLisandro Dalcin     nnz[i] = nz;
3394725b52f3SLisandro Dalcin   }
33959566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
33969566063dSJacob Faibussowitsch   PetscCall(PetscFree(nnz));
3397725b52f3SLisandro Dalcin 
3398725b52f3SLisandro Dalcin   values = (PetscScalar *)V;
339948a46eb9SPierre Jolivet   if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3400725b52f3SLisandro Dalcin   for (i = 0; i < m; i++) {
3401cf12db73SBarry Smith     PetscInt        ncols = ii[i + 1] - ii[i];
3402cf12db73SBarry Smith     const PetscInt *icols = jj + ii[i];
3403bb80cfbbSStefano Zampini     if (bs == 1 || !roworiented) {
3404cf12db73SBarry Smith       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
34059566063dSJacob Faibussowitsch       PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
34063adadaf3SJed Brown     } else {
34073adadaf3SJed Brown       PetscInt j;
34083adadaf3SJed Brown       for (j = 0; j < ncols; j++) {
34093adadaf3SJed Brown         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
34109566063dSJacob Faibussowitsch         PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
34113adadaf3SJed Brown       }
34123adadaf3SJed Brown     }
3413725b52f3SLisandro Dalcin   }
34149566063dSJacob Faibussowitsch   if (!V) PetscCall(PetscFree(values));
34159566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
34169566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
34179566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
34183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3419725b52f3SLisandro Dalcin }
3420725b52f3SLisandro Dalcin 
3421cda14afcSprj- /*@C
342211a5261eSBarry Smith   MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored
3423cda14afcSprj- 
3424cda14afcSprj-   Not Collective
3425cda14afcSprj- 
3426cda14afcSprj-   Input Parameter:
3427fe59aa6dSJacob Faibussowitsch . A - a `MATSEQBAIJ` matrix
3428cda14afcSprj- 
3429cda14afcSprj-   Output Parameter:
3430cda14afcSprj- . array - pointer to the data
3431cda14afcSprj- 
3432cda14afcSprj-   Level: intermediate
3433cda14afcSprj- 
34341cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3435cda14afcSprj- @*/
MatSeqBAIJGetArray(Mat A,PetscScalar * array[])34365d83a8b1SBarry Smith PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar *array[])
3437d71ae5a4SJacob Faibussowitsch {
3438cda14afcSprj-   PetscFunctionBegin;
3439cac4c232SBarry Smith   PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
34403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3441cda14afcSprj- }
3442cda14afcSprj- 
3443cda14afcSprj- /*@C
344411a5261eSBarry Smith   MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`
3445cda14afcSprj- 
3446cda14afcSprj-   Not Collective
3447cda14afcSprj- 
3448cda14afcSprj-   Input Parameters:
3449fe59aa6dSJacob Faibussowitsch + A     - a `MATSEQBAIJ` matrix
3450cda14afcSprj- - array - pointer to the data
3451cda14afcSprj- 
3452cda14afcSprj-   Level: intermediate
3453cda14afcSprj- 
34541cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3455cda14afcSprj- @*/
MatSeqBAIJRestoreArray(Mat A,PetscScalar * array[])34565d83a8b1SBarry Smith PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar *array[])
3457d71ae5a4SJacob Faibussowitsch {
3458cda14afcSprj-   PetscFunctionBegin;
3459cac4c232SBarry Smith   PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
34603ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3461cda14afcSprj- }
3462cda14afcSprj- 
34630bad9183SKris Buschelman /*MC
3464fafad747SKris Buschelman    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
34650bad9183SKris Buschelman    block sparse compressed row format.
34660bad9183SKris Buschelman 
34670bad9183SKris Buschelman    Options Database Keys:
346820f4b53cSBarry Smith + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()`
34696679dcc1SBarry Smith - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
34700bad9183SKris Buschelman 
34710bad9183SKris Buschelman    Level: beginner
34720cd7f59aSBarry Smith 
34730cd7f59aSBarry Smith    Notes:
   `MatSetOption`(A,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
   space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored.
34760bad9183SKris Buschelman 
34772ef1f0ffSBarry Smith    Run with `-info` to see what version of the matrix-vector product is being used
34786679dcc1SBarry Smith 
34791cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()`
34800bad9183SKris Buschelman M*/
34810bad9183SKris Buschelman 
3482cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3483b24902e0SBarry Smith 
MatCreate_SeqBAIJ(Mat B)3484d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3485d71ae5a4SJacob Faibussowitsch {
3486c1ac3661SBarry Smith   PetscMPIInt  size;
3487b6490206SBarry Smith   Mat_SeqBAIJ *b;
34883b2fbd54SBarry Smith 
34893a40ed3dSBarry Smith   PetscFunctionBegin;
34909566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
34915f80ce2aSJacob Faibussowitsch   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3492b6490206SBarry Smith 
34934dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&b));
3494b0a32e0cSBarry Smith   B->data   = (void *)b;
3495aea10558SJacob Faibussowitsch   B->ops[0] = MatOps_Values;
349626fbe8dcSKarl Rupp 
3497f4259b30SLisandro Dalcin   b->row          = NULL;
3498f4259b30SLisandro Dalcin   b->col          = NULL;
3499f4259b30SLisandro Dalcin   b->icol         = NULL;
35002593348eSBarry Smith   b->reallocs     = 0;
3501f4259b30SLisandro Dalcin   b->saved_values = NULL;
35022593348eSBarry Smith 
3503c4992f7dSBarry Smith   b->roworiented        = PETSC_TRUE;
35042593348eSBarry Smith   b->nonew              = 0;
3505f4259b30SLisandro Dalcin   b->diag               = NULL;
3506f4259b30SLisandro Dalcin   B->spptr              = NULL;
3507b32cb4a7SJed Brown   B->info.nz_unneeded   = (PetscReal)b->maxnz * b->bs2;
3508a9817697SBarry Smith   b->keepnonzeropattern = PETSC_FALSE;
35094e220ebcSLois Curfman McInnes 
35109566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
35119566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
35129566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
35139566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
35149566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
35159566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
35169566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
35179566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
35189566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
35199566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
35207ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
35219566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
35227ea3e4caSstefano_zampini #endif
35239566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
35249566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
35253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
35262593348eSBarry Smith }
35272593348eSBarry Smith 
MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)3528d6acfc2dSPierre Jolivet PETSC_INTERN PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
3529d71ae5a4SJacob Faibussowitsch {
3530b24902e0SBarry Smith   Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3531a96a251dSBarry Smith   PetscInt     i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3532de6a44a3SBarry Smith 
35333a40ed3dSBarry Smith   PetscFunctionBegin;
353431fe6a7dSBarry Smith   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix");
35355f80ce2aSJacob Faibussowitsch   PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
35362593348eSBarry Smith 
35374fd072dbSBarry Smith   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
35384fd072dbSBarry Smith     c->imax           = a->imax;
35394fd072dbSBarry Smith     c->ilen           = a->ilen;
35404fd072dbSBarry Smith     c->free_imax_ilen = PETSC_FALSE;
35414fd072dbSBarry Smith   } else {
35429566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3543b6490206SBarry Smith     for (i = 0; i < mbs; i++) {
35442593348eSBarry Smith       c->imax[i] = a->imax[i];
35452593348eSBarry Smith       c->ilen[i] = a->ilen[i];
35462593348eSBarry Smith     }
35474fd072dbSBarry Smith     c->free_imax_ilen = PETSC_TRUE;
35484fd072dbSBarry Smith   }
35492593348eSBarry Smith 
35502593348eSBarry Smith   /* allocate the matrix space */
355116a2bf60SHong Zhang   if (mallocmatspace) {
35524fd072dbSBarry Smith     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
35539f0612e4SBarry Smith       PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
35549f0612e4SBarry Smith       PetscCall(PetscArrayzero(c->a, bs2 * nz));
35559f0612e4SBarry Smith       c->free_a       = PETSC_TRUE;
35564fd072dbSBarry Smith       c->i            = a->i;
35574fd072dbSBarry Smith       c->j            = a->j;
3558379be0ddSLisandro Dalcin       c->free_ij      = PETSC_FALSE;
35594fd072dbSBarry Smith       c->parent       = A;
35601e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
35611e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
356226fbe8dcSKarl Rupp 
35639566063dSJacob Faibussowitsch       PetscCall(PetscObjectReference((PetscObject)A));
35649566063dSJacob Faibussowitsch       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
35659566063dSJacob Faibussowitsch       PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
35664fd072dbSBarry Smith     } else {
35679f0612e4SBarry Smith       PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
35689f0612e4SBarry Smith       PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&c->j));
35699f0612e4SBarry Smith       PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&c->i));
3570379be0ddSLisandro Dalcin       c->free_a  = PETSC_TRUE;
35714fd072dbSBarry Smith       c->free_ij = PETSC_TRUE;
357226fbe8dcSKarl Rupp 
35739566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3574b6490206SBarry Smith       if (mbs > 0) {
35759566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(c->j, a->j, nz));
35762e8a6d31SBarry Smith         if (cpvalues == MAT_COPY_VALUES) {
35779566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
35782e8a6d31SBarry Smith         } else {
35799566063dSJacob Faibussowitsch           PetscCall(PetscArrayzero(c->a, bs2 * nz));
35802593348eSBarry Smith         }
35812593348eSBarry Smith       }
35821e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
35831e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
358416a2bf60SHong Zhang     }
35854fd072dbSBarry Smith   }
358616a2bf60SHong Zhang 
35872593348eSBarry Smith   c->roworiented = a->roworiented;
35882593348eSBarry Smith   c->nonew       = a->nonew;
358926fbe8dcSKarl Rupp 
35909566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
35919566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
359226fbe8dcSKarl Rupp 
35935c9eb25fSBarry Smith   c->bs2        = a->bs2;
35945c9eb25fSBarry Smith   c->mbs        = a->mbs;
35955c9eb25fSBarry Smith   c->nbs        = a->nbs;
35962593348eSBarry Smith   c->nz         = a->nz;
3597f2cbd3d5SJed Brown   c->maxnz      = a->nz; /* Since we allocate exactly the right amount */
3598f361c04dSBarry Smith   c->solve_work = NULL;
3599f361c04dSBarry Smith   c->mult_work  = NULL;
3600f361c04dSBarry Smith   c->sor_workt  = NULL;
3601f361c04dSBarry Smith   c->sor_work   = NULL;
360288e51ccdSHong Zhang 
360388e51ccdSHong Zhang   c->compressedrow.use   = a->compressedrow.use;
360488e51ccdSHong Zhang   c->compressedrow.nrows = a->compressedrow.nrows;
3605cd6b891eSBarry Smith   if (a->compressedrow.use) {
360688e51ccdSHong Zhang     i = a->compressedrow.nrows;
36079566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
36089566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
36099566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
361088e51ccdSHong Zhang   } else {
361188e51ccdSHong Zhang     c->compressedrow.use    = PETSC_FALSE;
36120298fd71SBarry Smith     c->compressedrow.i      = NULL;
36130298fd71SBarry Smith     c->compressedrow.rindex = NULL;
361488e51ccdSHong Zhang   }
3615c05f355bSMark Adams   c->nonzerorowcnt = a->nonzerorowcnt;
3616e56f5c9eSBarry Smith   C->nonzerostate  = A->nonzerostate;
361726fbe8dcSKarl Rupp 
36189566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
36193ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
36202593348eSBarry Smith }
36212593348eSBarry Smith 
MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat * B)3622d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
3623d71ae5a4SJacob Faibussowitsch {
3624b24902e0SBarry Smith   PetscFunctionBegin;
36259566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
36269566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
36279566063dSJacob Faibussowitsch   PetscCall(MatSetType(*B, MATSEQBAIJ));
36289566063dSJacob Faibussowitsch   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
36293ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3630b24902e0SBarry Smith }
3631b24902e0SBarry Smith 
3632618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)3633d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
3634d71ae5a4SJacob Faibussowitsch {
3635b51a4376SLisandro Dalcin   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3636b51a4376SLisandro Dalcin   PetscInt    *rowidxs, *colidxs;
3637b51a4376SLisandro Dalcin   PetscScalar *matvals;
3638b51a4376SLisandro Dalcin 
3639b51a4376SLisandro Dalcin   PetscFunctionBegin;
36409566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
3641b51a4376SLisandro Dalcin 
3642b51a4376SLisandro Dalcin   /* read matrix header */
36439566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
36445f80ce2aSJacob Faibussowitsch   PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
36459371c9d4SSatish Balay   M  = header[1];
36469371c9d4SSatish Balay   N  = header[2];
36479371c9d4SSatish Balay   nz = header[3];
36485f80ce2aSJacob Faibussowitsch   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
36495f80ce2aSJacob Faibussowitsch   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
36505f80ce2aSJacob Faibussowitsch   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3651b51a4376SLisandro Dalcin 
3652b51a4376SLisandro Dalcin   /* set block sizes from the viewer's .info file */
36539566063dSJacob Faibussowitsch   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3654b51a4376SLisandro Dalcin   /* set local and global sizes if not set already */
3655b51a4376SLisandro Dalcin   if (mat->rmap->n < 0) mat->rmap->n = M;
3656b51a4376SLisandro Dalcin   if (mat->cmap->n < 0) mat->cmap->n = N;
3657b51a4376SLisandro Dalcin   if (mat->rmap->N < 0) mat->rmap->N = M;
3658b51a4376SLisandro Dalcin   if (mat->cmap->N < 0) mat->cmap->N = N;
36599566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->rmap));
36609566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->cmap));
3661b51a4376SLisandro Dalcin 
3662b51a4376SLisandro Dalcin   /* check if the matrix sizes are correct */
36639566063dSJacob Faibussowitsch   PetscCall(MatGetSize(mat, &rows, &cols));
36645f80ce2aSJacob Faibussowitsch   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
36659566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(mat, &bs));
36669566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat, &m, &n));
36679371c9d4SSatish Balay   mbs = m / bs;
36689371c9d4SSatish Balay   nbs = n / bs;
3669b51a4376SLisandro Dalcin 
3670b51a4376SLisandro Dalcin   /* read in row lengths, column indices and nonzero values */
36719566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &rowidxs));
36729566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
36739371c9d4SSatish Balay   rowidxs[0] = 0;
36749371c9d4SSatish Balay   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3675b51a4376SLisandro Dalcin   sum = rowidxs[m];
36765f80ce2aSJacob Faibussowitsch   PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3677b51a4376SLisandro Dalcin 
3678b51a4376SLisandro Dalcin   /* read in column indices and nonzero values */
36799566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
36809566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
36819566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3682b51a4376SLisandro Dalcin 
3683b51a4376SLisandro Dalcin   {               /* preallocate matrix storage */
3684b51a4376SLisandro Dalcin     PetscBT   bt; /* helper bit set to count nonzeros */
3685b51a4376SLisandro Dalcin     PetscInt *nnz;
3686618cc2edSLisandro Dalcin     PetscBool sbaij;
3687b51a4376SLisandro Dalcin 
36889566063dSJacob Faibussowitsch     PetscCall(PetscBTCreate(nbs, &bt));
36899566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(mbs, &nnz));
36909566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3691b51a4376SLisandro Dalcin     for (i = 0; i < mbs; i++) {
36929566063dSJacob Faibussowitsch       PetscCall(PetscBTMemzero(nbs, bt));
3693618cc2edSLisandro Dalcin       for (k = 0; k < bs; k++) {
3694618cc2edSLisandro Dalcin         PetscInt row = bs * i + k;
3695618cc2edSLisandro Dalcin         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3696618cc2edSLisandro Dalcin           PetscInt col = colidxs[j];
3697618cc2edSLisandro Dalcin           if (!sbaij || col >= row)
3698618cc2edSLisandro Dalcin             if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3699618cc2edSLisandro Dalcin         }
3700618cc2edSLisandro Dalcin       }
3701b51a4376SLisandro Dalcin     }
37029566063dSJacob Faibussowitsch     PetscCall(PetscBTDestroy(&bt));
37039566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
37049566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
37059566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz));
3706b51a4376SLisandro Dalcin   }
3707b51a4376SLisandro Dalcin 
3708b51a4376SLisandro Dalcin   /* store matrix values */
3709b51a4376SLisandro Dalcin   for (i = 0; i < m; i++) {
3710b51a4376SLisandro Dalcin     PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
37119927e4dfSBarry Smith     PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3712b51a4376SLisandro Dalcin   }
3713b51a4376SLisandro Dalcin 
37149566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowidxs));
37159566063dSJacob Faibussowitsch   PetscCall(PetscFree2(colidxs, matvals));
37169566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
37179566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
37183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3719b51a4376SLisandro Dalcin }
3720b51a4376SLisandro Dalcin 
MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)3721d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
3722d71ae5a4SJacob Faibussowitsch {
37237f489da9SVaclav Hapla   PetscBool isbinary;
3724f501eaabSShri Abhyankar 
3725f501eaabSShri Abhyankar   PetscFunctionBegin;
37269566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
37275f80ce2aSJacob Faibussowitsch   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
37289566063dSJacob Faibussowitsch   PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
37293ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3730f501eaabSShri Abhyankar }
3731f501eaabSShri Abhyankar 
37325d83a8b1SBarry Smith /*@
373311a5261eSBarry Smith   MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3734273d9f13SBarry Smith   compressed row) format.  For good matrix assembly performance the
373520f4b53cSBarry Smith   user should preallocate the matrix storage by setting the parameter `nz`
373620f4b53cSBarry Smith   (or the array `nnz`).
37372593348eSBarry Smith 
3738d083f849SBarry Smith   Collective
3739273d9f13SBarry Smith 
3740273d9f13SBarry Smith   Input Parameters:
374111a5261eSBarry Smith + comm - MPI communicator, set to `PETSC_COMM_SELF`
374211a5261eSBarry Smith . bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
374311a5261eSBarry Smith          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3744273d9f13SBarry Smith . m    - number of rows
3745273d9f13SBarry Smith . n    - number of columns
374635d8aa7fSBarry Smith . nz   - number of nonzero blocks  per block row (same for all rows)
374735d8aa7fSBarry Smith - nnz  - array containing the number of nonzero blocks in the various block rows
374820f4b53cSBarry Smith          (possibly different for each block row) or `NULL`
3749273d9f13SBarry Smith 
3750273d9f13SBarry Smith   Output Parameter:
3751273d9f13SBarry Smith . A - the matrix
3752273d9f13SBarry Smith 
3753273d9f13SBarry Smith   Options Database Keys:
375411a5261eSBarry Smith + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3755a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use
3756273d9f13SBarry Smith 
3757273d9f13SBarry Smith   Level: intermediate
3758273d9f13SBarry Smith 
3759273d9f13SBarry Smith   Notes:
376077433607SBarry Smith   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
37612ef1f0ffSBarry Smith   MatXXXXSetPreallocation() paradigm instead of this routine directly.
37622ef1f0ffSBarry Smith   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
37632ef1f0ffSBarry Smith 
3764d1be2dadSMatthew Knepley   The number of rows and columns must be divisible by blocksize.
3765d1be2dadSMatthew Knepley 
37662ef1f0ffSBarry Smith   If the `nnz` parameter is given then the `nz` parameter is ignored
376749a6f317SBarry Smith 
376835d8aa7fSBarry Smith   A nonzero block is any block that as 1 or more nonzeros in it
376935d8aa7fSBarry Smith 
37702ef1f0ffSBarry Smith   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3771273d9f13SBarry Smith   storage.  That is, the stored row and column indices can begin at
377220f4b53cSBarry Smith   either one (as in Fortran) or zero.
3773273d9f13SBarry Smith 
37742ef1f0ffSBarry Smith   Specify the preallocated storage with either `nz` or `nnz` (not both).
37752ef1f0ffSBarry Smith   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3776651615e1SBarry Smith   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3777273d9f13SBarry Smith   matrices.
3778273d9f13SBarry Smith 
37791cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3780273d9f13SBarry Smith @*/
MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat * A)3781d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
3782d71ae5a4SJacob Faibussowitsch {
3783273d9f13SBarry Smith   PetscFunctionBegin;
37849566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
37859566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, m, n));
37869566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A, MATSEQBAIJ));
37879566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
37883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3789273d9f13SBarry Smith }
3790273d9f13SBarry Smith 
37915d83a8b1SBarry Smith /*@
3792273d9f13SBarry Smith   MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3793273d9f13SBarry Smith   per row in the matrix. For good matrix assembly performance the
379420f4b53cSBarry Smith   user should preallocate the matrix storage by setting the parameter `nz`
379520f4b53cSBarry Smith   (or the array `nnz`).
3796273d9f13SBarry Smith 
3797d083f849SBarry Smith   Collective
3798273d9f13SBarry Smith 
3799273d9f13SBarry Smith   Input Parameters:
38001c4f3114SJed Brown + B   - the matrix
380111a5261eSBarry Smith . bs  - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
380211a5261eSBarry Smith         blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3803273d9f13SBarry Smith . nz  - number of block nonzeros per block row (same for all rows)
3804273d9f13SBarry Smith - nnz - array containing the number of block nonzeros in the various block rows
38052ef1f0ffSBarry Smith         (possibly different for each block row) or `NULL`
3806273d9f13SBarry Smith 
3807273d9f13SBarry Smith   Options Database Keys:
380811a5261eSBarry Smith + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3809a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use
3810273d9f13SBarry Smith 
3811273d9f13SBarry Smith   Level: intermediate
3812273d9f13SBarry Smith 
3813273d9f13SBarry Smith   Notes:
38142ef1f0ffSBarry Smith   If the `nnz` parameter is given then the `nz` parameter is ignored
381549a6f317SBarry Smith 
381611a5261eSBarry Smith   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3817aa95bbe8SBarry Smith   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
381820f4b53cSBarry Smith   You can also run with the option `-info` and look for messages with the string
3819aa95bbe8SBarry Smith   malloc in them to see if additional memory allocation was needed.
3820aa95bbe8SBarry Smith 
38212ef1f0ffSBarry Smith   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3822273d9f13SBarry Smith   storage.  That is, the stored row and column indices can begin at
382320f4b53cSBarry Smith   either one (as in Fortran) or zero.
3824273d9f13SBarry Smith 
3825d8a51d2aSBarry Smith   Specify the preallocated storage with either `nz` or `nnz` (not both).
38262ef1f0ffSBarry Smith   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3827651615e1SBarry Smith   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3828273d9f13SBarry Smith 
38291cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3830273d9f13SBarry Smith @*/
MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])3831d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3832d71ae5a4SJacob Faibussowitsch {
3833273d9f13SBarry Smith   PetscFunctionBegin;
38346ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
38356ba663aaSJed Brown   PetscValidType(B, 1);
38366ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
3837cac4c232SBarry Smith   PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
38383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3839273d9f13SBarry Smith }
3840a1d92eedSBarry Smith 
3841725b52f3SLisandro Dalcin /*@C
384211a5261eSBarry Smith   MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3843725b52f3SLisandro Dalcin 
3844d083f849SBarry Smith   Collective
3845725b52f3SLisandro Dalcin 
3846725b52f3SLisandro Dalcin   Input Parameters:
38471c4f3114SJed Brown + B  - the matrix
384820f4b53cSBarry Smith . bs - the blocksize
3849d8a51d2aSBarry Smith . i  - the indices into `j` for the start of each local row (indices start with zero)
3850d8a51d2aSBarry Smith . j  - the column indices for each local row (indices start with zero) these must be sorted for each row
3851d8a51d2aSBarry Smith - v  - optional values in the matrix, use `NULL` if not provided
3852725b52f3SLisandro Dalcin 
3853664954b6SBarry Smith   Level: advanced
3854725b52f3SLisandro Dalcin 
38553adadaf3SJed Brown   Notes:
3856d8a51d2aSBarry Smith   The `i`,`j`,`v` values are COPIED with this routine; to avoid the copy use `MatCreateSeqBAIJWithArrays()`
3857d8a51d2aSBarry Smith 
385811a5261eSBarry Smith   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
385911a5261eSBarry Smith   may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
38603adadaf3SJed Brown   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
386111a5261eSBarry Smith   `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
38623adadaf3SJed Brown   block column and the second index is over columns within a block.
38633adadaf3SJed Brown 
3864664954b6SBarry Smith   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3865664954b6SBarry Smith 
38661cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3867725b52f3SLisandro Dalcin @*/
MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[],const PetscScalar v[])3868d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3869d71ae5a4SJacob Faibussowitsch {
3870725b52f3SLisandro Dalcin   PetscFunctionBegin;
38716ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
38726ba663aaSJed Brown   PetscValidType(B, 1);
38736ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
3874cac4c232SBarry Smith   PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
38753ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3876725b52f3SLisandro Dalcin }
3877725b52f3SLisandro Dalcin 
3878c75a6043SHong Zhang /*@
387911a5261eSBarry Smith   MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3880c75a6043SHong Zhang 
3881d083f849SBarry Smith   Collective
3882c75a6043SHong Zhang 
3883c75a6043SHong Zhang   Input Parameters:
3884c75a6043SHong Zhang + comm - must be an MPI communicator of size 1
3885c75a6043SHong Zhang . bs   - size of block
3886c75a6043SHong Zhang . m    - number of rows
3887c75a6043SHong Zhang . n    - number of columns
3888483a2f95SBarry Smith . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3889c75a6043SHong Zhang . j    - column indices
3890c75a6043SHong Zhang - a    - matrix values
3891c75a6043SHong Zhang 
3892c75a6043SHong Zhang   Output Parameter:
3893c75a6043SHong Zhang . mat - the matrix
3894c75a6043SHong Zhang 
3895dfb205c3SBarry Smith   Level: advanced
3896c75a6043SHong Zhang 
3897c75a6043SHong Zhang   Notes:
38982ef1f0ffSBarry Smith   The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays
3899c75a6043SHong Zhang   once the matrix is destroyed
3900c75a6043SHong Zhang 
3901c75a6043SHong Zhang   You cannot set new nonzero locations into this matrix, that will generate an error.
3902c75a6043SHong Zhang 
39032ef1f0ffSBarry Smith   The `i` and `j` indices are 0 based
3904c75a6043SHong Zhang 
390511a5261eSBarry Smith   When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3906dfb205c3SBarry Smith 
39073adadaf3SJed Brown   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
39083adadaf3SJed Brown   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
39093adadaf3SJed Brown   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
39103adadaf3SJed Brown   with column-major ordering within blocks.
3911dfb205c3SBarry Smith 
39121cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3913c75a6043SHong Zhang @*/
MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat * mat)3914d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
3915d71ae5a4SJacob Faibussowitsch {
3916c75a6043SHong Zhang   Mat_SeqBAIJ *baij;
3917c75a6043SHong Zhang 
3918c75a6043SHong Zhang   PetscFunctionBegin;
39195f80ce2aSJacob Faibussowitsch   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
39205f80ce2aSJacob Faibussowitsch   if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3921c75a6043SHong Zhang 
39229566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, mat));
39239566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*mat, m, n, m, n));
39249566063dSJacob Faibussowitsch   PetscCall(MatSetType(*mat, MATSEQBAIJ));
39259566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3926c75a6043SHong Zhang   baij = (Mat_SeqBAIJ *)(*mat)->data;
39279566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
3928c75a6043SHong Zhang 
3929c75a6043SHong Zhang   baij->i = i;
3930c75a6043SHong Zhang   baij->j = j;
3931c75a6043SHong Zhang   baij->a = a;
393226fbe8dcSKarl Rupp 
3933c75a6043SHong Zhang   baij->nonew          = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3934e6b907acSBarry Smith   baij->free_a         = PETSC_FALSE;
3935e6b907acSBarry Smith   baij->free_ij        = PETSC_FALSE;
3936ceb5bf51SJacob Faibussowitsch   baij->free_imax_ilen = PETSC_TRUE;
3937c75a6043SHong Zhang 
3938ceb5bf51SJacob Faibussowitsch   for (PetscInt ii = 0; ii < m; ii++) {
3939ceb5bf51SJacob Faibussowitsch     const PetscInt row_len = i[ii + 1] - i[ii];
3940ceb5bf51SJacob Faibussowitsch 
3941ceb5bf51SJacob Faibussowitsch     baij->ilen[ii] = baij->imax[ii] = row_len;
3942ceb5bf51SJacob Faibussowitsch     PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len);
3943c75a6043SHong Zhang   }
394476bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {
3945ceb5bf51SJacob Faibussowitsch     for (PetscInt ii = 0; ii < baij->i[m]; ii++) {
39466bdcaf15SBarry Smith       PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
39476bdcaf15SBarry Smith       PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3948c75a6043SHong Zhang     }
394976bd3646SJed Brown   }
3950c75a6043SHong Zhang 
39519566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
39529566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
39533ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3954c75a6043SHong Zhang }
3955bdf6f3fcSHong Zhang 
MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat * outmat)3956d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3957d71ae5a4SJacob Faibussowitsch {
3958bdf6f3fcSHong Zhang   PetscFunctionBegin;
39599566063dSJacob Faibussowitsch   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
39603ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3961bdf6f3fcSHong Zhang }
3962