xref: /petsc/src/mat/impls/baij/seq/baijsolvnat11.c (revision 6996bd1a6dda9216f11f3a1c5d2357ea301aa80d)
12c733ed4SBarry Smith #include <../src/mat/impls/baij/seq/baij.h>
22c733ed4SBarry Smith #include <petsc/private/kernels/blockinvert.h>
32c733ed4SBarry Smith 
4*15229ffcSPierre Jolivet /* Block operations are done by accessing one column at a time */
52c733ed4SBarry Smith /* Default MatSolve for block size 11 */
62c733ed4SBarry Smith 
MatSolve_SeqBAIJ_11_NaturalOrdering(Mat A,Vec bb,Vec xx)7d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_11_NaturalOrdering(Mat A, Vec bb, Vec xx)
8d71ae5a4SJacob Faibussowitsch {
92c733ed4SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
102c733ed4SBarry Smith   const PetscInt     n = a->mbs, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi, bs = A->rmap->bs, bs2 = a->bs2;
112c733ed4SBarry Smith   PetscInt           i, k, nz, idx, idt, m;
122c733ed4SBarry Smith   const MatScalar   *aa = a->a, *v;
132c733ed4SBarry Smith   PetscScalar        s[11];
142c733ed4SBarry Smith   PetscScalar       *x, xv;
152c733ed4SBarry Smith   const PetscScalar *b;
162c733ed4SBarry Smith 
172c733ed4SBarry Smith   PetscFunctionBegin;
189566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
199566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
202c733ed4SBarry Smith 
212c733ed4SBarry Smith   /* forward solve the lower triangular */
222c733ed4SBarry Smith   for (i = 0; i < n; i++) {
232c733ed4SBarry Smith     v           = aa + bs2 * ai[i];
242c733ed4SBarry Smith     vi          = aj + ai[i];
252c733ed4SBarry Smith     nz          = ai[i + 1] - ai[i];
262c733ed4SBarry Smith     idt         = bs * i;
279371c9d4SSatish Balay     x[idt]      = b[idt];
289371c9d4SSatish Balay     x[1 + idt]  = b[1 + idt];
299371c9d4SSatish Balay     x[2 + idt]  = b[2 + idt];
309371c9d4SSatish Balay     x[3 + idt]  = b[3 + idt];
319371c9d4SSatish Balay     x[4 + idt]  = b[4 + idt];
329371c9d4SSatish Balay     x[5 + idt]  = b[5 + idt];
339371c9d4SSatish Balay     x[6 + idt]  = b[6 + idt];
349371c9d4SSatish Balay     x[7 + idt]  = b[7 + idt];
359371c9d4SSatish Balay     x[8 + idt]  = b[8 + idt];
369371c9d4SSatish Balay     x[9 + idt]  = b[9 + idt];
372c733ed4SBarry Smith     x[10 + idt] = b[10 + idt];
382c733ed4SBarry Smith     for (m = 0; m < nz; m++) {
392c733ed4SBarry Smith       idx = bs * vi[m];
402c733ed4SBarry Smith       for (k = 0; k < 11; k++) {
412c733ed4SBarry Smith         xv = x[k + idx];
422c733ed4SBarry Smith         x[idt] -= v[0] * xv;
432c733ed4SBarry Smith         x[1 + idt] -= v[1] * xv;
442c733ed4SBarry Smith         x[2 + idt] -= v[2] * xv;
452c733ed4SBarry Smith         x[3 + idt] -= v[3] * xv;
462c733ed4SBarry Smith         x[4 + idt] -= v[4] * xv;
472c733ed4SBarry Smith         x[5 + idt] -= v[5] * xv;
482c733ed4SBarry Smith         x[6 + idt] -= v[6] * xv;
492c733ed4SBarry Smith         x[7 + idt] -= v[7] * xv;
502c733ed4SBarry Smith         x[8 + idt] -= v[8] * xv;
512c733ed4SBarry Smith         x[9 + idt] -= v[9] * xv;
522c733ed4SBarry Smith         x[10 + idt] -= v[10] * xv;
532c733ed4SBarry Smith         v += 11;
542c733ed4SBarry Smith       }
552c733ed4SBarry Smith     }
562c733ed4SBarry Smith   }
572c733ed4SBarry Smith   /* backward solve the upper triangular */
582c733ed4SBarry Smith   for (i = n - 1; i >= 0; i--) {
592c733ed4SBarry Smith     v     = aa + bs2 * (adiag[i + 1] + 1);
602c733ed4SBarry Smith     vi    = aj + adiag[i + 1] + 1;
612c733ed4SBarry Smith     nz    = adiag[i] - adiag[i + 1] - 1;
622c733ed4SBarry Smith     idt   = bs * i;
639371c9d4SSatish Balay     s[0]  = x[idt];
649371c9d4SSatish Balay     s[1]  = x[1 + idt];
659371c9d4SSatish Balay     s[2]  = x[2 + idt];
669371c9d4SSatish Balay     s[3]  = x[3 + idt];
679371c9d4SSatish Balay     s[4]  = x[4 + idt];
689371c9d4SSatish Balay     s[5]  = x[5 + idt];
699371c9d4SSatish Balay     s[6]  = x[6 + idt];
709371c9d4SSatish Balay     s[7]  = x[7 + idt];
719371c9d4SSatish Balay     s[8]  = x[8 + idt];
729371c9d4SSatish Balay     s[9]  = x[9 + idt];
732c733ed4SBarry Smith     s[10] = x[10 + idt];
742c733ed4SBarry Smith 
752c733ed4SBarry Smith     for (m = 0; m < nz; m++) {
762c733ed4SBarry Smith       idx = bs * vi[m];
772c733ed4SBarry Smith       for (k = 0; k < 11; k++) {
782c733ed4SBarry Smith         xv = x[k + idx];
792c733ed4SBarry Smith         s[0] -= v[0] * xv;
802c733ed4SBarry Smith         s[1] -= v[1] * xv;
812c733ed4SBarry Smith         s[2] -= v[2] * xv;
822c733ed4SBarry Smith         s[3] -= v[3] * xv;
832c733ed4SBarry Smith         s[4] -= v[4] * xv;
842c733ed4SBarry Smith         s[5] -= v[5] * xv;
852c733ed4SBarry Smith         s[6] -= v[6] * xv;
862c733ed4SBarry Smith         s[7] -= v[7] * xv;
872c733ed4SBarry Smith         s[8] -= v[8] * xv;
882c733ed4SBarry Smith         s[9] -= v[9] * xv;
892c733ed4SBarry Smith         s[10] -= v[10] * xv;
902c733ed4SBarry Smith         v += 11;
912c733ed4SBarry Smith       }
922c733ed4SBarry Smith     }
939566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(x + idt, bs));
942c733ed4SBarry Smith     for (k = 0; k < 11; k++) {
952c733ed4SBarry Smith       x[idt] += v[0] * s[k];
962c733ed4SBarry Smith       x[1 + idt] += v[1] * s[k];
972c733ed4SBarry Smith       x[2 + idt] += v[2] * s[k];
982c733ed4SBarry Smith       x[3 + idt] += v[3] * s[k];
992c733ed4SBarry Smith       x[4 + idt] += v[4] * s[k];
1002c733ed4SBarry Smith       x[5 + idt] += v[5] * s[k];
1012c733ed4SBarry Smith       x[6 + idt] += v[6] * s[k];
1022c733ed4SBarry Smith       x[7 + idt] += v[7] * s[k];
1032c733ed4SBarry Smith       x[8 + idt] += v[8] * s[k];
1042c733ed4SBarry Smith       x[9 + idt] += v[9] * s[k];
1052c733ed4SBarry Smith       x[10 + idt] += v[10] * s[k];
1062c733ed4SBarry Smith       v += 11;
1072c733ed4SBarry Smith     }
1082c733ed4SBarry Smith   }
1099566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
1109566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
1119566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz) - bs * A->cmap->n));
1123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1132c733ed4SBarry Smith }
114