1 #include <../src/mat/impls/baij/seq/baij.h> 2 #include <petsc/private/kernels/blockinvert.h> 3 4 /* ----------------------------------------------------------- */ 5 PetscErrorCode MatSolveTranspose_SeqBAIJ_N_inplace(Mat A,Vec bb,Vec xx) 6 { 7 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data; 8 IS iscol=a->col,isrow=a->row; 9 PetscErrorCode ierr; 10 const PetscInt *r,*c,*rout,*cout,*ai=a->i,*aj=a->j,*vi; 11 PetscInt i,nz,j; 12 const PetscInt n =a->mbs,bs=A->rmap->bs,bs2=a->bs2; 13 const MatScalar *aa=a->a,*v; 14 PetscScalar *x,*t,*ls; 15 const PetscScalar *b; 16 17 PetscFunctionBegin; 18 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 19 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 20 t = a->solve_work; 21 22 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 23 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 24 25 /* copy the b into temp work space according to permutation */ 26 for (i=0; i<n; i++) { 27 for (j=0; j<bs; j++) { 28 t[i*bs+j] = b[c[i]*bs+j]; 29 } 30 } 31 32 33 /* forward solve the upper triangular transpose */ 34 ls = a->solve_work + A->cmap->n; 35 for (i=0; i<n; i++) { 36 ierr = PetscMemcpy(ls,t+i*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr); 37 PetscKernel_w_gets_transA_times_v(bs,ls,aa+bs2*a->diag[i],t+i*bs); 38 v = aa + bs2*(a->diag[i] + 1); 39 vi = aj + a->diag[i] + 1; 40 nz = ai[i+1] - a->diag[i] - 1; 41 while (nz--) { 42 PetscKernel_v_gets_v_minus_transA_times_w(bs,t+bs*(*vi++),v,t+i*bs); 43 v += bs2; 44 } 45 } 46 47 /* backward solve the lower triangular transpose */ 48 for (i=n-1; i>=0; i--) { 49 v = aa + bs2*ai[i]; 50 vi = aj + ai[i]; 51 nz = a->diag[i] - ai[i]; 52 while (nz--) { 53 PetscKernel_v_gets_v_minus_transA_times_w(bs,t+bs*(*vi++),v,t+i*bs); 54 v += bs2; 55 } 56 } 57 58 /* copy t into x according to permutation */ 59 for (i=0; i<n; i++) { 60 for (j=0; j<bs; j++) { 61 x[bs*r[i]+j] = t[bs*i+j]; 62 } 63 } 64 65 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 66 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 67 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 68 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 69 ierr = PetscLogFlops(2.0*(a->bs2)*(a->nz) - A->rmap->bs*A->cmap->n);CHKERRQ(ierr); 70 PetscFunctionReturn(0); 71 } 72 73 PetscErrorCode MatSolveTranspose_SeqBAIJ_N(Mat A,Vec bb,Vec xx) 74 { 75 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data; 76 IS iscol=a->col,isrow=a->row; 77 PetscErrorCode ierr; 78 const PetscInt *r,*c,*rout,*cout; 79 const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*vi,*diag=a->diag; 80 PetscInt i,j,nz; 81 const PetscInt bs =A->rmap->bs,bs2=a->bs2; 82 const MatScalar *aa=a->a,*v; 83 PetscScalar *x,*t,*ls; 84 const PetscScalar *b; 85 86 PetscFunctionBegin; 87 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 88 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 89 t = a->solve_work; 90 91 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 92 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 93 94 /* copy the b into temp work space according to permutation */ 95 for (i=0; i<n; i++) { 96 for (j=0; j<bs; j++) { 97 t[i*bs+j] = b[c[i]*bs+j]; 98 } 99 } 100 101 102 /* forward solve the upper triangular transpose */ 103 ls = a->solve_work + A->cmap->n; 104 for (i=0; i<n; i++) { 105 ierr = PetscMemcpy(ls,t+i*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr); 106 PetscKernel_w_gets_transA_times_v(bs,ls,aa+bs2*diag[i],t+i*bs); 107 v = aa + bs2*(diag[i] - 1); 108 vi = aj + diag[i] - 1; 109 nz = diag[i] - diag[i+1] - 1; 110 for (j=0; j>-nz; j--) { 111 PetscKernel_v_gets_v_minus_transA_times_w(bs,t+bs*(vi[j]),v,t+i*bs); 112 v -= bs2; 113 } 114 } 115 116 /* backward solve the lower triangular transpose */ 117 for (i=n-1; i>=0; i--) { 118 v = aa + bs2*ai[i]; 119 vi = aj + ai[i]; 120 nz = ai[i+1] - ai[i]; 121 for (j=0; j<nz; j++) { 122 PetscKernel_v_gets_v_minus_transA_times_w(bs,t+bs*(vi[j]),v,t+i*bs); 123 v += bs2; 124 } 125 } 126 127 /* copy t into x according to permutation */ 128 for (i=0; i<n; i++) { 129 for (j=0; j<bs; j++) { 130 x[bs*r[i]+j] = t[bs*i+j]; 131 } 132 } 133 134 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 135 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 136 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 137 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 138 ierr = PetscLogFlops(2.0*(a->bs2)*(a->nz) - A->rmap->bs*A->cmap->n);CHKERRQ(ierr); 139 PetscFunctionReturn(0); 140 } 141 142