1 /*$Id: baijfact.c,v 1.90 2001/03/23 23:22:07 balay Exp $*/ 2 /* 3 Factorization code for BAIJ format. 4 */ 5 #include "src/mat/impls/baij/seq/baij.h" 6 #include "src/inline/ilu.h" 7 8 /* ------------------------------------------------------------*/ 9 /* 10 Version for when blocks are 2 by 2 11 */ 12 #undef __FUNCT__ 13 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2" 14 int MatLUFactorNumeric_SeqBAIJ_2(Mat A,Mat *B) 15 { 16 Mat C = *B; 17 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 18 IS isrow = b->row,isicol = b->icol; 19 int *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 20 int *ajtmpold,*ajtmp,nz,row; 21 int *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj; 22 MatScalar *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4; 23 MatScalar p1,p2,p3,p4; 24 MatScalar *ba = b->a,*aa = a->a; 25 26 PetscFunctionBegin; 27 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 28 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 29 ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 30 31 for (i=0; i<n; i++) { 32 nz = bi[i+1] - bi[i]; 33 ajtmp = bj + bi[i]; 34 for (j=0; j<nz; j++) { 35 x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0; 36 } 37 /* load in initial (unfactored row) */ 38 idx = r[i]; 39 nz = ai[idx+1] - ai[idx]; 40 ajtmpold = aj + ai[idx]; 41 v = aa + 4*ai[idx]; 42 for (j=0; j<nz; j++) { 43 x = rtmp+4*ic[ajtmpold[j]]; 44 x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 45 v += 4; 46 } 47 row = *ajtmp++; 48 while (row < i) { 49 pc = rtmp + 4*row; 50 p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 51 if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 52 pv = ba + 4*diag_offset[row]; 53 pj = bj + diag_offset[row] + 1; 54 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 55 pc[0] = m1 = p1*x1 + p3*x2; 56 pc[1] = m2 = p2*x1 + p4*x2; 57 pc[2] = m3 = p1*x3 + p3*x4; 58 pc[3] = m4 = p2*x3 + p4*x4; 59 nz = bi[row+1] - diag_offset[row] - 1; 60 pv += 4; 61 for (j=0; j<nz; j++) { 62 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 63 x = rtmp + 4*pj[j]; 64 x[0] -= m1*x1 + m3*x2; 65 x[1] -= m2*x1 + m4*x2; 66 x[2] -= m1*x3 + m3*x4; 67 x[3] -= m2*x3 + m4*x4; 68 pv += 4; 69 } 70 PetscLogFlops(16*nz+12); 71 } 72 row = *ajtmp++; 73 } 74 /* finished row so stick it into b->a */ 75 pv = ba + 4*bi[i]; 76 pj = bj + bi[i]; 77 nz = bi[i+1] - bi[i]; 78 for (j=0; j<nz; j++) { 79 x = rtmp+4*pj[j]; 80 pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 81 pv += 4; 82 } 83 /* invert diagonal block */ 84 w = ba + 4*diag_offset[i]; 85 ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr); 86 } 87 88 ierr = PetscFree(rtmp);CHKERRQ(ierr); 89 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 90 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 91 C->factor = FACTOR_LU; 92 C->assembled = PETSC_TRUE; 93 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 94 PetscFunctionReturn(0); 95 } 96 /* 97 Version for when blocks are 2 by 2 Using natural ordering 98 */ 99 #undef __FUNCT__ 100 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering" 101 int MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,Mat *B) 102 { 103 Mat C = *B; 104 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 105 int ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 106 int *ajtmpold,*ajtmp,nz,row; 107 int *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj; 108 MatScalar *pv,*v,*rtmp,*pc,*w,*x; 109 MatScalar p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4; 110 MatScalar *ba = b->a,*aa = a->a; 111 112 PetscFunctionBegin; 113 ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 114 115 for (i=0; i<n; i++) { 116 nz = bi[i+1] - bi[i]; 117 ajtmp = bj + bi[i]; 118 for (j=0; j<nz; j++) { 119 x = rtmp+4*ajtmp[j]; 120 x[0] = x[1] = x[2] = x[3] = 0.0; 121 } 122 /* load in initial (unfactored row) */ 123 nz = ai[i+1] - ai[i]; 124 ajtmpold = aj + ai[i]; 125 v = aa + 4*ai[i]; 126 for (j=0; j<nz; j++) { 127 x = rtmp+4*ajtmpold[j]; 128 x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 129 v += 4; 130 } 131 row = *ajtmp++; 132 while (row < i) { 133 pc = rtmp + 4*row; 134 p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 135 if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 136 pv = ba + 4*diag_offset[row]; 137 pj = bj + diag_offset[row] + 1; 138 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 139 pc[0] = m1 = p1*x1 + p3*x2; 140 pc[1] = m2 = p2*x1 + p4*x2; 141 pc[2] = m3 = p1*x3 + p3*x4; 142 pc[3] = m4 = p2*x3 + p4*x4; 143 nz = bi[row+1] - diag_offset[row] - 1; 144 pv += 4; 145 for (j=0; j<nz; j++) { 146 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 147 x = rtmp + 4*pj[j]; 148 x[0] -= m1*x1 + m3*x2; 149 x[1] -= m2*x1 + m4*x2; 150 x[2] -= m1*x3 + m3*x4; 151 x[3] -= m2*x3 + m4*x4; 152 pv += 4; 153 } 154 PetscLogFlops(16*nz+12); 155 } 156 row = *ajtmp++; 157 } 158 /* finished row so stick it into b->a */ 159 pv = ba + 4*bi[i]; 160 pj = bj + bi[i]; 161 nz = bi[i+1] - bi[i]; 162 for (j=0; j<nz; j++) { 163 x = rtmp+4*pj[j]; 164 pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 165 pv += 4; 166 } 167 /* invert diagonal block */ 168 w = ba + 4*diag_offset[i]; 169 ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr); 170 /*Kernel_A_gets_inverse_A(bs,w,v_pivots,v_work);*/ 171 } 172 173 ierr = PetscFree(rtmp);CHKERRQ(ierr); 174 C->factor = FACTOR_LU; 175 C->assembled = PETSC_TRUE; 176 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 177 PetscFunctionReturn(0); 178 } 179 180 /* ----------------------------------------------------------- */ 181 /* 182 Version for when blocks are 1 by 1. 183 */ 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_1" 186 int MatLUFactorNumeric_SeqBAIJ_1(Mat A,Mat *B) 187 { 188 Mat C = *B; 189 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 190 IS isrow = b->row,isicol = b->icol; 191 int *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 192 int *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j; 193 int *diag_offset = b->diag,diag,*pj; 194 MatScalar *pv,*v,*rtmp,multiplier,*pc; 195 MatScalar *ba = b->a,*aa = a->a; 196 197 PetscFunctionBegin; 198 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 199 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 200 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 201 202 for (i=0; i<n; i++) { 203 nz = bi[i+1] - bi[i]; 204 ajtmp = bj + bi[i]; 205 for (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0; 206 207 /* load in initial (unfactored row) */ 208 nz = ai[r[i]+1] - ai[r[i]]; 209 ajtmpold = aj + ai[r[i]]; 210 v = aa + ai[r[i]]; 211 for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] = v[j]; 212 213 row = *ajtmp++; 214 while (row < i) { 215 pc = rtmp + row; 216 if (*pc != 0.0) { 217 pv = ba + diag_offset[row]; 218 pj = bj + diag_offset[row] + 1; 219 multiplier = *pc * *pv++; 220 *pc = multiplier; 221 nz = bi[row+1] - diag_offset[row] - 1; 222 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 223 PetscLogFlops(1+2*nz); 224 } 225 row = *ajtmp++; 226 } 227 /* finished row so stick it into b->a */ 228 pv = ba + bi[i]; 229 pj = bj + bi[i]; 230 nz = bi[i+1] - bi[i]; 231 for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];} 232 diag = diag_offset[i] - bi[i]; 233 /* check pivot entry for current row */ 234 if (pv[diag] == 0.0) { 235 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 236 } 237 pv[diag] = 1.0/pv[diag]; 238 } 239 240 ierr = PetscFree(rtmp);CHKERRQ(ierr); 241 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 242 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 243 C->factor = FACTOR_LU; 244 C->assembled = PETSC_TRUE; 245 PetscLogFlops(C->n); 246 PetscFunctionReturn(0); 247 } 248 249 250 /* ----------------------------------------------------------- */ 251 #undef __FUNCT__ 252 #define __FUNCT__ "MatLUFactor_SeqBAIJ" 253 int MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 254 { 255 int ierr; 256 Mat C; 257 258 PetscFunctionBegin; 259 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 260 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 261 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 262 PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol); 263 PetscFunctionReturn(0); 264 } 265 266 267