1 /* 2 Factorization code for BAIJ format. 3 */ 4 #include "src/mat/impls/baij/seq/baij.h" 5 #include "src/inline/ilu.h" 6 7 /* ------------------------------------------------------------*/ 8 /* 9 Version for when blocks are 2 by 2 10 */ 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2" 13 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat A,Mat *B) 14 { 15 Mat C = *B; 16 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 17 IS isrow = b->row,isicol = b->icol; 18 PetscErrorCode ierr; 19 int *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 20 int *ajtmpold,*ajtmp,nz,row; 21 int *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj; 22 MatScalar *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4; 23 MatScalar p1,p2,p3,p4; 24 MatScalar *ba = b->a,*aa = a->a; 25 26 PetscFunctionBegin; 27 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 28 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 29 ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 30 31 for (i=0; i<n; i++) { 32 nz = bi[i+1] - bi[i]; 33 ajtmp = bj + bi[i]; 34 for (j=0; j<nz; j++) { 35 x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0; 36 } 37 /* load in initial (unfactored row) */ 38 idx = r[i]; 39 nz = ai[idx+1] - ai[idx]; 40 ajtmpold = aj + ai[idx]; 41 v = aa + 4*ai[idx]; 42 for (j=0; j<nz; j++) { 43 x = rtmp+4*ic[ajtmpold[j]]; 44 x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 45 v += 4; 46 } 47 row = *ajtmp++; 48 while (row < i) { 49 pc = rtmp + 4*row; 50 p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 51 if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 52 pv = ba + 4*diag_offset[row]; 53 pj = bj + diag_offset[row] + 1; 54 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 55 pc[0] = m1 = p1*x1 + p3*x2; 56 pc[1] = m2 = p2*x1 + p4*x2; 57 pc[2] = m3 = p1*x3 + p3*x4; 58 pc[3] = m4 = p2*x3 + p4*x4; 59 nz = bi[row+1] - diag_offset[row] - 1; 60 pv += 4; 61 for (j=0; j<nz; j++) { 62 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 63 x = rtmp + 4*pj[j]; 64 x[0] -= m1*x1 + m3*x2; 65 x[1] -= m2*x1 + m4*x2; 66 x[2] -= m1*x3 + m3*x4; 67 x[3] -= m2*x3 + m4*x4; 68 pv += 4; 69 } 70 PetscLogFlops(16*nz+12); 71 } 72 row = *ajtmp++; 73 } 74 /* finished row so stick it into b->a */ 75 pv = ba + 4*bi[i]; 76 pj = bj + bi[i]; 77 nz = bi[i+1] - bi[i]; 78 for (j=0; j<nz; j++) { 79 x = rtmp+4*pj[j]; 80 pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 81 pv += 4; 82 } 83 /* invert diagonal block */ 84 w = ba + 4*diag_offset[i]; 85 ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr); 86 } 87 88 ierr = PetscFree(rtmp);CHKERRQ(ierr); 89 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 90 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 91 C->factor = FACTOR_LU; 92 C->assembled = PETSC_TRUE; 93 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 94 PetscFunctionReturn(0); 95 } 96 /* 97 Version for when blocks are 2 by 2 Using natural ordering 98 */ 99 #undef __FUNCT__ 100 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering" 101 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,Mat *B) 102 { 103 Mat C = *B; 104 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 105 PetscErrorCode ierr; 106 int i,j,n = a->mbs,*bi = b->i,*bj = b->j; 107 int *ajtmpold,*ajtmp,nz,row; 108 int *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj; 109 MatScalar *pv,*v,*rtmp,*pc,*w,*x; 110 MatScalar p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4; 111 MatScalar *ba = b->a,*aa = a->a; 112 113 PetscFunctionBegin; 114 ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 115 116 for (i=0; i<n; i++) { 117 nz = bi[i+1] - bi[i]; 118 ajtmp = bj + bi[i]; 119 for (j=0; j<nz; j++) { 120 x = rtmp+4*ajtmp[j]; 121 x[0] = x[1] = x[2] = x[3] = 0.0; 122 } 123 /* load in initial (unfactored row) */ 124 nz = ai[i+1] - ai[i]; 125 ajtmpold = aj + ai[i]; 126 v = aa + 4*ai[i]; 127 for (j=0; j<nz; j++) { 128 x = rtmp+4*ajtmpold[j]; 129 x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 130 v += 4; 131 } 132 row = *ajtmp++; 133 while (row < i) { 134 pc = rtmp + 4*row; 135 p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 136 if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 137 pv = ba + 4*diag_offset[row]; 138 pj = bj + diag_offset[row] + 1; 139 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 140 pc[0] = m1 = p1*x1 + p3*x2; 141 pc[1] = m2 = p2*x1 + p4*x2; 142 pc[2] = m3 = p1*x3 + p3*x4; 143 pc[3] = m4 = p2*x3 + p4*x4; 144 nz = bi[row+1] - diag_offset[row] - 1; 145 pv += 4; 146 for (j=0; j<nz; j++) { 147 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 148 x = rtmp + 4*pj[j]; 149 x[0] -= m1*x1 + m3*x2; 150 x[1] -= m2*x1 + m4*x2; 151 x[2] -= m1*x3 + m3*x4; 152 x[3] -= m2*x3 + m4*x4; 153 pv += 4; 154 } 155 PetscLogFlops(16*nz+12); 156 } 157 row = *ajtmp++; 158 } 159 /* finished row so stick it into b->a */ 160 pv = ba + 4*bi[i]; 161 pj = bj + bi[i]; 162 nz = bi[i+1] - bi[i]; 163 for (j=0; j<nz; j++) { 164 x = rtmp+4*pj[j]; 165 pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 166 pv += 4; 167 } 168 /* invert diagonal block */ 169 w = ba + 4*diag_offset[i]; 170 ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr); 171 /*Kernel_A_gets_inverse_A(bs,w,v_pivots,v_work);*/ 172 } 173 174 ierr = PetscFree(rtmp);CHKERRQ(ierr); 175 C->factor = FACTOR_LU; 176 C->assembled = PETSC_TRUE; 177 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 178 PetscFunctionReturn(0); 179 } 180 181 /* ----------------------------------------------------------- */ 182 /* 183 Version for when blocks are 1 by 1. 184 */ 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_1" 187 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat A,Mat *B) 188 { 189 Mat C = *B; 190 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 191 IS isrow = b->row,isicol = b->icol; 192 PetscErrorCode ierr; 193 int *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 194 int *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j; 195 int *diag_offset = b->diag,diag,*pj; 196 MatScalar *pv,*v,*rtmp,multiplier,*pc; 197 MatScalar *ba = b->a,*aa = a->a; 198 199 PetscFunctionBegin; 200 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 201 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 202 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 203 204 for (i=0; i<n; i++) { 205 nz = bi[i+1] - bi[i]; 206 ajtmp = bj + bi[i]; 207 for (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0; 208 209 /* load in initial (unfactored row) */ 210 nz = ai[r[i]+1] - ai[r[i]]; 211 ajtmpold = aj + ai[r[i]]; 212 v = aa + ai[r[i]]; 213 for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] = v[j]; 214 215 row = *ajtmp++; 216 while (row < i) { 217 pc = rtmp + row; 218 if (*pc != 0.0) { 219 pv = ba + diag_offset[row]; 220 pj = bj + diag_offset[row] + 1; 221 multiplier = *pc * *pv++; 222 *pc = multiplier; 223 nz = bi[row+1] - diag_offset[row] - 1; 224 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 225 PetscLogFlops(1+2*nz); 226 } 227 row = *ajtmp++; 228 } 229 /* finished row so stick it into b->a */ 230 pv = ba + bi[i]; 231 pj = bj + bi[i]; 232 nz = bi[i+1] - bi[i]; 233 for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];} 234 diag = diag_offset[i] - bi[i]; 235 /* check pivot entry for current row */ 236 if (pv[diag] == 0.0) { 237 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 238 } 239 pv[diag] = 1.0/pv[diag]; 240 } 241 242 ierr = PetscFree(rtmp);CHKERRQ(ierr); 243 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 244 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 245 C->factor = FACTOR_LU; 246 C->assembled = PETSC_TRUE; 247 PetscLogFlops(C->n); 248 PetscFunctionReturn(0); 249 } 250 251 252 /* ----------------------------------------------------------- */ 253 #undef __FUNCT__ 254 #define __FUNCT__ "MatLUFactor_SeqBAIJ" 255 PetscErrorCode MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 256 { 257 PetscErrorCode ierr; 258 Mat C; 259 260 PetscFunctionBegin; 261 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 262 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 263 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 264 PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol); 265 PetscFunctionReturn(0); 266 } 267 268 269