1 /*$Id: sbaij2.c,v 1.7 2000/09/06 19:53:50 hzhang Exp hzhang $*/ 2 3 #include "petscsys.h" 4 #include "src/mat/impls/baij/seq/baij.h" 5 #include "src/vec/vecimpl.h" 6 #include "src/inline/spops.h" 7 #include "src/inline/ilu.h" 8 #include "petscbt.h" 9 #include "sbaij.h" 10 11 #undef __FUNC__ 12 #define __FUNC__ "MatIncreaseOverlap_SeqSBAIJ" 13 int MatIncreaseOverlap_SeqSBAIJ(Mat A,int is_max,IS *is,int ov) 14 { 15 PetscFunctionBegin; 16 SETERRQ(1,1,"Function not yet written for SBAIJ format"); 17 PetscFunctionReturn(0); 18 } 19 20 #undef __FUNC__ 21 #define __FUNC__ "MatGetSubMatrix_SeqSBAIJ_Private" 22 int MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 23 { 24 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*c; 25 int *smap,i,k,kstart,kend,ierr,oldcols = a->mbs,*lens; 26 int row,mat_i,*mat_j,tcol,*mat_ilen; 27 int *irow,nrows,*ssmap,bs=a->bs,bs2=a->bs2; 28 int *aj = a->j,*ai = a->i; 29 MatScalar *mat_a; 30 Mat C; 31 PetscTruth flag; 32 33 PetscFunctionBegin; 34 35 if (isrow != iscol) SETERRA(1,0,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro"); 36 ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr); 37 if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"IS is not sorted"); 38 39 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 40 ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 41 42 smap = (int*)PetscMalloc((1+oldcols)*sizeof(int));CHKPTRQ(smap); 43 ssmap = smap; 44 lens = (int*)PetscMalloc((1+nrows)*sizeof(int));CHKPTRQ(lens); 45 ierr = PetscMemzero(smap,oldcols*sizeof(int));CHKERRQ(ierr); 46 for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */ 47 /* determine lens of each row */ 48 for (i=0; i<nrows; i++) { 49 kstart = ai[irow[i]]; 50 kend = kstart + a->ilen[irow[i]]; 51 lens[i] = 0; 52 for (k=kstart; k<kend; k++) { 53 if (ssmap[aj[k]]) { 54 lens[i]++; 55 } 56 } 57 } 58 /* Create and fill new matrix */ 59 if (scall == MAT_REUSE_MATRIX) { 60 c = (Mat_SeqSBAIJ *)((*B)->data); 61 62 if (c->mbs!=nrows || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Submatrix wrong size"); 63 ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);CHKERRQ(ierr); 64 if (flag == PETSC_FALSE) { 65 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Cannot reuse matrix. wrong no of nonzeros"); 66 } 67 ierr = PetscMemzero(c->ilen,c->mbs*sizeof(int));CHKERRQ(ierr); 68 C = *B; 69 } else { 70 ierr = MatCreateSeqSBAIJ(A->comm,bs,nrows*bs,nrows*bs,0,lens,&C);CHKERRQ(ierr); 71 } 72 c = (Mat_SeqSBAIJ *)(C->data); 73 for (i=0; i<nrows; i++) { 74 row = irow[i]; 75 kstart = ai[row]; 76 kend = kstart + a->ilen[row]; 77 mat_i = c->i[i]; 78 mat_j = c->j + mat_i; 79 mat_a = c->a + mat_i*bs2; 80 mat_ilen = c->ilen + i; 81 for (k=kstart; k<kend; k++) { 82 if ((tcol=ssmap[a->j[k]])) { 83 *mat_j++ = tcol - 1; 84 ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 85 mat_a += bs2; 86 (*mat_ilen)++; 87 } 88 } 89 } 90 91 /* Free work space */ 92 ierr = PetscFree(smap);CHKERRQ(ierr); 93 ierr = PetscFree(lens);CHKERRQ(ierr); 94 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 95 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 96 97 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 98 *B = C; 99 PetscFunctionReturn(0); 100 } 101 102 #undef __FUNC__ 103 #define __FUNC__ "MatGetSubMatrix_SeqSBAIJ" 104 int MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 105 { 106 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 107 IS is1; 108 int *vary,*iary,*irow,nrows,i,ierr,bs=a->bs,count; 109 110 PetscFunctionBegin; 111 if (isrow != iscol) SETERRA(1,0,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro"); 112 113 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 114 ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 115 116 /* Verify if the indices corespond to each element in a block 117 and form the IS with compressed IS */ 118 vary = (int*)PetscMalloc(2*(a->mbs+1)*sizeof(int));CHKPTRQ(vary); 119 iary = vary + a->mbs; 120 ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr); 121 for (i=0; i<nrows; i++) vary[irow[i]/bs]++; 122 123 count = 0; 124 for (i=0; i<a->mbs; i++) { 125 if (vary[i]!=0 && vary[i]!=bs) SETERRA(1,0,"Index set does not match blocks"); 126 if (vary[i]==bs) iary[count++] = i; 127 } 128 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr); 129 130 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 131 ierr = PetscFree(vary);CHKERRQ(ierr); 132 133 ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);CHKERRQ(ierr); 134 ISDestroy(is1); 135 PetscFunctionReturn(0); 136 } 137 138 #undef __FUNC__ 139 #define __FUNC__ "MatGetSubMatrices_SeqSBAIJ" 140 int MatGetSubMatrices_SeqSBAIJ(Mat A,int n,IS *irow,IS *icol,MatReuse scall,Mat **B) 141 { 142 int ierr,i; 143 144 PetscFunctionBegin; 145 if (scall == MAT_INITIAL_MATRIX) { 146 *B = (Mat*)PetscMalloc((n+1)*sizeof(Mat));CHKPTRQ(*B); 147 } 148 149 for (i=0; i<n; i++) { 150 ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 /* -------------------------------------------------------*/ 156 /* Should check that shapes of vectors and matrices match */ 157 /* -------------------------------------------------------*/ 158 #include "petscblaslapack.h" 159 160 #undef __FUNC__ 161 #define __FUNC__ "MatMult_SeqSBAIJ_1" 162 int MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz) 163 { 164 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 165 Scalar *x,*z,*xb,x1,zero=0.0; 166 MatScalar *v; 167 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 168 169 PetscFunctionBegin; 170 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 171 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 172 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 173 174 v = a->a; 175 xb = x; 176 177 for (i=0; i<mbs; i++) { 178 n = ai[1] - ai[0]; /* length of i_th row of A */ 179 x1 = xb[0]; 180 ib = aj + *ai; 181 z[i] += *v++ * x[*ib++]; /* (diag of A)*x */ 182 for (j=1; j<n; j++) { 183 cval = *ib; 184 z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 185 z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 186 } 187 xb++; ai++; 188 } 189 190 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 191 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 192 PLogFlops(2*(a->s_nz*2 - a->m) - a->m); /* s_nz = (nz+m)/2 */ 193 PetscFunctionReturn(0); 194 } 195 196 #undef __FUNC__ 197 #define __FUNC__ "MatMult_SeqSBAIJ_2" 198 int MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz) 199 { 200 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 201 Scalar *x,*z,*xb,x1,x2,zero=0.0; 202 MatScalar *v; 203 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 204 205 206 PetscFunctionBegin; 207 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 208 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 209 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 210 211 v = a->a; 212 xb = x; 213 214 for (i=0; i<mbs; i++) { 215 n = ai[1] - ai[0]; /* length of i_th block row of A */ 216 x1 = xb[0]; x2 = xb[1]; 217 ib = aj + *ai; 218 /* (diag of A)*x */ 219 z[2*i] += v[0]*x1 + v[2]*x2; 220 z[2*i+1] += v[2]*x1 + v[3]*x2; 221 v += 4; 222 for (j=1; j<n; j++) { 223 /* (strict lower triangular part of A)*x */ 224 cval = ib[j]*2; 225 z[cval] += v[0]*x1 + v[1]*x2; 226 z[cval+1] += v[2]*x1 + v[3]*x2; 227 /* (strict upper triangular part of A)*x */ 228 z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 229 z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 230 v += 4; 231 } 232 xb +=2; ai++; 233 } 234 235 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 236 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 237 PLogFlops(8*(a->s_nz*2 - a->m) - a->m); 238 PetscFunctionReturn(0); 239 } 240 241 #undef __FUNC__ 242 #define __FUNC__ "MatMult_SeqSBAIJ_3" 243 int MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz) 244 { 245 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 246 Scalar *x,*z,*xb,x1,x2,x3,zero=0.0; 247 MatScalar *v; 248 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 249 250 251 PetscFunctionBegin; 252 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 253 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 254 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 255 256 v = a->a; 257 xb = x; 258 259 for (i=0; i<mbs; i++) { 260 n = ai[1] - ai[0]; /* length of i_th block row of A */ 261 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 262 ib = aj + *ai; 263 /* (diag of A)*x */ 264 z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 265 z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 266 z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 267 v += 9; 268 for (j=1; j<n; j++) { 269 /* (strict lower triangular part of A)*x */ 270 cval = ib[j]*3; 271 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 272 z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 273 z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 274 /* (strict upper triangular part of A)*x */ 275 z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 276 z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 277 z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 278 v += 9; 279 } 280 xb +=3; ai++; 281 } 282 283 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 284 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 285 PLogFlops(18*(a->s_nz*2 - a->m) - a->m); 286 PetscFunctionReturn(0); 287 } 288 289 #undef __FUNC__ 290 #define __FUNC__ "MatMult_SeqSBAIJ_4" 291 int MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz) 292 { 293 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 294 Scalar *x,*z,*xb,x1,x2,x3,x4,zero=0.0; 295 MatScalar *v; 296 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 297 298 PetscFunctionBegin; 299 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 300 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 301 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 302 303 v = a->a; 304 xb = x; 305 306 for (i=0; i<mbs; i++) { 307 n = ai[1] - ai[0]; /* length of i_th block row of A */ 308 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 309 ib = aj + *ai; 310 /* (diag of A)*x */ 311 z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 312 z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 313 z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 314 z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 315 v += 16; 316 for (j=1; j<n; j++) { 317 /* (strict lower triangular part of A)*x */ 318 cval = ib[j]*4; 319 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 320 z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 321 z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 322 z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 323 /* (strict upper triangular part of A)*x */ 324 z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 325 z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 326 z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 327 z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 328 v += 16; 329 } 330 xb +=4; ai++; 331 } 332 333 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 334 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 335 PLogFlops(32*(a->s_nz*2 - a->m) - a->m); 336 PetscFunctionReturn(0); 337 } 338 339 #undef __FUNC__ 340 #define __FUNC__ "MatMult_SeqSBAIJ_5" 341 int MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz) 342 { 343 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 344 Scalar *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0; 345 MatScalar *v; 346 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 347 348 PetscFunctionBegin; 349 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 350 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 351 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 352 353 v = a->a; 354 xb = x; 355 356 for (i=0; i<mbs; i++) { 357 n = ai[1] - ai[0]; /* length of i_th block row of A */ 358 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 359 ib = aj + *ai; 360 /* (diag of A)*x */ 361 z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 362 z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 363 z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 364 z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 365 z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 366 v += 25; 367 for (j=1; j<n; j++) { 368 /* (strict lower triangular part of A)*x */ 369 cval = ib[j]*5; 370 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 371 z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 372 z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 373 z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 374 z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 375 /* (strict upper triangular part of A)*x */ 376 z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 377 z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 378 z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 379 z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 380 z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 381 v += 25; 382 } 383 xb +=5; ai++; 384 } 385 386 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 387 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 388 PLogFlops(50*(a->s_nz*2 - a->m) - a->m); 389 PetscFunctionReturn(0); 390 } 391 392 393 #undef __FUNC__ 394 #define __FUNC__ "MatMult_SeqSBAIJ_6" 395 int MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz) 396 { 397 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 398 Scalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0; 399 MatScalar *v; 400 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 401 402 PetscFunctionBegin; 403 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 404 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 405 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 406 407 v = a->a; 408 xb = x; 409 410 for (i=0; i<mbs; i++) { 411 n = ai[1] - ai[0]; /* length of i_th block row of A */ 412 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 413 ib = aj + *ai; 414 /* (diag of A)*x */ 415 z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 416 z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 417 z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 418 z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 419 z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 420 z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 421 v += 36; 422 for (j=1; j<n; j++) { 423 /* (strict lower triangular part of A)*x */ 424 cval = ib[j]*6; 425 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 426 z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 427 z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 428 z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 429 z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 430 z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 431 /* (strict upper triangular part of A)*x */ 432 z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 433 z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 434 z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 435 z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 436 z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 437 z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 438 v += 36; 439 } 440 xb +=6; ai++; 441 } 442 443 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 444 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 445 PLogFlops(72*(a->s_nz*2 - a->m) - a->m); 446 PetscFunctionReturn(0); 447 } 448 #undef __FUNC__ 449 #define __FUNC__ "MatMult_SeqSBAIJ_7" 450 int MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz) 451 { 452 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 453 Scalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0; 454 MatScalar *v; 455 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 456 457 PetscFunctionBegin; 458 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 459 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 460 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 461 462 v = a->a; 463 xb = x; 464 465 for (i=0; i<mbs; i++) { 466 n = ai[1] - ai[0]; /* length of i_th block row of A */ 467 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 468 ib = aj + *ai; 469 /* (diag of A)*x */ 470 z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 471 z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 472 z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 473 z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 474 z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 475 z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 476 z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 477 v += 49; 478 for (j=1; j<n; j++) { 479 /* (strict lower triangular part of A)*x */ 480 cval = ib[j]*7; 481 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 482 z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 483 z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 484 z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 485 z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 486 z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 487 z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 488 /* (strict upper triangular part of A)*x */ 489 z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 490 z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 491 z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 492 z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 493 z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 494 z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 495 z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 496 v += 49; 497 } 498 xb +=7; ai++; 499 } 500 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 501 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 502 PLogFlops(98*(a->s_nz*2 - a->m) - a->m); 503 PetscFunctionReturn(0); 504 } 505 506 /* 507 This will not work with MatScalar == float because it calls the BLAS 508 */ 509 #undef __FUNC__ 510 #define __FUNC__ "MatMult_SeqSBAIJ_N" 511 int MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz) 512 { 513 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 514 Scalar *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0; 515 MatScalar *v; 516 int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2; 517 int ncols,k; 518 519 PetscFunctionBegin; 520 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 521 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 522 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 523 524 aj = a->j; 525 v = a->a; 526 ii = a->i; 527 528 if (!a->mult_work) { 529 k = a->m; 530 a->mult_work = (Scalar*)PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 531 } 532 work = a->mult_work; 533 534 for (i=0; i<mbs; i++) { 535 n = ii[1] - ii[0]; ncols = n*bs; 536 workt = work; idx=aj+ii[0]; 537 538 /* upper triangular part */ 539 for (j=0; j<n; j++) { 540 /* col = bs*(*idx); */ 541 xb = x_ptr + bs*(*idx++); 542 for (k=0; k<bs; k++) workt[k] = xb[k]; 543 workt += bs; 544 } 545 /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 546 Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 547 548 /* strict lower triangular part */ 549 ncols -= bs; 550 if (ncols > 0){ 551 workt = work; 552 ierr = PetscMemzero(workt,ncols*sizeof(Scalar));CHKERRQ(ierr); 553 Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v+bs2,workt); 554 555 idx=aj+ii[0]+1; 556 for (j=1; j<n; j++) { 557 zb = z_ptr + bs*(*idx); 558 idx++; 559 for (k=0; k<bs; k++) zb[k] += workt[k] ; 560 workt += bs; 561 } 562 } 563 564 x += bs; v += n*bs2; z += bs; ii++; 565 } 566 567 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 568 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 569 PLogFlops(2*(a->s_nz*2 - a->m)*bs2 - a->m); 570 PetscFunctionReturn(0); 571 } 572 573 #undef __FUNC__ 574 #define __FUNC__ "MatMultAdd_SeqSBAIJ_1" 575 int MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 576 { 577 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 578 Scalar *x,*y,*z,*xb,x1; 579 MatScalar *v; 580 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 581 582 PetscFunctionBegin; 583 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 584 if (yy != xx) { 585 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 586 } else { 587 y = x; 588 } 589 if (zz != yy) { 590 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 591 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 592 } else { 593 z = y; 594 } 595 596 v = a->a; 597 xb = x; 598 599 for (i=0; i<mbs; i++) { 600 n = ai[1] - ai[0]; /* length of i_th row of A */ 601 x1 = xb[0]; 602 ib = aj + *ai; 603 z[i] += *v++ * x[*ib++]; /* (diag of A)*x */ 604 for (j=1; j<n; j++) { 605 cval = *ib; 606 z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 607 z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 608 } 609 xb++; ai++; 610 } 611 612 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 613 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 614 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 615 616 PLogFlops(2*(a->s_nz*2 - a->m)); 617 PetscFunctionReturn(0); 618 } 619 620 #undef __FUNC__ 621 #define __FUNC__ "MatMultAdd_SeqSBAIJ_2" 622 int MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 623 { 624 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 625 Scalar *x,*y,*z,*xb,x1,x2; 626 MatScalar *v; 627 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 628 629 PetscFunctionBegin; 630 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 631 if (yy != xx) { 632 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 633 } else { 634 y = x; 635 } 636 if (zz != yy) { 637 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 638 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 639 } else { 640 z = y; 641 } 642 643 v = a->a; 644 xb = x; 645 646 for (i=0; i<mbs; i++) { 647 n = ai[1] - ai[0]; /* length of i_th block row of A */ 648 x1 = xb[0]; x2 = xb[1]; 649 ib = aj + *ai; 650 /* (diag of A)*x */ 651 z[2*i] += v[0]*x1 + v[2]*x2; 652 z[2*i+1] += v[2]*x1 + v[3]*x2; 653 v += 4; 654 for (j=1; j<n; j++) { 655 /* (strict lower triangular part of A)*x */ 656 cval = ib[j]*2; 657 z[cval] += v[0]*x1 + v[1]*x2; 658 z[cval+1] += v[2]*x1 + v[3]*x2; 659 /* (strict upper triangular part of A)*x */ 660 z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 661 z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 662 v += 4; 663 } 664 xb +=2; ai++; 665 } 666 667 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 668 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 669 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 670 671 PLogFlops(4*(a->s_nz*2 - a->m)); 672 PetscFunctionReturn(0); 673 } 674 675 #undef __FUNC__ 676 #define __FUNC__ "MatMultAdd_SeqSBAIJ_3" 677 int MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 678 { 679 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 680 Scalar *x,*y,*z,*xb,x1,x2,x3; 681 MatScalar *v; 682 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 683 684 PetscFunctionBegin; 685 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 686 if (yy != xx) { 687 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 688 } else { 689 y = x; 690 } 691 if (zz != yy) { 692 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 693 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 694 } else { 695 z = y; 696 } 697 698 v = a->a; 699 xb = x; 700 701 for (i=0; i<mbs; i++) { 702 n = ai[1] - ai[0]; /* length of i_th block row of A */ 703 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 704 ib = aj + *ai; 705 /* (diag of A)*x */ 706 z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 707 z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 708 z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 709 v += 9; 710 for (j=1; j<n; j++) { 711 /* (strict lower triangular part of A)*x */ 712 cval = ib[j]*3; 713 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 714 z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 715 z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 716 /* (strict upper triangular part of A)*x */ 717 z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 718 z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 719 z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 720 v += 9; 721 } 722 xb +=3; ai++; 723 } 724 725 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 726 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 727 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 728 729 PLogFlops(18*(a->s_nz*2 - a->m)); 730 PetscFunctionReturn(0); 731 } 732 733 #undef __FUNC__ 734 #define __FUNC__ "MatMultAdd_SeqSBAIJ_4" 735 int MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 736 { 737 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 738 Scalar *x,*y,*z,*xb,x1,x2,x3,x4; 739 MatScalar *v; 740 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 741 742 PetscFunctionBegin; 743 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 744 if (yy != xx) { 745 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 746 } else { 747 y = x; 748 } 749 if (zz != yy) { 750 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 751 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 752 } else { 753 z = y; 754 } 755 756 v = a->a; 757 xb = x; 758 759 for (i=0; i<mbs; i++) { 760 n = ai[1] - ai[0]; /* length of i_th block row of A */ 761 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 762 ib = aj + *ai; 763 /* (diag of A)*x */ 764 z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 765 z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 766 z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 767 z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 768 v += 16; 769 for (j=1; j<n; j++) { 770 /* (strict lower triangular part of A)*x */ 771 cval = ib[j]*4; 772 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 773 z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 774 z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 775 z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 776 /* (strict upper triangular part of A)*x */ 777 z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 778 z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 779 z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 780 z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 781 v += 16; 782 } 783 xb +=4; ai++; 784 } 785 786 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 787 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 788 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 789 790 PLogFlops(32*(a->s_nz*2 - a->m)); 791 PetscFunctionReturn(0); 792 } 793 794 #undef __FUNC__ 795 #define __FUNC__ "MatMultAdd_SeqSBAIJ_5" 796 int MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 797 { 798 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 799 Scalar *x,*y,*z,*xb,x1,x2,x3,x4,x5; 800 MatScalar *v; 801 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 802 803 PetscFunctionBegin; 804 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 805 if (yy != xx) { 806 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 807 } else { 808 y = x; 809 } 810 if (zz != yy) { 811 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 812 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 813 } else { 814 z = y; 815 } 816 817 v = a->a; 818 xb = x; 819 820 for (i=0; i<mbs; i++) { 821 n = ai[1] - ai[0]; /* length of i_th block row of A */ 822 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 823 ib = aj + *ai; 824 /* (diag of A)*x */ 825 z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 826 z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 827 z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 828 z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 829 z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 830 v += 25; 831 for (j=1; j<n; j++) { 832 /* (strict lower triangular part of A)*x */ 833 cval = ib[j]*5; 834 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 835 z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 836 z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 837 z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 838 z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 839 /* (strict upper triangular part of A)*x */ 840 z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 841 z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 842 z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 843 z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 844 z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 845 v += 25; 846 } 847 xb +=5; ai++; 848 } 849 850 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 851 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 852 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 853 854 PLogFlops(50*(a->s_nz*2 - a->m)); 855 PetscFunctionReturn(0); 856 } 857 #undef __FUNC__ 858 #define __FUNC__ "MatMultAdd_SeqSBAIJ_6" 859 int MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 860 { 861 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 862 Scalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6; 863 MatScalar *v; 864 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 865 866 PetscFunctionBegin; 867 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 868 if (yy != xx) { 869 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 870 } else { 871 y = x; 872 } 873 if (zz != yy) { 874 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 875 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 876 } else { 877 z = y; 878 } 879 880 v = a->a; 881 xb = x; 882 883 for (i=0; i<mbs; i++) { 884 n = ai[1] - ai[0]; /* length of i_th block row of A */ 885 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 886 ib = aj + *ai; 887 /* (diag of A)*x */ 888 z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 889 z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 890 z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 891 z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 892 z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 893 z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 894 v += 36; 895 for (j=1; j<n; j++) { 896 /* (strict lower triangular part of A)*x */ 897 cval = ib[j]*6; 898 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 899 z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 900 z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 901 z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 902 z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 903 z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 904 /* (strict upper triangular part of A)*x */ 905 z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 906 z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 907 z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 908 z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 909 z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 910 z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 911 v += 36; 912 } 913 xb +=6; ai++; 914 } 915 916 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 917 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 918 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 919 920 PLogFlops(72*(a->s_nz*2 - a->m)); 921 PetscFunctionReturn(0); 922 } 923 924 #undef __FUNC__ 925 #define __FUNC__ "MatMultAdd_SeqSBAIJ_7" 926 int MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 927 { 928 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 929 Scalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7; 930 MatScalar *v; 931 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j; 932 933 PetscFunctionBegin; 934 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 935 if (yy != xx) { 936 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 937 } else { 938 y = x; 939 } 940 if (zz != yy) { 941 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 942 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 943 } else { 944 z = y; 945 } 946 947 v = a->a; 948 xb = x; 949 950 for (i=0; i<mbs; i++) { 951 n = ai[1] - ai[0]; /* length of i_th block row of A */ 952 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 953 ib = aj + *ai; 954 /* (diag of A)*x */ 955 z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 956 z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 957 z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 958 z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 959 z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 960 z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 961 z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 962 v += 49; 963 for (j=1; j<n; j++) { 964 /* (strict lower triangular part of A)*x */ 965 cval = ib[j]*7; 966 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 967 z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 968 z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 969 z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 970 z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 971 z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 972 z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 973 /* (strict upper triangular part of A)*x */ 974 z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 975 z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 976 z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 977 z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 978 z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 979 z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 980 z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 981 v += 49; 982 } 983 xb +=7; ai++; 984 } 985 986 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 987 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 988 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 989 990 PLogFlops(98*(a->s_nz*2 - a->m)); 991 PetscFunctionReturn(0); 992 } 993 994 #undef __FUNC__ 995 #define __FUNC__ "MatMultAdd_SeqSBAIJ_N" 996 int MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 997 { 998 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 999 Scalar *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt; 1000 MatScalar *v; 1001 int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2; 1002 int ncols,k; 1003 1004 PetscFunctionBegin; 1005 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 1006 if (yy != xx) { 1007 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1008 } else { 1009 y = x; 1010 } 1011 if (zz != yy) { 1012 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 1013 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 1014 } else { 1015 z = y; 1016 } 1017 1018 aj = a->j; 1019 v = a->a; 1020 ii = a->i; 1021 1022 if (!a->mult_work) { 1023 k = a->m; 1024 a->mult_work = (Scalar*)PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 1025 } 1026 work = a->mult_work; 1027 1028 1029 for (i=0; i<mbs; i++) { 1030 n = ii[1] - ii[0]; ncols = n*bs; 1031 workt = work; idx=aj+ii[0]; 1032 1033 /* upper triangular part */ 1034 for (j=0; j<n; j++) { 1035 /* col = bs*(*idx); */ 1036 xb = x_ptr + bs*(*idx++); 1037 for (k=0; k<bs; k++) workt[k] = xb[k]; 1038 workt += bs; 1039 } 1040 /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 1041 Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 1042 1043 /* strict lower triangular part */ 1044 ncols -= bs; 1045 if (ncols > 0){ 1046 workt = work; 1047 ierr = PetscMemzero(workt,ncols*sizeof(Scalar));CHKERRQ(ierr); 1048 Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v+bs2,workt); 1049 1050 idx=aj+ii[0]+1; 1051 for (j=1; j<n; j++) { 1052 zb = z_ptr + bs*(*idx); 1053 idx++; 1054 for (k=0; k<bs; k++) zb[k] += workt[k] ; 1055 workt += bs; 1056 } 1057 } 1058 1059 x += bs; v += n*bs2; z += bs; ii++; 1060 } 1061 1062 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1063 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1064 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 1065 1066 PLogFlops(2*(a->s_nz*2 - a->m)); 1067 PetscFunctionReturn(0); 1068 } 1069 1070 #undef __FUNC__ 1071 #define __FUNC__ "MatMultTranspose_SeqSBAIJ" 1072 int MatMultTranspose_SeqSBAIJ(Mat A,Vec xx,Vec zz) 1073 { 1074 PetscFunctionBegin; 1075 SETERRQ(1,1,"Matrix is symmetric. Call MatMult()."); 1076 PetscFunctionReturn(0); 1077 } 1078 1079 #undef __FUNC__ 1080 #define __FUNC__ "MatMultTransposeAdd_SeqSBAIJ" 1081 int MatMultTransposeAdd_SeqSBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1082 1083 { 1084 PetscFunctionBegin; 1085 SETERRQ(1,1,"Matrix is symmetric. Call MatMultAdd()."); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNC__ 1090 #define __FUNC__ "MatScale_SeqSBAIJ" 1091 int MatScale_SeqSBAIJ(Scalar *alpha,Mat inA) 1092 { 1093 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1094 int one = 1,totalnz = a->bs2*a->s_nz; 1095 1096 PetscFunctionBegin; 1097 BLscal_(&totalnz,alpha,a->a,&one); 1098 PLogFlops(totalnz); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNC__ 1103 #define __FUNC__ "MatNorm_SeqSBAIJ" 1104 int MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm) 1105 { 1106 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1107 MatScalar *v = a->a; 1108 PetscReal sum_diag = 0.0, sum_off = 0.0, *sum; 1109 int i,j,k,bs = a->bs,bs2=a->bs2,k1,mbs=a->mbs; 1110 int *jl,*il,jmin,jmax,ierr,nexti,ik; 1111 1112 PetscFunctionBegin; 1113 if (type == NORM_FROBENIUS) { 1114 for (k=0; k<mbs; k++){ 1115 jmin = a->i[k]; jmax = a->i[k+1]; 1116 /* diagonal block */ 1117 for (i=0; i<bs2; i++){ 1118 #if defined(PETSC_USE_COMPLEX) 1119 sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++; 1120 #else 1121 sum_diag += (*v)*(*v); v++; 1122 #endif 1123 } 1124 for (j=jmin+1; j<jmax; j++){ /* off-diagonal blocks */ 1125 for (i=0; i<bs2; i++){ 1126 #if defined(PETSC_USE_COMPLEX) 1127 sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++; 1128 #else 1129 sum_off += (*v)*(*v); v++; 1130 #endif 1131 } 1132 } 1133 } 1134 *norm = sqrt(sum_diag + 2*sum_off); 1135 1136 } else if (type == NORM_INFINITY) { /* maximum row sum */ 1137 il = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(il); 1138 jl = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(jl); 1139 sum = (PetscReal*)PetscMalloc(bs*sizeof(PetscReal));CHKPTRQ(sum); 1140 for (i=0; i<mbs; i++) { 1141 jl[i] = mbs; il[0] = 0; 1142 } 1143 1144 *norm = 0.0; 1145 for (k=0; k<mbs; k++) { /* k_th block row */ 1146 for (j=0; j<bs; j++) sum[j]=0.0; 1147 1148 /*-- col sum --*/ 1149 i = jl[k]; /* first |A(i,k)| to be added */ 1150 while (i<mbs){ 1151 nexti = jl[i]; /* next block row to be added */ 1152 ik = il[i]; /* block index of A(i,k) in the array a */ 1153 for (j=0; j<bs; j++){ 1154 v = a->a + ik*bs2 + j*bs; 1155 for (k1=0; k1<bs; k1++) { 1156 sum[j] += PetscAbsScalar(*v); v++; 1157 } 1158 } 1159 /* update il, jl */ 1160 jmin = ik + 1; jmax = a->i[i+1]; 1161 if (jmin < jmax){ 1162 il[i] = jmin; 1163 j = a->j[jmin]; 1164 jl[i] = jl[j]; jl[j]=i; 1165 } 1166 i = nexti; 1167 } 1168 1169 /*-- row sum --*/ 1170 jmin = a->i[k]; jmax = a->i[k+1]; 1171 for (i=jmin; i<jmax; i++) { 1172 for (j=0; j<bs; j++){ 1173 v = a->a + i*bs2 + j; 1174 for (k1=0; k1<bs; k1++){ 1175 sum[j] += PetscAbsScalar(*v); 1176 v += bs; 1177 } 1178 } 1179 } 1180 /* add k_th block row to il, jl */ 1181 jmin++; 1182 if (jmin < jmax){ 1183 il[k] = jmin; 1184 j = a->j[jmin]; 1185 jl[k] = jl[j]; jl[j] = k; 1186 } 1187 for (j=0; j<bs; j++){ 1188 if (sum[j] > *norm) *norm = sum[j]; 1189 } 1190 } 1191 ierr = PetscFree(il);CHKERRQ(ierr); 1192 ierr = PetscFree(jl);CHKERRQ(ierr); 1193 ierr = PetscFree(sum);CHKERRQ(ierr); 1194 } else { 1195 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 1196 } 1197 PetscFunctionReturn(0); 1198 } 1199 1200 #undef __FUNC__ 1201 #define __FUNC__ "MatEqual_SeqSBAIJ" 1202 int MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg) 1203 { 1204 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data; 1205 int ierr; 1206 1207 PetscFunctionBegin; 1208 if (B->type !=MATSEQSBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1209 1210 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1211 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| (a->s_nz != b->s_nz)) { 1212 *flg = PETSC_FALSE; PetscFunctionReturn(0); 1213 } 1214 1215 /* if the a->i are the same */ 1216 ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);CHKERRQ(ierr); 1217 if (*flg == PETSC_FALSE) { 1218 PetscFunctionReturn(0); 1219 } 1220 1221 /* if a->j are the same */ 1222 ierr = PetscMemcmp(a->j,b->j,(a->s_nz)*sizeof(int),flg);CHKERRQ(ierr); 1223 if (*flg == PETSC_FALSE) { 1224 PetscFunctionReturn(0); 1225 } 1226 /* if a->a are the same */ 1227 ierr = PetscMemcmp(a->a,b->a,(a->s_nz)*(a->bs)*(a->bs)*sizeof(Scalar),flg);CHKERRQ(ierr); 1228 PetscFunctionReturn(0); 1229 1230 } 1231 1232 #undef __FUNC__ 1233 #define __FUNC__ "MatGetDiagonal_SeqSBAIJ" 1234 int MatGetDiagonal_SeqSBAIJ(Mat A,Vec v) 1235 { 1236 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1237 int ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1238 Scalar *x,zero = 0.0; 1239 MatScalar *aa,*aa_j; 1240 1241 PetscFunctionBegin; 1242 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Not for factored matrix"); 1243 bs = a->bs; 1244 aa = a->a; 1245 ai = a->i; 1246 aj = a->j; 1247 ambs = a->mbs; 1248 bs2 = a->bs2; 1249 1250 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1251 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1252 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1253 /* if (n != a->m) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming matrix and vector");*/ 1254 for (i=0; i<ambs; i++) { 1255 j=ai[i]; 1256 if (aj[j] == i) { /* if this is a diagonal element */ 1257 row = i*bs; 1258 aa_j = aa + j*bs2; 1259 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1260 } 1261 } 1262 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1263 PetscFunctionReturn(0); 1264 } 1265 1266 #undef __FUNC__ 1267 #define __FUNC__ "MatDiagonalScale_SeqSBAIJ" 1268 int MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr) 1269 { 1270 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1271 Scalar *l,*r,x,*li,*ri; 1272 MatScalar *aa,*v; 1273 int ierr,i,j,k,lm,rn,M,m,*ai,*aj,mbs,tmp,bs,bs2; 1274 1275 PetscFunctionBegin; 1276 ai = a->i; 1277 aj = a->j; 1278 aa = a->a; 1279 m = a->m; 1280 bs = a->bs; 1281 mbs = a->mbs; 1282 bs2 = a->bs2; 1283 1284 if (ll != rr) { 1285 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, left and right scaling vectors must be same\n"); 1286 } 1287 if (ll) { 1288 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 1289 ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); 1290 if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector wrong length"); 1291 for (i=0; i<mbs; i++) { /* for each block row */ 1292 M = ai[i+1] - ai[i]; 1293 li = l + i*bs; 1294 v = aa + bs2*ai[i]; 1295 for (j=0; j<M; j++) { /* for each block */ 1296 for (k=0; k<bs2; k++) { 1297 (*v++) *= li[k%bs]; 1298 } 1299 #ifdef CONT 1300 /* will be used to replace the above loop */ 1301 ri = l + bs*aj[ai[i]+j]; 1302 for (k=0; k<bs; k++) { /* column value */ 1303 x = ri[k]; 1304 for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x; 1305 } 1306 #endif 1307 1308 } 1309 } 1310 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 1311 PLogFlops(2*a->s_nz); 1312 } 1313 /* will be deleted */ 1314 if (rr) { 1315 ierr = VecGetArray(rr,&r);CHKERRQ(ierr); 1316 ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr); 1317 if (rn != m) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vector wrong length"); 1318 for (i=0; i<mbs; i++) { /* for each block row */ 1319 M = ai[i+1] - ai[i]; 1320 v = aa + bs2*ai[i]; 1321 for (j=0; j<M; j++) { /* for each block */ 1322 ri = r + bs*aj[ai[i]+j]; 1323 for (k=0; k<bs; k++) { 1324 x = ri[k]; 1325 for (tmp=0; tmp<bs; tmp++) (*v++) *= x; 1326 } 1327 } 1328 } 1329 ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr); 1330 PLogFlops(a->s_nz); 1331 } 1332 PetscFunctionReturn(0); 1333 } 1334 1335 #undef __FUNC__ 1336 #define __FUNC__ "MatGetInfo_SeqSBAIJ" 1337 int MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1338 { 1339 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1340 1341 PetscFunctionBegin; 1342 info->rows_global = (double)a->m; 1343 info->columns_global = (double)a->m; 1344 info->rows_local = (double)a->m; 1345 info->columns_local = (double)a->m; 1346 info->block_size = a->bs2; 1347 info->nz_allocated = a->s_maxnz; /*num. of nonzeros in upper triangular part */ 1348 info->nz_used = a->bs2*a->s_nz; /*num. of nonzeros in upper triangular part */ 1349 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1350 info->assemblies = A->num_ass; 1351 info->mallocs = a->reallocs; 1352 info->memory = A->mem; 1353 if (A->factor) { 1354 info->fill_ratio_given = A->info.fill_ratio_given; 1355 info->fill_ratio_needed = A->info.fill_ratio_needed; 1356 info->factor_mallocs = A->info.factor_mallocs; 1357 } else { 1358 info->fill_ratio_given = 0; 1359 info->fill_ratio_needed = 0; 1360 info->factor_mallocs = 0; 1361 } 1362 PetscFunctionReturn(0); 1363 } 1364 1365 1366 #undef __FUNC__ 1367 #define __FUNC__ "MatZeroEntries_SeqSBAIJ" 1368 int MatZeroEntries_SeqSBAIJ(Mat A) 1369 { 1370 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1371 int ierr; 1372 1373 PetscFunctionBegin; 1374 ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); 1375 PetscFunctionReturn(0); 1376 } 1377