1 /*$Id: sbaij2.c,v 1.32 2001/08/07 03:03:01 balay Exp $*/ 2 3 #include "src/mat/impls/baij/seq/baij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 #include "src/inline/ilu.h" 7 #include "petscbt.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ" 12 int MatIncreaseOverlap_SeqSBAIJ(Mat A,int is_max,IS *is,int ov) 13 { 14 PetscFunctionBegin; 15 SETERRQ(1,"Function not yet written for SBAIJ format"); 16 /* PetscFunctionReturn(0); */ 17 } 18 19 #undef __FUNCT__ 20 #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ_Private" 21 int MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 22 { 23 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*c; 24 int *smap,i,k,kstart,kend,ierr,oldcols = a->mbs,*lens; 25 int row,mat_i,*mat_j,tcol,*mat_ilen; 26 int *irow,nrows,*ssmap,bs=a->bs,bs2=a->bs2; 27 int *aj = a->j,*ai = a->i; 28 MatScalar *mat_a; 29 Mat C; 30 PetscTruth flag; 31 32 PetscFunctionBegin; 33 34 if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro"); 35 ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr); 36 if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted"); 37 38 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 39 ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 40 41 ierr = PetscMalloc((1+oldcols)*sizeof(int),&smap);CHKERRQ(ierr); 42 ssmap = smap; 43 ierr = PetscMalloc((1+nrows)*sizeof(int),&lens);CHKERRQ(ierr); 44 ierr = PetscMemzero(smap,oldcols*sizeof(int));CHKERRQ(ierr); 45 for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */ 46 /* determine lens of each row */ 47 for (i=0; i<nrows; i++) { 48 kstart = ai[irow[i]]; 49 kend = kstart + a->ilen[irow[i]]; 50 lens[i] = 0; 51 for (k=kstart; k<kend; k++) { 52 if (ssmap[aj[k]]) { 53 lens[i]++; 54 } 55 } 56 } 57 /* Create and fill new matrix */ 58 if (scall == MAT_REUSE_MATRIX) { 59 c = (Mat_SeqSBAIJ *)((*B)->data); 60 61 if (c->mbs!=nrows || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size"); 62 ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);CHKERRQ(ierr); 63 if (flag == PETSC_FALSE) { 64 SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); 65 } 66 ierr = PetscMemzero(c->ilen,c->mbs*sizeof(int));CHKERRQ(ierr); 67 C = *B; 68 } else { 69 ierr = MatCreateSeqSBAIJ(A->comm,bs,nrows*bs,nrows*bs,0,lens,&C);CHKERRQ(ierr); 70 } 71 c = (Mat_SeqSBAIJ *)(C->data); 72 for (i=0; i<nrows; i++) { 73 row = irow[i]; 74 kstart = ai[row]; 75 kend = kstart + a->ilen[row]; 76 mat_i = c->i[i]; 77 mat_j = c->j + mat_i; 78 mat_a = c->a + mat_i*bs2; 79 mat_ilen = c->ilen + i; 80 for (k=kstart; k<kend; k++) { 81 if ((tcol=ssmap[a->j[k]])) { 82 *mat_j++ = tcol - 1; 83 ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 84 mat_a += bs2; 85 (*mat_ilen)++; 86 } 87 } 88 } 89 90 /* Free work space */ 91 ierr = PetscFree(smap);CHKERRQ(ierr); 92 ierr = PetscFree(lens);CHKERRQ(ierr); 93 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 94 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 95 96 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 97 *B = C; 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ" 103 int MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B) 104 { 105 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 106 IS is1; 107 int *vary,*iary,*irow,nrows,i,ierr,bs=a->bs,count; 108 109 PetscFunctionBegin; 110 if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro"); 111 112 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 113 ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); 114 115 /* Verify if the indices corespond to each element in a block 116 and form the IS with compressed IS */ 117 ierr = PetscMalloc(2*(a->mbs+1)*sizeof(int),&vary);CHKERRQ(ierr); 118 iary = vary + a->mbs; 119 ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr); 120 for (i=0; i<nrows; i++) vary[irow[i]/bs]++; 121 122 count = 0; 123 for (i=0; i<a->mbs; i++) { 124 if (vary[i]!=0 && vary[i]!=bs) SETERRQ(1,"Index set does not match blocks"); 125 if (vary[i]==bs) iary[count++] = i; 126 } 127 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr); 128 129 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 130 ierr = PetscFree(vary);CHKERRQ(ierr); 131 132 ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);CHKERRQ(ierr); 133 ISDestroy(is1); 134 PetscFunctionReturn(0); 135 } 136 137 #undef __FUNCT__ 138 #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ" 139 int MatGetSubMatrices_SeqSBAIJ(Mat A,int n,IS *irow,IS *icol,MatReuse scall,Mat **B) 140 { 141 int ierr,i; 142 143 PetscFunctionBegin; 144 if (scall == MAT_INITIAL_MATRIX) { 145 ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr); 146 } 147 148 for (i=0; i<n; i++) { 149 ierr = MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr); 150 } 151 PetscFunctionReturn(0); 152 } 153 154 /* -------------------------------------------------------*/ 155 /* Should check that shapes of vectors and matrices match */ 156 /* -------------------------------------------------------*/ 157 #include "petscblaslapack.h" 158 159 #undef __FUNCT__ 160 #define __FUNCT__ "MatMult_SeqSBAIJ_1" 161 int MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz) 162 { 163 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 164 PetscScalar *x,*z,*xb,x1,zero=0.0; 165 MatScalar *v; 166 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 167 168 PetscFunctionBegin; 169 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 170 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 171 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 172 173 v = a->a; 174 xb = x; 175 176 for (i=0; i<mbs; i++) { 177 n = ai[1] - ai[0]; /* length of i_th row of A */ 178 x1 = xb[0]; 179 ib = aj + *ai; 180 jmin = 0; 181 if (*ib == i) { /* (diag of A)*x */ 182 z[i] += *v++ * x[*ib++]; 183 jmin++; 184 } 185 for (j=jmin; j<n; j++) { 186 cval = *ib; 187 z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 188 z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 189 } 190 xb++; ai++; 191 } 192 193 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 194 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 195 PetscLogFlops(2*(a->s_nz*2 - A->m) - A->m); /* s_nz = (nz+m)/2 */ 196 PetscFunctionReturn(0); 197 } 198 199 #undef __FUNCT__ 200 #define __FUNCT__ "MatMult_SeqSBAIJ_2" 201 int MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz) 202 { 203 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 204 PetscScalar *x,*z,*xb,x1,x2,zero=0.0; 205 MatScalar *v; 206 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 207 208 209 PetscFunctionBegin; 210 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 211 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 212 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 213 214 v = a->a; 215 xb = x; 216 217 for (i=0; i<mbs; i++) { 218 n = ai[1] - ai[0]; /* length of i_th block row of A */ 219 x1 = xb[0]; x2 = xb[1]; 220 ib = aj + *ai; 221 jmin = 0; 222 if (*ib == i){ /* (diag of A)*x */ 223 z[2*i] += v[0]*x1 + v[2]*x2; 224 z[2*i+1] += v[2]*x1 + v[3]*x2; 225 v += 4; jmin++; 226 } 227 for (j=jmin; j<n; j++) { 228 /* (strict lower triangular part of A)*x */ 229 cval = ib[j]*2; 230 z[cval] += v[0]*x1 + v[1]*x2; 231 z[cval+1] += v[2]*x1 + v[3]*x2; 232 /* (strict upper triangular part of A)*x */ 233 z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 234 z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 235 v += 4; 236 } 237 xb +=2; ai++; 238 } 239 240 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 241 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 242 PetscLogFlops(8*(a->s_nz*2 - A->m) - A->m); 243 PetscFunctionReturn(0); 244 } 245 246 #undef __FUNCT__ 247 #define __FUNCT__ "MatMult_SeqSBAIJ_3" 248 int MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz) 249 { 250 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 251 PetscScalar *x,*z,*xb,x1,x2,x3,zero=0.0; 252 MatScalar *v; 253 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 254 255 256 PetscFunctionBegin; 257 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 258 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 259 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 260 261 v = a->a; 262 xb = x; 263 264 for (i=0; i<mbs; i++) { 265 n = ai[1] - ai[0]; /* length of i_th block row of A */ 266 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 267 ib = aj + *ai; 268 jmin = 0; 269 if (*ib == i){ /* (diag of A)*x */ 270 z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 271 z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 272 z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 273 v += 9; jmin++; 274 } 275 for (j=jmin; j<n; j++) { 276 /* (strict lower triangular part of A)*x */ 277 cval = ib[j]*3; 278 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 279 z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 280 z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 281 /* (strict upper triangular part of A)*x */ 282 z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 283 z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 284 z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 285 v += 9; 286 } 287 xb +=3; ai++; 288 } 289 290 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 291 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 292 PetscLogFlops(18*(a->s_nz*2 - A->m) - A->m); 293 PetscFunctionReturn(0); 294 } 295 296 #undef __FUNCT__ 297 #define __FUNCT__ "MatMult_SeqSBAIJ_4" 298 int MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz) 299 { 300 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 301 PetscScalar *x,*z,*xb,x1,x2,x3,x4,zero=0.0; 302 MatScalar *v; 303 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 304 305 PetscFunctionBegin; 306 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 307 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 308 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 309 310 v = a->a; 311 xb = x; 312 313 for (i=0; i<mbs; i++) { 314 n = ai[1] - ai[0]; /* length of i_th block row of A */ 315 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 316 ib = aj + *ai; 317 jmin = 0; 318 if (*ib == i){ /* (diag of A)*x */ 319 z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 320 z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 321 z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 322 z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 323 v += 16; jmin++; 324 } 325 for (j=jmin; j<n; j++) { 326 /* (strict lower triangular part of A)*x */ 327 cval = ib[j]*4; 328 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 329 z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 330 z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 331 z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 332 /* (strict upper triangular part of A)*x */ 333 z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 334 z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 335 z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 336 z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 337 v += 16; 338 } 339 xb +=4; ai++; 340 } 341 342 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 343 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 344 PetscLogFlops(32*(a->s_nz*2 - A->m) - A->m); 345 PetscFunctionReturn(0); 346 } 347 348 #undef __FUNCT__ 349 #define __FUNCT__ "MatMult_SeqSBAIJ_5" 350 int MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz) 351 { 352 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 353 PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0; 354 MatScalar *v; 355 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 356 357 PetscFunctionBegin; 358 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 359 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 360 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 361 362 v = a->a; 363 xb = x; 364 365 for (i=0; i<mbs; i++) { 366 n = ai[1] - ai[0]; /* length of i_th block row of A */ 367 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 368 ib = aj + *ai; 369 jmin = 0; 370 if (*ib == i){ /* (diag of A)*x */ 371 z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 372 z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 373 z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 374 z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 375 z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 376 v += 25; jmin++; 377 } 378 for (j=jmin; j<n; j++) { 379 /* (strict lower triangular part of A)*x */ 380 cval = ib[j]*5; 381 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 382 z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 383 z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 384 z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 385 z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 386 /* (strict upper triangular part of A)*x */ 387 z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 388 z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 389 z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 390 z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 391 z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 392 v += 25; 393 } 394 xb +=5; ai++; 395 } 396 397 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 398 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 399 PetscLogFlops(50*(a->s_nz*2 - A->m) - A->m); 400 PetscFunctionReturn(0); 401 } 402 403 404 #undef __FUNCT__ 405 #define __FUNCT__ "MatMult_SeqSBAIJ_6" 406 int MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz) 407 { 408 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 409 PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0; 410 MatScalar *v; 411 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 412 413 PetscFunctionBegin; 414 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 415 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 416 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 417 418 v = a->a; 419 xb = x; 420 421 for (i=0; i<mbs; i++) { 422 n = ai[1] - ai[0]; /* length of i_th block row of A */ 423 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 424 ib = aj + *ai; 425 jmin = 0; 426 if (*ib == i){ /* (diag of A)*x */ 427 z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 428 z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 429 z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 430 z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 431 z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 432 z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 433 v += 36; jmin++; 434 } 435 for (j=jmin; j<n; j++) { 436 /* (strict lower triangular part of A)*x */ 437 cval = ib[j]*6; 438 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 439 z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 440 z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 441 z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 442 z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 443 z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 444 /* (strict upper triangular part of A)*x */ 445 z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 446 z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 447 z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 448 z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 449 z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 450 z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 451 v += 36; 452 } 453 xb +=6; ai++; 454 } 455 456 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 457 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 458 PetscLogFlops(72*(a->s_nz*2 - A->m) - A->m); 459 PetscFunctionReturn(0); 460 } 461 #undef __FUNCT__ 462 #define __FUNCT__ "MatMult_SeqSBAIJ_7" 463 int MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz) 464 { 465 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 466 PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0; 467 MatScalar *v; 468 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 469 470 PetscFunctionBegin; 471 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 472 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 473 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 474 475 v = a->a; 476 xb = x; 477 478 for (i=0; i<mbs; i++) { 479 n = ai[1] - ai[0]; /* length of i_th block row of A */ 480 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 481 ib = aj + *ai; 482 jmin = 0; 483 if (*ib == i){ /* (diag of A)*x */ 484 z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 485 z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 486 z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 487 z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 488 z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 489 z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 490 z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 491 v += 49; jmin++; 492 } 493 for (j=jmin; j<n; j++) { 494 /* (strict lower triangular part of A)*x */ 495 cval = ib[j]*7; 496 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 497 z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 498 z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 499 z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 500 z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 501 z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 502 z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 503 /* (strict upper triangular part of A)*x */ 504 z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 505 z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 506 z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 507 z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 508 z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 509 z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 510 z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 511 v += 49; 512 } 513 xb +=7; ai++; 514 } 515 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 516 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 517 PetscLogFlops(98*(a->s_nz*2 - A->m) - A->m); 518 PetscFunctionReturn(0); 519 } 520 521 /* 522 This will not work with MatScalar == float because it calls the BLAS 523 */ 524 #undef __FUNCT__ 525 #define __FUNCT__ "MatMult_SeqSBAIJ_N" 526 int MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz) 527 { 528 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 529 PetscScalar *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0; 530 MatScalar *v; 531 int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2; 532 int ncols,k; 533 534 PetscFunctionBegin; 535 ierr = VecSet(&zero,zz);CHKERRQ(ierr); 536 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 537 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 538 539 aj = a->j; 540 v = a->a; 541 ii = a->i; 542 543 if (!a->mult_work) { 544 ierr = PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 545 } 546 work = a->mult_work; 547 548 for (i=0; i<mbs; i++) { 549 n = ii[1] - ii[0]; ncols = n*bs; 550 workt = work; idx=aj+ii[0]; 551 552 /* upper triangular part */ 553 for (j=0; j<n; j++) { 554 xb = x_ptr + bs*(*idx++); 555 for (k=0; k<bs; k++) workt[k] = xb[k]; 556 workt += bs; 557 } 558 /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 559 Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 560 561 /* strict lower triangular part */ 562 idx = aj+ii[0]; 563 if (*idx == i){ 564 ncols -= bs; v += bs2; idx++; n--; 565 } 566 567 if (ncols > 0){ 568 workt = work; 569 ierr = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 570 Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); 571 for (j=0; j<n; j++) { 572 zb = z_ptr + bs*(*idx++); 573 for (k=0; k<bs; k++) zb[k] += workt[k] ; 574 workt += bs; 575 } 576 } 577 x += bs; v += n*bs2; z += bs; ii++; 578 } 579 580 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 581 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 582 PetscLogFlops(2*(a->s_nz*2 - A->m)*bs2 - A->m); 583 PetscFunctionReturn(0); 584 } 585 586 #undef __FUNCT__ 587 #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1" 588 int MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 589 { 590 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 591 PetscScalar *x,*y,*z,*xb,x1; 592 MatScalar *v; 593 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 594 595 PetscFunctionBegin; 596 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 597 if (yy != xx) { 598 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 599 } else { 600 y = x; 601 } 602 if (zz != yy) { 603 /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 604 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 605 ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 606 } else { 607 z = y; 608 } 609 610 v = a->a; 611 xb = x; 612 613 for (i=0; i<mbs; i++) { 614 n = ai[1] - ai[0]; /* length of i_th row of A */ 615 x1 = xb[0]; 616 ib = aj + *ai; 617 jmin = 0; 618 if (*ib == i) { /* (diag of A)*x */ 619 z[i] += *v++ * x[*ib++]; jmin++; 620 } 621 for (j=jmin; j<n; j++) { 622 cval = *ib; 623 z[cval] += *v * x1; /* (strict lower triangular part of A)*x */ 624 z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */ 625 } 626 xb++; ai++; 627 } 628 629 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 630 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 631 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 632 633 PetscLogFlops(2*(a->s_nz*2 - A->m)); 634 PetscFunctionReturn(0); 635 } 636 637 #undef __FUNCT__ 638 #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2" 639 int MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 640 { 641 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 642 PetscScalar *x,*y,*z,*xb,x1,x2; 643 MatScalar *v; 644 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 645 646 PetscFunctionBegin; 647 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 648 if (yy != xx) { 649 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 650 } else { 651 y = x; 652 } 653 if (zz != yy) { 654 /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 655 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 656 ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 657 } else { 658 z = y; 659 } 660 661 v = a->a; 662 xb = x; 663 664 for (i=0; i<mbs; i++) { 665 n = ai[1] - ai[0]; /* length of i_th block row of A */ 666 x1 = xb[0]; x2 = xb[1]; 667 ib = aj + *ai; 668 jmin = 0; 669 if (*ib == i){ /* (diag of A)*x */ 670 z[2*i] += v[0]*x1 + v[2]*x2; 671 z[2*i+1] += v[2]*x1 + v[3]*x2; 672 v += 4; jmin++; 673 } 674 for (j=jmin; j<n; j++) { 675 /* (strict lower triangular part of A)*x */ 676 cval = ib[j]*2; 677 z[cval] += v[0]*x1 + v[1]*x2; 678 z[cval+1] += v[2]*x1 + v[3]*x2; 679 /* (strict upper triangular part of A)*x */ 680 z[2*i] += v[0]*x[cval] + v[2]*x[cval+1]; 681 z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1]; 682 v += 4; 683 } 684 xb +=2; ai++; 685 } 686 687 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 688 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 689 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 690 691 PetscLogFlops(4*(a->s_nz*2 - A->m)); 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3" 697 int MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 698 { 699 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 700 PetscScalar *x,*y,*z,*xb,x1,x2,x3; 701 MatScalar *v; 702 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 703 704 PetscFunctionBegin; 705 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 706 if (yy != xx) { 707 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 708 } else { 709 y = x; 710 } 711 if (zz != yy) { 712 /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 713 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 714 ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 715 } else { 716 z = y; 717 } 718 719 v = a->a; 720 xb = x; 721 722 for (i=0; i<mbs; i++) { 723 n = ai[1] - ai[0]; /* length of i_th block row of A */ 724 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 725 ib = aj + *ai; 726 jmin = 0; 727 if (*ib == i){ /* (diag of A)*x */ 728 z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; 729 z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; 730 z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 731 v += 9; jmin++; 732 } 733 for (j=jmin; j<n; j++) { 734 /* (strict lower triangular part of A)*x */ 735 cval = ib[j]*3; 736 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3; 737 z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3; 738 z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; 739 /* (strict upper triangular part of A)*x */ 740 z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2]; 741 z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2]; 742 z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2]; 743 v += 9; 744 } 745 xb +=3; ai++; 746 } 747 748 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 749 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 750 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 751 752 PetscLogFlops(18*(a->s_nz*2 - A->m)); 753 PetscFunctionReturn(0); 754 } 755 756 #undef __FUNCT__ 757 #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4" 758 int MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 759 { 760 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 761 PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4; 762 MatScalar *v; 763 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 764 765 PetscFunctionBegin; 766 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 767 if (yy != xx) { 768 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 769 } else { 770 y = x; 771 } 772 if (zz != yy) { 773 /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 774 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 775 ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 776 } else { 777 z = y; 778 } 779 780 v = a->a; 781 xb = x; 782 783 for (i=0; i<mbs; i++) { 784 n = ai[1] - ai[0]; /* length of i_th block row of A */ 785 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 786 ib = aj + *ai; 787 jmin = 0; 788 if (*ib == i){ /* (diag of A)*x */ 789 z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 790 z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 791 z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; 792 z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; 793 v += 16; jmin++; 794 } 795 for (j=jmin; j<n; j++) { 796 /* (strict lower triangular part of A)*x */ 797 cval = ib[j]*4; 798 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 799 z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 800 z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 801 z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 802 /* (strict upper triangular part of A)*x */ 803 z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3]; 804 z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3]; 805 z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3]; 806 z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3]; 807 v += 16; 808 } 809 xb +=4; ai++; 810 } 811 812 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 813 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 814 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 815 816 PetscLogFlops(32*(a->s_nz*2 - A->m)); 817 PetscFunctionReturn(0); 818 } 819 820 #undef __FUNCT__ 821 #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5" 822 int MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 823 { 824 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 825 PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5; 826 MatScalar *v; 827 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 828 829 PetscFunctionBegin; 830 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 831 if (yy != xx) { 832 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 833 } else { 834 y = x; 835 } 836 if (zz != yy) { 837 /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 838 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 839 ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 840 } else { 841 z = y; 842 } 843 844 v = a->a; 845 xb = x; 846 847 for (i=0; i<mbs; i++) { 848 n = ai[1] - ai[0]; /* length of i_th block row of A */ 849 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; 850 ib = aj + *ai; 851 jmin = 0; 852 if (*ib == i){ /* (diag of A)*x */ 853 z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; 854 z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; 855 z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; 856 z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; 857 z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 858 v += 25; jmin++; 859 } 860 for (j=jmin; j<n; j++) { 861 /* (strict lower triangular part of A)*x */ 862 cval = ib[j]*5; 863 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 864 z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 865 z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5; 866 z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5; 867 z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; 868 /* (strict upper triangular part of A)*x */ 869 z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4]; 870 z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4]; 871 z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4]; 872 z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4]; 873 z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4]; 874 v += 25; 875 } 876 xb +=5; ai++; 877 } 878 879 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 880 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 881 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 882 883 PetscLogFlops(50*(a->s_nz*2 - A->m)); 884 PetscFunctionReturn(0); 885 } 886 #undef __FUNCT__ 887 #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6" 888 int MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 889 { 890 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 891 PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6; 892 MatScalar *v; 893 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 894 895 PetscFunctionBegin; 896 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 897 if (yy != xx) { 898 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 899 } else { 900 y = x; 901 } 902 if (zz != yy) { 903 /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 904 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 905 ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 906 } else { 907 z = y; 908 } 909 910 v = a->a; 911 xb = x; 912 913 for (i=0; i<mbs; i++) { 914 n = ai[1] - ai[0]; /* length of i_th block row of A */ 915 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; 916 ib = aj + *ai; 917 jmin = 0; 918 if (*ib == i){ /* (diag of A)*x */ 919 z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; 920 z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; 921 z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; 922 z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; 923 z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; 924 z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 925 v += 36; jmin++; 926 } 927 for (j=jmin; j<n; j++) { 928 /* (strict lower triangular part of A)*x */ 929 cval = ib[j]*6; 930 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6; 931 z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6; 932 z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6; 933 z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6; 934 z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6; 935 z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; 936 /* (strict upper triangular part of A)*x */ 937 z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5]; 938 z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5]; 939 z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5]; 940 z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5]; 941 z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5]; 942 z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5]; 943 v += 36; 944 } 945 xb +=6; ai++; 946 } 947 948 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 949 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 950 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 951 952 PetscLogFlops(72*(a->s_nz*2 - A->m)); 953 PetscFunctionReturn(0); 954 } 955 956 #undef __FUNCT__ 957 #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7" 958 int MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 959 { 960 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 961 PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7; 962 MatScalar *v; 963 int mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin; 964 965 PetscFunctionBegin; 966 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 967 if (yy != xx) { 968 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 969 } else { 970 y = x; 971 } 972 if (zz != yy) { 973 /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 974 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 975 ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 976 } else { 977 z = y; 978 } 979 980 v = a->a; 981 xb = x; 982 983 for (i=0; i<mbs; i++) { 984 n = ai[1] - ai[0]; /* length of i_th block row of A */ 985 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6]; 986 ib = aj + *ai; 987 jmin = 0; 988 if (*ib == i){ /* (diag of A)*x */ 989 z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; 990 z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; 991 z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; 992 z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; 993 z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; 994 z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; 995 z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 996 v += 49; jmin++; 997 } 998 for (j=jmin; j<n; j++) { 999 /* (strict lower triangular part of A)*x */ 1000 cval = ib[j]*7; 1001 z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7; 1002 z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7; 1003 z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7; 1004 z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7; 1005 z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7; 1006 z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7; 1007 z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; 1008 /* (strict upper triangular part of A)*x */ 1009 z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6]; 1010 z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6]; 1011 z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6]; 1012 z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6]; 1013 z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6]; 1014 z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6]; 1015 z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6]; 1016 v += 49; 1017 } 1018 xb +=7; ai++; 1019 } 1020 1021 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1022 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1023 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 1024 1025 PetscLogFlops(98*(a->s_nz*2 - A->m)); 1026 PetscFunctionReturn(0); 1027 } 1028 1029 #undef __FUNCT__ 1030 #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N" 1031 int MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1032 { 1033 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1034 PetscScalar *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt; 1035 MatScalar *v; 1036 int ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2; 1037 int ncols,k; 1038 1039 PetscFunctionBegin; 1040 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); x_ptr=x; 1041 if (yy != xx) { 1042 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1043 } else { 1044 y = x; 1045 } 1046 if (zz != yy) { 1047 /* ierr = VecCopy(yy,zz);CHKERRQ(ierr); */ 1048 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; 1049 ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 1050 } else { 1051 z = y; 1052 } 1053 1054 aj = a->j; 1055 v = a->a; 1056 ii = a->i; 1057 1058 if (!a->mult_work) { 1059 ierr = PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 1060 } 1061 work = a->mult_work; 1062 1063 1064 for (i=0; i<mbs; i++) { 1065 n = ii[1] - ii[0]; ncols = n*bs; 1066 workt = work; idx=aj+ii[0]; 1067 1068 /* upper triangular part */ 1069 for (j=0; j<n; j++) { 1070 xb = x_ptr + bs*(*idx++); 1071 for (k=0; k<bs; k++) workt[k] = xb[k]; 1072 workt += bs; 1073 } 1074 /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */ 1075 Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 1076 1077 /* strict lower triangular part */ 1078 idx = aj+ii[0]; 1079 if (*idx == i){ 1080 ncols -= bs; v += bs2; idx++; n--; 1081 } 1082 if (ncols > 0){ 1083 workt = work; 1084 ierr = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 1085 Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); 1086 for (j=0; j<n; j++) { 1087 zb = z_ptr + bs*(*idx++); 1088 /* idx++; */ 1089 for (k=0; k<bs; k++) zb[k] += workt[k] ; 1090 workt += bs; 1091 } 1092 } 1093 1094 x += bs; v += n*bs2; z += bs; ii++; 1095 } 1096 1097 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1098 if (yy != xx) ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1099 if (zz != yy) ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 1100 1101 PetscLogFlops(2*(a->s_nz*2 - A->m)); 1102 PetscFunctionReturn(0); 1103 } 1104 1105 #undef __FUNCT__ 1106 #define __FUNCT__ "MatMultTranspose_SeqSBAIJ" 1107 int MatMultTranspose_SeqSBAIJ(Mat A,Vec xx,Vec zz) 1108 { 1109 PetscFunctionBegin; 1110 SETERRQ(1,"Matrix is symmetric. Call MatMult()."); 1111 /* PetscFunctionReturn(0); */ 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatMultTransposeAdd_SeqSBAIJ" 1116 int MatMultTransposeAdd_SeqSBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1117 1118 { 1119 PetscFunctionBegin; 1120 SETERRQ(1,"Matrix is symmetric. Call MatMultAdd()."); 1121 /* PetscFunctionReturn(0); */ 1122 } 1123 1124 #undef __FUNCT__ 1125 #define __FUNCT__ "MatScale_SeqSBAIJ" 1126 int MatScale_SeqSBAIJ(PetscScalar *alpha,Mat inA) 1127 { 1128 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1129 int one = 1,totalnz = a->bs2*a->s_nz; 1130 1131 PetscFunctionBegin; 1132 BLscal_(&totalnz,alpha,a->a,&one); 1133 PetscLogFlops(totalnz); 1134 PetscFunctionReturn(0); 1135 } 1136 1137 #undef __FUNCT__ 1138 #define __FUNCT__ "MatNorm_SeqSBAIJ" 1139 int MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm) 1140 { 1141 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1142 MatScalar *v = a->a; 1143 PetscReal sum_diag = 0.0, sum_off = 0.0, *sum; 1144 int i,j,k,bs = a->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j; 1145 int *jl,*il,jmin,jmax,ierr,nexti,ik,*col; 1146 1147 PetscFunctionBegin; 1148 if (type == NORM_FROBENIUS) { 1149 for (k=0; k<mbs; k++){ 1150 jmin = a->i[k]; jmax = a->i[k+1]; 1151 col = aj + jmin; 1152 if (*col == k){ /* diagonal block */ 1153 for (i=0; i<bs2; i++){ 1154 #if defined(PETSC_USE_COMPLEX) 1155 sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++; 1156 #else 1157 sum_diag += (*v)*(*v); v++; 1158 #endif 1159 } 1160 jmin++; 1161 } 1162 for (j=jmin; j<jmax; j++){ /* off-diagonal blocks */ 1163 for (i=0; i<bs2; i++){ 1164 #if defined(PETSC_USE_COMPLEX) 1165 sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++; 1166 #else 1167 sum_off += (*v)*(*v); v++; 1168 #endif 1169 } 1170 } 1171 } 1172 *norm = sqrt(sum_diag + 2*sum_off); 1173 1174 } else if (type == NORM_INFINITY) { /* maximum row sum */ 1175 ierr = PetscMalloc(mbs*sizeof(int),&il);CHKERRQ(ierr); 1176 ierr = PetscMalloc(mbs*sizeof(int),&jl);CHKERRQ(ierr); 1177 ierr = PetscMalloc(bs*sizeof(PetscReal),&sum);CHKERRQ(ierr); 1178 for (i=0; i<mbs; i++) { 1179 jl[i] = mbs; il[0] = 0; 1180 } 1181 1182 *norm = 0.0; 1183 for (k=0; k<mbs; k++) { /* k_th block row */ 1184 for (j=0; j<bs; j++) sum[j]=0.0; 1185 1186 /*-- col sum --*/ 1187 i = jl[k]; /* first |A(i,k)| to be added */ 1188 /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window) 1189 at step k */ 1190 while (i<mbs){ 1191 nexti = jl[i]; /* next block row to be added */ 1192 ik = il[i]; /* block index of A(i,k) in the array a */ 1193 for (j=0; j<bs; j++){ 1194 v = a->a + ik*bs2 + j*bs; 1195 for (k1=0; k1<bs; k1++) { 1196 sum[j] += PetscAbsScalar(*v); v++; 1197 } 1198 } 1199 /* update il, jl */ 1200 jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */ 1201 jmax = a->i[i+1]; 1202 if (jmin < jmax){ 1203 il[i] = jmin; 1204 j = a->j[jmin]; 1205 jl[i] = jl[j]; jl[j]=i; 1206 } 1207 i = nexti; 1208 } 1209 1210 /*-- row sum --*/ 1211 jmin = a->i[k]; jmax = a->i[k+1]; 1212 for (i=jmin; i<jmax; i++) { 1213 for (j=0; j<bs; j++){ 1214 v = a->a + i*bs2 + j; 1215 for (k1=0; k1<bs; k1++){ 1216 sum[j] += PetscAbsScalar(*v); 1217 v += bs; 1218 } 1219 } 1220 } 1221 /* add k_th block row to il, jl */ 1222 col = aj+jmin; 1223 if (*col == k) jmin++; 1224 if (jmin < jmax){ 1225 il[k] = jmin; 1226 j = a->j[jmin]; 1227 jl[k] = jl[j]; jl[j] = k; 1228 } 1229 for (j=0; j<bs; j++){ 1230 if (sum[j] > *norm) *norm = sum[j]; 1231 } 1232 } 1233 ierr = PetscFree(il);CHKERRQ(ierr); 1234 ierr = PetscFree(jl);CHKERRQ(ierr); 1235 ierr = PetscFree(sum);CHKERRQ(ierr); 1236 } else { 1237 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 1238 } 1239 PetscFunctionReturn(0); 1240 } 1241 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatEqual_SeqSBAIJ" 1244 int MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg) 1245 { 1246 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data; 1247 int ierr; 1248 PetscTruth flag; 1249 1250 PetscFunctionBegin; 1251 ierr = PetscTypeCompare((PetscObject)A,MATSEQBAIJ,&flag);CHKERRQ(ierr); 1252 if (!flag) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1253 1254 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1255 if ((A->m != B->m) || (A->n != B->n) || (a->bs != b->bs)|| (a->s_nz != b->s_nz)) { 1256 *flg = PETSC_FALSE; 1257 PetscFunctionReturn(0); 1258 } 1259 1260 /* if the a->i are the same */ 1261 ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);CHKERRQ(ierr); 1262 if (*flg == PETSC_FALSE) { 1263 PetscFunctionReturn(0); 1264 } 1265 1266 /* if a->j are the same */ 1267 ierr = PetscMemcmp(a->j,b->j,(a->s_nz)*sizeof(int),flg);CHKERRQ(ierr); 1268 if (*flg == PETSC_FALSE) { 1269 PetscFunctionReturn(0); 1270 } 1271 /* if a->a are the same */ 1272 ierr = PetscMemcmp(a->a,b->a,(a->s_nz)*(a->bs)*(a->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr); 1273 PetscFunctionReturn(0); 1274 1275 } 1276 1277 #undef __FUNCT__ 1278 #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ" 1279 int MatGetDiagonal_SeqSBAIJ(Mat A,Vec v) 1280 { 1281 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1282 int ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1283 PetscScalar *x,zero = 0.0; 1284 MatScalar *aa,*aa_j; 1285 1286 PetscFunctionBegin; 1287 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1288 bs = a->bs; 1289 aa = a->a; 1290 ai = a->i; 1291 aj = a->j; 1292 ambs = a->mbs; 1293 bs2 = a->bs2; 1294 1295 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1296 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1297 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1298 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1299 for (i=0; i<ambs; i++) { 1300 j=ai[i]; 1301 if (aj[j] == i) { /* if this is a diagonal element */ 1302 row = i*bs; 1303 aa_j = aa + j*bs2; 1304 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1305 } 1306 } 1307 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatDiagonalScale_SeqSBAIJ" 1313 int MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr) 1314 { 1315 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1316 PetscScalar *l,*r,x,*li,*ri; 1317 MatScalar *aa,*v; 1318 int ierr,i,j,k,lm,rn,M,m,*ai,*aj,mbs,tmp,bs,bs2; 1319 1320 PetscFunctionBegin; 1321 ai = a->i; 1322 aj = a->j; 1323 aa = a->a; 1324 m = A->m; 1325 bs = a->bs; 1326 mbs = a->mbs; 1327 bs2 = a->bs2; 1328 1329 if (ll != rr) { 1330 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1331 } 1332 if (ll) { 1333 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 1334 ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); 1335 if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 1336 for (i=0; i<mbs; i++) { /* for each block row */ 1337 M = ai[i+1] - ai[i]; 1338 li = l + i*bs; 1339 v = aa + bs2*ai[i]; 1340 for (j=0; j<M; j++) { /* for each block */ 1341 for (k=0; k<bs2; k++) { 1342 (*v++) *= li[k%bs]; 1343 } 1344 #ifdef CONT 1345 /* will be used to replace the above loop */ 1346 ri = l + bs*aj[ai[i]+j]; 1347 for (k=0; k<bs; k++) { /* column value */ 1348 x = ri[k]; 1349 for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x; 1350 } 1351 #endif 1352 1353 } 1354 } 1355 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 1356 PetscLogFlops(2*a->s_nz); 1357 } 1358 /* will be deleted */ 1359 if (rr) { 1360 ierr = VecGetArray(rr,&r);CHKERRQ(ierr); 1361 ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr); 1362 if (rn != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 1363 for (i=0; i<mbs; i++) { /* for each block row */ 1364 M = ai[i+1] - ai[i]; 1365 v = aa + bs2*ai[i]; 1366 for (j=0; j<M; j++) { /* for each block */ 1367 ri = r + bs*aj[ai[i]+j]; 1368 for (k=0; k<bs; k++) { 1369 x = ri[k]; 1370 for (tmp=0; tmp<bs; tmp++) (*v++) *= x; 1371 } 1372 } 1373 } 1374 ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr); 1375 PetscLogFlops(a->s_nz); 1376 } 1377 PetscFunctionReturn(0); 1378 } 1379 1380 #undef __FUNCT__ 1381 #define __FUNCT__ "MatGetInfo_SeqSBAIJ" 1382 int MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1383 { 1384 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1385 1386 PetscFunctionBegin; 1387 info->rows_global = (double)A->m; 1388 info->columns_global = (double)A->m; 1389 info->rows_local = (double)A->m; 1390 info->columns_local = (double)A->m; 1391 info->block_size = a->bs2; 1392 info->nz_allocated = a->s_maxnz; /*num. of nonzeros in upper triangular part */ 1393 info->nz_used = a->bs2*a->s_nz; /*num. of nonzeros in upper triangular part */ 1394 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1395 info->assemblies = A->num_ass; 1396 info->mallocs = a->reallocs; 1397 info->memory = A->mem; 1398 if (A->factor) { 1399 info->fill_ratio_given = A->info.fill_ratio_given; 1400 info->fill_ratio_needed = A->info.fill_ratio_needed; 1401 info->factor_mallocs = A->info.factor_mallocs; 1402 } else { 1403 info->fill_ratio_given = 0; 1404 info->fill_ratio_needed = 0; 1405 info->factor_mallocs = 0; 1406 } 1407 PetscFunctionReturn(0); 1408 } 1409 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatZeroEntries_SeqSBAIJ" 1413 int MatZeroEntries_SeqSBAIJ(Mat A) 1414 { 1415 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1416 int ierr; 1417 1418 PetscFunctionBegin; 1419 ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); 1420 PetscFunctionReturn(0); 1421 } 1422 1423 #undef __FUNCT__ 1424 #define __FUNCT__ "MatGetRowMax_SeqSBAIJ" 1425 int MatGetRowMax_SeqSBAIJ(Mat A,Vec v) 1426 { 1427 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1428 int ierr,i,j,n,row,col,bs,*ai,*aj,mbs; 1429 PetscReal atmp; 1430 MatScalar *aa; 1431 PetscScalar zero = 0.0,*x; 1432 int ncols,brow,bcol,krow,kcol; 1433 1434 PetscFunctionBegin; 1435 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1436 bs = a->bs; 1437 aa = a->a; 1438 ai = a->i; 1439 aj = a->j; 1440 mbs = a->mbs; 1441 1442 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1443 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1444 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1445 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1446 for (i=0; i<mbs; i++) { 1447 ncols = ai[1] - ai[0]; ai++; 1448 brow = bs*i; 1449 for (j=0; j<ncols; j++){ 1450 bcol = bs*(*aj); 1451 for (kcol=0; kcol<bs; kcol++){ 1452 col = bcol + kcol; /* col index */ 1453 for (krow=0; krow<bs; krow++){ 1454 atmp = PetscAbsScalar(*aa); aa++; 1455 row = brow + krow; /* row index */ 1456 /* printf("val[%d,%d]: %g\n",row,col,atmp); */ 1457 if (PetscRealPart(x[row]) < atmp) x[row] = atmp; 1458 if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp; 1459 } 1460 } 1461 aj++; 1462 } 1463 } 1464 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1465 PetscFunctionReturn(0); 1466 } 1467