#include <../src/mat/impls/baij/seq/baij.h> #include #include #include <../src/mat/impls/sbaij/seq/sbaij.h> #include #undef __FUNCT__ #define __FUNCT__ "MatIncreaseOverlap_SeqSBAIJ" PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscErrorCode ierr; PetscInt brow,i,j,k,l,mbs,n,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2; const PetscInt *idx; PetscBT table_out,table_in; PetscFunctionBegin; if (ov < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified"); mbs = a->mbs; ai = a->i; aj = a->j; bs = A->rmap->bs; ierr = PetscBTCreate(mbs,&table_out);CHKERRQ(ierr); ierr = PetscMalloc1(mbs+1,&nidx);CHKERRQ(ierr); ierr = PetscMalloc1(A->rmap->N+1,&nidx2);CHKERRQ(ierr); ierr = PetscBTCreate(mbs,&table_in);CHKERRQ(ierr); for (i=0; i= mbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim"); if (!PetscBTLookupSet(table_out,brow)) { nidx[isz++] = brow; if (bcol_max < brow) bcol_max = brow; } } ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr); ierr = ISDestroy(&is[i]);CHKERRQ(ierr); k = 0; for (j=0; j= n) break; /* for (brow=0; brow bcol_max) break; if (PetscBTLookup(table_in,bcol)) { if (!PetscBTLookupSet(table_out,brow)) nidx[isz++] = brow; break; /* for l = start; ldata,*c; PetscErrorCode ierr; PetscInt *smap,i,k,kstart,kend,oldcols = a->mbs,*lens; PetscInt row,mat_i,*mat_j,tcol,*mat_ilen; PetscInt nrows,*ssmap,bs=A->rmap->bs,bs2=a->bs2; const PetscInt *irow,*aj = a->j,*ai = a->i; MatScalar *mat_a; Mat C; PetscBool flag,sorted; PetscFunctionBegin; ierr = ISSorted(isrow,&sorted);CHKERRQ(ierr); if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted"); ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); ierr = PetscMalloc1(oldcols,&smap);CHKERRQ(ierr); ierr = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr); ssmap = smap; ierr = PetscMalloc1(1+nrows,&lens);CHKERRQ(ierr); for (i=0; iilen[irow[i]]; lens[i] = 0; for (k=kstart; kdata); if (c->mbs!=nrows || (*B)->rmap->bs!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Submatrix wrong size"); ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr); if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr); C = *B; } else { ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); ierr = MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);CHKERRQ(ierr); } c = (Mat_SeqSBAIJ*)(C->data); for (i=0; iilen[row]; mat_i = c->i[i]; mat_j = c->j + mat_i; mat_a = c->a + mat_i*bs2; mat_ilen = c->ilen + i; for (k=kstart; kj[k]])) { *mat_j++ = tcol - 1; ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); mat_a += bs2; (*mat_ilen)++; } } } /* Free work space */ ierr = PetscFree(smap);CHKERRQ(ierr); ierr = PetscFree(lens);CHKERRQ(ierr); ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); *B = C; PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatGetSubMatrix_SeqSBAIJ" PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; IS is1; PetscErrorCode ierr; PetscInt *vary,*iary,nrows,i,bs=A->rmap->bs,count; const PetscInt *irow; PetscFunctionBegin; if (isrow != iscol) { PetscBool isequal; ierr = ISEqual(isrow,iscol,&isequal);CHKERRQ(ierr); if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); } ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); ierr = ISGetSize(isrow,&nrows);CHKERRQ(ierr); /* Verify if the indices corespond to each element in a block and form the IS with compressed IS */ ierr = PetscMalloc2(a->mbs,&vary,a->mbs,&iary);CHKERRQ(ierr); ierr = PetscMemzero(vary,a->mbs*sizeof(PetscInt));CHKERRQ(ierr); for (i=0; imbs; i++) { if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Index set does not match blocks"); if (vary[i]==bs) iary[count++] = i; } ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,PETSC_COPY_VALUES,&is1);CHKERRQ(ierr); ierr = PetscFree2(vary,iary);CHKERRQ(ierr); ierr = MatGetSubMatrix_SeqSBAIJ_Private(A,is1,scall,B);CHKERRQ(ierr); ierr = ISDestroy(&is1);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatGetSubMatrices_SeqSBAIJ" PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) { PetscErrorCode ierr; PetscInt i; PetscFunctionBegin; if (scall == MAT_INITIAL_MATRIX) { ierr = PetscMalloc1(n+1,B);CHKERRQ(ierr); } for (i=0; idata; PetscScalar *z,x1,x2,zero=0.0; const PetscScalar *x,*xb; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs = a->mbs,i,n,cval,j,jmin; const PetscInt *aj=a->j,*ai=a->i,*ib; PetscInt nonzerorow=0; PetscFunctionBegin; ierr = VecSet(zz,zero);CHKERRQ(ierr); ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&z);CHKERRQ(ierr); v = a->a; xb = x; for (i=0; i0); if (*ib == i) { /* (diag of A)*x */ z[2*i] += v[0]*x1 + v[2]*x2; z[2*i+1] += v[2]*x1 + v[3]*x2; v += 4; jmin++; } PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ for (j=jmin; jnz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatMult_SeqSBAIJ_3" PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,x1,x2,x3,zero=0.0; const PetscScalar *x,*xb; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs = a->mbs,i,n,cval,j,jmin; const PetscInt *aj = a->j,*ai = a->i,*ib; PetscInt nonzerorow=0; PetscFunctionBegin; ierr = VecSet(zz,zero);CHKERRQ(ierr); ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&z);CHKERRQ(ierr); v = a->a; xb = x; for (i=0; i0); if (*ib == i) { /* (diag of A)*x */ z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; v += 9; jmin++; } PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ for (j=jmin; jnz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatMult_SeqSBAIJ_4" PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,x1,x2,x3,x4,zero=0.0; const PetscScalar *x,*xb; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs = a->mbs,i,n,cval,j,jmin; const PetscInt *aj = a->j,*ai = a->i,*ib; PetscInt nonzerorow = 0; PetscFunctionBegin; ierr = VecSet(zz,zero);CHKERRQ(ierr); ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&z);CHKERRQ(ierr); v = a->a; xb = x; for (i=0; i0); if (*ib == i) { /* (diag of A)*x */ z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; v += 16; jmin++; } PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ for (j=jmin; jnz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatMult_SeqSBAIJ_5" PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,x1,x2,x3,x4,x5,zero=0.0; const PetscScalar *x,*xb; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs = a->mbs,i,n,cval,j,jmin; const PetscInt *aj = a->j,*ai = a->i,*ib; PetscInt nonzerorow=0; PetscFunctionBegin; ierr = VecSet(zz,zero);CHKERRQ(ierr); ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&z);CHKERRQ(ierr); v = a->a; xb = x; for (i=0; i0); if (*ib == i) { /* (diag of A)*x */ z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; v += 25; jmin++; } PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ for (j=jmin; jnz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatMult_SeqSBAIJ_6" PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,x1,x2,x3,x4,x5,x6,zero=0.0; const PetscScalar *x,*xb; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs = a->mbs,i,n,cval,j,jmin; const PetscInt *aj=a->j,*ai=a->i,*ib; PetscInt nonzerorow=0; PetscFunctionBegin; ierr = VecSet(zz,zero);CHKERRQ(ierr); ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&z);CHKERRQ(ierr); v = a->a; xb = x; for (i=0; i0); if (*ib == i) { /* (diag of A)*x */ z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; v += 36; jmin++; } PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ for (j=jmin; jnz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatMult_SeqSBAIJ_7" PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,x1,x2,x3,x4,x5,x6,x7,zero=0.0; const PetscScalar *x,*xb; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs = a->mbs,i,n,cval,j,jmin; const PetscInt *aj=a->j,*ai=a->i,*ib; PetscInt nonzerorow=0; PetscFunctionBegin; ierr = VecSet(zz,zero);CHKERRQ(ierr); ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&z);CHKERRQ(ierr); v = a->a; xb = x; for (i=0; i0); if (*ib == i) { /* (diag of A)*x */ z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; v += 49; jmin++; } PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ for (j=jmin; jnz*2.0 - nonzerorow) - nonzerorow);CHKERRQ(ierr); PetscFunctionReturn(0); } /* This will not work with MatScalar == float because it calls the BLAS */ #undef __FUNCT__ #define __FUNCT__ "MatMult_SeqSBAIJ_N" PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,*z_ptr,*zb,*work,*workt,zero=0.0; const PetscScalar *x,*x_ptr,*xb; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs =a->mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k; const PetscInt *idx,*aj,*ii; PetscInt nonzerorow=0; PetscFunctionBegin; ierr = VecSet(zz,zero);CHKERRQ(ierr); ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);x_ptr = x; ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; aj = a->j; v = a->a; ii = a->i; if (!a->mult_work) { ierr = PetscMalloc1(A->rmap->N+1,&a->mult_work);CHKERRQ(ierr); } work = a->mult_work; for (i=0; i0); /* upper triangular part */ for (j=0; j 0) { workt = work; ierr = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr); PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); for (j=0; jnz*2.0 - nonzerorow)*bs2 - nonzerorow);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatMultAdd_SeqSBAIJ_1" PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,x1; const PetscScalar *x,*xb; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs =a->mbs,i,n,cval,j,jmin; const PetscInt *aj=a->j,*ai=a->i,*ib; PetscInt nonzerorow=0; PetscFunctionBegin; ierr = VecCopy(yy,zz);CHKERRQ(ierr); ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&z);CHKERRQ(ierr); v = a->a; xb = x; for (i=0; i0); if (*ib == i) { /* (diag of A)*x */ z[i] += *v++ * x[*ib++]; jmin++; } for (j=jmin; jnz*2.0 - nonzerorow));CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatMultAdd_SeqSBAIJ_2" PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,x1,x2; const PetscScalar *x,*xb; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs =a->mbs,i,n,cval,j,jmin; const PetscInt *aj=a->j,*ai=a->i,*ib; PetscInt nonzerorow=0; PetscFunctionBegin; ierr = VecCopy(yy,zz);CHKERRQ(ierr); ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&z);CHKERRQ(ierr); v = a->a; xb = x; for (i=0; i0); if (*ib == i) { /* (diag of A)*x */ z[2*i] += v[0]*x1 + v[2]*x2; z[2*i+1] += v[2]*x1 + v[3]*x2; v += 4; jmin++; } PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ for (j=jmin; jnz*2.0 - nonzerorow));CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatMultAdd_SeqSBAIJ_3" PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,x1,x2,x3; const PetscScalar *x,*xb; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs = a->mbs,i,n,cval,j,jmin; const PetscInt *aj=a->j,*ai=a->i,*ib; PetscInt nonzerorow=0; PetscFunctionBegin; ierr = VecCopy(yy,zz);CHKERRQ(ierr); ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&z);CHKERRQ(ierr); v = a->a; xb = x; for (i=0; i0); if (*ib == i) { /* (diag of A)*x */ z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3; z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3; z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3; v += 9; jmin++; } PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ for (j=jmin; jnz*2.0 - nonzerorow));CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatMultAdd_SeqSBAIJ_4" PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,x1,x2,x3,x4; const PetscScalar *x,*xb; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs = a->mbs,i,n,cval,j,jmin; const PetscInt *aj=a->j,*ai=a->i,*ib; PetscInt nonzerorow=0; PetscFunctionBegin; ierr = VecCopy(yy,zz);CHKERRQ(ierr); ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&z);CHKERRQ(ierr); v = a->a; xb = x; for (i=0; i0); if (*ib == i) { /* (diag of A)*x */ z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4; z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4; v += 16; jmin++; } PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ for (j=jmin; jnz*2.0 - nonzerorow));CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatMultAdd_SeqSBAIJ_5" PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,x1,x2,x3,x4,x5; const PetscScalar *x,*xb; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs = a->mbs,i,n,cval,j,jmin; const PetscInt *aj=a->j,*ai=a->i,*ib; PetscInt nonzerorow=0; PetscFunctionBegin; ierr = VecCopy(yy,zz);CHKERRQ(ierr); ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&z);CHKERRQ(ierr); v = a->a; xb = x; for (i=0; i0); if (*ib == i) { /* (diag of A)*x */ z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5; z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5; z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5; z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5; z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5; v += 25; jmin++; } PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ for (j=jmin; jnz*2.0 - nonzerorow));CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatMultAdd_SeqSBAIJ_6" PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,x1,x2,x3,x4,x5,x6; const PetscScalar *x,*xb; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs = a->mbs,i,n,cval,j,jmin; const PetscInt *aj=a->j,*ai=a->i,*ib; PetscInt nonzerorow=0; PetscFunctionBegin; ierr = VecCopy(yy,zz);CHKERRQ(ierr); ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&z);CHKERRQ(ierr); v = a->a; xb = x; for (i=0; i0); if (*ib == i) { /* (diag of A)*x */ z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6; z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6; z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6; z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6; z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6; z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6; v += 36; jmin++; } PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ for (j=jmin; jnz*2.0 - nonzerorow));CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatMultAdd_SeqSBAIJ_7" PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,x1,x2,x3,x4,x5,x6,x7; const PetscScalar *x,*xb; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs = a->mbs,i,n,cval,j,jmin; const PetscInt *aj=a->j,*ai=a->i,*ib; PetscInt nonzerorow=0; PetscFunctionBegin; ierr = VecCopy(yy,zz);CHKERRQ(ierr); ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); ierr = VecGetArray(zz,&z);CHKERRQ(ierr); v = a->a; xb = x; for (i=0; i0); if (*ib == i) { /* (diag of A)*x */ z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7; z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7; z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7; z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7; z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7; z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7; z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7; v += 49; jmin++; } PetscPrefetchBlock(ib+jmin+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */ PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */ for (j=jmin; jnz*2.0 - nonzerorow));CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatMultAdd_SeqSBAIJ_N" PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscScalar *z,*z_ptr=0,*zb,*work,*workt; const PetscScalar *x,*x_ptr,*xb; const MatScalar *v; PetscErrorCode ierr; PetscInt mbs = a->mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k; const PetscInt *idx,*aj,*ii; PetscInt nonzerorow=0; PetscFunctionBegin; ierr = VecCopy(yy,zz);CHKERRQ(ierr); ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); x_ptr=x; ierr = VecGetArray(zz,&z);CHKERRQ(ierr); z_ptr=z; aj = a->j; v = a->a; ii = a->i; if (!a->mult_work) { ierr = PetscMalloc1(A->rmap->n+1,&a->mult_work);CHKERRQ(ierr); } work = a->mult_work; for (i=0; i0); /* upper triangular part */ for (j=0; j 0) { workt = work; ierr = PetscMemzero(workt,ncols*sizeof(PetscScalar));CHKERRQ(ierr); PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt); for (j=0; jnz*2.0 - nonzerorow));CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatScale_SeqSBAIJ" PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; PetscScalar oalpha = alpha; PetscErrorCode ierr; PetscBLASInt one = 1,totalnz; PetscFunctionBegin; ierr = PetscBLASIntCast(a->bs2*a->nz,&totalnz);CHKERRQ(ierr); PetscStackCallBLAS("BLASscal",BLASscal_(&totalnz,&oalpha,a->a,&one)); ierr = PetscLogFlops(totalnz);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatNorm_SeqSBAIJ" PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; const MatScalar *v = a->a; PetscReal sum_diag = 0.0, sum_off = 0.0, *sum; PetscInt i,j,k,bs = A->rmap->bs,bs2=a->bs2,k1,mbs=a->mbs,jmin,jmax,nexti,ik,*jl,*il; PetscErrorCode ierr; const PetscInt *aj=a->j,*col; PetscFunctionBegin; if (type == NORM_FROBENIUS) { for (k=0; ki[k]; jmax = a->i[k+1]; col = aj + jmin; if (*col == k) { /* diagonal block */ for (i=0; ia + ik*bs2 + j*bs; for (k1=0; k1i[i+1]; if (jmin < jmax) { il[i] = jmin; j = a->j[jmin]; jl[i] = jl[j]; jl[j]=i; } i = nexti; } /*-- row sum --*/ jmin = a->i[k]; jmax = a->i[k+1]; for (i=jmin; ia + i*bs2 + j; for (k1=0; k1j[jmin]; jl[k] = jl[j]; jl[j] = k; } for (j=0; j *norm) *norm = sum[j]; } } ierr = PetscFree3(sum,il,jl);CHKERRQ(ierr); } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatEqual_SeqSBAIJ" PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscBool * flg) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)B->data; PetscErrorCode ierr; PetscFunctionBegin; /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) { *flg = PETSC_FALSE; PetscFunctionReturn(0); } /* if the a->i are the same */ ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr); if (!*flg) PetscFunctionReturn(0); /* if a->j are the same */ ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr); if (!*flg) PetscFunctionReturn(0); /* if a->a are the same */ ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(A->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatGetDiagonal_SeqSBAIJ" PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscErrorCode ierr; PetscInt i,j,k,row,bs,ambs,bs2; const PetscInt *ai,*aj; PetscScalar *x,zero = 0.0; const MatScalar *aa,*aa_j; PetscFunctionBegin; bs = A->rmap->bs; if (A->factortype && bs>1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1"); aa = a->a; ambs = a->mbs; if (A->factortype == MAT_FACTOR_CHOLESKY || A->factortype == MAT_FACTOR_ICC) { PetscInt *diag=a->diag; aa = a->a; ambs = a->mbs; ierr = VecGetArray(v,&x);CHKERRQ(ierr); for (i=0; ii; aj = a->j; bs2 = a->bs2; ierr = VecSet(v,zero);CHKERRQ(ierr); ierr = VecGetArray(v,&x);CHKERRQ(ierr); for (i=0; idata; PetscScalar x; const PetscScalar *l,*li,*ri; MatScalar *aa,*v; PetscErrorCode ierr; PetscInt i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2; PetscBool flg; PetscFunctionBegin; if (ll != rr) { ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); } if (!ll) PetscFunctionReturn(0); ai = a->i; aj = a->j; aa = a->a; m = A->rmap->N; bs = A->rmap->bs; mbs = a->mbs; bs2 = a->bs2; ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); if (lm != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); for (i=0; inz);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatGetInfo_SeqSBAIJ" PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscFunctionBegin; info->block_size = a->bs2; info->nz_allocated = a->bs2*a->maxnz; /*num. of nonzeros in upper triangular part */ info->nz_used = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */ info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); info->assemblies = A->num_ass; info->mallocs = A->info.mallocs; info->memory = ((PetscObject)A)->mem; if (A->factortype) { info->fill_ratio_given = A->info.fill_ratio_given; info->fill_ratio_needed = A->info.fill_ratio_needed; info->factor_mallocs = A->info.factor_mallocs; } else { info->fill_ratio_given = 0; info->fill_ratio_needed = 0; info->factor_mallocs = 0; } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatZeroEntries_SeqSBAIJ" PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatGetRowMaxAbs_SeqSBAIJ" /* This code does not work since it only checks the upper triangular part of the matrix. Hence it is not listed in the function table. */ PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[]) { Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; PetscErrorCode ierr; PetscInt i,j,n,row,col,bs,mbs; const PetscInt *ai,*aj; PetscReal atmp; const MatScalar *aa; PetscScalar *x; PetscInt ncols,brow,bcol,krow,kcol; PetscFunctionBegin; if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); bs = A->rmap->bs; aa = a->a; ai = a->i; aj = a->j; mbs = a->mbs; ierr = VecSet(v,0.0);CHKERRQ(ierr); ierr = VecGetArray(v,&x);CHKERRQ(ierr); ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); for (i=0; i i && PetscRealPart(x[col]) < atmp) x[col] = atmp; } } aj++; } } ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); PetscFunctionReturn(0); }