1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/seq/baij.h" 4 #include "src/inline/spops.h" 5 #include "src/inline/ilu.h" 6 #include "petscbt.h" 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatIncreaseOverlap_SeqBAIJ" 10 PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov) 11 { 12 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 13 PetscErrorCode ierr; 14 PetscInt row,i,j,k,l,m,n,*idx,*nidx,isz,val,ival; 15 PetscInt start,end,*ai,*aj,bs,*nidx2; 16 PetscBT table; 17 18 PetscFunctionBegin; 19 m = a->mbs; 20 ai = a->i; 21 aj = a->j; 22 bs = A->rmap.bs; 23 24 if (ov < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified"); 25 26 ierr = PetscBTCreate(m,table);CHKERRQ(ierr); 27 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr); 28 ierr = PetscMalloc((A->rmap.N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr); 29 30 for (i=0; i<is_max; i++) { 31 /* Initialise the two local arrays */ 32 isz = 0; 33 ierr = PetscBTMemzero(m,table);CHKERRQ(ierr); 34 35 /* Extract the indices, assume there can be duplicate entries */ 36 ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr); 37 ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr); 38 39 /* Enter these into the temp arrays i.e mark table[row], enter row into new index */ 40 for (j=0; j<n ; ++j){ 41 ival = idx[j]/bs; /* convert the indices into block indices */ 42 if (ival>=m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim"); 43 if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;} 44 } 45 ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr); 46 ierr = ISDestroy(is[i]);CHKERRQ(ierr); 47 48 k = 0; 49 for (j=0; j<ov; j++){ /* for each overlap*/ 50 n = isz; 51 for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */ 52 row = nidx[k]; 53 start = ai[row]; 54 end = ai[row+1]; 55 for (l = start; l<end ; l++){ 56 val = aj[l]; 57 if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;} 58 } 59 } 60 } 61 /* expand the Index Set */ 62 for (j=0; j<isz; j++) { 63 for (k=0; k<bs; k++) 64 nidx2[j*bs+k] = nidx[j]*bs+k; 65 } 66 ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr); 67 } 68 ierr = PetscBTDestroy(table);CHKERRQ(ierr); 69 ierr = PetscFree(nidx);CHKERRQ(ierr); 70 ierr = PetscFree(nidx2);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ_Private" 76 PetscErrorCode MatGetSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 77 { 78 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*c; 79 PetscErrorCode ierr; 80 PetscInt *smap,i,k,kstart,kend,oldcols = a->nbs,*lens; 81 PetscInt row,mat_i,*mat_j,tcol,*mat_ilen; 82 PetscInt *irow,*icol,nrows,ncols,*ssmap,bs=A->rmap.bs,bs2=a->bs2; 83 PetscInt *aj = a->j,*ai = a->i; 84 MatScalar *mat_a; 85 Mat C; 86 PetscTruth flag; 87 88 PetscFunctionBegin; 89 ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr); 90 if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted"); 91 92 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 93 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 94 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 95 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 96 97 ierr = PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);CHKERRQ(ierr); 98 ssmap = smap; 99 ierr = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 100 ierr = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr); 101 for (i=0; i<ncols; i++) smap[icol[i]] = i+1; 102 /* determine lens of each row */ 103 for (i=0; i<nrows; i++) { 104 kstart = ai[irow[i]]; 105 kend = kstart + a->ilen[irow[i]]; 106 lens[i] = 0; 107 for (k=kstart; k<kend; k++) { 108 if (ssmap[aj[k]]) { 109 lens[i]++; 110 } 111 } 112 } 113 /* Create and fill new matrix */ 114 if (scall == MAT_REUSE_MATRIX) { 115 c = (Mat_SeqBAIJ *)((*B)->data); 116 117 if (c->mbs!=nrows || c->nbs!=ncols || (*B)->rmap.bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size"); 118 ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr); 119 if (!flag) { 120 SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); 121 } 122 ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr); 123 C = *B; 124 } else { 125 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 126 ierr = MatSetSizes(C,nrows*bs,ncols*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 127 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 128 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,lens);CHKERRQ(ierr); 129 } 130 c = (Mat_SeqBAIJ *)(C->data); 131 for (i=0; i<nrows; i++) { 132 row = irow[i]; 133 kstart = ai[row]; 134 kend = kstart + a->ilen[row]; 135 mat_i = c->i[i]; 136 mat_j = c->j + mat_i; 137 mat_a = c->a + mat_i*bs2; 138 mat_ilen = c->ilen + i; 139 for (k=kstart; k<kend; k++) { 140 if ((tcol=ssmap[a->j[k]])) { 141 *mat_j++ = tcol - 1; 142 ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 143 mat_a += bs2; 144 (*mat_ilen)++; 145 } 146 } 147 } 148 149 /* Free work space */ 150 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 151 ierr = PetscFree(smap);CHKERRQ(ierr); 152 ierr = PetscFree(lens);CHKERRQ(ierr); 153 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 154 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 155 156 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 157 *B = C; 158 PetscFunctionReturn(0); 159 } 160 161 #undef __FUNCT__ 162 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ" 163 PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 164 { 165 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 166 IS is1,is2; 167 PetscErrorCode ierr; 168 PetscInt *vary,*iary,*irow,*icol,nrows,ncols,i,bs=A->rmap.bs,count; 169 170 PetscFunctionBegin; 171 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 172 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 173 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 174 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 175 176 /* Verify if the indices corespond to each element in a block 177 and form the IS with compressed IS */ 178 ierr = PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);CHKERRQ(ierr); 179 iary = vary + a->mbs; 180 ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr); 181 for (i=0; i<nrows; i++) vary[irow[i]/bs]++; 182 count = 0; 183 for (i=0; i<a->mbs; i++) { 184 if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Index set does not match blocks"); 185 if (vary[i]==bs) iary[count++] = i; 186 } 187 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr); 188 189 ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr); 190 for (i=0; i<ncols; i++) vary[icol[i]/bs]++; 191 count = 0; 192 for (i=0; i<a->mbs; i++) { 193 if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_PLIB,"Internal error in PETSc"); 194 if (vary[i]==bs) iary[count++] = i; 195 } 196 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is2);CHKERRQ(ierr); 197 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 198 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 199 ierr = PetscFree(vary);CHKERRQ(ierr); 200 201 ierr = MatGetSubMatrix_SeqBAIJ_Private(A,is1,is2,cs,scall,B);CHKERRQ(ierr); 202 ISDestroy(is1); 203 ISDestroy(is2); 204 PetscFunctionReturn(0); 205 } 206 207 #undef __FUNCT__ 208 #define __FUNCT__ "MatGetSubMatrices_SeqBAIJ" 209 PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 210 { 211 PetscErrorCode ierr; 212 PetscInt i; 213 214 PetscFunctionBegin; 215 if (scall == MAT_INITIAL_MATRIX) { 216 ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr); 217 } 218 219 for (i=0; i<n; i++) { 220 ierr = MatGetSubMatrix_SeqBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr); 221 } 222 PetscFunctionReturn(0); 223 } 224 225 226 /* -------------------------------------------------------*/ 227 /* Should check that shapes of vectors and matrices match */ 228 /* -------------------------------------------------------*/ 229 #include "petscblaslapack.h" 230 231 #undef __FUNCT__ 232 #define __FUNCT__ "MatMult_SeqBAIJ_1" 233 PetscErrorCode MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 234 { 235 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 236 PetscScalar *z,sum; 237 const PetscScalar *x; 238 const MatScalar *v; 239 PetscErrorCode ierr; 240 PetscInt mbs,i,*idx,*ii,n,*ridx=PETSC_NULL,nonzerorow=0; 241 PetscTruth usecprow=a->compressedrow.use; 242 243 PetscFunctionBegin; 244 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 245 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 246 247 idx = a->j; 248 v = a->a; 249 if (usecprow){ 250 mbs = a->compressedrow.nrows; 251 ii = a->compressedrow.i; 252 ridx = a->compressedrow.rindex; 253 } else { 254 mbs = a->mbs; 255 ii = a->i; 256 } 257 258 for (i=0; i<mbs; i++) { 259 n = ii[1] - ii[0]; ii++; 260 sum = 0.0; 261 nonzerorow += (n>0); 262 while (n--) sum += *v++ * x[*idx++]; 263 if (usecprow){ 264 z[ridx[i]] = sum; 265 } else { 266 z[i] = sum; 267 } 268 } 269 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 270 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 271 ierr = PetscLogFlops(2*a->nz - nonzerorow);CHKERRQ(ierr); 272 PetscFunctionReturn(0); 273 } 274 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatMult_SeqBAIJ_2" 277 PetscErrorCode MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 278 { 279 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 280 PetscScalar *z = 0,sum1,sum2,*zarray; 281 const PetscScalar *x,*xb; 282 PetscScalar x1,x2; 283 const MatScalar *v; 284 PetscErrorCode ierr; 285 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 286 PetscTruth usecprow=a->compressedrow.use; 287 288 PetscFunctionBegin; 289 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 290 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 291 292 idx = a->j; 293 v = a->a; 294 if (usecprow){ 295 mbs = a->compressedrow.nrows; 296 ii = a->compressedrow.i; 297 ridx = a->compressedrow.rindex; 298 } else { 299 mbs = a->mbs; 300 ii = a->i; 301 z = zarray; 302 } 303 304 for (i=0; i<mbs; i++) { 305 n = ii[1] - ii[0]; ii++; 306 sum1 = 0.0; sum2 = 0.0; 307 nonzerorow += (n>0); 308 for (j=0; j<n; j++) { 309 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 310 sum1 += v[0]*x1 + v[2]*x2; 311 sum2 += v[1]*x1 + v[3]*x2; 312 v += 4; 313 } 314 if (usecprow) z = zarray + 2*ridx[i]; 315 z[0] = sum1; z[1] = sum2; 316 if (!usecprow) z += 2; 317 } 318 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 319 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 320 ierr = PetscLogFlops(8*a->nz - 2*nonzerorow);CHKERRQ(ierr); 321 PetscFunctionReturn(0); 322 } 323 324 #undef __FUNCT__ 325 #define __FUNCT__ "MatMult_SeqBAIJ_3" 326 PetscErrorCode MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 327 { 328 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 329 PetscScalar *z = 0,sum1,sum2,sum3,x1,x2,x3,*zarray; 330 const PetscScalar *x,*xb; 331 const MatScalar *v; 332 PetscErrorCode ierr; 333 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 334 PetscTruth usecprow=a->compressedrow.use; 335 336 337 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 338 #pragma disjoint(*v,*z,*xb) 339 #endif 340 341 PetscFunctionBegin; 342 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 343 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 344 345 idx = a->j; 346 v = a->a; 347 if (usecprow){ 348 mbs = a->compressedrow.nrows; 349 ii = a->compressedrow.i; 350 ridx = a->compressedrow.rindex; 351 } else { 352 mbs = a->mbs; 353 ii = a->i; 354 z = zarray; 355 } 356 357 for (i=0; i<mbs; i++) { 358 n = ii[1] - ii[0]; ii++; 359 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 360 nonzerorow += (n>0); 361 for (j=0; j<n; j++) { 362 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 363 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 364 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 365 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 366 v += 9; 367 } 368 if (usecprow) z = zarray + 3*ridx[i]; 369 z[0] = sum1; z[1] = sum2; z[2] = sum3; 370 if (!usecprow) z += 3; 371 } 372 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 373 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 374 ierr = PetscLogFlops(18*a->nz - 3*nonzerorow);CHKERRQ(ierr); 375 PetscFunctionReturn(0); 376 } 377 378 #undef __FUNCT__ 379 #define __FUNCT__ "MatMult_SeqBAIJ_4" 380 PetscErrorCode MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 381 { 382 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 383 PetscScalar *z = 0,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*zarray; 384 const PetscScalar *x,*xb; 385 const MatScalar *v; 386 PetscErrorCode ierr; 387 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 388 PetscTruth usecprow=a->compressedrow.use; 389 390 PetscFunctionBegin; 391 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 392 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 393 394 idx = a->j; 395 v = a->a; 396 if (usecprow){ 397 mbs = a->compressedrow.nrows; 398 ii = a->compressedrow.i; 399 ridx = a->compressedrow.rindex; 400 } else { 401 mbs = a->mbs; 402 ii = a->i; 403 z = zarray; 404 } 405 406 for (i=0; i<mbs; i++) { 407 n = ii[1] - ii[0]; ii++; 408 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 409 nonzerorow += (n>0); 410 for (j=0; j<n; j++) { 411 xb = x + 4*(*idx++); 412 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 413 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 414 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 415 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 416 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 417 v += 16; 418 } 419 if (usecprow) z = zarray + 4*ridx[i]; 420 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 421 if (!usecprow) z += 4; 422 } 423 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 424 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 425 ierr = PetscLogFlops(32*a->nz - 4*nonzerorow);CHKERRQ(ierr); 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatMult_SeqBAIJ_5" 431 PetscErrorCode MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 432 { 433 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 434 PetscScalar sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*z = 0,*zarray; 435 const PetscScalar *xb,*x; 436 const MatScalar *v; 437 PetscErrorCode ierr; 438 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 439 PetscTruth usecprow=a->compressedrow.use; 440 441 PetscFunctionBegin; 442 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 443 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 444 445 idx = a->j; 446 v = a->a; 447 if (usecprow){ 448 mbs = a->compressedrow.nrows; 449 ii = a->compressedrow.i; 450 ridx = a->compressedrow.rindex; 451 } else { 452 mbs = a->mbs; 453 ii = a->i; 454 z = zarray; 455 } 456 457 for (i=0; i<mbs; i++) { 458 n = ii[1] - ii[0]; ii++; 459 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 460 nonzerorow += (n>0); 461 for (j=0; j<n; j++) { 462 xb = x + 5*(*idx++); 463 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 464 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 465 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 466 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 467 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 468 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 469 v += 25; 470 } 471 if (usecprow) z = zarray + 5*ridx[i]; 472 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 473 if (!usecprow) z += 5; 474 } 475 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 476 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 477 ierr = PetscLogFlops(50*a->nz - 5*nonzerorow);CHKERRQ(ierr); 478 PetscFunctionReturn(0); 479 } 480 481 482 #undef __FUNCT__ 483 #define __FUNCT__ "MatMult_SeqBAIJ_6" 484 PetscErrorCode MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz) 485 { 486 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 487 PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6; 488 const PetscScalar *x,*xb; 489 PetscScalar x1,x2,x3,x4,x5,x6,*zarray; 490 const MatScalar *v; 491 PetscErrorCode ierr; 492 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 493 PetscTruth usecprow=a->compressedrow.use; 494 495 PetscFunctionBegin; 496 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 497 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 498 499 idx = a->j; 500 v = a->a; 501 if (usecprow){ 502 mbs = a->compressedrow.nrows; 503 ii = a->compressedrow.i; 504 ridx = a->compressedrow.rindex; 505 } else { 506 mbs = a->mbs; 507 ii = a->i; 508 z = zarray; 509 } 510 511 for (i=0; i<mbs; i++) { 512 n = ii[1] - ii[0]; ii++; 513 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; 514 nonzerorow += (n>0); 515 for (j=0; j<n; j++) { 516 xb = x + 6*(*idx++); 517 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; 518 sum1 += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 519 sum2 += v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 520 sum3 += v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 521 sum4 += v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 522 sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 523 sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 524 v += 36; 525 } 526 if (usecprow) z = zarray + 6*ridx[i]; 527 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; 528 if (!usecprow) z += 6; 529 } 530 531 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 532 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 533 ierr = PetscLogFlops(72*a->nz - 6*nonzerorow);CHKERRQ(ierr); 534 PetscFunctionReturn(0); 535 } 536 #undef __FUNCT__ 537 #define __FUNCT__ "MatMult_SeqBAIJ_7" 538 PetscErrorCode MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 539 { 540 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 541 PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 542 const PetscScalar *x,*xb; 543 PetscScalar x1,x2,x3,x4,x5,x6,x7,*zarray; 544 const MatScalar *v; 545 PetscErrorCode ierr; 546 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 547 PetscTruth usecprow=a->compressedrow.use; 548 549 PetscFunctionBegin; 550 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 551 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 552 553 idx = a->j; 554 v = a->a; 555 if (usecprow){ 556 mbs = a->compressedrow.nrows; 557 ii = a->compressedrow.i; 558 ridx = a->compressedrow.rindex; 559 } else { 560 mbs = a->mbs; 561 ii = a->i; 562 z = zarray; 563 } 564 565 for (i=0; i<mbs; i++) { 566 n = ii[1] - ii[0]; ii++; 567 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 568 nonzerorow += (n>0); 569 for (j=0; j<n; j++) { 570 xb = x + 7*(*idx++); 571 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 572 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 573 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 574 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 575 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 576 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 577 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 578 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 579 v += 49; 580 } 581 if (usecprow) z = zarray + 7*ridx[i]; 582 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 583 if (!usecprow) z += 7; 584 } 585 586 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 587 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 588 ierr = PetscLogFlops(98*a->nz - 7*nonzerorow);CHKERRQ(ierr); 589 PetscFunctionReturn(0); 590 } 591 592 /* 593 This will not work with MatScalar == float because it calls the BLAS 594 */ 595 #undef __FUNCT__ 596 #define __FUNCT__ "MatMult_SeqBAIJ_N" 597 PetscErrorCode MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 598 { 599 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 600 PetscScalar *x,*z = 0,*xb,*work,*workt,*zarray; 601 MatScalar *v; 602 PetscErrorCode ierr; 603 PetscInt mbs=a->mbs,i,*idx,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2; 604 PetscInt ncols,k,*ridx=PETSC_NULL,nonzerorow=0; 605 PetscTruth usecprow=a->compressedrow.use; 606 607 PetscFunctionBegin; 608 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 609 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 610 611 idx = a->j; 612 v = a->a; 613 if (usecprow){ 614 mbs = a->compressedrow.nrows; 615 ii = a->compressedrow.i; 616 ridx = a->compressedrow.rindex; 617 } else { 618 mbs = a->mbs; 619 ii = a->i; 620 z = zarray; 621 } 622 623 if (!a->mult_work) { 624 k = PetscMax(A->rmap.n,A->cmap.n); 625 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 626 } 627 work = a->mult_work; 628 for (i=0; i<mbs; i++) { 629 n = ii[1] - ii[0]; ii++; 630 ncols = n*bs; 631 workt = work; 632 nonzerorow += (n>0); 633 for (j=0; j<n; j++) { 634 xb = x + bs*(*idx++); 635 for (k=0; k<bs; k++) workt[k] = xb[k]; 636 workt += bs; 637 } 638 if (usecprow) z = zarray + bs*ridx[i]; 639 Kernel_w_gets_Ar_times_v(bs,ncols,work,v,z); 640 /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */ 641 v += n*bs2; 642 if (!usecprow) z += bs; 643 } 644 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 645 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 646 ierr = PetscLogFlops(2*a->nz*bs2 - bs*nonzerorow);CHKERRQ(ierr); 647 PetscFunctionReturn(0); 648 } 649 650 extern PetscErrorCode VecCopy_Seq(Vec,Vec); 651 #undef __FUNCT__ 652 #define __FUNCT__ "MatMultAdd_SeqBAIJ_1" 653 PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 654 { 655 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 656 PetscScalar *x,*y = 0,*z = 0,sum,*yarray,*zarray; 657 MatScalar *v; 658 PetscErrorCode ierr; 659 PetscInt mbs=a->mbs,i,*idx,*ii,n,*ridx=PETSC_NULL; 660 PetscTruth usecprow=a->compressedrow.use; 661 662 PetscFunctionBegin; 663 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 664 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 665 if (zz != yy) { 666 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 667 } else { 668 zarray = yarray; 669 } 670 671 idx = a->j; 672 v = a->a; 673 if (usecprow){ 674 if (zz != yy){ 675 ierr = PetscMemcpy(zarray,yarray,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 676 } 677 mbs = a->compressedrow.nrows; 678 ii = a->compressedrow.i; 679 ridx = a->compressedrow.rindex; 680 } else { 681 ii = a->i; 682 y = yarray; 683 z = zarray; 684 } 685 686 for (i=0; i<mbs; i++) { 687 n = ii[1] - ii[0]; ii++; 688 if (usecprow){ 689 z = zarray + ridx[i]; 690 y = yarray + ridx[i]; 691 } 692 sum = y[0]; 693 while (n--) sum += *v++ * x[*idx++]; 694 z[0] = sum; 695 if (!usecprow){ 696 z++; y++; 697 } 698 } 699 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 700 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 701 if (zz != yy) { 702 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 703 } 704 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatMultAdd_SeqBAIJ_2" 710 PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 711 { 712 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 713 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2; 714 PetscScalar x1,x2,*yarray,*zarray; 715 MatScalar *v; 716 PetscErrorCode ierr; 717 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 718 PetscTruth usecprow=a->compressedrow.use; 719 720 PetscFunctionBegin; 721 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 722 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 723 if (zz != yy) { 724 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 725 } else { 726 zarray = yarray; 727 } 728 729 idx = a->j; 730 v = a->a; 731 if (usecprow){ 732 if (zz != yy){ 733 ierr = PetscMemcpy(zarray,yarray,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 734 } 735 mbs = a->compressedrow.nrows; 736 ii = a->compressedrow.i; 737 ridx = a->compressedrow.rindex; 738 if (zz != yy){ 739 ierr = PetscMemcpy(zarray,yarray,a->mbs*sizeof(PetscScalar));CHKERRQ(ierr); 740 } 741 } else { 742 ii = a->i; 743 y = yarray; 744 z = zarray; 745 } 746 747 for (i=0; i<mbs; i++) { 748 n = ii[1] - ii[0]; ii++; 749 if (usecprow){ 750 z = zarray + 2*ridx[i]; 751 y = yarray + 2*ridx[i]; 752 } 753 sum1 = y[0]; sum2 = y[1]; 754 for (j=0; j<n; j++) { 755 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 756 sum1 += v[0]*x1 + v[2]*x2; 757 sum2 += v[1]*x1 + v[3]*x2; 758 v += 4; 759 } 760 z[0] = sum1; z[1] = sum2; 761 if (!usecprow){ 762 z += 2; y += 2; 763 } 764 } 765 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 766 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 767 if (zz != yy) { 768 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 769 } 770 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 771 PetscFunctionReturn(0); 772 } 773 774 #undef __FUNCT__ 775 #define __FUNCT__ "MatMultAdd_SeqBAIJ_3" 776 PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 777 { 778 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 779 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,x1,x2,x3,*yarray,*zarray; 780 MatScalar *v; 781 PetscErrorCode ierr; 782 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 783 PetscTruth usecprow=a->compressedrow.use; 784 785 PetscFunctionBegin; 786 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 787 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 788 if (zz != yy) { 789 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 790 } else { 791 zarray = yarray; 792 } 793 794 idx = a->j; 795 v = a->a; 796 if (usecprow){ 797 if (zz != yy){ 798 ierr = PetscMemcpy(zarray,yarray,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 799 } 800 mbs = a->compressedrow.nrows; 801 ii = a->compressedrow.i; 802 ridx = a->compressedrow.rindex; 803 } else { 804 ii = a->i; 805 y = yarray; 806 z = zarray; 807 } 808 809 for (i=0; i<mbs; i++) { 810 n = ii[1] - ii[0]; ii++; 811 if (usecprow){ 812 z = zarray + 3*ridx[i]; 813 y = yarray + 3*ridx[i]; 814 } 815 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 816 for (j=0; j<n; j++) { 817 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 818 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 819 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 820 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 821 v += 9; 822 } 823 z[0] = sum1; z[1] = sum2; z[2] = sum3; 824 if (!usecprow){ 825 z += 3; y += 3; 826 } 827 } 828 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 829 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 830 if (zz != yy) { 831 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 832 } 833 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 834 PetscFunctionReturn(0); 835 } 836 837 #undef __FUNCT__ 838 #define __FUNCT__ "MatMultAdd_SeqBAIJ_4" 839 PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 840 { 841 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 842 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*yarray,*zarray; 843 MatScalar *v; 844 PetscErrorCode ierr; 845 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 846 PetscTruth usecprow=a->compressedrow.use; 847 848 PetscFunctionBegin; 849 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 850 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 851 if (zz != yy) { 852 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 853 } else { 854 zarray = yarray; 855 } 856 857 idx = a->j; 858 v = a->a; 859 if (usecprow){ 860 if (zz != yy){ 861 ierr = PetscMemcpy(zarray,yarray,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 862 } 863 mbs = a->compressedrow.nrows; 864 ii = a->compressedrow.i; 865 ridx = a->compressedrow.rindex; 866 } else { 867 ii = a->i; 868 y = yarray; 869 z = zarray; 870 } 871 872 for (i=0; i<mbs; i++) { 873 n = ii[1] - ii[0]; ii++; 874 if (usecprow){ 875 z = zarray + 4*ridx[i]; 876 y = yarray + 4*ridx[i]; 877 } 878 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 879 for (j=0; j<n; j++) { 880 xb = x + 4*(*idx++); 881 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 882 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 883 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 884 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 885 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 886 v += 16; 887 } 888 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 889 if (!usecprow){ 890 z += 4; y += 4; 891 } 892 } 893 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 894 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 895 if (zz != yy) { 896 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 897 } 898 ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 899 PetscFunctionReturn(0); 900 } 901 902 #undef __FUNCT__ 903 #define __FUNCT__ "MatMultAdd_SeqBAIJ_5" 904 PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 905 { 906 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 907 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5; 908 PetscScalar *yarray,*zarray; 909 MatScalar *v; 910 PetscErrorCode ierr; 911 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 912 PetscTruth usecprow=a->compressedrow.use; 913 914 PetscFunctionBegin; 915 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 916 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 917 if (zz != yy) { 918 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 919 } else { 920 zarray = yarray; 921 } 922 923 idx = a->j; 924 v = a->a; 925 if (usecprow){ 926 if (zz != yy){ 927 ierr = PetscMemcpy(zarray,yarray,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 928 } 929 mbs = a->compressedrow.nrows; 930 ii = a->compressedrow.i; 931 ridx = a->compressedrow.rindex; 932 } else { 933 ii = a->i; 934 y = yarray; 935 z = zarray; 936 } 937 938 for (i=0; i<mbs; i++) { 939 n = ii[1] - ii[0]; ii++; 940 if (usecprow){ 941 z = zarray + 5*ridx[i]; 942 y = yarray + 5*ridx[i]; 943 } 944 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 945 for (j=0; j<n; j++) { 946 xb = x + 5*(*idx++); 947 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 948 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 949 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 950 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 951 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 952 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 953 v += 25; 954 } 955 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 956 if (!usecprow){ 957 z += 5; y += 5; 958 } 959 } 960 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 961 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 962 if (zz != yy) { 963 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 964 } 965 ierr = PetscLogFlops(50*a->nz);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } 968 #undef __FUNCT__ 969 #define __FUNCT__ "MatMultAdd_SeqBAIJ_6" 970 PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 971 { 972 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 973 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6; 974 PetscScalar x1,x2,x3,x4,x5,x6,*yarray,*zarray; 975 MatScalar *v; 976 PetscErrorCode ierr; 977 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 978 PetscTruth usecprow=a->compressedrow.use; 979 980 PetscFunctionBegin; 981 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 982 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 983 if (zz != yy) { 984 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 985 } else { 986 zarray = yarray; 987 } 988 989 idx = a->j; 990 v = a->a; 991 if (usecprow){ 992 if (zz != yy){ 993 ierr = PetscMemcpy(zarray,yarray,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 994 } 995 mbs = a->compressedrow.nrows; 996 ii = a->compressedrow.i; 997 ridx = a->compressedrow.rindex; 998 } else { 999 ii = a->i; 1000 y = yarray; 1001 z = zarray; 1002 } 1003 1004 for (i=0; i<mbs; i++) { 1005 n = ii[1] - ii[0]; ii++; 1006 if (usecprow){ 1007 z = zarray + 6*ridx[i]; 1008 y = yarray + 6*ridx[i]; 1009 } 1010 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; 1011 for (j=0; j<n; j++) { 1012 xb = x + 6*(*idx++); 1013 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; 1014 sum1 += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 1015 sum2 += v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 1016 sum3 += v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 1017 sum4 += v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 1018 sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 1019 sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 1020 v += 36; 1021 } 1022 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; 1023 if (!usecprow){ 1024 z += 6; y += 6; 1025 } 1026 } 1027 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1028 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 1029 if (zz != yy) { 1030 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 1031 } 1032 ierr = PetscLogFlops(72*a->nz);CHKERRQ(ierr); 1033 PetscFunctionReturn(0); 1034 } 1035 1036 #undef __FUNCT__ 1037 #define __FUNCT__ "MatMultAdd_SeqBAIJ_7" 1038 PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1039 { 1040 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1041 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1042 PetscScalar x1,x2,x3,x4,x5,x6,x7,*yarray,*zarray; 1043 MatScalar *v; 1044 PetscErrorCode ierr; 1045 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 1046 PetscTruth usecprow=a->compressedrow.use; 1047 1048 PetscFunctionBegin; 1049 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1050 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 1051 if (zz != yy) { 1052 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 1053 } else { 1054 zarray = yarray; 1055 } 1056 1057 idx = a->j; 1058 v = a->a; 1059 if (usecprow){ 1060 if (zz != yy){ 1061 ierr = PetscMemcpy(zarray,yarray,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1062 } 1063 mbs = a->compressedrow.nrows; 1064 ii = a->compressedrow.i; 1065 ridx = a->compressedrow.rindex; 1066 } else { 1067 ii = a->i; 1068 y = yarray; 1069 z = zarray; 1070 } 1071 1072 for (i=0; i<mbs; i++) { 1073 n = ii[1] - ii[0]; ii++; 1074 if (usecprow){ 1075 z = zarray + 7*ridx[i]; 1076 y = yarray + 7*ridx[i]; 1077 } 1078 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 1079 for (j=0; j<n; j++) { 1080 xb = x + 7*(*idx++); 1081 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1082 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1083 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1084 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1085 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1086 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1087 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1088 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1089 v += 49; 1090 } 1091 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1092 if (!usecprow){ 1093 z += 7; y += 7; 1094 } 1095 } 1096 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1097 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 1098 if (zz != yy) { 1099 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 1100 } 1101 ierr = PetscLogFlops(98*a->nz);CHKERRQ(ierr); 1102 PetscFunctionReturn(0); 1103 } 1104 1105 #undef __FUNCT__ 1106 #define __FUNCT__ "MatMultAdd_SeqBAIJ_N" 1107 PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1108 { 1109 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1110 PetscScalar *x,*z = 0,*xb,*work,*workt,*zarray; 1111 MatScalar *v; 1112 PetscErrorCode ierr; 1113 PetscInt mbs,i,*idx,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2; 1114 PetscInt ncols,k,*ridx=PETSC_NULL; 1115 PetscTruth usecprow=a->compressedrow.use; 1116 1117 PetscFunctionBegin; 1118 ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); 1119 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1120 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 1121 1122 idx = a->j; 1123 v = a->a; 1124 if (usecprow){ 1125 mbs = a->compressedrow.nrows; 1126 ii = a->compressedrow.i; 1127 ridx = a->compressedrow.rindex; 1128 } else { 1129 mbs = a->mbs; 1130 ii = a->i; 1131 z = zarray; 1132 } 1133 1134 if (!a->mult_work) { 1135 k = PetscMax(A->rmap.n,A->cmap.n); 1136 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 1137 } 1138 work = a->mult_work; 1139 for (i=0; i<mbs; i++) { 1140 n = ii[1] - ii[0]; ii++; 1141 ncols = n*bs; 1142 workt = work; 1143 for (j=0; j<n; j++) { 1144 xb = x + bs*(*idx++); 1145 for (k=0; k<bs; k++) workt[k] = xb[k]; 1146 workt += bs; 1147 } 1148 if (usecprow) z = zarray + bs*ridx[i]; 1149 Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 1150 /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */ 1151 v += n*bs2; 1152 if (!usecprow){ 1153 z += bs; 1154 } 1155 } 1156 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1157 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 1158 ierr = PetscLogFlops(2*a->nz*bs2);CHKERRQ(ierr); 1159 PetscFunctionReturn(0); 1160 } 1161 1162 #undef __FUNCT__ 1163 #define __FUNCT__ "MatMultTranspose_SeqBAIJ" 1164 PetscErrorCode MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz) 1165 { 1166 PetscScalar zero = 0.0; 1167 PetscErrorCode ierr; 1168 1169 PetscFunctionBegin; 1170 ierr = VecSet(zz,zero);CHKERRQ(ierr); 1171 ierr = MatMultTransposeAdd_SeqBAIJ(A,xx,zz,zz);CHKERRQ(ierr); 1172 PetscFunctionReturn(0); 1173 } 1174 1175 #undef __FUNCT__ 1176 #define __FUNCT__ "MatMultTransposeAdd_SeqBAIJ" 1177 PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1178 1179 { 1180 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1181 PetscScalar *zb,*x,*z,*xb = 0,x1,x2,x3,x4,x5; 1182 MatScalar *v; 1183 PetscErrorCode ierr; 1184 PetscInt mbs,i,*idx,*ii,rval,bs=A->rmap.bs,j,n,bs2=a->bs2,*ib,*ridx=PETSC_NULL; 1185 Mat_CompressedRow cprow = a->compressedrow; 1186 PetscTruth usecprow=cprow.use; 1187 1188 PetscFunctionBegin; 1189 if (yy != zz) { ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); } 1190 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1191 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 1192 1193 idx = a->j; 1194 v = a->a; 1195 if (usecprow){ 1196 mbs = cprow.nrows; 1197 ii = cprow.i; 1198 ridx = cprow.rindex; 1199 } else { 1200 mbs=a->mbs; 1201 ii = a->i; 1202 xb = x; 1203 } 1204 1205 switch (bs) { 1206 case 1: 1207 for (i=0; i<mbs; i++) { 1208 if (usecprow) xb = x + ridx[i]; 1209 x1 = xb[0]; 1210 ib = idx + ii[0]; 1211 n = ii[1] - ii[0]; ii++; 1212 for (j=0; j<n; j++) { 1213 rval = ib[j]; 1214 z[rval] += *v * x1; 1215 v++; 1216 } 1217 if (!usecprow) xb++; 1218 } 1219 break; 1220 case 2: 1221 for (i=0; i<mbs; i++) { 1222 if (usecprow) xb = x + 2*ridx[i]; 1223 x1 = xb[0]; x2 = xb[1]; 1224 ib = idx + ii[0]; 1225 n = ii[1] - ii[0]; ii++; 1226 for (j=0; j<n; j++) { 1227 rval = ib[j]*2; 1228 z[rval++] += v[0]*x1 + v[1]*x2; 1229 z[rval++] += v[2]*x1 + v[3]*x2; 1230 v += 4; 1231 } 1232 if (!usecprow) xb += 2; 1233 } 1234 break; 1235 case 3: 1236 for (i=0; i<mbs; i++) { 1237 if (usecprow) xb = x + 3*ridx[i]; 1238 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1239 ib = idx + ii[0]; 1240 n = ii[1] - ii[0]; ii++; 1241 for (j=0; j<n; j++) { 1242 rval = ib[j]*3; 1243 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1244 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1245 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1246 v += 9; 1247 } 1248 if (!usecprow) xb += 3; 1249 } 1250 break; 1251 case 4: 1252 for (i=0; i<mbs; i++) { 1253 if (usecprow) xb = x + 4*ridx[i]; 1254 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1255 ib = idx + ii[0]; 1256 n = ii[1] - ii[0]; ii++; 1257 for (j=0; j<n; j++) { 1258 rval = ib[j]*4; 1259 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1260 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1261 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1262 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1263 v += 16; 1264 } 1265 if (!usecprow) xb += 4; 1266 } 1267 break; 1268 case 5: 1269 for (i=0; i<mbs; i++) { 1270 if (usecprow) xb = x + 5*ridx[i]; 1271 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1272 x4 = xb[3]; x5 = xb[4]; 1273 ib = idx + ii[0]; 1274 n = ii[1] - ii[0]; ii++; 1275 for (j=0; j<n; j++) { 1276 rval = ib[j]*5; 1277 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1278 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1279 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1280 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1281 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1282 v += 25; 1283 } 1284 if (!usecprow) xb += 5; 1285 } 1286 break; 1287 default: { /* block sizes larger then 5 by 5 are handled by BLAS */ 1288 PetscInt ncols,k; 1289 PetscScalar *work,*workt,*xtmp; 1290 1291 if (!a->mult_work) { 1292 k = PetscMax(A->rmap.n,A->cmap.n); 1293 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 1294 } 1295 work = a->mult_work; 1296 xtmp = x; 1297 for (i=0; i<mbs; i++) { 1298 n = ii[1] - ii[0]; ii++; 1299 ncols = n*bs; 1300 ierr = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 1301 if (usecprow) { 1302 xtmp = x + bs*ridx[i]; 1303 } 1304 Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,xtmp,v,work); 1305 /* BLASgemv_("T",&bs,&ncols,&_DOne,v,&bs,xtmp,&_One,&_DOne,work,&_One); */ 1306 v += n*bs2; 1307 if (!usecprow) xtmp += bs; 1308 workt = work; 1309 for (j=0; j<n; j++) { 1310 zb = z + bs*(*idx++); 1311 for (k=0; k<bs; k++) zb[k] += workt[k] ; 1312 workt += bs; 1313 } 1314 } 1315 } 1316 } 1317 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1318 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 1319 ierr = PetscLogFlops(2*a->nz*a->bs2);CHKERRQ(ierr); 1320 PetscFunctionReturn(0); 1321 } 1322 1323 #undef __FUNCT__ 1324 #define __FUNCT__ "MatScale_SeqBAIJ" 1325 PetscErrorCode MatScale_SeqBAIJ(Mat inA,PetscScalar alpha) 1326 { 1327 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1328 PetscInt totalnz = a->bs2*a->nz; 1329 PetscScalar oalpha = alpha; 1330 PetscErrorCode ierr; 1331 PetscBLASInt one = 1,tnz = PetscBLASIntCast(totalnz); 1332 1333 PetscFunctionBegin; 1334 BLASscal_(&tnz,&oalpha,a->a,&one); 1335 ierr = PetscLogFlops(totalnz);CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } 1338 1339 #undef __FUNCT__ 1340 #define __FUNCT__ "MatNorm_SeqBAIJ" 1341 PetscErrorCode MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm) 1342 { 1343 PetscErrorCode ierr; 1344 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1345 MatScalar *v = a->a; 1346 PetscReal sum = 0.0; 1347 PetscInt i,j,k,bs=A->rmap.bs,nz=a->nz,bs2=a->bs2,k1; 1348 1349 PetscFunctionBegin; 1350 if (type == NORM_FROBENIUS) { 1351 for (i=0; i< bs2*nz; i++) { 1352 #if defined(PETSC_USE_COMPLEX) 1353 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1354 #else 1355 sum += (*v)*(*v); v++; 1356 #endif 1357 } 1358 *norm = sqrt(sum); 1359 } else if (type == NORM_1) { /* maximum column sum */ 1360 PetscReal *tmp; 1361 PetscInt *bcol = a->j; 1362 ierr = PetscMalloc((A->cmap.n+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1363 ierr = PetscMemzero(tmp,A->cmap.n*sizeof(PetscReal));CHKERRQ(ierr); 1364 for (i=0; i<nz; i++){ 1365 for (j=0; j<bs; j++){ 1366 k1 = bs*(*bcol) + j; /* column index */ 1367 for (k=0; k<bs; k++){ 1368 tmp[k1] += PetscAbsScalar(*v); v++; 1369 } 1370 } 1371 bcol++; 1372 } 1373 *norm = 0.0; 1374 for (j=0; j<A->cmap.n; j++) { 1375 if (tmp[j] > *norm) *norm = tmp[j]; 1376 } 1377 ierr = PetscFree(tmp);CHKERRQ(ierr); 1378 } else if (type == NORM_INFINITY) { /* maximum row sum */ 1379 *norm = 0.0; 1380 for (k=0; k<bs; k++) { 1381 for (j=0; j<a->mbs; j++) { 1382 v = a->a + bs2*a->i[j] + k; 1383 sum = 0.0; 1384 for (i=0; i<a->i[j+1]-a->i[j]; i++) { 1385 for (k1=0; k1<bs; k1++){ 1386 sum += PetscAbsScalar(*v); 1387 v += bs; 1388 } 1389 } 1390 if (sum > *norm) *norm = sum; 1391 } 1392 } 1393 } else { 1394 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 1395 } 1396 PetscFunctionReturn(0); 1397 } 1398 1399 1400 #undef __FUNCT__ 1401 #define __FUNCT__ "MatEqual_SeqBAIJ" 1402 PetscErrorCode MatEqual_SeqBAIJ(Mat A,Mat B,PetscTruth* flg) 1403 { 1404 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data,*b = (Mat_SeqBAIJ *)B->data; 1405 PetscErrorCode ierr; 1406 1407 PetscFunctionBegin; 1408 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1409 if ((A->rmap.N != B->rmap.N) || (A->cmap.n != B->cmap.n) || (A->rmap.bs != B->rmap.bs)|| (a->nz != b->nz)) { 1410 *flg = PETSC_FALSE; 1411 PetscFunctionReturn(0); 1412 } 1413 1414 /* if the a->i are the same */ 1415 ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr); 1416 if (!*flg) { 1417 PetscFunctionReturn(0); 1418 } 1419 1420 /* if a->j are the same */ 1421 ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr); 1422 if (!*flg) { 1423 PetscFunctionReturn(0); 1424 } 1425 /* if a->a are the same */ 1426 ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap.bs)*(B->rmap.bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr); 1427 PetscFunctionReturn(0); 1428 1429 } 1430 1431 #undef __FUNCT__ 1432 #define __FUNCT__ "MatGetDiagonal_SeqBAIJ" 1433 PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1434 { 1435 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1436 PetscErrorCode ierr; 1437 PetscInt i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1438 PetscScalar *x,zero = 0.0; 1439 MatScalar *aa,*aa_j; 1440 1441 PetscFunctionBegin; 1442 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1443 bs = A->rmap.bs; 1444 aa = a->a; 1445 ai = a->i; 1446 aj = a->j; 1447 ambs = a->mbs; 1448 bs2 = a->bs2; 1449 1450 ierr = VecSet(v,zero);CHKERRQ(ierr); 1451 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1452 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1453 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1454 for (i=0; i<ambs; i++) { 1455 for (j=ai[i]; j<ai[i+1]; j++) { 1456 if (aj[j] == i) { 1457 row = i*bs; 1458 aa_j = aa+j*bs2; 1459 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1460 break; 1461 } 1462 } 1463 } 1464 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1465 PetscFunctionReturn(0); 1466 } 1467 1468 #undef __FUNCT__ 1469 #define __FUNCT__ "MatDiagonalScale_SeqBAIJ" 1470 PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1471 { 1472 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1473 PetscScalar *l,*r,x,*li,*ri; 1474 MatScalar *aa,*v; 1475 PetscErrorCode ierr; 1476 PetscInt i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1477 1478 PetscFunctionBegin; 1479 ai = a->i; 1480 aj = a->j; 1481 aa = a->a; 1482 m = A->rmap.n; 1483 n = A->cmap.n; 1484 bs = A->rmap.bs; 1485 mbs = a->mbs; 1486 bs2 = a->bs2; 1487 if (ll) { 1488 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 1489 ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); 1490 if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 1491 for (i=0; i<mbs; i++) { /* for each block row */ 1492 M = ai[i+1] - ai[i]; 1493 li = l + i*bs; 1494 v = aa + bs2*ai[i]; 1495 for (j=0; j<M; j++) { /* for each block */ 1496 for (k=0; k<bs2; k++) { 1497 (*v++) *= li[k%bs]; 1498 } 1499 } 1500 } 1501 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 1502 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1503 } 1504 1505 if (rr) { 1506 ierr = VecGetArray(rr,&r);CHKERRQ(ierr); 1507 ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr); 1508 if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 1509 for (i=0; i<mbs; i++) { /* for each block row */ 1510 M = ai[i+1] - ai[i]; 1511 v = aa + bs2*ai[i]; 1512 for (j=0; j<M; j++) { /* for each block */ 1513 ri = r + bs*aj[ai[i]+j]; 1514 for (k=0; k<bs; k++) { 1515 x = ri[k]; 1516 for (tmp=0; tmp<bs; tmp++) (*v++) *= x; 1517 } 1518 } 1519 } 1520 ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr); 1521 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1522 } 1523 PetscFunctionReturn(0); 1524 } 1525 1526 1527 #undef __FUNCT__ 1528 #define __FUNCT__ "MatGetInfo_SeqBAIJ" 1529 PetscErrorCode MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1530 { 1531 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1532 1533 PetscFunctionBegin; 1534 info->rows_global = (double)A->rmap.N; 1535 info->columns_global = (double)A->cmap.n; 1536 info->rows_local = (double)A->rmap.n; 1537 info->columns_local = (double)A->cmap.n; 1538 info->block_size = a->bs2; 1539 info->nz_allocated = a->maxnz; 1540 info->nz_used = a->bs2*a->nz; 1541 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1542 info->assemblies = A->num_ass; 1543 info->mallocs = a->reallocs; 1544 info->memory = ((PetscObject)A)->mem; 1545 if (A->factor) { 1546 info->fill_ratio_given = A->info.fill_ratio_given; 1547 info->fill_ratio_needed = A->info.fill_ratio_needed; 1548 info->factor_mallocs = A->info.factor_mallocs; 1549 } else { 1550 info->fill_ratio_given = 0; 1551 info->fill_ratio_needed = 0; 1552 info->factor_mallocs = 0; 1553 } 1554 PetscFunctionReturn(0); 1555 } 1556 1557 1558 #undef __FUNCT__ 1559 #define __FUNCT__ "MatZeroEntries_SeqBAIJ" 1560 PetscErrorCode MatZeroEntries_SeqBAIJ(Mat A) 1561 { 1562 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1563 PetscErrorCode ierr; 1564 1565 PetscFunctionBegin; 1566 ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); 1567 PetscFunctionReturn(0); 1568 } 1569