1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/seq/baij.h" 4 #include "src/inline/spops.h" 5 #include "src/inline/ilu.h" 6 #include "petscbt.h" 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatIncreaseOverlap_SeqBAIJ" 10 PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov) 11 { 12 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 13 PetscErrorCode ierr; 14 PetscInt row,i,j,k,l,m,n,*nidx,isz,val,ival; 15 const PetscInt *idx; 16 PetscInt start,end,*ai,*aj,bs,*nidx2; 17 PetscBT table; 18 19 PetscFunctionBegin; 20 m = a->mbs; 21 ai = a->i; 22 aj = a->j; 23 bs = A->rmap->bs; 24 25 if (ov < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified"); 26 27 ierr = PetscBTCreate(m,table);CHKERRQ(ierr); 28 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr); 29 ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr); 30 31 for (i=0; i<is_max; i++) { 32 /* Initialise the two local arrays */ 33 isz = 0; 34 ierr = PetscBTMemzero(m,table);CHKERRQ(ierr); 35 36 /* Extract the indices, assume there can be duplicate entries */ 37 ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr); 38 ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr); 39 40 /* Enter these into the temp arrays i.e mark table[row], enter row into new index */ 41 for (j=0; j<n ; ++j){ 42 ival = idx[j]/bs; /* convert the indices into block indices */ 43 if (ival>=m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim"); 44 if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;} 45 } 46 ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr); 47 ierr = ISDestroy(is[i]);CHKERRQ(ierr); 48 49 k = 0; 50 for (j=0; j<ov; j++){ /* for each overlap*/ 51 n = isz; 52 for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */ 53 row = nidx[k]; 54 start = ai[row]; 55 end = ai[row+1]; 56 for (l = start; l<end ; l++){ 57 val = aj[l]; 58 if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;} 59 } 60 } 61 } 62 /* expand the Index Set */ 63 for (j=0; j<isz; j++) { 64 for (k=0; k<bs; k++) 65 nidx2[j*bs+k] = nidx[j]*bs+k; 66 } 67 ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr); 68 } 69 ierr = PetscBTDestroy(table);CHKERRQ(ierr); 70 ierr = PetscFree(nidx);CHKERRQ(ierr); 71 ierr = PetscFree(nidx2);CHKERRQ(ierr); 72 PetscFunctionReturn(0); 73 } 74 75 #undef __FUNCT__ 76 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ_Private" 77 PetscErrorCode MatGetSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 78 { 79 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*c; 80 PetscErrorCode ierr; 81 PetscInt *smap,i,k,kstart,kend,oldcols = a->nbs,*lens; 82 PetscInt row,mat_i,*mat_j,tcol,*mat_ilen; 83 const PetscInt *irow,*icol; 84 PetscInt nrows,ncols,*ssmap,bs=A->rmap->bs,bs2=a->bs2; 85 PetscInt *aj = a->j,*ai = a->i; 86 MatScalar *mat_a; 87 Mat C; 88 PetscTruth flag; 89 90 PetscFunctionBegin; 91 ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr); 92 if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted"); 93 94 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 95 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 96 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 97 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 98 99 ierr = PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);CHKERRQ(ierr); 100 ssmap = smap; 101 ierr = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 102 ierr = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr); 103 for (i=0; i<ncols; i++) smap[icol[i]] = i+1; 104 /* determine lens of each row */ 105 for (i=0; i<nrows; i++) { 106 kstart = ai[irow[i]]; 107 kend = kstart + a->ilen[irow[i]]; 108 lens[i] = 0; 109 for (k=kstart; k<kend; k++) { 110 if (ssmap[aj[k]]) { 111 lens[i]++; 112 } 113 } 114 } 115 /* Create and fill new matrix */ 116 if (scall == MAT_REUSE_MATRIX) { 117 c = (Mat_SeqBAIJ *)((*B)->data); 118 119 if (c->mbs!=nrows || c->nbs!=ncols || (*B)->rmap->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size"); 120 ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr); 121 if (!flag) { 122 SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); 123 } 124 ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr); 125 C = *B; 126 } else { 127 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 128 ierr = MatSetSizes(C,nrows*bs,ncols*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 129 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 130 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,lens);CHKERRQ(ierr); 131 } 132 c = (Mat_SeqBAIJ *)(C->data); 133 for (i=0; i<nrows; i++) { 134 row = irow[i]; 135 kstart = ai[row]; 136 kend = kstart + a->ilen[row]; 137 mat_i = c->i[i]; 138 mat_j = c->j + mat_i; 139 mat_a = c->a + mat_i*bs2; 140 mat_ilen = c->ilen + i; 141 for (k=kstart; k<kend; k++) { 142 if ((tcol=ssmap[a->j[k]])) { 143 *mat_j++ = tcol - 1; 144 ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 145 mat_a += bs2; 146 (*mat_ilen)++; 147 } 148 } 149 } 150 151 /* Free work space */ 152 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 153 ierr = PetscFree(smap);CHKERRQ(ierr); 154 ierr = PetscFree(lens);CHKERRQ(ierr); 155 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 156 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 157 158 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 159 *B = C; 160 PetscFunctionReturn(0); 161 } 162 163 #undef __FUNCT__ 164 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ" 165 PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 166 { 167 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 168 IS is1,is2; 169 PetscErrorCode ierr; 170 PetscInt *vary,*iary,nrows,ncols,i,bs=A->rmap->bs,count; 171 const PetscInt *irow,*icol; 172 173 PetscFunctionBegin; 174 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 175 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 176 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 177 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 178 179 /* Verify if the indices corespond to each element in a block 180 and form the IS with compressed IS */ 181 ierr = PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);CHKERRQ(ierr); 182 iary = vary + a->mbs; 183 ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr); 184 for (i=0; i<nrows; i++) vary[irow[i]/bs]++; 185 count = 0; 186 for (i=0; i<a->mbs; i++) { 187 if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Index set does not match blocks"); 188 if (vary[i]==bs) iary[count++] = i; 189 } 190 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr); 191 192 ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr); 193 for (i=0; i<ncols; i++) vary[icol[i]/bs]++; 194 count = 0; 195 for (i=0; i<a->mbs; i++) { 196 if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_PLIB,"Internal error in PETSc"); 197 if (vary[i]==bs) iary[count++] = i; 198 } 199 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is2);CHKERRQ(ierr); 200 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 201 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 202 ierr = PetscFree(vary);CHKERRQ(ierr); 203 204 ierr = MatGetSubMatrix_SeqBAIJ_Private(A,is1,is2,cs,scall,B);CHKERRQ(ierr); 205 ISDestroy(is1); 206 ISDestroy(is2); 207 PetscFunctionReturn(0); 208 } 209 210 #undef __FUNCT__ 211 #define __FUNCT__ "MatGetSubMatrices_SeqBAIJ" 212 PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 213 { 214 PetscErrorCode ierr; 215 PetscInt i; 216 217 PetscFunctionBegin; 218 if (scall == MAT_INITIAL_MATRIX) { 219 ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr); 220 } 221 222 for (i=0; i<n; i++) { 223 ierr = MatGetSubMatrix_SeqBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr); 224 } 225 PetscFunctionReturn(0); 226 } 227 228 229 /* -------------------------------------------------------*/ 230 /* Should check that shapes of vectors and matrices match */ 231 /* -------------------------------------------------------*/ 232 #include "petscblaslapack.h" 233 234 #undef __FUNCT__ 235 #define __FUNCT__ "MatMult_SeqBAIJ_1" 236 PetscErrorCode MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 237 { 238 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 239 PetscScalar *z,sum; 240 const PetscScalar *x; 241 const MatScalar *v; 242 PetscErrorCode ierr; 243 PetscInt mbs,i,*idx,*ii,n,*ridx=PETSC_NULL,nonzerorow=0; 244 PetscTruth usecprow=a->compressedrow.use; 245 246 PetscFunctionBegin; 247 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 248 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 249 250 idx = a->j; 251 v = a->a; 252 if (usecprow){ 253 mbs = a->compressedrow.nrows; 254 ii = a->compressedrow.i; 255 ridx = a->compressedrow.rindex; 256 } else { 257 mbs = a->mbs; 258 ii = a->i; 259 } 260 261 for (i=0; i<mbs; i++) { 262 n = ii[1] - ii[0]; ii++; 263 sum = 0.0; 264 nonzerorow += (n>0); 265 while (n--) sum += *v++ * x[*idx++]; 266 if (usecprow){ 267 z[ridx[i]] = sum; 268 } else { 269 z[i] = sum; 270 } 271 } 272 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 273 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 274 ierr = PetscLogFlops(2*a->nz - nonzerorow);CHKERRQ(ierr); 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatMult_SeqBAIJ_2" 280 PetscErrorCode MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 281 { 282 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 283 PetscScalar *z = 0,sum1,sum2,*zarray; 284 const PetscScalar *x,*xb; 285 PetscScalar x1,x2; 286 const MatScalar *v; 287 PetscErrorCode ierr; 288 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 289 PetscTruth usecprow=a->compressedrow.use; 290 291 PetscFunctionBegin; 292 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 293 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 294 295 idx = a->j; 296 v = a->a; 297 if (usecprow){ 298 mbs = a->compressedrow.nrows; 299 ii = a->compressedrow.i; 300 ridx = a->compressedrow.rindex; 301 } else { 302 mbs = a->mbs; 303 ii = a->i; 304 z = zarray; 305 } 306 307 for (i=0; i<mbs; i++) { 308 n = ii[1] - ii[0]; ii++; 309 sum1 = 0.0; sum2 = 0.0; 310 nonzerorow += (n>0); 311 for (j=0; j<n; j++) { 312 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 313 sum1 += v[0]*x1 + v[2]*x2; 314 sum2 += v[1]*x1 + v[3]*x2; 315 v += 4; 316 } 317 if (usecprow) z = zarray + 2*ridx[i]; 318 z[0] = sum1; z[1] = sum2; 319 if (!usecprow) z += 2; 320 } 321 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 322 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 323 ierr = PetscLogFlops(8*a->nz - 2*nonzerorow);CHKERRQ(ierr); 324 PetscFunctionReturn(0); 325 } 326 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatMult_SeqBAIJ_3" 329 PetscErrorCode MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 330 { 331 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 332 PetscScalar *z = 0,sum1,sum2,sum3,x1,x2,x3,*zarray; 333 const PetscScalar *x,*xb; 334 const MatScalar *v; 335 PetscErrorCode ierr; 336 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 337 PetscTruth usecprow=a->compressedrow.use; 338 339 340 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 341 #pragma disjoint(*v,*z,*xb) 342 #endif 343 344 PetscFunctionBegin; 345 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 346 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 347 348 idx = a->j; 349 v = a->a; 350 if (usecprow){ 351 mbs = a->compressedrow.nrows; 352 ii = a->compressedrow.i; 353 ridx = a->compressedrow.rindex; 354 } else { 355 mbs = a->mbs; 356 ii = a->i; 357 z = zarray; 358 } 359 360 for (i=0; i<mbs; i++) { 361 n = ii[1] - ii[0]; ii++; 362 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 363 nonzerorow += (n>0); 364 for (j=0; j<n; j++) { 365 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 366 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 367 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 368 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 369 v += 9; 370 } 371 if (usecprow) z = zarray + 3*ridx[i]; 372 z[0] = sum1; z[1] = sum2; z[2] = sum3; 373 if (!usecprow) z += 3; 374 } 375 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 376 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 377 ierr = PetscLogFlops(18*a->nz - 3*nonzerorow);CHKERRQ(ierr); 378 PetscFunctionReturn(0); 379 } 380 381 #undef __FUNCT__ 382 #define __FUNCT__ "MatMult_SeqBAIJ_4" 383 PetscErrorCode MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 384 { 385 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 386 PetscScalar *z = 0,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*zarray; 387 const PetscScalar *x,*xb; 388 const MatScalar *v; 389 PetscErrorCode ierr; 390 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 391 PetscTruth usecprow=a->compressedrow.use; 392 393 PetscFunctionBegin; 394 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 395 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 396 397 idx = a->j; 398 v = a->a; 399 if (usecprow){ 400 mbs = a->compressedrow.nrows; 401 ii = a->compressedrow.i; 402 ridx = a->compressedrow.rindex; 403 } else { 404 mbs = a->mbs; 405 ii = a->i; 406 z = zarray; 407 } 408 409 for (i=0; i<mbs; i++) { 410 n = ii[1] - ii[0]; ii++; 411 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 412 nonzerorow += (n>0); 413 for (j=0; j<n; j++) { 414 xb = x + 4*(*idx++); 415 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 416 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 417 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 418 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 419 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 420 v += 16; 421 } 422 if (usecprow) z = zarray + 4*ridx[i]; 423 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 424 if (!usecprow) z += 4; 425 } 426 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 427 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 428 ierr = PetscLogFlops(32*a->nz - 4*nonzerorow);CHKERRQ(ierr); 429 PetscFunctionReturn(0); 430 } 431 432 #undef __FUNCT__ 433 #define __FUNCT__ "MatMult_SeqBAIJ_5" 434 PetscErrorCode MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 435 { 436 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 437 PetscScalar sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*z = 0,*zarray; 438 const PetscScalar *xb,*x; 439 const MatScalar *v; 440 PetscErrorCode ierr; 441 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 442 PetscTruth usecprow=a->compressedrow.use; 443 444 PetscFunctionBegin; 445 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 446 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 447 448 idx = a->j; 449 v = a->a; 450 if (usecprow){ 451 mbs = a->compressedrow.nrows; 452 ii = a->compressedrow.i; 453 ridx = a->compressedrow.rindex; 454 } else { 455 mbs = a->mbs; 456 ii = a->i; 457 z = zarray; 458 } 459 460 for (i=0; i<mbs; i++) { 461 n = ii[1] - ii[0]; ii++; 462 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 463 nonzerorow += (n>0); 464 for (j=0; j<n; j++) { 465 xb = x + 5*(*idx++); 466 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 467 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 468 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 469 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 470 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 471 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 472 v += 25; 473 } 474 if (usecprow) z = zarray + 5*ridx[i]; 475 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 476 if (!usecprow) z += 5; 477 } 478 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 479 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 480 ierr = PetscLogFlops(50*a->nz - 5*nonzerorow);CHKERRQ(ierr); 481 PetscFunctionReturn(0); 482 } 483 484 485 #undef __FUNCT__ 486 #define __FUNCT__ "MatMult_SeqBAIJ_6" 487 PetscErrorCode MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz) 488 { 489 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 490 PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6; 491 const PetscScalar *x,*xb; 492 PetscScalar x1,x2,x3,x4,x5,x6,*zarray; 493 const MatScalar *v; 494 PetscErrorCode ierr; 495 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 496 PetscTruth usecprow=a->compressedrow.use; 497 498 PetscFunctionBegin; 499 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 500 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 501 502 idx = a->j; 503 v = a->a; 504 if (usecprow){ 505 mbs = a->compressedrow.nrows; 506 ii = a->compressedrow.i; 507 ridx = a->compressedrow.rindex; 508 } else { 509 mbs = a->mbs; 510 ii = a->i; 511 z = zarray; 512 } 513 514 for (i=0; i<mbs; i++) { 515 n = ii[1] - ii[0]; ii++; 516 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; 517 nonzerorow += (n>0); 518 for (j=0; j<n; j++) { 519 xb = x + 6*(*idx++); 520 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; 521 sum1 += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 522 sum2 += v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 523 sum3 += v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 524 sum4 += v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 525 sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 526 sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 527 v += 36; 528 } 529 if (usecprow) z = zarray + 6*ridx[i]; 530 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; 531 if (!usecprow) z += 6; 532 } 533 534 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 535 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 536 ierr = PetscLogFlops(72*a->nz - 6*nonzerorow);CHKERRQ(ierr); 537 PetscFunctionReturn(0); 538 } 539 #undef __FUNCT__ 540 #define __FUNCT__ "MatMult_SeqBAIJ_7" 541 PetscErrorCode MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 542 { 543 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 544 PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 545 const PetscScalar *x,*xb; 546 PetscScalar x1,x2,x3,x4,x5,x6,x7,*zarray; 547 const MatScalar *v; 548 PetscErrorCode ierr; 549 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 550 PetscTruth usecprow=a->compressedrow.use; 551 552 PetscFunctionBegin; 553 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 554 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 555 556 idx = a->j; 557 v = a->a; 558 if (usecprow){ 559 mbs = a->compressedrow.nrows; 560 ii = a->compressedrow.i; 561 ridx = a->compressedrow.rindex; 562 } else { 563 mbs = a->mbs; 564 ii = a->i; 565 z = zarray; 566 } 567 568 for (i=0; i<mbs; i++) { 569 n = ii[1] - ii[0]; ii++; 570 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 571 nonzerorow += (n>0); 572 for (j=0; j<n; j++) { 573 xb = x + 7*(*idx++); 574 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 575 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 576 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 577 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 578 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 579 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 580 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 581 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 582 v += 49; 583 } 584 if (usecprow) z = zarray + 7*ridx[i]; 585 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 586 if (!usecprow) z += 7; 587 } 588 589 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 590 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 591 ierr = PetscLogFlops(98*a->nz - 7*nonzerorow);CHKERRQ(ierr); 592 PetscFunctionReturn(0); 593 } 594 595 /* 596 This will not work with MatScalar == float because it calls the BLAS 597 */ 598 #undef __FUNCT__ 599 #define __FUNCT__ "MatMult_SeqBAIJ_N" 600 PetscErrorCode MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 601 { 602 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 603 PetscScalar *x,*z = 0,*xb,*work,*workt,*zarray; 604 MatScalar *v; 605 PetscErrorCode ierr; 606 PetscInt mbs=a->mbs,i,*idx,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2; 607 PetscInt ncols,k,*ridx=PETSC_NULL,nonzerorow=0; 608 PetscTruth usecprow=a->compressedrow.use; 609 610 PetscFunctionBegin; 611 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 612 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 613 614 idx = a->j; 615 v = a->a; 616 if (usecprow){ 617 mbs = a->compressedrow.nrows; 618 ii = a->compressedrow.i; 619 ridx = a->compressedrow.rindex; 620 } else { 621 mbs = a->mbs; 622 ii = a->i; 623 z = zarray; 624 } 625 626 if (!a->mult_work) { 627 k = PetscMax(A->rmap->n,A->cmap->n); 628 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 629 } 630 work = a->mult_work; 631 for (i=0; i<mbs; i++) { 632 n = ii[1] - ii[0]; ii++; 633 ncols = n*bs; 634 workt = work; 635 nonzerorow += (n>0); 636 for (j=0; j<n; j++) { 637 xb = x + bs*(*idx++); 638 for (k=0; k<bs; k++) workt[k] = xb[k]; 639 workt += bs; 640 } 641 if (usecprow) z = zarray + bs*ridx[i]; 642 Kernel_w_gets_Ar_times_v(bs,ncols,work,v,z); 643 /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */ 644 v += n*bs2; 645 if (!usecprow) z += bs; 646 } 647 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 648 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 649 ierr = PetscLogFlops(2*a->nz*bs2 - bs*nonzerorow);CHKERRQ(ierr); 650 PetscFunctionReturn(0); 651 } 652 653 extern PetscErrorCode VecCopy_Seq(Vec,Vec); 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatMultAdd_SeqBAIJ_1" 656 PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 657 { 658 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 659 PetscScalar *x,*y = 0,*z = 0,sum,*yarray,*zarray; 660 MatScalar *v; 661 PetscErrorCode ierr; 662 PetscInt mbs=a->mbs,i,*idx,*ii,n,*ridx=PETSC_NULL; 663 PetscTruth usecprow=a->compressedrow.use; 664 665 PetscFunctionBegin; 666 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 667 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 668 if (zz != yy) { 669 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 670 } else { 671 zarray = yarray; 672 } 673 674 idx = a->j; 675 v = a->a; 676 if (usecprow){ 677 if (zz != yy){ 678 ierr = PetscMemcpy(zarray,yarray,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 679 } 680 mbs = a->compressedrow.nrows; 681 ii = a->compressedrow.i; 682 ridx = a->compressedrow.rindex; 683 } else { 684 ii = a->i; 685 y = yarray; 686 z = zarray; 687 } 688 689 for (i=0; i<mbs; i++) { 690 n = ii[1] - ii[0]; ii++; 691 if (usecprow){ 692 z = zarray + ridx[i]; 693 y = yarray + ridx[i]; 694 } 695 sum = y[0]; 696 while (n--) sum += *v++ * x[*idx++]; 697 z[0] = sum; 698 if (!usecprow){ 699 z++; y++; 700 } 701 } 702 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 703 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 704 if (zz != yy) { 705 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 706 } 707 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatMultAdd_SeqBAIJ_2" 713 PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 714 { 715 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 716 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2; 717 PetscScalar x1,x2,*yarray,*zarray; 718 MatScalar *v; 719 PetscErrorCode ierr; 720 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 721 PetscTruth usecprow=a->compressedrow.use; 722 723 PetscFunctionBegin; 724 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 725 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 726 if (zz != yy) { 727 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 728 } else { 729 zarray = yarray; 730 } 731 732 idx = a->j; 733 v = a->a; 734 if (usecprow){ 735 if (zz != yy){ 736 ierr = PetscMemcpy(zarray,yarray,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 737 } 738 mbs = a->compressedrow.nrows; 739 ii = a->compressedrow.i; 740 ridx = a->compressedrow.rindex; 741 if (zz != yy){ 742 ierr = PetscMemcpy(zarray,yarray,a->mbs*sizeof(PetscScalar));CHKERRQ(ierr); 743 } 744 } else { 745 ii = a->i; 746 y = yarray; 747 z = zarray; 748 } 749 750 for (i=0; i<mbs; i++) { 751 n = ii[1] - ii[0]; ii++; 752 if (usecprow){ 753 z = zarray + 2*ridx[i]; 754 y = yarray + 2*ridx[i]; 755 } 756 sum1 = y[0]; sum2 = y[1]; 757 for (j=0; j<n; j++) { 758 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 759 sum1 += v[0]*x1 + v[2]*x2; 760 sum2 += v[1]*x1 + v[3]*x2; 761 v += 4; 762 } 763 z[0] = sum1; z[1] = sum2; 764 if (!usecprow){ 765 z += 2; y += 2; 766 } 767 } 768 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 769 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 770 if (zz != yy) { 771 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 772 } 773 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 774 PetscFunctionReturn(0); 775 } 776 777 #undef __FUNCT__ 778 #define __FUNCT__ "MatMultAdd_SeqBAIJ_3" 779 PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 780 { 781 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 782 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,x1,x2,x3,*yarray,*zarray; 783 MatScalar *v; 784 PetscErrorCode ierr; 785 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 786 PetscTruth usecprow=a->compressedrow.use; 787 788 PetscFunctionBegin; 789 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 790 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 791 if (zz != yy) { 792 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 793 } else { 794 zarray = yarray; 795 } 796 797 idx = a->j; 798 v = a->a; 799 if (usecprow){ 800 if (zz != yy){ 801 ierr = PetscMemcpy(zarray,yarray,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 802 } 803 mbs = a->compressedrow.nrows; 804 ii = a->compressedrow.i; 805 ridx = a->compressedrow.rindex; 806 } else { 807 ii = a->i; 808 y = yarray; 809 z = zarray; 810 } 811 812 for (i=0; i<mbs; i++) { 813 n = ii[1] - ii[0]; ii++; 814 if (usecprow){ 815 z = zarray + 3*ridx[i]; 816 y = yarray + 3*ridx[i]; 817 } 818 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 819 for (j=0; j<n; j++) { 820 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 821 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 822 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 823 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 824 v += 9; 825 } 826 z[0] = sum1; z[1] = sum2; z[2] = sum3; 827 if (!usecprow){ 828 z += 3; y += 3; 829 } 830 } 831 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 832 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 833 if (zz != yy) { 834 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 835 } 836 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 837 PetscFunctionReturn(0); 838 } 839 840 #undef __FUNCT__ 841 #define __FUNCT__ "MatMultAdd_SeqBAIJ_4" 842 PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 843 { 844 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 845 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*yarray,*zarray; 846 MatScalar *v; 847 PetscErrorCode ierr; 848 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 849 PetscTruth usecprow=a->compressedrow.use; 850 851 PetscFunctionBegin; 852 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 853 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 854 if (zz != yy) { 855 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 856 } else { 857 zarray = yarray; 858 } 859 860 idx = a->j; 861 v = a->a; 862 if (usecprow){ 863 if (zz != yy){ 864 ierr = PetscMemcpy(zarray,yarray,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 865 } 866 mbs = a->compressedrow.nrows; 867 ii = a->compressedrow.i; 868 ridx = a->compressedrow.rindex; 869 } else { 870 ii = a->i; 871 y = yarray; 872 z = zarray; 873 } 874 875 for (i=0; i<mbs; i++) { 876 n = ii[1] - ii[0]; ii++; 877 if (usecprow){ 878 z = zarray + 4*ridx[i]; 879 y = yarray + 4*ridx[i]; 880 } 881 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 882 for (j=0; j<n; j++) { 883 xb = x + 4*(*idx++); 884 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 885 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 886 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 887 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 888 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 889 v += 16; 890 } 891 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 892 if (!usecprow){ 893 z += 4; y += 4; 894 } 895 } 896 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 897 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 898 if (zz != yy) { 899 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 900 } 901 ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 902 PetscFunctionReturn(0); 903 } 904 905 #undef __FUNCT__ 906 #define __FUNCT__ "MatMultAdd_SeqBAIJ_5" 907 PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 908 { 909 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 910 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5; 911 PetscScalar *yarray,*zarray; 912 MatScalar *v; 913 PetscErrorCode ierr; 914 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 915 PetscTruth usecprow=a->compressedrow.use; 916 917 PetscFunctionBegin; 918 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 919 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 920 if (zz != yy) { 921 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 922 } else { 923 zarray = yarray; 924 } 925 926 idx = a->j; 927 v = a->a; 928 if (usecprow){ 929 if (zz != yy){ 930 ierr = PetscMemcpy(zarray,yarray,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 931 } 932 mbs = a->compressedrow.nrows; 933 ii = a->compressedrow.i; 934 ridx = a->compressedrow.rindex; 935 } else { 936 ii = a->i; 937 y = yarray; 938 z = zarray; 939 } 940 941 for (i=0; i<mbs; i++) { 942 n = ii[1] - ii[0]; ii++; 943 if (usecprow){ 944 z = zarray + 5*ridx[i]; 945 y = yarray + 5*ridx[i]; 946 } 947 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 948 for (j=0; j<n; j++) { 949 xb = x + 5*(*idx++); 950 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 951 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 952 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 953 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 954 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 955 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 956 v += 25; 957 } 958 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 959 if (!usecprow){ 960 z += 5; y += 5; 961 } 962 } 963 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 964 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 965 if (zz != yy) { 966 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 967 } 968 ierr = PetscLogFlops(50*a->nz);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } 971 #undef __FUNCT__ 972 #define __FUNCT__ "MatMultAdd_SeqBAIJ_6" 973 PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 974 { 975 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 976 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6; 977 PetscScalar x1,x2,x3,x4,x5,x6,*yarray,*zarray; 978 MatScalar *v; 979 PetscErrorCode ierr; 980 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 981 PetscTruth usecprow=a->compressedrow.use; 982 983 PetscFunctionBegin; 984 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 985 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 986 if (zz != yy) { 987 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 988 } else { 989 zarray = yarray; 990 } 991 992 idx = a->j; 993 v = a->a; 994 if (usecprow){ 995 if (zz != yy){ 996 ierr = PetscMemcpy(zarray,yarray,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 997 } 998 mbs = a->compressedrow.nrows; 999 ii = a->compressedrow.i; 1000 ridx = a->compressedrow.rindex; 1001 } else { 1002 ii = a->i; 1003 y = yarray; 1004 z = zarray; 1005 } 1006 1007 for (i=0; i<mbs; i++) { 1008 n = ii[1] - ii[0]; ii++; 1009 if (usecprow){ 1010 z = zarray + 6*ridx[i]; 1011 y = yarray + 6*ridx[i]; 1012 } 1013 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; 1014 for (j=0; j<n; j++) { 1015 xb = x + 6*(*idx++); 1016 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; 1017 sum1 += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 1018 sum2 += v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 1019 sum3 += v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 1020 sum4 += v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 1021 sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 1022 sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 1023 v += 36; 1024 } 1025 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; 1026 if (!usecprow){ 1027 z += 6; y += 6; 1028 } 1029 } 1030 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1031 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 1032 if (zz != yy) { 1033 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 1034 } 1035 ierr = PetscLogFlops(72*a->nz);CHKERRQ(ierr); 1036 PetscFunctionReturn(0); 1037 } 1038 1039 #undef __FUNCT__ 1040 #define __FUNCT__ "MatMultAdd_SeqBAIJ_7" 1041 PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1042 { 1043 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1044 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1045 PetscScalar x1,x2,x3,x4,x5,x6,x7,*yarray,*zarray; 1046 MatScalar *v; 1047 PetscErrorCode ierr; 1048 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 1049 PetscTruth usecprow=a->compressedrow.use; 1050 1051 PetscFunctionBegin; 1052 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1053 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 1054 if (zz != yy) { 1055 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 1056 } else { 1057 zarray = yarray; 1058 } 1059 1060 idx = a->j; 1061 v = a->a; 1062 if (usecprow){ 1063 if (zz != yy){ 1064 ierr = PetscMemcpy(zarray,yarray,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1065 } 1066 mbs = a->compressedrow.nrows; 1067 ii = a->compressedrow.i; 1068 ridx = a->compressedrow.rindex; 1069 } else { 1070 ii = a->i; 1071 y = yarray; 1072 z = zarray; 1073 } 1074 1075 for (i=0; i<mbs; i++) { 1076 n = ii[1] - ii[0]; ii++; 1077 if (usecprow){ 1078 z = zarray + 7*ridx[i]; 1079 y = yarray + 7*ridx[i]; 1080 } 1081 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 1082 for (j=0; j<n; j++) { 1083 xb = x + 7*(*idx++); 1084 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1085 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1086 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1087 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1088 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1089 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1090 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1091 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1092 v += 49; 1093 } 1094 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1095 if (!usecprow){ 1096 z += 7; y += 7; 1097 } 1098 } 1099 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1100 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 1101 if (zz != yy) { 1102 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 1103 } 1104 ierr = PetscLogFlops(98*a->nz);CHKERRQ(ierr); 1105 PetscFunctionReturn(0); 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatMultAdd_SeqBAIJ_N" 1110 PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1111 { 1112 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1113 PetscScalar *x,*z = 0,*xb,*work,*workt,*zarray; 1114 MatScalar *v; 1115 PetscErrorCode ierr; 1116 PetscInt mbs,i,*idx,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2; 1117 PetscInt ncols,k,*ridx=PETSC_NULL; 1118 PetscTruth usecprow=a->compressedrow.use; 1119 1120 PetscFunctionBegin; 1121 ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); 1122 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1123 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 1124 1125 idx = a->j; 1126 v = a->a; 1127 if (usecprow){ 1128 mbs = a->compressedrow.nrows; 1129 ii = a->compressedrow.i; 1130 ridx = a->compressedrow.rindex; 1131 } else { 1132 mbs = a->mbs; 1133 ii = a->i; 1134 z = zarray; 1135 } 1136 1137 if (!a->mult_work) { 1138 k = PetscMax(A->rmap->n,A->cmap->n); 1139 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 1140 } 1141 work = a->mult_work; 1142 for (i=0; i<mbs; i++) { 1143 n = ii[1] - ii[0]; ii++; 1144 ncols = n*bs; 1145 workt = work; 1146 for (j=0; j<n; j++) { 1147 xb = x + bs*(*idx++); 1148 for (k=0; k<bs; k++) workt[k] = xb[k]; 1149 workt += bs; 1150 } 1151 if (usecprow) z = zarray + bs*ridx[i]; 1152 Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 1153 /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */ 1154 v += n*bs2; 1155 if (!usecprow){ 1156 z += bs; 1157 } 1158 } 1159 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1160 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 1161 ierr = PetscLogFlops(2*a->nz*bs2);CHKERRQ(ierr); 1162 PetscFunctionReturn(0); 1163 } 1164 1165 #undef __FUNCT__ 1166 #define __FUNCT__ "MatMultTranspose_SeqBAIJ" 1167 PetscErrorCode MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz) 1168 { 1169 PetscScalar zero = 0.0; 1170 PetscErrorCode ierr; 1171 1172 PetscFunctionBegin; 1173 ierr = VecSet(zz,zero);CHKERRQ(ierr); 1174 ierr = MatMultTransposeAdd_SeqBAIJ(A,xx,zz,zz);CHKERRQ(ierr); 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNCT__ 1179 #define __FUNCT__ "MatMultTransposeAdd_SeqBAIJ" 1180 PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1181 1182 { 1183 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1184 PetscScalar *zb,*x,*z,*xb = 0,x1,x2,x3,x4,x5; 1185 MatScalar *v; 1186 PetscErrorCode ierr; 1187 PetscInt mbs,i,*idx,*ii,rval,bs=A->rmap->bs,j,n,bs2=a->bs2,*ib,*ridx=PETSC_NULL; 1188 Mat_CompressedRow cprow = a->compressedrow; 1189 PetscTruth usecprow=cprow.use; 1190 1191 PetscFunctionBegin; 1192 if (yy != zz) { ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); } 1193 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1194 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 1195 1196 idx = a->j; 1197 v = a->a; 1198 if (usecprow){ 1199 mbs = cprow.nrows; 1200 ii = cprow.i; 1201 ridx = cprow.rindex; 1202 } else { 1203 mbs=a->mbs; 1204 ii = a->i; 1205 xb = x; 1206 } 1207 1208 switch (bs) { 1209 case 1: 1210 for (i=0; i<mbs; i++) { 1211 if (usecprow) xb = x + ridx[i]; 1212 x1 = xb[0]; 1213 ib = idx + ii[0]; 1214 n = ii[1] - ii[0]; ii++; 1215 for (j=0; j<n; j++) { 1216 rval = ib[j]; 1217 z[rval] += *v * x1; 1218 v++; 1219 } 1220 if (!usecprow) xb++; 1221 } 1222 break; 1223 case 2: 1224 for (i=0; i<mbs; i++) { 1225 if (usecprow) xb = x + 2*ridx[i]; 1226 x1 = xb[0]; x2 = xb[1]; 1227 ib = idx + ii[0]; 1228 n = ii[1] - ii[0]; ii++; 1229 for (j=0; j<n; j++) { 1230 rval = ib[j]*2; 1231 z[rval++] += v[0]*x1 + v[1]*x2; 1232 z[rval++] += v[2]*x1 + v[3]*x2; 1233 v += 4; 1234 } 1235 if (!usecprow) xb += 2; 1236 } 1237 break; 1238 case 3: 1239 for (i=0; i<mbs; i++) { 1240 if (usecprow) xb = x + 3*ridx[i]; 1241 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1242 ib = idx + ii[0]; 1243 n = ii[1] - ii[0]; ii++; 1244 for (j=0; j<n; j++) { 1245 rval = ib[j]*3; 1246 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1247 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1248 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1249 v += 9; 1250 } 1251 if (!usecprow) xb += 3; 1252 } 1253 break; 1254 case 4: 1255 for (i=0; i<mbs; i++) { 1256 if (usecprow) xb = x + 4*ridx[i]; 1257 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1258 ib = idx + ii[0]; 1259 n = ii[1] - ii[0]; ii++; 1260 for (j=0; j<n; j++) { 1261 rval = ib[j]*4; 1262 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1263 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1264 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1265 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1266 v += 16; 1267 } 1268 if (!usecprow) xb += 4; 1269 } 1270 break; 1271 case 5: 1272 for (i=0; i<mbs; i++) { 1273 if (usecprow) xb = x + 5*ridx[i]; 1274 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1275 x4 = xb[3]; x5 = xb[4]; 1276 ib = idx + ii[0]; 1277 n = ii[1] - ii[0]; ii++; 1278 for (j=0; j<n; j++) { 1279 rval = ib[j]*5; 1280 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1281 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1282 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1283 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1284 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1285 v += 25; 1286 } 1287 if (!usecprow) xb += 5; 1288 } 1289 break; 1290 default: { /* block sizes larger then 5 by 5 are handled by BLAS */ 1291 PetscInt ncols,k; 1292 PetscScalar *work,*workt,*xtmp; 1293 1294 if (!a->mult_work) { 1295 k = PetscMax(A->rmap->n,A->cmap->n); 1296 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 1297 } 1298 work = a->mult_work; 1299 xtmp = x; 1300 for (i=0; i<mbs; i++) { 1301 n = ii[1] - ii[0]; ii++; 1302 ncols = n*bs; 1303 ierr = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 1304 if (usecprow) { 1305 xtmp = x + bs*ridx[i]; 1306 } 1307 Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,xtmp,v,work); 1308 /* BLASgemv_("T",&bs,&ncols,&_DOne,v,&bs,xtmp,&_One,&_DOne,work,&_One); */ 1309 v += n*bs2; 1310 if (!usecprow) xtmp += bs; 1311 workt = work; 1312 for (j=0; j<n; j++) { 1313 zb = z + bs*(*idx++); 1314 for (k=0; k<bs; k++) zb[k] += workt[k] ; 1315 workt += bs; 1316 } 1317 } 1318 } 1319 } 1320 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1321 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 1322 ierr = PetscLogFlops(2*a->nz*a->bs2);CHKERRQ(ierr); 1323 PetscFunctionReturn(0); 1324 } 1325 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "MatScale_SeqBAIJ" 1328 PetscErrorCode MatScale_SeqBAIJ(Mat inA,PetscScalar alpha) 1329 { 1330 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1331 PetscInt totalnz = a->bs2*a->nz; 1332 PetscScalar oalpha = alpha; 1333 PetscErrorCode ierr; 1334 PetscBLASInt one = 1,tnz = PetscBLASIntCast(totalnz); 1335 1336 PetscFunctionBegin; 1337 BLASscal_(&tnz,&oalpha,a->a,&one); 1338 ierr = PetscLogFlops(totalnz);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatNorm_SeqBAIJ" 1344 PetscErrorCode MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm) 1345 { 1346 PetscErrorCode ierr; 1347 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1348 MatScalar *v = a->a; 1349 PetscReal sum = 0.0; 1350 PetscInt i,j,k,bs=A->rmap->bs,nz=a->nz,bs2=a->bs2,k1; 1351 1352 PetscFunctionBegin; 1353 if (type == NORM_FROBENIUS) { 1354 for (i=0; i< bs2*nz; i++) { 1355 #if defined(PETSC_USE_COMPLEX) 1356 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1357 #else 1358 sum += (*v)*(*v); v++; 1359 #endif 1360 } 1361 *norm = sqrt(sum); 1362 } else if (type == NORM_1) { /* maximum column sum */ 1363 PetscReal *tmp; 1364 PetscInt *bcol = a->j; 1365 ierr = PetscMalloc((A->cmap->n+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1366 ierr = PetscMemzero(tmp,A->cmap->n*sizeof(PetscReal));CHKERRQ(ierr); 1367 for (i=0; i<nz; i++){ 1368 for (j=0; j<bs; j++){ 1369 k1 = bs*(*bcol) + j; /* column index */ 1370 for (k=0; k<bs; k++){ 1371 tmp[k1] += PetscAbsScalar(*v); v++; 1372 } 1373 } 1374 bcol++; 1375 } 1376 *norm = 0.0; 1377 for (j=0; j<A->cmap->n; j++) { 1378 if (tmp[j] > *norm) *norm = tmp[j]; 1379 } 1380 ierr = PetscFree(tmp);CHKERRQ(ierr); 1381 } else if (type == NORM_INFINITY) { /* maximum row sum */ 1382 *norm = 0.0; 1383 for (k=0; k<bs; k++) { 1384 for (j=0; j<a->mbs; j++) { 1385 v = a->a + bs2*a->i[j] + k; 1386 sum = 0.0; 1387 for (i=0; i<a->i[j+1]-a->i[j]; i++) { 1388 for (k1=0; k1<bs; k1++){ 1389 sum += PetscAbsScalar(*v); 1390 v += bs; 1391 } 1392 } 1393 if (sum > *norm) *norm = sum; 1394 } 1395 } 1396 } else { 1397 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 1398 } 1399 PetscFunctionReturn(0); 1400 } 1401 1402 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MatEqual_SeqBAIJ" 1405 PetscErrorCode MatEqual_SeqBAIJ(Mat A,Mat B,PetscTruth* flg) 1406 { 1407 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data,*b = (Mat_SeqBAIJ *)B->data; 1408 PetscErrorCode ierr; 1409 1410 PetscFunctionBegin; 1411 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1412 if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) { 1413 *flg = PETSC_FALSE; 1414 PetscFunctionReturn(0); 1415 } 1416 1417 /* if the a->i are the same */ 1418 ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr); 1419 if (!*flg) { 1420 PetscFunctionReturn(0); 1421 } 1422 1423 /* if a->j are the same */ 1424 ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr); 1425 if (!*flg) { 1426 PetscFunctionReturn(0); 1427 } 1428 /* if a->a are the same */ 1429 ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(B->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr); 1430 PetscFunctionReturn(0); 1431 1432 } 1433 1434 #undef __FUNCT__ 1435 #define __FUNCT__ "MatGetDiagonal_SeqBAIJ" 1436 PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1437 { 1438 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1439 PetscErrorCode ierr; 1440 PetscInt i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1441 PetscScalar *x,zero = 0.0; 1442 MatScalar *aa,*aa_j; 1443 1444 PetscFunctionBegin; 1445 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1446 bs = A->rmap->bs; 1447 aa = a->a; 1448 ai = a->i; 1449 aj = a->j; 1450 ambs = a->mbs; 1451 bs2 = a->bs2; 1452 1453 ierr = VecSet(v,zero);CHKERRQ(ierr); 1454 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1455 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1456 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1457 for (i=0; i<ambs; i++) { 1458 for (j=ai[i]; j<ai[i+1]; j++) { 1459 if (aj[j] == i) { 1460 row = i*bs; 1461 aa_j = aa+j*bs2; 1462 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1463 break; 1464 } 1465 } 1466 } 1467 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1468 PetscFunctionReturn(0); 1469 } 1470 1471 #undef __FUNCT__ 1472 #define __FUNCT__ "MatDiagonalScale_SeqBAIJ" 1473 PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1474 { 1475 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1476 PetscScalar *l,*r,x,*li,*ri; 1477 MatScalar *aa,*v; 1478 PetscErrorCode ierr; 1479 PetscInt i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1480 1481 PetscFunctionBegin; 1482 ai = a->i; 1483 aj = a->j; 1484 aa = a->a; 1485 m = A->rmap->n; 1486 n = A->cmap->n; 1487 bs = A->rmap->bs; 1488 mbs = a->mbs; 1489 bs2 = a->bs2; 1490 if (ll) { 1491 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 1492 ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); 1493 if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 1494 for (i=0; i<mbs; i++) { /* for each block row */ 1495 M = ai[i+1] - ai[i]; 1496 li = l + i*bs; 1497 v = aa + bs2*ai[i]; 1498 for (j=0; j<M; j++) { /* for each block */ 1499 for (k=0; k<bs2; k++) { 1500 (*v++) *= li[k%bs]; 1501 } 1502 } 1503 } 1504 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 1505 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1506 } 1507 1508 if (rr) { 1509 ierr = VecGetArray(rr,&r);CHKERRQ(ierr); 1510 ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr); 1511 if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 1512 for (i=0; i<mbs; i++) { /* for each block row */ 1513 M = ai[i+1] - ai[i]; 1514 v = aa + bs2*ai[i]; 1515 for (j=0; j<M; j++) { /* for each block */ 1516 ri = r + bs*aj[ai[i]+j]; 1517 for (k=0; k<bs; k++) { 1518 x = ri[k]; 1519 for (tmp=0; tmp<bs; tmp++) (*v++) *= x; 1520 } 1521 } 1522 } 1523 ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr); 1524 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1525 } 1526 PetscFunctionReturn(0); 1527 } 1528 1529 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "MatGetInfo_SeqBAIJ" 1532 PetscErrorCode MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1533 { 1534 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1535 1536 PetscFunctionBegin; 1537 info->rows_global = (double)A->rmap->N; 1538 info->columns_global = (double)A->cmap->n; 1539 info->rows_local = (double)A->rmap->n; 1540 info->columns_local = (double)A->cmap->n; 1541 info->block_size = a->bs2; 1542 info->nz_allocated = a->maxnz; 1543 info->nz_used = a->bs2*a->nz; 1544 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1545 info->assemblies = A->num_ass; 1546 info->mallocs = a->reallocs; 1547 info->memory = ((PetscObject)A)->mem; 1548 if (A->factor) { 1549 info->fill_ratio_given = A->info.fill_ratio_given; 1550 info->fill_ratio_needed = A->info.fill_ratio_needed; 1551 info->factor_mallocs = A->info.factor_mallocs; 1552 } else { 1553 info->fill_ratio_given = 0; 1554 info->fill_ratio_needed = 0; 1555 info->factor_mallocs = 0; 1556 } 1557 PetscFunctionReturn(0); 1558 } 1559 1560 1561 #undef __FUNCT__ 1562 #define __FUNCT__ "MatZeroEntries_SeqBAIJ" 1563 PetscErrorCode MatZeroEntries_SeqBAIJ(Mat A) 1564 { 1565 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1566 PetscErrorCode ierr; 1567 1568 PetscFunctionBegin; 1569 ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); 1570 PetscFunctionReturn(0); 1571 } 1572