1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/baij/seq/baij.h" 4 #include "../src/mat/blockinvert.h" 5 #include "petscbt.h" 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "MatIncreaseOverlap_SeqBAIJ" 9 PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov) 10 { 11 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 12 PetscErrorCode ierr; 13 PetscInt row,i,j,k,l,m,n,*nidx,isz,val,ival; 14 const PetscInt *idx; 15 PetscInt start,end,*ai,*aj,bs,*nidx2; 16 PetscBT table; 17 18 PetscFunctionBegin; 19 m = a->mbs; 20 ai = a->i; 21 aj = a->j; 22 bs = A->rmap->bs; 23 24 if (ov < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified"); 25 26 ierr = PetscBTCreate(m,table);CHKERRQ(ierr); 27 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr); 28 ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr); 29 30 for (i=0; i<is_max; i++) { 31 /* Initialise the two local arrays */ 32 isz = 0; 33 ierr = PetscBTMemzero(m,table);CHKERRQ(ierr); 34 35 /* Extract the indices, assume there can be duplicate entries */ 36 ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr); 37 ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr); 38 39 /* Enter these into the temp arrays i.e mark table[row], enter row into new index */ 40 for (j=0; j<n ; ++j){ 41 ival = idx[j]/bs; /* convert the indices into block indices */ 42 if (ival>=m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim"); 43 if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;} 44 } 45 ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr); 46 ierr = ISDestroy(is[i]);CHKERRQ(ierr); 47 48 k = 0; 49 for (j=0; j<ov; j++){ /* for each overlap*/ 50 n = isz; 51 for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */ 52 row = nidx[k]; 53 start = ai[row]; 54 end = ai[row+1]; 55 for (l = start; l<end ; l++){ 56 val = aj[l]; 57 if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;} 58 } 59 } 60 } 61 /* expand the Index Set */ 62 for (j=0; j<isz; j++) { 63 for (k=0; k<bs; k++) 64 nidx2[j*bs+k] = nidx[j]*bs+k; 65 } 66 ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr); 67 } 68 ierr = PetscBTDestroy(table);CHKERRQ(ierr); 69 ierr = PetscFree(nidx);CHKERRQ(ierr); 70 ierr = PetscFree(nidx2);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ_Private" 76 PetscErrorCode MatGetSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 77 { 78 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*c; 79 PetscErrorCode ierr; 80 PetscInt *smap,i,k,kstart,kend,oldcols = a->nbs,*lens; 81 PetscInt row,mat_i,*mat_j,tcol,*mat_ilen; 82 const PetscInt *irow,*icol; 83 PetscInt nrows,ncols,*ssmap,bs=A->rmap->bs,bs2=a->bs2; 84 PetscInt *aj = a->j,*ai = a->i; 85 MatScalar *mat_a; 86 Mat C; 87 PetscTruth flag,sorted; 88 89 PetscFunctionBegin; 90 ierr = ISSorted(iscol,&sorted);CHKERRQ(ierr); 91 if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted"); 92 93 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 94 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 95 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 96 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 97 98 ierr = PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);CHKERRQ(ierr); 99 ssmap = smap; 100 ierr = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 101 ierr = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr); 102 for (i=0; i<ncols; i++) smap[icol[i]] = i+1; 103 /* determine lens of each row */ 104 for (i=0; i<nrows; i++) { 105 kstart = ai[irow[i]]; 106 kend = kstart + a->ilen[irow[i]]; 107 lens[i] = 0; 108 for (k=kstart; k<kend; k++) { 109 if (ssmap[aj[k]]) { 110 lens[i]++; 111 } 112 } 113 } 114 /* Create and fill new matrix */ 115 if (scall == MAT_REUSE_MATRIX) { 116 c = (Mat_SeqBAIJ *)((*B)->data); 117 118 if (c->mbs!=nrows || c->nbs!=ncols || (*B)->rmap->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size"); 119 ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr); 120 if (!flag) { 121 SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); 122 } 123 ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr); 124 C = *B; 125 } else { 126 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 127 ierr = MatSetSizes(C,nrows*bs,ncols*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 128 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 129 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,lens);CHKERRQ(ierr); 130 } 131 c = (Mat_SeqBAIJ *)(C->data); 132 for (i=0; i<nrows; i++) { 133 row = irow[i]; 134 kstart = ai[row]; 135 kend = kstart + a->ilen[row]; 136 mat_i = c->i[i]; 137 mat_j = c->j + mat_i; 138 mat_a = c->a + mat_i*bs2; 139 mat_ilen = c->ilen + i; 140 for (k=kstart; k<kend; k++) { 141 if ((tcol=ssmap[a->j[k]])) { 142 *mat_j++ = tcol - 1; 143 ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 144 mat_a += bs2; 145 (*mat_ilen)++; 146 } 147 } 148 } 149 150 /* Free work space */ 151 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 152 ierr = PetscFree(smap);CHKERRQ(ierr); 153 ierr = PetscFree(lens);CHKERRQ(ierr); 154 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 155 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 156 157 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 158 *B = C; 159 PetscFunctionReturn(0); 160 } 161 162 #undef __FUNCT__ 163 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ" 164 PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 165 { 166 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 167 IS is1,is2; 168 PetscErrorCode ierr; 169 PetscInt *vary,*iary,nrows,ncols,i,bs=A->rmap->bs,count; 170 const PetscInt *irow,*icol; 171 172 PetscFunctionBegin; 173 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 174 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 175 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 176 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 177 178 /* Verify if the indices corespond to each element in a block 179 and form the IS with compressed IS */ 180 ierr = PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);CHKERRQ(ierr); 181 iary = vary + a->mbs; 182 ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr); 183 for (i=0; i<nrows; i++) vary[irow[i]/bs]++; 184 count = 0; 185 for (i=0; i<a->mbs; i++) { 186 if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Index set does not match blocks"); 187 if (vary[i]==bs) iary[count++] = i; 188 } 189 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr); 190 191 ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr); 192 for (i=0; i<ncols; i++) vary[icol[i]/bs]++; 193 count = 0; 194 for (i=0; i<a->mbs; i++) { 195 if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_PLIB,"Internal error in PETSc"); 196 if (vary[i]==bs) iary[count++] = i; 197 } 198 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is2);CHKERRQ(ierr); 199 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 200 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 201 ierr = PetscFree(vary);CHKERRQ(ierr); 202 203 ierr = MatGetSubMatrix_SeqBAIJ_Private(A,is1,is2,scall,B);CHKERRQ(ierr); 204 ISDestroy(is1); 205 ISDestroy(is2); 206 PetscFunctionReturn(0); 207 } 208 209 #undef __FUNCT__ 210 #define __FUNCT__ "MatGetSubMatrices_SeqBAIJ" 211 PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 212 { 213 PetscErrorCode ierr; 214 PetscInt i; 215 216 PetscFunctionBegin; 217 if (scall == MAT_INITIAL_MATRIX) { 218 ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr); 219 } 220 221 for (i=0; i<n; i++) { 222 ierr = MatGetSubMatrix_SeqBAIJ(A,irow[i],icol[i],scall,&(*B)[i]);CHKERRQ(ierr); 223 } 224 PetscFunctionReturn(0); 225 } 226 227 228 /* -------------------------------------------------------*/ 229 /* Should check that shapes of vectors and matrices match */ 230 /* -------------------------------------------------------*/ 231 #include "petscblaslapack.h" 232 233 #undef __FUNCT__ 234 #define __FUNCT__ "MatMult_SeqBAIJ_1" 235 PetscErrorCode MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 236 { 237 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 238 PetscScalar *z,sum; 239 const PetscScalar *x; 240 const MatScalar *v; 241 PetscErrorCode ierr; 242 PetscInt mbs,i,*idx,*ii,n,*ridx=PETSC_NULL,nonzerorow=0; 243 PetscTruth usecprow=a->compressedrow.use; 244 245 PetscFunctionBegin; 246 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 247 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 248 249 idx = a->j; 250 v = a->a; 251 if (usecprow){ 252 mbs = a->compressedrow.nrows; 253 ii = a->compressedrow.i; 254 ridx = a->compressedrow.rindex; 255 } else { 256 mbs = a->mbs; 257 ii = a->i; 258 } 259 260 for (i=0; i<mbs; i++) { 261 n = ii[1] - ii[0]; ii++; 262 sum = 0.0; 263 nonzerorow += (n>0); 264 while (n--) sum += *v++ * x[*idx++]; 265 if (usecprow){ 266 z[ridx[i]] = sum; 267 } else { 268 z[i] = sum; 269 } 270 } 271 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 272 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 273 ierr = PetscLogFlops(2.0*a->nz - nonzerorow);CHKERRQ(ierr); 274 PetscFunctionReturn(0); 275 } 276 277 #undef __FUNCT__ 278 #define __FUNCT__ "MatMult_SeqBAIJ_2" 279 PetscErrorCode MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 280 { 281 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 282 PetscScalar *z = 0,sum1,sum2,*zarray; 283 const PetscScalar *x,*xb; 284 PetscScalar x1,x2; 285 const MatScalar *v; 286 PetscErrorCode ierr; 287 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 288 PetscTruth usecprow=a->compressedrow.use; 289 290 PetscFunctionBegin; 291 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 292 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 293 294 idx = a->j; 295 v = a->a; 296 if (usecprow){ 297 mbs = a->compressedrow.nrows; 298 ii = a->compressedrow.i; 299 ridx = a->compressedrow.rindex; 300 } else { 301 mbs = a->mbs; 302 ii = a->i; 303 z = zarray; 304 } 305 306 for (i=0; i<mbs; i++) { 307 n = ii[1] - ii[0]; ii++; 308 sum1 = 0.0; sum2 = 0.0; 309 nonzerorow += (n>0); 310 for (j=0; j<n; j++) { 311 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 312 sum1 += v[0]*x1 + v[2]*x2; 313 sum2 += v[1]*x1 + v[3]*x2; 314 v += 4; 315 } 316 if (usecprow) z = zarray + 2*ridx[i]; 317 z[0] = sum1; z[1] = sum2; 318 if (!usecprow) z += 2; 319 } 320 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 321 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 322 ierr = PetscLogFlops(8.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 323 PetscFunctionReturn(0); 324 } 325 326 #undef __FUNCT__ 327 #define __FUNCT__ "MatMult_SeqBAIJ_3" 328 PetscErrorCode MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 329 { 330 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 331 PetscScalar *z = 0,sum1,sum2,sum3,x1,x2,x3,*zarray; 332 const PetscScalar *x,*xb; 333 const MatScalar *v; 334 PetscErrorCode ierr; 335 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 336 PetscTruth usecprow=a->compressedrow.use; 337 338 339 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 340 #pragma disjoint(*v,*z,*xb) 341 #endif 342 343 PetscFunctionBegin; 344 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 345 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 346 347 idx = a->j; 348 v = a->a; 349 if (usecprow){ 350 mbs = a->compressedrow.nrows; 351 ii = a->compressedrow.i; 352 ridx = a->compressedrow.rindex; 353 } else { 354 mbs = a->mbs; 355 ii = a->i; 356 z = zarray; 357 } 358 359 for (i=0; i<mbs; i++) { 360 n = ii[1] - ii[0]; ii++; 361 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 362 nonzerorow += (n>0); 363 for (j=0; j<n; j++) { 364 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 365 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 366 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 367 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 368 v += 9; 369 } 370 if (usecprow) z = zarray + 3*ridx[i]; 371 z[0] = sum1; z[1] = sum2; z[2] = sum3; 372 if (!usecprow) z += 3; 373 } 374 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 375 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 376 ierr = PetscLogFlops(18.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 377 PetscFunctionReturn(0); 378 } 379 380 #undef __FUNCT__ 381 #define __FUNCT__ "MatMult_SeqBAIJ_4" 382 PetscErrorCode MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 383 { 384 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 385 PetscScalar *z = 0,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*zarray; 386 const PetscScalar *x,*xb; 387 const MatScalar *v; 388 PetscErrorCode ierr; 389 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 390 PetscTruth usecprow=a->compressedrow.use; 391 392 PetscFunctionBegin; 393 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 394 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 395 396 idx = a->j; 397 v = a->a; 398 if (usecprow){ 399 mbs = a->compressedrow.nrows; 400 ii = a->compressedrow.i; 401 ridx = a->compressedrow.rindex; 402 } else { 403 mbs = a->mbs; 404 ii = a->i; 405 z = zarray; 406 } 407 408 for (i=0; i<mbs; i++) { 409 n = ii[1] - ii[0]; ii++; 410 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 411 nonzerorow += (n>0); 412 for (j=0; j<n; j++) { 413 xb = x + 4*(*idx++); 414 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 415 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 416 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 417 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 418 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 419 v += 16; 420 } 421 if (usecprow) z = zarray + 4*ridx[i]; 422 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 423 if (!usecprow) z += 4; 424 } 425 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 426 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 427 ierr = PetscLogFlops(32.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 428 PetscFunctionReturn(0); 429 } 430 431 #undef __FUNCT__ 432 #define __FUNCT__ "MatMult_SeqBAIJ_5" 433 PetscErrorCode MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 434 { 435 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 436 PetscScalar sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*z = 0,*zarray; 437 const PetscScalar *xb,*x; 438 const MatScalar *v; 439 PetscErrorCode ierr; 440 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 441 PetscTruth usecprow=a->compressedrow.use; 442 443 PetscFunctionBegin; 444 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 445 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 446 447 idx = a->j; 448 v = a->a; 449 if (usecprow){ 450 mbs = a->compressedrow.nrows; 451 ii = a->compressedrow.i; 452 ridx = a->compressedrow.rindex; 453 } else { 454 mbs = a->mbs; 455 ii = a->i; 456 z = zarray; 457 } 458 459 for (i=0; i<mbs; i++) { 460 n = ii[1] - ii[0]; ii++; 461 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 462 nonzerorow += (n>0); 463 for (j=0; j<n; j++) { 464 xb = x + 5*(*idx++); 465 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 466 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 467 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 468 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 469 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 470 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 471 v += 25; 472 } 473 if (usecprow) z = zarray + 5*ridx[i]; 474 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 475 if (!usecprow) z += 5; 476 } 477 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 478 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 479 ierr = PetscLogFlops(50.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 480 PetscFunctionReturn(0); 481 } 482 483 484 #undef __FUNCT__ 485 #define __FUNCT__ "MatMult_SeqBAIJ_6" 486 PetscErrorCode MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz) 487 { 488 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 489 PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6; 490 const PetscScalar *x,*xb; 491 PetscScalar x1,x2,x3,x4,x5,x6,*zarray; 492 const MatScalar *v; 493 PetscErrorCode ierr; 494 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 495 PetscTruth usecprow=a->compressedrow.use; 496 497 PetscFunctionBegin; 498 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 499 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 500 501 idx = a->j; 502 v = a->a; 503 if (usecprow){ 504 mbs = a->compressedrow.nrows; 505 ii = a->compressedrow.i; 506 ridx = a->compressedrow.rindex; 507 } else { 508 mbs = a->mbs; 509 ii = a->i; 510 z = zarray; 511 } 512 513 for (i=0; i<mbs; i++) { 514 n = ii[1] - ii[0]; ii++; 515 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; 516 nonzerorow += (n>0); 517 for (j=0; j<n; j++) { 518 xb = x + 6*(*idx++); 519 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; 520 sum1 += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 521 sum2 += v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 522 sum3 += v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 523 sum4 += v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 524 sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 525 sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 526 v += 36; 527 } 528 if (usecprow) z = zarray + 6*ridx[i]; 529 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; 530 if (!usecprow) z += 6; 531 } 532 533 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 534 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 535 ierr = PetscLogFlops(72.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 536 PetscFunctionReturn(0); 537 } 538 #undef __FUNCT__ 539 #define __FUNCT__ "MatMult_SeqBAIJ_7" 540 PetscErrorCode MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 541 { 542 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 543 PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 544 const PetscScalar *x,*xb; 545 PetscScalar x1,x2,x3,x4,x5,x6,x7,*zarray; 546 const MatScalar *v; 547 PetscErrorCode ierr; 548 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 549 PetscTruth usecprow=a->compressedrow.use; 550 551 PetscFunctionBegin; 552 ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 553 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 554 555 idx = a->j; 556 v = a->a; 557 if (usecprow){ 558 mbs = a->compressedrow.nrows; 559 ii = a->compressedrow.i; 560 ridx = a->compressedrow.rindex; 561 } else { 562 mbs = a->mbs; 563 ii = a->i; 564 z = zarray; 565 } 566 567 for (i=0; i<mbs; i++) { 568 n = ii[1] - ii[0]; ii++; 569 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 570 nonzerorow += (n>0); 571 for (j=0; j<n; j++) { 572 xb = x + 7*(*idx++); 573 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 574 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 575 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 576 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 577 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 578 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 579 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 580 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 581 v += 49; 582 } 583 if (usecprow) z = zarray + 7*ridx[i]; 584 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 585 if (!usecprow) z += 7; 586 } 587 588 ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr); 589 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 590 ierr = PetscLogFlops(98.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 591 PetscFunctionReturn(0); 592 } 593 594 /* 595 This will not work with MatScalar == float because it calls the BLAS 596 */ 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatMult_SeqBAIJ_N" 599 PetscErrorCode MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 600 { 601 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 602 PetscScalar *x,*z = 0,*xb,*work,*workt,*zarray; 603 MatScalar *v; 604 PetscErrorCode ierr; 605 PetscInt mbs=a->mbs,i,*idx,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2; 606 PetscInt ncols,k,*ridx=PETSC_NULL,nonzerorow=0; 607 PetscTruth usecprow=a->compressedrow.use; 608 609 PetscFunctionBegin; 610 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 611 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 612 613 idx = a->j; 614 v = a->a; 615 if (usecprow){ 616 mbs = a->compressedrow.nrows; 617 ii = a->compressedrow.i; 618 ridx = a->compressedrow.rindex; 619 } else { 620 mbs = a->mbs; 621 ii = a->i; 622 z = zarray; 623 } 624 625 if (!a->mult_work) { 626 k = PetscMax(A->rmap->n,A->cmap->n); 627 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 628 } 629 work = a->mult_work; 630 for (i=0; i<mbs; i++) { 631 n = ii[1] - ii[0]; ii++; 632 ncols = n*bs; 633 workt = work; 634 nonzerorow += (n>0); 635 for (j=0; j<n; j++) { 636 xb = x + bs*(*idx++); 637 for (k=0; k<bs; k++) workt[k] = xb[k]; 638 workt += bs; 639 } 640 if (usecprow) z = zarray + bs*ridx[i]; 641 Kernel_w_gets_Ar_times_v(bs,ncols,work,v,z); 642 /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */ 643 v += n*bs2; 644 if (!usecprow) z += bs; 645 } 646 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 647 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 648 ierr = PetscLogFlops(2.0*a->nz*bs2 - bs*nonzerorow);CHKERRQ(ierr); 649 PetscFunctionReturn(0); 650 } 651 652 extern PetscErrorCode VecCopy_Seq(Vec,Vec); 653 #undef __FUNCT__ 654 #define __FUNCT__ "MatMultAdd_SeqBAIJ_1" 655 PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 656 { 657 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 658 PetscScalar *x,*y = 0,*z = 0,sum,*yarray,*zarray; 659 MatScalar *v; 660 PetscErrorCode ierr; 661 PetscInt mbs=a->mbs,i,*idx,*ii,n,*ridx=PETSC_NULL; 662 PetscTruth usecprow=a->compressedrow.use; 663 664 PetscFunctionBegin; 665 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 666 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 667 if (zz != yy) { 668 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 669 } else { 670 zarray = yarray; 671 } 672 673 idx = a->j; 674 v = a->a; 675 if (usecprow){ 676 if (zz != yy){ 677 ierr = PetscMemcpy(zarray,yarray,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 678 } 679 mbs = a->compressedrow.nrows; 680 ii = a->compressedrow.i; 681 ridx = a->compressedrow.rindex; 682 } else { 683 ii = a->i; 684 y = yarray; 685 z = zarray; 686 } 687 688 for (i=0; i<mbs; i++) { 689 n = ii[1] - ii[0]; ii++; 690 if (usecprow){ 691 z = zarray + ridx[i]; 692 y = yarray + ridx[i]; 693 } 694 sum = y[0]; 695 while (n--) sum += *v++ * x[*idx++]; 696 z[0] = sum; 697 if (!usecprow){ 698 z++; y++; 699 } 700 } 701 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 702 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 703 if (zz != yy) { 704 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 705 } 706 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 707 PetscFunctionReturn(0); 708 } 709 710 #undef __FUNCT__ 711 #define __FUNCT__ "MatMultAdd_SeqBAIJ_2" 712 PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 713 { 714 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 715 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2; 716 PetscScalar x1,x2,*yarray,*zarray; 717 MatScalar *v; 718 PetscErrorCode ierr; 719 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 720 PetscTruth usecprow=a->compressedrow.use; 721 722 PetscFunctionBegin; 723 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 724 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 725 if (zz != yy) { 726 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 727 } else { 728 zarray = yarray; 729 } 730 731 idx = a->j; 732 v = a->a; 733 if (usecprow){ 734 if (zz != yy){ 735 ierr = PetscMemcpy(zarray,yarray,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 736 } 737 mbs = a->compressedrow.nrows; 738 ii = a->compressedrow.i; 739 ridx = a->compressedrow.rindex; 740 if (zz != yy){ 741 ierr = PetscMemcpy(zarray,yarray,a->mbs*sizeof(PetscScalar));CHKERRQ(ierr); 742 } 743 } else { 744 ii = a->i; 745 y = yarray; 746 z = zarray; 747 } 748 749 for (i=0; i<mbs; i++) { 750 n = ii[1] - ii[0]; ii++; 751 if (usecprow){ 752 z = zarray + 2*ridx[i]; 753 y = yarray + 2*ridx[i]; 754 } 755 sum1 = y[0]; sum2 = y[1]; 756 for (j=0; j<n; j++) { 757 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 758 sum1 += v[0]*x1 + v[2]*x2; 759 sum2 += v[1]*x1 + v[3]*x2; 760 v += 4; 761 } 762 z[0] = sum1; z[1] = sum2; 763 if (!usecprow){ 764 z += 2; y += 2; 765 } 766 } 767 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 768 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 769 if (zz != yy) { 770 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 771 } 772 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 773 PetscFunctionReturn(0); 774 } 775 776 #undef __FUNCT__ 777 #define __FUNCT__ "MatMultAdd_SeqBAIJ_3" 778 PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 779 { 780 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 781 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,x1,x2,x3,*yarray,*zarray; 782 MatScalar *v; 783 PetscErrorCode ierr; 784 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 785 PetscTruth usecprow=a->compressedrow.use; 786 787 PetscFunctionBegin; 788 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 789 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 790 if (zz != yy) { 791 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 792 } else { 793 zarray = yarray; 794 } 795 796 idx = a->j; 797 v = a->a; 798 if (usecprow){ 799 if (zz != yy){ 800 ierr = PetscMemcpy(zarray,yarray,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 801 } 802 mbs = a->compressedrow.nrows; 803 ii = a->compressedrow.i; 804 ridx = a->compressedrow.rindex; 805 } else { 806 ii = a->i; 807 y = yarray; 808 z = zarray; 809 } 810 811 for (i=0; i<mbs; i++) { 812 n = ii[1] - ii[0]; ii++; 813 if (usecprow){ 814 z = zarray + 3*ridx[i]; 815 y = yarray + 3*ridx[i]; 816 } 817 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 818 for (j=0; j<n; j++) { 819 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 820 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 821 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 822 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 823 v += 9; 824 } 825 z[0] = sum1; z[1] = sum2; z[2] = sum3; 826 if (!usecprow){ 827 z += 3; y += 3; 828 } 829 } 830 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 831 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 832 if (zz != yy) { 833 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 834 } 835 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 836 PetscFunctionReturn(0); 837 } 838 839 #undef __FUNCT__ 840 #define __FUNCT__ "MatMultAdd_SeqBAIJ_4" 841 PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 842 { 843 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 844 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*yarray,*zarray; 845 MatScalar *v; 846 PetscErrorCode ierr; 847 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 848 PetscTruth usecprow=a->compressedrow.use; 849 850 PetscFunctionBegin; 851 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 852 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 853 if (zz != yy) { 854 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 855 } else { 856 zarray = yarray; 857 } 858 859 idx = a->j; 860 v = a->a; 861 if (usecprow){ 862 if (zz != yy){ 863 ierr = PetscMemcpy(zarray,yarray,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 864 } 865 mbs = a->compressedrow.nrows; 866 ii = a->compressedrow.i; 867 ridx = a->compressedrow.rindex; 868 } else { 869 ii = a->i; 870 y = yarray; 871 z = zarray; 872 } 873 874 for (i=0; i<mbs; i++) { 875 n = ii[1] - ii[0]; ii++; 876 if (usecprow){ 877 z = zarray + 4*ridx[i]; 878 y = yarray + 4*ridx[i]; 879 } 880 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 881 for (j=0; j<n; j++) { 882 xb = x + 4*(*idx++); 883 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 884 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 885 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 886 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 887 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 888 v += 16; 889 } 890 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 891 if (!usecprow){ 892 z += 4; y += 4; 893 } 894 } 895 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 896 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 897 if (zz != yy) { 898 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 899 } 900 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } 903 904 #undef __FUNCT__ 905 #define __FUNCT__ "MatMultAdd_SeqBAIJ_5" 906 PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 907 { 908 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 909 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5; 910 PetscScalar *yarray,*zarray; 911 MatScalar *v; 912 PetscErrorCode ierr; 913 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 914 PetscTruth usecprow=a->compressedrow.use; 915 916 PetscFunctionBegin; 917 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 918 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 919 if (zz != yy) { 920 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 921 } else { 922 zarray = yarray; 923 } 924 925 idx = a->j; 926 v = a->a; 927 if (usecprow){ 928 if (zz != yy){ 929 ierr = PetscMemcpy(zarray,yarray,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 930 } 931 mbs = a->compressedrow.nrows; 932 ii = a->compressedrow.i; 933 ridx = a->compressedrow.rindex; 934 } else { 935 ii = a->i; 936 y = yarray; 937 z = zarray; 938 } 939 940 for (i=0; i<mbs; i++) { 941 n = ii[1] - ii[0]; ii++; 942 if (usecprow){ 943 z = zarray + 5*ridx[i]; 944 y = yarray + 5*ridx[i]; 945 } 946 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 947 for (j=0; j<n; j++) { 948 xb = x + 5*(*idx++); 949 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 950 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 951 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 952 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 953 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 954 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 955 v += 25; 956 } 957 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 958 if (!usecprow){ 959 z += 5; y += 5; 960 } 961 } 962 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 963 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 964 if (zz != yy) { 965 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 966 } 967 ierr = PetscLogFlops(50.0*a->nz);CHKERRQ(ierr); 968 PetscFunctionReturn(0); 969 } 970 #undef __FUNCT__ 971 #define __FUNCT__ "MatMultAdd_SeqBAIJ_6" 972 PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 973 { 974 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 975 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6; 976 PetscScalar x1,x2,x3,x4,x5,x6,*yarray,*zarray; 977 MatScalar *v; 978 PetscErrorCode ierr; 979 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 980 PetscTruth usecprow=a->compressedrow.use; 981 982 PetscFunctionBegin; 983 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 984 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 985 if (zz != yy) { 986 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 987 } else { 988 zarray = yarray; 989 } 990 991 idx = a->j; 992 v = a->a; 993 if (usecprow){ 994 if (zz != yy){ 995 ierr = PetscMemcpy(zarray,yarray,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 996 } 997 mbs = a->compressedrow.nrows; 998 ii = a->compressedrow.i; 999 ridx = a->compressedrow.rindex; 1000 } else { 1001 ii = a->i; 1002 y = yarray; 1003 z = zarray; 1004 } 1005 1006 for (i=0; i<mbs; i++) { 1007 n = ii[1] - ii[0]; ii++; 1008 if (usecprow){ 1009 z = zarray + 6*ridx[i]; 1010 y = yarray + 6*ridx[i]; 1011 } 1012 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; 1013 for (j=0; j<n; j++) { 1014 xb = x + 6*(*idx++); 1015 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; 1016 sum1 += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 1017 sum2 += v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 1018 sum3 += v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 1019 sum4 += v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 1020 sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 1021 sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 1022 v += 36; 1023 } 1024 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; 1025 if (!usecprow){ 1026 z += 6; y += 6; 1027 } 1028 } 1029 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1030 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 1031 if (zz != yy) { 1032 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 1033 } 1034 ierr = PetscLogFlops(72.0*a->nz);CHKERRQ(ierr); 1035 PetscFunctionReturn(0); 1036 } 1037 1038 #undef __FUNCT__ 1039 #define __FUNCT__ "MatMultAdd_SeqBAIJ_7" 1040 PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1041 { 1042 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1043 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1044 PetscScalar x1,x2,x3,x4,x5,x6,x7,*yarray,*zarray; 1045 MatScalar *v; 1046 PetscErrorCode ierr; 1047 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 1048 PetscTruth usecprow=a->compressedrow.use; 1049 1050 PetscFunctionBegin; 1051 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1052 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 1053 if (zz != yy) { 1054 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 1055 } else { 1056 zarray = yarray; 1057 } 1058 1059 idx = a->j; 1060 v = a->a; 1061 if (usecprow){ 1062 if (zz != yy){ 1063 ierr = PetscMemcpy(zarray,yarray,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1064 } 1065 mbs = a->compressedrow.nrows; 1066 ii = a->compressedrow.i; 1067 ridx = a->compressedrow.rindex; 1068 } else { 1069 ii = a->i; 1070 y = yarray; 1071 z = zarray; 1072 } 1073 1074 for (i=0; i<mbs; i++) { 1075 n = ii[1] - ii[0]; ii++; 1076 if (usecprow){ 1077 z = zarray + 7*ridx[i]; 1078 y = yarray + 7*ridx[i]; 1079 } 1080 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 1081 for (j=0; j<n; j++) { 1082 xb = x + 7*(*idx++); 1083 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1084 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1085 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1086 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1087 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1088 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1089 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1090 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1091 v += 49; 1092 } 1093 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1094 if (!usecprow){ 1095 z += 7; y += 7; 1096 } 1097 } 1098 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1099 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 1100 if (zz != yy) { 1101 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 1102 } 1103 ierr = PetscLogFlops(98.0*a->nz);CHKERRQ(ierr); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatMultAdd_SeqBAIJ_N" 1109 PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1110 { 1111 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1112 PetscScalar *x,*z = 0,*xb,*work,*workt,*zarray; 1113 MatScalar *v; 1114 PetscErrorCode ierr; 1115 PetscInt mbs,i,*idx,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2; 1116 PetscInt ncols,k,*ridx=PETSC_NULL; 1117 PetscTruth usecprow=a->compressedrow.use; 1118 1119 PetscFunctionBegin; 1120 ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); 1121 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1122 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 1123 1124 idx = a->j; 1125 v = a->a; 1126 if (usecprow){ 1127 mbs = a->compressedrow.nrows; 1128 ii = a->compressedrow.i; 1129 ridx = a->compressedrow.rindex; 1130 } else { 1131 mbs = a->mbs; 1132 ii = a->i; 1133 z = zarray; 1134 } 1135 1136 if (!a->mult_work) { 1137 k = PetscMax(A->rmap->n,A->cmap->n); 1138 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 1139 } 1140 work = a->mult_work; 1141 for (i=0; i<mbs; i++) { 1142 n = ii[1] - ii[0]; ii++; 1143 ncols = n*bs; 1144 workt = work; 1145 for (j=0; j<n; j++) { 1146 xb = x + bs*(*idx++); 1147 for (k=0; k<bs; k++) workt[k] = xb[k]; 1148 workt += bs; 1149 } 1150 if (usecprow) z = zarray + bs*ridx[i]; 1151 Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 1152 /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */ 1153 v += n*bs2; 1154 if (!usecprow){ 1155 z += bs; 1156 } 1157 } 1158 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1159 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 1160 ierr = PetscLogFlops(2.0*a->nz*bs2);CHKERRQ(ierr); 1161 PetscFunctionReturn(0); 1162 } 1163 1164 #undef __FUNCT__ 1165 #define __FUNCT__ "MatMultTranspose_SeqBAIJ" 1166 PetscErrorCode MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz) 1167 { 1168 PetscScalar zero = 0.0; 1169 PetscErrorCode ierr; 1170 1171 PetscFunctionBegin; 1172 ierr = VecSet(zz,zero);CHKERRQ(ierr); 1173 ierr = MatMultTransposeAdd_SeqBAIJ(A,xx,zz,zz);CHKERRQ(ierr); 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatMultTransposeAdd_SeqBAIJ" 1179 PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1180 1181 { 1182 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1183 PetscScalar *zb,*x,*z,*xb = 0,x1,x2,x3,x4,x5; 1184 MatScalar *v; 1185 PetscErrorCode ierr; 1186 PetscInt mbs,i,*idx,*ii,rval,bs=A->rmap->bs,j,n,bs2=a->bs2,*ib,*ridx=PETSC_NULL; 1187 Mat_CompressedRow cprow = a->compressedrow; 1188 PetscTruth usecprow=cprow.use; 1189 1190 PetscFunctionBegin; 1191 if (yy != zz) { ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); } 1192 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1193 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 1194 1195 idx = a->j; 1196 v = a->a; 1197 if (usecprow){ 1198 mbs = cprow.nrows; 1199 ii = cprow.i; 1200 ridx = cprow.rindex; 1201 } else { 1202 mbs=a->mbs; 1203 ii = a->i; 1204 xb = x; 1205 } 1206 1207 switch (bs) { 1208 case 1: 1209 for (i=0; i<mbs; i++) { 1210 if (usecprow) xb = x + ridx[i]; 1211 x1 = xb[0]; 1212 ib = idx + ii[0]; 1213 n = ii[1] - ii[0]; ii++; 1214 for (j=0; j<n; j++) { 1215 rval = ib[j]; 1216 z[rval] += *v * x1; 1217 v++; 1218 } 1219 if (!usecprow) xb++; 1220 } 1221 break; 1222 case 2: 1223 for (i=0; i<mbs; i++) { 1224 if (usecprow) xb = x + 2*ridx[i]; 1225 x1 = xb[0]; x2 = xb[1]; 1226 ib = idx + ii[0]; 1227 n = ii[1] - ii[0]; ii++; 1228 for (j=0; j<n; j++) { 1229 rval = ib[j]*2; 1230 z[rval++] += v[0]*x1 + v[1]*x2; 1231 z[rval++] += v[2]*x1 + v[3]*x2; 1232 v += 4; 1233 } 1234 if (!usecprow) xb += 2; 1235 } 1236 break; 1237 case 3: 1238 for (i=0; i<mbs; i++) { 1239 if (usecprow) xb = x + 3*ridx[i]; 1240 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1241 ib = idx + ii[0]; 1242 n = ii[1] - ii[0]; ii++; 1243 for (j=0; j<n; j++) { 1244 rval = ib[j]*3; 1245 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1246 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1247 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1248 v += 9; 1249 } 1250 if (!usecprow) xb += 3; 1251 } 1252 break; 1253 case 4: 1254 for (i=0; i<mbs; i++) { 1255 if (usecprow) xb = x + 4*ridx[i]; 1256 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1257 ib = idx + ii[0]; 1258 n = ii[1] - ii[0]; ii++; 1259 for (j=0; j<n; j++) { 1260 rval = ib[j]*4; 1261 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1262 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1263 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1264 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1265 v += 16; 1266 } 1267 if (!usecprow) xb += 4; 1268 } 1269 break; 1270 case 5: 1271 for (i=0; i<mbs; i++) { 1272 if (usecprow) xb = x + 5*ridx[i]; 1273 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1274 x4 = xb[3]; x5 = xb[4]; 1275 ib = idx + ii[0]; 1276 n = ii[1] - ii[0]; ii++; 1277 for (j=0; j<n; j++) { 1278 rval = ib[j]*5; 1279 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1280 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1281 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1282 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1283 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1284 v += 25; 1285 } 1286 if (!usecprow) xb += 5; 1287 } 1288 break; 1289 default: { /* block sizes larger then 5 by 5 are handled by BLAS */ 1290 PetscInt ncols,k; 1291 PetscScalar *work,*workt,*xtmp; 1292 1293 if (!a->mult_work) { 1294 k = PetscMax(A->rmap->n,A->cmap->n); 1295 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 1296 } 1297 work = a->mult_work; 1298 xtmp = x; 1299 for (i=0; i<mbs; i++) { 1300 n = ii[1] - ii[0]; ii++; 1301 ncols = n*bs; 1302 ierr = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 1303 if (usecprow) { 1304 xtmp = x + bs*ridx[i]; 1305 } 1306 Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,xtmp,v,work); 1307 /* BLASgemv_("T",&bs,&ncols,&_DOne,v,&bs,xtmp,&_One,&_DOne,work,&_One); */ 1308 v += n*bs2; 1309 if (!usecprow) xtmp += bs; 1310 workt = work; 1311 for (j=0; j<n; j++) { 1312 zb = z + bs*(*idx++); 1313 for (k=0; k<bs; k++) zb[k] += workt[k] ; 1314 workt += bs; 1315 } 1316 } 1317 } 1318 } 1319 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1320 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 1321 ierr = PetscLogFlops(2.0*a->nz*a->bs2);CHKERRQ(ierr); 1322 PetscFunctionReturn(0); 1323 } 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatScale_SeqBAIJ" 1327 PetscErrorCode MatScale_SeqBAIJ(Mat inA,PetscScalar alpha) 1328 { 1329 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1330 PetscInt totalnz = a->bs2*a->nz; 1331 PetscScalar oalpha = alpha; 1332 PetscErrorCode ierr; 1333 PetscBLASInt one = 1,tnz = PetscBLASIntCast(totalnz); 1334 1335 PetscFunctionBegin; 1336 BLASscal_(&tnz,&oalpha,a->a,&one); 1337 ierr = PetscLogFlops(totalnz);CHKERRQ(ierr); 1338 PetscFunctionReturn(0); 1339 } 1340 1341 #undef __FUNCT__ 1342 #define __FUNCT__ "MatNorm_SeqBAIJ" 1343 PetscErrorCode MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm) 1344 { 1345 PetscErrorCode ierr; 1346 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1347 MatScalar *v = a->a; 1348 PetscReal sum = 0.0; 1349 PetscInt i,j,k,bs=A->rmap->bs,nz=a->nz,bs2=a->bs2,k1; 1350 1351 PetscFunctionBegin; 1352 if (type == NORM_FROBENIUS) { 1353 for (i=0; i< bs2*nz; i++) { 1354 #if defined(PETSC_USE_COMPLEX) 1355 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1356 #else 1357 sum += (*v)*(*v); v++; 1358 #endif 1359 } 1360 *norm = sqrt(sum); 1361 } else if (type == NORM_1) { /* maximum column sum */ 1362 PetscReal *tmp; 1363 PetscInt *bcol = a->j; 1364 ierr = PetscMalloc((A->cmap->n+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1365 ierr = PetscMemzero(tmp,A->cmap->n*sizeof(PetscReal));CHKERRQ(ierr); 1366 for (i=0; i<nz; i++){ 1367 for (j=0; j<bs; j++){ 1368 k1 = bs*(*bcol) + j; /* column index */ 1369 for (k=0; k<bs; k++){ 1370 tmp[k1] += PetscAbsScalar(*v); v++; 1371 } 1372 } 1373 bcol++; 1374 } 1375 *norm = 0.0; 1376 for (j=0; j<A->cmap->n; j++) { 1377 if (tmp[j] > *norm) *norm = tmp[j]; 1378 } 1379 ierr = PetscFree(tmp);CHKERRQ(ierr); 1380 } else if (type == NORM_INFINITY) { /* maximum row sum */ 1381 *norm = 0.0; 1382 for (k=0; k<bs; k++) { 1383 for (j=0; j<a->mbs; j++) { 1384 v = a->a + bs2*a->i[j] + k; 1385 sum = 0.0; 1386 for (i=0; i<a->i[j+1]-a->i[j]; i++) { 1387 for (k1=0; k1<bs; k1++){ 1388 sum += PetscAbsScalar(*v); 1389 v += bs; 1390 } 1391 } 1392 if (sum > *norm) *norm = sum; 1393 } 1394 } 1395 } else { 1396 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 1397 } 1398 PetscFunctionReturn(0); 1399 } 1400 1401 1402 #undef __FUNCT__ 1403 #define __FUNCT__ "MatEqual_SeqBAIJ" 1404 PetscErrorCode MatEqual_SeqBAIJ(Mat A,Mat B,PetscTruth* flg) 1405 { 1406 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data,*b = (Mat_SeqBAIJ *)B->data; 1407 PetscErrorCode ierr; 1408 1409 PetscFunctionBegin; 1410 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1411 if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) { 1412 *flg = PETSC_FALSE; 1413 PetscFunctionReturn(0); 1414 } 1415 1416 /* if the a->i are the same */ 1417 ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr); 1418 if (!*flg) { 1419 PetscFunctionReturn(0); 1420 } 1421 1422 /* if a->j are the same */ 1423 ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr); 1424 if (!*flg) { 1425 PetscFunctionReturn(0); 1426 } 1427 /* if a->a are the same */ 1428 ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(B->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr); 1429 PetscFunctionReturn(0); 1430 1431 } 1432 1433 #undef __FUNCT__ 1434 #define __FUNCT__ "MatGetDiagonal_SeqBAIJ" 1435 PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1436 { 1437 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1438 PetscErrorCode ierr; 1439 PetscInt i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1440 PetscScalar *x,zero = 0.0; 1441 MatScalar *aa,*aa_j; 1442 1443 PetscFunctionBegin; 1444 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1445 bs = A->rmap->bs; 1446 aa = a->a; 1447 ai = a->i; 1448 aj = a->j; 1449 ambs = a->mbs; 1450 bs2 = a->bs2; 1451 1452 ierr = VecSet(v,zero);CHKERRQ(ierr); 1453 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1454 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1455 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1456 for (i=0; i<ambs; i++) { 1457 for (j=ai[i]; j<ai[i+1]; j++) { 1458 if (aj[j] == i) { 1459 row = i*bs; 1460 aa_j = aa+j*bs2; 1461 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1462 break; 1463 } 1464 } 1465 } 1466 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1467 PetscFunctionReturn(0); 1468 } 1469 1470 #undef __FUNCT__ 1471 #define __FUNCT__ "MatDiagonalScale_SeqBAIJ" 1472 PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1473 { 1474 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1475 PetscScalar *l,*r,x,*li,*ri; 1476 MatScalar *aa,*v; 1477 PetscErrorCode ierr; 1478 PetscInt i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1479 1480 PetscFunctionBegin; 1481 ai = a->i; 1482 aj = a->j; 1483 aa = a->a; 1484 m = A->rmap->n; 1485 n = A->cmap->n; 1486 bs = A->rmap->bs; 1487 mbs = a->mbs; 1488 bs2 = a->bs2; 1489 if (ll) { 1490 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 1491 ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); 1492 if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 1493 for (i=0; i<mbs; i++) { /* for each block row */ 1494 M = ai[i+1] - ai[i]; 1495 li = l + i*bs; 1496 v = aa + bs2*ai[i]; 1497 for (j=0; j<M; j++) { /* for each block */ 1498 for (k=0; k<bs2; k++) { 1499 (*v++) *= li[k%bs]; 1500 } 1501 } 1502 } 1503 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 1504 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1505 } 1506 1507 if (rr) { 1508 ierr = VecGetArray(rr,&r);CHKERRQ(ierr); 1509 ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr); 1510 if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 1511 for (i=0; i<mbs; i++) { /* for each block row */ 1512 M = ai[i+1] - ai[i]; 1513 v = aa + bs2*ai[i]; 1514 for (j=0; j<M; j++) { /* for each block */ 1515 ri = r + bs*aj[ai[i]+j]; 1516 for (k=0; k<bs; k++) { 1517 x = ri[k]; 1518 for (tmp=0; tmp<bs; tmp++) (*v++) *= x; 1519 } 1520 } 1521 } 1522 ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr); 1523 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1524 } 1525 PetscFunctionReturn(0); 1526 } 1527 1528 1529 #undef __FUNCT__ 1530 #define __FUNCT__ "MatGetInfo_SeqBAIJ" 1531 PetscErrorCode MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1532 { 1533 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1534 1535 PetscFunctionBegin; 1536 info->block_size = a->bs2; 1537 info->nz_allocated = a->maxnz; 1538 info->nz_used = a->bs2*a->nz; 1539 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1540 info->assemblies = A->num_ass; 1541 info->mallocs = a->reallocs; 1542 info->memory = ((PetscObject)A)->mem; 1543 if (A->factor) { 1544 info->fill_ratio_given = A->info.fill_ratio_given; 1545 info->fill_ratio_needed = A->info.fill_ratio_needed; 1546 info->factor_mallocs = A->info.factor_mallocs; 1547 } else { 1548 info->fill_ratio_given = 0; 1549 info->fill_ratio_needed = 0; 1550 info->factor_mallocs = 0; 1551 } 1552 PetscFunctionReturn(0); 1553 } 1554 1555 1556 #undef __FUNCT__ 1557 #define __FUNCT__ "MatZeroEntries_SeqBAIJ" 1558 PetscErrorCode MatZeroEntries_SeqBAIJ(Mat A) 1559 { 1560 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1561 PetscErrorCode ierr; 1562 1563 PetscFunctionBegin; 1564 ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); 1565 PetscFunctionReturn(0); 1566 } 1567