1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/seq/baij.h" 4 #include "src/inline/spops.h" 5 #include "src/inline/ilu.h" 6 #include "petscbt.h" 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatIncreaseOverlap_SeqBAIJ" 10 PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov) 11 { 12 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 13 PetscErrorCode ierr; 14 PetscInt row,i,j,k,l,m,n,*idx,*nidx,isz,val,ival; 15 PetscInt start,end,*ai,*aj,bs,*nidx2; 16 PetscBT table; 17 18 PetscFunctionBegin; 19 m = a->mbs; 20 ai = a->i; 21 aj = a->j; 22 bs = A->bs; 23 24 if (ov < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified"); 25 26 ierr = PetscBTCreate(m,table);CHKERRQ(ierr); 27 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr); 28 ierr = PetscMalloc((A->m+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr); 29 30 for (i=0; i<is_max; i++) { 31 /* Initialise the two local arrays */ 32 isz = 0; 33 ierr = PetscBTMemzero(m,table);CHKERRQ(ierr); 34 35 /* Extract the indices, assume there can be duplicate entries */ 36 ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr); 37 ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr); 38 39 /* Enter these into the temp arrays i.e mark table[row], enter row into new index */ 40 for (j=0; j<n ; ++j){ 41 ival = idx[j]/bs; /* convert the indices into block indices */ 42 if (ival>=m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim"); 43 if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;} 44 } 45 ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr); 46 ierr = ISDestroy(is[i]);CHKERRQ(ierr); 47 48 k = 0; 49 for (j=0; j<ov; j++){ /* for each overlap*/ 50 n = isz; 51 for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */ 52 row = nidx[k]; 53 start = ai[row]; 54 end = ai[row+1]; 55 for (l = start; l<end ; l++){ 56 val = aj[l]; 57 if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;} 58 } 59 } 60 } 61 /* expand the Index Set */ 62 for (j=0; j<isz; j++) { 63 for (k=0; k<bs; k++) 64 nidx2[j*bs+k] = nidx[j]*bs+k; 65 } 66 ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr); 67 } 68 ierr = PetscBTDestroy(table);CHKERRQ(ierr); 69 ierr = PetscFree(nidx);CHKERRQ(ierr); 70 ierr = PetscFree(nidx2);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ_Private" 76 PetscErrorCode MatGetSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 77 { 78 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*c; 79 PetscErrorCode ierr; 80 PetscInt *smap,i,k,kstart,kend,oldcols = a->nbs,*lens; 81 PetscInt row,mat_i,*mat_j,tcol,*mat_ilen; 82 PetscInt *irow,*icol,nrows,ncols,*ssmap,bs=A->bs,bs2=a->bs2; 83 PetscInt *aj = a->j,*ai = a->i; 84 MatScalar *mat_a; 85 Mat C; 86 PetscTruth flag; 87 88 PetscFunctionBegin; 89 ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr); 90 if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted"); 91 92 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 93 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 94 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 95 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 96 97 ierr = PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);CHKERRQ(ierr); 98 ssmap = smap; 99 ierr = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 100 ierr = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr); 101 for (i=0; i<ncols; i++) smap[icol[i]] = i+1; 102 /* determine lens of each row */ 103 for (i=0; i<nrows; i++) { 104 kstart = ai[irow[i]]; 105 kend = kstart + a->ilen[irow[i]]; 106 lens[i] = 0; 107 for (k=kstart; k<kend; k++) { 108 if (ssmap[aj[k]]) { 109 lens[i]++; 110 } 111 } 112 } 113 /* Create and fill new matrix */ 114 if (scall == MAT_REUSE_MATRIX) { 115 c = (Mat_SeqBAIJ *)((*B)->data); 116 117 if (c->mbs!=nrows || c->nbs!=ncols || (*B)->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size"); 118 ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr); 119 if (!flag) { 120 SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); 121 } 122 ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr); 123 C = *B; 124 } else { 125 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 126 ierr = MatSetSizes(C,nrows*bs,ncols*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 127 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 128 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,lens);CHKERRQ(ierr); 129 } 130 c = (Mat_SeqBAIJ *)(C->data); 131 for (i=0; i<nrows; i++) { 132 row = irow[i]; 133 kstart = ai[row]; 134 kend = kstart + a->ilen[row]; 135 mat_i = c->i[i]; 136 mat_j = c->j + mat_i; 137 mat_a = c->a + mat_i*bs2; 138 mat_ilen = c->ilen + i; 139 for (k=kstart; k<kend; k++) { 140 if ((tcol=ssmap[a->j[k]])) { 141 *mat_j++ = tcol - 1; 142 ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 143 mat_a += bs2; 144 (*mat_ilen)++; 145 } 146 } 147 } 148 149 /* Free work space */ 150 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 151 ierr = PetscFree(smap);CHKERRQ(ierr); 152 ierr = PetscFree(lens);CHKERRQ(ierr); 153 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 154 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 155 156 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 157 *B = C; 158 PetscFunctionReturn(0); 159 } 160 161 #undef __FUNCT__ 162 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ" 163 PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 164 { 165 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 166 IS is1,is2; 167 PetscErrorCode ierr; 168 PetscInt *vary,*iary,*irow,*icol,nrows,ncols,i,bs=A->bs,count; 169 170 PetscFunctionBegin; 171 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 172 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 173 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 174 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 175 176 /* Verify if the indices corespond to each element in a block 177 and form the IS with compressed IS */ 178 ierr = PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);CHKERRQ(ierr); 179 iary = vary + a->mbs; 180 ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr); 181 for (i=0; i<nrows; i++) vary[irow[i]/bs]++; 182 count = 0; 183 for (i=0; i<a->mbs; i++) { 184 if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Index set does not match blocks"); 185 if (vary[i]==bs) iary[count++] = i; 186 } 187 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr); 188 189 ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr); 190 for (i=0; i<ncols; i++) vary[icol[i]/bs]++; 191 count = 0; 192 for (i=0; i<a->mbs; i++) { 193 if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_PLIB,"Internal error in PETSc"); 194 if (vary[i]==bs) iary[count++] = i; 195 } 196 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is2);CHKERRQ(ierr); 197 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 198 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 199 ierr = PetscFree(vary);CHKERRQ(ierr); 200 201 ierr = MatGetSubMatrix_SeqBAIJ_Private(A,is1,is2,cs,scall,B);CHKERRQ(ierr); 202 ISDestroy(is1); 203 ISDestroy(is2); 204 PetscFunctionReturn(0); 205 } 206 207 #undef __FUNCT__ 208 #define __FUNCT__ "MatGetSubMatrices_SeqBAIJ" 209 PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 210 { 211 PetscErrorCode ierr; 212 PetscInt i; 213 214 PetscFunctionBegin; 215 if (scall == MAT_INITIAL_MATRIX) { 216 ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr); 217 } 218 219 for (i=0; i<n; i++) { 220 ierr = MatGetSubMatrix_SeqBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr); 221 } 222 PetscFunctionReturn(0); 223 } 224 225 226 /* -------------------------------------------------------*/ 227 /* Should check that shapes of vectors and matrices match */ 228 /* -------------------------------------------------------*/ 229 #include "petscblaslapack.h" 230 231 #undef __FUNCT__ 232 #define __FUNCT__ "MatMult_SeqBAIJ_1" 233 PetscErrorCode MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 234 { 235 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 236 PetscScalar *x,*z,sum; 237 MatScalar *v; 238 PetscErrorCode ierr; 239 PetscInt mbs,i,*idx,*ii,n,*ridx=PETSC_NULL; 240 PetscTruth usecprow=a->compressedrow.use; 241 242 PetscFunctionBegin; 243 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 244 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 245 246 idx = a->j; 247 v = a->a; 248 if (usecprow){ 249 mbs = a->compressedrow.nrows; 250 ii = a->compressedrow.i; 251 ridx = a->compressedrow.rindex; 252 } else { 253 mbs = a->mbs; 254 ii = a->i; 255 } 256 257 for (i=0; i<mbs; i++) { 258 n = ii[1] - ii[0]; ii++; 259 sum = 0.0; 260 while (n--) sum += *v++ * x[*idx++]; 261 if (usecprow){ 262 z[ridx[i]] = sum; 263 } else { 264 z[i] = sum; 265 } 266 } 267 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 268 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 269 ierr = PetscLogFlops(2*a->nz - mbs);CHKERRQ(ierr); 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatMult_SeqBAIJ_2" 275 PetscErrorCode MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 276 { 277 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 278 PetscScalar *x,*z = 0,*xb,sum1,sum2,*zarray; 279 PetscScalar x1,x2; 280 MatScalar *v; 281 PetscErrorCode ierr; 282 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 283 PetscTruth usecprow=a->compressedrow.use; 284 285 PetscFunctionBegin; 286 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 287 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 288 289 idx = a->j; 290 v = a->a; 291 if (usecprow){ 292 mbs = a->compressedrow.nrows; 293 ii = a->compressedrow.i; 294 ridx = a->compressedrow.rindex; 295 } else { 296 mbs = a->mbs; 297 ii = a->i; 298 z = zarray; 299 } 300 301 for (i=0; i<mbs; i++) { 302 n = ii[1] - ii[0]; ii++; 303 sum1 = 0.0; sum2 = 0.0; 304 for (j=0; j<n; j++) { 305 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 306 sum1 += v[0]*x1 + v[2]*x2; 307 sum2 += v[1]*x1 + v[3]*x2; 308 v += 4; 309 } 310 if (usecprow) z = zarray + 2*ridx[i]; 311 z[0] = sum1; z[1] = sum2; 312 if (!usecprow) z += 2; 313 } 314 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 315 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 316 ierr = PetscLogFlops(8*a->nz - 2*mbs);CHKERRQ(ierr); 317 PetscFunctionReturn(0); 318 } 319 320 #undef __FUNCT__ 321 #define __FUNCT__ "MatMult_SeqBAIJ_3" 322 PetscErrorCode MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 323 { 324 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 325 PetscScalar *x,*z = 0,*xb,sum1,sum2,sum3,x1,x2,x3,*zarray; 326 MatScalar *v; 327 PetscErrorCode ierr; 328 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 329 PetscTruth usecprow=a->compressedrow.use; 330 331 332 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 333 #pragma disjoint(*v,*z,*xb) 334 #endif 335 336 PetscFunctionBegin; 337 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 338 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 339 340 idx = a->j; 341 v = a->a; 342 if (usecprow){ 343 mbs = a->compressedrow.nrows; 344 ii = a->compressedrow.i; 345 ridx = a->compressedrow.rindex; 346 } else { 347 mbs = a->mbs; 348 ii = a->i; 349 z = zarray; 350 } 351 352 for (i=0; i<mbs; i++) { 353 n = ii[1] - ii[0]; ii++; 354 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 355 for (j=0; j<n; j++) { 356 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 357 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 358 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 359 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 360 v += 9; 361 } 362 if (usecprow) z = zarray + 3*ridx[i]; 363 z[0] = sum1; z[1] = sum2; z[2] = sum3; 364 if (!usecprow) z += 3; 365 } 366 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 367 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 368 ierr = PetscLogFlops(18*a->nz - 3*mbs);CHKERRQ(ierr); 369 PetscFunctionReturn(0); 370 } 371 372 #undef __FUNCT__ 373 #define __FUNCT__ "MatMult_SeqBAIJ_4" 374 PetscErrorCode MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 375 { 376 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 377 PetscScalar *x,*z = 0,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*zarray; 378 MatScalar *v; 379 PetscErrorCode ierr; 380 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 381 PetscTruth usecprow=a->compressedrow.use; 382 383 PetscFunctionBegin; 384 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 385 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 386 387 idx = a->j; 388 v = a->a; 389 if (usecprow){ 390 mbs = a->compressedrow.nrows; 391 ii = a->compressedrow.i; 392 ridx = a->compressedrow.rindex; 393 } else { 394 mbs = a->mbs; 395 ii = a->i; 396 z = zarray; 397 } 398 399 for (i=0; i<mbs; i++) { 400 n = ii[1] - ii[0]; ii++; 401 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 402 for (j=0; j<n; j++) { 403 xb = x + 4*(*idx++); 404 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 405 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 406 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 407 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 408 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 409 v += 16; 410 } 411 if (usecprow) z = zarray + 4*ridx[i]; 412 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 413 if (!usecprow) z += 4; 414 } 415 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 416 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 417 ierr = PetscLogFlops(32*a->nz - 4*mbs);CHKERRQ(ierr); 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatMult_SeqBAIJ_5" 423 PetscErrorCode MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 424 { 425 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 426 PetscScalar sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*xb,*z = 0,*x,*zarray; 427 MatScalar *v; 428 PetscErrorCode ierr; 429 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 430 PetscTruth usecprow=a->compressedrow.use; 431 432 PetscFunctionBegin; 433 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 434 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 435 436 idx = a->j; 437 v = a->a; 438 if (usecprow){ 439 mbs = a->compressedrow.nrows; 440 ii = a->compressedrow.i; 441 ridx = a->compressedrow.rindex; 442 } else { 443 mbs = a->mbs; 444 ii = a->i; 445 z = zarray; 446 } 447 448 for (i=0; i<mbs; i++) { 449 n = ii[1] - ii[0]; ii++; 450 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 451 for (j=0; j<n; j++) { 452 xb = x + 5*(*idx++); 453 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 454 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 455 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 456 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 457 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 458 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 459 v += 25; 460 } 461 if (usecprow) z = zarray + 5*ridx[i]; 462 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 463 if (!usecprow) z += 5; 464 } 465 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 466 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 467 ierr = PetscLogFlops(50*a->nz - 5*mbs);CHKERRQ(ierr); 468 PetscFunctionReturn(0); 469 } 470 471 472 #undef __FUNCT__ 473 #define __FUNCT__ "MatMult_SeqBAIJ_6" 474 PetscErrorCode MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz) 475 { 476 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 477 PetscScalar *x,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6; 478 PetscScalar x1,x2,x3,x4,x5,x6,*zarray; 479 MatScalar *v; 480 PetscErrorCode ierr; 481 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 482 PetscTruth usecprow=a->compressedrow.use; 483 484 PetscFunctionBegin; 485 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 486 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 487 488 idx = a->j; 489 v = a->a; 490 if (usecprow){ 491 mbs = a->compressedrow.nrows; 492 ii = a->compressedrow.i; 493 ridx = a->compressedrow.rindex; 494 } else { 495 mbs = a->mbs; 496 ii = a->i; 497 z = zarray; 498 } 499 500 for (i=0; i<mbs; i++) { 501 n = ii[1] - ii[0]; ii++; 502 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; 503 for (j=0; j<n; j++) { 504 xb = x + 6*(*idx++); 505 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; 506 sum1 += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 507 sum2 += v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 508 sum3 += v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 509 sum4 += v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 510 sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 511 sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 512 v += 36; 513 } 514 if (usecprow) z = zarray + 6*ridx[i]; 515 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; 516 if (!usecprow) z += 6; 517 } 518 519 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 520 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 521 ierr = PetscLogFlops(72*a->nz - 6*mbs);CHKERRQ(ierr); 522 PetscFunctionReturn(0); 523 } 524 #undef __FUNCT__ 525 #define __FUNCT__ "MatMult_SeqBAIJ_7" 526 PetscErrorCode MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 527 { 528 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 529 PetscScalar *x,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 530 PetscScalar x1,x2,x3,x4,x5,x6,x7,*zarray; 531 MatScalar *v; 532 PetscErrorCode ierr; 533 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 534 PetscTruth usecprow=a->compressedrow.use; 535 536 PetscFunctionBegin; 537 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 538 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 539 540 idx = a->j; 541 v = a->a; 542 if (usecprow){ 543 mbs = a->compressedrow.nrows; 544 ii = a->compressedrow.i; 545 ridx = a->compressedrow.rindex; 546 } else { 547 mbs = a->mbs; 548 ii = a->i; 549 z = zarray; 550 } 551 552 for (i=0; i<mbs; i++) { 553 n = ii[1] - ii[0]; ii++; 554 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 555 for (j=0; j<n; j++) { 556 xb = x + 7*(*idx++); 557 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 558 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 559 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 560 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 561 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 562 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 563 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 564 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 565 v += 49; 566 } 567 if (usecprow) z = zarray + 7*ridx[i]; 568 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 569 if (!usecprow) z += 7; 570 } 571 572 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 573 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 574 ierr = PetscLogFlops(98*a->nz - 7*mbs);CHKERRQ(ierr); 575 PetscFunctionReturn(0); 576 } 577 578 /* 579 This will not work with MatScalar == float because it calls the BLAS 580 */ 581 #undef __FUNCT__ 582 #define __FUNCT__ "MatMult_SeqBAIJ_N" 583 PetscErrorCode MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 584 { 585 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 586 PetscScalar *x,*z = 0,*xb,*work,*workt,*zarray; 587 MatScalar *v; 588 PetscErrorCode ierr; 589 PetscInt mbs=a->mbs,i,*idx,*ii,bs=A->bs,j,n,bs2=a->bs2; 590 PetscInt ncols,k,*ridx=PETSC_NULL; 591 PetscTruth usecprow=a->compressedrow.use; 592 593 PetscFunctionBegin; 594 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 595 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 596 597 idx = a->j; 598 v = a->a; 599 if (usecprow){ 600 mbs = a->compressedrow.nrows; 601 ii = a->compressedrow.i; 602 ridx = a->compressedrow.rindex; 603 } else { 604 mbs = a->mbs; 605 ii = a->i; 606 z = zarray; 607 } 608 609 if (!a->mult_work) { 610 k = PetscMax(A->m,A->n); 611 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 612 } 613 work = a->mult_work; 614 for (i=0; i<mbs; i++) { 615 n = ii[1] - ii[0]; ii++; 616 ncols = n*bs; 617 workt = work; 618 for (j=0; j<n; j++) { 619 xb = x + bs*(*idx++); 620 for (k=0; k<bs; k++) workt[k] = xb[k]; 621 workt += bs; 622 } 623 if (usecprow) z = zarray + bs*ridx[i]; 624 Kernel_w_gets_Ar_times_v(bs,ncols,work,v,z); 625 /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */ 626 v += n*bs2; 627 if (!usecprow) z += bs; 628 } 629 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 630 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 631 ierr = PetscLogFlops(2*a->nz*bs2 - bs*mbs);CHKERRQ(ierr); 632 PetscFunctionReturn(0); 633 } 634 635 #undef __FUNCT__ 636 #define __FUNCT__ "MatMultAdd_SeqBAIJ_1" 637 PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 638 { 639 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 640 PetscScalar *x,*y = 0,*z = 0,sum,*yarray,*zarray; 641 MatScalar *v; 642 PetscErrorCode ierr; 643 PetscInt mbs=a->mbs,i,*idx,*ii,n,*ridx=PETSC_NULL; 644 PetscTruth usecprow=a->compressedrow.use; 645 646 PetscFunctionBegin; 647 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 648 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 649 if (zz != yy) { 650 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 651 } else { 652 zarray = yarray; 653 } 654 655 idx = a->j; 656 v = a->a; 657 if (usecprow){ 658 if (zz != yy){ 659 ierr = PetscMemcpy(zarray,yarray,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 660 } 661 mbs = a->compressedrow.nrows; 662 ii = a->compressedrow.i; 663 ridx = a->compressedrow.rindex; 664 } else { 665 ii = a->i; 666 y = yarray; 667 z = zarray; 668 } 669 670 for (i=0; i<mbs; i++) { 671 n = ii[1] - ii[0]; ii++; 672 if (usecprow){ 673 z = zarray + ridx[i]; 674 y = yarray + ridx[i]; 675 } 676 sum = y[0]; 677 while (n--) sum += *v++ * x[*idx++]; 678 z[0] = sum; 679 if (!usecprow){ 680 z++; y++; 681 } 682 } 683 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 684 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 685 if (zz != yy) { 686 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 687 } 688 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 689 PetscFunctionReturn(0); 690 } 691 692 #undef __FUNCT__ 693 #define __FUNCT__ "MatMultAdd_SeqBAIJ_2" 694 PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 695 { 696 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 697 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2; 698 PetscScalar x1,x2,*yarray,*zarray; 699 MatScalar *v; 700 PetscErrorCode ierr; 701 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 702 PetscTruth usecprow=a->compressedrow.use; 703 704 PetscFunctionBegin; 705 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 706 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 707 if (zz != yy) { 708 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 709 } else { 710 zarray = yarray; 711 } 712 713 idx = a->j; 714 v = a->a; 715 if (usecprow){ 716 if (zz != yy){ 717 ierr = PetscMemcpy(zarray,yarray,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 718 } 719 mbs = a->compressedrow.nrows; 720 ii = a->compressedrow.i; 721 ridx = a->compressedrow.rindex; 722 if (zz != yy){ 723 ierr = PetscMemcpy(zarray,yarray,a->mbs*sizeof(PetscScalar));CHKERRQ(ierr); 724 } 725 } else { 726 ii = a->i; 727 y = yarray; 728 z = zarray; 729 } 730 731 for (i=0; i<mbs; i++) { 732 n = ii[1] - ii[0]; ii++; 733 if (usecprow){ 734 z = zarray + 2*ridx[i]; 735 y = yarray + 2*ridx[i]; 736 } 737 sum1 = y[0]; sum2 = y[1]; 738 for (j=0; j<n; j++) { 739 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 740 sum1 += v[0]*x1 + v[2]*x2; 741 sum2 += v[1]*x1 + v[3]*x2; 742 v += 4; 743 } 744 z[0] = sum1; z[1] = sum2; 745 if (!usecprow){ 746 z += 2; y += 2; 747 } 748 } 749 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 750 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 751 if (zz != yy) { 752 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 753 } 754 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 755 PetscFunctionReturn(0); 756 } 757 758 #undef __FUNCT__ 759 #define __FUNCT__ "MatMultAdd_SeqBAIJ_3" 760 PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 761 { 762 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 763 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,x1,x2,x3,*yarray,*zarray; 764 MatScalar *v; 765 PetscErrorCode ierr; 766 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 767 PetscTruth usecprow=a->compressedrow.use; 768 769 PetscFunctionBegin; 770 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 771 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 772 if (zz != yy) { 773 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 774 } else { 775 zarray = yarray; 776 } 777 778 idx = a->j; 779 v = a->a; 780 if (usecprow){ 781 if (zz != yy){ 782 ierr = PetscMemcpy(zarray,yarray,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 783 } 784 mbs = a->compressedrow.nrows; 785 ii = a->compressedrow.i; 786 ridx = a->compressedrow.rindex; 787 } else { 788 ii = a->i; 789 y = yarray; 790 z = zarray; 791 } 792 793 for (i=0; i<mbs; i++) { 794 n = ii[1] - ii[0]; ii++; 795 if (usecprow){ 796 z = zarray + 3*ridx[i]; 797 y = yarray + 3*ridx[i]; 798 } 799 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 800 for (j=0; j<n; j++) { 801 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 802 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 803 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 804 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 805 v += 9; 806 } 807 z[0] = sum1; z[1] = sum2; z[2] = sum3; 808 if (!usecprow){ 809 z += 3; y += 3; 810 } 811 } 812 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 813 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 814 if (zz != yy) { 815 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 816 } 817 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 818 PetscFunctionReturn(0); 819 } 820 821 #undef __FUNCT__ 822 #define __FUNCT__ "MatMultAdd_SeqBAIJ_4" 823 PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 824 { 825 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 826 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*yarray,*zarray; 827 MatScalar *v; 828 PetscErrorCode ierr; 829 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 830 PetscTruth usecprow=a->compressedrow.use; 831 832 PetscFunctionBegin; 833 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 834 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 835 if (zz != yy) { 836 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 837 } else { 838 zarray = yarray; 839 } 840 841 idx = a->j; 842 v = a->a; 843 if (usecprow){ 844 if (zz != yy){ 845 ierr = PetscMemcpy(zarray,yarray,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 846 } 847 mbs = a->compressedrow.nrows; 848 ii = a->compressedrow.i; 849 ridx = a->compressedrow.rindex; 850 } else { 851 ii = a->i; 852 y = yarray; 853 z = zarray; 854 } 855 856 for (i=0; i<mbs; i++) { 857 n = ii[1] - ii[0]; ii++; 858 if (usecprow){ 859 z = zarray + 4*ridx[i]; 860 y = yarray + 4*ridx[i]; 861 } 862 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 863 for (j=0; j<n; j++) { 864 xb = x + 4*(*idx++); 865 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 866 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 867 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 868 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 869 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 870 v += 16; 871 } 872 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 873 if (!usecprow){ 874 z += 4; y += 4; 875 } 876 } 877 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 878 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 879 if (zz != yy) { 880 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 881 } 882 ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 883 PetscFunctionReturn(0); 884 } 885 886 #undef __FUNCT__ 887 #define __FUNCT__ "MatMultAdd_SeqBAIJ_5" 888 PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 889 { 890 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 891 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5; 892 PetscScalar *yarray,*zarray; 893 MatScalar *v; 894 PetscErrorCode ierr; 895 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 896 PetscTruth usecprow=a->compressedrow.use; 897 898 PetscFunctionBegin; 899 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 900 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 901 if (zz != yy) { 902 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 903 } else { 904 zarray = yarray; 905 } 906 907 idx = a->j; 908 v = a->a; 909 if (usecprow){ 910 if (zz != yy){ 911 ierr = PetscMemcpy(zarray,yarray,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 912 } 913 mbs = a->compressedrow.nrows; 914 ii = a->compressedrow.i; 915 ridx = a->compressedrow.rindex; 916 } else { 917 ii = a->i; 918 y = yarray; 919 z = zarray; 920 } 921 922 for (i=0; i<mbs; i++) { 923 n = ii[1] - ii[0]; ii++; 924 if (usecprow){ 925 z = zarray + 5*ridx[i]; 926 y = yarray + 5*ridx[i]; 927 } 928 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 929 for (j=0; j<n; j++) { 930 xb = x + 5*(*idx++); 931 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 932 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 933 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 934 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 935 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 936 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 937 v += 25; 938 } 939 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 940 if (!usecprow){ 941 z += 5; y += 5; 942 } 943 } 944 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 945 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 946 if (zz != yy) { 947 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 948 } 949 ierr = PetscLogFlops(50*a->nz);CHKERRQ(ierr); 950 PetscFunctionReturn(0); 951 } 952 #undef __FUNCT__ 953 #define __FUNCT__ "MatMultAdd_SeqBAIJ_6" 954 PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 955 { 956 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 957 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6; 958 PetscScalar x1,x2,x3,x4,x5,x6,*yarray,*zarray; 959 MatScalar *v; 960 PetscErrorCode ierr; 961 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 962 PetscTruth usecprow=a->compressedrow.use; 963 964 PetscFunctionBegin; 965 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 966 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 967 if (zz != yy) { 968 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 969 } else { 970 zarray = yarray; 971 } 972 973 idx = a->j; 974 v = a->a; 975 if (usecprow){ 976 if (zz != yy){ 977 ierr = PetscMemcpy(zarray,yarray,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 978 } 979 mbs = a->compressedrow.nrows; 980 ii = a->compressedrow.i; 981 ridx = a->compressedrow.rindex; 982 } else { 983 ii = a->i; 984 y = yarray; 985 z = zarray; 986 } 987 988 for (i=0; i<mbs; i++) { 989 n = ii[1] - ii[0]; ii++; 990 if (usecprow){ 991 z = zarray + 6*ridx[i]; 992 y = yarray + 6*ridx[i]; 993 } 994 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; 995 for (j=0; j<n; j++) { 996 xb = x + 6*(*idx++); 997 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; 998 sum1 += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 999 sum2 += v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 1000 sum3 += v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 1001 sum4 += v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 1002 sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 1003 sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 1004 v += 36; 1005 } 1006 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; 1007 if (!usecprow){ 1008 z += 6; y += 6; 1009 } 1010 } 1011 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1012 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 1013 if (zz != yy) { 1014 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 1015 } 1016 ierr = PetscLogFlops(72*a->nz);CHKERRQ(ierr); 1017 PetscFunctionReturn(0); 1018 } 1019 1020 #undef __FUNCT__ 1021 #define __FUNCT__ "MatMultAdd_SeqBAIJ_7" 1022 PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1023 { 1024 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1025 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1026 PetscScalar x1,x2,x3,x4,x5,x6,x7,*yarray,*zarray; 1027 MatScalar *v; 1028 PetscErrorCode ierr; 1029 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 1030 PetscTruth usecprow=a->compressedrow.use; 1031 1032 PetscFunctionBegin; 1033 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1034 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 1035 if (zz != yy) { 1036 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 1037 } else { 1038 zarray = yarray; 1039 } 1040 1041 idx = a->j; 1042 v = a->a; 1043 if (usecprow){ 1044 if (zz != yy){ 1045 ierr = PetscMemcpy(zarray,yarray,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1046 } 1047 mbs = a->compressedrow.nrows; 1048 ii = a->compressedrow.i; 1049 ridx = a->compressedrow.rindex; 1050 } else { 1051 ii = a->i; 1052 y = yarray; 1053 z = zarray; 1054 } 1055 1056 for (i=0; i<mbs; i++) { 1057 n = ii[1] - ii[0]; ii++; 1058 if (usecprow){ 1059 z = zarray + 7*ridx[i]; 1060 y = yarray + 7*ridx[i]; 1061 } 1062 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 1063 for (j=0; j<n; j++) { 1064 xb = x + 7*(*idx++); 1065 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1066 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1067 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1068 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1069 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1070 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1071 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1072 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1073 v += 49; 1074 } 1075 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1076 if (!usecprow){ 1077 z += 7; y += 7; 1078 } 1079 } 1080 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1081 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 1082 if (zz != yy) { 1083 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 1084 } 1085 ierr = PetscLogFlops(98*a->nz);CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatMultAdd_SeqBAIJ_N" 1091 PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1092 { 1093 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1094 PetscScalar *x,*z = 0,*xb,*work,*workt,*y,*zarray; 1095 MatScalar *v; 1096 PetscErrorCode ierr; 1097 PetscInt mbs,i,*idx,*ii,bs=A->bs,j,n,bs2=a->bs2; 1098 PetscInt ncols,k,*ridx=PETSC_NULL; 1099 PetscTruth usecprow=a->compressedrow.use; 1100 1101 PetscFunctionBegin; 1102 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1103 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 1104 if (zz != yy) { 1105 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1106 ierr = PetscMemcpy(zarray,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr); 1107 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1108 } 1109 1110 idx = a->j; 1111 v = a->a; 1112 if (usecprow){ 1113 mbs = a->compressedrow.nrows; 1114 ii = a->compressedrow.i; 1115 ridx = a->compressedrow.rindex; 1116 } else { 1117 mbs = a->mbs; 1118 ii = a->i; 1119 z = zarray; 1120 } 1121 1122 if (!a->mult_work) { 1123 k = PetscMax(A->m,A->n); 1124 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 1125 } 1126 work = a->mult_work; 1127 for (i=0; i<mbs; i++) { 1128 n = ii[1] - ii[0]; ii++; 1129 ncols = n*bs; 1130 workt = work; 1131 for (j=0; j<n; j++) { 1132 xb = x + bs*(*idx++); 1133 for (k=0; k<bs; k++) workt[k] = xb[k]; 1134 workt += bs; 1135 } 1136 if (usecprow) z = zarray + bs*ridx[i]; 1137 Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 1138 /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */ 1139 v += n*bs2; 1140 if (!usecprow){ 1141 z += bs; 1142 } 1143 } 1144 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1145 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 1146 ierr = PetscLogFlops(2*a->nz*bs2);CHKERRQ(ierr); 1147 PetscFunctionReturn(0); 1148 } 1149 1150 #undef __FUNCT__ 1151 #define __FUNCT__ "MatMultTranspose_SeqBAIJ" 1152 PetscErrorCode MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz) 1153 { 1154 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1155 PetscScalar zero = 0.0; 1156 PetscErrorCode ierr; 1157 1158 PetscFunctionBegin; 1159 ierr = VecSet(zz,zero);CHKERRQ(ierr); 1160 ierr = MatMultTransposeAdd_SeqBAIJ(A,xx,zz,zz);CHKERRQ(ierr); 1161 1162 ierr = PetscLogFlops(2*a->nz*a->bs2 - A->n);CHKERRQ(ierr); 1163 PetscFunctionReturn(0); 1164 } 1165 1166 #undef __FUNCT__ 1167 #define __FUNCT__ "MatMultTransposeAdd_SeqBAIJ" 1168 PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1169 1170 { 1171 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1172 PetscScalar *zb,*x,*z,*xb = 0,x1,x2,x3,x4,x5; 1173 MatScalar *v; 1174 PetscErrorCode ierr; 1175 PetscInt mbs,i,*idx,*ii,rval,bs=A->bs,j,n,bs2=a->bs2,*ib,*ridx=PETSC_NULL; 1176 Mat_CompressedRow cprow = a->compressedrow; 1177 PetscTruth usecprow=cprow.use; 1178 1179 PetscFunctionBegin; 1180 if (yy != zz) { ierr = VecCopy(yy,zz);CHKERRQ(ierr); } 1181 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1182 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 1183 1184 idx = a->j; 1185 v = a->a; 1186 if (usecprow){ 1187 mbs = cprow.nrows; 1188 ii = cprow.i; 1189 ridx = cprow.rindex; 1190 } else { 1191 mbs=a->mbs; 1192 ii = a->i; 1193 xb = x; 1194 } 1195 1196 switch (bs) { 1197 case 1: 1198 for (i=0; i<mbs; i++) { 1199 if (usecprow) xb = x + ridx[i]; 1200 x1 = xb[0]; 1201 ib = idx + ii[0]; 1202 n = ii[1] - ii[0]; ii++; 1203 for (j=0; j<n; j++) { 1204 rval = ib[j]; 1205 z[rval] += *v * x1; 1206 v++; 1207 } 1208 if (!usecprow) xb++; 1209 } 1210 break; 1211 case 2: 1212 for (i=0; i<mbs; i++) { 1213 if (usecprow) xb = x + 2*ridx[i]; 1214 x1 = xb[0]; x2 = xb[1]; 1215 ib = idx + ii[0]; 1216 n = ii[1] - ii[0]; ii++; 1217 for (j=0; j<n; j++) { 1218 rval = ib[j]*2; 1219 z[rval++] += v[0]*x1 + v[1]*x2; 1220 z[rval++] += v[2]*x1 + v[3]*x2; 1221 v += 4; 1222 } 1223 if (!usecprow) xb += 2; 1224 } 1225 break; 1226 case 3: 1227 for (i=0; i<mbs; i++) { 1228 if (usecprow) xb = x + 3*ridx[i]; 1229 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1230 ib = idx + ii[0]; 1231 n = ii[1] - ii[0]; ii++; 1232 for (j=0; j<n; j++) { 1233 rval = ib[j]*3; 1234 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1235 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1236 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1237 v += 9; 1238 } 1239 if (!usecprow) xb += 3; 1240 } 1241 break; 1242 case 4: 1243 for (i=0; i<mbs; i++) { 1244 if (usecprow) xb = x + 4*ridx[i]; 1245 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1246 ib = idx + ii[0]; 1247 n = ii[1] - ii[0]; ii++; 1248 for (j=0; j<n; j++) { 1249 rval = ib[j]*4; 1250 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1251 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1252 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1253 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1254 v += 16; 1255 } 1256 if (!usecprow) xb += 4; 1257 } 1258 break; 1259 case 5: 1260 for (i=0; i<mbs; i++) { 1261 if (usecprow) xb = x + 5*ridx[i]; 1262 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1263 x4 = xb[3]; x5 = xb[4]; 1264 ib = idx + ii[0]; 1265 n = ii[1] - ii[0]; ii++; 1266 for (j=0; j<n; j++) { 1267 rval = ib[j]*5; 1268 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1269 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1270 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1271 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1272 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1273 v += 25; 1274 } 1275 if (!usecprow) xb += 5; 1276 } 1277 break; 1278 default: { /* block sizes larger then 5 by 5 are handled by BLAS */ 1279 PetscInt ncols,k; 1280 PetscScalar *work,*workt,*xtmp; 1281 1282 if (!a->mult_work) { 1283 k = PetscMax(A->m,A->n); 1284 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 1285 } 1286 work = a->mult_work; 1287 xtmp = x; 1288 for (i=0; i<mbs; i++) { 1289 n = ii[1] - ii[0]; ii++; 1290 ncols = n*bs; 1291 ierr = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 1292 if (usecprow) { 1293 xtmp = x + bs*ridx[i]; 1294 } 1295 Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,xtmp,v,work); 1296 /* BLASgemv_("T",&bs,&ncols,&_DOne,v,&bs,xtmp,&_One,&_DOne,work,&_One); */ 1297 v += n*bs2; 1298 if (!usecprow) xtmp += bs; 1299 workt = work; 1300 for (j=0; j<n; j++) { 1301 zb = z + bs*(*idx++); 1302 for (k=0; k<bs; k++) zb[k] += workt[k] ; 1303 workt += bs; 1304 } 1305 } 1306 } 1307 } 1308 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1309 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 1310 ierr = PetscLogFlops(2*a->nz*a->bs2);CHKERRQ(ierr); 1311 PetscFunctionReturn(0); 1312 } 1313 1314 #undef __FUNCT__ 1315 #define __FUNCT__ "MatScale_SeqBAIJ" 1316 PetscErrorCode MatScale_SeqBAIJ(Mat inA,PetscScalar alpha) 1317 { 1318 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1319 PetscInt totalnz = a->bs2*a->nz; 1320 PetscScalar oalpha = alpha; 1321 PetscErrorCode ierr; 1322 #if defined(PETSC_USE_MAT_SINGLE) 1323 PetscInt i; 1324 #else 1325 PetscBLASInt tnz = (PetscBLASInt) totalnz,one = 1; 1326 #endif 1327 1328 PetscFunctionBegin; 1329 #if defined(PETSC_USE_MAT_SINGLE) 1330 for (i=0; i<totalnz; i++) a->a[i] *= oalpha; 1331 #else 1332 BLASscal_(&tnz,&oalpha,a->a,&one); 1333 #endif 1334 ierr = PetscLogFlops(totalnz);CHKERRQ(ierr); 1335 PetscFunctionReturn(0); 1336 } 1337 1338 #undef __FUNCT__ 1339 #define __FUNCT__ "MatNorm_SeqBAIJ" 1340 PetscErrorCode MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm) 1341 { 1342 PetscErrorCode ierr; 1343 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1344 MatScalar *v = a->a; 1345 PetscReal sum = 0.0; 1346 PetscInt i,j,k,bs=A->bs,nz=a->nz,bs2=a->bs2,k1; 1347 1348 PetscFunctionBegin; 1349 if (type == NORM_FROBENIUS) { 1350 for (i=0; i< bs2*nz; i++) { 1351 #if defined(PETSC_USE_COMPLEX) 1352 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1353 #else 1354 sum += (*v)*(*v); v++; 1355 #endif 1356 } 1357 *norm = sqrt(sum); 1358 } else if (type == NORM_1) { /* maximum column sum */ 1359 PetscReal *tmp; 1360 PetscInt *bcol = a->j; 1361 ierr = PetscMalloc((A->n+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1362 ierr = PetscMemzero(tmp,A->n*sizeof(PetscReal));CHKERRQ(ierr); 1363 for (i=0; i<nz; i++){ 1364 for (j=0; j<bs; j++){ 1365 k1 = bs*(*bcol) + j; /* column index */ 1366 for (k=0; k<bs; k++){ 1367 tmp[k1] += PetscAbsScalar(*v); v++; 1368 } 1369 } 1370 bcol++; 1371 } 1372 *norm = 0.0; 1373 for (j=0; j<A->n; j++) { 1374 if (tmp[j] > *norm) *norm = tmp[j]; 1375 } 1376 ierr = PetscFree(tmp);CHKERRQ(ierr); 1377 } else if (type == NORM_INFINITY) { /* maximum row sum */ 1378 *norm = 0.0; 1379 for (k=0; k<bs; k++) { 1380 for (j=0; j<a->mbs; j++) { 1381 v = a->a + bs2*a->i[j] + k; 1382 sum = 0.0; 1383 for (i=0; i<a->i[j+1]-a->i[j]; i++) { 1384 for (k1=0; k1<bs; k1++){ 1385 sum += PetscAbsScalar(*v); 1386 v += bs; 1387 } 1388 } 1389 if (sum > *norm) *norm = sum; 1390 } 1391 } 1392 } else { 1393 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 1394 } 1395 PetscFunctionReturn(0); 1396 } 1397 1398 1399 #undef __FUNCT__ 1400 #define __FUNCT__ "MatEqual_SeqBAIJ" 1401 PetscErrorCode MatEqual_SeqBAIJ(Mat A,Mat B,PetscTruth* flg) 1402 { 1403 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data,*b = (Mat_SeqBAIJ *)B->data; 1404 PetscErrorCode ierr; 1405 1406 PetscFunctionBegin; 1407 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1408 if ((A->m != B->m) || (A->n != B->n) || (A->bs != B->bs)|| (a->nz != b->nz)) { 1409 *flg = PETSC_FALSE; 1410 PetscFunctionReturn(0); 1411 } 1412 1413 /* if the a->i are the same */ 1414 ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr); 1415 if (!*flg) { 1416 PetscFunctionReturn(0); 1417 } 1418 1419 /* if a->j are the same */ 1420 ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr); 1421 if (!*flg) { 1422 PetscFunctionReturn(0); 1423 } 1424 /* if a->a are the same */ 1425 ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->bs)*(B->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 1428 } 1429 1430 #undef __FUNCT__ 1431 #define __FUNCT__ "MatGetDiagonal_SeqBAIJ" 1432 PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1433 { 1434 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1435 PetscErrorCode ierr; 1436 PetscInt i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1437 PetscScalar *x,zero = 0.0; 1438 MatScalar *aa,*aa_j; 1439 1440 PetscFunctionBegin; 1441 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1442 bs = A->bs; 1443 aa = a->a; 1444 ai = a->i; 1445 aj = a->j; 1446 ambs = a->mbs; 1447 bs2 = a->bs2; 1448 1449 ierr = VecSet(v,zero);CHKERRQ(ierr); 1450 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1451 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1452 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1453 for (i=0; i<ambs; i++) { 1454 for (j=ai[i]; j<ai[i+1]; j++) { 1455 if (aj[j] == i) { 1456 row = i*bs; 1457 aa_j = aa+j*bs2; 1458 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1459 break; 1460 } 1461 } 1462 } 1463 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1464 PetscFunctionReturn(0); 1465 } 1466 1467 #undef __FUNCT__ 1468 #define __FUNCT__ "MatDiagonalScale_SeqBAIJ" 1469 PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1470 { 1471 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1472 PetscScalar *l,*r,x,*li,*ri; 1473 MatScalar *aa,*v; 1474 PetscErrorCode ierr; 1475 PetscInt i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1476 1477 PetscFunctionBegin; 1478 ai = a->i; 1479 aj = a->j; 1480 aa = a->a; 1481 m = A->m; 1482 n = A->n; 1483 bs = A->bs; 1484 mbs = a->mbs; 1485 bs2 = a->bs2; 1486 if (ll) { 1487 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 1488 ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); 1489 if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 1490 for (i=0; i<mbs; i++) { /* for each block row */ 1491 M = ai[i+1] - ai[i]; 1492 li = l + i*bs; 1493 v = aa + bs2*ai[i]; 1494 for (j=0; j<M; j++) { /* for each block */ 1495 for (k=0; k<bs2; k++) { 1496 (*v++) *= li[k%bs]; 1497 } 1498 } 1499 } 1500 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 1501 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1502 } 1503 1504 if (rr) { 1505 ierr = VecGetArray(rr,&r);CHKERRQ(ierr); 1506 ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr); 1507 if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 1508 for (i=0; i<mbs; i++) { /* for each block row */ 1509 M = ai[i+1] - ai[i]; 1510 v = aa + bs2*ai[i]; 1511 for (j=0; j<M; j++) { /* for each block */ 1512 ri = r + bs*aj[ai[i]+j]; 1513 for (k=0; k<bs; k++) { 1514 x = ri[k]; 1515 for (tmp=0; tmp<bs; tmp++) (*v++) *= x; 1516 } 1517 } 1518 } 1519 ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr); 1520 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1521 } 1522 PetscFunctionReturn(0); 1523 } 1524 1525 1526 #undef __FUNCT__ 1527 #define __FUNCT__ "MatGetInfo_SeqBAIJ" 1528 PetscErrorCode MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1529 { 1530 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1531 1532 PetscFunctionBegin; 1533 info->rows_global = (double)A->m; 1534 info->columns_global = (double)A->n; 1535 info->rows_local = (double)A->m; 1536 info->columns_local = (double)A->n; 1537 info->block_size = a->bs2; 1538 info->nz_allocated = a->maxnz; 1539 info->nz_used = a->bs2*a->nz; 1540 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1541 info->assemblies = A->num_ass; 1542 info->mallocs = a->reallocs; 1543 info->memory = A->mem; 1544 if (A->factor) { 1545 info->fill_ratio_given = A->info.fill_ratio_given; 1546 info->fill_ratio_needed = A->info.fill_ratio_needed; 1547 info->factor_mallocs = A->info.factor_mallocs; 1548 } else { 1549 info->fill_ratio_given = 0; 1550 info->fill_ratio_needed = 0; 1551 info->factor_mallocs = 0; 1552 } 1553 PetscFunctionReturn(0); 1554 } 1555 1556 1557 #undef __FUNCT__ 1558 #define __FUNCT__ "MatZeroEntries_SeqBAIJ" 1559 PetscErrorCode MatZeroEntries_SeqBAIJ(Mat A) 1560 { 1561 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1562 PetscErrorCode ierr; 1563 1564 PetscFunctionBegin; 1565 ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); 1566 PetscFunctionReturn(0); 1567 } 1568