1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/seq/baij.h" 4 #include "src/inline/spops.h" 5 #include "src/inline/ilu.h" 6 #include "petscbt.h" 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatIncreaseOverlap_SeqBAIJ" 10 PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov) 11 { 12 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 13 PetscErrorCode ierr; 14 PetscInt row,i,j,k,l,m,n,*idx,*nidx,isz,val,ival; 15 PetscInt start,end,*ai,*aj,bs,*nidx2; 16 PetscBT table; 17 18 PetscFunctionBegin; 19 m = a->mbs; 20 ai = a->i; 21 aj = a->j; 22 bs = A->rmap.bs; 23 24 if (ov < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified"); 25 26 ierr = PetscBTCreate(m,table);CHKERRQ(ierr); 27 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr); 28 ierr = PetscMalloc((A->rmap.N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr); 29 30 for (i=0; i<is_max; i++) { 31 /* Initialise the two local arrays */ 32 isz = 0; 33 ierr = PetscBTMemzero(m,table);CHKERRQ(ierr); 34 35 /* Extract the indices, assume there can be duplicate entries */ 36 ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr); 37 ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr); 38 39 /* Enter these into the temp arrays i.e mark table[row], enter row into new index */ 40 for (j=0; j<n ; ++j){ 41 ival = idx[j]/bs; /* convert the indices into block indices */ 42 if (ival>=m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim"); 43 if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;} 44 } 45 ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr); 46 ierr = ISDestroy(is[i]);CHKERRQ(ierr); 47 48 k = 0; 49 for (j=0; j<ov; j++){ /* for each overlap*/ 50 n = isz; 51 for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */ 52 row = nidx[k]; 53 start = ai[row]; 54 end = ai[row+1]; 55 for (l = start; l<end ; l++){ 56 val = aj[l]; 57 if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;} 58 } 59 } 60 } 61 /* expand the Index Set */ 62 for (j=0; j<isz; j++) { 63 for (k=0; k<bs; k++) 64 nidx2[j*bs+k] = nidx[j]*bs+k; 65 } 66 ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr); 67 } 68 ierr = PetscBTDestroy(table);CHKERRQ(ierr); 69 ierr = PetscFree(nidx);CHKERRQ(ierr); 70 ierr = PetscFree(nidx2);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ_Private" 76 PetscErrorCode MatGetSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 77 { 78 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*c; 79 PetscErrorCode ierr; 80 PetscInt *smap,i,k,kstart,kend,oldcols = a->nbs,*lens; 81 PetscInt row,mat_i,*mat_j,tcol,*mat_ilen; 82 PetscInt *irow,*icol,nrows,ncols,*ssmap,bs=A->rmap.bs,bs2=a->bs2; 83 PetscInt *aj = a->j,*ai = a->i; 84 MatScalar *mat_a; 85 Mat C; 86 PetscTruth flag; 87 88 PetscFunctionBegin; 89 ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr); 90 if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted"); 91 92 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 93 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 94 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 95 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 96 97 ierr = PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);CHKERRQ(ierr); 98 ssmap = smap; 99 ierr = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 100 ierr = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr); 101 for (i=0; i<ncols; i++) smap[icol[i]] = i+1; 102 /* determine lens of each row */ 103 for (i=0; i<nrows; i++) { 104 kstart = ai[irow[i]]; 105 kend = kstart + a->ilen[irow[i]]; 106 lens[i] = 0; 107 for (k=kstart; k<kend; k++) { 108 if (ssmap[aj[k]]) { 109 lens[i]++; 110 } 111 } 112 } 113 /* Create and fill new matrix */ 114 if (scall == MAT_REUSE_MATRIX) { 115 c = (Mat_SeqBAIJ *)((*B)->data); 116 117 if (c->mbs!=nrows || c->nbs!=ncols || (*B)->rmap.bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size"); 118 ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr); 119 if (!flag) { 120 SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); 121 } 122 ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr); 123 C = *B; 124 } else { 125 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 126 ierr = MatSetSizes(C,nrows*bs,ncols*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 127 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 128 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,lens);CHKERRQ(ierr); 129 } 130 c = (Mat_SeqBAIJ *)(C->data); 131 for (i=0; i<nrows; i++) { 132 row = irow[i]; 133 kstart = ai[row]; 134 kend = kstart + a->ilen[row]; 135 mat_i = c->i[i]; 136 mat_j = c->j + mat_i; 137 mat_a = c->a + mat_i*bs2; 138 mat_ilen = c->ilen + i; 139 for (k=kstart; k<kend; k++) { 140 if ((tcol=ssmap[a->j[k]])) { 141 *mat_j++ = tcol - 1; 142 ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 143 mat_a += bs2; 144 (*mat_ilen)++; 145 } 146 } 147 } 148 149 /* Free work space */ 150 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 151 ierr = PetscFree(smap);CHKERRQ(ierr); 152 ierr = PetscFree(lens);CHKERRQ(ierr); 153 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 154 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 155 156 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 157 *B = C; 158 PetscFunctionReturn(0); 159 } 160 161 #undef __FUNCT__ 162 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ" 163 PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 164 { 165 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 166 IS is1,is2; 167 PetscErrorCode ierr; 168 PetscInt *vary,*iary,*irow,*icol,nrows,ncols,i,bs=A->rmap.bs,count; 169 170 PetscFunctionBegin; 171 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 172 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 173 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 174 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 175 176 /* Verify if the indices corespond to each element in a block 177 and form the IS with compressed IS */ 178 ierr = PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);CHKERRQ(ierr); 179 iary = vary + a->mbs; 180 ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr); 181 for (i=0; i<nrows; i++) vary[irow[i]/bs]++; 182 count = 0; 183 for (i=0; i<a->mbs; i++) { 184 if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Index set does not match blocks"); 185 if (vary[i]==bs) iary[count++] = i; 186 } 187 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr); 188 189 ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr); 190 for (i=0; i<ncols; i++) vary[icol[i]/bs]++; 191 count = 0; 192 for (i=0; i<a->mbs; i++) { 193 if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_PLIB,"Internal error in PETSc"); 194 if (vary[i]==bs) iary[count++] = i; 195 } 196 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is2);CHKERRQ(ierr); 197 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 198 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 199 ierr = PetscFree(vary);CHKERRQ(ierr); 200 201 ierr = MatGetSubMatrix_SeqBAIJ_Private(A,is1,is2,cs,scall,B);CHKERRQ(ierr); 202 ISDestroy(is1); 203 ISDestroy(is2); 204 PetscFunctionReturn(0); 205 } 206 207 #undef __FUNCT__ 208 #define __FUNCT__ "MatGetSubMatrices_SeqBAIJ" 209 PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 210 { 211 PetscErrorCode ierr; 212 PetscInt i; 213 214 PetscFunctionBegin; 215 if (scall == MAT_INITIAL_MATRIX) { 216 ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr); 217 } 218 219 for (i=0; i<n; i++) { 220 ierr = MatGetSubMatrix_SeqBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr); 221 } 222 PetscFunctionReturn(0); 223 } 224 225 226 /* -------------------------------------------------------*/ 227 /* Should check that shapes of vectors and matrices match */ 228 /* -------------------------------------------------------*/ 229 #include "petscblaslapack.h" 230 231 #undef __FUNCT__ 232 #define __FUNCT__ "MatMult_SeqBAIJ_1" 233 PetscErrorCode MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 234 { 235 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 236 PetscScalar *x,*z,sum; 237 MatScalar *v; 238 PetscErrorCode ierr; 239 PetscInt mbs,i,*idx,*ii,n,*ridx=PETSC_NULL; 240 PetscTruth usecprow=a->compressedrow.use; 241 242 PetscFunctionBegin; 243 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 244 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 245 246 idx = a->j; 247 v = a->a; 248 if (usecprow){ 249 mbs = a->compressedrow.nrows; 250 ii = a->compressedrow.i; 251 ridx = a->compressedrow.rindex; 252 } else { 253 mbs = a->mbs; 254 ii = a->i; 255 } 256 257 for (i=0; i<mbs; i++) { 258 n = ii[1] - ii[0]; ii++; 259 sum = 0.0; 260 while (n--) sum += *v++ * x[*idx++]; 261 if (usecprow){ 262 z[ridx[i]] = sum; 263 } else { 264 z[i] = sum; 265 } 266 } 267 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 268 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 269 ierr = PetscLogFlops(2*a->nz - mbs);CHKERRQ(ierr); 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatMult_SeqBAIJ_2" 275 PetscErrorCode MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 276 { 277 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 278 PetscScalar *x,*z = 0,*xb,sum1,sum2,*zarray; 279 PetscScalar x1,x2; 280 MatScalar *v; 281 PetscErrorCode ierr; 282 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 283 PetscTruth usecprow=a->compressedrow.use; 284 285 PetscFunctionBegin; 286 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 287 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 288 289 idx = a->j; 290 v = a->a; 291 if (usecprow){ 292 mbs = a->compressedrow.nrows; 293 ii = a->compressedrow.i; 294 ridx = a->compressedrow.rindex; 295 } else { 296 mbs = a->mbs; 297 ii = a->i; 298 z = zarray; 299 } 300 301 for (i=0; i<mbs; i++) { 302 n = ii[1] - ii[0]; ii++; 303 sum1 = 0.0; sum2 = 0.0; 304 for (j=0; j<n; j++) { 305 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 306 sum1 += v[0]*x1 + v[2]*x2; 307 sum2 += v[1]*x1 + v[3]*x2; 308 v += 4; 309 } 310 if (usecprow) z = zarray + 2*ridx[i]; 311 z[0] = sum1; z[1] = sum2; 312 if (!usecprow) z += 2; 313 } 314 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 315 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 316 ierr = PetscLogFlops(8*a->nz - 2*mbs);CHKERRQ(ierr); 317 PetscFunctionReturn(0); 318 } 319 320 #undef __FUNCT__ 321 #define __FUNCT__ "MatMult_SeqBAIJ_3" 322 PetscErrorCode MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 323 { 324 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 325 PetscScalar *x,*z = 0,*xb,sum1,sum2,sum3,x1,x2,x3,*zarray; 326 MatScalar *v; 327 PetscErrorCode ierr; 328 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 329 PetscTruth usecprow=a->compressedrow.use; 330 331 332 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 333 #pragma disjoint(*v,*z,*xb) 334 #endif 335 336 PetscFunctionBegin; 337 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 338 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 339 340 idx = a->j; 341 v = a->a; 342 if (usecprow){ 343 mbs = a->compressedrow.nrows; 344 ii = a->compressedrow.i; 345 ridx = a->compressedrow.rindex; 346 } else { 347 mbs = a->mbs; 348 ii = a->i; 349 z = zarray; 350 } 351 352 for (i=0; i<mbs; i++) { 353 n = ii[1] - ii[0]; ii++; 354 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 355 for (j=0; j<n; j++) { 356 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 357 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 358 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 359 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 360 v += 9; 361 } 362 if (usecprow) z = zarray + 3*ridx[i]; 363 z[0] = sum1; z[1] = sum2; z[2] = sum3; 364 if (!usecprow) z += 3; 365 } 366 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 367 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 368 ierr = PetscLogFlops(18*a->nz - 3*mbs);CHKERRQ(ierr); 369 PetscFunctionReturn(0); 370 } 371 372 #undef __FUNCT__ 373 #define __FUNCT__ "MatMult_SeqBAIJ_4" 374 PetscErrorCode MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 375 { 376 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 377 PetscScalar *x,*z = 0,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*zarray; 378 MatScalar *v; 379 PetscErrorCode ierr; 380 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 381 PetscTruth usecprow=a->compressedrow.use; 382 383 PetscFunctionBegin; 384 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 385 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 386 387 idx = a->j; 388 v = a->a; 389 if (usecprow){ 390 mbs = a->compressedrow.nrows; 391 ii = a->compressedrow.i; 392 ridx = a->compressedrow.rindex; 393 } else { 394 mbs = a->mbs; 395 ii = a->i; 396 z = zarray; 397 } 398 399 for (i=0; i<mbs; i++) { 400 n = ii[1] - ii[0]; ii++; 401 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 402 for (j=0; j<n; j++) { 403 xb = x + 4*(*idx++); 404 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 405 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 406 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 407 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 408 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 409 v += 16; 410 } 411 if (usecprow) z = zarray + 4*ridx[i]; 412 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 413 if (!usecprow) z += 4; 414 } 415 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 416 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 417 ierr = PetscLogFlops(32*a->nz - 4*mbs);CHKERRQ(ierr); 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatMult_SeqBAIJ_5" 423 PetscErrorCode MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 424 { 425 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 426 PetscScalar sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*xb,*z = 0,*x,*zarray; 427 MatScalar *v; 428 PetscErrorCode ierr; 429 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 430 PetscTruth usecprow=a->compressedrow.use; 431 432 PetscFunctionBegin; 433 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 434 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 435 436 idx = a->j; 437 v = a->a; 438 if (usecprow){ 439 mbs = a->compressedrow.nrows; 440 ii = a->compressedrow.i; 441 ridx = a->compressedrow.rindex; 442 } else { 443 mbs = a->mbs; 444 ii = a->i; 445 z = zarray; 446 } 447 448 for (i=0; i<mbs; i++) { 449 n = ii[1] - ii[0]; ii++; 450 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 451 for (j=0; j<n; j++) { 452 xb = x + 5*(*idx++); 453 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 454 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 455 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 456 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 457 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 458 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 459 v += 25; 460 } 461 if (usecprow) z = zarray + 5*ridx[i]; 462 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 463 if (!usecprow) z += 5; 464 } 465 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 466 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 467 ierr = PetscLogFlops(50*a->nz - 5*mbs);CHKERRQ(ierr); 468 PetscFunctionReturn(0); 469 } 470 471 472 #undef __FUNCT__ 473 #define __FUNCT__ "MatMult_SeqBAIJ_6" 474 PetscErrorCode MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz) 475 { 476 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 477 PetscScalar *x,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6; 478 PetscScalar x1,x2,x3,x4,x5,x6,*zarray; 479 MatScalar *v; 480 PetscErrorCode ierr; 481 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 482 PetscTruth usecprow=a->compressedrow.use; 483 484 PetscFunctionBegin; 485 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 486 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 487 488 idx = a->j; 489 v = a->a; 490 if (usecprow){ 491 mbs = a->compressedrow.nrows; 492 ii = a->compressedrow.i; 493 ridx = a->compressedrow.rindex; 494 } else { 495 mbs = a->mbs; 496 ii = a->i; 497 z = zarray; 498 } 499 500 for (i=0; i<mbs; i++) { 501 n = ii[1] - ii[0]; ii++; 502 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; 503 for (j=0; j<n; j++) { 504 xb = x + 6*(*idx++); 505 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; 506 sum1 += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 507 sum2 += v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 508 sum3 += v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 509 sum4 += v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 510 sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 511 sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 512 v += 36; 513 } 514 if (usecprow) z = zarray + 6*ridx[i]; 515 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; 516 if (!usecprow) z += 6; 517 } 518 519 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 520 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 521 ierr = PetscLogFlops(72*a->nz - 6*mbs);CHKERRQ(ierr); 522 PetscFunctionReturn(0); 523 } 524 #undef __FUNCT__ 525 #define __FUNCT__ "MatMult_SeqBAIJ_7" 526 PetscErrorCode MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 527 { 528 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 529 PetscScalar *x,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 530 PetscScalar x1,x2,x3,x4,x5,x6,x7,*zarray; 531 MatScalar *v; 532 PetscErrorCode ierr; 533 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 534 PetscTruth usecprow=a->compressedrow.use; 535 536 PetscFunctionBegin; 537 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 538 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 539 540 idx = a->j; 541 v = a->a; 542 if (usecprow){ 543 mbs = a->compressedrow.nrows; 544 ii = a->compressedrow.i; 545 ridx = a->compressedrow.rindex; 546 } else { 547 mbs = a->mbs; 548 ii = a->i; 549 z = zarray; 550 } 551 552 for (i=0; i<mbs; i++) { 553 n = ii[1] - ii[0]; ii++; 554 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 555 for (j=0; j<n; j++) { 556 xb = x + 7*(*idx++); 557 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 558 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 559 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 560 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 561 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 562 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 563 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 564 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 565 v += 49; 566 } 567 if (usecprow) z = zarray + 7*ridx[i]; 568 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 569 if (!usecprow) z += 7; 570 } 571 572 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 573 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 574 ierr = PetscLogFlops(98*a->nz - 7*mbs);CHKERRQ(ierr); 575 PetscFunctionReturn(0); 576 } 577 578 /* 579 This will not work with MatScalar == float because it calls the BLAS 580 */ 581 #undef __FUNCT__ 582 #define __FUNCT__ "MatMult_SeqBAIJ_N" 583 PetscErrorCode MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 584 { 585 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 586 PetscScalar *x,*z = 0,*xb,*work,*workt,*zarray; 587 MatScalar *v; 588 PetscErrorCode ierr; 589 PetscInt mbs=a->mbs,i,*idx,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2; 590 PetscInt ncols,k,*ridx=PETSC_NULL; 591 PetscTruth usecprow=a->compressedrow.use; 592 593 PetscFunctionBegin; 594 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 595 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 596 597 idx = a->j; 598 v = a->a; 599 if (usecprow){ 600 mbs = a->compressedrow.nrows; 601 ii = a->compressedrow.i; 602 ridx = a->compressedrow.rindex; 603 } else { 604 mbs = a->mbs; 605 ii = a->i; 606 z = zarray; 607 } 608 609 if (!a->mult_work) { 610 k = PetscMax(A->rmap.n,A->cmap.n); 611 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 612 } 613 work = a->mult_work; 614 for (i=0; i<mbs; i++) { 615 n = ii[1] - ii[0]; ii++; 616 ncols = n*bs; 617 workt = work; 618 for (j=0; j<n; j++) { 619 xb = x + bs*(*idx++); 620 for (k=0; k<bs; k++) workt[k] = xb[k]; 621 workt += bs; 622 } 623 if (usecprow) z = zarray + bs*ridx[i]; 624 Kernel_w_gets_Ar_times_v(bs,ncols,work,v,z); 625 /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */ 626 v += n*bs2; 627 if (!usecprow) z += bs; 628 } 629 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 630 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 631 ierr = PetscLogFlops(2*a->nz*bs2 - bs*mbs);CHKERRQ(ierr); 632 PetscFunctionReturn(0); 633 } 634 635 extern PetscErrorCode VecCopy_Seq(Vec,Vec); 636 #undef __FUNCT__ 637 #define __FUNCT__ "MatMultAdd_SeqBAIJ_1" 638 PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 639 { 640 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 641 PetscScalar *x,*y = 0,*z = 0,sum,*yarray,*zarray; 642 MatScalar *v; 643 PetscErrorCode ierr; 644 PetscInt mbs=a->mbs,i,*idx,*ii,n,*ridx=PETSC_NULL; 645 PetscTruth usecprow=a->compressedrow.use; 646 647 PetscFunctionBegin; 648 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 649 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 650 if (zz != yy) { 651 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 652 } else { 653 zarray = yarray; 654 } 655 656 idx = a->j; 657 v = a->a; 658 if (usecprow){ 659 if (zz != yy){ 660 ierr = PetscMemcpy(zarray,yarray,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 661 } 662 mbs = a->compressedrow.nrows; 663 ii = a->compressedrow.i; 664 ridx = a->compressedrow.rindex; 665 } else { 666 ii = a->i; 667 y = yarray; 668 z = zarray; 669 } 670 671 for (i=0; i<mbs; i++) { 672 n = ii[1] - ii[0]; ii++; 673 if (usecprow){ 674 z = zarray + ridx[i]; 675 y = yarray + ridx[i]; 676 } 677 sum = y[0]; 678 while (n--) sum += *v++ * x[*idx++]; 679 z[0] = sum; 680 if (!usecprow){ 681 z++; y++; 682 } 683 } 684 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 685 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 686 if (zz != yy) { 687 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 688 } 689 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 690 PetscFunctionReturn(0); 691 } 692 693 #undef __FUNCT__ 694 #define __FUNCT__ "MatMultAdd_SeqBAIJ_2" 695 PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 696 { 697 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 698 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2; 699 PetscScalar x1,x2,*yarray,*zarray; 700 MatScalar *v; 701 PetscErrorCode ierr; 702 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 703 PetscTruth usecprow=a->compressedrow.use; 704 705 PetscFunctionBegin; 706 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 707 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 708 if (zz != yy) { 709 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 710 } else { 711 zarray = yarray; 712 } 713 714 idx = a->j; 715 v = a->a; 716 if (usecprow){ 717 if (zz != yy){ 718 ierr = PetscMemcpy(zarray,yarray,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 719 } 720 mbs = a->compressedrow.nrows; 721 ii = a->compressedrow.i; 722 ridx = a->compressedrow.rindex; 723 if (zz != yy){ 724 ierr = PetscMemcpy(zarray,yarray,a->mbs*sizeof(PetscScalar));CHKERRQ(ierr); 725 } 726 } else { 727 ii = a->i; 728 y = yarray; 729 z = zarray; 730 } 731 732 for (i=0; i<mbs; i++) { 733 n = ii[1] - ii[0]; ii++; 734 if (usecprow){ 735 z = zarray + 2*ridx[i]; 736 y = yarray + 2*ridx[i]; 737 } 738 sum1 = y[0]; sum2 = y[1]; 739 for (j=0; j<n; j++) { 740 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 741 sum1 += v[0]*x1 + v[2]*x2; 742 sum2 += v[1]*x1 + v[3]*x2; 743 v += 4; 744 } 745 z[0] = sum1; z[1] = sum2; 746 if (!usecprow){ 747 z += 2; y += 2; 748 } 749 } 750 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 751 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 752 if (zz != yy) { 753 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 754 } 755 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 756 PetscFunctionReturn(0); 757 } 758 759 #undef __FUNCT__ 760 #define __FUNCT__ "MatMultAdd_SeqBAIJ_3" 761 PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 762 { 763 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 764 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,x1,x2,x3,*yarray,*zarray; 765 MatScalar *v; 766 PetscErrorCode ierr; 767 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 768 PetscTruth usecprow=a->compressedrow.use; 769 770 PetscFunctionBegin; 771 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 772 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 773 if (zz != yy) { 774 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 775 } else { 776 zarray = yarray; 777 } 778 779 idx = a->j; 780 v = a->a; 781 if (usecprow){ 782 if (zz != yy){ 783 ierr = PetscMemcpy(zarray,yarray,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 784 } 785 mbs = a->compressedrow.nrows; 786 ii = a->compressedrow.i; 787 ridx = a->compressedrow.rindex; 788 } else { 789 ii = a->i; 790 y = yarray; 791 z = zarray; 792 } 793 794 for (i=0; i<mbs; i++) { 795 n = ii[1] - ii[0]; ii++; 796 if (usecprow){ 797 z = zarray + 3*ridx[i]; 798 y = yarray + 3*ridx[i]; 799 } 800 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 801 for (j=0; j<n; j++) { 802 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 803 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 804 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 805 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 806 v += 9; 807 } 808 z[0] = sum1; z[1] = sum2; z[2] = sum3; 809 if (!usecprow){ 810 z += 3; y += 3; 811 } 812 } 813 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 814 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 815 if (zz != yy) { 816 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 817 } 818 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 819 PetscFunctionReturn(0); 820 } 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatMultAdd_SeqBAIJ_4" 824 PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 825 { 826 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 827 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*yarray,*zarray; 828 MatScalar *v; 829 PetscErrorCode ierr; 830 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 831 PetscTruth usecprow=a->compressedrow.use; 832 833 PetscFunctionBegin; 834 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 835 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 836 if (zz != yy) { 837 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 838 } else { 839 zarray = yarray; 840 } 841 842 idx = a->j; 843 v = a->a; 844 if (usecprow){ 845 if (zz != yy){ 846 ierr = PetscMemcpy(zarray,yarray,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 847 } 848 mbs = a->compressedrow.nrows; 849 ii = a->compressedrow.i; 850 ridx = a->compressedrow.rindex; 851 } else { 852 ii = a->i; 853 y = yarray; 854 z = zarray; 855 } 856 857 for (i=0; i<mbs; i++) { 858 n = ii[1] - ii[0]; ii++; 859 if (usecprow){ 860 z = zarray + 4*ridx[i]; 861 y = yarray + 4*ridx[i]; 862 } 863 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 864 for (j=0; j<n; j++) { 865 xb = x + 4*(*idx++); 866 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 867 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 868 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 869 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 870 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 871 v += 16; 872 } 873 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 874 if (!usecprow){ 875 z += 4; y += 4; 876 } 877 } 878 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 879 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 880 if (zz != yy) { 881 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 882 } 883 ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 884 PetscFunctionReturn(0); 885 } 886 887 #undef __FUNCT__ 888 #define __FUNCT__ "MatMultAdd_SeqBAIJ_5" 889 PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 890 { 891 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 892 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5; 893 PetscScalar *yarray,*zarray; 894 MatScalar *v; 895 PetscErrorCode ierr; 896 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 897 PetscTruth usecprow=a->compressedrow.use; 898 899 PetscFunctionBegin; 900 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 901 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 902 if (zz != yy) { 903 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 904 } else { 905 zarray = yarray; 906 } 907 908 idx = a->j; 909 v = a->a; 910 if (usecprow){ 911 if (zz != yy){ 912 ierr = PetscMemcpy(zarray,yarray,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 913 } 914 mbs = a->compressedrow.nrows; 915 ii = a->compressedrow.i; 916 ridx = a->compressedrow.rindex; 917 } else { 918 ii = a->i; 919 y = yarray; 920 z = zarray; 921 } 922 923 for (i=0; i<mbs; i++) { 924 n = ii[1] - ii[0]; ii++; 925 if (usecprow){ 926 z = zarray + 5*ridx[i]; 927 y = yarray + 5*ridx[i]; 928 } 929 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 930 for (j=0; j<n; j++) { 931 xb = x + 5*(*idx++); 932 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 933 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 934 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 935 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 936 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 937 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 938 v += 25; 939 } 940 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 941 if (!usecprow){ 942 z += 5; y += 5; 943 } 944 } 945 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 946 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 947 if (zz != yy) { 948 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 949 } 950 ierr = PetscLogFlops(50*a->nz);CHKERRQ(ierr); 951 PetscFunctionReturn(0); 952 } 953 #undef __FUNCT__ 954 #define __FUNCT__ "MatMultAdd_SeqBAIJ_6" 955 PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 956 { 957 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 958 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6; 959 PetscScalar x1,x2,x3,x4,x5,x6,*yarray,*zarray; 960 MatScalar *v; 961 PetscErrorCode ierr; 962 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 963 PetscTruth usecprow=a->compressedrow.use; 964 965 PetscFunctionBegin; 966 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 967 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 968 if (zz != yy) { 969 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 970 } else { 971 zarray = yarray; 972 } 973 974 idx = a->j; 975 v = a->a; 976 if (usecprow){ 977 if (zz != yy){ 978 ierr = PetscMemcpy(zarray,yarray,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 979 } 980 mbs = a->compressedrow.nrows; 981 ii = a->compressedrow.i; 982 ridx = a->compressedrow.rindex; 983 } else { 984 ii = a->i; 985 y = yarray; 986 z = zarray; 987 } 988 989 for (i=0; i<mbs; i++) { 990 n = ii[1] - ii[0]; ii++; 991 if (usecprow){ 992 z = zarray + 6*ridx[i]; 993 y = yarray + 6*ridx[i]; 994 } 995 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; 996 for (j=0; j<n; j++) { 997 xb = x + 6*(*idx++); 998 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; 999 sum1 += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 1000 sum2 += v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 1001 sum3 += v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 1002 sum4 += v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 1003 sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 1004 sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 1005 v += 36; 1006 } 1007 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; 1008 if (!usecprow){ 1009 z += 6; y += 6; 1010 } 1011 } 1012 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1013 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 1014 if (zz != yy) { 1015 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 1016 } 1017 ierr = PetscLogFlops(72*a->nz);CHKERRQ(ierr); 1018 PetscFunctionReturn(0); 1019 } 1020 1021 #undef __FUNCT__ 1022 #define __FUNCT__ "MatMultAdd_SeqBAIJ_7" 1023 PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1024 { 1025 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1026 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1027 PetscScalar x1,x2,x3,x4,x5,x6,x7,*yarray,*zarray; 1028 MatScalar *v; 1029 PetscErrorCode ierr; 1030 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 1031 PetscTruth usecprow=a->compressedrow.use; 1032 1033 PetscFunctionBegin; 1034 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1035 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 1036 if (zz != yy) { 1037 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 1038 } else { 1039 zarray = yarray; 1040 } 1041 1042 idx = a->j; 1043 v = a->a; 1044 if (usecprow){ 1045 if (zz != yy){ 1046 ierr = PetscMemcpy(zarray,yarray,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1047 } 1048 mbs = a->compressedrow.nrows; 1049 ii = a->compressedrow.i; 1050 ridx = a->compressedrow.rindex; 1051 } else { 1052 ii = a->i; 1053 y = yarray; 1054 z = zarray; 1055 } 1056 1057 for (i=0; i<mbs; i++) { 1058 n = ii[1] - ii[0]; ii++; 1059 if (usecprow){ 1060 z = zarray + 7*ridx[i]; 1061 y = yarray + 7*ridx[i]; 1062 } 1063 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 1064 for (j=0; j<n; j++) { 1065 xb = x + 7*(*idx++); 1066 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1067 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1068 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1069 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1070 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1071 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1072 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1073 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1074 v += 49; 1075 } 1076 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1077 if (!usecprow){ 1078 z += 7; y += 7; 1079 } 1080 } 1081 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1082 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 1083 if (zz != yy) { 1084 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 1085 } 1086 ierr = PetscLogFlops(98*a->nz);CHKERRQ(ierr); 1087 PetscFunctionReturn(0); 1088 } 1089 1090 #undef __FUNCT__ 1091 #define __FUNCT__ "MatMultAdd_SeqBAIJ_N" 1092 PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1093 { 1094 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1095 PetscScalar *x,*z = 0,*xb,*work,*workt,*zarray; 1096 MatScalar *v; 1097 PetscErrorCode ierr; 1098 PetscInt mbs,i,*idx,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2; 1099 PetscInt ncols,k,*ridx=PETSC_NULL; 1100 PetscTruth usecprow=a->compressedrow.use; 1101 1102 PetscFunctionBegin; 1103 ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); 1104 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1105 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 1106 1107 idx = a->j; 1108 v = a->a; 1109 if (usecprow){ 1110 mbs = a->compressedrow.nrows; 1111 ii = a->compressedrow.i; 1112 ridx = a->compressedrow.rindex; 1113 } else { 1114 mbs = a->mbs; 1115 ii = a->i; 1116 z = zarray; 1117 } 1118 1119 if (!a->mult_work) { 1120 k = PetscMax(A->rmap.n,A->cmap.n); 1121 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 1122 } 1123 work = a->mult_work; 1124 for (i=0; i<mbs; i++) { 1125 n = ii[1] - ii[0]; ii++; 1126 ncols = n*bs; 1127 workt = work; 1128 for (j=0; j<n; j++) { 1129 xb = x + bs*(*idx++); 1130 for (k=0; k<bs; k++) workt[k] = xb[k]; 1131 workt += bs; 1132 } 1133 if (usecprow) z = zarray + bs*ridx[i]; 1134 Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 1135 /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */ 1136 v += n*bs2; 1137 if (!usecprow){ 1138 z += bs; 1139 } 1140 } 1141 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1142 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 1143 ierr = PetscLogFlops(2*a->nz*bs2);CHKERRQ(ierr); 1144 PetscFunctionReturn(0); 1145 } 1146 1147 #undef __FUNCT__ 1148 #define __FUNCT__ "MatMultTranspose_SeqBAIJ" 1149 PetscErrorCode MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz) 1150 { 1151 PetscScalar zero = 0.0; 1152 PetscErrorCode ierr; 1153 #if defined (PETSC_USE_LOG) 1154 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1155 #endif 1156 1157 PetscFunctionBegin; 1158 ierr = VecSet(zz,zero);CHKERRQ(ierr); 1159 ierr = MatMultTransposeAdd_SeqBAIJ(A,xx,zz,zz);CHKERRQ(ierr); 1160 1161 ierr = PetscLogFlops(2*a->nz*a->bs2 - A->cmap.n);CHKERRQ(ierr); 1162 PetscFunctionReturn(0); 1163 } 1164 1165 #undef __FUNCT__ 1166 #define __FUNCT__ "MatMultTransposeAdd_SeqBAIJ" 1167 PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1168 1169 { 1170 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1171 PetscScalar *zb,*x,*z,*xb = 0,x1,x2,x3,x4,x5; 1172 MatScalar *v; 1173 PetscErrorCode ierr; 1174 PetscInt mbs,i,*idx,*ii,rval,bs=A->rmap.bs,j,n,bs2=a->bs2,*ib,*ridx=PETSC_NULL; 1175 Mat_CompressedRow cprow = a->compressedrow; 1176 PetscTruth usecprow=cprow.use; 1177 1178 PetscFunctionBegin; 1179 if (yy != zz) { ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); } 1180 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1181 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 1182 1183 idx = a->j; 1184 v = a->a; 1185 if (usecprow){ 1186 mbs = cprow.nrows; 1187 ii = cprow.i; 1188 ridx = cprow.rindex; 1189 } else { 1190 mbs=a->mbs; 1191 ii = a->i; 1192 xb = x; 1193 } 1194 1195 switch (bs) { 1196 case 1: 1197 for (i=0; i<mbs; i++) { 1198 if (usecprow) xb = x + ridx[i]; 1199 x1 = xb[0]; 1200 ib = idx + ii[0]; 1201 n = ii[1] - ii[0]; ii++; 1202 for (j=0; j<n; j++) { 1203 rval = ib[j]; 1204 z[rval] += *v * x1; 1205 v++; 1206 } 1207 if (!usecprow) xb++; 1208 } 1209 break; 1210 case 2: 1211 for (i=0; i<mbs; i++) { 1212 if (usecprow) xb = x + 2*ridx[i]; 1213 x1 = xb[0]; x2 = xb[1]; 1214 ib = idx + ii[0]; 1215 n = ii[1] - ii[0]; ii++; 1216 for (j=0; j<n; j++) { 1217 rval = ib[j]*2; 1218 z[rval++] += v[0]*x1 + v[1]*x2; 1219 z[rval++] += v[2]*x1 + v[3]*x2; 1220 v += 4; 1221 } 1222 if (!usecprow) xb += 2; 1223 } 1224 break; 1225 case 3: 1226 for (i=0; i<mbs; i++) { 1227 if (usecprow) xb = x + 3*ridx[i]; 1228 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1229 ib = idx + ii[0]; 1230 n = ii[1] - ii[0]; ii++; 1231 for (j=0; j<n; j++) { 1232 rval = ib[j]*3; 1233 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1234 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1235 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1236 v += 9; 1237 } 1238 if (!usecprow) xb += 3; 1239 } 1240 break; 1241 case 4: 1242 for (i=0; i<mbs; i++) { 1243 if (usecprow) xb = x + 4*ridx[i]; 1244 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1245 ib = idx + ii[0]; 1246 n = ii[1] - ii[0]; ii++; 1247 for (j=0; j<n; j++) { 1248 rval = ib[j]*4; 1249 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1250 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1251 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1252 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1253 v += 16; 1254 } 1255 if (!usecprow) xb += 4; 1256 } 1257 break; 1258 case 5: 1259 for (i=0; i<mbs; i++) { 1260 if (usecprow) xb = x + 5*ridx[i]; 1261 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1262 x4 = xb[3]; x5 = xb[4]; 1263 ib = idx + ii[0]; 1264 n = ii[1] - ii[0]; ii++; 1265 for (j=0; j<n; j++) { 1266 rval = ib[j]*5; 1267 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1268 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1269 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1270 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1271 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1272 v += 25; 1273 } 1274 if (!usecprow) xb += 5; 1275 } 1276 break; 1277 default: { /* block sizes larger then 5 by 5 are handled by BLAS */ 1278 PetscInt ncols,k; 1279 PetscScalar *work,*workt,*xtmp; 1280 1281 if (!a->mult_work) { 1282 k = PetscMax(A->rmap.n,A->cmap.n); 1283 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 1284 } 1285 work = a->mult_work; 1286 xtmp = x; 1287 for (i=0; i<mbs; i++) { 1288 n = ii[1] - ii[0]; ii++; 1289 ncols = n*bs; 1290 ierr = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 1291 if (usecprow) { 1292 xtmp = x + bs*ridx[i]; 1293 } 1294 Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,xtmp,v,work); 1295 /* BLASgemv_("T",&bs,&ncols,&_DOne,v,&bs,xtmp,&_One,&_DOne,work,&_One); */ 1296 v += n*bs2; 1297 if (!usecprow) xtmp += bs; 1298 workt = work; 1299 for (j=0; j<n; j++) { 1300 zb = z + bs*(*idx++); 1301 for (k=0; k<bs; k++) zb[k] += workt[k] ; 1302 workt += bs; 1303 } 1304 } 1305 } 1306 } 1307 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1308 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 1309 ierr = PetscLogFlops(2*a->nz*a->bs2);CHKERRQ(ierr); 1310 PetscFunctionReturn(0); 1311 } 1312 1313 #undef __FUNCT__ 1314 #define __FUNCT__ "MatScale_SeqBAIJ" 1315 PetscErrorCode MatScale_SeqBAIJ(Mat inA,PetscScalar alpha) 1316 { 1317 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1318 PetscInt totalnz = a->bs2*a->nz; 1319 PetscScalar oalpha = alpha; 1320 PetscErrorCode ierr; 1321 #if defined(PETSC_USE_MAT_SINGLE) 1322 PetscInt i; 1323 #else 1324 PetscBLASInt tnz = (PetscBLASInt) totalnz,one = 1; 1325 #endif 1326 1327 PetscFunctionBegin; 1328 #if defined(PETSC_USE_MAT_SINGLE) 1329 for (i=0; i<totalnz; i++) a->a[i] *= oalpha; 1330 #else 1331 BLASscal_(&tnz,&oalpha,a->a,&one); 1332 #endif 1333 ierr = PetscLogFlops(totalnz);CHKERRQ(ierr); 1334 PetscFunctionReturn(0); 1335 } 1336 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "MatNorm_SeqBAIJ" 1339 PetscErrorCode MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm) 1340 { 1341 PetscErrorCode ierr; 1342 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1343 MatScalar *v = a->a; 1344 PetscReal sum = 0.0; 1345 PetscInt i,j,k,bs=A->rmap.bs,nz=a->nz,bs2=a->bs2,k1; 1346 1347 PetscFunctionBegin; 1348 if (type == NORM_FROBENIUS) { 1349 for (i=0; i< bs2*nz; i++) { 1350 #if defined(PETSC_USE_COMPLEX) 1351 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1352 #else 1353 sum += (*v)*(*v); v++; 1354 #endif 1355 } 1356 *norm = sqrt(sum); 1357 } else if (type == NORM_1) { /* maximum column sum */ 1358 PetscReal *tmp; 1359 PetscInt *bcol = a->j; 1360 ierr = PetscMalloc((A->cmap.n+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1361 ierr = PetscMemzero(tmp,A->cmap.n*sizeof(PetscReal));CHKERRQ(ierr); 1362 for (i=0; i<nz; i++){ 1363 for (j=0; j<bs; j++){ 1364 k1 = bs*(*bcol) + j; /* column index */ 1365 for (k=0; k<bs; k++){ 1366 tmp[k1] += PetscAbsScalar(*v); v++; 1367 } 1368 } 1369 bcol++; 1370 } 1371 *norm = 0.0; 1372 for (j=0; j<A->cmap.n; j++) { 1373 if (tmp[j] > *norm) *norm = tmp[j]; 1374 } 1375 ierr = PetscFree(tmp);CHKERRQ(ierr); 1376 } else if (type == NORM_INFINITY) { /* maximum row sum */ 1377 *norm = 0.0; 1378 for (k=0; k<bs; k++) { 1379 for (j=0; j<a->mbs; j++) { 1380 v = a->a + bs2*a->i[j] + k; 1381 sum = 0.0; 1382 for (i=0; i<a->i[j+1]-a->i[j]; i++) { 1383 for (k1=0; k1<bs; k1++){ 1384 sum += PetscAbsScalar(*v); 1385 v += bs; 1386 } 1387 } 1388 if (sum > *norm) *norm = sum; 1389 } 1390 } 1391 } else { 1392 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 1393 } 1394 PetscFunctionReturn(0); 1395 } 1396 1397 1398 #undef __FUNCT__ 1399 #define __FUNCT__ "MatEqual_SeqBAIJ" 1400 PetscErrorCode MatEqual_SeqBAIJ(Mat A,Mat B,PetscTruth* flg) 1401 { 1402 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data,*b = (Mat_SeqBAIJ *)B->data; 1403 PetscErrorCode ierr; 1404 1405 PetscFunctionBegin; 1406 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1407 if ((A->rmap.N != B->rmap.N) || (A->cmap.n != B->cmap.n) || (A->rmap.bs != B->rmap.bs)|| (a->nz != b->nz)) { 1408 *flg = PETSC_FALSE; 1409 PetscFunctionReturn(0); 1410 } 1411 1412 /* if the a->i are the same */ 1413 ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr); 1414 if (!*flg) { 1415 PetscFunctionReturn(0); 1416 } 1417 1418 /* if a->j are the same */ 1419 ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr); 1420 if (!*flg) { 1421 PetscFunctionReturn(0); 1422 } 1423 /* if a->a are the same */ 1424 ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap.bs)*(B->rmap.bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr); 1425 PetscFunctionReturn(0); 1426 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "MatGetDiagonal_SeqBAIJ" 1431 PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1432 { 1433 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1434 PetscErrorCode ierr; 1435 PetscInt i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1436 PetscScalar *x,zero = 0.0; 1437 MatScalar *aa,*aa_j; 1438 1439 PetscFunctionBegin; 1440 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1441 bs = A->rmap.bs; 1442 aa = a->a; 1443 ai = a->i; 1444 aj = a->j; 1445 ambs = a->mbs; 1446 bs2 = a->bs2; 1447 1448 ierr = VecSet(v,zero);CHKERRQ(ierr); 1449 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1450 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1451 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1452 for (i=0; i<ambs; i++) { 1453 for (j=ai[i]; j<ai[i+1]; j++) { 1454 if (aj[j] == i) { 1455 row = i*bs; 1456 aa_j = aa+j*bs2; 1457 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1458 break; 1459 } 1460 } 1461 } 1462 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1463 PetscFunctionReturn(0); 1464 } 1465 1466 #undef __FUNCT__ 1467 #define __FUNCT__ "MatDiagonalScale_SeqBAIJ" 1468 PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1469 { 1470 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1471 PetscScalar *l,*r,x,*li,*ri; 1472 MatScalar *aa,*v; 1473 PetscErrorCode ierr; 1474 PetscInt i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1475 1476 PetscFunctionBegin; 1477 ai = a->i; 1478 aj = a->j; 1479 aa = a->a; 1480 m = A->rmap.n; 1481 n = A->cmap.n; 1482 bs = A->rmap.bs; 1483 mbs = a->mbs; 1484 bs2 = a->bs2; 1485 if (ll) { 1486 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 1487 ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); 1488 if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 1489 for (i=0; i<mbs; i++) { /* for each block row */ 1490 M = ai[i+1] - ai[i]; 1491 li = l + i*bs; 1492 v = aa + bs2*ai[i]; 1493 for (j=0; j<M; j++) { /* for each block */ 1494 for (k=0; k<bs2; k++) { 1495 (*v++) *= li[k%bs]; 1496 } 1497 } 1498 } 1499 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 1500 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1501 } 1502 1503 if (rr) { 1504 ierr = VecGetArray(rr,&r);CHKERRQ(ierr); 1505 ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr); 1506 if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 1507 for (i=0; i<mbs; i++) { /* for each block row */ 1508 M = ai[i+1] - ai[i]; 1509 v = aa + bs2*ai[i]; 1510 for (j=0; j<M; j++) { /* for each block */ 1511 ri = r + bs*aj[ai[i]+j]; 1512 for (k=0; k<bs; k++) { 1513 x = ri[k]; 1514 for (tmp=0; tmp<bs; tmp++) (*v++) *= x; 1515 } 1516 } 1517 } 1518 ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr); 1519 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1520 } 1521 PetscFunctionReturn(0); 1522 } 1523 1524 1525 #undef __FUNCT__ 1526 #define __FUNCT__ "MatGetInfo_SeqBAIJ" 1527 PetscErrorCode MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1528 { 1529 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1530 1531 PetscFunctionBegin; 1532 info->rows_global = (double)A->rmap.N; 1533 info->columns_global = (double)A->cmap.n; 1534 info->rows_local = (double)A->rmap.n; 1535 info->columns_local = (double)A->cmap.n; 1536 info->block_size = a->bs2; 1537 info->nz_allocated = a->maxnz; 1538 info->nz_used = a->bs2*a->nz; 1539 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1540 info->assemblies = A->num_ass; 1541 info->mallocs = a->reallocs; 1542 info->memory = A->mem; 1543 if (A->factor) { 1544 info->fill_ratio_given = A->info.fill_ratio_given; 1545 info->fill_ratio_needed = A->info.fill_ratio_needed; 1546 info->factor_mallocs = A->info.factor_mallocs; 1547 } else { 1548 info->fill_ratio_given = 0; 1549 info->fill_ratio_needed = 0; 1550 info->factor_mallocs = 0; 1551 } 1552 PetscFunctionReturn(0); 1553 } 1554 1555 1556 #undef __FUNCT__ 1557 #define __FUNCT__ "MatZeroEntries_SeqBAIJ" 1558 PetscErrorCode MatZeroEntries_SeqBAIJ(Mat A) 1559 { 1560 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1561 PetscErrorCode ierr; 1562 1563 PetscFunctionBegin; 1564 ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); 1565 PetscFunctionReturn(0); 1566 } 1567