1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/baij/seq/baij.h" 4 #include "src/inline/spops.h" 5 #include "src/inline/ilu.h" 6 #include "petscbt.h" 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatIncreaseOverlap_SeqBAIJ" 10 PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov) 11 { 12 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 13 PetscErrorCode ierr; 14 PetscInt row,i,j,k,l,m,n,*idx,*nidx,isz,val,ival; 15 PetscInt start,end,*ai,*aj,bs,*nidx2; 16 PetscBT table; 17 18 PetscFunctionBegin; 19 m = a->mbs; 20 ai = a->i; 21 aj = a->j; 22 bs = A->rmap.bs; 23 24 if (ov < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified"); 25 26 ierr = PetscBTCreate(m,table);CHKERRQ(ierr); 27 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr); 28 ierr = PetscMalloc((A->rmap.N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr); 29 30 for (i=0; i<is_max; i++) { 31 /* Initialise the two local arrays */ 32 isz = 0; 33 ierr = PetscBTMemzero(m,table);CHKERRQ(ierr); 34 35 /* Extract the indices, assume there can be duplicate entries */ 36 ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr); 37 ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr); 38 39 /* Enter these into the temp arrays i.e mark table[row], enter row into new index */ 40 for (j=0; j<n ; ++j){ 41 ival = idx[j]/bs; /* convert the indices into block indices */ 42 if (ival>=m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim"); 43 if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;} 44 } 45 ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr); 46 ierr = ISDestroy(is[i]);CHKERRQ(ierr); 47 48 k = 0; 49 for (j=0; j<ov; j++){ /* for each overlap*/ 50 n = isz; 51 for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */ 52 row = nidx[k]; 53 start = ai[row]; 54 end = ai[row+1]; 55 for (l = start; l<end ; l++){ 56 val = aj[l]; 57 if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;} 58 } 59 } 60 } 61 /* expand the Index Set */ 62 for (j=0; j<isz; j++) { 63 for (k=0; k<bs; k++) 64 nidx2[j*bs+k] = nidx[j]*bs+k; 65 } 66 ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr); 67 } 68 ierr = PetscBTDestroy(table);CHKERRQ(ierr); 69 ierr = PetscFree(nidx);CHKERRQ(ierr); 70 ierr = PetscFree(nidx2);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 #undef __FUNCT__ 75 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ_Private" 76 PetscErrorCode MatGetSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 77 { 78 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*c; 79 PetscErrorCode ierr; 80 PetscInt *smap,i,k,kstart,kend,oldcols = a->nbs,*lens; 81 PetscInt row,mat_i,*mat_j,tcol,*mat_ilen; 82 PetscInt *irow,*icol,nrows,ncols,*ssmap,bs=A->rmap.bs,bs2=a->bs2; 83 PetscInt *aj = a->j,*ai = a->i; 84 MatScalar *mat_a; 85 Mat C; 86 PetscTruth flag; 87 88 PetscFunctionBegin; 89 ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr); 90 if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted"); 91 92 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 93 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 94 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 95 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 96 97 ierr = PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);CHKERRQ(ierr); 98 ssmap = smap; 99 ierr = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 100 ierr = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr); 101 for (i=0; i<ncols; i++) smap[icol[i]] = i+1; 102 /* determine lens of each row */ 103 for (i=0; i<nrows; i++) { 104 kstart = ai[irow[i]]; 105 kend = kstart + a->ilen[irow[i]]; 106 lens[i] = 0; 107 for (k=kstart; k<kend; k++) { 108 if (ssmap[aj[k]]) { 109 lens[i]++; 110 } 111 } 112 } 113 /* Create and fill new matrix */ 114 if (scall == MAT_REUSE_MATRIX) { 115 c = (Mat_SeqBAIJ *)((*B)->data); 116 117 if (c->mbs!=nrows || c->nbs!=ncols || (*B)->rmap.bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size"); 118 ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr); 119 if (!flag) { 120 SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros"); 121 } 122 ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr); 123 C = *B; 124 } else { 125 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 126 ierr = MatSetSizes(C,nrows*bs,ncols*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 127 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 128 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,lens);CHKERRQ(ierr); 129 } 130 c = (Mat_SeqBAIJ *)(C->data); 131 for (i=0; i<nrows; i++) { 132 row = irow[i]; 133 kstart = ai[row]; 134 kend = kstart + a->ilen[row]; 135 mat_i = c->i[i]; 136 mat_j = c->j + mat_i; 137 mat_a = c->a + mat_i*bs2; 138 mat_ilen = c->ilen + i; 139 for (k=kstart; k<kend; k++) { 140 if ((tcol=ssmap[a->j[k]])) { 141 *mat_j++ = tcol - 1; 142 ierr = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 143 mat_a += bs2; 144 (*mat_ilen)++; 145 } 146 } 147 } 148 149 /* Free work space */ 150 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 151 ierr = PetscFree(smap);CHKERRQ(ierr); 152 ierr = PetscFree(lens);CHKERRQ(ierr); 153 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 154 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 155 156 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 157 *B = C; 158 PetscFunctionReturn(0); 159 } 160 161 #undef __FUNCT__ 162 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ" 163 PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 164 { 165 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 166 IS is1,is2; 167 PetscErrorCode ierr; 168 PetscInt *vary,*iary,*irow,*icol,nrows,ncols,i,bs=A->rmap.bs,count; 169 170 PetscFunctionBegin; 171 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 172 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 173 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 174 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 175 176 /* Verify if the indices corespond to each element in a block 177 and form the IS with compressed IS */ 178 ierr = PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);CHKERRQ(ierr); 179 iary = vary + a->mbs; 180 ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr); 181 for (i=0; i<nrows; i++) vary[irow[i]/bs]++; 182 count = 0; 183 for (i=0; i<a->mbs; i++) { 184 if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Index set does not match blocks"); 185 if (vary[i]==bs) iary[count++] = i; 186 } 187 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr); 188 189 ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr); 190 for (i=0; i<ncols; i++) vary[icol[i]/bs]++; 191 count = 0; 192 for (i=0; i<a->mbs; i++) { 193 if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_PLIB,"Internal error in PETSc"); 194 if (vary[i]==bs) iary[count++] = i; 195 } 196 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is2);CHKERRQ(ierr); 197 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 198 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 199 ierr = PetscFree(vary);CHKERRQ(ierr); 200 201 ierr = MatGetSubMatrix_SeqBAIJ_Private(A,is1,is2,cs,scall,B);CHKERRQ(ierr); 202 ISDestroy(is1); 203 ISDestroy(is2); 204 PetscFunctionReturn(0); 205 } 206 207 #undef __FUNCT__ 208 #define __FUNCT__ "MatGetSubMatrices_SeqBAIJ" 209 PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 210 { 211 PetscErrorCode ierr; 212 PetscInt i; 213 214 PetscFunctionBegin; 215 if (scall == MAT_INITIAL_MATRIX) { 216 ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr); 217 } 218 219 for (i=0; i<n; i++) { 220 ierr = MatGetSubMatrix_SeqBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr); 221 } 222 PetscFunctionReturn(0); 223 } 224 225 226 /* -------------------------------------------------------*/ 227 /* Should check that shapes of vectors and matrices match */ 228 /* -------------------------------------------------------*/ 229 #include "petscblaslapack.h" 230 231 #undef __FUNCT__ 232 #define __FUNCT__ "MatMult_SeqBAIJ_1" 233 PetscErrorCode MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 234 { 235 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 236 PetscScalar *x,*z,sum; 237 MatScalar *v; 238 PetscErrorCode ierr; 239 PetscInt mbs,i,*idx,*ii,n,*ridx=PETSC_NULL,nonzerorow=0; 240 PetscTruth usecprow=a->compressedrow.use; 241 242 PetscFunctionBegin; 243 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 244 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 245 246 idx = a->j; 247 v = a->a; 248 if (usecprow){ 249 mbs = a->compressedrow.nrows; 250 ii = a->compressedrow.i; 251 ridx = a->compressedrow.rindex; 252 } else { 253 mbs = a->mbs; 254 ii = a->i; 255 } 256 257 for (i=0; i<mbs; i++) { 258 n = ii[1] - ii[0]; ii++; 259 sum = 0.0; 260 nonzerorow += (n>0); 261 while (n--) sum += *v++ * x[*idx++]; 262 if (usecprow){ 263 z[ridx[i]] = sum; 264 } else { 265 z[i] = sum; 266 } 267 } 268 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 269 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 270 ierr = PetscLogFlops(2*a->nz - nonzerorow);CHKERRQ(ierr); 271 PetscFunctionReturn(0); 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatMult_SeqBAIJ_2" 276 PetscErrorCode MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 277 { 278 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 279 PetscScalar *x,*z = 0,*xb,sum1,sum2,*zarray; 280 PetscScalar x1,x2; 281 MatScalar *v; 282 PetscErrorCode ierr; 283 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 284 PetscTruth usecprow=a->compressedrow.use; 285 286 PetscFunctionBegin; 287 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 288 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 289 290 idx = a->j; 291 v = a->a; 292 if (usecprow){ 293 mbs = a->compressedrow.nrows; 294 ii = a->compressedrow.i; 295 ridx = a->compressedrow.rindex; 296 } else { 297 mbs = a->mbs; 298 ii = a->i; 299 z = zarray; 300 } 301 302 for (i=0; i<mbs; i++) { 303 n = ii[1] - ii[0]; ii++; 304 sum1 = 0.0; sum2 = 0.0; 305 nonzerorow += (n>0); 306 for (j=0; j<n; j++) { 307 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 308 sum1 += v[0]*x1 + v[2]*x2; 309 sum2 += v[1]*x1 + v[3]*x2; 310 v += 4; 311 } 312 if (usecprow) z = zarray + 2*ridx[i]; 313 z[0] = sum1; z[1] = sum2; 314 if (!usecprow) z += 2; 315 } 316 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 317 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 318 ierr = PetscLogFlops(8*a->nz - 2*nonzerorow);CHKERRQ(ierr); 319 PetscFunctionReturn(0); 320 } 321 322 #undef __FUNCT__ 323 #define __FUNCT__ "MatMult_SeqBAIJ_3" 324 PetscErrorCode MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 325 { 326 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 327 PetscScalar *x,*z = 0,*xb,sum1,sum2,sum3,x1,x2,x3,*zarray; 328 MatScalar *v; 329 PetscErrorCode ierr; 330 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 331 PetscTruth usecprow=a->compressedrow.use; 332 333 334 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 335 #pragma disjoint(*v,*z,*xb) 336 #endif 337 338 PetscFunctionBegin; 339 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 340 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 341 342 idx = a->j; 343 v = a->a; 344 if (usecprow){ 345 mbs = a->compressedrow.nrows; 346 ii = a->compressedrow.i; 347 ridx = a->compressedrow.rindex; 348 } else { 349 mbs = a->mbs; 350 ii = a->i; 351 z = zarray; 352 } 353 354 for (i=0; i<mbs; i++) { 355 n = ii[1] - ii[0]; ii++; 356 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 357 nonzerorow += (n>0); 358 for (j=0; j<n; j++) { 359 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 360 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 361 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 362 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 363 v += 9; 364 } 365 if (usecprow) z = zarray + 3*ridx[i]; 366 z[0] = sum1; z[1] = sum2; z[2] = sum3; 367 if (!usecprow) z += 3; 368 } 369 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 370 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 371 ierr = PetscLogFlops(18*a->nz - 3*nonzerorow);CHKERRQ(ierr); 372 PetscFunctionReturn(0); 373 } 374 375 #undef __FUNCT__ 376 #define __FUNCT__ "MatMult_SeqBAIJ_4" 377 PetscErrorCode MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 378 { 379 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 380 PetscScalar *x,*z = 0,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*zarray; 381 MatScalar *v; 382 PetscErrorCode ierr; 383 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 384 PetscTruth usecprow=a->compressedrow.use; 385 386 PetscFunctionBegin; 387 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 388 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 389 390 idx = a->j; 391 v = a->a; 392 if (usecprow){ 393 mbs = a->compressedrow.nrows; 394 ii = a->compressedrow.i; 395 ridx = a->compressedrow.rindex; 396 } else { 397 mbs = a->mbs; 398 ii = a->i; 399 z = zarray; 400 } 401 402 for (i=0; i<mbs; i++) { 403 n = ii[1] - ii[0]; ii++; 404 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 405 nonzerorow += (n>0); 406 for (j=0; j<n; j++) { 407 xb = x + 4*(*idx++); 408 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 409 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 410 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 411 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 412 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 413 v += 16; 414 } 415 if (usecprow) z = zarray + 4*ridx[i]; 416 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 417 if (!usecprow) z += 4; 418 } 419 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 420 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 421 ierr = PetscLogFlops(32*a->nz - 4*nonzerorow);CHKERRQ(ierr); 422 PetscFunctionReturn(0); 423 } 424 425 #undef __FUNCT__ 426 #define __FUNCT__ "MatMult_SeqBAIJ_5" 427 PetscErrorCode MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 428 { 429 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 430 PetscScalar sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*xb,*z = 0,*x,*zarray; 431 MatScalar *v; 432 PetscErrorCode ierr; 433 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 434 PetscTruth usecprow=a->compressedrow.use; 435 436 PetscFunctionBegin; 437 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 438 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 439 440 idx = a->j; 441 v = a->a; 442 if (usecprow){ 443 mbs = a->compressedrow.nrows; 444 ii = a->compressedrow.i; 445 ridx = a->compressedrow.rindex; 446 } else { 447 mbs = a->mbs; 448 ii = a->i; 449 z = zarray; 450 } 451 452 for (i=0; i<mbs; i++) { 453 n = ii[1] - ii[0]; ii++; 454 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 455 nonzerorow += (n>0); 456 for (j=0; j<n; j++) { 457 xb = x + 5*(*idx++); 458 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 459 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 460 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 461 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 462 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 463 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 464 v += 25; 465 } 466 if (usecprow) z = zarray + 5*ridx[i]; 467 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 468 if (!usecprow) z += 5; 469 } 470 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 471 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 472 ierr = PetscLogFlops(50*a->nz - 5*nonzerorow);CHKERRQ(ierr); 473 PetscFunctionReturn(0); 474 } 475 476 477 #undef __FUNCT__ 478 #define __FUNCT__ "MatMult_SeqBAIJ_6" 479 PetscErrorCode MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz) 480 { 481 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 482 PetscScalar *x,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6; 483 PetscScalar x1,x2,x3,x4,x5,x6,*zarray; 484 MatScalar *v; 485 PetscErrorCode ierr; 486 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 487 PetscTruth usecprow=a->compressedrow.use; 488 489 PetscFunctionBegin; 490 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 491 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 492 493 idx = a->j; 494 v = a->a; 495 if (usecprow){ 496 mbs = a->compressedrow.nrows; 497 ii = a->compressedrow.i; 498 ridx = a->compressedrow.rindex; 499 } else { 500 mbs = a->mbs; 501 ii = a->i; 502 z = zarray; 503 } 504 505 for (i=0; i<mbs; i++) { 506 n = ii[1] - ii[0]; ii++; 507 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; 508 nonzerorow += (n>0); 509 for (j=0; j<n; j++) { 510 xb = x + 6*(*idx++); 511 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; 512 sum1 += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 513 sum2 += v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 514 sum3 += v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 515 sum4 += v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 516 sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 517 sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 518 v += 36; 519 } 520 if (usecprow) z = zarray + 6*ridx[i]; 521 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; 522 if (!usecprow) z += 6; 523 } 524 525 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 526 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 527 ierr = PetscLogFlops(72*a->nz - 6*nonzerorow);CHKERRQ(ierr); 528 PetscFunctionReturn(0); 529 } 530 #undef __FUNCT__ 531 #define __FUNCT__ "MatMult_SeqBAIJ_7" 532 PetscErrorCode MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 533 { 534 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 535 PetscScalar *x,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 536 PetscScalar x1,x2,x3,x4,x5,x6,x7,*zarray; 537 MatScalar *v; 538 PetscErrorCode ierr; 539 PetscInt mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0; 540 PetscTruth usecprow=a->compressedrow.use; 541 542 PetscFunctionBegin; 543 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 544 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 545 546 idx = a->j; 547 v = a->a; 548 if (usecprow){ 549 mbs = a->compressedrow.nrows; 550 ii = a->compressedrow.i; 551 ridx = a->compressedrow.rindex; 552 } else { 553 mbs = a->mbs; 554 ii = a->i; 555 z = zarray; 556 } 557 558 for (i=0; i<mbs; i++) { 559 n = ii[1] - ii[0]; ii++; 560 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 561 nonzerorow += (n>0); 562 for (j=0; j<n; j++) { 563 xb = x + 7*(*idx++); 564 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 565 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 566 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 567 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 568 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 569 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 570 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 571 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 572 v += 49; 573 } 574 if (usecprow) z = zarray + 7*ridx[i]; 575 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 576 if (!usecprow) z += 7; 577 } 578 579 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 580 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 581 ierr = PetscLogFlops(98*a->nz - 7*nonzerorow);CHKERRQ(ierr); 582 PetscFunctionReturn(0); 583 } 584 585 /* 586 This will not work with MatScalar == float because it calls the BLAS 587 */ 588 #undef __FUNCT__ 589 #define __FUNCT__ "MatMult_SeqBAIJ_N" 590 PetscErrorCode MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 591 { 592 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 593 PetscScalar *x,*z = 0,*xb,*work,*workt,*zarray; 594 MatScalar *v; 595 PetscErrorCode ierr; 596 PetscInt mbs=a->mbs,i,*idx,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2; 597 PetscInt ncols,k,*ridx=PETSC_NULL,nonzerorow=0; 598 PetscTruth usecprow=a->compressedrow.use; 599 600 PetscFunctionBegin; 601 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 602 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 603 604 idx = a->j; 605 v = a->a; 606 if (usecprow){ 607 mbs = a->compressedrow.nrows; 608 ii = a->compressedrow.i; 609 ridx = a->compressedrow.rindex; 610 } else { 611 mbs = a->mbs; 612 ii = a->i; 613 z = zarray; 614 } 615 616 if (!a->mult_work) { 617 k = PetscMax(A->rmap.n,A->cmap.n); 618 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 619 } 620 work = a->mult_work; 621 for (i=0; i<mbs; i++) { 622 n = ii[1] - ii[0]; ii++; 623 ncols = n*bs; 624 workt = work; 625 nonzerorow += (n>0); 626 for (j=0; j<n; j++) { 627 xb = x + bs*(*idx++); 628 for (k=0; k<bs; k++) workt[k] = xb[k]; 629 workt += bs; 630 } 631 if (usecprow) z = zarray + bs*ridx[i]; 632 Kernel_w_gets_Ar_times_v(bs,ncols,work,v,z); 633 /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */ 634 v += n*bs2; 635 if (!usecprow) z += bs; 636 } 637 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 638 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 639 ierr = PetscLogFlops(2*a->nz*bs2 - bs*nonzerorow);CHKERRQ(ierr); 640 PetscFunctionReturn(0); 641 } 642 643 extern PetscErrorCode VecCopy_Seq(Vec,Vec); 644 #undef __FUNCT__ 645 #define __FUNCT__ "MatMultAdd_SeqBAIJ_1" 646 PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 647 { 648 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 649 PetscScalar *x,*y = 0,*z = 0,sum,*yarray,*zarray; 650 MatScalar *v; 651 PetscErrorCode ierr; 652 PetscInt mbs=a->mbs,i,*idx,*ii,n,*ridx=PETSC_NULL; 653 PetscTruth usecprow=a->compressedrow.use; 654 655 PetscFunctionBegin; 656 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 657 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 658 if (zz != yy) { 659 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 660 } else { 661 zarray = yarray; 662 } 663 664 idx = a->j; 665 v = a->a; 666 if (usecprow){ 667 if (zz != yy){ 668 ierr = PetscMemcpy(zarray,yarray,mbs*sizeof(PetscScalar));CHKERRQ(ierr); 669 } 670 mbs = a->compressedrow.nrows; 671 ii = a->compressedrow.i; 672 ridx = a->compressedrow.rindex; 673 } else { 674 ii = a->i; 675 y = yarray; 676 z = zarray; 677 } 678 679 for (i=0; i<mbs; i++) { 680 n = ii[1] - ii[0]; ii++; 681 if (usecprow){ 682 z = zarray + ridx[i]; 683 y = yarray + ridx[i]; 684 } 685 sum = y[0]; 686 while (n--) sum += *v++ * x[*idx++]; 687 z[0] = sum; 688 if (!usecprow){ 689 z++; y++; 690 } 691 } 692 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 693 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 694 if (zz != yy) { 695 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 696 } 697 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 698 PetscFunctionReturn(0); 699 } 700 701 #undef __FUNCT__ 702 #define __FUNCT__ "MatMultAdd_SeqBAIJ_2" 703 PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 704 { 705 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 706 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2; 707 PetscScalar x1,x2,*yarray,*zarray; 708 MatScalar *v; 709 PetscErrorCode ierr; 710 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 711 PetscTruth usecprow=a->compressedrow.use; 712 713 PetscFunctionBegin; 714 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 715 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 716 if (zz != yy) { 717 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 718 } else { 719 zarray = yarray; 720 } 721 722 idx = a->j; 723 v = a->a; 724 if (usecprow){ 725 if (zz != yy){ 726 ierr = PetscMemcpy(zarray,yarray,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 727 } 728 mbs = a->compressedrow.nrows; 729 ii = a->compressedrow.i; 730 ridx = a->compressedrow.rindex; 731 if (zz != yy){ 732 ierr = PetscMemcpy(zarray,yarray,a->mbs*sizeof(PetscScalar));CHKERRQ(ierr); 733 } 734 } else { 735 ii = a->i; 736 y = yarray; 737 z = zarray; 738 } 739 740 for (i=0; i<mbs; i++) { 741 n = ii[1] - ii[0]; ii++; 742 if (usecprow){ 743 z = zarray + 2*ridx[i]; 744 y = yarray + 2*ridx[i]; 745 } 746 sum1 = y[0]; sum2 = y[1]; 747 for (j=0; j<n; j++) { 748 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 749 sum1 += v[0]*x1 + v[2]*x2; 750 sum2 += v[1]*x1 + v[3]*x2; 751 v += 4; 752 } 753 z[0] = sum1; z[1] = sum2; 754 if (!usecprow){ 755 z += 2; y += 2; 756 } 757 } 758 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 759 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 760 if (zz != yy) { 761 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 762 } 763 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 764 PetscFunctionReturn(0); 765 } 766 767 #undef __FUNCT__ 768 #define __FUNCT__ "MatMultAdd_SeqBAIJ_3" 769 PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 770 { 771 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 772 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,x1,x2,x3,*yarray,*zarray; 773 MatScalar *v; 774 PetscErrorCode ierr; 775 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 776 PetscTruth usecprow=a->compressedrow.use; 777 778 PetscFunctionBegin; 779 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 780 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 781 if (zz != yy) { 782 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 783 } else { 784 zarray = yarray; 785 } 786 787 idx = a->j; 788 v = a->a; 789 if (usecprow){ 790 if (zz != yy){ 791 ierr = PetscMemcpy(zarray,yarray,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 792 } 793 mbs = a->compressedrow.nrows; 794 ii = a->compressedrow.i; 795 ridx = a->compressedrow.rindex; 796 } else { 797 ii = a->i; 798 y = yarray; 799 z = zarray; 800 } 801 802 for (i=0; i<mbs; i++) { 803 n = ii[1] - ii[0]; ii++; 804 if (usecprow){ 805 z = zarray + 3*ridx[i]; 806 y = yarray + 3*ridx[i]; 807 } 808 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 809 for (j=0; j<n; j++) { 810 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 811 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 812 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 813 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 814 v += 9; 815 } 816 z[0] = sum1; z[1] = sum2; z[2] = sum3; 817 if (!usecprow){ 818 z += 3; y += 3; 819 } 820 } 821 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 822 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 823 if (zz != yy) { 824 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 825 } 826 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 827 PetscFunctionReturn(0); 828 } 829 830 #undef __FUNCT__ 831 #define __FUNCT__ "MatMultAdd_SeqBAIJ_4" 832 PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 833 { 834 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 835 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*yarray,*zarray; 836 MatScalar *v; 837 PetscErrorCode ierr; 838 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 839 PetscTruth usecprow=a->compressedrow.use; 840 841 PetscFunctionBegin; 842 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 843 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 844 if (zz != yy) { 845 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 846 } else { 847 zarray = yarray; 848 } 849 850 idx = a->j; 851 v = a->a; 852 if (usecprow){ 853 if (zz != yy){ 854 ierr = PetscMemcpy(zarray,yarray,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 855 } 856 mbs = a->compressedrow.nrows; 857 ii = a->compressedrow.i; 858 ridx = a->compressedrow.rindex; 859 } else { 860 ii = a->i; 861 y = yarray; 862 z = zarray; 863 } 864 865 for (i=0; i<mbs; i++) { 866 n = ii[1] - ii[0]; ii++; 867 if (usecprow){ 868 z = zarray + 4*ridx[i]; 869 y = yarray + 4*ridx[i]; 870 } 871 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 872 for (j=0; j<n; j++) { 873 xb = x + 4*(*idx++); 874 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 875 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 876 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 877 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 878 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 879 v += 16; 880 } 881 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 882 if (!usecprow){ 883 z += 4; y += 4; 884 } 885 } 886 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 887 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 888 if (zz != yy) { 889 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 890 } 891 ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 892 PetscFunctionReturn(0); 893 } 894 895 #undef __FUNCT__ 896 #define __FUNCT__ "MatMultAdd_SeqBAIJ_5" 897 PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 898 { 899 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 900 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5; 901 PetscScalar *yarray,*zarray; 902 MatScalar *v; 903 PetscErrorCode ierr; 904 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 905 PetscTruth usecprow=a->compressedrow.use; 906 907 PetscFunctionBegin; 908 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 909 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 910 if (zz != yy) { 911 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 912 } else { 913 zarray = yarray; 914 } 915 916 idx = a->j; 917 v = a->a; 918 if (usecprow){ 919 if (zz != yy){ 920 ierr = PetscMemcpy(zarray,yarray,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 921 } 922 mbs = a->compressedrow.nrows; 923 ii = a->compressedrow.i; 924 ridx = a->compressedrow.rindex; 925 } else { 926 ii = a->i; 927 y = yarray; 928 z = zarray; 929 } 930 931 for (i=0; i<mbs; i++) { 932 n = ii[1] - ii[0]; ii++; 933 if (usecprow){ 934 z = zarray + 5*ridx[i]; 935 y = yarray + 5*ridx[i]; 936 } 937 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 938 for (j=0; j<n; j++) { 939 xb = x + 5*(*idx++); 940 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 941 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 942 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 943 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 944 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 945 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 946 v += 25; 947 } 948 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 949 if (!usecprow){ 950 z += 5; y += 5; 951 } 952 } 953 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 954 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 955 if (zz != yy) { 956 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 957 } 958 ierr = PetscLogFlops(50*a->nz);CHKERRQ(ierr); 959 PetscFunctionReturn(0); 960 } 961 #undef __FUNCT__ 962 #define __FUNCT__ "MatMultAdd_SeqBAIJ_6" 963 PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 964 { 965 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 966 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6; 967 PetscScalar x1,x2,x3,x4,x5,x6,*yarray,*zarray; 968 MatScalar *v; 969 PetscErrorCode ierr; 970 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 971 PetscTruth usecprow=a->compressedrow.use; 972 973 PetscFunctionBegin; 974 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 975 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 976 if (zz != yy) { 977 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 978 } else { 979 zarray = yarray; 980 } 981 982 idx = a->j; 983 v = a->a; 984 if (usecprow){ 985 if (zz != yy){ 986 ierr = PetscMemcpy(zarray,yarray,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 987 } 988 mbs = a->compressedrow.nrows; 989 ii = a->compressedrow.i; 990 ridx = a->compressedrow.rindex; 991 } else { 992 ii = a->i; 993 y = yarray; 994 z = zarray; 995 } 996 997 for (i=0; i<mbs; i++) { 998 n = ii[1] - ii[0]; ii++; 999 if (usecprow){ 1000 z = zarray + 6*ridx[i]; 1001 y = yarray + 6*ridx[i]; 1002 } 1003 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; 1004 for (j=0; j<n; j++) { 1005 xb = x + 6*(*idx++); 1006 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; 1007 sum1 += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 1008 sum2 += v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 1009 sum3 += v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 1010 sum4 += v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 1011 sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 1012 sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 1013 v += 36; 1014 } 1015 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; 1016 if (!usecprow){ 1017 z += 6; y += 6; 1018 } 1019 } 1020 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1021 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 1022 if (zz != yy) { 1023 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 1024 } 1025 ierr = PetscLogFlops(72*a->nz);CHKERRQ(ierr); 1026 PetscFunctionReturn(0); 1027 } 1028 1029 #undef __FUNCT__ 1030 #define __FUNCT__ "MatMultAdd_SeqBAIJ_7" 1031 PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1032 { 1033 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1034 PetscScalar *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1035 PetscScalar x1,x2,x3,x4,x5,x6,x7,*yarray,*zarray; 1036 MatScalar *v; 1037 PetscErrorCode ierr; 1038 PetscInt mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL; 1039 PetscTruth usecprow=a->compressedrow.use; 1040 1041 PetscFunctionBegin; 1042 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1043 ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr); 1044 if (zz != yy) { 1045 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 1046 } else { 1047 zarray = yarray; 1048 } 1049 1050 idx = a->j; 1051 v = a->a; 1052 if (usecprow){ 1053 if (zz != yy){ 1054 ierr = PetscMemcpy(zarray,yarray,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 1055 } 1056 mbs = a->compressedrow.nrows; 1057 ii = a->compressedrow.i; 1058 ridx = a->compressedrow.rindex; 1059 } else { 1060 ii = a->i; 1061 y = yarray; 1062 z = zarray; 1063 } 1064 1065 for (i=0; i<mbs; i++) { 1066 n = ii[1] - ii[0]; ii++; 1067 if (usecprow){ 1068 z = zarray + 7*ridx[i]; 1069 y = yarray + 7*ridx[i]; 1070 } 1071 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 1072 for (j=0; j<n; j++) { 1073 xb = x + 7*(*idx++); 1074 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1075 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1076 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1077 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1078 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1079 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1080 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1081 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1082 v += 49; 1083 } 1084 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1085 if (!usecprow){ 1086 z += 7; y += 7; 1087 } 1088 } 1089 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1090 ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr); 1091 if (zz != yy) { 1092 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 1093 } 1094 ierr = PetscLogFlops(98*a->nz);CHKERRQ(ierr); 1095 PetscFunctionReturn(0); 1096 } 1097 1098 #undef __FUNCT__ 1099 #define __FUNCT__ "MatMultAdd_SeqBAIJ_N" 1100 PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1101 { 1102 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1103 PetscScalar *x,*z = 0,*xb,*work,*workt,*zarray; 1104 MatScalar *v; 1105 PetscErrorCode ierr; 1106 PetscInt mbs,i,*idx,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2; 1107 PetscInt ncols,k,*ridx=PETSC_NULL; 1108 PetscTruth usecprow=a->compressedrow.use; 1109 1110 PetscFunctionBegin; 1111 ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); 1112 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1113 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr); 1114 1115 idx = a->j; 1116 v = a->a; 1117 if (usecprow){ 1118 mbs = a->compressedrow.nrows; 1119 ii = a->compressedrow.i; 1120 ridx = a->compressedrow.rindex; 1121 } else { 1122 mbs = a->mbs; 1123 ii = a->i; 1124 z = zarray; 1125 } 1126 1127 if (!a->mult_work) { 1128 k = PetscMax(A->rmap.n,A->cmap.n); 1129 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 1130 } 1131 work = a->mult_work; 1132 for (i=0; i<mbs; i++) { 1133 n = ii[1] - ii[0]; ii++; 1134 ncols = n*bs; 1135 workt = work; 1136 for (j=0; j<n; j++) { 1137 xb = x + bs*(*idx++); 1138 for (k=0; k<bs; k++) workt[k] = xb[k]; 1139 workt += bs; 1140 } 1141 if (usecprow) z = zarray + bs*ridx[i]; 1142 Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z); 1143 /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */ 1144 v += n*bs2; 1145 if (!usecprow){ 1146 z += bs; 1147 } 1148 } 1149 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1150 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr); 1151 ierr = PetscLogFlops(2*a->nz*bs2);CHKERRQ(ierr); 1152 PetscFunctionReturn(0); 1153 } 1154 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatMultTranspose_SeqBAIJ" 1157 PetscErrorCode MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz) 1158 { 1159 PetscScalar zero = 0.0; 1160 PetscErrorCode ierr; 1161 1162 PetscFunctionBegin; 1163 ierr = VecSet(zz,zero);CHKERRQ(ierr); 1164 ierr = MatMultTransposeAdd_SeqBAIJ(A,xx,zz,zz);CHKERRQ(ierr); 1165 PetscFunctionReturn(0); 1166 } 1167 1168 #undef __FUNCT__ 1169 #define __FUNCT__ "MatMultTransposeAdd_SeqBAIJ" 1170 PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1171 1172 { 1173 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1174 PetscScalar *zb,*x,*z,*xb = 0,x1,x2,x3,x4,x5; 1175 MatScalar *v; 1176 PetscErrorCode ierr; 1177 PetscInt mbs,i,*idx,*ii,rval,bs=A->rmap.bs,j,n,bs2=a->bs2,*ib,*ridx=PETSC_NULL; 1178 Mat_CompressedRow cprow = a->compressedrow; 1179 PetscTruth usecprow=cprow.use; 1180 1181 PetscFunctionBegin; 1182 if (yy != zz) { ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); } 1183 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1184 ierr = VecGetArray(zz,&z);CHKERRQ(ierr); 1185 1186 idx = a->j; 1187 v = a->a; 1188 if (usecprow){ 1189 mbs = cprow.nrows; 1190 ii = cprow.i; 1191 ridx = cprow.rindex; 1192 } else { 1193 mbs=a->mbs; 1194 ii = a->i; 1195 xb = x; 1196 } 1197 1198 switch (bs) { 1199 case 1: 1200 for (i=0; i<mbs; i++) { 1201 if (usecprow) xb = x + ridx[i]; 1202 x1 = xb[0]; 1203 ib = idx + ii[0]; 1204 n = ii[1] - ii[0]; ii++; 1205 for (j=0; j<n; j++) { 1206 rval = ib[j]; 1207 z[rval] += *v * x1; 1208 v++; 1209 } 1210 if (!usecprow) xb++; 1211 } 1212 break; 1213 case 2: 1214 for (i=0; i<mbs; i++) { 1215 if (usecprow) xb = x + 2*ridx[i]; 1216 x1 = xb[0]; x2 = xb[1]; 1217 ib = idx + ii[0]; 1218 n = ii[1] - ii[0]; ii++; 1219 for (j=0; j<n; j++) { 1220 rval = ib[j]*2; 1221 z[rval++] += v[0]*x1 + v[1]*x2; 1222 z[rval++] += v[2]*x1 + v[3]*x2; 1223 v += 4; 1224 } 1225 if (!usecprow) xb += 2; 1226 } 1227 break; 1228 case 3: 1229 for (i=0; i<mbs; i++) { 1230 if (usecprow) xb = x + 3*ridx[i]; 1231 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1232 ib = idx + ii[0]; 1233 n = ii[1] - ii[0]; ii++; 1234 for (j=0; j<n; j++) { 1235 rval = ib[j]*3; 1236 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1237 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1238 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1239 v += 9; 1240 } 1241 if (!usecprow) xb += 3; 1242 } 1243 break; 1244 case 4: 1245 for (i=0; i<mbs; i++) { 1246 if (usecprow) xb = x + 4*ridx[i]; 1247 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1248 ib = idx + ii[0]; 1249 n = ii[1] - ii[0]; ii++; 1250 for (j=0; j<n; j++) { 1251 rval = ib[j]*4; 1252 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1253 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1254 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1255 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1256 v += 16; 1257 } 1258 if (!usecprow) xb += 4; 1259 } 1260 break; 1261 case 5: 1262 for (i=0; i<mbs; i++) { 1263 if (usecprow) xb = x + 5*ridx[i]; 1264 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1265 x4 = xb[3]; x5 = xb[4]; 1266 ib = idx + ii[0]; 1267 n = ii[1] - ii[0]; ii++; 1268 for (j=0; j<n; j++) { 1269 rval = ib[j]*5; 1270 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1271 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1272 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1273 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1274 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1275 v += 25; 1276 } 1277 if (!usecprow) xb += 5; 1278 } 1279 break; 1280 default: { /* block sizes larger then 5 by 5 are handled by BLAS */ 1281 PetscInt ncols,k; 1282 PetscScalar *work,*workt,*xtmp; 1283 1284 if (!a->mult_work) { 1285 k = PetscMax(A->rmap.n,A->cmap.n); 1286 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 1287 } 1288 work = a->mult_work; 1289 xtmp = x; 1290 for (i=0; i<mbs; i++) { 1291 n = ii[1] - ii[0]; ii++; 1292 ncols = n*bs; 1293 ierr = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr); 1294 if (usecprow) { 1295 xtmp = x + bs*ridx[i]; 1296 } 1297 Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,xtmp,v,work); 1298 /* BLASgemv_("T",&bs,&ncols,&_DOne,v,&bs,xtmp,&_One,&_DOne,work,&_One); */ 1299 v += n*bs2; 1300 if (!usecprow) xtmp += bs; 1301 workt = work; 1302 for (j=0; j<n; j++) { 1303 zb = z + bs*(*idx++); 1304 for (k=0; k<bs; k++) zb[k] += workt[k] ; 1305 workt += bs; 1306 } 1307 } 1308 } 1309 } 1310 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1311 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr); 1312 ierr = PetscLogFlops(2*a->nz*a->bs2);CHKERRQ(ierr); 1313 PetscFunctionReturn(0); 1314 } 1315 1316 #undef __FUNCT__ 1317 #define __FUNCT__ "MatScale_SeqBAIJ" 1318 PetscErrorCode MatScale_SeqBAIJ(Mat inA,PetscScalar alpha) 1319 { 1320 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1321 PetscInt totalnz = a->bs2*a->nz; 1322 PetscScalar oalpha = alpha; 1323 PetscErrorCode ierr; 1324 #if defined(PETSC_USE_MAT_SINGLE) 1325 PetscInt i; 1326 #else 1327 PetscBLASInt tnz = (PetscBLASInt) totalnz,one = 1; 1328 #endif 1329 1330 PetscFunctionBegin; 1331 #if defined(PETSC_USE_MAT_SINGLE) 1332 for (i=0; i<totalnz; i++) a->a[i] *= oalpha; 1333 #else 1334 BLASscal_(&tnz,&oalpha,a->a,&one); 1335 #endif 1336 ierr = PetscLogFlops(totalnz);CHKERRQ(ierr); 1337 PetscFunctionReturn(0); 1338 } 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatNorm_SeqBAIJ" 1342 PetscErrorCode MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm) 1343 { 1344 PetscErrorCode ierr; 1345 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1346 MatScalar *v = a->a; 1347 PetscReal sum = 0.0; 1348 PetscInt i,j,k,bs=A->rmap.bs,nz=a->nz,bs2=a->bs2,k1; 1349 1350 PetscFunctionBegin; 1351 if (type == NORM_FROBENIUS) { 1352 for (i=0; i< bs2*nz; i++) { 1353 #if defined(PETSC_USE_COMPLEX) 1354 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1355 #else 1356 sum += (*v)*(*v); v++; 1357 #endif 1358 } 1359 *norm = sqrt(sum); 1360 } else if (type == NORM_1) { /* maximum column sum */ 1361 PetscReal *tmp; 1362 PetscInt *bcol = a->j; 1363 ierr = PetscMalloc((A->cmap.n+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1364 ierr = PetscMemzero(tmp,A->cmap.n*sizeof(PetscReal));CHKERRQ(ierr); 1365 for (i=0; i<nz; i++){ 1366 for (j=0; j<bs; j++){ 1367 k1 = bs*(*bcol) + j; /* column index */ 1368 for (k=0; k<bs; k++){ 1369 tmp[k1] += PetscAbsScalar(*v); v++; 1370 } 1371 } 1372 bcol++; 1373 } 1374 *norm = 0.0; 1375 for (j=0; j<A->cmap.n; j++) { 1376 if (tmp[j] > *norm) *norm = tmp[j]; 1377 } 1378 ierr = PetscFree(tmp);CHKERRQ(ierr); 1379 } else if (type == NORM_INFINITY) { /* maximum row sum */ 1380 *norm = 0.0; 1381 for (k=0; k<bs; k++) { 1382 for (j=0; j<a->mbs; j++) { 1383 v = a->a + bs2*a->i[j] + k; 1384 sum = 0.0; 1385 for (i=0; i<a->i[j+1]-a->i[j]; i++) { 1386 for (k1=0; k1<bs; k1++){ 1387 sum += PetscAbsScalar(*v); 1388 v += bs; 1389 } 1390 } 1391 if (sum > *norm) *norm = sum; 1392 } 1393 } 1394 } else { 1395 SETERRQ(PETSC_ERR_SUP,"No support for this norm yet"); 1396 } 1397 PetscFunctionReturn(0); 1398 } 1399 1400 1401 #undef __FUNCT__ 1402 #define __FUNCT__ "MatEqual_SeqBAIJ" 1403 PetscErrorCode MatEqual_SeqBAIJ(Mat A,Mat B,PetscTruth* flg) 1404 { 1405 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data,*b = (Mat_SeqBAIJ *)B->data; 1406 PetscErrorCode ierr; 1407 1408 PetscFunctionBegin; 1409 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1410 if ((A->rmap.N != B->rmap.N) || (A->cmap.n != B->cmap.n) || (A->rmap.bs != B->rmap.bs)|| (a->nz != b->nz)) { 1411 *flg = PETSC_FALSE; 1412 PetscFunctionReturn(0); 1413 } 1414 1415 /* if the a->i are the same */ 1416 ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr); 1417 if (!*flg) { 1418 PetscFunctionReturn(0); 1419 } 1420 1421 /* if a->j are the same */ 1422 ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr); 1423 if (!*flg) { 1424 PetscFunctionReturn(0); 1425 } 1426 /* if a->a are the same */ 1427 ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap.bs)*(B->rmap.bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr); 1428 PetscFunctionReturn(0); 1429 1430 } 1431 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "MatGetDiagonal_SeqBAIJ" 1434 PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1435 { 1436 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1437 PetscErrorCode ierr; 1438 PetscInt i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1439 PetscScalar *x,zero = 0.0; 1440 MatScalar *aa,*aa_j; 1441 1442 PetscFunctionBegin; 1443 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1444 bs = A->rmap.bs; 1445 aa = a->a; 1446 ai = a->i; 1447 aj = a->j; 1448 ambs = a->mbs; 1449 bs2 = a->bs2; 1450 1451 ierr = VecSet(v,zero);CHKERRQ(ierr); 1452 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1453 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1454 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1455 for (i=0; i<ambs; i++) { 1456 for (j=ai[i]; j<ai[i+1]; j++) { 1457 if (aj[j] == i) { 1458 row = i*bs; 1459 aa_j = aa+j*bs2; 1460 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1461 break; 1462 } 1463 } 1464 } 1465 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1466 PetscFunctionReturn(0); 1467 } 1468 1469 #undef __FUNCT__ 1470 #define __FUNCT__ "MatDiagonalScale_SeqBAIJ" 1471 PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1472 { 1473 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1474 PetscScalar *l,*r,x,*li,*ri; 1475 MatScalar *aa,*v; 1476 PetscErrorCode ierr; 1477 PetscInt i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1478 1479 PetscFunctionBegin; 1480 ai = a->i; 1481 aj = a->j; 1482 aa = a->a; 1483 m = A->rmap.n; 1484 n = A->cmap.n; 1485 bs = A->rmap.bs; 1486 mbs = a->mbs; 1487 bs2 = a->bs2; 1488 if (ll) { 1489 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 1490 ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr); 1491 if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 1492 for (i=0; i<mbs; i++) { /* for each block row */ 1493 M = ai[i+1] - ai[i]; 1494 li = l + i*bs; 1495 v = aa + bs2*ai[i]; 1496 for (j=0; j<M; j++) { /* for each block */ 1497 for (k=0; k<bs2; k++) { 1498 (*v++) *= li[k%bs]; 1499 } 1500 } 1501 } 1502 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 1503 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1504 } 1505 1506 if (rr) { 1507 ierr = VecGetArray(rr,&r);CHKERRQ(ierr); 1508 ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr); 1509 if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 1510 for (i=0; i<mbs; i++) { /* for each block row */ 1511 M = ai[i+1] - ai[i]; 1512 v = aa + bs2*ai[i]; 1513 for (j=0; j<M; j++) { /* for each block */ 1514 ri = r + bs*aj[ai[i]+j]; 1515 for (k=0; k<bs; k++) { 1516 x = ri[k]; 1517 for (tmp=0; tmp<bs; tmp++) (*v++) *= x; 1518 } 1519 } 1520 } 1521 ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr); 1522 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1523 } 1524 PetscFunctionReturn(0); 1525 } 1526 1527 1528 #undef __FUNCT__ 1529 #define __FUNCT__ "MatGetInfo_SeqBAIJ" 1530 PetscErrorCode MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1531 { 1532 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1533 1534 PetscFunctionBegin; 1535 info->rows_global = (double)A->rmap.N; 1536 info->columns_global = (double)A->cmap.n; 1537 info->rows_local = (double)A->rmap.n; 1538 info->columns_local = (double)A->cmap.n; 1539 info->block_size = a->bs2; 1540 info->nz_allocated = a->maxnz; 1541 info->nz_used = a->bs2*a->nz; 1542 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1543 info->assemblies = A->num_ass; 1544 info->mallocs = a->reallocs; 1545 info->memory = ((PetscObject)A)->mem; 1546 if (A->factor) { 1547 info->fill_ratio_given = A->info.fill_ratio_given; 1548 info->fill_ratio_needed = A->info.fill_ratio_needed; 1549 info->factor_mallocs = A->info.factor_mallocs; 1550 } else { 1551 info->fill_ratio_given = 0; 1552 info->fill_ratio_needed = 0; 1553 info->factor_mallocs = 0; 1554 } 1555 PetscFunctionReturn(0); 1556 } 1557 1558 1559 #undef __FUNCT__ 1560 #define __FUNCT__ "MatZeroEntries_SeqBAIJ" 1561 PetscErrorCode MatZeroEntries_SeqBAIJ(Mat A) 1562 { 1563 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1564 PetscErrorCode ierr; 1565 1566 PetscFunctionBegin; 1567 ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr); 1568 PetscFunctionReturn(0); 1569 } 1570