1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the MAIJ matrix storage format. 5 This format is used for restriction and interpolation operations for 6 multicomponent problems. It interpolates each component the same way 7 independently. 8 9 We provide: 10 MatMult() 11 MatMultTranspose() 12 MatMultTransposeAdd() 13 MatMultAdd() 14 and 15 MatCreateMAIJ(Mat,dof,Mat*) 16 17 This single directory handles both the sequential and parallel codes 18 */ 19 20 #include "../src/mat/impls/maij/maij.h" /*I "petscmat.h" I*/ 21 #include "../src/mat/utils/freespace.h" 22 #include "private/vecimpl.h" 23 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatMAIJGetAIJ" 26 /*@C 27 MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 28 29 Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 30 31 Input Parameter: 32 . A - the MAIJ matrix 33 34 Output Parameter: 35 . B - the AIJ matrix 36 37 Level: advanced 38 39 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 40 41 .seealso: MatCreateMAIJ() 42 @*/ 43 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B) 44 { 45 PetscErrorCode ierr; 46 PetscTruth ismpimaij,isseqmaij; 47 48 PetscFunctionBegin; 49 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 50 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 51 if (ismpimaij) { 52 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 53 54 *B = b->A; 55 } else if (isseqmaij) { 56 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 57 58 *B = b->AIJ; 59 } else { 60 *B = A; 61 } 62 PetscFunctionReturn(0); 63 } 64 65 #undef __FUNCT__ 66 #define __FUNCT__ "MatMAIJRedimension" 67 /*@C 68 MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 69 70 Logically Collective 71 72 Input Parameter: 73 + A - the MAIJ matrix 74 - dof - the block size for the new matrix 75 76 Output Parameter: 77 . B - the new MAIJ matrix 78 79 Level: advanced 80 81 .seealso: MatCreateMAIJ() 82 @*/ 83 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 84 { 85 PetscErrorCode ierr; 86 Mat Aij = PETSC_NULL; 87 88 PetscFunctionBegin; 89 PetscValidLogicalCollectiveInt(A,dof,2); 90 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 91 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 92 PetscFunctionReturn(0); 93 } 94 95 #undef __FUNCT__ 96 #define __FUNCT__ "MatDestroy_SeqMAIJ" 97 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 98 { 99 PetscErrorCode ierr; 100 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 101 102 PetscFunctionBegin; 103 if (b->AIJ) { 104 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 105 } 106 ierr = PetscFree(b);CHKERRQ(ierr); 107 PetscFunctionReturn(0); 108 } 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "MatView_SeqMAIJ" 112 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 113 { 114 PetscErrorCode ierr; 115 Mat B; 116 117 PetscFunctionBegin; 118 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B); 119 ierr = MatView(B,viewer);CHKERRQ(ierr); 120 ierr = MatDestroy(B);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatView_MPIMAIJ" 126 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 127 { 128 PetscErrorCode ierr; 129 Mat B; 130 131 PetscFunctionBegin; 132 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B); 133 ierr = MatView(B,viewer);CHKERRQ(ierr); 134 ierr = MatDestroy(B);CHKERRQ(ierr); 135 PetscFunctionReturn(0); 136 } 137 138 #undef __FUNCT__ 139 #define __FUNCT__ "MatDestroy_MPIMAIJ" 140 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 141 { 142 PetscErrorCode ierr; 143 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 144 145 PetscFunctionBegin; 146 if (b->AIJ) { 147 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 148 } 149 if (b->OAIJ) { 150 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 151 } 152 if (b->A) { 153 ierr = MatDestroy(b->A);CHKERRQ(ierr); 154 } 155 if (b->ctx) { 156 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 157 } 158 if (b->w) { 159 ierr = VecDestroy(b->w);CHKERRQ(ierr); 160 } 161 ierr = PetscFree(b);CHKERRQ(ierr); 162 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 163 PetscFunctionReturn(0); 164 } 165 166 /*MC 167 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 168 multicomponent problems, interpolating or restricting each component the same way independently. 169 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 170 171 Operations provided: 172 . MatMult 173 . MatMultTranspose 174 . MatMultAdd 175 . MatMultTransposeAdd 176 177 Level: advanced 178 179 .seealso: MatCreateSeqDense 180 M*/ 181 182 EXTERN_C_BEGIN 183 #undef __FUNCT__ 184 #define __FUNCT__ "MatCreate_MAIJ" 185 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A) 186 { 187 PetscErrorCode ierr; 188 Mat_MPIMAIJ *b; 189 PetscMPIInt size; 190 191 PetscFunctionBegin; 192 ierr = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr); 193 A->data = (void*)b; 194 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 195 A->mapping = 0; 196 197 b->AIJ = 0; 198 b->dof = 0; 199 b->OAIJ = 0; 200 b->ctx = 0; 201 b->w = 0; 202 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 203 if (size == 1){ 204 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 205 } else { 206 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 207 } 208 PetscFunctionReturn(0); 209 } 210 EXTERN_C_END 211 212 /* --------------------------------------------------------------------------------------*/ 213 #undef __FUNCT__ 214 #define __FUNCT__ "MatMult_SeqMAIJ_2" 215 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 216 { 217 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 218 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 219 const PetscScalar *x,*v; 220 PetscScalar *y, sum1, sum2; 221 PetscErrorCode ierr; 222 PetscInt nonzerorow=0,n,i,jrow,j; 223 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 224 225 PetscFunctionBegin; 226 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 227 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 228 idx = a->j; 229 v = a->a; 230 ii = a->i; 231 232 for (i=0; i<m; i++) { 233 jrow = ii[i]; 234 n = ii[i+1] - jrow; 235 sum1 = 0.0; 236 sum2 = 0.0; 237 nonzerorow += (n>0); 238 for (j=0; j<n; j++) { 239 sum1 += v[jrow]*x[2*idx[jrow]]; 240 sum2 += v[jrow]*x[2*idx[jrow]+1]; 241 jrow++; 242 } 243 y[2*i] = sum1; 244 y[2*i+1] = sum2; 245 } 246 247 ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 248 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 249 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 250 PetscFunctionReturn(0); 251 } 252 253 #undef __FUNCT__ 254 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 255 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 256 { 257 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 258 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 259 const PetscScalar *x,*v; 260 PetscScalar *y,alpha1,alpha2; 261 PetscErrorCode ierr; 262 PetscInt n,i; 263 const PetscInt m = b->AIJ->rmap->n,*idx; 264 265 PetscFunctionBegin; 266 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 267 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 268 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 269 270 for (i=0; i<m; i++) { 271 idx = a->j + a->i[i] ; 272 v = a->a + a->i[i] ; 273 n = a->i[i+1] - a->i[i]; 274 alpha1 = x[2*i]; 275 alpha2 = x[2*i+1]; 276 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 277 } 278 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 279 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 280 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 281 PetscFunctionReturn(0); 282 } 283 284 #undef __FUNCT__ 285 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 286 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 287 { 288 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 289 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 290 const PetscScalar *x,*v; 291 PetscScalar *y,sum1, sum2; 292 PetscErrorCode ierr; 293 PetscInt n,i,jrow,j; 294 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 295 296 PetscFunctionBegin; 297 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 298 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 299 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 300 idx = a->j; 301 v = a->a; 302 ii = a->i; 303 304 for (i=0; i<m; i++) { 305 jrow = ii[i]; 306 n = ii[i+1] - jrow; 307 sum1 = 0.0; 308 sum2 = 0.0; 309 for (j=0; j<n; j++) { 310 sum1 += v[jrow]*x[2*idx[jrow]]; 311 sum2 += v[jrow]*x[2*idx[jrow]+1]; 312 jrow++; 313 } 314 y[2*i] += sum1; 315 y[2*i+1] += sum2; 316 } 317 318 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 319 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 320 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 321 PetscFunctionReturn(0); 322 } 323 #undef __FUNCT__ 324 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 325 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 326 { 327 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 328 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 329 const PetscScalar *x,*v; 330 PetscScalar *y,alpha1,alpha2; 331 PetscErrorCode ierr; 332 PetscInt n,i; 333 const PetscInt m = b->AIJ->rmap->n,*idx; 334 335 PetscFunctionBegin; 336 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 337 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 338 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 339 340 for (i=0; i<m; i++) { 341 idx = a->j + a->i[i] ; 342 v = a->a + a->i[i] ; 343 n = a->i[i+1] - a->i[i]; 344 alpha1 = x[2*i]; 345 alpha2 = x[2*i+1]; 346 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 347 } 348 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 349 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 350 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 351 PetscFunctionReturn(0); 352 } 353 /* --------------------------------------------------------------------------------------*/ 354 #undef __FUNCT__ 355 #define __FUNCT__ "MatMult_SeqMAIJ_3" 356 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 357 { 358 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 359 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 360 const PetscScalar *x,*v; 361 PetscScalar *y,sum1, sum2, sum3; 362 PetscErrorCode ierr; 363 PetscInt nonzerorow=0,n,i,jrow,j; 364 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 365 366 PetscFunctionBegin; 367 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 368 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 369 idx = a->j; 370 v = a->a; 371 ii = a->i; 372 373 for (i=0; i<m; i++) { 374 jrow = ii[i]; 375 n = ii[i+1] - jrow; 376 sum1 = 0.0; 377 sum2 = 0.0; 378 sum3 = 0.0; 379 nonzerorow += (n>0); 380 for (j=0; j<n; j++) { 381 sum1 += v[jrow]*x[3*idx[jrow]]; 382 sum2 += v[jrow]*x[3*idx[jrow]+1]; 383 sum3 += v[jrow]*x[3*idx[jrow]+2]; 384 jrow++; 385 } 386 y[3*i] = sum1; 387 y[3*i+1] = sum2; 388 y[3*i+2] = sum3; 389 } 390 391 ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 392 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 393 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 394 PetscFunctionReturn(0); 395 } 396 397 #undef __FUNCT__ 398 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 399 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 400 { 401 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 402 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 403 const PetscScalar *x,*v; 404 PetscScalar *y,alpha1,alpha2,alpha3; 405 PetscErrorCode ierr; 406 PetscInt n,i; 407 const PetscInt m = b->AIJ->rmap->n,*idx; 408 409 PetscFunctionBegin; 410 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 411 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 412 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 413 414 for (i=0; i<m; i++) { 415 idx = a->j + a->i[i]; 416 v = a->a + a->i[i]; 417 n = a->i[i+1] - a->i[i]; 418 alpha1 = x[3*i]; 419 alpha2 = x[3*i+1]; 420 alpha3 = x[3*i+2]; 421 while (n-->0) { 422 y[3*(*idx)] += alpha1*(*v); 423 y[3*(*idx)+1] += alpha2*(*v); 424 y[3*(*idx)+2] += alpha3*(*v); 425 idx++; v++; 426 } 427 } 428 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 429 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 430 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 431 PetscFunctionReturn(0); 432 } 433 434 #undef __FUNCT__ 435 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 436 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 437 { 438 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 439 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 440 const PetscScalar *x,*v; 441 PetscScalar *y,sum1, sum2, sum3; 442 PetscErrorCode ierr; 443 PetscInt n,i,jrow,j; 444 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 445 446 PetscFunctionBegin; 447 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 448 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 449 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 450 idx = a->j; 451 v = a->a; 452 ii = a->i; 453 454 for (i=0; i<m; i++) { 455 jrow = ii[i]; 456 n = ii[i+1] - jrow; 457 sum1 = 0.0; 458 sum2 = 0.0; 459 sum3 = 0.0; 460 for (j=0; j<n; j++) { 461 sum1 += v[jrow]*x[3*idx[jrow]]; 462 sum2 += v[jrow]*x[3*idx[jrow]+1]; 463 sum3 += v[jrow]*x[3*idx[jrow]+2]; 464 jrow++; 465 } 466 y[3*i] += sum1; 467 y[3*i+1] += sum2; 468 y[3*i+2] += sum3; 469 } 470 471 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 472 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 473 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 474 PetscFunctionReturn(0); 475 } 476 #undef __FUNCT__ 477 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 478 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 479 { 480 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 481 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 482 const PetscScalar *x,*v; 483 PetscScalar *y,alpha1,alpha2,alpha3; 484 PetscErrorCode ierr; 485 PetscInt n,i; 486 const PetscInt m = b->AIJ->rmap->n,*idx; 487 488 PetscFunctionBegin; 489 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 490 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 491 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 492 for (i=0; i<m; i++) { 493 idx = a->j + a->i[i] ; 494 v = a->a + a->i[i] ; 495 n = a->i[i+1] - a->i[i]; 496 alpha1 = x[3*i]; 497 alpha2 = x[3*i+1]; 498 alpha3 = x[3*i+2]; 499 while (n-->0) { 500 y[3*(*idx)] += alpha1*(*v); 501 y[3*(*idx)+1] += alpha2*(*v); 502 y[3*(*idx)+2] += alpha3*(*v); 503 idx++; v++; 504 } 505 } 506 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 507 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 508 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 509 PetscFunctionReturn(0); 510 } 511 512 /* ------------------------------------------------------------------------------*/ 513 #undef __FUNCT__ 514 #define __FUNCT__ "MatMult_SeqMAIJ_4" 515 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 516 { 517 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 518 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 519 const PetscScalar *x,*v; 520 PetscScalar *y,sum1, sum2, sum3, sum4; 521 PetscErrorCode ierr; 522 PetscInt nonzerorow=0,n,i,jrow,j; 523 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 524 525 PetscFunctionBegin; 526 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 527 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 528 idx = a->j; 529 v = a->a; 530 ii = a->i; 531 532 for (i=0; i<m; i++) { 533 jrow = ii[i]; 534 n = ii[i+1] - jrow; 535 sum1 = 0.0; 536 sum2 = 0.0; 537 sum3 = 0.0; 538 sum4 = 0.0; 539 nonzerorow += (n>0); 540 for (j=0; j<n; j++) { 541 sum1 += v[jrow]*x[4*idx[jrow]]; 542 sum2 += v[jrow]*x[4*idx[jrow]+1]; 543 sum3 += v[jrow]*x[4*idx[jrow]+2]; 544 sum4 += v[jrow]*x[4*idx[jrow]+3]; 545 jrow++; 546 } 547 y[4*i] = sum1; 548 y[4*i+1] = sum2; 549 y[4*i+2] = sum3; 550 y[4*i+3] = sum4; 551 } 552 553 ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 554 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 555 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 556 PetscFunctionReturn(0); 557 } 558 559 #undef __FUNCT__ 560 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 561 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 562 { 563 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 564 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 565 const PetscScalar *x,*v; 566 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 567 PetscErrorCode ierr; 568 PetscInt n,i; 569 const PetscInt m = b->AIJ->rmap->n,*idx; 570 571 PetscFunctionBegin; 572 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 573 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 574 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 575 for (i=0; i<m; i++) { 576 idx = a->j + a->i[i] ; 577 v = a->a + a->i[i] ; 578 n = a->i[i+1] - a->i[i]; 579 alpha1 = x[4*i]; 580 alpha2 = x[4*i+1]; 581 alpha3 = x[4*i+2]; 582 alpha4 = x[4*i+3]; 583 while (n-->0) { 584 y[4*(*idx)] += alpha1*(*v); 585 y[4*(*idx)+1] += alpha2*(*v); 586 y[4*(*idx)+2] += alpha3*(*v); 587 y[4*(*idx)+3] += alpha4*(*v); 588 idx++; v++; 589 } 590 } 591 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 592 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 593 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 594 PetscFunctionReturn(0); 595 } 596 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 599 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 600 { 601 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 602 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 603 const PetscScalar *x,*v; 604 PetscScalar *y,sum1, sum2, sum3, sum4; 605 PetscErrorCode ierr; 606 PetscInt n,i,jrow,j; 607 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 608 609 PetscFunctionBegin; 610 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 611 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 612 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 613 idx = a->j; 614 v = a->a; 615 ii = a->i; 616 617 for (i=0; i<m; i++) { 618 jrow = ii[i]; 619 n = ii[i+1] - jrow; 620 sum1 = 0.0; 621 sum2 = 0.0; 622 sum3 = 0.0; 623 sum4 = 0.0; 624 for (j=0; j<n; j++) { 625 sum1 += v[jrow]*x[4*idx[jrow]]; 626 sum2 += v[jrow]*x[4*idx[jrow]+1]; 627 sum3 += v[jrow]*x[4*idx[jrow]+2]; 628 sum4 += v[jrow]*x[4*idx[jrow]+3]; 629 jrow++; 630 } 631 y[4*i] += sum1; 632 y[4*i+1] += sum2; 633 y[4*i+2] += sum3; 634 y[4*i+3] += sum4; 635 } 636 637 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 638 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 639 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 640 PetscFunctionReturn(0); 641 } 642 #undef __FUNCT__ 643 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 644 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 645 { 646 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 647 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 648 const PetscScalar *x,*v; 649 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 650 PetscErrorCode ierr; 651 PetscInt n,i; 652 const PetscInt m = b->AIJ->rmap->n,*idx; 653 654 PetscFunctionBegin; 655 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 656 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 657 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 658 659 for (i=0; i<m; i++) { 660 idx = a->j + a->i[i] ; 661 v = a->a + a->i[i] ; 662 n = a->i[i+1] - a->i[i]; 663 alpha1 = x[4*i]; 664 alpha2 = x[4*i+1]; 665 alpha3 = x[4*i+2]; 666 alpha4 = x[4*i+3]; 667 while (n-->0) { 668 y[4*(*idx)] += alpha1*(*v); 669 y[4*(*idx)+1] += alpha2*(*v); 670 y[4*(*idx)+2] += alpha3*(*v); 671 y[4*(*idx)+3] += alpha4*(*v); 672 idx++; v++; 673 } 674 } 675 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 676 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 677 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 678 PetscFunctionReturn(0); 679 } 680 /* ------------------------------------------------------------------------------*/ 681 682 #undef __FUNCT__ 683 #define __FUNCT__ "MatMult_SeqMAIJ_5" 684 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 685 { 686 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 687 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 688 const PetscScalar *x,*v; 689 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 690 PetscErrorCode ierr; 691 PetscInt nonzerorow=0,n,i,jrow,j; 692 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 693 694 PetscFunctionBegin; 695 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 696 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 697 idx = a->j; 698 v = a->a; 699 ii = a->i; 700 701 for (i=0; i<m; i++) { 702 jrow = ii[i]; 703 n = ii[i+1] - jrow; 704 sum1 = 0.0; 705 sum2 = 0.0; 706 sum3 = 0.0; 707 sum4 = 0.0; 708 sum5 = 0.0; 709 nonzerorow += (n>0); 710 for (j=0; j<n; j++) { 711 sum1 += v[jrow]*x[5*idx[jrow]]; 712 sum2 += v[jrow]*x[5*idx[jrow]+1]; 713 sum3 += v[jrow]*x[5*idx[jrow]+2]; 714 sum4 += v[jrow]*x[5*idx[jrow]+3]; 715 sum5 += v[jrow]*x[5*idx[jrow]+4]; 716 jrow++; 717 } 718 y[5*i] = sum1; 719 y[5*i+1] = sum2; 720 y[5*i+2] = sum3; 721 y[5*i+3] = sum4; 722 y[5*i+4] = sum5; 723 } 724 725 ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 726 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 727 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 728 PetscFunctionReturn(0); 729 } 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 733 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 734 { 735 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 736 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 737 const PetscScalar *x,*v; 738 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 739 PetscErrorCode ierr; 740 PetscInt n,i; 741 const PetscInt m = b->AIJ->rmap->n,*idx; 742 743 PetscFunctionBegin; 744 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 745 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 746 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 747 748 for (i=0; i<m; i++) { 749 idx = a->j + a->i[i] ; 750 v = a->a + a->i[i] ; 751 n = a->i[i+1] - a->i[i]; 752 alpha1 = x[5*i]; 753 alpha2 = x[5*i+1]; 754 alpha3 = x[5*i+2]; 755 alpha4 = x[5*i+3]; 756 alpha5 = x[5*i+4]; 757 while (n-->0) { 758 y[5*(*idx)] += alpha1*(*v); 759 y[5*(*idx)+1] += alpha2*(*v); 760 y[5*(*idx)+2] += alpha3*(*v); 761 y[5*(*idx)+3] += alpha4*(*v); 762 y[5*(*idx)+4] += alpha5*(*v); 763 idx++; v++; 764 } 765 } 766 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 767 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 768 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 769 PetscFunctionReturn(0); 770 } 771 772 #undef __FUNCT__ 773 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 774 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 775 { 776 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 777 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 778 const PetscScalar *x,*v; 779 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 780 PetscErrorCode ierr; 781 PetscInt n,i,jrow,j; 782 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 783 784 PetscFunctionBegin; 785 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 786 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 787 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 788 idx = a->j; 789 v = a->a; 790 ii = a->i; 791 792 for (i=0; i<m; i++) { 793 jrow = ii[i]; 794 n = ii[i+1] - jrow; 795 sum1 = 0.0; 796 sum2 = 0.0; 797 sum3 = 0.0; 798 sum4 = 0.0; 799 sum5 = 0.0; 800 for (j=0; j<n; j++) { 801 sum1 += v[jrow]*x[5*idx[jrow]]; 802 sum2 += v[jrow]*x[5*idx[jrow]+1]; 803 sum3 += v[jrow]*x[5*idx[jrow]+2]; 804 sum4 += v[jrow]*x[5*idx[jrow]+3]; 805 sum5 += v[jrow]*x[5*idx[jrow]+4]; 806 jrow++; 807 } 808 y[5*i] += sum1; 809 y[5*i+1] += sum2; 810 y[5*i+2] += sum3; 811 y[5*i+3] += sum4; 812 y[5*i+4] += sum5; 813 } 814 815 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 816 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 817 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 818 PetscFunctionReturn(0); 819 } 820 821 #undef __FUNCT__ 822 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 823 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 824 { 825 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 826 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 827 const PetscScalar *x,*v; 828 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 829 PetscErrorCode ierr; 830 PetscInt n,i; 831 const PetscInt m = b->AIJ->rmap->n,*idx; 832 833 PetscFunctionBegin; 834 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 835 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 836 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 837 838 for (i=0; i<m; i++) { 839 idx = a->j + a->i[i] ; 840 v = a->a + a->i[i] ; 841 n = a->i[i+1] - a->i[i]; 842 alpha1 = x[5*i]; 843 alpha2 = x[5*i+1]; 844 alpha3 = x[5*i+2]; 845 alpha4 = x[5*i+3]; 846 alpha5 = x[5*i+4]; 847 while (n-->0) { 848 y[5*(*idx)] += alpha1*(*v); 849 y[5*(*idx)+1] += alpha2*(*v); 850 y[5*(*idx)+2] += alpha3*(*v); 851 y[5*(*idx)+3] += alpha4*(*v); 852 y[5*(*idx)+4] += alpha5*(*v); 853 idx++; v++; 854 } 855 } 856 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 857 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 858 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 859 PetscFunctionReturn(0); 860 } 861 862 /* ------------------------------------------------------------------------------*/ 863 #undef __FUNCT__ 864 #define __FUNCT__ "MatMult_SeqMAIJ_6" 865 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 866 { 867 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 868 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 869 const PetscScalar *x,*v; 870 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 871 PetscErrorCode ierr; 872 PetscInt nonzerorow=0,n,i,jrow,j; 873 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 874 875 PetscFunctionBegin; 876 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 877 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 878 idx = a->j; 879 v = a->a; 880 ii = a->i; 881 882 for (i=0; i<m; i++) { 883 jrow = ii[i]; 884 n = ii[i+1] - jrow; 885 sum1 = 0.0; 886 sum2 = 0.0; 887 sum3 = 0.0; 888 sum4 = 0.0; 889 sum5 = 0.0; 890 sum6 = 0.0; 891 nonzerorow += (n>0); 892 for (j=0; j<n; j++) { 893 sum1 += v[jrow]*x[6*idx[jrow]]; 894 sum2 += v[jrow]*x[6*idx[jrow]+1]; 895 sum3 += v[jrow]*x[6*idx[jrow]+2]; 896 sum4 += v[jrow]*x[6*idx[jrow]+3]; 897 sum5 += v[jrow]*x[6*idx[jrow]+4]; 898 sum6 += v[jrow]*x[6*idx[jrow]+5]; 899 jrow++; 900 } 901 y[6*i] = sum1; 902 y[6*i+1] = sum2; 903 y[6*i+2] = sum3; 904 y[6*i+3] = sum4; 905 y[6*i+4] = sum5; 906 y[6*i+5] = sum6; 907 } 908 909 ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 910 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 911 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 912 PetscFunctionReturn(0); 913 } 914 915 #undef __FUNCT__ 916 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 917 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 918 { 919 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 920 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 921 const PetscScalar *x,*v; 922 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 923 PetscErrorCode ierr; 924 PetscInt n,i; 925 const PetscInt m = b->AIJ->rmap->n,*idx; 926 927 PetscFunctionBegin; 928 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 929 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 930 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 931 932 for (i=0; i<m; i++) { 933 idx = a->j + a->i[i] ; 934 v = a->a + a->i[i] ; 935 n = a->i[i+1] - a->i[i]; 936 alpha1 = x[6*i]; 937 alpha2 = x[6*i+1]; 938 alpha3 = x[6*i+2]; 939 alpha4 = x[6*i+3]; 940 alpha5 = x[6*i+4]; 941 alpha6 = x[6*i+5]; 942 while (n-->0) { 943 y[6*(*idx)] += alpha1*(*v); 944 y[6*(*idx)+1] += alpha2*(*v); 945 y[6*(*idx)+2] += alpha3*(*v); 946 y[6*(*idx)+3] += alpha4*(*v); 947 y[6*(*idx)+4] += alpha5*(*v); 948 y[6*(*idx)+5] += alpha6*(*v); 949 idx++; v++; 950 } 951 } 952 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 953 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 954 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 955 PetscFunctionReturn(0); 956 } 957 958 #undef __FUNCT__ 959 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 960 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 961 { 962 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 963 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 964 const PetscScalar *x,*v; 965 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 966 PetscErrorCode ierr; 967 PetscInt n,i,jrow,j; 968 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 969 970 PetscFunctionBegin; 971 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 972 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 973 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 974 idx = a->j; 975 v = a->a; 976 ii = a->i; 977 978 for (i=0; i<m; i++) { 979 jrow = ii[i]; 980 n = ii[i+1] - jrow; 981 sum1 = 0.0; 982 sum2 = 0.0; 983 sum3 = 0.0; 984 sum4 = 0.0; 985 sum5 = 0.0; 986 sum6 = 0.0; 987 for (j=0; j<n; j++) { 988 sum1 += v[jrow]*x[6*idx[jrow]]; 989 sum2 += v[jrow]*x[6*idx[jrow]+1]; 990 sum3 += v[jrow]*x[6*idx[jrow]+2]; 991 sum4 += v[jrow]*x[6*idx[jrow]+3]; 992 sum5 += v[jrow]*x[6*idx[jrow]+4]; 993 sum6 += v[jrow]*x[6*idx[jrow]+5]; 994 jrow++; 995 } 996 y[6*i] += sum1; 997 y[6*i+1] += sum2; 998 y[6*i+2] += sum3; 999 y[6*i+3] += sum4; 1000 y[6*i+4] += sum5; 1001 y[6*i+5] += sum6; 1002 } 1003 1004 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1005 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1006 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1007 PetscFunctionReturn(0); 1008 } 1009 1010 #undef __FUNCT__ 1011 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 1012 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 1013 { 1014 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1015 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1016 const PetscScalar *x,*v; 1017 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 1018 PetscErrorCode ierr; 1019 PetscInt n,i; 1020 const PetscInt m = b->AIJ->rmap->n,*idx; 1021 1022 PetscFunctionBegin; 1023 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1024 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1025 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1026 1027 for (i=0; i<m; i++) { 1028 idx = a->j + a->i[i] ; 1029 v = a->a + a->i[i] ; 1030 n = a->i[i+1] - a->i[i]; 1031 alpha1 = x[6*i]; 1032 alpha2 = x[6*i+1]; 1033 alpha3 = x[6*i+2]; 1034 alpha4 = x[6*i+3]; 1035 alpha5 = x[6*i+4]; 1036 alpha6 = x[6*i+5]; 1037 while (n-->0) { 1038 y[6*(*idx)] += alpha1*(*v); 1039 y[6*(*idx)+1] += alpha2*(*v); 1040 y[6*(*idx)+2] += alpha3*(*v); 1041 y[6*(*idx)+3] += alpha4*(*v); 1042 y[6*(*idx)+4] += alpha5*(*v); 1043 y[6*(*idx)+5] += alpha6*(*v); 1044 idx++; v++; 1045 } 1046 } 1047 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1048 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1049 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1050 PetscFunctionReturn(0); 1051 } 1052 1053 /* ------------------------------------------------------------------------------*/ 1054 #undef __FUNCT__ 1055 #define __FUNCT__ "MatMult_SeqMAIJ_7" 1056 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1057 { 1058 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1059 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1060 const PetscScalar *x,*v; 1061 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1062 PetscErrorCode ierr; 1063 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1064 PetscInt nonzerorow=0,n,i,jrow,j; 1065 1066 PetscFunctionBegin; 1067 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1068 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1069 idx = a->j; 1070 v = a->a; 1071 ii = a->i; 1072 1073 for (i=0; i<m; i++) { 1074 jrow = ii[i]; 1075 n = ii[i+1] - jrow; 1076 sum1 = 0.0; 1077 sum2 = 0.0; 1078 sum3 = 0.0; 1079 sum4 = 0.0; 1080 sum5 = 0.0; 1081 sum6 = 0.0; 1082 sum7 = 0.0; 1083 nonzerorow += (n>0); 1084 for (j=0; j<n; j++) { 1085 sum1 += v[jrow]*x[7*idx[jrow]]; 1086 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1087 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1088 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1089 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1090 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1091 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1092 jrow++; 1093 } 1094 y[7*i] = sum1; 1095 y[7*i+1] = sum2; 1096 y[7*i+2] = sum3; 1097 y[7*i+3] = sum4; 1098 y[7*i+4] = sum5; 1099 y[7*i+5] = sum6; 1100 y[7*i+6] = sum7; 1101 } 1102 1103 ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 1104 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1105 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1106 PetscFunctionReturn(0); 1107 } 1108 1109 #undef __FUNCT__ 1110 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1111 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1112 { 1113 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1114 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1115 const PetscScalar *x,*v; 1116 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1117 PetscErrorCode ierr; 1118 const PetscInt m = b->AIJ->rmap->n,*idx; 1119 PetscInt n,i; 1120 1121 PetscFunctionBegin; 1122 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1123 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1124 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1125 1126 for (i=0; i<m; i++) { 1127 idx = a->j + a->i[i] ; 1128 v = a->a + a->i[i] ; 1129 n = a->i[i+1] - a->i[i]; 1130 alpha1 = x[7*i]; 1131 alpha2 = x[7*i+1]; 1132 alpha3 = x[7*i+2]; 1133 alpha4 = x[7*i+3]; 1134 alpha5 = x[7*i+4]; 1135 alpha6 = x[7*i+5]; 1136 alpha7 = x[7*i+6]; 1137 while (n-->0) { 1138 y[7*(*idx)] += alpha1*(*v); 1139 y[7*(*idx)+1] += alpha2*(*v); 1140 y[7*(*idx)+2] += alpha3*(*v); 1141 y[7*(*idx)+3] += alpha4*(*v); 1142 y[7*(*idx)+4] += alpha5*(*v); 1143 y[7*(*idx)+5] += alpha6*(*v); 1144 y[7*(*idx)+6] += alpha7*(*v); 1145 idx++; v++; 1146 } 1147 } 1148 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1149 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1150 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1151 PetscFunctionReturn(0); 1152 } 1153 1154 #undef __FUNCT__ 1155 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1156 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1157 { 1158 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1159 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1160 const PetscScalar *x,*v; 1161 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1162 PetscErrorCode ierr; 1163 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1164 PetscInt n,i,jrow,j; 1165 1166 PetscFunctionBegin; 1167 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1168 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1169 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1170 idx = a->j; 1171 v = a->a; 1172 ii = a->i; 1173 1174 for (i=0; i<m; i++) { 1175 jrow = ii[i]; 1176 n = ii[i+1] - jrow; 1177 sum1 = 0.0; 1178 sum2 = 0.0; 1179 sum3 = 0.0; 1180 sum4 = 0.0; 1181 sum5 = 0.0; 1182 sum6 = 0.0; 1183 sum7 = 0.0; 1184 for (j=0; j<n; j++) { 1185 sum1 += v[jrow]*x[7*idx[jrow]]; 1186 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1187 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1188 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1189 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1190 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1191 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1192 jrow++; 1193 } 1194 y[7*i] += sum1; 1195 y[7*i+1] += sum2; 1196 y[7*i+2] += sum3; 1197 y[7*i+3] += sum4; 1198 y[7*i+4] += sum5; 1199 y[7*i+5] += sum6; 1200 y[7*i+6] += sum7; 1201 } 1202 1203 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1204 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1205 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1206 PetscFunctionReturn(0); 1207 } 1208 1209 #undef __FUNCT__ 1210 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1211 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1212 { 1213 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1214 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1215 const PetscScalar *x,*v; 1216 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1217 PetscErrorCode ierr; 1218 const PetscInt m = b->AIJ->rmap->n,*idx; 1219 PetscInt n,i; 1220 1221 PetscFunctionBegin; 1222 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1223 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1224 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1225 for (i=0; i<m; i++) { 1226 idx = a->j + a->i[i] ; 1227 v = a->a + a->i[i] ; 1228 n = a->i[i+1] - a->i[i]; 1229 alpha1 = x[7*i]; 1230 alpha2 = x[7*i+1]; 1231 alpha3 = x[7*i+2]; 1232 alpha4 = x[7*i+3]; 1233 alpha5 = x[7*i+4]; 1234 alpha6 = x[7*i+5]; 1235 alpha7 = x[7*i+6]; 1236 while (n-->0) { 1237 y[7*(*idx)] += alpha1*(*v); 1238 y[7*(*idx)+1] += alpha2*(*v); 1239 y[7*(*idx)+2] += alpha3*(*v); 1240 y[7*(*idx)+3] += alpha4*(*v); 1241 y[7*(*idx)+4] += alpha5*(*v); 1242 y[7*(*idx)+5] += alpha6*(*v); 1243 y[7*(*idx)+6] += alpha7*(*v); 1244 idx++; v++; 1245 } 1246 } 1247 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1248 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1249 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1250 PetscFunctionReturn(0); 1251 } 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "MatMult_SeqMAIJ_8" 1255 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1256 { 1257 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1258 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1259 const PetscScalar *x,*v; 1260 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1261 PetscErrorCode ierr; 1262 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1263 PetscInt nonzerorow=0,n,i,jrow,j; 1264 1265 PetscFunctionBegin; 1266 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1267 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1268 idx = a->j; 1269 v = a->a; 1270 ii = a->i; 1271 1272 for (i=0; i<m; i++) { 1273 jrow = ii[i]; 1274 n = ii[i+1] - jrow; 1275 sum1 = 0.0; 1276 sum2 = 0.0; 1277 sum3 = 0.0; 1278 sum4 = 0.0; 1279 sum5 = 0.0; 1280 sum6 = 0.0; 1281 sum7 = 0.0; 1282 sum8 = 0.0; 1283 nonzerorow += (n>0); 1284 for (j=0; j<n; j++) { 1285 sum1 += v[jrow]*x[8*idx[jrow]]; 1286 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1287 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1288 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1289 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1290 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1291 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1292 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1293 jrow++; 1294 } 1295 y[8*i] = sum1; 1296 y[8*i+1] = sum2; 1297 y[8*i+2] = sum3; 1298 y[8*i+3] = sum4; 1299 y[8*i+4] = sum5; 1300 y[8*i+5] = sum6; 1301 y[8*i+6] = sum7; 1302 y[8*i+7] = sum8; 1303 } 1304 1305 ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 1306 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1307 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1313 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1314 { 1315 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1316 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1317 const PetscScalar *x,*v; 1318 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1319 PetscErrorCode ierr; 1320 const PetscInt m = b->AIJ->rmap->n,*idx; 1321 PetscInt n,i; 1322 1323 PetscFunctionBegin; 1324 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1325 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1326 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1327 1328 for (i=0; i<m; i++) { 1329 idx = a->j + a->i[i] ; 1330 v = a->a + a->i[i] ; 1331 n = a->i[i+1] - a->i[i]; 1332 alpha1 = x[8*i]; 1333 alpha2 = x[8*i+1]; 1334 alpha3 = x[8*i+2]; 1335 alpha4 = x[8*i+3]; 1336 alpha5 = x[8*i+4]; 1337 alpha6 = x[8*i+5]; 1338 alpha7 = x[8*i+6]; 1339 alpha8 = x[8*i+7]; 1340 while (n-->0) { 1341 y[8*(*idx)] += alpha1*(*v); 1342 y[8*(*idx)+1] += alpha2*(*v); 1343 y[8*(*idx)+2] += alpha3*(*v); 1344 y[8*(*idx)+3] += alpha4*(*v); 1345 y[8*(*idx)+4] += alpha5*(*v); 1346 y[8*(*idx)+5] += alpha6*(*v); 1347 y[8*(*idx)+6] += alpha7*(*v); 1348 y[8*(*idx)+7] += alpha8*(*v); 1349 idx++; v++; 1350 } 1351 } 1352 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1353 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1354 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1355 PetscFunctionReturn(0); 1356 } 1357 1358 #undef __FUNCT__ 1359 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1360 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1361 { 1362 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1363 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1364 const PetscScalar *x,*v; 1365 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1366 PetscErrorCode ierr; 1367 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1368 PetscInt n,i,jrow,j; 1369 1370 PetscFunctionBegin; 1371 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1372 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1373 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1374 idx = a->j; 1375 v = a->a; 1376 ii = a->i; 1377 1378 for (i=0; i<m; i++) { 1379 jrow = ii[i]; 1380 n = ii[i+1] - jrow; 1381 sum1 = 0.0; 1382 sum2 = 0.0; 1383 sum3 = 0.0; 1384 sum4 = 0.0; 1385 sum5 = 0.0; 1386 sum6 = 0.0; 1387 sum7 = 0.0; 1388 sum8 = 0.0; 1389 for (j=0; j<n; j++) { 1390 sum1 += v[jrow]*x[8*idx[jrow]]; 1391 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1392 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1393 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1394 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1395 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1396 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1397 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1398 jrow++; 1399 } 1400 y[8*i] += sum1; 1401 y[8*i+1] += sum2; 1402 y[8*i+2] += sum3; 1403 y[8*i+3] += sum4; 1404 y[8*i+4] += sum5; 1405 y[8*i+5] += sum6; 1406 y[8*i+6] += sum7; 1407 y[8*i+7] += sum8; 1408 } 1409 1410 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1411 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1412 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1413 PetscFunctionReturn(0); 1414 } 1415 1416 #undef __FUNCT__ 1417 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1418 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1419 { 1420 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1421 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1422 const PetscScalar *x,*v; 1423 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1424 PetscErrorCode ierr; 1425 const PetscInt m = b->AIJ->rmap->n,*idx; 1426 PetscInt n,i; 1427 1428 PetscFunctionBegin; 1429 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1430 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1431 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1432 for (i=0; i<m; i++) { 1433 idx = a->j + a->i[i] ; 1434 v = a->a + a->i[i] ; 1435 n = a->i[i+1] - a->i[i]; 1436 alpha1 = x[8*i]; 1437 alpha2 = x[8*i+1]; 1438 alpha3 = x[8*i+2]; 1439 alpha4 = x[8*i+3]; 1440 alpha5 = x[8*i+4]; 1441 alpha6 = x[8*i+5]; 1442 alpha7 = x[8*i+6]; 1443 alpha8 = x[8*i+7]; 1444 while (n-->0) { 1445 y[8*(*idx)] += alpha1*(*v); 1446 y[8*(*idx)+1] += alpha2*(*v); 1447 y[8*(*idx)+2] += alpha3*(*v); 1448 y[8*(*idx)+3] += alpha4*(*v); 1449 y[8*(*idx)+4] += alpha5*(*v); 1450 y[8*(*idx)+5] += alpha6*(*v); 1451 y[8*(*idx)+6] += alpha7*(*v); 1452 y[8*(*idx)+7] += alpha8*(*v); 1453 idx++; v++; 1454 } 1455 } 1456 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1457 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1458 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1459 PetscFunctionReturn(0); 1460 } 1461 1462 /* ------------------------------------------------------------------------------*/ 1463 #undef __FUNCT__ 1464 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1465 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1466 { 1467 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1468 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1469 const PetscScalar *x,*v; 1470 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1471 PetscErrorCode ierr; 1472 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1473 PetscInt nonzerorow=0,n,i,jrow,j; 1474 1475 PetscFunctionBegin; 1476 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1477 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1478 idx = a->j; 1479 v = a->a; 1480 ii = a->i; 1481 1482 for (i=0; i<m; i++) { 1483 jrow = ii[i]; 1484 n = ii[i+1] - jrow; 1485 sum1 = 0.0; 1486 sum2 = 0.0; 1487 sum3 = 0.0; 1488 sum4 = 0.0; 1489 sum5 = 0.0; 1490 sum6 = 0.0; 1491 sum7 = 0.0; 1492 sum8 = 0.0; 1493 sum9 = 0.0; 1494 nonzerorow += (n>0); 1495 for (j=0; j<n; j++) { 1496 sum1 += v[jrow]*x[9*idx[jrow]]; 1497 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1498 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1499 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1500 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1501 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1502 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1503 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1504 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1505 jrow++; 1506 } 1507 y[9*i] = sum1; 1508 y[9*i+1] = sum2; 1509 y[9*i+2] = sum3; 1510 y[9*i+3] = sum4; 1511 y[9*i+4] = sum5; 1512 y[9*i+5] = sum6; 1513 y[9*i+6] = sum7; 1514 y[9*i+7] = sum8; 1515 y[9*i+8] = sum9; 1516 } 1517 1518 ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 1519 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1520 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1521 PetscFunctionReturn(0); 1522 } 1523 1524 /* ------------------------------------------------------------------------------*/ 1525 1526 #undef __FUNCT__ 1527 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1528 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1529 { 1530 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1531 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1532 const PetscScalar *x,*v; 1533 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1534 PetscErrorCode ierr; 1535 const PetscInt m = b->AIJ->rmap->n,*idx; 1536 PetscInt n,i; 1537 1538 PetscFunctionBegin; 1539 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1540 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1541 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1542 1543 for (i=0; i<m; i++) { 1544 idx = a->j + a->i[i] ; 1545 v = a->a + a->i[i] ; 1546 n = a->i[i+1] - a->i[i]; 1547 alpha1 = x[9*i]; 1548 alpha2 = x[9*i+1]; 1549 alpha3 = x[9*i+2]; 1550 alpha4 = x[9*i+3]; 1551 alpha5 = x[9*i+4]; 1552 alpha6 = x[9*i+5]; 1553 alpha7 = x[9*i+6]; 1554 alpha8 = x[9*i+7]; 1555 alpha9 = x[9*i+8]; 1556 while (n-->0) { 1557 y[9*(*idx)] += alpha1*(*v); 1558 y[9*(*idx)+1] += alpha2*(*v); 1559 y[9*(*idx)+2] += alpha3*(*v); 1560 y[9*(*idx)+3] += alpha4*(*v); 1561 y[9*(*idx)+4] += alpha5*(*v); 1562 y[9*(*idx)+5] += alpha6*(*v); 1563 y[9*(*idx)+6] += alpha7*(*v); 1564 y[9*(*idx)+7] += alpha8*(*v); 1565 y[9*(*idx)+8] += alpha9*(*v); 1566 idx++; v++; 1567 } 1568 } 1569 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1570 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1571 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1572 PetscFunctionReturn(0); 1573 } 1574 1575 #undef __FUNCT__ 1576 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1577 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1578 { 1579 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1580 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1581 const PetscScalar *x,*v; 1582 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1583 PetscErrorCode ierr; 1584 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1585 PetscInt n,i,jrow,j; 1586 1587 PetscFunctionBegin; 1588 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1589 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1590 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1591 idx = a->j; 1592 v = a->a; 1593 ii = a->i; 1594 1595 for (i=0; i<m; i++) { 1596 jrow = ii[i]; 1597 n = ii[i+1] - jrow; 1598 sum1 = 0.0; 1599 sum2 = 0.0; 1600 sum3 = 0.0; 1601 sum4 = 0.0; 1602 sum5 = 0.0; 1603 sum6 = 0.0; 1604 sum7 = 0.0; 1605 sum8 = 0.0; 1606 sum9 = 0.0; 1607 for (j=0; j<n; j++) { 1608 sum1 += v[jrow]*x[9*idx[jrow]]; 1609 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1610 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1611 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1612 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1613 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1614 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1615 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1616 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1617 jrow++; 1618 } 1619 y[9*i] += sum1; 1620 y[9*i+1] += sum2; 1621 y[9*i+2] += sum3; 1622 y[9*i+3] += sum4; 1623 y[9*i+4] += sum5; 1624 y[9*i+5] += sum6; 1625 y[9*i+6] += sum7; 1626 y[9*i+7] += sum8; 1627 y[9*i+8] += sum9; 1628 } 1629 1630 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1631 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1632 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1633 PetscFunctionReturn(0); 1634 } 1635 1636 #undef __FUNCT__ 1637 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1638 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1639 { 1640 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1641 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1642 const PetscScalar *x,*v; 1643 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1644 PetscErrorCode ierr; 1645 const PetscInt m = b->AIJ->rmap->n,*idx; 1646 PetscInt n,i; 1647 1648 PetscFunctionBegin; 1649 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1650 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1651 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1652 for (i=0; i<m; i++) { 1653 idx = a->j + a->i[i] ; 1654 v = a->a + a->i[i] ; 1655 n = a->i[i+1] - a->i[i]; 1656 alpha1 = x[9*i]; 1657 alpha2 = x[9*i+1]; 1658 alpha3 = x[9*i+2]; 1659 alpha4 = x[9*i+3]; 1660 alpha5 = x[9*i+4]; 1661 alpha6 = x[9*i+5]; 1662 alpha7 = x[9*i+6]; 1663 alpha8 = x[9*i+7]; 1664 alpha9 = x[9*i+8]; 1665 while (n-->0) { 1666 y[9*(*idx)] += alpha1*(*v); 1667 y[9*(*idx)+1] += alpha2*(*v); 1668 y[9*(*idx)+2] += alpha3*(*v); 1669 y[9*(*idx)+3] += alpha4*(*v); 1670 y[9*(*idx)+4] += alpha5*(*v); 1671 y[9*(*idx)+5] += alpha6*(*v); 1672 y[9*(*idx)+6] += alpha7*(*v); 1673 y[9*(*idx)+7] += alpha8*(*v); 1674 y[9*(*idx)+8] += alpha9*(*v); 1675 idx++; v++; 1676 } 1677 } 1678 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1679 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1680 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1681 PetscFunctionReturn(0); 1682 } 1683 #undef __FUNCT__ 1684 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1685 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1686 { 1687 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1688 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1689 const PetscScalar *x,*v; 1690 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1691 PetscErrorCode ierr; 1692 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1693 PetscInt nonzerorow=0,n,i,jrow,j; 1694 1695 PetscFunctionBegin; 1696 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1697 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1698 idx = a->j; 1699 v = a->a; 1700 ii = a->i; 1701 1702 for (i=0; i<m; i++) { 1703 jrow = ii[i]; 1704 n = ii[i+1] - jrow; 1705 sum1 = 0.0; 1706 sum2 = 0.0; 1707 sum3 = 0.0; 1708 sum4 = 0.0; 1709 sum5 = 0.0; 1710 sum6 = 0.0; 1711 sum7 = 0.0; 1712 sum8 = 0.0; 1713 sum9 = 0.0; 1714 sum10 = 0.0; 1715 nonzerorow += (n>0); 1716 for (j=0; j<n; j++) { 1717 sum1 += v[jrow]*x[10*idx[jrow]]; 1718 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1719 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1720 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1721 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1722 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1723 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1724 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1725 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1726 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1727 jrow++; 1728 } 1729 y[10*i] = sum1; 1730 y[10*i+1] = sum2; 1731 y[10*i+2] = sum3; 1732 y[10*i+3] = sum4; 1733 y[10*i+4] = sum5; 1734 y[10*i+5] = sum6; 1735 y[10*i+6] = sum7; 1736 y[10*i+7] = sum8; 1737 y[10*i+8] = sum9; 1738 y[10*i+9] = sum10; 1739 } 1740 1741 ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 1742 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1743 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1744 PetscFunctionReturn(0); 1745 } 1746 1747 #undef __FUNCT__ 1748 #define __FUNCT__ "MatMultAdd_SeqMAIJ_10" 1749 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1750 { 1751 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1752 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1753 const PetscScalar *x,*v; 1754 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1755 PetscErrorCode ierr; 1756 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1757 PetscInt n,i,jrow,j; 1758 1759 PetscFunctionBegin; 1760 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1761 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1762 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1763 idx = a->j; 1764 v = a->a; 1765 ii = a->i; 1766 1767 for (i=0; i<m; i++) { 1768 jrow = ii[i]; 1769 n = ii[i+1] - jrow; 1770 sum1 = 0.0; 1771 sum2 = 0.0; 1772 sum3 = 0.0; 1773 sum4 = 0.0; 1774 sum5 = 0.0; 1775 sum6 = 0.0; 1776 sum7 = 0.0; 1777 sum8 = 0.0; 1778 sum9 = 0.0; 1779 sum10 = 0.0; 1780 for (j=0; j<n; j++) { 1781 sum1 += v[jrow]*x[10*idx[jrow]]; 1782 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1783 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1784 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1785 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1786 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1787 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1788 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1789 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1790 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1791 jrow++; 1792 } 1793 y[10*i] += sum1; 1794 y[10*i+1] += sum2; 1795 y[10*i+2] += sum3; 1796 y[10*i+3] += sum4; 1797 y[10*i+4] += sum5; 1798 y[10*i+5] += sum6; 1799 y[10*i+6] += sum7; 1800 y[10*i+7] += sum8; 1801 y[10*i+8] += sum9; 1802 y[10*i+9] += sum10; 1803 } 1804 1805 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1806 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1807 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1808 PetscFunctionReturn(0); 1809 } 1810 1811 #undef __FUNCT__ 1812 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1813 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1814 { 1815 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1816 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1817 const PetscScalar *x,*v; 1818 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1819 PetscErrorCode ierr; 1820 const PetscInt m = b->AIJ->rmap->n,*idx; 1821 PetscInt n,i; 1822 1823 PetscFunctionBegin; 1824 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1825 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1826 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1827 1828 for (i=0; i<m; i++) { 1829 idx = a->j + a->i[i] ; 1830 v = a->a + a->i[i] ; 1831 n = a->i[i+1] - a->i[i]; 1832 alpha1 = x[10*i]; 1833 alpha2 = x[10*i+1]; 1834 alpha3 = x[10*i+2]; 1835 alpha4 = x[10*i+3]; 1836 alpha5 = x[10*i+4]; 1837 alpha6 = x[10*i+5]; 1838 alpha7 = x[10*i+6]; 1839 alpha8 = x[10*i+7]; 1840 alpha9 = x[10*i+8]; 1841 alpha10 = x[10*i+9]; 1842 while (n-->0) { 1843 y[10*(*idx)] += alpha1*(*v); 1844 y[10*(*idx)+1] += alpha2*(*v); 1845 y[10*(*idx)+2] += alpha3*(*v); 1846 y[10*(*idx)+3] += alpha4*(*v); 1847 y[10*(*idx)+4] += alpha5*(*v); 1848 y[10*(*idx)+5] += alpha6*(*v); 1849 y[10*(*idx)+6] += alpha7*(*v); 1850 y[10*(*idx)+7] += alpha8*(*v); 1851 y[10*(*idx)+8] += alpha9*(*v); 1852 y[10*(*idx)+9] += alpha10*(*v); 1853 idx++; v++; 1854 } 1855 } 1856 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1857 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1858 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1859 PetscFunctionReturn(0); 1860 } 1861 1862 #undef __FUNCT__ 1863 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1864 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1865 { 1866 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1867 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1868 const PetscScalar *x,*v; 1869 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1870 PetscErrorCode ierr; 1871 const PetscInt m = b->AIJ->rmap->n,*idx; 1872 PetscInt n,i; 1873 1874 PetscFunctionBegin; 1875 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1876 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1877 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1878 for (i=0; i<m; i++) { 1879 idx = a->j + a->i[i] ; 1880 v = a->a + a->i[i] ; 1881 n = a->i[i+1] - a->i[i]; 1882 alpha1 = x[10*i]; 1883 alpha2 = x[10*i+1]; 1884 alpha3 = x[10*i+2]; 1885 alpha4 = x[10*i+3]; 1886 alpha5 = x[10*i+4]; 1887 alpha6 = x[10*i+5]; 1888 alpha7 = x[10*i+6]; 1889 alpha8 = x[10*i+7]; 1890 alpha9 = x[10*i+8]; 1891 alpha10 = x[10*i+9]; 1892 while (n-->0) { 1893 y[10*(*idx)] += alpha1*(*v); 1894 y[10*(*idx)+1] += alpha2*(*v); 1895 y[10*(*idx)+2] += alpha3*(*v); 1896 y[10*(*idx)+3] += alpha4*(*v); 1897 y[10*(*idx)+4] += alpha5*(*v); 1898 y[10*(*idx)+5] += alpha6*(*v); 1899 y[10*(*idx)+6] += alpha7*(*v); 1900 y[10*(*idx)+7] += alpha8*(*v); 1901 y[10*(*idx)+8] += alpha9*(*v); 1902 y[10*(*idx)+9] += alpha10*(*v); 1903 idx++; v++; 1904 } 1905 } 1906 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1907 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1908 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1909 PetscFunctionReturn(0); 1910 } 1911 1912 1913 /*--------------------------------------------------------------------------------------------*/ 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatMult_SeqMAIJ_11" 1916 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1917 { 1918 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1919 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1920 const PetscScalar *x,*v; 1921 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1922 PetscErrorCode ierr; 1923 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1924 PetscInt nonzerorow=0,n,i,jrow,j; 1925 1926 PetscFunctionBegin; 1927 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1928 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1929 idx = a->j; 1930 v = a->a; 1931 ii = a->i; 1932 1933 for (i=0; i<m; i++) { 1934 jrow = ii[i]; 1935 n = ii[i+1] - jrow; 1936 sum1 = 0.0; 1937 sum2 = 0.0; 1938 sum3 = 0.0; 1939 sum4 = 0.0; 1940 sum5 = 0.0; 1941 sum6 = 0.0; 1942 sum7 = 0.0; 1943 sum8 = 0.0; 1944 sum9 = 0.0; 1945 sum10 = 0.0; 1946 sum11 = 0.0; 1947 nonzerorow += (n>0); 1948 for (j=0; j<n; j++) { 1949 sum1 += v[jrow]*x[11*idx[jrow]]; 1950 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1951 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1952 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1953 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1954 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1955 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1956 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1957 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1958 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1959 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1960 jrow++; 1961 } 1962 y[11*i] = sum1; 1963 y[11*i+1] = sum2; 1964 y[11*i+2] = sum3; 1965 y[11*i+3] = sum4; 1966 y[11*i+4] = sum5; 1967 y[11*i+5] = sum6; 1968 y[11*i+6] = sum7; 1969 y[11*i+7] = sum8; 1970 y[11*i+8] = sum9; 1971 y[11*i+9] = sum10; 1972 y[11*i+10] = sum11; 1973 } 1974 1975 ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr); 1976 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1977 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1978 PetscFunctionReturn(0); 1979 } 1980 1981 #undef __FUNCT__ 1982 #define __FUNCT__ "MatMultAdd_SeqMAIJ_11" 1983 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1984 { 1985 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1986 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1987 const PetscScalar *x,*v; 1988 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1989 PetscErrorCode ierr; 1990 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1991 PetscInt n,i,jrow,j; 1992 1993 PetscFunctionBegin; 1994 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1995 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1996 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1997 idx = a->j; 1998 v = a->a; 1999 ii = a->i; 2000 2001 for (i=0; i<m; i++) { 2002 jrow = ii[i]; 2003 n = ii[i+1] - jrow; 2004 sum1 = 0.0; 2005 sum2 = 0.0; 2006 sum3 = 0.0; 2007 sum4 = 0.0; 2008 sum5 = 0.0; 2009 sum6 = 0.0; 2010 sum7 = 0.0; 2011 sum8 = 0.0; 2012 sum9 = 0.0; 2013 sum10 = 0.0; 2014 sum11 = 0.0; 2015 for (j=0; j<n; j++) { 2016 sum1 += v[jrow]*x[11*idx[jrow]]; 2017 sum2 += v[jrow]*x[11*idx[jrow]+1]; 2018 sum3 += v[jrow]*x[11*idx[jrow]+2]; 2019 sum4 += v[jrow]*x[11*idx[jrow]+3]; 2020 sum5 += v[jrow]*x[11*idx[jrow]+4]; 2021 sum6 += v[jrow]*x[11*idx[jrow]+5]; 2022 sum7 += v[jrow]*x[11*idx[jrow]+6]; 2023 sum8 += v[jrow]*x[11*idx[jrow]+7]; 2024 sum9 += v[jrow]*x[11*idx[jrow]+8]; 2025 sum10 += v[jrow]*x[11*idx[jrow]+9]; 2026 sum11 += v[jrow]*x[11*idx[jrow]+10]; 2027 jrow++; 2028 } 2029 y[11*i] += sum1; 2030 y[11*i+1] += sum2; 2031 y[11*i+2] += sum3; 2032 y[11*i+3] += sum4; 2033 y[11*i+4] += sum5; 2034 y[11*i+5] += sum6; 2035 y[11*i+6] += sum7; 2036 y[11*i+7] += sum8; 2037 y[11*i+8] += sum9; 2038 y[11*i+9] += sum10; 2039 y[11*i+10] += sum11; 2040 } 2041 2042 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2043 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2044 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2045 PetscFunctionReturn(0); 2046 } 2047 2048 #undef __FUNCT__ 2049 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11" 2050 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 2051 { 2052 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2053 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2054 const PetscScalar *x,*v; 2055 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2056 PetscErrorCode ierr; 2057 const PetscInt m = b->AIJ->rmap->n,*idx; 2058 PetscInt n,i; 2059 2060 PetscFunctionBegin; 2061 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2062 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2063 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2064 2065 for (i=0; i<m; i++) { 2066 idx = a->j + a->i[i] ; 2067 v = a->a + a->i[i] ; 2068 n = a->i[i+1] - a->i[i]; 2069 alpha1 = x[11*i]; 2070 alpha2 = x[11*i+1]; 2071 alpha3 = x[11*i+2]; 2072 alpha4 = x[11*i+3]; 2073 alpha5 = x[11*i+4]; 2074 alpha6 = x[11*i+5]; 2075 alpha7 = x[11*i+6]; 2076 alpha8 = x[11*i+7]; 2077 alpha9 = x[11*i+8]; 2078 alpha10 = x[11*i+9]; 2079 alpha11 = x[11*i+10]; 2080 while (n-->0) { 2081 y[11*(*idx)] += alpha1*(*v); 2082 y[11*(*idx)+1] += alpha2*(*v); 2083 y[11*(*idx)+2] += alpha3*(*v); 2084 y[11*(*idx)+3] += alpha4*(*v); 2085 y[11*(*idx)+4] += alpha5*(*v); 2086 y[11*(*idx)+5] += alpha6*(*v); 2087 y[11*(*idx)+6] += alpha7*(*v); 2088 y[11*(*idx)+7] += alpha8*(*v); 2089 y[11*(*idx)+8] += alpha9*(*v); 2090 y[11*(*idx)+9] += alpha10*(*v); 2091 y[11*(*idx)+10] += alpha11*(*v); 2092 idx++; v++; 2093 } 2094 } 2095 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2096 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2097 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2098 PetscFunctionReturn(0); 2099 } 2100 2101 #undef __FUNCT__ 2102 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11" 2103 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2104 { 2105 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2106 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2107 const PetscScalar *x,*v; 2108 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2109 PetscErrorCode ierr; 2110 const PetscInt m = b->AIJ->rmap->n,*idx; 2111 PetscInt n,i; 2112 2113 PetscFunctionBegin; 2114 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2115 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2116 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2117 for (i=0; i<m; i++) { 2118 idx = a->j + a->i[i] ; 2119 v = a->a + a->i[i] ; 2120 n = a->i[i+1] - a->i[i]; 2121 alpha1 = x[11*i]; 2122 alpha2 = x[11*i+1]; 2123 alpha3 = x[11*i+2]; 2124 alpha4 = x[11*i+3]; 2125 alpha5 = x[11*i+4]; 2126 alpha6 = x[11*i+5]; 2127 alpha7 = x[11*i+6]; 2128 alpha8 = x[11*i+7]; 2129 alpha9 = x[11*i+8]; 2130 alpha10 = x[11*i+9]; 2131 alpha11 = x[11*i+10]; 2132 while (n-->0) { 2133 y[11*(*idx)] += alpha1*(*v); 2134 y[11*(*idx)+1] += alpha2*(*v); 2135 y[11*(*idx)+2] += alpha3*(*v); 2136 y[11*(*idx)+3] += alpha4*(*v); 2137 y[11*(*idx)+4] += alpha5*(*v); 2138 y[11*(*idx)+5] += alpha6*(*v); 2139 y[11*(*idx)+6] += alpha7*(*v); 2140 y[11*(*idx)+7] += alpha8*(*v); 2141 y[11*(*idx)+8] += alpha9*(*v); 2142 y[11*(*idx)+9] += alpha10*(*v); 2143 y[11*(*idx)+10] += alpha11*(*v); 2144 idx++; v++; 2145 } 2146 } 2147 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2148 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2149 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2150 PetscFunctionReturn(0); 2151 } 2152 2153 2154 /*--------------------------------------------------------------------------------------------*/ 2155 #undef __FUNCT__ 2156 #define __FUNCT__ "MatMult_SeqMAIJ_16" 2157 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2158 { 2159 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2160 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2161 const PetscScalar *x,*v; 2162 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2163 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2164 PetscErrorCode ierr; 2165 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2166 PetscInt nonzerorow=0,n,i,jrow,j; 2167 2168 PetscFunctionBegin; 2169 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2170 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2171 idx = a->j; 2172 v = a->a; 2173 ii = a->i; 2174 2175 for (i=0; i<m; i++) { 2176 jrow = ii[i]; 2177 n = ii[i+1] - jrow; 2178 sum1 = 0.0; 2179 sum2 = 0.0; 2180 sum3 = 0.0; 2181 sum4 = 0.0; 2182 sum5 = 0.0; 2183 sum6 = 0.0; 2184 sum7 = 0.0; 2185 sum8 = 0.0; 2186 sum9 = 0.0; 2187 sum10 = 0.0; 2188 sum11 = 0.0; 2189 sum12 = 0.0; 2190 sum13 = 0.0; 2191 sum14 = 0.0; 2192 sum15 = 0.0; 2193 sum16 = 0.0; 2194 nonzerorow += (n>0); 2195 for (j=0; j<n; j++) { 2196 sum1 += v[jrow]*x[16*idx[jrow]]; 2197 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2198 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2199 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2200 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2201 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2202 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2203 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2204 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2205 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2206 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2207 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2208 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2209 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2210 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2211 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2212 jrow++; 2213 } 2214 y[16*i] = sum1; 2215 y[16*i+1] = sum2; 2216 y[16*i+2] = sum3; 2217 y[16*i+3] = sum4; 2218 y[16*i+4] = sum5; 2219 y[16*i+5] = sum6; 2220 y[16*i+6] = sum7; 2221 y[16*i+7] = sum8; 2222 y[16*i+8] = sum9; 2223 y[16*i+9] = sum10; 2224 y[16*i+10] = sum11; 2225 y[16*i+11] = sum12; 2226 y[16*i+12] = sum13; 2227 y[16*i+13] = sum14; 2228 y[16*i+14] = sum15; 2229 y[16*i+15] = sum16; 2230 } 2231 2232 ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 2233 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2234 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2235 PetscFunctionReturn(0); 2236 } 2237 2238 #undef __FUNCT__ 2239 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 2240 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2241 { 2242 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2243 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2244 const PetscScalar *x,*v; 2245 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2246 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2247 PetscErrorCode ierr; 2248 const PetscInt m = b->AIJ->rmap->n,*idx; 2249 PetscInt n,i; 2250 2251 PetscFunctionBegin; 2252 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2253 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2254 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2255 2256 for (i=0; i<m; i++) { 2257 idx = a->j + a->i[i] ; 2258 v = a->a + a->i[i] ; 2259 n = a->i[i+1] - a->i[i]; 2260 alpha1 = x[16*i]; 2261 alpha2 = x[16*i+1]; 2262 alpha3 = x[16*i+2]; 2263 alpha4 = x[16*i+3]; 2264 alpha5 = x[16*i+4]; 2265 alpha6 = x[16*i+5]; 2266 alpha7 = x[16*i+6]; 2267 alpha8 = x[16*i+7]; 2268 alpha9 = x[16*i+8]; 2269 alpha10 = x[16*i+9]; 2270 alpha11 = x[16*i+10]; 2271 alpha12 = x[16*i+11]; 2272 alpha13 = x[16*i+12]; 2273 alpha14 = x[16*i+13]; 2274 alpha15 = x[16*i+14]; 2275 alpha16 = x[16*i+15]; 2276 while (n-->0) { 2277 y[16*(*idx)] += alpha1*(*v); 2278 y[16*(*idx)+1] += alpha2*(*v); 2279 y[16*(*idx)+2] += alpha3*(*v); 2280 y[16*(*idx)+3] += alpha4*(*v); 2281 y[16*(*idx)+4] += alpha5*(*v); 2282 y[16*(*idx)+5] += alpha6*(*v); 2283 y[16*(*idx)+6] += alpha7*(*v); 2284 y[16*(*idx)+7] += alpha8*(*v); 2285 y[16*(*idx)+8] += alpha9*(*v); 2286 y[16*(*idx)+9] += alpha10*(*v); 2287 y[16*(*idx)+10] += alpha11*(*v); 2288 y[16*(*idx)+11] += alpha12*(*v); 2289 y[16*(*idx)+12] += alpha13*(*v); 2290 y[16*(*idx)+13] += alpha14*(*v); 2291 y[16*(*idx)+14] += alpha15*(*v); 2292 y[16*(*idx)+15] += alpha16*(*v); 2293 idx++; v++; 2294 } 2295 } 2296 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2297 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2298 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2299 PetscFunctionReturn(0); 2300 } 2301 2302 #undef __FUNCT__ 2303 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 2304 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2305 { 2306 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2307 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2308 const PetscScalar *x,*v; 2309 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2310 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2311 PetscErrorCode ierr; 2312 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2313 PetscInt n,i,jrow,j; 2314 2315 PetscFunctionBegin; 2316 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2317 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2318 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2319 idx = a->j; 2320 v = a->a; 2321 ii = a->i; 2322 2323 for (i=0; i<m; i++) { 2324 jrow = ii[i]; 2325 n = ii[i+1] - jrow; 2326 sum1 = 0.0; 2327 sum2 = 0.0; 2328 sum3 = 0.0; 2329 sum4 = 0.0; 2330 sum5 = 0.0; 2331 sum6 = 0.0; 2332 sum7 = 0.0; 2333 sum8 = 0.0; 2334 sum9 = 0.0; 2335 sum10 = 0.0; 2336 sum11 = 0.0; 2337 sum12 = 0.0; 2338 sum13 = 0.0; 2339 sum14 = 0.0; 2340 sum15 = 0.0; 2341 sum16 = 0.0; 2342 for (j=0; j<n; j++) { 2343 sum1 += v[jrow]*x[16*idx[jrow]]; 2344 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2345 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2346 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2347 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2348 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2349 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2350 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2351 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2352 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2353 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2354 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2355 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2356 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2357 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2358 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2359 jrow++; 2360 } 2361 y[16*i] += sum1; 2362 y[16*i+1] += sum2; 2363 y[16*i+2] += sum3; 2364 y[16*i+3] += sum4; 2365 y[16*i+4] += sum5; 2366 y[16*i+5] += sum6; 2367 y[16*i+6] += sum7; 2368 y[16*i+7] += sum8; 2369 y[16*i+8] += sum9; 2370 y[16*i+9] += sum10; 2371 y[16*i+10] += sum11; 2372 y[16*i+11] += sum12; 2373 y[16*i+12] += sum13; 2374 y[16*i+13] += sum14; 2375 y[16*i+14] += sum15; 2376 y[16*i+15] += sum16; 2377 } 2378 2379 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2380 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2381 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2382 PetscFunctionReturn(0); 2383 } 2384 2385 #undef __FUNCT__ 2386 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2387 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2388 { 2389 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2390 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2391 const PetscScalar *x,*v; 2392 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2393 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2394 PetscErrorCode ierr; 2395 const PetscInt m = b->AIJ->rmap->n,*idx; 2396 PetscInt n,i; 2397 2398 PetscFunctionBegin; 2399 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2400 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2401 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2402 for (i=0; i<m; i++) { 2403 idx = a->j + a->i[i] ; 2404 v = a->a + a->i[i] ; 2405 n = a->i[i+1] - a->i[i]; 2406 alpha1 = x[16*i]; 2407 alpha2 = x[16*i+1]; 2408 alpha3 = x[16*i+2]; 2409 alpha4 = x[16*i+3]; 2410 alpha5 = x[16*i+4]; 2411 alpha6 = x[16*i+5]; 2412 alpha7 = x[16*i+6]; 2413 alpha8 = x[16*i+7]; 2414 alpha9 = x[16*i+8]; 2415 alpha10 = x[16*i+9]; 2416 alpha11 = x[16*i+10]; 2417 alpha12 = x[16*i+11]; 2418 alpha13 = x[16*i+12]; 2419 alpha14 = x[16*i+13]; 2420 alpha15 = x[16*i+14]; 2421 alpha16 = x[16*i+15]; 2422 while (n-->0) { 2423 y[16*(*idx)] += alpha1*(*v); 2424 y[16*(*idx)+1] += alpha2*(*v); 2425 y[16*(*idx)+2] += alpha3*(*v); 2426 y[16*(*idx)+3] += alpha4*(*v); 2427 y[16*(*idx)+4] += alpha5*(*v); 2428 y[16*(*idx)+5] += alpha6*(*v); 2429 y[16*(*idx)+6] += alpha7*(*v); 2430 y[16*(*idx)+7] += alpha8*(*v); 2431 y[16*(*idx)+8] += alpha9*(*v); 2432 y[16*(*idx)+9] += alpha10*(*v); 2433 y[16*(*idx)+10] += alpha11*(*v); 2434 y[16*(*idx)+11] += alpha12*(*v); 2435 y[16*(*idx)+12] += alpha13*(*v); 2436 y[16*(*idx)+13] += alpha14*(*v); 2437 y[16*(*idx)+14] += alpha15*(*v); 2438 y[16*(*idx)+15] += alpha16*(*v); 2439 idx++; v++; 2440 } 2441 } 2442 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2443 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2444 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2445 PetscFunctionReturn(0); 2446 } 2447 2448 /*--------------------------------------------------------------------------------------------*/ 2449 #undef __FUNCT__ 2450 #define __FUNCT__ "MatMult_SeqMAIJ_18" 2451 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2452 { 2453 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2454 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2455 const PetscScalar *x,*v; 2456 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2457 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2458 PetscErrorCode ierr; 2459 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2460 PetscInt nonzerorow=0,n,i,jrow,j; 2461 2462 PetscFunctionBegin; 2463 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2464 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2465 idx = a->j; 2466 v = a->a; 2467 ii = a->i; 2468 2469 for (i=0; i<m; i++) { 2470 jrow = ii[i]; 2471 n = ii[i+1] - jrow; 2472 sum1 = 0.0; 2473 sum2 = 0.0; 2474 sum3 = 0.0; 2475 sum4 = 0.0; 2476 sum5 = 0.0; 2477 sum6 = 0.0; 2478 sum7 = 0.0; 2479 sum8 = 0.0; 2480 sum9 = 0.0; 2481 sum10 = 0.0; 2482 sum11 = 0.0; 2483 sum12 = 0.0; 2484 sum13 = 0.0; 2485 sum14 = 0.0; 2486 sum15 = 0.0; 2487 sum16 = 0.0; 2488 sum17 = 0.0; 2489 sum18 = 0.0; 2490 nonzerorow += (n>0); 2491 for (j=0; j<n; j++) { 2492 sum1 += v[jrow]*x[18*idx[jrow]]; 2493 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2494 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2495 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2496 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2497 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2498 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2499 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2500 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2501 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2502 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2503 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2504 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2505 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2506 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2507 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2508 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2509 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2510 jrow++; 2511 } 2512 y[18*i] = sum1; 2513 y[18*i+1] = sum2; 2514 y[18*i+2] = sum3; 2515 y[18*i+3] = sum4; 2516 y[18*i+4] = sum5; 2517 y[18*i+5] = sum6; 2518 y[18*i+6] = sum7; 2519 y[18*i+7] = sum8; 2520 y[18*i+8] = sum9; 2521 y[18*i+9] = sum10; 2522 y[18*i+10] = sum11; 2523 y[18*i+11] = sum12; 2524 y[18*i+12] = sum13; 2525 y[18*i+13] = sum14; 2526 y[18*i+14] = sum15; 2527 y[18*i+15] = sum16; 2528 y[18*i+16] = sum17; 2529 y[18*i+17] = sum18; 2530 } 2531 2532 ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 2533 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2534 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2535 PetscFunctionReturn(0); 2536 } 2537 2538 #undef __FUNCT__ 2539 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18" 2540 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2541 { 2542 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2543 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2544 const PetscScalar *x,*v; 2545 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2546 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2547 PetscErrorCode ierr; 2548 const PetscInt m = b->AIJ->rmap->n,*idx; 2549 PetscInt n,i; 2550 2551 PetscFunctionBegin; 2552 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2553 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2554 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2555 2556 for (i=0; i<m; i++) { 2557 idx = a->j + a->i[i] ; 2558 v = a->a + a->i[i] ; 2559 n = a->i[i+1] - a->i[i]; 2560 alpha1 = x[18*i]; 2561 alpha2 = x[18*i+1]; 2562 alpha3 = x[18*i+2]; 2563 alpha4 = x[18*i+3]; 2564 alpha5 = x[18*i+4]; 2565 alpha6 = x[18*i+5]; 2566 alpha7 = x[18*i+6]; 2567 alpha8 = x[18*i+7]; 2568 alpha9 = x[18*i+8]; 2569 alpha10 = x[18*i+9]; 2570 alpha11 = x[18*i+10]; 2571 alpha12 = x[18*i+11]; 2572 alpha13 = x[18*i+12]; 2573 alpha14 = x[18*i+13]; 2574 alpha15 = x[18*i+14]; 2575 alpha16 = x[18*i+15]; 2576 alpha17 = x[18*i+16]; 2577 alpha18 = x[18*i+17]; 2578 while (n-->0) { 2579 y[18*(*idx)] += alpha1*(*v); 2580 y[18*(*idx)+1] += alpha2*(*v); 2581 y[18*(*idx)+2] += alpha3*(*v); 2582 y[18*(*idx)+3] += alpha4*(*v); 2583 y[18*(*idx)+4] += alpha5*(*v); 2584 y[18*(*idx)+5] += alpha6*(*v); 2585 y[18*(*idx)+6] += alpha7*(*v); 2586 y[18*(*idx)+7] += alpha8*(*v); 2587 y[18*(*idx)+8] += alpha9*(*v); 2588 y[18*(*idx)+9] += alpha10*(*v); 2589 y[18*(*idx)+10] += alpha11*(*v); 2590 y[18*(*idx)+11] += alpha12*(*v); 2591 y[18*(*idx)+12] += alpha13*(*v); 2592 y[18*(*idx)+13] += alpha14*(*v); 2593 y[18*(*idx)+14] += alpha15*(*v); 2594 y[18*(*idx)+15] += alpha16*(*v); 2595 y[18*(*idx)+16] += alpha17*(*v); 2596 y[18*(*idx)+17] += alpha18*(*v); 2597 idx++; v++; 2598 } 2599 } 2600 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2601 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2602 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2603 PetscFunctionReturn(0); 2604 } 2605 2606 #undef __FUNCT__ 2607 #define __FUNCT__ "MatMultAdd_SeqMAIJ_18" 2608 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2609 { 2610 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2611 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2612 const PetscScalar *x,*v; 2613 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2614 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2615 PetscErrorCode ierr; 2616 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2617 PetscInt n,i,jrow,j; 2618 2619 PetscFunctionBegin; 2620 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2621 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2622 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2623 idx = a->j; 2624 v = a->a; 2625 ii = a->i; 2626 2627 for (i=0; i<m; i++) { 2628 jrow = ii[i]; 2629 n = ii[i+1] - jrow; 2630 sum1 = 0.0; 2631 sum2 = 0.0; 2632 sum3 = 0.0; 2633 sum4 = 0.0; 2634 sum5 = 0.0; 2635 sum6 = 0.0; 2636 sum7 = 0.0; 2637 sum8 = 0.0; 2638 sum9 = 0.0; 2639 sum10 = 0.0; 2640 sum11 = 0.0; 2641 sum12 = 0.0; 2642 sum13 = 0.0; 2643 sum14 = 0.0; 2644 sum15 = 0.0; 2645 sum16 = 0.0; 2646 sum17 = 0.0; 2647 sum18 = 0.0; 2648 for (j=0; j<n; j++) { 2649 sum1 += v[jrow]*x[18*idx[jrow]]; 2650 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2651 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2652 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2653 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2654 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2655 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2656 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2657 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2658 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2659 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2660 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2661 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2662 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2663 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2664 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2665 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2666 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2667 jrow++; 2668 } 2669 y[18*i] += sum1; 2670 y[18*i+1] += sum2; 2671 y[18*i+2] += sum3; 2672 y[18*i+3] += sum4; 2673 y[18*i+4] += sum5; 2674 y[18*i+5] += sum6; 2675 y[18*i+6] += sum7; 2676 y[18*i+7] += sum8; 2677 y[18*i+8] += sum9; 2678 y[18*i+9] += sum10; 2679 y[18*i+10] += sum11; 2680 y[18*i+11] += sum12; 2681 y[18*i+12] += sum13; 2682 y[18*i+13] += sum14; 2683 y[18*i+14] += sum15; 2684 y[18*i+15] += sum16; 2685 y[18*i+16] += sum17; 2686 y[18*i+17] += sum18; 2687 } 2688 2689 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2690 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2691 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2692 PetscFunctionReturn(0); 2693 } 2694 2695 #undef __FUNCT__ 2696 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18" 2697 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2698 { 2699 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2700 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2701 const PetscScalar *x,*v; 2702 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2703 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2704 PetscErrorCode ierr; 2705 const PetscInt m = b->AIJ->rmap->n,*idx; 2706 PetscInt n,i; 2707 2708 PetscFunctionBegin; 2709 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2710 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2711 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2712 for (i=0; i<m; i++) { 2713 idx = a->j + a->i[i] ; 2714 v = a->a + a->i[i] ; 2715 n = a->i[i+1] - a->i[i]; 2716 alpha1 = x[18*i]; 2717 alpha2 = x[18*i+1]; 2718 alpha3 = x[18*i+2]; 2719 alpha4 = x[18*i+3]; 2720 alpha5 = x[18*i+4]; 2721 alpha6 = x[18*i+5]; 2722 alpha7 = x[18*i+6]; 2723 alpha8 = x[18*i+7]; 2724 alpha9 = x[18*i+8]; 2725 alpha10 = x[18*i+9]; 2726 alpha11 = x[18*i+10]; 2727 alpha12 = x[18*i+11]; 2728 alpha13 = x[18*i+12]; 2729 alpha14 = x[18*i+13]; 2730 alpha15 = x[18*i+14]; 2731 alpha16 = x[18*i+15]; 2732 alpha17 = x[18*i+16]; 2733 alpha18 = x[18*i+17]; 2734 while (n-->0) { 2735 y[18*(*idx)] += alpha1*(*v); 2736 y[18*(*idx)+1] += alpha2*(*v); 2737 y[18*(*idx)+2] += alpha3*(*v); 2738 y[18*(*idx)+3] += alpha4*(*v); 2739 y[18*(*idx)+4] += alpha5*(*v); 2740 y[18*(*idx)+5] += alpha6*(*v); 2741 y[18*(*idx)+6] += alpha7*(*v); 2742 y[18*(*idx)+7] += alpha8*(*v); 2743 y[18*(*idx)+8] += alpha9*(*v); 2744 y[18*(*idx)+9] += alpha10*(*v); 2745 y[18*(*idx)+10] += alpha11*(*v); 2746 y[18*(*idx)+11] += alpha12*(*v); 2747 y[18*(*idx)+12] += alpha13*(*v); 2748 y[18*(*idx)+13] += alpha14*(*v); 2749 y[18*(*idx)+14] += alpha15*(*v); 2750 y[18*(*idx)+15] += alpha16*(*v); 2751 y[18*(*idx)+16] += alpha17*(*v); 2752 y[18*(*idx)+17] += alpha18*(*v); 2753 idx++; v++; 2754 } 2755 } 2756 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2757 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2758 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2759 PetscFunctionReturn(0); 2760 } 2761 2762 #undef __FUNCT__ 2763 #define __FUNCT__ "MatMult_SeqMAIJ_N" 2764 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2765 { 2766 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2767 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2768 const PetscScalar *x,*v; 2769 PetscScalar *y,*sums; 2770 PetscErrorCode ierr; 2771 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2772 PetscInt n,i,jrow,j,dof = b->dof,k; 2773 2774 PetscFunctionBegin; 2775 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2776 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2777 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2778 idx = a->j; 2779 v = a->a; 2780 ii = a->i; 2781 2782 for (i=0; i<m; i++) { 2783 jrow = ii[i]; 2784 n = ii[i+1] - jrow; 2785 sums = y + dof*i; 2786 for (j=0; j<n; j++) { 2787 for (k=0; k<dof; k++) { 2788 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2789 } 2790 jrow++; 2791 } 2792 } 2793 2794 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2795 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2796 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2797 PetscFunctionReturn(0); 2798 } 2799 2800 #undef __FUNCT__ 2801 #define __FUNCT__ "MatMultAdd_SeqMAIJ_N" 2802 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2803 { 2804 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2805 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2806 const PetscScalar *x,*v; 2807 PetscScalar *y,*sums; 2808 PetscErrorCode ierr; 2809 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2810 PetscInt n,i,jrow,j,dof = b->dof,k; 2811 2812 PetscFunctionBegin; 2813 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2814 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2815 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2816 idx = a->j; 2817 v = a->a; 2818 ii = a->i; 2819 2820 for (i=0; i<m; i++) { 2821 jrow = ii[i]; 2822 n = ii[i+1] - jrow; 2823 sums = y + dof*i; 2824 for (j=0; j<n; j++) { 2825 for (k=0; k<dof; k++) { 2826 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2827 } 2828 jrow++; 2829 } 2830 } 2831 2832 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2833 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2834 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2835 PetscFunctionReturn(0); 2836 } 2837 2838 #undef __FUNCT__ 2839 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N" 2840 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2841 { 2842 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2843 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2844 const PetscScalar *x,*v,*alpha; 2845 PetscScalar *y; 2846 PetscErrorCode ierr; 2847 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2848 PetscInt n,i,k; 2849 2850 PetscFunctionBegin; 2851 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2852 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2853 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2854 for (i=0; i<m; i++) { 2855 idx = a->j + a->i[i] ; 2856 v = a->a + a->i[i] ; 2857 n = a->i[i+1] - a->i[i]; 2858 alpha = x + dof*i; 2859 while (n-->0) { 2860 for (k=0; k<dof; k++) { 2861 y[dof*(*idx)+k] += alpha[k]*(*v); 2862 } 2863 idx++; v++; 2864 } 2865 } 2866 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2867 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2868 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2869 PetscFunctionReturn(0); 2870 } 2871 2872 #undef __FUNCT__ 2873 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N" 2874 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2875 { 2876 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2877 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2878 const PetscScalar *x,*v,*alpha; 2879 PetscScalar *y; 2880 PetscErrorCode ierr; 2881 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2882 PetscInt n,i,k; 2883 2884 PetscFunctionBegin; 2885 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2886 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2887 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2888 for (i=0; i<m; i++) { 2889 idx = a->j + a->i[i] ; 2890 v = a->a + a->i[i] ; 2891 n = a->i[i+1] - a->i[i]; 2892 alpha = x + dof*i; 2893 while (n-->0) { 2894 for (k=0; k<dof; k++) { 2895 y[dof*(*idx)+k] += alpha[k]*(*v); 2896 } 2897 idx++; v++; 2898 } 2899 } 2900 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2901 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2902 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2903 PetscFunctionReturn(0); 2904 } 2905 2906 /*===================================================================================*/ 2907 #undef __FUNCT__ 2908 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2909 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2910 { 2911 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2912 PetscErrorCode ierr; 2913 2914 PetscFunctionBegin; 2915 /* start the scatter */ 2916 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2917 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2918 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2919 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2920 PetscFunctionReturn(0); 2921 } 2922 2923 #undef __FUNCT__ 2924 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2925 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2926 { 2927 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2928 PetscErrorCode ierr; 2929 2930 PetscFunctionBegin; 2931 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2932 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2933 ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2934 ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2935 PetscFunctionReturn(0); 2936 } 2937 2938 #undef __FUNCT__ 2939 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2940 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2941 { 2942 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2943 PetscErrorCode ierr; 2944 2945 PetscFunctionBegin; 2946 /* start the scatter */ 2947 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2948 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2949 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2950 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2951 PetscFunctionReturn(0); 2952 } 2953 2954 #undef __FUNCT__ 2955 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2956 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2957 { 2958 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2959 PetscErrorCode ierr; 2960 2961 PetscFunctionBegin; 2962 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2963 ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2964 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2965 ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2966 PetscFunctionReturn(0); 2967 } 2968 2969 /* ----------------------------------------------------------------*/ 2970 #undef __FUNCT__ 2971 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 2972 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2973 { 2974 /* This routine requires testing -- but it's getting better. */ 2975 PetscErrorCode ierr; 2976 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2977 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2978 Mat P=pp->AIJ; 2979 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2980 PetscInt *pti,*ptj,*ptJ; 2981 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2982 const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 2983 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 2984 MatScalar *ca; 2985 const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 2986 2987 PetscFunctionBegin; 2988 /* Start timer */ 2989 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 2990 2991 /* Get ij structure of P^T */ 2992 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2993 2994 cn = pn*ppdof; 2995 /* Allocate ci array, arrays for fill computation and */ 2996 /* free space for accumulating nonzero column info */ 2997 ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 2998 ci[0] = 0; 2999 3000 /* Work arrays for rows of P^T*A */ 3001 ierr = PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);CHKERRQ(ierr); 3002 ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr); 3003 ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr); 3004 3005 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 3006 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 3007 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 3008 ierr = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space); 3009 current_space = free_space; 3010 3011 /* Determine symbolic info for each row of C: */ 3012 for (i=0;i<pn;i++) { 3013 ptnzi = pti[i+1] - pti[i]; 3014 ptJ = ptj + pti[i]; 3015 for (dof=0;dof<ppdof;dof++) { 3016 ptanzi = 0; 3017 /* Determine symbolic row of PtA: */ 3018 for (j=0;j<ptnzi;j++) { 3019 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 3020 arow = ptJ[j]*ppdof + dof; 3021 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 3022 anzj = ai[arow+1] - ai[arow]; 3023 ajj = aj + ai[arow]; 3024 for (k=0;k<anzj;k++) { 3025 if (!ptadenserow[ajj[k]]) { 3026 ptadenserow[ajj[k]] = -1; 3027 ptasparserow[ptanzi++] = ajj[k]; 3028 } 3029 } 3030 } 3031 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 3032 ptaj = ptasparserow; 3033 cnzi = 0; 3034 for (j=0;j<ptanzi;j++) { 3035 /* Get offset within block of P */ 3036 pshift = *ptaj%ppdof; 3037 /* Get block row of P */ 3038 prow = (*ptaj++)/ppdof; /* integer division */ 3039 /* P has same number of nonzeros per row as the compressed form */ 3040 pnzj = pi[prow+1] - pi[prow]; 3041 pjj = pj + pi[prow]; 3042 for (k=0;k<pnzj;k++) { 3043 /* Locations in C are shifted by the offset within the block */ 3044 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 3045 if (!denserow[pjj[k]*ppdof+pshift]) { 3046 denserow[pjj[k]*ppdof+pshift] = -1; 3047 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 3048 } 3049 } 3050 } 3051 3052 /* sort sparserow */ 3053 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 3054 3055 /* If free space is not available, make more free space */ 3056 /* Double the amount of total space in the list */ 3057 if (current_space->local_remaining<cnzi) { 3058 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3059 } 3060 3061 /* Copy data into free space, and zero out denserows */ 3062 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 3063 current_space->array += cnzi; 3064 current_space->local_used += cnzi; 3065 current_space->local_remaining -= cnzi; 3066 3067 for (j=0;j<ptanzi;j++) { 3068 ptadenserow[ptasparserow[j]] = 0; 3069 } 3070 for (j=0;j<cnzi;j++) { 3071 denserow[sparserow[j]] = 0; 3072 } 3073 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 3074 /* For now, we will recompute what is needed. */ 3075 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 3076 } 3077 } 3078 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 3079 /* Allocate space for cj, initialize cj, and */ 3080 /* destroy list of free space and other temporary array(s) */ 3081 ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3082 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 3083 ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr); 3084 3085 /* Allocate space for ca */ 3086 ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 3087 ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 3088 3089 /* put together the new matrix */ 3090 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 3091 3092 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3093 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3094 c = (Mat_SeqAIJ *)((*C)->data); 3095 c->free_a = PETSC_TRUE; 3096 c->free_ij = PETSC_TRUE; 3097 c->nonew = 0; 3098 3099 /* Clean up. */ 3100 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 3101 3102 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 3103 PetscFunctionReturn(0); 3104 } 3105 3106 #undef __FUNCT__ 3107 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 3108 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 3109 { 3110 /* This routine requires testing -- first draft only */ 3111 PetscErrorCode ierr; 3112 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 3113 Mat P=pp->AIJ; 3114 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 3115 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 3116 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 3117 const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 3118 const PetscInt *ci=c->i,*cj=c->j,*cjj; 3119 const PetscInt am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3120 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3121 const MatScalar *aa=a->a,*pa=p->a,*pA=p->a,*paj; 3122 MatScalar *ca=c->a,*caj,*apa; 3123 3124 PetscFunctionBegin; 3125 /* Allocate temporary array for storage of one row of A*P */ 3126 ierr = PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);CHKERRQ(ierr); 3127 ierr = PetscMemzero(apa,cn*sizeof(MatScalar));CHKERRQ(ierr); 3128 ierr = PetscMemzero(apj,cn*sizeof(PetscInt));CHKERRQ(ierr); 3129 ierr = PetscMemzero(apjdense,cn*sizeof(PetscInt));CHKERRQ(ierr); 3130 3131 /* Clear old values in C */ 3132 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 3133 3134 for (i=0;i<am;i++) { 3135 /* Form sparse row of A*P */ 3136 anzi = ai[i+1] - ai[i]; 3137 apnzj = 0; 3138 for (j=0;j<anzi;j++) { 3139 /* Get offset within block of P */ 3140 pshift = *aj%ppdof; 3141 /* Get block row of P */ 3142 prow = *aj++/ppdof; /* integer division */ 3143 pnzj = pi[prow+1] - pi[prow]; 3144 pjj = pj + pi[prow]; 3145 paj = pa + pi[prow]; 3146 for (k=0;k<pnzj;k++) { 3147 poffset = pjj[k]*ppdof+pshift; 3148 if (!apjdense[poffset]) { 3149 apjdense[poffset] = -1; 3150 apj[apnzj++] = poffset; 3151 } 3152 apa[poffset] += (*aa)*paj[k]; 3153 } 3154 ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 3155 aa++; 3156 } 3157 3158 /* Sort the j index array for quick sparse axpy. */ 3159 /* Note: a array does not need sorting as it is in dense storage locations. */ 3160 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 3161 3162 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 3163 prow = i/ppdof; /* integer division */ 3164 pshift = i%ppdof; 3165 poffset = pi[prow]; 3166 pnzi = pi[prow+1] - poffset; 3167 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 3168 pJ = pj+poffset; 3169 pA = pa+poffset; 3170 for (j=0;j<pnzi;j++) { 3171 crow = (*pJ)*ppdof+pshift; 3172 cjj = cj + ci[crow]; 3173 caj = ca + ci[crow]; 3174 pJ++; 3175 /* Perform sparse axpy operation. Note cjj includes apj. */ 3176 for (k=0,nextap=0;nextap<apnzj;k++) { 3177 if (cjj[k]==apj[nextap]) { 3178 caj[k] += (*pA)*apa[apj[nextap++]]; 3179 } 3180 } 3181 ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 3182 pA++; 3183 } 3184 3185 /* Zero the current row info for A*P */ 3186 for (j=0;j<apnzj;j++) { 3187 apa[apj[j]] = 0.; 3188 apjdense[apj[j]] = 0; 3189 } 3190 } 3191 3192 /* Assemble the final matrix and clean up */ 3193 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3194 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3195 ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr); 3196 PetscFunctionReturn(0); 3197 } 3198 3199 #undef __FUNCT__ 3200 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ" 3201 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 3202 { 3203 PetscErrorCode ierr; 3204 3205 PetscFunctionBegin; 3206 /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 3207 ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr); 3208 ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 3209 PetscFunctionReturn(0); 3210 } 3211 3212 #undef __FUNCT__ 3213 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ" 3214 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 3215 { 3216 PetscFunctionBegin; 3217 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 3218 PetscFunctionReturn(0); 3219 } 3220 3221 EXTERN_C_BEGIN 3222 #undef __FUNCT__ 3223 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 3224 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3225 { 3226 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3227 Mat a = b->AIJ,B; 3228 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 3229 PetscErrorCode ierr; 3230 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3231 PetscInt *cols; 3232 PetscScalar *vals; 3233 3234 PetscFunctionBegin; 3235 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 3236 ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr); 3237 for (i=0; i<m; i++) { 3238 nmax = PetscMax(nmax,aij->ilen[i]); 3239 for (j=0; j<dof; j++) { 3240 ilen[dof*i+j] = aij->ilen[i]; 3241 } 3242 } 3243 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 3244 ierr = PetscFree(ilen);CHKERRQ(ierr); 3245 ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 3246 ii = 0; 3247 for (i=0; i<m; i++) { 3248 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3249 for (j=0; j<dof; j++) { 3250 for (k=0; k<ncols; k++) { 3251 icols[k] = dof*cols[k]+j; 3252 } 3253 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3254 ii++; 3255 } 3256 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3257 } 3258 ierr = PetscFree(icols);CHKERRQ(ierr); 3259 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3260 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3261 3262 if (reuse == MAT_REUSE_MATRIX) { 3263 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3264 } else { 3265 *newmat = B; 3266 } 3267 PetscFunctionReturn(0); 3268 } 3269 EXTERN_C_END 3270 3271 #include "../src/mat/impls/aij/mpi/mpiaij.h" 3272 3273 EXTERN_C_BEGIN 3274 #undef __FUNCT__ 3275 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 3276 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3277 { 3278 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3279 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 3280 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 3281 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 3282 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 3283 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 3284 PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0; 3285 PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL; 3286 PetscInt rstart,cstart,*garray,ii,k; 3287 PetscErrorCode ierr; 3288 PetscScalar *vals,*ovals; 3289 3290 PetscFunctionBegin; 3291 ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr); 3292 for (i=0; i<A->rmap->n/dof; i++) { 3293 nmax = PetscMax(nmax,AIJ->ilen[i]); 3294 onmax = PetscMax(onmax,OAIJ->ilen[i]); 3295 for (j=0; j<dof; j++) { 3296 dnz[dof*i+j] = AIJ->ilen[i]; 3297 onz[dof*i+j] = OAIJ->ilen[i]; 3298 } 3299 } 3300 ierr = MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 3301 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 3302 3303 ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 3304 rstart = dof*maij->A->rmap->rstart; 3305 cstart = dof*maij->A->cmap->rstart; 3306 garray = mpiaij->garray; 3307 3308 ii = rstart; 3309 for (i=0; i<A->rmap->n/dof; i++) { 3310 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3311 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3312 for (j=0; j<dof; j++) { 3313 for (k=0; k<ncols; k++) { 3314 icols[k] = cstart + dof*cols[k]+j; 3315 } 3316 for (k=0; k<oncols; k++) { 3317 oicols[k] = dof*garray[ocols[k]]+j; 3318 } 3319 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3320 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 3321 ii++; 3322 } 3323 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3324 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3325 } 3326 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 3327 3328 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3329 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3330 3331 if (reuse == MAT_REUSE_MATRIX) { 3332 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 3333 ((PetscObject)A)->refct = 1; 3334 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3335 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3336 } else { 3337 *newmat = B; 3338 } 3339 PetscFunctionReturn(0); 3340 } 3341 EXTERN_C_END 3342 3343 3344 /* ---------------------------------------------------------------------------------- */ 3345 #undef __FUNCT__ 3346 #define __FUNCT__ "MatCreateMAIJ" 3347 /*@C 3348 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 3349 operations for multicomponent problems. It interpolates each component the same 3350 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 3351 and MATMPIAIJ for distributed matrices. 3352 3353 Collective 3354 3355 Input Parameters: 3356 + A - the AIJ matrix describing the action on blocks 3357 - dof - the block size (number of components per node) 3358 3359 Output Parameter: 3360 . maij - the new MAIJ matrix 3361 3362 Operations provided: 3363 + MatMult 3364 . MatMultTranspose 3365 . MatMultAdd 3366 . MatMultTransposeAdd 3367 - MatView 3368 3369 Level: advanced 3370 3371 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension() 3372 @*/ 3373 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 3374 { 3375 PetscErrorCode ierr; 3376 PetscMPIInt size; 3377 PetscInt n; 3378 Mat B; 3379 3380 PetscFunctionBegin; 3381 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3382 3383 if (dof == 1) { 3384 *maij = A; 3385 } else { 3386 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3387 ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3388 ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr); 3389 ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr); 3390 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3391 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3392 B->assembled = PETSC_TRUE; 3393 3394 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3395 if (size == 1) { 3396 Mat_SeqMAIJ *b; 3397 3398 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 3399 B->ops->destroy = MatDestroy_SeqMAIJ; 3400 B->ops->view = MatView_SeqMAIJ; 3401 b = (Mat_SeqMAIJ*)B->data; 3402 b->dof = dof; 3403 b->AIJ = A; 3404 if (dof == 2) { 3405 B->ops->mult = MatMult_SeqMAIJ_2; 3406 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3407 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3408 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3409 } else if (dof == 3) { 3410 B->ops->mult = MatMult_SeqMAIJ_3; 3411 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3412 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3413 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3414 } else if (dof == 4) { 3415 B->ops->mult = MatMult_SeqMAIJ_4; 3416 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3417 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3418 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3419 } else if (dof == 5) { 3420 B->ops->mult = MatMult_SeqMAIJ_5; 3421 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3422 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3423 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 3424 } else if (dof == 6) { 3425 B->ops->mult = MatMult_SeqMAIJ_6; 3426 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 3427 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 3428 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3429 } else if (dof == 7) { 3430 B->ops->mult = MatMult_SeqMAIJ_7; 3431 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3432 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3433 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 3434 } else if (dof == 8) { 3435 B->ops->mult = MatMult_SeqMAIJ_8; 3436 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 3437 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 3438 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 3439 } else if (dof == 9) { 3440 B->ops->mult = MatMult_SeqMAIJ_9; 3441 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 3442 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 3443 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3444 } else if (dof == 10) { 3445 B->ops->mult = MatMult_SeqMAIJ_10; 3446 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3447 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3448 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3449 } else if (dof == 11) { 3450 B->ops->mult = MatMult_SeqMAIJ_11; 3451 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3452 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3453 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 3454 } else if (dof == 16) { 3455 B->ops->mult = MatMult_SeqMAIJ_16; 3456 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 3457 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 3458 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3459 } else if (dof == 18) { 3460 B->ops->mult = MatMult_SeqMAIJ_18; 3461 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3462 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3463 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3464 } else { 3465 B->ops->mult = MatMult_SeqMAIJ_N; 3466 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 3467 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 3468 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 3469 } 3470 B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ; 3471 B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 3472 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3473 } else { 3474 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 3475 Mat_MPIMAIJ *b; 3476 IS from,to; 3477 Vec gvec; 3478 PetscInt *garray,i; 3479 3480 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 3481 B->ops->destroy = MatDestroy_MPIMAIJ; 3482 B->ops->view = MatView_MPIMAIJ; 3483 b = (Mat_MPIMAIJ*)B->data; 3484 b->dof = dof; 3485 b->A = A; 3486 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 3487 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 3488 3489 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3490 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 3491 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3492 3493 /* create two temporary Index sets for build scatter gather */ 3494 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 3495 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 3496 ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);CHKERRQ(ierr); 3497 ierr = PetscFree(garray);CHKERRQ(ierr); 3498 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3499 3500 /* create temporary global vector to generate scatter context */ 3501 ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 3502 ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr); 3503 3504 /* generate the scatter context */ 3505 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3506 3507 ierr = ISDestroy(from);CHKERRQ(ierr); 3508 ierr = ISDestroy(to);CHKERRQ(ierr); 3509 ierr = VecDestroy(gvec);CHKERRQ(ierr); 3510 3511 B->ops->mult = MatMult_MPIMAIJ_dof; 3512 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3513 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3514 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3515 B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ; 3516 B->ops->ptapnumeric_mpiaij = MatPtAPNumeric_MPIAIJ_MPIMAIJ; 3517 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3518 } 3519 *maij = B; 3520 ierr = MatView_Private(B);CHKERRQ(ierr); 3521 } 3522 PetscFunctionReturn(0); 3523 } 3524