1 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20 #include <../src/mat/utils/freespace.h> 21 #include <private/vecimpl.h> 22 23 #undef __FUNCT__ 24 #define __FUNCT__ "MatMAIJGetAIJ" 25 /*@C 26 MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 27 28 Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 29 30 Input Parameter: 31 . A - the MAIJ matrix 32 33 Output Parameter: 34 . B - the AIJ matrix 35 36 Level: advanced 37 38 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 39 40 .seealso: MatCreateMAIJ() 41 @*/ 42 PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 43 { 44 PetscErrorCode ierr; 45 PetscBool ismpimaij,isseqmaij; 46 47 PetscFunctionBegin; 48 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 49 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 50 if (ismpimaij) { 51 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 52 53 *B = b->A; 54 } else if (isseqmaij) { 55 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 56 57 *B = b->AIJ; 58 } else { 59 *B = A; 60 } 61 PetscFunctionReturn(0); 62 } 63 64 #undef __FUNCT__ 65 #define __FUNCT__ "MatMAIJRedimension" 66 /*@C 67 MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 68 69 Logically Collective 70 71 Input Parameter: 72 + A - the MAIJ matrix 73 - dof - the block size for the new matrix 74 75 Output Parameter: 76 . B - the new MAIJ matrix 77 78 Level: advanced 79 80 .seealso: MatCreateMAIJ() 81 @*/ 82 PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 83 { 84 PetscErrorCode ierr; 85 Mat Aij = PETSC_NULL; 86 87 PetscFunctionBegin; 88 PetscValidLogicalCollectiveInt(A,dof,2); 89 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 90 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatDestroy_SeqMAIJ" 96 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 97 { 98 PetscErrorCode ierr; 99 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 100 101 PetscFunctionBegin; 102 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 103 ierr = PetscFree(A->data);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatView_SeqMAIJ" 109 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 110 { 111 PetscErrorCode ierr; 112 Mat B; 113 114 PetscFunctionBegin; 115 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B); 116 ierr = MatView(B,viewer);CHKERRQ(ierr); 117 ierr = MatDestroy(&B);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "MatView_MPIMAIJ" 123 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 124 { 125 PetscErrorCode ierr; 126 Mat B; 127 128 PetscFunctionBegin; 129 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B); 130 ierr = MatView(B,viewer);CHKERRQ(ierr); 131 ierr = MatDestroy(&B);CHKERRQ(ierr); 132 PetscFunctionReturn(0); 133 } 134 135 #undef __FUNCT__ 136 #define __FUNCT__ "MatDestroy_MPIMAIJ" 137 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 138 { 139 PetscErrorCode ierr; 140 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 141 142 PetscFunctionBegin; 143 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 144 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 145 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 146 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 147 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 148 ierr = PetscFree(A->data);CHKERRQ(ierr); 149 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 150 PetscFunctionReturn(0); 151 } 152 153 /*MC 154 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 155 multicomponent problems, interpolating or restricting each component the same way independently. 156 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 157 158 Operations provided: 159 . MatMult 160 . MatMultTranspose 161 . MatMultAdd 162 . MatMultTransposeAdd 163 164 Level: advanced 165 166 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ() 167 M*/ 168 169 EXTERN_C_BEGIN 170 #undef __FUNCT__ 171 #define __FUNCT__ "MatCreate_MAIJ" 172 PetscErrorCode MatCreate_MAIJ(Mat A) 173 { 174 PetscErrorCode ierr; 175 Mat_MPIMAIJ *b; 176 PetscMPIInt size; 177 178 PetscFunctionBegin; 179 ierr = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr); 180 A->data = (void*)b; 181 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 182 183 b->AIJ = 0; 184 b->dof = 0; 185 b->OAIJ = 0; 186 b->ctx = 0; 187 b->w = 0; 188 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 189 if (size == 1){ 190 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 191 } else { 192 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 193 } 194 PetscFunctionReturn(0); 195 } 196 EXTERN_C_END 197 198 /* --------------------------------------------------------------------------------------*/ 199 #undef __FUNCT__ 200 #define __FUNCT__ "MatMult_SeqMAIJ_2" 201 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 202 { 203 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 205 const PetscScalar *x,*v; 206 PetscScalar *y, sum1, sum2; 207 PetscErrorCode ierr; 208 PetscInt nonzerorow=0,n,i,jrow,j; 209 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 210 211 PetscFunctionBegin; 212 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 213 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 214 idx = a->j; 215 v = a->a; 216 ii = a->i; 217 218 for (i=0; i<m; i++) { 219 jrow = ii[i]; 220 n = ii[i+1] - jrow; 221 sum1 = 0.0; 222 sum2 = 0.0; 223 nonzerorow += (n>0); 224 for (j=0; j<n; j++) { 225 sum1 += v[jrow]*x[2*idx[jrow]]; 226 sum2 += v[jrow]*x[2*idx[jrow]+1]; 227 jrow++; 228 } 229 y[2*i] = sum1; 230 y[2*i+1] = sum2; 231 } 232 233 ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 234 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 235 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 236 PetscFunctionReturn(0); 237 } 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 241 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 242 { 243 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 244 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 245 const PetscScalar *x,*v; 246 PetscScalar *y,alpha1,alpha2; 247 PetscErrorCode ierr; 248 PetscInt n,i; 249 const PetscInt m = b->AIJ->rmap->n,*idx; 250 251 PetscFunctionBegin; 252 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 253 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 254 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 255 256 for (i=0; i<m; i++) { 257 idx = a->j + a->i[i] ; 258 v = a->a + a->i[i] ; 259 n = a->i[i+1] - a->i[i]; 260 alpha1 = x[2*i]; 261 alpha2 = x[2*i+1]; 262 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 263 } 264 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 265 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 266 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 267 PetscFunctionReturn(0); 268 } 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 272 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 273 { 274 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 275 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 276 const PetscScalar *x,*v; 277 PetscScalar *y,sum1, sum2; 278 PetscErrorCode ierr; 279 PetscInt n,i,jrow,j; 280 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 281 282 PetscFunctionBegin; 283 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 284 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 285 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 286 idx = a->j; 287 v = a->a; 288 ii = a->i; 289 290 for (i=0; i<m; i++) { 291 jrow = ii[i]; 292 n = ii[i+1] - jrow; 293 sum1 = 0.0; 294 sum2 = 0.0; 295 for (j=0; j<n; j++) { 296 sum1 += v[jrow]*x[2*idx[jrow]]; 297 sum2 += v[jrow]*x[2*idx[jrow]+1]; 298 jrow++; 299 } 300 y[2*i] += sum1; 301 y[2*i+1] += sum2; 302 } 303 304 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 305 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 306 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 307 PetscFunctionReturn(0); 308 } 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 311 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 312 { 313 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 314 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 315 const PetscScalar *x,*v; 316 PetscScalar *y,alpha1,alpha2; 317 PetscErrorCode ierr; 318 PetscInt n,i; 319 const PetscInt m = b->AIJ->rmap->n,*idx; 320 321 PetscFunctionBegin; 322 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 323 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 324 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 325 326 for (i=0; i<m; i++) { 327 idx = a->j + a->i[i] ; 328 v = a->a + a->i[i] ; 329 n = a->i[i+1] - a->i[i]; 330 alpha1 = x[2*i]; 331 alpha2 = x[2*i+1]; 332 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 333 } 334 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 335 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 336 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 337 PetscFunctionReturn(0); 338 } 339 /* --------------------------------------------------------------------------------------*/ 340 #undef __FUNCT__ 341 #define __FUNCT__ "MatMult_SeqMAIJ_3" 342 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 343 { 344 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 345 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 346 const PetscScalar *x,*v; 347 PetscScalar *y,sum1, sum2, sum3; 348 PetscErrorCode ierr; 349 PetscInt nonzerorow=0,n,i,jrow,j; 350 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 351 352 PetscFunctionBegin; 353 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 354 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 355 idx = a->j; 356 v = a->a; 357 ii = a->i; 358 359 for (i=0; i<m; i++) { 360 jrow = ii[i]; 361 n = ii[i+1] - jrow; 362 sum1 = 0.0; 363 sum2 = 0.0; 364 sum3 = 0.0; 365 nonzerorow += (n>0); 366 for (j=0; j<n; j++) { 367 sum1 += v[jrow]*x[3*idx[jrow]]; 368 sum2 += v[jrow]*x[3*idx[jrow]+1]; 369 sum3 += v[jrow]*x[3*idx[jrow]+2]; 370 jrow++; 371 } 372 y[3*i] = sum1; 373 y[3*i+1] = sum2; 374 y[3*i+2] = sum3; 375 } 376 377 ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 378 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 379 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 380 PetscFunctionReturn(0); 381 } 382 383 #undef __FUNCT__ 384 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 385 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 386 { 387 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 388 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 389 const PetscScalar *x,*v; 390 PetscScalar *y,alpha1,alpha2,alpha3; 391 PetscErrorCode ierr; 392 PetscInt n,i; 393 const PetscInt m = b->AIJ->rmap->n,*idx; 394 395 PetscFunctionBegin; 396 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 397 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 398 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 399 400 for (i=0; i<m; i++) { 401 idx = a->j + a->i[i]; 402 v = a->a + a->i[i]; 403 n = a->i[i+1] - a->i[i]; 404 alpha1 = x[3*i]; 405 alpha2 = x[3*i+1]; 406 alpha3 = x[3*i+2]; 407 while (n-->0) { 408 y[3*(*idx)] += alpha1*(*v); 409 y[3*(*idx)+1] += alpha2*(*v); 410 y[3*(*idx)+2] += alpha3*(*v); 411 idx++; v++; 412 } 413 } 414 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 415 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 416 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 #undef __FUNCT__ 421 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 422 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 423 { 424 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 425 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 426 const PetscScalar *x,*v; 427 PetscScalar *y,sum1, sum2, sum3; 428 PetscErrorCode ierr; 429 PetscInt n,i,jrow,j; 430 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 431 432 PetscFunctionBegin; 433 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 434 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 435 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 436 idx = a->j; 437 v = a->a; 438 ii = a->i; 439 440 for (i=0; i<m; i++) { 441 jrow = ii[i]; 442 n = ii[i+1] - jrow; 443 sum1 = 0.0; 444 sum2 = 0.0; 445 sum3 = 0.0; 446 for (j=0; j<n; j++) { 447 sum1 += v[jrow]*x[3*idx[jrow]]; 448 sum2 += v[jrow]*x[3*idx[jrow]+1]; 449 sum3 += v[jrow]*x[3*idx[jrow]+2]; 450 jrow++; 451 } 452 y[3*i] += sum1; 453 y[3*i+1] += sum2; 454 y[3*i+2] += sum3; 455 } 456 457 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 458 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 459 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 460 PetscFunctionReturn(0); 461 } 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 464 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 465 { 466 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 467 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 468 const PetscScalar *x,*v; 469 PetscScalar *y,alpha1,alpha2,alpha3; 470 PetscErrorCode ierr; 471 PetscInt n,i; 472 const PetscInt m = b->AIJ->rmap->n,*idx; 473 474 PetscFunctionBegin; 475 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 476 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 477 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 478 for (i=0; i<m; i++) { 479 idx = a->j + a->i[i] ; 480 v = a->a + a->i[i] ; 481 n = a->i[i+1] - a->i[i]; 482 alpha1 = x[3*i]; 483 alpha2 = x[3*i+1]; 484 alpha3 = x[3*i+2]; 485 while (n-->0) { 486 y[3*(*idx)] += alpha1*(*v); 487 y[3*(*idx)+1] += alpha2*(*v); 488 y[3*(*idx)+2] += alpha3*(*v); 489 idx++; v++; 490 } 491 } 492 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 493 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 494 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 495 PetscFunctionReturn(0); 496 } 497 498 /* ------------------------------------------------------------------------------*/ 499 #undef __FUNCT__ 500 #define __FUNCT__ "MatMult_SeqMAIJ_4" 501 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 502 { 503 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 504 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 505 const PetscScalar *x,*v; 506 PetscScalar *y,sum1, sum2, sum3, sum4; 507 PetscErrorCode ierr; 508 PetscInt nonzerorow=0,n,i,jrow,j; 509 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 510 511 PetscFunctionBegin; 512 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 513 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 514 idx = a->j; 515 v = a->a; 516 ii = a->i; 517 518 for (i=0; i<m; i++) { 519 jrow = ii[i]; 520 n = ii[i+1] - jrow; 521 sum1 = 0.0; 522 sum2 = 0.0; 523 sum3 = 0.0; 524 sum4 = 0.0; 525 nonzerorow += (n>0); 526 for (j=0; j<n; j++) { 527 sum1 += v[jrow]*x[4*idx[jrow]]; 528 sum2 += v[jrow]*x[4*idx[jrow]+1]; 529 sum3 += v[jrow]*x[4*idx[jrow]+2]; 530 sum4 += v[jrow]*x[4*idx[jrow]+3]; 531 jrow++; 532 } 533 y[4*i] = sum1; 534 y[4*i+1] = sum2; 535 y[4*i+2] = sum3; 536 y[4*i+3] = sum4; 537 } 538 539 ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 540 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 541 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 542 PetscFunctionReturn(0); 543 } 544 545 #undef __FUNCT__ 546 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 547 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 548 { 549 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 550 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 551 const PetscScalar *x,*v; 552 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 553 PetscErrorCode ierr; 554 PetscInt n,i; 555 const PetscInt m = b->AIJ->rmap->n,*idx; 556 557 PetscFunctionBegin; 558 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 559 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 560 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 561 for (i=0; i<m; i++) { 562 idx = a->j + a->i[i] ; 563 v = a->a + a->i[i] ; 564 n = a->i[i+1] - a->i[i]; 565 alpha1 = x[4*i]; 566 alpha2 = x[4*i+1]; 567 alpha3 = x[4*i+2]; 568 alpha4 = x[4*i+3]; 569 while (n-->0) { 570 y[4*(*idx)] += alpha1*(*v); 571 y[4*(*idx)+1] += alpha2*(*v); 572 y[4*(*idx)+2] += alpha3*(*v); 573 y[4*(*idx)+3] += alpha4*(*v); 574 idx++; v++; 575 } 576 } 577 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 578 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 579 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 580 PetscFunctionReturn(0); 581 } 582 583 #undef __FUNCT__ 584 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 585 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 586 { 587 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 588 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 589 const PetscScalar *x,*v; 590 PetscScalar *y,sum1, sum2, sum3, sum4; 591 PetscErrorCode ierr; 592 PetscInt n,i,jrow,j; 593 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 594 595 PetscFunctionBegin; 596 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 597 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 598 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 599 idx = a->j; 600 v = a->a; 601 ii = a->i; 602 603 for (i=0; i<m; i++) { 604 jrow = ii[i]; 605 n = ii[i+1] - jrow; 606 sum1 = 0.0; 607 sum2 = 0.0; 608 sum3 = 0.0; 609 sum4 = 0.0; 610 for (j=0; j<n; j++) { 611 sum1 += v[jrow]*x[4*idx[jrow]]; 612 sum2 += v[jrow]*x[4*idx[jrow]+1]; 613 sum3 += v[jrow]*x[4*idx[jrow]+2]; 614 sum4 += v[jrow]*x[4*idx[jrow]+3]; 615 jrow++; 616 } 617 y[4*i] += sum1; 618 y[4*i+1] += sum2; 619 y[4*i+2] += sum3; 620 y[4*i+3] += sum4; 621 } 622 623 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 624 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 625 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 626 PetscFunctionReturn(0); 627 } 628 #undef __FUNCT__ 629 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 630 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 631 { 632 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 633 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 634 const PetscScalar *x,*v; 635 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 636 PetscErrorCode ierr; 637 PetscInt n,i; 638 const PetscInt m = b->AIJ->rmap->n,*idx; 639 640 PetscFunctionBegin; 641 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 642 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 643 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 644 645 for (i=0; i<m; i++) { 646 idx = a->j + a->i[i] ; 647 v = a->a + a->i[i] ; 648 n = a->i[i+1] - a->i[i]; 649 alpha1 = x[4*i]; 650 alpha2 = x[4*i+1]; 651 alpha3 = x[4*i+2]; 652 alpha4 = x[4*i+3]; 653 while (n-->0) { 654 y[4*(*idx)] += alpha1*(*v); 655 y[4*(*idx)+1] += alpha2*(*v); 656 y[4*(*idx)+2] += alpha3*(*v); 657 y[4*(*idx)+3] += alpha4*(*v); 658 idx++; v++; 659 } 660 } 661 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 662 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 663 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 /* ------------------------------------------------------------------------------*/ 667 668 #undef __FUNCT__ 669 #define __FUNCT__ "MatMult_SeqMAIJ_5" 670 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 671 { 672 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 673 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 674 const PetscScalar *x,*v; 675 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 676 PetscErrorCode ierr; 677 PetscInt nonzerorow=0,n,i,jrow,j; 678 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 679 680 PetscFunctionBegin; 681 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 682 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 683 idx = a->j; 684 v = a->a; 685 ii = a->i; 686 687 for (i=0; i<m; i++) { 688 jrow = ii[i]; 689 n = ii[i+1] - jrow; 690 sum1 = 0.0; 691 sum2 = 0.0; 692 sum3 = 0.0; 693 sum4 = 0.0; 694 sum5 = 0.0; 695 nonzerorow += (n>0); 696 for (j=0; j<n; j++) { 697 sum1 += v[jrow]*x[5*idx[jrow]]; 698 sum2 += v[jrow]*x[5*idx[jrow]+1]; 699 sum3 += v[jrow]*x[5*idx[jrow]+2]; 700 sum4 += v[jrow]*x[5*idx[jrow]+3]; 701 sum5 += v[jrow]*x[5*idx[jrow]+4]; 702 jrow++; 703 } 704 y[5*i] = sum1; 705 y[5*i+1] = sum2; 706 y[5*i+2] = sum3; 707 y[5*i+3] = sum4; 708 y[5*i+4] = sum5; 709 } 710 711 ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 712 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 713 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 714 PetscFunctionReturn(0); 715 } 716 717 #undef __FUNCT__ 718 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 719 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 720 { 721 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 722 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 723 const PetscScalar *x,*v; 724 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 725 PetscErrorCode ierr; 726 PetscInt n,i; 727 const PetscInt m = b->AIJ->rmap->n,*idx; 728 729 PetscFunctionBegin; 730 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 731 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 732 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 733 734 for (i=0; i<m; i++) { 735 idx = a->j + a->i[i] ; 736 v = a->a + a->i[i] ; 737 n = a->i[i+1] - a->i[i]; 738 alpha1 = x[5*i]; 739 alpha2 = x[5*i+1]; 740 alpha3 = x[5*i+2]; 741 alpha4 = x[5*i+3]; 742 alpha5 = x[5*i+4]; 743 while (n-->0) { 744 y[5*(*idx)] += alpha1*(*v); 745 y[5*(*idx)+1] += alpha2*(*v); 746 y[5*(*idx)+2] += alpha3*(*v); 747 y[5*(*idx)+3] += alpha4*(*v); 748 y[5*(*idx)+4] += alpha5*(*v); 749 idx++; v++; 750 } 751 } 752 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 753 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 754 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 755 PetscFunctionReturn(0); 756 } 757 758 #undef __FUNCT__ 759 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 760 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 761 { 762 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 763 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 764 const PetscScalar *x,*v; 765 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 766 PetscErrorCode ierr; 767 PetscInt n,i,jrow,j; 768 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 769 770 PetscFunctionBegin; 771 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 772 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 773 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 774 idx = a->j; 775 v = a->a; 776 ii = a->i; 777 778 for (i=0; i<m; i++) { 779 jrow = ii[i]; 780 n = ii[i+1] - jrow; 781 sum1 = 0.0; 782 sum2 = 0.0; 783 sum3 = 0.0; 784 sum4 = 0.0; 785 sum5 = 0.0; 786 for (j=0; j<n; j++) { 787 sum1 += v[jrow]*x[5*idx[jrow]]; 788 sum2 += v[jrow]*x[5*idx[jrow]+1]; 789 sum3 += v[jrow]*x[5*idx[jrow]+2]; 790 sum4 += v[jrow]*x[5*idx[jrow]+3]; 791 sum5 += v[jrow]*x[5*idx[jrow]+4]; 792 jrow++; 793 } 794 y[5*i] += sum1; 795 y[5*i+1] += sum2; 796 y[5*i+2] += sum3; 797 y[5*i+3] += sum4; 798 y[5*i+4] += sum5; 799 } 800 801 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 802 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 803 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 804 PetscFunctionReturn(0); 805 } 806 807 #undef __FUNCT__ 808 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 809 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 810 { 811 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 812 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 813 const PetscScalar *x,*v; 814 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 815 PetscErrorCode ierr; 816 PetscInt n,i; 817 const PetscInt m = b->AIJ->rmap->n,*idx; 818 819 PetscFunctionBegin; 820 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 821 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 822 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 823 824 for (i=0; i<m; i++) { 825 idx = a->j + a->i[i] ; 826 v = a->a + a->i[i] ; 827 n = a->i[i+1] - a->i[i]; 828 alpha1 = x[5*i]; 829 alpha2 = x[5*i+1]; 830 alpha3 = x[5*i+2]; 831 alpha4 = x[5*i+3]; 832 alpha5 = x[5*i+4]; 833 while (n-->0) { 834 y[5*(*idx)] += alpha1*(*v); 835 y[5*(*idx)+1] += alpha2*(*v); 836 y[5*(*idx)+2] += alpha3*(*v); 837 y[5*(*idx)+3] += alpha4*(*v); 838 y[5*(*idx)+4] += alpha5*(*v); 839 idx++; v++; 840 } 841 } 842 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 843 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 844 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 845 PetscFunctionReturn(0); 846 } 847 848 /* ------------------------------------------------------------------------------*/ 849 #undef __FUNCT__ 850 #define __FUNCT__ "MatMult_SeqMAIJ_6" 851 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 852 { 853 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 854 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 855 const PetscScalar *x,*v; 856 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 857 PetscErrorCode ierr; 858 PetscInt nonzerorow=0,n,i,jrow,j; 859 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 860 861 PetscFunctionBegin; 862 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 863 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 864 idx = a->j; 865 v = a->a; 866 ii = a->i; 867 868 for (i=0; i<m; i++) { 869 jrow = ii[i]; 870 n = ii[i+1] - jrow; 871 sum1 = 0.0; 872 sum2 = 0.0; 873 sum3 = 0.0; 874 sum4 = 0.0; 875 sum5 = 0.0; 876 sum6 = 0.0; 877 nonzerorow += (n>0); 878 for (j=0; j<n; j++) { 879 sum1 += v[jrow]*x[6*idx[jrow]]; 880 sum2 += v[jrow]*x[6*idx[jrow]+1]; 881 sum3 += v[jrow]*x[6*idx[jrow]+2]; 882 sum4 += v[jrow]*x[6*idx[jrow]+3]; 883 sum5 += v[jrow]*x[6*idx[jrow]+4]; 884 sum6 += v[jrow]*x[6*idx[jrow]+5]; 885 jrow++; 886 } 887 y[6*i] = sum1; 888 y[6*i+1] = sum2; 889 y[6*i+2] = sum3; 890 y[6*i+3] = sum4; 891 y[6*i+4] = sum5; 892 y[6*i+5] = sum6; 893 } 894 895 ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 896 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 897 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 898 PetscFunctionReturn(0); 899 } 900 901 #undef __FUNCT__ 902 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 903 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 904 { 905 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 906 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 907 const PetscScalar *x,*v; 908 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 909 PetscErrorCode ierr; 910 PetscInt n,i; 911 const PetscInt m = b->AIJ->rmap->n,*idx; 912 913 PetscFunctionBegin; 914 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 915 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 916 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 917 918 for (i=0; i<m; i++) { 919 idx = a->j + a->i[i] ; 920 v = a->a + a->i[i] ; 921 n = a->i[i+1] - a->i[i]; 922 alpha1 = x[6*i]; 923 alpha2 = x[6*i+1]; 924 alpha3 = x[6*i+2]; 925 alpha4 = x[6*i+3]; 926 alpha5 = x[6*i+4]; 927 alpha6 = x[6*i+5]; 928 while (n-->0) { 929 y[6*(*idx)] += alpha1*(*v); 930 y[6*(*idx)+1] += alpha2*(*v); 931 y[6*(*idx)+2] += alpha3*(*v); 932 y[6*(*idx)+3] += alpha4*(*v); 933 y[6*(*idx)+4] += alpha5*(*v); 934 y[6*(*idx)+5] += alpha6*(*v); 935 idx++; v++; 936 } 937 } 938 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 939 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 940 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 941 PetscFunctionReturn(0); 942 } 943 944 #undef __FUNCT__ 945 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 946 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 947 { 948 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 949 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 950 const PetscScalar *x,*v; 951 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 952 PetscErrorCode ierr; 953 PetscInt n,i,jrow,j; 954 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 955 956 PetscFunctionBegin; 957 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 958 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 959 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 960 idx = a->j; 961 v = a->a; 962 ii = a->i; 963 964 for (i=0; i<m; i++) { 965 jrow = ii[i]; 966 n = ii[i+1] - jrow; 967 sum1 = 0.0; 968 sum2 = 0.0; 969 sum3 = 0.0; 970 sum4 = 0.0; 971 sum5 = 0.0; 972 sum6 = 0.0; 973 for (j=0; j<n; j++) { 974 sum1 += v[jrow]*x[6*idx[jrow]]; 975 sum2 += v[jrow]*x[6*idx[jrow]+1]; 976 sum3 += v[jrow]*x[6*idx[jrow]+2]; 977 sum4 += v[jrow]*x[6*idx[jrow]+3]; 978 sum5 += v[jrow]*x[6*idx[jrow]+4]; 979 sum6 += v[jrow]*x[6*idx[jrow]+5]; 980 jrow++; 981 } 982 y[6*i] += sum1; 983 y[6*i+1] += sum2; 984 y[6*i+2] += sum3; 985 y[6*i+3] += sum4; 986 y[6*i+4] += sum5; 987 y[6*i+5] += sum6; 988 } 989 990 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 991 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 992 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 993 PetscFunctionReturn(0); 994 } 995 996 #undef __FUNCT__ 997 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 998 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 999 { 1000 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1001 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1002 const PetscScalar *x,*v; 1003 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 1004 PetscErrorCode ierr; 1005 PetscInt n,i; 1006 const PetscInt m = b->AIJ->rmap->n,*idx; 1007 1008 PetscFunctionBegin; 1009 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1010 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1011 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1012 1013 for (i=0; i<m; i++) { 1014 idx = a->j + a->i[i] ; 1015 v = a->a + a->i[i] ; 1016 n = a->i[i+1] - a->i[i]; 1017 alpha1 = x[6*i]; 1018 alpha2 = x[6*i+1]; 1019 alpha3 = x[6*i+2]; 1020 alpha4 = x[6*i+3]; 1021 alpha5 = x[6*i+4]; 1022 alpha6 = x[6*i+5]; 1023 while (n-->0) { 1024 y[6*(*idx)] += alpha1*(*v); 1025 y[6*(*idx)+1] += alpha2*(*v); 1026 y[6*(*idx)+2] += alpha3*(*v); 1027 y[6*(*idx)+3] += alpha4*(*v); 1028 y[6*(*idx)+4] += alpha5*(*v); 1029 y[6*(*idx)+5] += alpha6*(*v); 1030 idx++; v++; 1031 } 1032 } 1033 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1034 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1035 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1036 PetscFunctionReturn(0); 1037 } 1038 1039 /* ------------------------------------------------------------------------------*/ 1040 #undef __FUNCT__ 1041 #define __FUNCT__ "MatMult_SeqMAIJ_7" 1042 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1043 { 1044 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1045 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1046 const PetscScalar *x,*v; 1047 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1048 PetscErrorCode ierr; 1049 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1050 PetscInt nonzerorow=0,n,i,jrow,j; 1051 1052 PetscFunctionBegin; 1053 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1054 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1055 idx = a->j; 1056 v = a->a; 1057 ii = a->i; 1058 1059 for (i=0; i<m; i++) { 1060 jrow = ii[i]; 1061 n = ii[i+1] - jrow; 1062 sum1 = 0.0; 1063 sum2 = 0.0; 1064 sum3 = 0.0; 1065 sum4 = 0.0; 1066 sum5 = 0.0; 1067 sum6 = 0.0; 1068 sum7 = 0.0; 1069 nonzerorow += (n>0); 1070 for (j=0; j<n; j++) { 1071 sum1 += v[jrow]*x[7*idx[jrow]]; 1072 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1073 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1074 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1075 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1076 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1077 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1078 jrow++; 1079 } 1080 y[7*i] = sum1; 1081 y[7*i+1] = sum2; 1082 y[7*i+2] = sum3; 1083 y[7*i+3] = sum4; 1084 y[7*i+4] = sum5; 1085 y[7*i+5] = sum6; 1086 y[7*i+6] = sum7; 1087 } 1088 1089 ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 1090 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1091 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1092 PetscFunctionReturn(0); 1093 } 1094 1095 #undef __FUNCT__ 1096 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1097 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1098 { 1099 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1100 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1101 const PetscScalar *x,*v; 1102 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1103 PetscErrorCode ierr; 1104 const PetscInt m = b->AIJ->rmap->n,*idx; 1105 PetscInt n,i; 1106 1107 PetscFunctionBegin; 1108 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1109 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1110 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1111 1112 for (i=0; i<m; i++) { 1113 idx = a->j + a->i[i] ; 1114 v = a->a + a->i[i] ; 1115 n = a->i[i+1] - a->i[i]; 1116 alpha1 = x[7*i]; 1117 alpha2 = x[7*i+1]; 1118 alpha3 = x[7*i+2]; 1119 alpha4 = x[7*i+3]; 1120 alpha5 = x[7*i+4]; 1121 alpha6 = x[7*i+5]; 1122 alpha7 = x[7*i+6]; 1123 while (n-->0) { 1124 y[7*(*idx)] += alpha1*(*v); 1125 y[7*(*idx)+1] += alpha2*(*v); 1126 y[7*(*idx)+2] += alpha3*(*v); 1127 y[7*(*idx)+3] += alpha4*(*v); 1128 y[7*(*idx)+4] += alpha5*(*v); 1129 y[7*(*idx)+5] += alpha6*(*v); 1130 y[7*(*idx)+6] += alpha7*(*v); 1131 idx++; v++; 1132 } 1133 } 1134 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1135 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1136 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1137 PetscFunctionReturn(0); 1138 } 1139 1140 #undef __FUNCT__ 1141 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1142 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1143 { 1144 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1145 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1146 const PetscScalar *x,*v; 1147 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1148 PetscErrorCode ierr; 1149 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1150 PetscInt n,i,jrow,j; 1151 1152 PetscFunctionBegin; 1153 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1154 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1155 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1156 idx = a->j; 1157 v = a->a; 1158 ii = a->i; 1159 1160 for (i=0; i<m; i++) { 1161 jrow = ii[i]; 1162 n = ii[i+1] - jrow; 1163 sum1 = 0.0; 1164 sum2 = 0.0; 1165 sum3 = 0.0; 1166 sum4 = 0.0; 1167 sum5 = 0.0; 1168 sum6 = 0.0; 1169 sum7 = 0.0; 1170 for (j=0; j<n; j++) { 1171 sum1 += v[jrow]*x[7*idx[jrow]]; 1172 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1173 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1174 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1175 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1176 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1177 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1178 jrow++; 1179 } 1180 y[7*i] += sum1; 1181 y[7*i+1] += sum2; 1182 y[7*i+2] += sum3; 1183 y[7*i+3] += sum4; 1184 y[7*i+4] += sum5; 1185 y[7*i+5] += sum6; 1186 y[7*i+6] += sum7; 1187 } 1188 1189 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1190 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1191 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1192 PetscFunctionReturn(0); 1193 } 1194 1195 #undef __FUNCT__ 1196 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1197 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1198 { 1199 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1200 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1201 const PetscScalar *x,*v; 1202 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1203 PetscErrorCode ierr; 1204 const PetscInt m = b->AIJ->rmap->n,*idx; 1205 PetscInt n,i; 1206 1207 PetscFunctionBegin; 1208 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1209 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1210 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1211 for (i=0; i<m; i++) { 1212 idx = a->j + a->i[i] ; 1213 v = a->a + a->i[i] ; 1214 n = a->i[i+1] - a->i[i]; 1215 alpha1 = x[7*i]; 1216 alpha2 = x[7*i+1]; 1217 alpha3 = x[7*i+2]; 1218 alpha4 = x[7*i+3]; 1219 alpha5 = x[7*i+4]; 1220 alpha6 = x[7*i+5]; 1221 alpha7 = x[7*i+6]; 1222 while (n-->0) { 1223 y[7*(*idx)] += alpha1*(*v); 1224 y[7*(*idx)+1] += alpha2*(*v); 1225 y[7*(*idx)+2] += alpha3*(*v); 1226 y[7*(*idx)+3] += alpha4*(*v); 1227 y[7*(*idx)+4] += alpha5*(*v); 1228 y[7*(*idx)+5] += alpha6*(*v); 1229 y[7*(*idx)+6] += alpha7*(*v); 1230 idx++; v++; 1231 } 1232 } 1233 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1234 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1235 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1236 PetscFunctionReturn(0); 1237 } 1238 1239 #undef __FUNCT__ 1240 #define __FUNCT__ "MatMult_SeqMAIJ_8" 1241 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1242 { 1243 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1244 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1245 const PetscScalar *x,*v; 1246 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1247 PetscErrorCode ierr; 1248 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1249 PetscInt nonzerorow=0,n,i,jrow,j; 1250 1251 PetscFunctionBegin; 1252 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1253 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1254 idx = a->j; 1255 v = a->a; 1256 ii = a->i; 1257 1258 for (i=0; i<m; i++) { 1259 jrow = ii[i]; 1260 n = ii[i+1] - jrow; 1261 sum1 = 0.0; 1262 sum2 = 0.0; 1263 sum3 = 0.0; 1264 sum4 = 0.0; 1265 sum5 = 0.0; 1266 sum6 = 0.0; 1267 sum7 = 0.0; 1268 sum8 = 0.0; 1269 nonzerorow += (n>0); 1270 for (j=0; j<n; j++) { 1271 sum1 += v[jrow]*x[8*idx[jrow]]; 1272 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1273 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1274 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1275 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1276 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1277 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1278 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1279 jrow++; 1280 } 1281 y[8*i] = sum1; 1282 y[8*i+1] = sum2; 1283 y[8*i+2] = sum3; 1284 y[8*i+3] = sum4; 1285 y[8*i+4] = sum5; 1286 y[8*i+5] = sum6; 1287 y[8*i+6] = sum7; 1288 y[8*i+7] = sum8; 1289 } 1290 1291 ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 1292 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1293 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1294 PetscFunctionReturn(0); 1295 } 1296 1297 #undef __FUNCT__ 1298 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1299 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1300 { 1301 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1302 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1303 const PetscScalar *x,*v; 1304 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1305 PetscErrorCode ierr; 1306 const PetscInt m = b->AIJ->rmap->n,*idx; 1307 PetscInt n,i; 1308 1309 PetscFunctionBegin; 1310 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1311 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1312 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1313 1314 for (i=0; i<m; i++) { 1315 idx = a->j + a->i[i] ; 1316 v = a->a + a->i[i] ; 1317 n = a->i[i+1] - a->i[i]; 1318 alpha1 = x[8*i]; 1319 alpha2 = x[8*i+1]; 1320 alpha3 = x[8*i+2]; 1321 alpha4 = x[8*i+3]; 1322 alpha5 = x[8*i+4]; 1323 alpha6 = x[8*i+5]; 1324 alpha7 = x[8*i+6]; 1325 alpha8 = x[8*i+7]; 1326 while (n-->0) { 1327 y[8*(*idx)] += alpha1*(*v); 1328 y[8*(*idx)+1] += alpha2*(*v); 1329 y[8*(*idx)+2] += alpha3*(*v); 1330 y[8*(*idx)+3] += alpha4*(*v); 1331 y[8*(*idx)+4] += alpha5*(*v); 1332 y[8*(*idx)+5] += alpha6*(*v); 1333 y[8*(*idx)+6] += alpha7*(*v); 1334 y[8*(*idx)+7] += alpha8*(*v); 1335 idx++; v++; 1336 } 1337 } 1338 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1339 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1340 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1341 PetscFunctionReturn(0); 1342 } 1343 1344 #undef __FUNCT__ 1345 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1346 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1347 { 1348 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1349 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1350 const PetscScalar *x,*v; 1351 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1352 PetscErrorCode ierr; 1353 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1354 PetscInt n,i,jrow,j; 1355 1356 PetscFunctionBegin; 1357 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1358 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1359 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1360 idx = a->j; 1361 v = a->a; 1362 ii = a->i; 1363 1364 for (i=0; i<m; i++) { 1365 jrow = ii[i]; 1366 n = ii[i+1] - jrow; 1367 sum1 = 0.0; 1368 sum2 = 0.0; 1369 sum3 = 0.0; 1370 sum4 = 0.0; 1371 sum5 = 0.0; 1372 sum6 = 0.0; 1373 sum7 = 0.0; 1374 sum8 = 0.0; 1375 for (j=0; j<n; j++) { 1376 sum1 += v[jrow]*x[8*idx[jrow]]; 1377 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1378 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1379 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1380 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1381 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1382 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1383 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1384 jrow++; 1385 } 1386 y[8*i] += sum1; 1387 y[8*i+1] += sum2; 1388 y[8*i+2] += sum3; 1389 y[8*i+3] += sum4; 1390 y[8*i+4] += sum5; 1391 y[8*i+5] += sum6; 1392 y[8*i+6] += sum7; 1393 y[8*i+7] += sum8; 1394 } 1395 1396 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1397 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1398 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1399 PetscFunctionReturn(0); 1400 } 1401 1402 #undef __FUNCT__ 1403 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1404 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1405 { 1406 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1407 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1408 const PetscScalar *x,*v; 1409 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1410 PetscErrorCode ierr; 1411 const PetscInt m = b->AIJ->rmap->n,*idx; 1412 PetscInt n,i; 1413 1414 PetscFunctionBegin; 1415 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1416 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1417 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1418 for (i=0; i<m; i++) { 1419 idx = a->j + a->i[i] ; 1420 v = a->a + a->i[i] ; 1421 n = a->i[i+1] - a->i[i]; 1422 alpha1 = x[8*i]; 1423 alpha2 = x[8*i+1]; 1424 alpha3 = x[8*i+2]; 1425 alpha4 = x[8*i+3]; 1426 alpha5 = x[8*i+4]; 1427 alpha6 = x[8*i+5]; 1428 alpha7 = x[8*i+6]; 1429 alpha8 = x[8*i+7]; 1430 while (n-->0) { 1431 y[8*(*idx)] += alpha1*(*v); 1432 y[8*(*idx)+1] += alpha2*(*v); 1433 y[8*(*idx)+2] += alpha3*(*v); 1434 y[8*(*idx)+3] += alpha4*(*v); 1435 y[8*(*idx)+4] += alpha5*(*v); 1436 y[8*(*idx)+5] += alpha6*(*v); 1437 y[8*(*idx)+6] += alpha7*(*v); 1438 y[8*(*idx)+7] += alpha8*(*v); 1439 idx++; v++; 1440 } 1441 } 1442 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1443 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1444 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1445 PetscFunctionReturn(0); 1446 } 1447 1448 /* ------------------------------------------------------------------------------*/ 1449 #undef __FUNCT__ 1450 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1451 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1452 { 1453 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1454 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1455 const PetscScalar *x,*v; 1456 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1457 PetscErrorCode ierr; 1458 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1459 PetscInt nonzerorow=0,n,i,jrow,j; 1460 1461 PetscFunctionBegin; 1462 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1463 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1464 idx = a->j; 1465 v = a->a; 1466 ii = a->i; 1467 1468 for (i=0; i<m; i++) { 1469 jrow = ii[i]; 1470 n = ii[i+1] - jrow; 1471 sum1 = 0.0; 1472 sum2 = 0.0; 1473 sum3 = 0.0; 1474 sum4 = 0.0; 1475 sum5 = 0.0; 1476 sum6 = 0.0; 1477 sum7 = 0.0; 1478 sum8 = 0.0; 1479 sum9 = 0.0; 1480 nonzerorow += (n>0); 1481 for (j=0; j<n; j++) { 1482 sum1 += v[jrow]*x[9*idx[jrow]]; 1483 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1484 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1485 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1486 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1487 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1488 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1489 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1490 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1491 jrow++; 1492 } 1493 y[9*i] = sum1; 1494 y[9*i+1] = sum2; 1495 y[9*i+2] = sum3; 1496 y[9*i+3] = sum4; 1497 y[9*i+4] = sum5; 1498 y[9*i+5] = sum6; 1499 y[9*i+6] = sum7; 1500 y[9*i+7] = sum8; 1501 y[9*i+8] = sum9; 1502 } 1503 1504 ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 1505 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1506 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1507 PetscFunctionReturn(0); 1508 } 1509 1510 /* ------------------------------------------------------------------------------*/ 1511 1512 #undef __FUNCT__ 1513 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1514 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1515 { 1516 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1517 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1518 const PetscScalar *x,*v; 1519 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1520 PetscErrorCode ierr; 1521 const PetscInt m = b->AIJ->rmap->n,*idx; 1522 PetscInt n,i; 1523 1524 PetscFunctionBegin; 1525 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1526 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1527 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1528 1529 for (i=0; i<m; i++) { 1530 idx = a->j + a->i[i] ; 1531 v = a->a + a->i[i] ; 1532 n = a->i[i+1] - a->i[i]; 1533 alpha1 = x[9*i]; 1534 alpha2 = x[9*i+1]; 1535 alpha3 = x[9*i+2]; 1536 alpha4 = x[9*i+3]; 1537 alpha5 = x[9*i+4]; 1538 alpha6 = x[9*i+5]; 1539 alpha7 = x[9*i+6]; 1540 alpha8 = x[9*i+7]; 1541 alpha9 = x[9*i+8]; 1542 while (n-->0) { 1543 y[9*(*idx)] += alpha1*(*v); 1544 y[9*(*idx)+1] += alpha2*(*v); 1545 y[9*(*idx)+2] += alpha3*(*v); 1546 y[9*(*idx)+3] += alpha4*(*v); 1547 y[9*(*idx)+4] += alpha5*(*v); 1548 y[9*(*idx)+5] += alpha6*(*v); 1549 y[9*(*idx)+6] += alpha7*(*v); 1550 y[9*(*idx)+7] += alpha8*(*v); 1551 y[9*(*idx)+8] += alpha9*(*v); 1552 idx++; v++; 1553 } 1554 } 1555 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1556 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1557 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1558 PetscFunctionReturn(0); 1559 } 1560 1561 #undef __FUNCT__ 1562 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1563 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1564 { 1565 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1566 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1567 const PetscScalar *x,*v; 1568 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1569 PetscErrorCode ierr; 1570 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1571 PetscInt n,i,jrow,j; 1572 1573 PetscFunctionBegin; 1574 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1575 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1576 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1577 idx = a->j; 1578 v = a->a; 1579 ii = a->i; 1580 1581 for (i=0; i<m; i++) { 1582 jrow = ii[i]; 1583 n = ii[i+1] - jrow; 1584 sum1 = 0.0; 1585 sum2 = 0.0; 1586 sum3 = 0.0; 1587 sum4 = 0.0; 1588 sum5 = 0.0; 1589 sum6 = 0.0; 1590 sum7 = 0.0; 1591 sum8 = 0.0; 1592 sum9 = 0.0; 1593 for (j=0; j<n; j++) { 1594 sum1 += v[jrow]*x[9*idx[jrow]]; 1595 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1596 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1597 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1598 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1599 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1600 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1601 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1602 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1603 jrow++; 1604 } 1605 y[9*i] += sum1; 1606 y[9*i+1] += sum2; 1607 y[9*i+2] += sum3; 1608 y[9*i+3] += sum4; 1609 y[9*i+4] += sum5; 1610 y[9*i+5] += sum6; 1611 y[9*i+6] += sum7; 1612 y[9*i+7] += sum8; 1613 y[9*i+8] += sum9; 1614 } 1615 1616 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1617 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1618 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1619 PetscFunctionReturn(0); 1620 } 1621 1622 #undef __FUNCT__ 1623 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1624 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1625 { 1626 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1627 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1628 const PetscScalar *x,*v; 1629 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1630 PetscErrorCode ierr; 1631 const PetscInt m = b->AIJ->rmap->n,*idx; 1632 PetscInt n,i; 1633 1634 PetscFunctionBegin; 1635 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1636 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1637 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1638 for (i=0; i<m; i++) { 1639 idx = a->j + a->i[i] ; 1640 v = a->a + a->i[i] ; 1641 n = a->i[i+1] - a->i[i]; 1642 alpha1 = x[9*i]; 1643 alpha2 = x[9*i+1]; 1644 alpha3 = x[9*i+2]; 1645 alpha4 = x[9*i+3]; 1646 alpha5 = x[9*i+4]; 1647 alpha6 = x[9*i+5]; 1648 alpha7 = x[9*i+6]; 1649 alpha8 = x[9*i+7]; 1650 alpha9 = x[9*i+8]; 1651 while (n-->0) { 1652 y[9*(*idx)] += alpha1*(*v); 1653 y[9*(*idx)+1] += alpha2*(*v); 1654 y[9*(*idx)+2] += alpha3*(*v); 1655 y[9*(*idx)+3] += alpha4*(*v); 1656 y[9*(*idx)+4] += alpha5*(*v); 1657 y[9*(*idx)+5] += alpha6*(*v); 1658 y[9*(*idx)+6] += alpha7*(*v); 1659 y[9*(*idx)+7] += alpha8*(*v); 1660 y[9*(*idx)+8] += alpha9*(*v); 1661 idx++; v++; 1662 } 1663 } 1664 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1665 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1666 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1667 PetscFunctionReturn(0); 1668 } 1669 #undef __FUNCT__ 1670 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1671 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1672 { 1673 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1674 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1675 const PetscScalar *x,*v; 1676 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1677 PetscErrorCode ierr; 1678 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1679 PetscInt nonzerorow=0,n,i,jrow,j; 1680 1681 PetscFunctionBegin; 1682 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1683 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1684 idx = a->j; 1685 v = a->a; 1686 ii = a->i; 1687 1688 for (i=0; i<m; i++) { 1689 jrow = ii[i]; 1690 n = ii[i+1] - jrow; 1691 sum1 = 0.0; 1692 sum2 = 0.0; 1693 sum3 = 0.0; 1694 sum4 = 0.0; 1695 sum5 = 0.0; 1696 sum6 = 0.0; 1697 sum7 = 0.0; 1698 sum8 = 0.0; 1699 sum9 = 0.0; 1700 sum10 = 0.0; 1701 nonzerorow += (n>0); 1702 for (j=0; j<n; j++) { 1703 sum1 += v[jrow]*x[10*idx[jrow]]; 1704 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1705 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1706 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1707 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1708 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1709 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1710 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1711 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1712 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1713 jrow++; 1714 } 1715 y[10*i] = sum1; 1716 y[10*i+1] = sum2; 1717 y[10*i+2] = sum3; 1718 y[10*i+3] = sum4; 1719 y[10*i+4] = sum5; 1720 y[10*i+5] = sum6; 1721 y[10*i+6] = sum7; 1722 y[10*i+7] = sum8; 1723 y[10*i+8] = sum9; 1724 y[10*i+9] = sum10; 1725 } 1726 1727 ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 1728 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1729 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1730 PetscFunctionReturn(0); 1731 } 1732 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatMultAdd_SeqMAIJ_10" 1735 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1736 { 1737 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1738 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1739 const PetscScalar *x,*v; 1740 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1741 PetscErrorCode ierr; 1742 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1743 PetscInt n,i,jrow,j; 1744 1745 PetscFunctionBegin; 1746 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1747 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1748 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1749 idx = a->j; 1750 v = a->a; 1751 ii = a->i; 1752 1753 for (i=0; i<m; i++) { 1754 jrow = ii[i]; 1755 n = ii[i+1] - jrow; 1756 sum1 = 0.0; 1757 sum2 = 0.0; 1758 sum3 = 0.0; 1759 sum4 = 0.0; 1760 sum5 = 0.0; 1761 sum6 = 0.0; 1762 sum7 = 0.0; 1763 sum8 = 0.0; 1764 sum9 = 0.0; 1765 sum10 = 0.0; 1766 for (j=0; j<n; j++) { 1767 sum1 += v[jrow]*x[10*idx[jrow]]; 1768 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1769 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1770 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1771 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1772 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1773 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1774 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1775 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1776 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1777 jrow++; 1778 } 1779 y[10*i] += sum1; 1780 y[10*i+1] += sum2; 1781 y[10*i+2] += sum3; 1782 y[10*i+3] += sum4; 1783 y[10*i+4] += sum5; 1784 y[10*i+5] += sum6; 1785 y[10*i+6] += sum7; 1786 y[10*i+7] += sum8; 1787 y[10*i+8] += sum9; 1788 y[10*i+9] += sum10; 1789 } 1790 1791 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1792 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1793 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1794 PetscFunctionReturn(0); 1795 } 1796 1797 #undef __FUNCT__ 1798 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1799 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1800 { 1801 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1802 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1803 const PetscScalar *x,*v; 1804 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1805 PetscErrorCode ierr; 1806 const PetscInt m = b->AIJ->rmap->n,*idx; 1807 PetscInt n,i; 1808 1809 PetscFunctionBegin; 1810 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1811 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1812 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1813 1814 for (i=0; i<m; i++) { 1815 idx = a->j + a->i[i] ; 1816 v = a->a + a->i[i] ; 1817 n = a->i[i+1] - a->i[i]; 1818 alpha1 = x[10*i]; 1819 alpha2 = x[10*i+1]; 1820 alpha3 = x[10*i+2]; 1821 alpha4 = x[10*i+3]; 1822 alpha5 = x[10*i+4]; 1823 alpha6 = x[10*i+5]; 1824 alpha7 = x[10*i+6]; 1825 alpha8 = x[10*i+7]; 1826 alpha9 = x[10*i+8]; 1827 alpha10 = x[10*i+9]; 1828 while (n-->0) { 1829 y[10*(*idx)] += alpha1*(*v); 1830 y[10*(*idx)+1] += alpha2*(*v); 1831 y[10*(*idx)+2] += alpha3*(*v); 1832 y[10*(*idx)+3] += alpha4*(*v); 1833 y[10*(*idx)+4] += alpha5*(*v); 1834 y[10*(*idx)+5] += alpha6*(*v); 1835 y[10*(*idx)+6] += alpha7*(*v); 1836 y[10*(*idx)+7] += alpha8*(*v); 1837 y[10*(*idx)+8] += alpha9*(*v); 1838 y[10*(*idx)+9] += alpha10*(*v); 1839 idx++; v++; 1840 } 1841 } 1842 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1843 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1844 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1845 PetscFunctionReturn(0); 1846 } 1847 1848 #undef __FUNCT__ 1849 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1850 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1851 { 1852 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1853 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1854 const PetscScalar *x,*v; 1855 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1856 PetscErrorCode ierr; 1857 const PetscInt m = b->AIJ->rmap->n,*idx; 1858 PetscInt n,i; 1859 1860 PetscFunctionBegin; 1861 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1862 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1863 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1864 for (i=0; i<m; i++) { 1865 idx = a->j + a->i[i] ; 1866 v = a->a + a->i[i] ; 1867 n = a->i[i+1] - a->i[i]; 1868 alpha1 = x[10*i]; 1869 alpha2 = x[10*i+1]; 1870 alpha3 = x[10*i+2]; 1871 alpha4 = x[10*i+3]; 1872 alpha5 = x[10*i+4]; 1873 alpha6 = x[10*i+5]; 1874 alpha7 = x[10*i+6]; 1875 alpha8 = x[10*i+7]; 1876 alpha9 = x[10*i+8]; 1877 alpha10 = x[10*i+9]; 1878 while (n-->0) { 1879 y[10*(*idx)] += alpha1*(*v); 1880 y[10*(*idx)+1] += alpha2*(*v); 1881 y[10*(*idx)+2] += alpha3*(*v); 1882 y[10*(*idx)+3] += alpha4*(*v); 1883 y[10*(*idx)+4] += alpha5*(*v); 1884 y[10*(*idx)+5] += alpha6*(*v); 1885 y[10*(*idx)+6] += alpha7*(*v); 1886 y[10*(*idx)+7] += alpha8*(*v); 1887 y[10*(*idx)+8] += alpha9*(*v); 1888 y[10*(*idx)+9] += alpha10*(*v); 1889 idx++; v++; 1890 } 1891 } 1892 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1893 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1894 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1895 PetscFunctionReturn(0); 1896 } 1897 1898 1899 /*--------------------------------------------------------------------------------------------*/ 1900 #undef __FUNCT__ 1901 #define __FUNCT__ "MatMult_SeqMAIJ_11" 1902 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1903 { 1904 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1905 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1906 const PetscScalar *x,*v; 1907 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1908 PetscErrorCode ierr; 1909 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1910 PetscInt nonzerorow=0,n,i,jrow,j; 1911 1912 PetscFunctionBegin; 1913 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1914 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1915 idx = a->j; 1916 v = a->a; 1917 ii = a->i; 1918 1919 for (i=0; i<m; i++) { 1920 jrow = ii[i]; 1921 n = ii[i+1] - jrow; 1922 sum1 = 0.0; 1923 sum2 = 0.0; 1924 sum3 = 0.0; 1925 sum4 = 0.0; 1926 sum5 = 0.0; 1927 sum6 = 0.0; 1928 sum7 = 0.0; 1929 sum8 = 0.0; 1930 sum9 = 0.0; 1931 sum10 = 0.0; 1932 sum11 = 0.0; 1933 nonzerorow += (n>0); 1934 for (j=0; j<n; j++) { 1935 sum1 += v[jrow]*x[11*idx[jrow]]; 1936 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1937 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1938 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1939 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1940 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1941 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1942 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1943 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1944 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1945 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1946 jrow++; 1947 } 1948 y[11*i] = sum1; 1949 y[11*i+1] = sum2; 1950 y[11*i+2] = sum3; 1951 y[11*i+3] = sum4; 1952 y[11*i+4] = sum5; 1953 y[11*i+5] = sum6; 1954 y[11*i+6] = sum7; 1955 y[11*i+7] = sum8; 1956 y[11*i+8] = sum9; 1957 y[11*i+9] = sum10; 1958 y[11*i+10] = sum11; 1959 } 1960 1961 ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr); 1962 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1963 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1964 PetscFunctionReturn(0); 1965 } 1966 1967 #undef __FUNCT__ 1968 #define __FUNCT__ "MatMultAdd_SeqMAIJ_11" 1969 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1970 { 1971 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1972 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1973 const PetscScalar *x,*v; 1974 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1975 PetscErrorCode ierr; 1976 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1977 PetscInt n,i,jrow,j; 1978 1979 PetscFunctionBegin; 1980 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1981 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1982 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1983 idx = a->j; 1984 v = a->a; 1985 ii = a->i; 1986 1987 for (i=0; i<m; i++) { 1988 jrow = ii[i]; 1989 n = ii[i+1] - jrow; 1990 sum1 = 0.0; 1991 sum2 = 0.0; 1992 sum3 = 0.0; 1993 sum4 = 0.0; 1994 sum5 = 0.0; 1995 sum6 = 0.0; 1996 sum7 = 0.0; 1997 sum8 = 0.0; 1998 sum9 = 0.0; 1999 sum10 = 0.0; 2000 sum11 = 0.0; 2001 for (j=0; j<n; j++) { 2002 sum1 += v[jrow]*x[11*idx[jrow]]; 2003 sum2 += v[jrow]*x[11*idx[jrow]+1]; 2004 sum3 += v[jrow]*x[11*idx[jrow]+2]; 2005 sum4 += v[jrow]*x[11*idx[jrow]+3]; 2006 sum5 += v[jrow]*x[11*idx[jrow]+4]; 2007 sum6 += v[jrow]*x[11*idx[jrow]+5]; 2008 sum7 += v[jrow]*x[11*idx[jrow]+6]; 2009 sum8 += v[jrow]*x[11*idx[jrow]+7]; 2010 sum9 += v[jrow]*x[11*idx[jrow]+8]; 2011 sum10 += v[jrow]*x[11*idx[jrow]+9]; 2012 sum11 += v[jrow]*x[11*idx[jrow]+10]; 2013 jrow++; 2014 } 2015 y[11*i] += sum1; 2016 y[11*i+1] += sum2; 2017 y[11*i+2] += sum3; 2018 y[11*i+3] += sum4; 2019 y[11*i+4] += sum5; 2020 y[11*i+5] += sum6; 2021 y[11*i+6] += sum7; 2022 y[11*i+7] += sum8; 2023 y[11*i+8] += sum9; 2024 y[11*i+9] += sum10; 2025 y[11*i+10] += sum11; 2026 } 2027 2028 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2029 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2030 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2031 PetscFunctionReturn(0); 2032 } 2033 2034 #undef __FUNCT__ 2035 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11" 2036 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 2037 { 2038 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2039 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2040 const PetscScalar *x,*v; 2041 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2042 PetscErrorCode ierr; 2043 const PetscInt m = b->AIJ->rmap->n,*idx; 2044 PetscInt n,i; 2045 2046 PetscFunctionBegin; 2047 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2048 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2049 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2050 2051 for (i=0; i<m; i++) { 2052 idx = a->j + a->i[i] ; 2053 v = a->a + a->i[i] ; 2054 n = a->i[i+1] - a->i[i]; 2055 alpha1 = x[11*i]; 2056 alpha2 = x[11*i+1]; 2057 alpha3 = x[11*i+2]; 2058 alpha4 = x[11*i+3]; 2059 alpha5 = x[11*i+4]; 2060 alpha6 = x[11*i+5]; 2061 alpha7 = x[11*i+6]; 2062 alpha8 = x[11*i+7]; 2063 alpha9 = x[11*i+8]; 2064 alpha10 = x[11*i+9]; 2065 alpha11 = x[11*i+10]; 2066 while (n-->0) { 2067 y[11*(*idx)] += alpha1*(*v); 2068 y[11*(*idx)+1] += alpha2*(*v); 2069 y[11*(*idx)+2] += alpha3*(*v); 2070 y[11*(*idx)+3] += alpha4*(*v); 2071 y[11*(*idx)+4] += alpha5*(*v); 2072 y[11*(*idx)+5] += alpha6*(*v); 2073 y[11*(*idx)+6] += alpha7*(*v); 2074 y[11*(*idx)+7] += alpha8*(*v); 2075 y[11*(*idx)+8] += alpha9*(*v); 2076 y[11*(*idx)+9] += alpha10*(*v); 2077 y[11*(*idx)+10] += alpha11*(*v); 2078 idx++; v++; 2079 } 2080 } 2081 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2082 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2083 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2084 PetscFunctionReturn(0); 2085 } 2086 2087 #undef __FUNCT__ 2088 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11" 2089 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2090 { 2091 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2092 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2093 const PetscScalar *x,*v; 2094 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2095 PetscErrorCode ierr; 2096 const PetscInt m = b->AIJ->rmap->n,*idx; 2097 PetscInt n,i; 2098 2099 PetscFunctionBegin; 2100 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2101 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2102 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2103 for (i=0; i<m; i++) { 2104 idx = a->j + a->i[i] ; 2105 v = a->a + a->i[i] ; 2106 n = a->i[i+1] - a->i[i]; 2107 alpha1 = x[11*i]; 2108 alpha2 = x[11*i+1]; 2109 alpha3 = x[11*i+2]; 2110 alpha4 = x[11*i+3]; 2111 alpha5 = x[11*i+4]; 2112 alpha6 = x[11*i+5]; 2113 alpha7 = x[11*i+6]; 2114 alpha8 = x[11*i+7]; 2115 alpha9 = x[11*i+8]; 2116 alpha10 = x[11*i+9]; 2117 alpha11 = x[11*i+10]; 2118 while (n-->0) { 2119 y[11*(*idx)] += alpha1*(*v); 2120 y[11*(*idx)+1] += alpha2*(*v); 2121 y[11*(*idx)+2] += alpha3*(*v); 2122 y[11*(*idx)+3] += alpha4*(*v); 2123 y[11*(*idx)+4] += alpha5*(*v); 2124 y[11*(*idx)+5] += alpha6*(*v); 2125 y[11*(*idx)+6] += alpha7*(*v); 2126 y[11*(*idx)+7] += alpha8*(*v); 2127 y[11*(*idx)+8] += alpha9*(*v); 2128 y[11*(*idx)+9] += alpha10*(*v); 2129 y[11*(*idx)+10] += alpha11*(*v); 2130 idx++; v++; 2131 } 2132 } 2133 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2134 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2135 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2136 PetscFunctionReturn(0); 2137 } 2138 2139 2140 /*--------------------------------------------------------------------------------------------*/ 2141 #undef __FUNCT__ 2142 #define __FUNCT__ "MatMult_SeqMAIJ_16" 2143 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2144 { 2145 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2147 const PetscScalar *x,*v; 2148 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2149 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2150 PetscErrorCode ierr; 2151 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2152 PetscInt nonzerorow=0,n,i,jrow,j; 2153 2154 PetscFunctionBegin; 2155 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2156 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2157 idx = a->j; 2158 v = a->a; 2159 ii = a->i; 2160 2161 for (i=0; i<m; i++) { 2162 jrow = ii[i]; 2163 n = ii[i+1] - jrow; 2164 sum1 = 0.0; 2165 sum2 = 0.0; 2166 sum3 = 0.0; 2167 sum4 = 0.0; 2168 sum5 = 0.0; 2169 sum6 = 0.0; 2170 sum7 = 0.0; 2171 sum8 = 0.0; 2172 sum9 = 0.0; 2173 sum10 = 0.0; 2174 sum11 = 0.0; 2175 sum12 = 0.0; 2176 sum13 = 0.0; 2177 sum14 = 0.0; 2178 sum15 = 0.0; 2179 sum16 = 0.0; 2180 nonzerorow += (n>0); 2181 for (j=0; j<n; j++) { 2182 sum1 += v[jrow]*x[16*idx[jrow]]; 2183 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2184 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2185 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2186 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2187 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2188 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2189 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2190 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2191 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2192 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2193 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2194 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2195 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2196 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2197 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2198 jrow++; 2199 } 2200 y[16*i] = sum1; 2201 y[16*i+1] = sum2; 2202 y[16*i+2] = sum3; 2203 y[16*i+3] = sum4; 2204 y[16*i+4] = sum5; 2205 y[16*i+5] = sum6; 2206 y[16*i+6] = sum7; 2207 y[16*i+7] = sum8; 2208 y[16*i+8] = sum9; 2209 y[16*i+9] = sum10; 2210 y[16*i+10] = sum11; 2211 y[16*i+11] = sum12; 2212 y[16*i+12] = sum13; 2213 y[16*i+13] = sum14; 2214 y[16*i+14] = sum15; 2215 y[16*i+15] = sum16; 2216 } 2217 2218 ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 2219 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2220 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2221 PetscFunctionReturn(0); 2222 } 2223 2224 #undef __FUNCT__ 2225 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 2226 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2227 { 2228 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2229 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2230 const PetscScalar *x,*v; 2231 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2232 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2233 PetscErrorCode ierr; 2234 const PetscInt m = b->AIJ->rmap->n,*idx; 2235 PetscInt n,i; 2236 2237 PetscFunctionBegin; 2238 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2239 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2240 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2241 2242 for (i=0; i<m; i++) { 2243 idx = a->j + a->i[i] ; 2244 v = a->a + a->i[i] ; 2245 n = a->i[i+1] - a->i[i]; 2246 alpha1 = x[16*i]; 2247 alpha2 = x[16*i+1]; 2248 alpha3 = x[16*i+2]; 2249 alpha4 = x[16*i+3]; 2250 alpha5 = x[16*i+4]; 2251 alpha6 = x[16*i+5]; 2252 alpha7 = x[16*i+6]; 2253 alpha8 = x[16*i+7]; 2254 alpha9 = x[16*i+8]; 2255 alpha10 = x[16*i+9]; 2256 alpha11 = x[16*i+10]; 2257 alpha12 = x[16*i+11]; 2258 alpha13 = x[16*i+12]; 2259 alpha14 = x[16*i+13]; 2260 alpha15 = x[16*i+14]; 2261 alpha16 = x[16*i+15]; 2262 while (n-->0) { 2263 y[16*(*idx)] += alpha1*(*v); 2264 y[16*(*idx)+1] += alpha2*(*v); 2265 y[16*(*idx)+2] += alpha3*(*v); 2266 y[16*(*idx)+3] += alpha4*(*v); 2267 y[16*(*idx)+4] += alpha5*(*v); 2268 y[16*(*idx)+5] += alpha6*(*v); 2269 y[16*(*idx)+6] += alpha7*(*v); 2270 y[16*(*idx)+7] += alpha8*(*v); 2271 y[16*(*idx)+8] += alpha9*(*v); 2272 y[16*(*idx)+9] += alpha10*(*v); 2273 y[16*(*idx)+10] += alpha11*(*v); 2274 y[16*(*idx)+11] += alpha12*(*v); 2275 y[16*(*idx)+12] += alpha13*(*v); 2276 y[16*(*idx)+13] += alpha14*(*v); 2277 y[16*(*idx)+14] += alpha15*(*v); 2278 y[16*(*idx)+15] += alpha16*(*v); 2279 idx++; v++; 2280 } 2281 } 2282 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2283 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2284 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2285 PetscFunctionReturn(0); 2286 } 2287 2288 #undef __FUNCT__ 2289 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 2290 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2291 { 2292 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2293 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2294 const PetscScalar *x,*v; 2295 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2296 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2297 PetscErrorCode ierr; 2298 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2299 PetscInt n,i,jrow,j; 2300 2301 PetscFunctionBegin; 2302 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2303 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2304 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2305 idx = a->j; 2306 v = a->a; 2307 ii = a->i; 2308 2309 for (i=0; i<m; i++) { 2310 jrow = ii[i]; 2311 n = ii[i+1] - jrow; 2312 sum1 = 0.0; 2313 sum2 = 0.0; 2314 sum3 = 0.0; 2315 sum4 = 0.0; 2316 sum5 = 0.0; 2317 sum6 = 0.0; 2318 sum7 = 0.0; 2319 sum8 = 0.0; 2320 sum9 = 0.0; 2321 sum10 = 0.0; 2322 sum11 = 0.0; 2323 sum12 = 0.0; 2324 sum13 = 0.0; 2325 sum14 = 0.0; 2326 sum15 = 0.0; 2327 sum16 = 0.0; 2328 for (j=0; j<n; j++) { 2329 sum1 += v[jrow]*x[16*idx[jrow]]; 2330 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2331 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2332 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2333 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2334 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2335 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2336 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2337 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2338 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2339 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2340 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2341 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2342 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2343 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2344 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2345 jrow++; 2346 } 2347 y[16*i] += sum1; 2348 y[16*i+1] += sum2; 2349 y[16*i+2] += sum3; 2350 y[16*i+3] += sum4; 2351 y[16*i+4] += sum5; 2352 y[16*i+5] += sum6; 2353 y[16*i+6] += sum7; 2354 y[16*i+7] += sum8; 2355 y[16*i+8] += sum9; 2356 y[16*i+9] += sum10; 2357 y[16*i+10] += sum11; 2358 y[16*i+11] += sum12; 2359 y[16*i+12] += sum13; 2360 y[16*i+13] += sum14; 2361 y[16*i+14] += sum15; 2362 y[16*i+15] += sum16; 2363 } 2364 2365 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2366 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2367 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2368 PetscFunctionReturn(0); 2369 } 2370 2371 #undef __FUNCT__ 2372 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2373 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2374 { 2375 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2376 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2377 const PetscScalar *x,*v; 2378 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2379 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2380 PetscErrorCode ierr; 2381 const PetscInt m = b->AIJ->rmap->n,*idx; 2382 PetscInt n,i; 2383 2384 PetscFunctionBegin; 2385 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2386 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2387 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2388 for (i=0; i<m; i++) { 2389 idx = a->j + a->i[i] ; 2390 v = a->a + a->i[i] ; 2391 n = a->i[i+1] - a->i[i]; 2392 alpha1 = x[16*i]; 2393 alpha2 = x[16*i+1]; 2394 alpha3 = x[16*i+2]; 2395 alpha4 = x[16*i+3]; 2396 alpha5 = x[16*i+4]; 2397 alpha6 = x[16*i+5]; 2398 alpha7 = x[16*i+6]; 2399 alpha8 = x[16*i+7]; 2400 alpha9 = x[16*i+8]; 2401 alpha10 = x[16*i+9]; 2402 alpha11 = x[16*i+10]; 2403 alpha12 = x[16*i+11]; 2404 alpha13 = x[16*i+12]; 2405 alpha14 = x[16*i+13]; 2406 alpha15 = x[16*i+14]; 2407 alpha16 = x[16*i+15]; 2408 while (n-->0) { 2409 y[16*(*idx)] += alpha1*(*v); 2410 y[16*(*idx)+1] += alpha2*(*v); 2411 y[16*(*idx)+2] += alpha3*(*v); 2412 y[16*(*idx)+3] += alpha4*(*v); 2413 y[16*(*idx)+4] += alpha5*(*v); 2414 y[16*(*idx)+5] += alpha6*(*v); 2415 y[16*(*idx)+6] += alpha7*(*v); 2416 y[16*(*idx)+7] += alpha8*(*v); 2417 y[16*(*idx)+8] += alpha9*(*v); 2418 y[16*(*idx)+9] += alpha10*(*v); 2419 y[16*(*idx)+10] += alpha11*(*v); 2420 y[16*(*idx)+11] += alpha12*(*v); 2421 y[16*(*idx)+12] += alpha13*(*v); 2422 y[16*(*idx)+13] += alpha14*(*v); 2423 y[16*(*idx)+14] += alpha15*(*v); 2424 y[16*(*idx)+15] += alpha16*(*v); 2425 idx++; v++; 2426 } 2427 } 2428 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2429 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2430 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2431 PetscFunctionReturn(0); 2432 } 2433 2434 /*--------------------------------------------------------------------------------------------*/ 2435 #undef __FUNCT__ 2436 #define __FUNCT__ "MatMult_SeqMAIJ_18" 2437 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2438 { 2439 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2440 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2441 const PetscScalar *x,*v; 2442 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2443 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2444 PetscErrorCode ierr; 2445 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2446 PetscInt nonzerorow=0,n,i,jrow,j; 2447 2448 PetscFunctionBegin; 2449 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2450 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2451 idx = a->j; 2452 v = a->a; 2453 ii = a->i; 2454 2455 for (i=0; i<m; i++) { 2456 jrow = ii[i]; 2457 n = ii[i+1] - jrow; 2458 sum1 = 0.0; 2459 sum2 = 0.0; 2460 sum3 = 0.0; 2461 sum4 = 0.0; 2462 sum5 = 0.0; 2463 sum6 = 0.0; 2464 sum7 = 0.0; 2465 sum8 = 0.0; 2466 sum9 = 0.0; 2467 sum10 = 0.0; 2468 sum11 = 0.0; 2469 sum12 = 0.0; 2470 sum13 = 0.0; 2471 sum14 = 0.0; 2472 sum15 = 0.0; 2473 sum16 = 0.0; 2474 sum17 = 0.0; 2475 sum18 = 0.0; 2476 nonzerorow += (n>0); 2477 for (j=0; j<n; j++) { 2478 sum1 += v[jrow]*x[18*idx[jrow]]; 2479 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2480 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2481 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2482 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2483 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2484 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2485 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2486 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2487 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2488 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2489 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2490 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2491 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2492 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2493 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2494 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2495 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2496 jrow++; 2497 } 2498 y[18*i] = sum1; 2499 y[18*i+1] = sum2; 2500 y[18*i+2] = sum3; 2501 y[18*i+3] = sum4; 2502 y[18*i+4] = sum5; 2503 y[18*i+5] = sum6; 2504 y[18*i+6] = sum7; 2505 y[18*i+7] = sum8; 2506 y[18*i+8] = sum9; 2507 y[18*i+9] = sum10; 2508 y[18*i+10] = sum11; 2509 y[18*i+11] = sum12; 2510 y[18*i+12] = sum13; 2511 y[18*i+13] = sum14; 2512 y[18*i+14] = sum15; 2513 y[18*i+15] = sum16; 2514 y[18*i+16] = sum17; 2515 y[18*i+17] = sum18; 2516 } 2517 2518 ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 2519 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2520 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2521 PetscFunctionReturn(0); 2522 } 2523 2524 #undef __FUNCT__ 2525 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18" 2526 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2527 { 2528 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2529 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2530 const PetscScalar *x,*v; 2531 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2532 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2533 PetscErrorCode ierr; 2534 const PetscInt m = b->AIJ->rmap->n,*idx; 2535 PetscInt n,i; 2536 2537 PetscFunctionBegin; 2538 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2539 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2540 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2541 2542 for (i=0; i<m; i++) { 2543 idx = a->j + a->i[i] ; 2544 v = a->a + a->i[i] ; 2545 n = a->i[i+1] - a->i[i]; 2546 alpha1 = x[18*i]; 2547 alpha2 = x[18*i+1]; 2548 alpha3 = x[18*i+2]; 2549 alpha4 = x[18*i+3]; 2550 alpha5 = x[18*i+4]; 2551 alpha6 = x[18*i+5]; 2552 alpha7 = x[18*i+6]; 2553 alpha8 = x[18*i+7]; 2554 alpha9 = x[18*i+8]; 2555 alpha10 = x[18*i+9]; 2556 alpha11 = x[18*i+10]; 2557 alpha12 = x[18*i+11]; 2558 alpha13 = x[18*i+12]; 2559 alpha14 = x[18*i+13]; 2560 alpha15 = x[18*i+14]; 2561 alpha16 = x[18*i+15]; 2562 alpha17 = x[18*i+16]; 2563 alpha18 = x[18*i+17]; 2564 while (n-->0) { 2565 y[18*(*idx)] += alpha1*(*v); 2566 y[18*(*idx)+1] += alpha2*(*v); 2567 y[18*(*idx)+2] += alpha3*(*v); 2568 y[18*(*idx)+3] += alpha4*(*v); 2569 y[18*(*idx)+4] += alpha5*(*v); 2570 y[18*(*idx)+5] += alpha6*(*v); 2571 y[18*(*idx)+6] += alpha7*(*v); 2572 y[18*(*idx)+7] += alpha8*(*v); 2573 y[18*(*idx)+8] += alpha9*(*v); 2574 y[18*(*idx)+9] += alpha10*(*v); 2575 y[18*(*idx)+10] += alpha11*(*v); 2576 y[18*(*idx)+11] += alpha12*(*v); 2577 y[18*(*idx)+12] += alpha13*(*v); 2578 y[18*(*idx)+13] += alpha14*(*v); 2579 y[18*(*idx)+14] += alpha15*(*v); 2580 y[18*(*idx)+15] += alpha16*(*v); 2581 y[18*(*idx)+16] += alpha17*(*v); 2582 y[18*(*idx)+17] += alpha18*(*v); 2583 idx++; v++; 2584 } 2585 } 2586 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2587 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2588 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2589 PetscFunctionReturn(0); 2590 } 2591 2592 #undef __FUNCT__ 2593 #define __FUNCT__ "MatMultAdd_SeqMAIJ_18" 2594 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2595 { 2596 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2597 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2598 const PetscScalar *x,*v; 2599 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2600 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2601 PetscErrorCode ierr; 2602 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2603 PetscInt n,i,jrow,j; 2604 2605 PetscFunctionBegin; 2606 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2607 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2608 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2609 idx = a->j; 2610 v = a->a; 2611 ii = a->i; 2612 2613 for (i=0; i<m; i++) { 2614 jrow = ii[i]; 2615 n = ii[i+1] - jrow; 2616 sum1 = 0.0; 2617 sum2 = 0.0; 2618 sum3 = 0.0; 2619 sum4 = 0.0; 2620 sum5 = 0.0; 2621 sum6 = 0.0; 2622 sum7 = 0.0; 2623 sum8 = 0.0; 2624 sum9 = 0.0; 2625 sum10 = 0.0; 2626 sum11 = 0.0; 2627 sum12 = 0.0; 2628 sum13 = 0.0; 2629 sum14 = 0.0; 2630 sum15 = 0.0; 2631 sum16 = 0.0; 2632 sum17 = 0.0; 2633 sum18 = 0.0; 2634 for (j=0; j<n; j++) { 2635 sum1 += v[jrow]*x[18*idx[jrow]]; 2636 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2637 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2638 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2639 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2640 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2641 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2642 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2643 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2644 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2645 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2646 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2647 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2648 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2649 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2650 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2651 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2652 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2653 jrow++; 2654 } 2655 y[18*i] += sum1; 2656 y[18*i+1] += sum2; 2657 y[18*i+2] += sum3; 2658 y[18*i+3] += sum4; 2659 y[18*i+4] += sum5; 2660 y[18*i+5] += sum6; 2661 y[18*i+6] += sum7; 2662 y[18*i+7] += sum8; 2663 y[18*i+8] += sum9; 2664 y[18*i+9] += sum10; 2665 y[18*i+10] += sum11; 2666 y[18*i+11] += sum12; 2667 y[18*i+12] += sum13; 2668 y[18*i+13] += sum14; 2669 y[18*i+14] += sum15; 2670 y[18*i+15] += sum16; 2671 y[18*i+16] += sum17; 2672 y[18*i+17] += sum18; 2673 } 2674 2675 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2676 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2677 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2678 PetscFunctionReturn(0); 2679 } 2680 2681 #undef __FUNCT__ 2682 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18" 2683 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2684 { 2685 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2686 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2687 const PetscScalar *x,*v; 2688 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2689 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2690 PetscErrorCode ierr; 2691 const PetscInt m = b->AIJ->rmap->n,*idx; 2692 PetscInt n,i; 2693 2694 PetscFunctionBegin; 2695 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2696 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2697 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2698 for (i=0; i<m; i++) { 2699 idx = a->j + a->i[i] ; 2700 v = a->a + a->i[i] ; 2701 n = a->i[i+1] - a->i[i]; 2702 alpha1 = x[18*i]; 2703 alpha2 = x[18*i+1]; 2704 alpha3 = x[18*i+2]; 2705 alpha4 = x[18*i+3]; 2706 alpha5 = x[18*i+4]; 2707 alpha6 = x[18*i+5]; 2708 alpha7 = x[18*i+6]; 2709 alpha8 = x[18*i+7]; 2710 alpha9 = x[18*i+8]; 2711 alpha10 = x[18*i+9]; 2712 alpha11 = x[18*i+10]; 2713 alpha12 = x[18*i+11]; 2714 alpha13 = x[18*i+12]; 2715 alpha14 = x[18*i+13]; 2716 alpha15 = x[18*i+14]; 2717 alpha16 = x[18*i+15]; 2718 alpha17 = x[18*i+16]; 2719 alpha18 = x[18*i+17]; 2720 while (n-->0) { 2721 y[18*(*idx)] += alpha1*(*v); 2722 y[18*(*idx)+1] += alpha2*(*v); 2723 y[18*(*idx)+2] += alpha3*(*v); 2724 y[18*(*idx)+3] += alpha4*(*v); 2725 y[18*(*idx)+4] += alpha5*(*v); 2726 y[18*(*idx)+5] += alpha6*(*v); 2727 y[18*(*idx)+6] += alpha7*(*v); 2728 y[18*(*idx)+7] += alpha8*(*v); 2729 y[18*(*idx)+8] += alpha9*(*v); 2730 y[18*(*idx)+9] += alpha10*(*v); 2731 y[18*(*idx)+10] += alpha11*(*v); 2732 y[18*(*idx)+11] += alpha12*(*v); 2733 y[18*(*idx)+12] += alpha13*(*v); 2734 y[18*(*idx)+13] += alpha14*(*v); 2735 y[18*(*idx)+14] += alpha15*(*v); 2736 y[18*(*idx)+15] += alpha16*(*v); 2737 y[18*(*idx)+16] += alpha17*(*v); 2738 y[18*(*idx)+17] += alpha18*(*v); 2739 idx++; v++; 2740 } 2741 } 2742 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2743 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2744 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2745 PetscFunctionReturn(0); 2746 } 2747 2748 #undef __FUNCT__ 2749 #define __FUNCT__ "MatMult_SeqMAIJ_N" 2750 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2751 { 2752 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2753 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2754 const PetscScalar *x,*v; 2755 PetscScalar *y,*sums; 2756 PetscErrorCode ierr; 2757 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2758 PetscInt n,i,jrow,j,dof = b->dof,k; 2759 2760 PetscFunctionBegin; 2761 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2762 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2763 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2764 idx = a->j; 2765 v = a->a; 2766 ii = a->i; 2767 2768 for (i=0; i<m; i++) { 2769 jrow = ii[i]; 2770 n = ii[i+1] - jrow; 2771 sums = y + dof*i; 2772 for (j=0; j<n; j++) { 2773 for (k=0; k<dof; k++) { 2774 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2775 } 2776 jrow++; 2777 } 2778 } 2779 2780 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2781 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2782 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2783 PetscFunctionReturn(0); 2784 } 2785 2786 #undef __FUNCT__ 2787 #define __FUNCT__ "MatMultAdd_SeqMAIJ_N" 2788 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2789 { 2790 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2791 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2792 const PetscScalar *x,*v; 2793 PetscScalar *y,*sums; 2794 PetscErrorCode ierr; 2795 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2796 PetscInt n,i,jrow,j,dof = b->dof,k; 2797 2798 PetscFunctionBegin; 2799 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2800 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2801 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2802 idx = a->j; 2803 v = a->a; 2804 ii = a->i; 2805 2806 for (i=0; i<m; i++) { 2807 jrow = ii[i]; 2808 n = ii[i+1] - jrow; 2809 sums = y + dof*i; 2810 for (j=0; j<n; j++) { 2811 for (k=0; k<dof; k++) { 2812 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2813 } 2814 jrow++; 2815 } 2816 } 2817 2818 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2819 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2820 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2821 PetscFunctionReturn(0); 2822 } 2823 2824 #undef __FUNCT__ 2825 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N" 2826 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2827 { 2828 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2829 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2830 const PetscScalar *x,*v,*alpha; 2831 PetscScalar *y; 2832 PetscErrorCode ierr; 2833 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2834 PetscInt n,i,k; 2835 2836 PetscFunctionBegin; 2837 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2838 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2839 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2840 for (i=0; i<m; i++) { 2841 idx = a->j + a->i[i] ; 2842 v = a->a + a->i[i] ; 2843 n = a->i[i+1] - a->i[i]; 2844 alpha = x + dof*i; 2845 while (n-->0) { 2846 for (k=0; k<dof; k++) { 2847 y[dof*(*idx)+k] += alpha[k]*(*v); 2848 } 2849 idx++; v++; 2850 } 2851 } 2852 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2853 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2854 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2855 PetscFunctionReturn(0); 2856 } 2857 2858 #undef __FUNCT__ 2859 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N" 2860 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2861 { 2862 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2863 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2864 const PetscScalar *x,*v,*alpha; 2865 PetscScalar *y; 2866 PetscErrorCode ierr; 2867 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2868 PetscInt n,i,k; 2869 2870 PetscFunctionBegin; 2871 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2872 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2873 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2874 for (i=0; i<m; i++) { 2875 idx = a->j + a->i[i] ; 2876 v = a->a + a->i[i] ; 2877 n = a->i[i+1] - a->i[i]; 2878 alpha = x + dof*i; 2879 while (n-->0) { 2880 for (k=0; k<dof; k++) { 2881 y[dof*(*idx)+k] += alpha[k]*(*v); 2882 } 2883 idx++; v++; 2884 } 2885 } 2886 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2887 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2888 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2889 PetscFunctionReturn(0); 2890 } 2891 2892 /*===================================================================================*/ 2893 #undef __FUNCT__ 2894 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2895 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2896 { 2897 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2898 PetscErrorCode ierr; 2899 2900 PetscFunctionBegin; 2901 /* start the scatter */ 2902 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2903 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2904 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2905 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2906 PetscFunctionReturn(0); 2907 } 2908 2909 #undef __FUNCT__ 2910 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2911 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2912 { 2913 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2914 PetscErrorCode ierr; 2915 2916 PetscFunctionBegin; 2917 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2918 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2919 ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2920 ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2921 PetscFunctionReturn(0); 2922 } 2923 2924 #undef __FUNCT__ 2925 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2926 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2927 { 2928 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2929 PetscErrorCode ierr; 2930 2931 PetscFunctionBegin; 2932 /* start the scatter */ 2933 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2934 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2935 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2936 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2937 PetscFunctionReturn(0); 2938 } 2939 2940 #undef __FUNCT__ 2941 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2942 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2943 { 2944 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2945 PetscErrorCode ierr; 2946 2947 PetscFunctionBegin; 2948 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2949 ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2950 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2951 ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2952 PetscFunctionReturn(0); 2953 } 2954 2955 /* ----------------------------------------------------------------*/ 2956 #undef __FUNCT__ 2957 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 2958 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2959 { 2960 /* This routine requires testing -- but it's getting better. */ 2961 PetscErrorCode ierr; 2962 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2963 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2964 Mat P=pp->AIJ; 2965 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2966 PetscInt *pti,*ptj,*ptJ; 2967 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2968 const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 2969 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 2970 MatScalar *ca; 2971 const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 2972 2973 PetscFunctionBegin; 2974 /* Start timer */ 2975 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 2976 2977 /* Get ij structure of P^T */ 2978 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2979 2980 cn = pn*ppdof; 2981 /* Allocate ci array, arrays for fill computation and */ 2982 /* free space for accumulating nonzero column info */ 2983 ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 2984 ci[0] = 0; 2985 2986 /* Work arrays for rows of P^T*A */ 2987 ierr = PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);CHKERRQ(ierr); 2988 ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr); 2989 ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr); 2990 2991 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 2992 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 2993 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2994 ierr = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space); 2995 current_space = free_space; 2996 2997 /* Determine symbolic info for each row of C: */ 2998 for (i=0;i<pn;i++) { 2999 ptnzi = pti[i+1] - pti[i]; 3000 ptJ = ptj + pti[i]; 3001 for (dof=0;dof<ppdof;dof++) { 3002 ptanzi = 0; 3003 /* Determine symbolic row of PtA: */ 3004 for (j=0;j<ptnzi;j++) { 3005 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 3006 arow = ptJ[j]*ppdof + dof; 3007 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 3008 anzj = ai[arow+1] - ai[arow]; 3009 ajj = aj + ai[arow]; 3010 for (k=0;k<anzj;k++) { 3011 if (!ptadenserow[ajj[k]]) { 3012 ptadenserow[ajj[k]] = -1; 3013 ptasparserow[ptanzi++] = ajj[k]; 3014 } 3015 } 3016 } 3017 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 3018 ptaj = ptasparserow; 3019 cnzi = 0; 3020 for (j=0;j<ptanzi;j++) { 3021 /* Get offset within block of P */ 3022 pshift = *ptaj%ppdof; 3023 /* Get block row of P */ 3024 prow = (*ptaj++)/ppdof; /* integer division */ 3025 /* P has same number of nonzeros per row as the compressed form */ 3026 pnzj = pi[prow+1] - pi[prow]; 3027 pjj = pj + pi[prow]; 3028 for (k=0;k<pnzj;k++) { 3029 /* Locations in C are shifted by the offset within the block */ 3030 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 3031 if (!denserow[pjj[k]*ppdof+pshift]) { 3032 denserow[pjj[k]*ppdof+pshift] = -1; 3033 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 3034 } 3035 } 3036 } 3037 3038 /* sort sparserow */ 3039 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 3040 3041 /* If free space is not available, make more free space */ 3042 /* Double the amount of total space in the list */ 3043 if (current_space->local_remaining<cnzi) { 3044 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3045 } 3046 3047 /* Copy data into free space, and zero out denserows */ 3048 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 3049 current_space->array += cnzi; 3050 current_space->local_used += cnzi; 3051 current_space->local_remaining -= cnzi; 3052 3053 for (j=0;j<ptanzi;j++) { 3054 ptadenserow[ptasparserow[j]] = 0; 3055 } 3056 for (j=0;j<cnzi;j++) { 3057 denserow[sparserow[j]] = 0; 3058 } 3059 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 3060 /* For now, we will recompute what is needed. */ 3061 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 3062 } 3063 } 3064 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 3065 /* Allocate space for cj, initialize cj, and */ 3066 /* destroy list of free space and other temporary array(s) */ 3067 ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3068 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 3069 ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr); 3070 3071 /* Allocate space for ca */ 3072 ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 3073 ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 3074 3075 /* put together the new matrix */ 3076 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 3077 3078 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3079 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3080 c = (Mat_SeqAIJ *)((*C)->data); 3081 c->free_a = PETSC_TRUE; 3082 c->free_ij = PETSC_TRUE; 3083 c->nonew = 0; 3084 3085 /* Clean up. */ 3086 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 3087 3088 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 3089 PetscFunctionReturn(0); 3090 } 3091 3092 #undef __FUNCT__ 3093 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 3094 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 3095 { 3096 /* This routine requires testing -- first draft only */ 3097 PetscErrorCode ierr; 3098 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 3099 Mat P=pp->AIJ; 3100 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 3101 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 3102 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 3103 const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 3104 const PetscInt *ci=c->i,*cj=c->j,*cjj; 3105 const PetscInt am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3106 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3107 const MatScalar *aa=a->a,*pa=p->a,*pA=p->a,*paj; 3108 MatScalar *ca=c->a,*caj,*apa; 3109 3110 PetscFunctionBegin; 3111 /* Allocate temporary array for storage of one row of A*P */ 3112 ierr = PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);CHKERRQ(ierr); 3113 ierr = PetscMemzero(apa,cn*sizeof(MatScalar));CHKERRQ(ierr); 3114 ierr = PetscMemzero(apj,cn*sizeof(PetscInt));CHKERRQ(ierr); 3115 ierr = PetscMemzero(apjdense,cn*sizeof(PetscInt));CHKERRQ(ierr); 3116 3117 /* Clear old values in C */ 3118 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 3119 3120 for (i=0;i<am;i++) { 3121 /* Form sparse row of A*P */ 3122 anzi = ai[i+1] - ai[i]; 3123 apnzj = 0; 3124 for (j=0;j<anzi;j++) { 3125 /* Get offset within block of P */ 3126 pshift = *aj%ppdof; 3127 /* Get block row of P */ 3128 prow = *aj++/ppdof; /* integer division */ 3129 pnzj = pi[prow+1] - pi[prow]; 3130 pjj = pj + pi[prow]; 3131 paj = pa + pi[prow]; 3132 for (k=0;k<pnzj;k++) { 3133 poffset = pjj[k]*ppdof+pshift; 3134 if (!apjdense[poffset]) { 3135 apjdense[poffset] = -1; 3136 apj[apnzj++] = poffset; 3137 } 3138 apa[poffset] += (*aa)*paj[k]; 3139 } 3140 ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 3141 aa++; 3142 } 3143 3144 /* Sort the j index array for quick sparse axpy. */ 3145 /* Note: a array does not need sorting as it is in dense storage locations. */ 3146 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 3147 3148 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 3149 prow = i/ppdof; /* integer division */ 3150 pshift = i%ppdof; 3151 poffset = pi[prow]; 3152 pnzi = pi[prow+1] - poffset; 3153 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 3154 pJ = pj+poffset; 3155 pA = pa+poffset; 3156 for (j=0;j<pnzi;j++) { 3157 crow = (*pJ)*ppdof+pshift; 3158 cjj = cj + ci[crow]; 3159 caj = ca + ci[crow]; 3160 pJ++; 3161 /* Perform sparse axpy operation. Note cjj includes apj. */ 3162 for (k=0,nextap=0;nextap<apnzj;k++) { 3163 if (cjj[k]==apj[nextap]) { 3164 caj[k] += (*pA)*apa[apj[nextap++]]; 3165 } 3166 } 3167 ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 3168 pA++; 3169 } 3170 3171 /* Zero the current row info for A*P */ 3172 for (j=0;j<apnzj;j++) { 3173 apa[apj[j]] = 0.; 3174 apjdense[apj[j]] = 0; 3175 } 3176 } 3177 3178 /* Assemble the final matrix and clean up */ 3179 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3180 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3181 ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr); 3182 PetscFunctionReturn(0); 3183 } 3184 3185 #undef __FUNCT__ 3186 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ" 3187 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 3188 { 3189 PetscErrorCode ierr; 3190 3191 PetscFunctionBegin; 3192 /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 3193 ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr); 3194 ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 3195 PetscFunctionReturn(0); 3196 } 3197 3198 #undef __FUNCT__ 3199 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ" 3200 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 3201 { 3202 PetscFunctionBegin; 3203 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 3204 PetscFunctionReturn(0); 3205 } 3206 3207 EXTERN_C_BEGIN 3208 #undef __FUNCT__ 3209 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 3210 PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3211 { 3212 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3213 Mat a = b->AIJ,B; 3214 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 3215 PetscErrorCode ierr; 3216 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3217 PetscInt *cols; 3218 PetscScalar *vals; 3219 3220 PetscFunctionBegin; 3221 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 3222 ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr); 3223 for (i=0; i<m; i++) { 3224 nmax = PetscMax(nmax,aij->ilen[i]); 3225 for (j=0; j<dof; j++) { 3226 ilen[dof*i+j] = aij->ilen[i]; 3227 } 3228 } 3229 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 3230 ierr = PetscFree(ilen);CHKERRQ(ierr); 3231 ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 3232 ii = 0; 3233 for (i=0; i<m; i++) { 3234 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3235 for (j=0; j<dof; j++) { 3236 for (k=0; k<ncols; k++) { 3237 icols[k] = dof*cols[k]+j; 3238 } 3239 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3240 ii++; 3241 } 3242 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3243 } 3244 ierr = PetscFree(icols);CHKERRQ(ierr); 3245 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3246 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3247 3248 if (reuse == MAT_REUSE_MATRIX) { 3249 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3250 } else { 3251 *newmat = B; 3252 } 3253 PetscFunctionReturn(0); 3254 } 3255 EXTERN_C_END 3256 3257 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3258 3259 EXTERN_C_BEGIN 3260 #undef __FUNCT__ 3261 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 3262 PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3263 { 3264 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3265 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 3266 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 3267 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 3268 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 3269 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 3270 PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0; 3271 PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL; 3272 PetscInt rstart,cstart,*garray,ii,k; 3273 PetscErrorCode ierr; 3274 PetscScalar *vals,*ovals; 3275 3276 PetscFunctionBegin; 3277 ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr); 3278 for (i=0; i<A->rmap->n/dof; i++) { 3279 nmax = PetscMax(nmax,AIJ->ilen[i]); 3280 onmax = PetscMax(onmax,OAIJ->ilen[i]); 3281 for (j=0; j<dof; j++) { 3282 dnz[dof*i+j] = AIJ->ilen[i]; 3283 onz[dof*i+j] = OAIJ->ilen[i]; 3284 } 3285 } 3286 ierr = MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 3287 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 3288 3289 ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 3290 rstart = dof*maij->A->rmap->rstart; 3291 cstart = dof*maij->A->cmap->rstart; 3292 garray = mpiaij->garray; 3293 3294 ii = rstart; 3295 for (i=0; i<A->rmap->n/dof; i++) { 3296 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3297 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3298 for (j=0; j<dof; j++) { 3299 for (k=0; k<ncols; k++) { 3300 icols[k] = cstart + dof*cols[k]+j; 3301 } 3302 for (k=0; k<oncols; k++) { 3303 oicols[k] = dof*garray[ocols[k]]+j; 3304 } 3305 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3306 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 3307 ii++; 3308 } 3309 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3310 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3311 } 3312 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 3313 3314 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3315 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3316 3317 if (reuse == MAT_REUSE_MATRIX) { 3318 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 3319 ((PetscObject)A)->refct = 1; 3320 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3321 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3322 } else { 3323 *newmat = B; 3324 } 3325 PetscFunctionReturn(0); 3326 } 3327 EXTERN_C_END 3328 3329 #undef __FUNCT__ 3330 #define __FUNCT__ "MatGetSubMatrix_MAIJ" 3331 PetscErrorCode MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 3332 { 3333 PetscErrorCode ierr; 3334 Mat A; 3335 3336 PetscFunctionBegin; 3337 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 3338 ierr = MatGetSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 3339 ierr = MatDestroy(&A);CHKERRQ(ierr); 3340 PetscFunctionReturn(0); 3341 } 3342 3343 /* ---------------------------------------------------------------------------------- */ 3344 #undef __FUNCT__ 3345 #define __FUNCT__ "MatCreateMAIJ" 3346 /*@C 3347 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 3348 operations for multicomponent problems. It interpolates each component the same 3349 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 3350 and MATMPIAIJ for distributed matrices. 3351 3352 Collective 3353 3354 Input Parameters: 3355 + A - the AIJ matrix describing the action on blocks 3356 - dof - the block size (number of components per node) 3357 3358 Output Parameter: 3359 . maij - the new MAIJ matrix 3360 3361 Operations provided: 3362 + MatMult 3363 . MatMultTranspose 3364 . MatMultAdd 3365 . MatMultTransposeAdd 3366 - MatView 3367 3368 Level: advanced 3369 3370 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ 3371 @*/ 3372 PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 3373 { 3374 PetscErrorCode ierr; 3375 PetscMPIInt size; 3376 PetscInt n; 3377 Mat B; 3378 3379 PetscFunctionBegin; 3380 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3381 3382 if (dof == 1) { 3383 *maij = A; 3384 } else { 3385 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3386 ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3387 ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr); 3388 ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr); 3389 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3390 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3391 B->assembled = PETSC_TRUE; 3392 3393 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3394 if (size == 1) { 3395 Mat_SeqMAIJ *b; 3396 3397 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 3398 B->ops->destroy = MatDestroy_SeqMAIJ; 3399 B->ops->view = MatView_SeqMAIJ; 3400 b = (Mat_SeqMAIJ*)B->data; 3401 b->dof = dof; 3402 b->AIJ = A; 3403 if (dof == 2) { 3404 B->ops->mult = MatMult_SeqMAIJ_2; 3405 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3406 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3407 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3408 } else if (dof == 3) { 3409 B->ops->mult = MatMult_SeqMAIJ_3; 3410 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3411 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3412 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3413 } else if (dof == 4) { 3414 B->ops->mult = MatMult_SeqMAIJ_4; 3415 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3416 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3417 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3418 } else if (dof == 5) { 3419 B->ops->mult = MatMult_SeqMAIJ_5; 3420 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3421 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3422 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 3423 } else if (dof == 6) { 3424 B->ops->mult = MatMult_SeqMAIJ_6; 3425 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 3426 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 3427 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3428 } else if (dof == 7) { 3429 B->ops->mult = MatMult_SeqMAIJ_7; 3430 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3431 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3432 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 3433 } else if (dof == 8) { 3434 B->ops->mult = MatMult_SeqMAIJ_8; 3435 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 3436 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 3437 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 3438 } else if (dof == 9) { 3439 B->ops->mult = MatMult_SeqMAIJ_9; 3440 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 3441 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 3442 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3443 } else if (dof == 10) { 3444 B->ops->mult = MatMult_SeqMAIJ_10; 3445 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3446 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3447 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3448 } else if (dof == 11) { 3449 B->ops->mult = MatMult_SeqMAIJ_11; 3450 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3451 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3452 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 3453 } else if (dof == 16) { 3454 B->ops->mult = MatMult_SeqMAIJ_16; 3455 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 3456 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 3457 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3458 } else if (dof == 18) { 3459 B->ops->mult = MatMult_SeqMAIJ_18; 3460 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3461 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3462 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3463 } else { 3464 B->ops->mult = MatMult_SeqMAIJ_N; 3465 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 3466 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 3467 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 3468 } 3469 B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ; 3470 B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 3471 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3472 } else { 3473 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 3474 Mat_MPIMAIJ *b; 3475 IS from,to; 3476 Vec gvec; 3477 3478 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 3479 B->ops->destroy = MatDestroy_MPIMAIJ; 3480 B->ops->view = MatView_MPIMAIJ; 3481 b = (Mat_MPIMAIJ*)B->data; 3482 b->dof = dof; 3483 b->A = A; 3484 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 3485 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 3486 3487 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3488 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 3489 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3490 3491 /* create two temporary Index sets for build scatter gather */ 3492 ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 3493 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3494 3495 /* create temporary global vector to generate scatter context */ 3496 ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 3497 ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr); 3498 3499 /* generate the scatter context */ 3500 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3501 3502 ierr = ISDestroy(&from);CHKERRQ(ierr); 3503 ierr = ISDestroy(&to);CHKERRQ(ierr); 3504 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 3505 3506 B->ops->mult = MatMult_MPIMAIJ_dof; 3507 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3508 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3509 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3510 B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ; 3511 B->ops->ptapnumeric_mpiaij = MatPtAPNumeric_MPIAIJ_MPIMAIJ; 3512 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3513 } 3514 B->ops->getsubmatrix = MatGetSubMatrix_MAIJ; 3515 *maij = B; 3516 ierr = MatView_Private(B);CHKERRQ(ierr); 3517 } 3518 PetscFunctionReturn(0); 3519 } 3520