1 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20 #include <../src/mat/utils/freespace.h> 21 #include <petsc-private/vecimpl.h> 22 23 #undef __FUNCT__ 24 #define __FUNCT__ "MatMAIJGetAIJ" 25 /*@ 26 MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 27 28 Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 29 30 Input Parameter: 31 . A - the MAIJ matrix 32 33 Output Parameter: 34 . B - the AIJ matrix 35 36 Level: advanced 37 38 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 39 40 .seealso: MatCreateMAIJ() 41 @*/ 42 PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 43 { 44 PetscErrorCode ierr; 45 PetscBool ismpimaij,isseqmaij; 46 47 PetscFunctionBegin; 48 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 49 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 50 if (ismpimaij) { 51 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 52 53 *B = b->A; 54 } else if (isseqmaij) { 55 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 56 57 *B = b->AIJ; 58 } else { 59 *B = A; 60 } 61 PetscFunctionReturn(0); 62 } 63 64 #undef __FUNCT__ 65 #define __FUNCT__ "MatMAIJRedimension" 66 /*@C 67 MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 68 69 Logically Collective 70 71 Input Parameter: 72 + A - the MAIJ matrix 73 - dof - the block size for the new matrix 74 75 Output Parameter: 76 . B - the new MAIJ matrix 77 78 Level: advanced 79 80 .seealso: MatCreateMAIJ() 81 @*/ 82 PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 83 { 84 PetscErrorCode ierr; 85 Mat Aij = NULL; 86 87 PetscFunctionBegin; 88 PetscValidLogicalCollectiveInt(A,dof,2); 89 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 90 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatDestroy_SeqMAIJ" 96 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 97 { 98 PetscErrorCode ierr; 99 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 100 101 PetscFunctionBegin; 102 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 103 ierr = PetscFree(A->data);CHKERRQ(ierr); 104 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr); 105 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);CHKERRQ(ierr); 106 PetscFunctionReturn(0); 107 } 108 109 #undef __FUNCT__ 110 #define __FUNCT__ "MatSetUp_MAIJ" 111 PetscErrorCode MatSetUp_MAIJ(Mat A) 112 { 113 PetscFunctionBegin; 114 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices"); 115 PetscFunctionReturn(0); 116 } 117 118 #undef __FUNCT__ 119 #define __FUNCT__ "MatView_SeqMAIJ" 120 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 121 { 122 PetscErrorCode ierr; 123 Mat B; 124 125 PetscFunctionBegin; 126 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 127 ierr = MatView(B,viewer);CHKERRQ(ierr); 128 ierr = MatDestroy(&B);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatView_MPIMAIJ" 134 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 135 { 136 PetscErrorCode ierr; 137 Mat B; 138 139 PetscFunctionBegin; 140 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 141 ierr = MatView(B,viewer);CHKERRQ(ierr); 142 ierr = MatDestroy(&B);CHKERRQ(ierr); 143 PetscFunctionReturn(0); 144 } 145 146 #undef __FUNCT__ 147 #define __FUNCT__ "MatDestroy_MPIMAIJ" 148 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 149 { 150 PetscErrorCode ierr; 151 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 152 153 PetscFunctionBegin; 154 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 155 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 156 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 157 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 158 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 159 ierr = PetscFree(A->data);CHKERRQ(ierr); 160 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr); 161 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr); 162 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 163 PetscFunctionReturn(0); 164 } 165 166 /*MC 167 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 168 multicomponent problems, interpolating or restricting each component the same way independently. 169 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 170 171 Operations provided: 172 . MatMult 173 . MatMultTranspose 174 . MatMultAdd 175 . MatMultTransposeAdd 176 177 Level: advanced 178 179 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ() 180 M*/ 181 182 #undef __FUNCT__ 183 #define __FUNCT__ "MatCreate_MAIJ" 184 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 185 { 186 PetscErrorCode ierr; 187 Mat_MPIMAIJ *b; 188 PetscMPIInt size; 189 190 PetscFunctionBegin; 191 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 192 A->data = (void*)b; 193 194 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 195 196 A->ops->setup = MatSetUp_MAIJ; 197 198 b->AIJ = 0; 199 b->dof = 0; 200 b->OAIJ = 0; 201 b->ctx = 0; 202 b->w = 0; 203 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 204 if (size == 1) { 205 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 206 } else { 207 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 208 } 209 PetscFunctionReturn(0); 210 } 211 212 /* --------------------------------------------------------------------------------------*/ 213 #undef __FUNCT__ 214 #define __FUNCT__ "MatMult_SeqMAIJ_2" 215 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 216 { 217 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 218 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 219 const PetscScalar *x,*v; 220 PetscScalar *y, sum1, sum2; 221 PetscErrorCode ierr; 222 PetscInt nonzerorow=0,n,i,jrow,j; 223 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 224 225 PetscFunctionBegin; 226 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 227 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 228 idx = a->j; 229 v = a->a; 230 ii = a->i; 231 232 for (i=0; i<m; i++) { 233 jrow = ii[i]; 234 n = ii[i+1] - jrow; 235 sum1 = 0.0; 236 sum2 = 0.0; 237 238 nonzerorow += (n>0); 239 for (j=0; j<n; j++) { 240 sum1 += v[jrow]*x[2*idx[jrow]]; 241 sum2 += v[jrow]*x[2*idx[jrow]+1]; 242 jrow++; 243 } 244 y[2*i] = sum1; 245 y[2*i+1] = sum2; 246 } 247 248 ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 249 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 250 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 251 PetscFunctionReturn(0); 252 } 253 254 #undef __FUNCT__ 255 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 256 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 257 { 258 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 259 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 260 const PetscScalar *x,*v; 261 PetscScalar *y,alpha1,alpha2; 262 PetscErrorCode ierr; 263 PetscInt n,i; 264 const PetscInt m = b->AIJ->rmap->n,*idx; 265 266 PetscFunctionBegin; 267 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 268 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 269 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 270 271 for (i=0; i<m; i++) { 272 idx = a->j + a->i[i]; 273 v = a->a + a->i[i]; 274 n = a->i[i+1] - a->i[i]; 275 alpha1 = x[2*i]; 276 alpha2 = x[2*i+1]; 277 while (n-->0) { 278 y[2*(*idx)] += alpha1*(*v); 279 y[2*(*idx)+1] += alpha2*(*v); 280 idx++; v++; 281 } 282 } 283 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 284 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 285 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 286 PetscFunctionReturn(0); 287 } 288 289 #undef __FUNCT__ 290 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 291 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 292 { 293 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 294 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 295 const PetscScalar *x,*v; 296 PetscScalar *y,sum1, sum2; 297 PetscErrorCode ierr; 298 PetscInt n,i,jrow,j; 299 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 300 301 PetscFunctionBegin; 302 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 303 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 304 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 305 idx = a->j; 306 v = a->a; 307 ii = a->i; 308 309 for (i=0; i<m; i++) { 310 jrow = ii[i]; 311 n = ii[i+1] - jrow; 312 sum1 = 0.0; 313 sum2 = 0.0; 314 for (j=0; j<n; j++) { 315 sum1 += v[jrow]*x[2*idx[jrow]]; 316 sum2 += v[jrow]*x[2*idx[jrow]+1]; 317 jrow++; 318 } 319 y[2*i] += sum1; 320 y[2*i+1] += sum2; 321 } 322 323 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 324 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 325 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 326 PetscFunctionReturn(0); 327 } 328 #undef __FUNCT__ 329 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 330 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 331 { 332 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 333 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 334 const PetscScalar *x,*v; 335 PetscScalar *y,alpha1,alpha2; 336 PetscErrorCode ierr; 337 PetscInt n,i; 338 const PetscInt m = b->AIJ->rmap->n,*idx; 339 340 PetscFunctionBegin; 341 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 342 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 343 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 344 345 for (i=0; i<m; i++) { 346 idx = a->j + a->i[i]; 347 v = a->a + a->i[i]; 348 n = a->i[i+1] - a->i[i]; 349 alpha1 = x[2*i]; 350 alpha2 = x[2*i+1]; 351 while (n-->0) { 352 y[2*(*idx)] += alpha1*(*v); 353 y[2*(*idx)+1] += alpha2*(*v); 354 idx++; v++; 355 } 356 } 357 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 358 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 359 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 360 PetscFunctionReturn(0); 361 } 362 /* --------------------------------------------------------------------------------------*/ 363 #undef __FUNCT__ 364 #define __FUNCT__ "MatMult_SeqMAIJ_3" 365 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 366 { 367 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 368 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 369 const PetscScalar *x,*v; 370 PetscScalar *y,sum1, sum2, sum3; 371 PetscErrorCode ierr; 372 PetscInt nonzerorow=0,n,i,jrow,j; 373 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 374 375 PetscFunctionBegin; 376 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 377 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 378 idx = a->j; 379 v = a->a; 380 ii = a->i; 381 382 for (i=0; i<m; i++) { 383 jrow = ii[i]; 384 n = ii[i+1] - jrow; 385 sum1 = 0.0; 386 sum2 = 0.0; 387 sum3 = 0.0; 388 389 nonzerorow += (n>0); 390 for (j=0; j<n; j++) { 391 sum1 += v[jrow]*x[3*idx[jrow]]; 392 sum2 += v[jrow]*x[3*idx[jrow]+1]; 393 sum3 += v[jrow]*x[3*idx[jrow]+2]; 394 jrow++; 395 } 396 y[3*i] = sum1; 397 y[3*i+1] = sum2; 398 y[3*i+2] = sum3; 399 } 400 401 ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 402 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 403 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 409 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 410 { 411 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 412 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 413 const PetscScalar *x,*v; 414 PetscScalar *y,alpha1,alpha2,alpha3; 415 PetscErrorCode ierr; 416 PetscInt n,i; 417 const PetscInt m = b->AIJ->rmap->n,*idx; 418 419 PetscFunctionBegin; 420 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 421 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 422 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 423 424 for (i=0; i<m; i++) { 425 idx = a->j + a->i[i]; 426 v = a->a + a->i[i]; 427 n = a->i[i+1] - a->i[i]; 428 alpha1 = x[3*i]; 429 alpha2 = x[3*i+1]; 430 alpha3 = x[3*i+2]; 431 while (n-->0) { 432 y[3*(*idx)] += alpha1*(*v); 433 y[3*(*idx)+1] += alpha2*(*v); 434 y[3*(*idx)+2] += alpha3*(*v); 435 idx++; v++; 436 } 437 } 438 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 439 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 440 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 441 PetscFunctionReturn(0); 442 } 443 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 446 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 447 { 448 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 449 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 450 const PetscScalar *x,*v; 451 PetscScalar *y,sum1, sum2, sum3; 452 PetscErrorCode ierr; 453 PetscInt n,i,jrow,j; 454 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 455 456 PetscFunctionBegin; 457 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 458 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 459 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 460 idx = a->j; 461 v = a->a; 462 ii = a->i; 463 464 for (i=0; i<m; i++) { 465 jrow = ii[i]; 466 n = ii[i+1] - jrow; 467 sum1 = 0.0; 468 sum2 = 0.0; 469 sum3 = 0.0; 470 for (j=0; j<n; j++) { 471 sum1 += v[jrow]*x[3*idx[jrow]]; 472 sum2 += v[jrow]*x[3*idx[jrow]+1]; 473 sum3 += v[jrow]*x[3*idx[jrow]+2]; 474 jrow++; 475 } 476 y[3*i] += sum1; 477 y[3*i+1] += sum2; 478 y[3*i+2] += sum3; 479 } 480 481 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 482 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 483 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 484 PetscFunctionReturn(0); 485 } 486 #undef __FUNCT__ 487 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 488 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 489 { 490 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 491 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 492 const PetscScalar *x,*v; 493 PetscScalar *y,alpha1,alpha2,alpha3; 494 PetscErrorCode ierr; 495 PetscInt n,i; 496 const PetscInt m = b->AIJ->rmap->n,*idx; 497 498 PetscFunctionBegin; 499 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 500 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 501 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 502 for (i=0; i<m; i++) { 503 idx = a->j + a->i[i]; 504 v = a->a + a->i[i]; 505 n = a->i[i+1] - a->i[i]; 506 alpha1 = x[3*i]; 507 alpha2 = x[3*i+1]; 508 alpha3 = x[3*i+2]; 509 while (n-->0) { 510 y[3*(*idx)] += alpha1*(*v); 511 y[3*(*idx)+1] += alpha2*(*v); 512 y[3*(*idx)+2] += alpha3*(*v); 513 idx++; v++; 514 } 515 } 516 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 517 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 518 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 519 PetscFunctionReturn(0); 520 } 521 522 /* ------------------------------------------------------------------------------*/ 523 #undef __FUNCT__ 524 #define __FUNCT__ "MatMult_SeqMAIJ_4" 525 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 526 { 527 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 528 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 529 const PetscScalar *x,*v; 530 PetscScalar *y,sum1, sum2, sum3, sum4; 531 PetscErrorCode ierr; 532 PetscInt nonzerorow=0,n,i,jrow,j; 533 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 534 535 PetscFunctionBegin; 536 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 537 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 538 idx = a->j; 539 v = a->a; 540 ii = a->i; 541 542 for (i=0; i<m; i++) { 543 jrow = ii[i]; 544 n = ii[i+1] - jrow; 545 sum1 = 0.0; 546 sum2 = 0.0; 547 sum3 = 0.0; 548 sum4 = 0.0; 549 nonzerorow += (n>0); 550 for (j=0; j<n; j++) { 551 sum1 += v[jrow]*x[4*idx[jrow]]; 552 sum2 += v[jrow]*x[4*idx[jrow]+1]; 553 sum3 += v[jrow]*x[4*idx[jrow]+2]; 554 sum4 += v[jrow]*x[4*idx[jrow]+3]; 555 jrow++; 556 } 557 y[4*i] = sum1; 558 y[4*i+1] = sum2; 559 y[4*i+2] = sum3; 560 y[4*i+3] = sum4; 561 } 562 563 ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 564 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 565 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 566 PetscFunctionReturn(0); 567 } 568 569 #undef __FUNCT__ 570 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 571 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 572 { 573 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 574 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 575 const PetscScalar *x,*v; 576 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 577 PetscErrorCode ierr; 578 PetscInt n,i; 579 const PetscInt m = b->AIJ->rmap->n,*idx; 580 581 PetscFunctionBegin; 582 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 583 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 584 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 585 for (i=0; i<m; i++) { 586 idx = a->j + a->i[i]; 587 v = a->a + a->i[i]; 588 n = a->i[i+1] - a->i[i]; 589 alpha1 = x[4*i]; 590 alpha2 = x[4*i+1]; 591 alpha3 = x[4*i+2]; 592 alpha4 = x[4*i+3]; 593 while (n-->0) { 594 y[4*(*idx)] += alpha1*(*v); 595 y[4*(*idx)+1] += alpha2*(*v); 596 y[4*(*idx)+2] += alpha3*(*v); 597 y[4*(*idx)+3] += alpha4*(*v); 598 idx++; v++; 599 } 600 } 601 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 602 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 603 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 604 PetscFunctionReturn(0); 605 } 606 607 #undef __FUNCT__ 608 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 609 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 610 { 611 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 612 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 613 const PetscScalar *x,*v; 614 PetscScalar *y,sum1, sum2, sum3, sum4; 615 PetscErrorCode ierr; 616 PetscInt n,i,jrow,j; 617 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 618 619 PetscFunctionBegin; 620 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 621 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 622 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 623 idx = a->j; 624 v = a->a; 625 ii = a->i; 626 627 for (i=0; i<m; i++) { 628 jrow = ii[i]; 629 n = ii[i+1] - jrow; 630 sum1 = 0.0; 631 sum2 = 0.0; 632 sum3 = 0.0; 633 sum4 = 0.0; 634 for (j=0; j<n; j++) { 635 sum1 += v[jrow]*x[4*idx[jrow]]; 636 sum2 += v[jrow]*x[4*idx[jrow]+1]; 637 sum3 += v[jrow]*x[4*idx[jrow]+2]; 638 sum4 += v[jrow]*x[4*idx[jrow]+3]; 639 jrow++; 640 } 641 y[4*i] += sum1; 642 y[4*i+1] += sum2; 643 y[4*i+2] += sum3; 644 y[4*i+3] += sum4; 645 } 646 647 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 648 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 649 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 650 PetscFunctionReturn(0); 651 } 652 #undef __FUNCT__ 653 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 654 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 655 { 656 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 657 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 658 const PetscScalar *x,*v; 659 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 660 PetscErrorCode ierr; 661 PetscInt n,i; 662 const PetscInt m = b->AIJ->rmap->n,*idx; 663 664 PetscFunctionBegin; 665 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 666 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 667 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 668 669 for (i=0; i<m; i++) { 670 idx = a->j + a->i[i]; 671 v = a->a + a->i[i]; 672 n = a->i[i+1] - a->i[i]; 673 alpha1 = x[4*i]; 674 alpha2 = x[4*i+1]; 675 alpha3 = x[4*i+2]; 676 alpha4 = x[4*i+3]; 677 while (n-->0) { 678 y[4*(*idx)] += alpha1*(*v); 679 y[4*(*idx)+1] += alpha2*(*v); 680 y[4*(*idx)+2] += alpha3*(*v); 681 y[4*(*idx)+3] += alpha4*(*v); 682 idx++; v++; 683 } 684 } 685 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 686 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 687 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 /* ------------------------------------------------------------------------------*/ 691 692 #undef __FUNCT__ 693 #define __FUNCT__ "MatMult_SeqMAIJ_5" 694 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 695 { 696 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 697 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 698 const PetscScalar *x,*v; 699 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 700 PetscErrorCode ierr; 701 PetscInt nonzerorow=0,n,i,jrow,j; 702 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 703 704 PetscFunctionBegin; 705 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 706 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 707 idx = a->j; 708 v = a->a; 709 ii = a->i; 710 711 for (i=0; i<m; i++) { 712 jrow = ii[i]; 713 n = ii[i+1] - jrow; 714 sum1 = 0.0; 715 sum2 = 0.0; 716 sum3 = 0.0; 717 sum4 = 0.0; 718 sum5 = 0.0; 719 720 nonzerorow += (n>0); 721 for (j=0; j<n; j++) { 722 sum1 += v[jrow]*x[5*idx[jrow]]; 723 sum2 += v[jrow]*x[5*idx[jrow]+1]; 724 sum3 += v[jrow]*x[5*idx[jrow]+2]; 725 sum4 += v[jrow]*x[5*idx[jrow]+3]; 726 sum5 += v[jrow]*x[5*idx[jrow]+4]; 727 jrow++; 728 } 729 y[5*i] = sum1; 730 y[5*i+1] = sum2; 731 y[5*i+2] = sum3; 732 y[5*i+3] = sum4; 733 y[5*i+4] = sum5; 734 } 735 736 ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 737 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 738 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 739 PetscFunctionReturn(0); 740 } 741 742 #undef __FUNCT__ 743 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 744 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 745 { 746 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 747 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 748 const PetscScalar *x,*v; 749 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 750 PetscErrorCode ierr; 751 PetscInt n,i; 752 const PetscInt m = b->AIJ->rmap->n,*idx; 753 754 PetscFunctionBegin; 755 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 756 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 757 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 758 759 for (i=0; i<m; i++) { 760 idx = a->j + a->i[i]; 761 v = a->a + a->i[i]; 762 n = a->i[i+1] - a->i[i]; 763 alpha1 = x[5*i]; 764 alpha2 = x[5*i+1]; 765 alpha3 = x[5*i+2]; 766 alpha4 = x[5*i+3]; 767 alpha5 = x[5*i+4]; 768 while (n-->0) { 769 y[5*(*idx)] += alpha1*(*v); 770 y[5*(*idx)+1] += alpha2*(*v); 771 y[5*(*idx)+2] += alpha3*(*v); 772 y[5*(*idx)+3] += alpha4*(*v); 773 y[5*(*idx)+4] += alpha5*(*v); 774 idx++; v++; 775 } 776 } 777 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 778 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 779 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 780 PetscFunctionReturn(0); 781 } 782 783 #undef __FUNCT__ 784 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 785 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 786 { 787 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 788 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 789 const PetscScalar *x,*v; 790 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 791 PetscErrorCode ierr; 792 PetscInt n,i,jrow,j; 793 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 794 795 PetscFunctionBegin; 796 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 797 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 798 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 799 idx = a->j; 800 v = a->a; 801 ii = a->i; 802 803 for (i=0; i<m; i++) { 804 jrow = ii[i]; 805 n = ii[i+1] - jrow; 806 sum1 = 0.0; 807 sum2 = 0.0; 808 sum3 = 0.0; 809 sum4 = 0.0; 810 sum5 = 0.0; 811 for (j=0; j<n; j++) { 812 sum1 += v[jrow]*x[5*idx[jrow]]; 813 sum2 += v[jrow]*x[5*idx[jrow]+1]; 814 sum3 += v[jrow]*x[5*idx[jrow]+2]; 815 sum4 += v[jrow]*x[5*idx[jrow]+3]; 816 sum5 += v[jrow]*x[5*idx[jrow]+4]; 817 jrow++; 818 } 819 y[5*i] += sum1; 820 y[5*i+1] += sum2; 821 y[5*i+2] += sum3; 822 y[5*i+3] += sum4; 823 y[5*i+4] += sum5; 824 } 825 826 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 827 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 828 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 829 PetscFunctionReturn(0); 830 } 831 832 #undef __FUNCT__ 833 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 834 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 835 { 836 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 837 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 838 const PetscScalar *x,*v; 839 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 840 PetscErrorCode ierr; 841 PetscInt n,i; 842 const PetscInt m = b->AIJ->rmap->n,*idx; 843 844 PetscFunctionBegin; 845 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 846 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 847 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 848 849 for (i=0; i<m; i++) { 850 idx = a->j + a->i[i]; 851 v = a->a + a->i[i]; 852 n = a->i[i+1] - a->i[i]; 853 alpha1 = x[5*i]; 854 alpha2 = x[5*i+1]; 855 alpha3 = x[5*i+2]; 856 alpha4 = x[5*i+3]; 857 alpha5 = x[5*i+4]; 858 while (n-->0) { 859 y[5*(*idx)] += alpha1*(*v); 860 y[5*(*idx)+1] += alpha2*(*v); 861 y[5*(*idx)+2] += alpha3*(*v); 862 y[5*(*idx)+3] += alpha4*(*v); 863 y[5*(*idx)+4] += alpha5*(*v); 864 idx++; v++; 865 } 866 } 867 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 868 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 869 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 870 PetscFunctionReturn(0); 871 } 872 873 /* ------------------------------------------------------------------------------*/ 874 #undef __FUNCT__ 875 #define __FUNCT__ "MatMult_SeqMAIJ_6" 876 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 877 { 878 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 879 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 880 const PetscScalar *x,*v; 881 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 882 PetscErrorCode ierr; 883 PetscInt nonzerorow=0,n,i,jrow,j; 884 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 885 886 PetscFunctionBegin; 887 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 888 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 889 idx = a->j; 890 v = a->a; 891 ii = a->i; 892 893 for (i=0; i<m; i++) { 894 jrow = ii[i]; 895 n = ii[i+1] - jrow; 896 sum1 = 0.0; 897 sum2 = 0.0; 898 sum3 = 0.0; 899 sum4 = 0.0; 900 sum5 = 0.0; 901 sum6 = 0.0; 902 903 nonzerorow += (n>0); 904 for (j=0; j<n; j++) { 905 sum1 += v[jrow]*x[6*idx[jrow]]; 906 sum2 += v[jrow]*x[6*idx[jrow]+1]; 907 sum3 += v[jrow]*x[6*idx[jrow]+2]; 908 sum4 += v[jrow]*x[6*idx[jrow]+3]; 909 sum5 += v[jrow]*x[6*idx[jrow]+4]; 910 sum6 += v[jrow]*x[6*idx[jrow]+5]; 911 jrow++; 912 } 913 y[6*i] = sum1; 914 y[6*i+1] = sum2; 915 y[6*i+2] = sum3; 916 y[6*i+3] = sum4; 917 y[6*i+4] = sum5; 918 y[6*i+5] = sum6; 919 } 920 921 ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 922 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 923 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 924 PetscFunctionReturn(0); 925 } 926 927 #undef __FUNCT__ 928 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 929 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 930 { 931 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 932 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 933 const PetscScalar *x,*v; 934 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 935 PetscErrorCode ierr; 936 PetscInt n,i; 937 const PetscInt m = b->AIJ->rmap->n,*idx; 938 939 PetscFunctionBegin; 940 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 941 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 942 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 943 944 for (i=0; i<m; i++) { 945 idx = a->j + a->i[i]; 946 v = a->a + a->i[i]; 947 n = a->i[i+1] - a->i[i]; 948 alpha1 = x[6*i]; 949 alpha2 = x[6*i+1]; 950 alpha3 = x[6*i+2]; 951 alpha4 = x[6*i+3]; 952 alpha5 = x[6*i+4]; 953 alpha6 = x[6*i+5]; 954 while (n-->0) { 955 y[6*(*idx)] += alpha1*(*v); 956 y[6*(*idx)+1] += alpha2*(*v); 957 y[6*(*idx)+2] += alpha3*(*v); 958 y[6*(*idx)+3] += alpha4*(*v); 959 y[6*(*idx)+4] += alpha5*(*v); 960 y[6*(*idx)+5] += alpha6*(*v); 961 idx++; v++; 962 } 963 } 964 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 965 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 966 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 967 PetscFunctionReturn(0); 968 } 969 970 #undef __FUNCT__ 971 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 972 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 973 { 974 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 975 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 976 const PetscScalar *x,*v; 977 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 978 PetscErrorCode ierr; 979 PetscInt n,i,jrow,j; 980 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 981 982 PetscFunctionBegin; 983 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 984 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 985 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 986 idx = a->j; 987 v = a->a; 988 ii = a->i; 989 990 for (i=0; i<m; i++) { 991 jrow = ii[i]; 992 n = ii[i+1] - jrow; 993 sum1 = 0.0; 994 sum2 = 0.0; 995 sum3 = 0.0; 996 sum4 = 0.0; 997 sum5 = 0.0; 998 sum6 = 0.0; 999 for (j=0; j<n; j++) { 1000 sum1 += v[jrow]*x[6*idx[jrow]]; 1001 sum2 += v[jrow]*x[6*idx[jrow]+1]; 1002 sum3 += v[jrow]*x[6*idx[jrow]+2]; 1003 sum4 += v[jrow]*x[6*idx[jrow]+3]; 1004 sum5 += v[jrow]*x[6*idx[jrow]+4]; 1005 sum6 += v[jrow]*x[6*idx[jrow]+5]; 1006 jrow++; 1007 } 1008 y[6*i] += sum1; 1009 y[6*i+1] += sum2; 1010 y[6*i+2] += sum3; 1011 y[6*i+3] += sum4; 1012 y[6*i+4] += sum5; 1013 y[6*i+5] += sum6; 1014 } 1015 1016 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1017 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1018 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1019 PetscFunctionReturn(0); 1020 } 1021 1022 #undef __FUNCT__ 1023 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 1024 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 1025 { 1026 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1027 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1028 const PetscScalar *x,*v; 1029 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 1030 PetscErrorCode ierr; 1031 PetscInt n,i; 1032 const PetscInt m = b->AIJ->rmap->n,*idx; 1033 1034 PetscFunctionBegin; 1035 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1036 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1037 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1038 1039 for (i=0; i<m; i++) { 1040 idx = a->j + a->i[i]; 1041 v = a->a + a->i[i]; 1042 n = a->i[i+1] - a->i[i]; 1043 alpha1 = x[6*i]; 1044 alpha2 = x[6*i+1]; 1045 alpha3 = x[6*i+2]; 1046 alpha4 = x[6*i+3]; 1047 alpha5 = x[6*i+4]; 1048 alpha6 = x[6*i+5]; 1049 while (n-->0) { 1050 y[6*(*idx)] += alpha1*(*v); 1051 y[6*(*idx)+1] += alpha2*(*v); 1052 y[6*(*idx)+2] += alpha3*(*v); 1053 y[6*(*idx)+3] += alpha4*(*v); 1054 y[6*(*idx)+4] += alpha5*(*v); 1055 y[6*(*idx)+5] += alpha6*(*v); 1056 idx++; v++; 1057 } 1058 } 1059 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1060 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1061 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1062 PetscFunctionReturn(0); 1063 } 1064 1065 /* ------------------------------------------------------------------------------*/ 1066 #undef __FUNCT__ 1067 #define __FUNCT__ "MatMult_SeqMAIJ_7" 1068 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1069 { 1070 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1071 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1072 const PetscScalar *x,*v; 1073 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1074 PetscErrorCode ierr; 1075 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1076 PetscInt nonzerorow=0,n,i,jrow,j; 1077 1078 PetscFunctionBegin; 1079 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1080 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1081 idx = a->j; 1082 v = a->a; 1083 ii = a->i; 1084 1085 for (i=0; i<m; i++) { 1086 jrow = ii[i]; 1087 n = ii[i+1] - jrow; 1088 sum1 = 0.0; 1089 sum2 = 0.0; 1090 sum3 = 0.0; 1091 sum4 = 0.0; 1092 sum5 = 0.0; 1093 sum6 = 0.0; 1094 sum7 = 0.0; 1095 1096 nonzerorow += (n>0); 1097 for (j=0; j<n; j++) { 1098 sum1 += v[jrow]*x[7*idx[jrow]]; 1099 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1100 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1101 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1102 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1103 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1104 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1105 jrow++; 1106 } 1107 y[7*i] = sum1; 1108 y[7*i+1] = sum2; 1109 y[7*i+2] = sum3; 1110 y[7*i+3] = sum4; 1111 y[7*i+4] = sum5; 1112 y[7*i+5] = sum6; 1113 y[7*i+6] = sum7; 1114 } 1115 1116 ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 1117 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1118 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1119 PetscFunctionReturn(0); 1120 } 1121 1122 #undef __FUNCT__ 1123 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1124 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1125 { 1126 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1127 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1128 const PetscScalar *x,*v; 1129 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1130 PetscErrorCode ierr; 1131 const PetscInt m = b->AIJ->rmap->n,*idx; 1132 PetscInt n,i; 1133 1134 PetscFunctionBegin; 1135 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1136 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1137 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1138 1139 for (i=0; i<m; i++) { 1140 idx = a->j + a->i[i]; 1141 v = a->a + a->i[i]; 1142 n = a->i[i+1] - a->i[i]; 1143 alpha1 = x[7*i]; 1144 alpha2 = x[7*i+1]; 1145 alpha3 = x[7*i+2]; 1146 alpha4 = x[7*i+3]; 1147 alpha5 = x[7*i+4]; 1148 alpha6 = x[7*i+5]; 1149 alpha7 = x[7*i+6]; 1150 while (n-->0) { 1151 y[7*(*idx)] += alpha1*(*v); 1152 y[7*(*idx)+1] += alpha2*(*v); 1153 y[7*(*idx)+2] += alpha3*(*v); 1154 y[7*(*idx)+3] += alpha4*(*v); 1155 y[7*(*idx)+4] += alpha5*(*v); 1156 y[7*(*idx)+5] += alpha6*(*v); 1157 y[7*(*idx)+6] += alpha7*(*v); 1158 idx++; v++; 1159 } 1160 } 1161 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1162 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1163 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1169 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1170 { 1171 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1172 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1173 const PetscScalar *x,*v; 1174 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1175 PetscErrorCode ierr; 1176 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1177 PetscInt n,i,jrow,j; 1178 1179 PetscFunctionBegin; 1180 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1181 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1182 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1183 idx = a->j; 1184 v = a->a; 1185 ii = a->i; 1186 1187 for (i=0; i<m; i++) { 1188 jrow = ii[i]; 1189 n = ii[i+1] - jrow; 1190 sum1 = 0.0; 1191 sum2 = 0.0; 1192 sum3 = 0.0; 1193 sum4 = 0.0; 1194 sum5 = 0.0; 1195 sum6 = 0.0; 1196 sum7 = 0.0; 1197 for (j=0; j<n; j++) { 1198 sum1 += v[jrow]*x[7*idx[jrow]]; 1199 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1200 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1201 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1202 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1203 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1204 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1205 jrow++; 1206 } 1207 y[7*i] += sum1; 1208 y[7*i+1] += sum2; 1209 y[7*i+2] += sum3; 1210 y[7*i+3] += sum4; 1211 y[7*i+4] += sum5; 1212 y[7*i+5] += sum6; 1213 y[7*i+6] += sum7; 1214 } 1215 1216 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1217 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1218 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1219 PetscFunctionReturn(0); 1220 } 1221 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1224 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1225 { 1226 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1227 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1228 const PetscScalar *x,*v; 1229 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1230 PetscErrorCode ierr; 1231 const PetscInt m = b->AIJ->rmap->n,*idx; 1232 PetscInt n,i; 1233 1234 PetscFunctionBegin; 1235 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1236 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1237 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1238 for (i=0; i<m; i++) { 1239 idx = a->j + a->i[i]; 1240 v = a->a + a->i[i]; 1241 n = a->i[i+1] - a->i[i]; 1242 alpha1 = x[7*i]; 1243 alpha2 = x[7*i+1]; 1244 alpha3 = x[7*i+2]; 1245 alpha4 = x[7*i+3]; 1246 alpha5 = x[7*i+4]; 1247 alpha6 = x[7*i+5]; 1248 alpha7 = x[7*i+6]; 1249 while (n-->0) { 1250 y[7*(*idx)] += alpha1*(*v); 1251 y[7*(*idx)+1] += alpha2*(*v); 1252 y[7*(*idx)+2] += alpha3*(*v); 1253 y[7*(*idx)+3] += alpha4*(*v); 1254 y[7*(*idx)+4] += alpha5*(*v); 1255 y[7*(*idx)+5] += alpha6*(*v); 1256 y[7*(*idx)+6] += alpha7*(*v); 1257 idx++; v++; 1258 } 1259 } 1260 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1261 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1262 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1263 PetscFunctionReturn(0); 1264 } 1265 1266 #undef __FUNCT__ 1267 #define __FUNCT__ "MatMult_SeqMAIJ_8" 1268 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1269 { 1270 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1271 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1272 const PetscScalar *x,*v; 1273 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1274 PetscErrorCode ierr; 1275 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1276 PetscInt nonzerorow=0,n,i,jrow,j; 1277 1278 PetscFunctionBegin; 1279 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1280 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1281 idx = a->j; 1282 v = a->a; 1283 ii = a->i; 1284 1285 for (i=0; i<m; i++) { 1286 jrow = ii[i]; 1287 n = ii[i+1] - jrow; 1288 sum1 = 0.0; 1289 sum2 = 0.0; 1290 sum3 = 0.0; 1291 sum4 = 0.0; 1292 sum5 = 0.0; 1293 sum6 = 0.0; 1294 sum7 = 0.0; 1295 sum8 = 0.0; 1296 1297 nonzerorow += (n>0); 1298 for (j=0; j<n; j++) { 1299 sum1 += v[jrow]*x[8*idx[jrow]]; 1300 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1301 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1302 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1303 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1304 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1305 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1306 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1307 jrow++; 1308 } 1309 y[8*i] = sum1; 1310 y[8*i+1] = sum2; 1311 y[8*i+2] = sum3; 1312 y[8*i+3] = sum4; 1313 y[8*i+4] = sum5; 1314 y[8*i+5] = sum6; 1315 y[8*i+6] = sum7; 1316 y[8*i+7] = sum8; 1317 } 1318 1319 ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 1320 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1321 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1322 PetscFunctionReturn(0); 1323 } 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1327 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1328 { 1329 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1330 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1331 const PetscScalar *x,*v; 1332 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1333 PetscErrorCode ierr; 1334 const PetscInt m = b->AIJ->rmap->n,*idx; 1335 PetscInt n,i; 1336 1337 PetscFunctionBegin; 1338 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1339 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1340 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1341 1342 for (i=0; i<m; i++) { 1343 idx = a->j + a->i[i]; 1344 v = a->a + a->i[i]; 1345 n = a->i[i+1] - a->i[i]; 1346 alpha1 = x[8*i]; 1347 alpha2 = x[8*i+1]; 1348 alpha3 = x[8*i+2]; 1349 alpha4 = x[8*i+3]; 1350 alpha5 = x[8*i+4]; 1351 alpha6 = x[8*i+5]; 1352 alpha7 = x[8*i+6]; 1353 alpha8 = x[8*i+7]; 1354 while (n-->0) { 1355 y[8*(*idx)] += alpha1*(*v); 1356 y[8*(*idx)+1] += alpha2*(*v); 1357 y[8*(*idx)+2] += alpha3*(*v); 1358 y[8*(*idx)+3] += alpha4*(*v); 1359 y[8*(*idx)+4] += alpha5*(*v); 1360 y[8*(*idx)+5] += alpha6*(*v); 1361 y[8*(*idx)+6] += alpha7*(*v); 1362 y[8*(*idx)+7] += alpha8*(*v); 1363 idx++; v++; 1364 } 1365 } 1366 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1367 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1368 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1369 PetscFunctionReturn(0); 1370 } 1371 1372 #undef __FUNCT__ 1373 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1374 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1375 { 1376 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1377 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1378 const PetscScalar *x,*v; 1379 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1380 PetscErrorCode ierr; 1381 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1382 PetscInt n,i,jrow,j; 1383 1384 PetscFunctionBegin; 1385 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1386 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1387 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1388 idx = a->j; 1389 v = a->a; 1390 ii = a->i; 1391 1392 for (i=0; i<m; i++) { 1393 jrow = ii[i]; 1394 n = ii[i+1] - jrow; 1395 sum1 = 0.0; 1396 sum2 = 0.0; 1397 sum3 = 0.0; 1398 sum4 = 0.0; 1399 sum5 = 0.0; 1400 sum6 = 0.0; 1401 sum7 = 0.0; 1402 sum8 = 0.0; 1403 for (j=0; j<n; j++) { 1404 sum1 += v[jrow]*x[8*idx[jrow]]; 1405 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1406 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1407 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1408 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1409 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1410 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1411 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1412 jrow++; 1413 } 1414 y[8*i] += sum1; 1415 y[8*i+1] += sum2; 1416 y[8*i+2] += sum3; 1417 y[8*i+3] += sum4; 1418 y[8*i+4] += sum5; 1419 y[8*i+5] += sum6; 1420 y[8*i+6] += sum7; 1421 y[8*i+7] += sum8; 1422 } 1423 1424 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1425 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1426 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1427 PetscFunctionReturn(0); 1428 } 1429 1430 #undef __FUNCT__ 1431 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1432 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1433 { 1434 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1435 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1436 const PetscScalar *x,*v; 1437 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1438 PetscErrorCode ierr; 1439 const PetscInt m = b->AIJ->rmap->n,*idx; 1440 PetscInt n,i; 1441 1442 PetscFunctionBegin; 1443 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1444 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1445 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1446 for (i=0; i<m; i++) { 1447 idx = a->j + a->i[i]; 1448 v = a->a + a->i[i]; 1449 n = a->i[i+1] - a->i[i]; 1450 alpha1 = x[8*i]; 1451 alpha2 = x[8*i+1]; 1452 alpha3 = x[8*i+2]; 1453 alpha4 = x[8*i+3]; 1454 alpha5 = x[8*i+4]; 1455 alpha6 = x[8*i+5]; 1456 alpha7 = x[8*i+6]; 1457 alpha8 = x[8*i+7]; 1458 while (n-->0) { 1459 y[8*(*idx)] += alpha1*(*v); 1460 y[8*(*idx)+1] += alpha2*(*v); 1461 y[8*(*idx)+2] += alpha3*(*v); 1462 y[8*(*idx)+3] += alpha4*(*v); 1463 y[8*(*idx)+4] += alpha5*(*v); 1464 y[8*(*idx)+5] += alpha6*(*v); 1465 y[8*(*idx)+6] += alpha7*(*v); 1466 y[8*(*idx)+7] += alpha8*(*v); 1467 idx++; v++; 1468 } 1469 } 1470 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1471 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1472 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1473 PetscFunctionReturn(0); 1474 } 1475 1476 /* ------------------------------------------------------------------------------*/ 1477 #undef __FUNCT__ 1478 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1479 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1480 { 1481 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1482 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1483 const PetscScalar *x,*v; 1484 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1485 PetscErrorCode ierr; 1486 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1487 PetscInt nonzerorow=0,n,i,jrow,j; 1488 1489 PetscFunctionBegin; 1490 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1491 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1492 idx = a->j; 1493 v = a->a; 1494 ii = a->i; 1495 1496 for (i=0; i<m; i++) { 1497 jrow = ii[i]; 1498 n = ii[i+1] - jrow; 1499 sum1 = 0.0; 1500 sum2 = 0.0; 1501 sum3 = 0.0; 1502 sum4 = 0.0; 1503 sum5 = 0.0; 1504 sum6 = 0.0; 1505 sum7 = 0.0; 1506 sum8 = 0.0; 1507 sum9 = 0.0; 1508 1509 nonzerorow += (n>0); 1510 for (j=0; j<n; j++) { 1511 sum1 += v[jrow]*x[9*idx[jrow]]; 1512 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1513 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1514 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1515 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1516 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1517 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1518 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1519 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1520 jrow++; 1521 } 1522 y[9*i] = sum1; 1523 y[9*i+1] = sum2; 1524 y[9*i+2] = sum3; 1525 y[9*i+3] = sum4; 1526 y[9*i+4] = sum5; 1527 y[9*i+5] = sum6; 1528 y[9*i+6] = sum7; 1529 y[9*i+7] = sum8; 1530 y[9*i+8] = sum9; 1531 } 1532 1533 ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 1534 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1535 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1536 PetscFunctionReturn(0); 1537 } 1538 1539 /* ------------------------------------------------------------------------------*/ 1540 1541 #undef __FUNCT__ 1542 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1543 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1544 { 1545 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1546 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1547 const PetscScalar *x,*v; 1548 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1549 PetscErrorCode ierr; 1550 const PetscInt m = b->AIJ->rmap->n,*idx; 1551 PetscInt n,i; 1552 1553 PetscFunctionBegin; 1554 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1555 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1556 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1557 1558 for (i=0; i<m; i++) { 1559 idx = a->j + a->i[i]; 1560 v = a->a + a->i[i]; 1561 n = a->i[i+1] - a->i[i]; 1562 alpha1 = x[9*i]; 1563 alpha2 = x[9*i+1]; 1564 alpha3 = x[9*i+2]; 1565 alpha4 = x[9*i+3]; 1566 alpha5 = x[9*i+4]; 1567 alpha6 = x[9*i+5]; 1568 alpha7 = x[9*i+6]; 1569 alpha8 = x[9*i+7]; 1570 alpha9 = x[9*i+8]; 1571 while (n-->0) { 1572 y[9*(*idx)] += alpha1*(*v); 1573 y[9*(*idx)+1] += alpha2*(*v); 1574 y[9*(*idx)+2] += alpha3*(*v); 1575 y[9*(*idx)+3] += alpha4*(*v); 1576 y[9*(*idx)+4] += alpha5*(*v); 1577 y[9*(*idx)+5] += alpha6*(*v); 1578 y[9*(*idx)+6] += alpha7*(*v); 1579 y[9*(*idx)+7] += alpha8*(*v); 1580 y[9*(*idx)+8] += alpha9*(*v); 1581 idx++; v++; 1582 } 1583 } 1584 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1585 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1586 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1587 PetscFunctionReturn(0); 1588 } 1589 1590 #undef __FUNCT__ 1591 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1592 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1593 { 1594 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1595 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1596 const PetscScalar *x,*v; 1597 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1598 PetscErrorCode ierr; 1599 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1600 PetscInt n,i,jrow,j; 1601 1602 PetscFunctionBegin; 1603 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1604 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1605 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1606 idx = a->j; 1607 v = a->a; 1608 ii = a->i; 1609 1610 for (i=0; i<m; i++) { 1611 jrow = ii[i]; 1612 n = ii[i+1] - jrow; 1613 sum1 = 0.0; 1614 sum2 = 0.0; 1615 sum3 = 0.0; 1616 sum4 = 0.0; 1617 sum5 = 0.0; 1618 sum6 = 0.0; 1619 sum7 = 0.0; 1620 sum8 = 0.0; 1621 sum9 = 0.0; 1622 for (j=0; j<n; j++) { 1623 sum1 += v[jrow]*x[9*idx[jrow]]; 1624 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1625 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1626 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1627 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1628 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1629 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1630 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1631 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1632 jrow++; 1633 } 1634 y[9*i] += sum1; 1635 y[9*i+1] += sum2; 1636 y[9*i+2] += sum3; 1637 y[9*i+3] += sum4; 1638 y[9*i+4] += sum5; 1639 y[9*i+5] += sum6; 1640 y[9*i+6] += sum7; 1641 y[9*i+7] += sum8; 1642 y[9*i+8] += sum9; 1643 } 1644 1645 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1646 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1647 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1648 PetscFunctionReturn(0); 1649 } 1650 1651 #undef __FUNCT__ 1652 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1653 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1654 { 1655 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1656 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1657 const PetscScalar *x,*v; 1658 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1659 PetscErrorCode ierr; 1660 const PetscInt m = b->AIJ->rmap->n,*idx; 1661 PetscInt n,i; 1662 1663 PetscFunctionBegin; 1664 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1665 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1666 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1667 for (i=0; i<m; i++) { 1668 idx = a->j + a->i[i]; 1669 v = a->a + a->i[i]; 1670 n = a->i[i+1] - a->i[i]; 1671 alpha1 = x[9*i]; 1672 alpha2 = x[9*i+1]; 1673 alpha3 = x[9*i+2]; 1674 alpha4 = x[9*i+3]; 1675 alpha5 = x[9*i+4]; 1676 alpha6 = x[9*i+5]; 1677 alpha7 = x[9*i+6]; 1678 alpha8 = x[9*i+7]; 1679 alpha9 = x[9*i+8]; 1680 while (n-->0) { 1681 y[9*(*idx)] += alpha1*(*v); 1682 y[9*(*idx)+1] += alpha2*(*v); 1683 y[9*(*idx)+2] += alpha3*(*v); 1684 y[9*(*idx)+3] += alpha4*(*v); 1685 y[9*(*idx)+4] += alpha5*(*v); 1686 y[9*(*idx)+5] += alpha6*(*v); 1687 y[9*(*idx)+6] += alpha7*(*v); 1688 y[9*(*idx)+7] += alpha8*(*v); 1689 y[9*(*idx)+8] += alpha9*(*v); 1690 idx++; v++; 1691 } 1692 } 1693 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1694 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1695 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1696 PetscFunctionReturn(0); 1697 } 1698 #undef __FUNCT__ 1699 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1700 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1701 { 1702 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1703 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1704 const PetscScalar *x,*v; 1705 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1706 PetscErrorCode ierr; 1707 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1708 PetscInt nonzerorow=0,n,i,jrow,j; 1709 1710 PetscFunctionBegin; 1711 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1712 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1713 idx = a->j; 1714 v = a->a; 1715 ii = a->i; 1716 1717 for (i=0; i<m; i++) { 1718 jrow = ii[i]; 1719 n = ii[i+1] - jrow; 1720 sum1 = 0.0; 1721 sum2 = 0.0; 1722 sum3 = 0.0; 1723 sum4 = 0.0; 1724 sum5 = 0.0; 1725 sum6 = 0.0; 1726 sum7 = 0.0; 1727 sum8 = 0.0; 1728 sum9 = 0.0; 1729 sum10 = 0.0; 1730 1731 nonzerorow += (n>0); 1732 for (j=0; j<n; j++) { 1733 sum1 += v[jrow]*x[10*idx[jrow]]; 1734 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1735 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1736 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1737 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1738 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1739 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1740 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1741 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1742 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1743 jrow++; 1744 } 1745 y[10*i] = sum1; 1746 y[10*i+1] = sum2; 1747 y[10*i+2] = sum3; 1748 y[10*i+3] = sum4; 1749 y[10*i+4] = sum5; 1750 y[10*i+5] = sum6; 1751 y[10*i+6] = sum7; 1752 y[10*i+7] = sum8; 1753 y[10*i+8] = sum9; 1754 y[10*i+9] = sum10; 1755 } 1756 1757 ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 1758 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1759 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1760 PetscFunctionReturn(0); 1761 } 1762 1763 #undef __FUNCT__ 1764 #define __FUNCT__ "MatMultAdd_SeqMAIJ_10" 1765 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1766 { 1767 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1768 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1769 const PetscScalar *x,*v; 1770 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1771 PetscErrorCode ierr; 1772 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1773 PetscInt n,i,jrow,j; 1774 1775 PetscFunctionBegin; 1776 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1777 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1778 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1779 idx = a->j; 1780 v = a->a; 1781 ii = a->i; 1782 1783 for (i=0; i<m; i++) { 1784 jrow = ii[i]; 1785 n = ii[i+1] - jrow; 1786 sum1 = 0.0; 1787 sum2 = 0.0; 1788 sum3 = 0.0; 1789 sum4 = 0.0; 1790 sum5 = 0.0; 1791 sum6 = 0.0; 1792 sum7 = 0.0; 1793 sum8 = 0.0; 1794 sum9 = 0.0; 1795 sum10 = 0.0; 1796 for (j=0; j<n; j++) { 1797 sum1 += v[jrow]*x[10*idx[jrow]]; 1798 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1799 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1800 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1801 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1802 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1803 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1804 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1805 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1806 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1807 jrow++; 1808 } 1809 y[10*i] += sum1; 1810 y[10*i+1] += sum2; 1811 y[10*i+2] += sum3; 1812 y[10*i+3] += sum4; 1813 y[10*i+4] += sum5; 1814 y[10*i+5] += sum6; 1815 y[10*i+6] += sum7; 1816 y[10*i+7] += sum8; 1817 y[10*i+8] += sum9; 1818 y[10*i+9] += sum10; 1819 } 1820 1821 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1822 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1823 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1824 PetscFunctionReturn(0); 1825 } 1826 1827 #undef __FUNCT__ 1828 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1829 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1830 { 1831 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1832 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1833 const PetscScalar *x,*v; 1834 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1835 PetscErrorCode ierr; 1836 const PetscInt m = b->AIJ->rmap->n,*idx; 1837 PetscInt n,i; 1838 1839 PetscFunctionBegin; 1840 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1841 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1842 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1843 1844 for (i=0; i<m; i++) { 1845 idx = a->j + a->i[i]; 1846 v = a->a + a->i[i]; 1847 n = a->i[i+1] - a->i[i]; 1848 alpha1 = x[10*i]; 1849 alpha2 = x[10*i+1]; 1850 alpha3 = x[10*i+2]; 1851 alpha4 = x[10*i+3]; 1852 alpha5 = x[10*i+4]; 1853 alpha6 = x[10*i+5]; 1854 alpha7 = x[10*i+6]; 1855 alpha8 = x[10*i+7]; 1856 alpha9 = x[10*i+8]; 1857 alpha10 = x[10*i+9]; 1858 while (n-->0) { 1859 y[10*(*idx)] += alpha1*(*v); 1860 y[10*(*idx)+1] += alpha2*(*v); 1861 y[10*(*idx)+2] += alpha3*(*v); 1862 y[10*(*idx)+3] += alpha4*(*v); 1863 y[10*(*idx)+4] += alpha5*(*v); 1864 y[10*(*idx)+5] += alpha6*(*v); 1865 y[10*(*idx)+6] += alpha7*(*v); 1866 y[10*(*idx)+7] += alpha8*(*v); 1867 y[10*(*idx)+8] += alpha9*(*v); 1868 y[10*(*idx)+9] += alpha10*(*v); 1869 idx++; v++; 1870 } 1871 } 1872 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1873 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1874 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1875 PetscFunctionReturn(0); 1876 } 1877 1878 #undef __FUNCT__ 1879 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1880 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1881 { 1882 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1883 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1884 const PetscScalar *x,*v; 1885 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1886 PetscErrorCode ierr; 1887 const PetscInt m = b->AIJ->rmap->n,*idx; 1888 PetscInt n,i; 1889 1890 PetscFunctionBegin; 1891 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1892 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1893 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1894 for (i=0; i<m; i++) { 1895 idx = a->j + a->i[i]; 1896 v = a->a + a->i[i]; 1897 n = a->i[i+1] - a->i[i]; 1898 alpha1 = x[10*i]; 1899 alpha2 = x[10*i+1]; 1900 alpha3 = x[10*i+2]; 1901 alpha4 = x[10*i+3]; 1902 alpha5 = x[10*i+4]; 1903 alpha6 = x[10*i+5]; 1904 alpha7 = x[10*i+6]; 1905 alpha8 = x[10*i+7]; 1906 alpha9 = x[10*i+8]; 1907 alpha10 = x[10*i+9]; 1908 while (n-->0) { 1909 y[10*(*idx)] += alpha1*(*v); 1910 y[10*(*idx)+1] += alpha2*(*v); 1911 y[10*(*idx)+2] += alpha3*(*v); 1912 y[10*(*idx)+3] += alpha4*(*v); 1913 y[10*(*idx)+4] += alpha5*(*v); 1914 y[10*(*idx)+5] += alpha6*(*v); 1915 y[10*(*idx)+6] += alpha7*(*v); 1916 y[10*(*idx)+7] += alpha8*(*v); 1917 y[10*(*idx)+8] += alpha9*(*v); 1918 y[10*(*idx)+9] += alpha10*(*v); 1919 idx++; v++; 1920 } 1921 } 1922 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1923 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1924 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1925 PetscFunctionReturn(0); 1926 } 1927 1928 1929 /*--------------------------------------------------------------------------------------------*/ 1930 #undef __FUNCT__ 1931 #define __FUNCT__ "MatMult_SeqMAIJ_11" 1932 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1933 { 1934 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1935 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1936 const PetscScalar *x,*v; 1937 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1938 PetscErrorCode ierr; 1939 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1940 PetscInt nonzerorow=0,n,i,jrow,j; 1941 1942 PetscFunctionBegin; 1943 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1944 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1945 idx = a->j; 1946 v = a->a; 1947 ii = a->i; 1948 1949 for (i=0; i<m; i++) { 1950 jrow = ii[i]; 1951 n = ii[i+1] - jrow; 1952 sum1 = 0.0; 1953 sum2 = 0.0; 1954 sum3 = 0.0; 1955 sum4 = 0.0; 1956 sum5 = 0.0; 1957 sum6 = 0.0; 1958 sum7 = 0.0; 1959 sum8 = 0.0; 1960 sum9 = 0.0; 1961 sum10 = 0.0; 1962 sum11 = 0.0; 1963 1964 nonzerorow += (n>0); 1965 for (j=0; j<n; j++) { 1966 sum1 += v[jrow]*x[11*idx[jrow]]; 1967 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1968 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1969 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1970 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1971 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1972 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1973 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1974 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1975 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1976 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1977 jrow++; 1978 } 1979 y[11*i] = sum1; 1980 y[11*i+1] = sum2; 1981 y[11*i+2] = sum3; 1982 y[11*i+3] = sum4; 1983 y[11*i+4] = sum5; 1984 y[11*i+5] = sum6; 1985 y[11*i+6] = sum7; 1986 y[11*i+7] = sum8; 1987 y[11*i+8] = sum9; 1988 y[11*i+9] = sum10; 1989 y[11*i+10] = sum11; 1990 } 1991 1992 ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr); 1993 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1994 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1995 PetscFunctionReturn(0); 1996 } 1997 1998 #undef __FUNCT__ 1999 #define __FUNCT__ "MatMultAdd_SeqMAIJ_11" 2000 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2001 { 2002 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2003 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2004 const PetscScalar *x,*v; 2005 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 2006 PetscErrorCode ierr; 2007 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2008 PetscInt n,i,jrow,j; 2009 2010 PetscFunctionBegin; 2011 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2012 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2013 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2014 idx = a->j; 2015 v = a->a; 2016 ii = a->i; 2017 2018 for (i=0; i<m; i++) { 2019 jrow = ii[i]; 2020 n = ii[i+1] - jrow; 2021 sum1 = 0.0; 2022 sum2 = 0.0; 2023 sum3 = 0.0; 2024 sum4 = 0.0; 2025 sum5 = 0.0; 2026 sum6 = 0.0; 2027 sum7 = 0.0; 2028 sum8 = 0.0; 2029 sum9 = 0.0; 2030 sum10 = 0.0; 2031 sum11 = 0.0; 2032 for (j=0; j<n; j++) { 2033 sum1 += v[jrow]*x[11*idx[jrow]]; 2034 sum2 += v[jrow]*x[11*idx[jrow]+1]; 2035 sum3 += v[jrow]*x[11*idx[jrow]+2]; 2036 sum4 += v[jrow]*x[11*idx[jrow]+3]; 2037 sum5 += v[jrow]*x[11*idx[jrow]+4]; 2038 sum6 += v[jrow]*x[11*idx[jrow]+5]; 2039 sum7 += v[jrow]*x[11*idx[jrow]+6]; 2040 sum8 += v[jrow]*x[11*idx[jrow]+7]; 2041 sum9 += v[jrow]*x[11*idx[jrow]+8]; 2042 sum10 += v[jrow]*x[11*idx[jrow]+9]; 2043 sum11 += v[jrow]*x[11*idx[jrow]+10]; 2044 jrow++; 2045 } 2046 y[11*i] += sum1; 2047 y[11*i+1] += sum2; 2048 y[11*i+2] += sum3; 2049 y[11*i+3] += sum4; 2050 y[11*i+4] += sum5; 2051 y[11*i+5] += sum6; 2052 y[11*i+6] += sum7; 2053 y[11*i+7] += sum8; 2054 y[11*i+8] += sum9; 2055 y[11*i+9] += sum10; 2056 y[11*i+10] += sum11; 2057 } 2058 2059 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2060 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2061 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2062 PetscFunctionReturn(0); 2063 } 2064 2065 #undef __FUNCT__ 2066 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11" 2067 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 2068 { 2069 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2070 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2071 const PetscScalar *x,*v; 2072 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2073 PetscErrorCode ierr; 2074 const PetscInt m = b->AIJ->rmap->n,*idx; 2075 PetscInt n,i; 2076 2077 PetscFunctionBegin; 2078 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2079 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2080 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2081 2082 for (i=0; i<m; i++) { 2083 idx = a->j + a->i[i]; 2084 v = a->a + a->i[i]; 2085 n = a->i[i+1] - a->i[i]; 2086 alpha1 = x[11*i]; 2087 alpha2 = x[11*i+1]; 2088 alpha3 = x[11*i+2]; 2089 alpha4 = x[11*i+3]; 2090 alpha5 = x[11*i+4]; 2091 alpha6 = x[11*i+5]; 2092 alpha7 = x[11*i+6]; 2093 alpha8 = x[11*i+7]; 2094 alpha9 = x[11*i+8]; 2095 alpha10 = x[11*i+9]; 2096 alpha11 = x[11*i+10]; 2097 while (n-->0) { 2098 y[11*(*idx)] += alpha1*(*v); 2099 y[11*(*idx)+1] += alpha2*(*v); 2100 y[11*(*idx)+2] += alpha3*(*v); 2101 y[11*(*idx)+3] += alpha4*(*v); 2102 y[11*(*idx)+4] += alpha5*(*v); 2103 y[11*(*idx)+5] += alpha6*(*v); 2104 y[11*(*idx)+6] += alpha7*(*v); 2105 y[11*(*idx)+7] += alpha8*(*v); 2106 y[11*(*idx)+8] += alpha9*(*v); 2107 y[11*(*idx)+9] += alpha10*(*v); 2108 y[11*(*idx)+10] += alpha11*(*v); 2109 idx++; v++; 2110 } 2111 } 2112 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2113 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2114 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2115 PetscFunctionReturn(0); 2116 } 2117 2118 #undef __FUNCT__ 2119 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11" 2120 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2121 { 2122 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2123 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2124 const PetscScalar *x,*v; 2125 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2126 PetscErrorCode ierr; 2127 const PetscInt m = b->AIJ->rmap->n,*idx; 2128 PetscInt n,i; 2129 2130 PetscFunctionBegin; 2131 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2132 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2133 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2134 for (i=0; i<m; i++) { 2135 idx = a->j + a->i[i]; 2136 v = a->a + a->i[i]; 2137 n = a->i[i+1] - a->i[i]; 2138 alpha1 = x[11*i]; 2139 alpha2 = x[11*i+1]; 2140 alpha3 = x[11*i+2]; 2141 alpha4 = x[11*i+3]; 2142 alpha5 = x[11*i+4]; 2143 alpha6 = x[11*i+5]; 2144 alpha7 = x[11*i+6]; 2145 alpha8 = x[11*i+7]; 2146 alpha9 = x[11*i+8]; 2147 alpha10 = x[11*i+9]; 2148 alpha11 = x[11*i+10]; 2149 while (n-->0) { 2150 y[11*(*idx)] += alpha1*(*v); 2151 y[11*(*idx)+1] += alpha2*(*v); 2152 y[11*(*idx)+2] += alpha3*(*v); 2153 y[11*(*idx)+3] += alpha4*(*v); 2154 y[11*(*idx)+4] += alpha5*(*v); 2155 y[11*(*idx)+5] += alpha6*(*v); 2156 y[11*(*idx)+6] += alpha7*(*v); 2157 y[11*(*idx)+7] += alpha8*(*v); 2158 y[11*(*idx)+8] += alpha9*(*v); 2159 y[11*(*idx)+9] += alpha10*(*v); 2160 y[11*(*idx)+10] += alpha11*(*v); 2161 idx++; v++; 2162 } 2163 } 2164 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2165 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2166 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 2171 /*--------------------------------------------------------------------------------------------*/ 2172 #undef __FUNCT__ 2173 #define __FUNCT__ "MatMult_SeqMAIJ_16" 2174 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2175 { 2176 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2177 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2178 const PetscScalar *x,*v; 2179 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2180 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2181 PetscErrorCode ierr; 2182 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2183 PetscInt nonzerorow=0,n,i,jrow,j; 2184 2185 PetscFunctionBegin; 2186 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2187 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2188 idx = a->j; 2189 v = a->a; 2190 ii = a->i; 2191 2192 for (i=0; i<m; i++) { 2193 jrow = ii[i]; 2194 n = ii[i+1] - jrow; 2195 sum1 = 0.0; 2196 sum2 = 0.0; 2197 sum3 = 0.0; 2198 sum4 = 0.0; 2199 sum5 = 0.0; 2200 sum6 = 0.0; 2201 sum7 = 0.0; 2202 sum8 = 0.0; 2203 sum9 = 0.0; 2204 sum10 = 0.0; 2205 sum11 = 0.0; 2206 sum12 = 0.0; 2207 sum13 = 0.0; 2208 sum14 = 0.0; 2209 sum15 = 0.0; 2210 sum16 = 0.0; 2211 2212 nonzerorow += (n>0); 2213 for (j=0; j<n; j++) { 2214 sum1 += v[jrow]*x[16*idx[jrow]]; 2215 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2216 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2217 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2218 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2219 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2220 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2221 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2222 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2223 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2224 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2225 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2226 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2227 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2228 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2229 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2230 jrow++; 2231 } 2232 y[16*i] = sum1; 2233 y[16*i+1] = sum2; 2234 y[16*i+2] = sum3; 2235 y[16*i+3] = sum4; 2236 y[16*i+4] = sum5; 2237 y[16*i+5] = sum6; 2238 y[16*i+6] = sum7; 2239 y[16*i+7] = sum8; 2240 y[16*i+8] = sum9; 2241 y[16*i+9] = sum10; 2242 y[16*i+10] = sum11; 2243 y[16*i+11] = sum12; 2244 y[16*i+12] = sum13; 2245 y[16*i+13] = sum14; 2246 y[16*i+14] = sum15; 2247 y[16*i+15] = sum16; 2248 } 2249 2250 ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 2251 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2252 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2253 PetscFunctionReturn(0); 2254 } 2255 2256 #undef __FUNCT__ 2257 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 2258 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2259 { 2260 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2261 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2262 const PetscScalar *x,*v; 2263 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2264 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2265 PetscErrorCode ierr; 2266 const PetscInt m = b->AIJ->rmap->n,*idx; 2267 PetscInt n,i; 2268 2269 PetscFunctionBegin; 2270 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2271 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2272 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2273 2274 for (i=0; i<m; i++) { 2275 idx = a->j + a->i[i]; 2276 v = a->a + a->i[i]; 2277 n = a->i[i+1] - a->i[i]; 2278 alpha1 = x[16*i]; 2279 alpha2 = x[16*i+1]; 2280 alpha3 = x[16*i+2]; 2281 alpha4 = x[16*i+3]; 2282 alpha5 = x[16*i+4]; 2283 alpha6 = x[16*i+5]; 2284 alpha7 = x[16*i+6]; 2285 alpha8 = x[16*i+7]; 2286 alpha9 = x[16*i+8]; 2287 alpha10 = x[16*i+9]; 2288 alpha11 = x[16*i+10]; 2289 alpha12 = x[16*i+11]; 2290 alpha13 = x[16*i+12]; 2291 alpha14 = x[16*i+13]; 2292 alpha15 = x[16*i+14]; 2293 alpha16 = x[16*i+15]; 2294 while (n-->0) { 2295 y[16*(*idx)] += alpha1*(*v); 2296 y[16*(*idx)+1] += alpha2*(*v); 2297 y[16*(*idx)+2] += alpha3*(*v); 2298 y[16*(*idx)+3] += alpha4*(*v); 2299 y[16*(*idx)+4] += alpha5*(*v); 2300 y[16*(*idx)+5] += alpha6*(*v); 2301 y[16*(*idx)+6] += alpha7*(*v); 2302 y[16*(*idx)+7] += alpha8*(*v); 2303 y[16*(*idx)+8] += alpha9*(*v); 2304 y[16*(*idx)+9] += alpha10*(*v); 2305 y[16*(*idx)+10] += alpha11*(*v); 2306 y[16*(*idx)+11] += alpha12*(*v); 2307 y[16*(*idx)+12] += alpha13*(*v); 2308 y[16*(*idx)+13] += alpha14*(*v); 2309 y[16*(*idx)+14] += alpha15*(*v); 2310 y[16*(*idx)+15] += alpha16*(*v); 2311 idx++; v++; 2312 } 2313 } 2314 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2315 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2316 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2317 PetscFunctionReturn(0); 2318 } 2319 2320 #undef __FUNCT__ 2321 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 2322 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2323 { 2324 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2325 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2326 const PetscScalar *x,*v; 2327 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2328 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2329 PetscErrorCode ierr; 2330 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2331 PetscInt n,i,jrow,j; 2332 2333 PetscFunctionBegin; 2334 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2335 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2336 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2337 idx = a->j; 2338 v = a->a; 2339 ii = a->i; 2340 2341 for (i=0; i<m; i++) { 2342 jrow = ii[i]; 2343 n = ii[i+1] - jrow; 2344 sum1 = 0.0; 2345 sum2 = 0.0; 2346 sum3 = 0.0; 2347 sum4 = 0.0; 2348 sum5 = 0.0; 2349 sum6 = 0.0; 2350 sum7 = 0.0; 2351 sum8 = 0.0; 2352 sum9 = 0.0; 2353 sum10 = 0.0; 2354 sum11 = 0.0; 2355 sum12 = 0.0; 2356 sum13 = 0.0; 2357 sum14 = 0.0; 2358 sum15 = 0.0; 2359 sum16 = 0.0; 2360 for (j=0; j<n; j++) { 2361 sum1 += v[jrow]*x[16*idx[jrow]]; 2362 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2363 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2364 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2365 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2366 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2367 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2368 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2369 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2370 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2371 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2372 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2373 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2374 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2375 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2376 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2377 jrow++; 2378 } 2379 y[16*i] += sum1; 2380 y[16*i+1] += sum2; 2381 y[16*i+2] += sum3; 2382 y[16*i+3] += sum4; 2383 y[16*i+4] += sum5; 2384 y[16*i+5] += sum6; 2385 y[16*i+6] += sum7; 2386 y[16*i+7] += sum8; 2387 y[16*i+8] += sum9; 2388 y[16*i+9] += sum10; 2389 y[16*i+10] += sum11; 2390 y[16*i+11] += sum12; 2391 y[16*i+12] += sum13; 2392 y[16*i+13] += sum14; 2393 y[16*i+14] += sum15; 2394 y[16*i+15] += sum16; 2395 } 2396 2397 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2398 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2399 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2400 PetscFunctionReturn(0); 2401 } 2402 2403 #undef __FUNCT__ 2404 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2405 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2406 { 2407 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2408 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2409 const PetscScalar *x,*v; 2410 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2411 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2412 PetscErrorCode ierr; 2413 const PetscInt m = b->AIJ->rmap->n,*idx; 2414 PetscInt n,i; 2415 2416 PetscFunctionBegin; 2417 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2418 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2419 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2420 for (i=0; i<m; i++) { 2421 idx = a->j + a->i[i]; 2422 v = a->a + a->i[i]; 2423 n = a->i[i+1] - a->i[i]; 2424 alpha1 = x[16*i]; 2425 alpha2 = x[16*i+1]; 2426 alpha3 = x[16*i+2]; 2427 alpha4 = x[16*i+3]; 2428 alpha5 = x[16*i+4]; 2429 alpha6 = x[16*i+5]; 2430 alpha7 = x[16*i+6]; 2431 alpha8 = x[16*i+7]; 2432 alpha9 = x[16*i+8]; 2433 alpha10 = x[16*i+9]; 2434 alpha11 = x[16*i+10]; 2435 alpha12 = x[16*i+11]; 2436 alpha13 = x[16*i+12]; 2437 alpha14 = x[16*i+13]; 2438 alpha15 = x[16*i+14]; 2439 alpha16 = x[16*i+15]; 2440 while (n-->0) { 2441 y[16*(*idx)] += alpha1*(*v); 2442 y[16*(*idx)+1] += alpha2*(*v); 2443 y[16*(*idx)+2] += alpha3*(*v); 2444 y[16*(*idx)+3] += alpha4*(*v); 2445 y[16*(*idx)+4] += alpha5*(*v); 2446 y[16*(*idx)+5] += alpha6*(*v); 2447 y[16*(*idx)+6] += alpha7*(*v); 2448 y[16*(*idx)+7] += alpha8*(*v); 2449 y[16*(*idx)+8] += alpha9*(*v); 2450 y[16*(*idx)+9] += alpha10*(*v); 2451 y[16*(*idx)+10] += alpha11*(*v); 2452 y[16*(*idx)+11] += alpha12*(*v); 2453 y[16*(*idx)+12] += alpha13*(*v); 2454 y[16*(*idx)+13] += alpha14*(*v); 2455 y[16*(*idx)+14] += alpha15*(*v); 2456 y[16*(*idx)+15] += alpha16*(*v); 2457 idx++; v++; 2458 } 2459 } 2460 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2461 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2462 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2463 PetscFunctionReturn(0); 2464 } 2465 2466 /*--------------------------------------------------------------------------------------------*/ 2467 #undef __FUNCT__ 2468 #define __FUNCT__ "MatMult_SeqMAIJ_18" 2469 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2470 { 2471 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2472 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2473 const PetscScalar *x,*v; 2474 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2475 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2476 PetscErrorCode ierr; 2477 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2478 PetscInt nonzerorow=0,n,i,jrow,j; 2479 2480 PetscFunctionBegin; 2481 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2482 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2483 idx = a->j; 2484 v = a->a; 2485 ii = a->i; 2486 2487 for (i=0; i<m; i++) { 2488 jrow = ii[i]; 2489 n = ii[i+1] - jrow; 2490 sum1 = 0.0; 2491 sum2 = 0.0; 2492 sum3 = 0.0; 2493 sum4 = 0.0; 2494 sum5 = 0.0; 2495 sum6 = 0.0; 2496 sum7 = 0.0; 2497 sum8 = 0.0; 2498 sum9 = 0.0; 2499 sum10 = 0.0; 2500 sum11 = 0.0; 2501 sum12 = 0.0; 2502 sum13 = 0.0; 2503 sum14 = 0.0; 2504 sum15 = 0.0; 2505 sum16 = 0.0; 2506 sum17 = 0.0; 2507 sum18 = 0.0; 2508 2509 nonzerorow += (n>0); 2510 for (j=0; j<n; j++) { 2511 sum1 += v[jrow]*x[18*idx[jrow]]; 2512 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2513 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2514 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2515 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2516 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2517 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2518 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2519 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2520 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2521 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2522 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2523 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2524 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2525 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2526 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2527 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2528 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2529 jrow++; 2530 } 2531 y[18*i] = sum1; 2532 y[18*i+1] = sum2; 2533 y[18*i+2] = sum3; 2534 y[18*i+3] = sum4; 2535 y[18*i+4] = sum5; 2536 y[18*i+5] = sum6; 2537 y[18*i+6] = sum7; 2538 y[18*i+7] = sum8; 2539 y[18*i+8] = sum9; 2540 y[18*i+9] = sum10; 2541 y[18*i+10] = sum11; 2542 y[18*i+11] = sum12; 2543 y[18*i+12] = sum13; 2544 y[18*i+13] = sum14; 2545 y[18*i+14] = sum15; 2546 y[18*i+15] = sum16; 2547 y[18*i+16] = sum17; 2548 y[18*i+17] = sum18; 2549 } 2550 2551 ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 2552 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2553 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2554 PetscFunctionReturn(0); 2555 } 2556 2557 #undef __FUNCT__ 2558 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18" 2559 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2560 { 2561 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2562 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2563 const PetscScalar *x,*v; 2564 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2565 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2566 PetscErrorCode ierr; 2567 const PetscInt m = b->AIJ->rmap->n,*idx; 2568 PetscInt n,i; 2569 2570 PetscFunctionBegin; 2571 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2572 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2573 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2574 2575 for (i=0; i<m; i++) { 2576 idx = a->j + a->i[i]; 2577 v = a->a + a->i[i]; 2578 n = a->i[i+1] - a->i[i]; 2579 alpha1 = x[18*i]; 2580 alpha2 = x[18*i+1]; 2581 alpha3 = x[18*i+2]; 2582 alpha4 = x[18*i+3]; 2583 alpha5 = x[18*i+4]; 2584 alpha6 = x[18*i+5]; 2585 alpha7 = x[18*i+6]; 2586 alpha8 = x[18*i+7]; 2587 alpha9 = x[18*i+8]; 2588 alpha10 = x[18*i+9]; 2589 alpha11 = x[18*i+10]; 2590 alpha12 = x[18*i+11]; 2591 alpha13 = x[18*i+12]; 2592 alpha14 = x[18*i+13]; 2593 alpha15 = x[18*i+14]; 2594 alpha16 = x[18*i+15]; 2595 alpha17 = x[18*i+16]; 2596 alpha18 = x[18*i+17]; 2597 while (n-->0) { 2598 y[18*(*idx)] += alpha1*(*v); 2599 y[18*(*idx)+1] += alpha2*(*v); 2600 y[18*(*idx)+2] += alpha3*(*v); 2601 y[18*(*idx)+3] += alpha4*(*v); 2602 y[18*(*idx)+4] += alpha5*(*v); 2603 y[18*(*idx)+5] += alpha6*(*v); 2604 y[18*(*idx)+6] += alpha7*(*v); 2605 y[18*(*idx)+7] += alpha8*(*v); 2606 y[18*(*idx)+8] += alpha9*(*v); 2607 y[18*(*idx)+9] += alpha10*(*v); 2608 y[18*(*idx)+10] += alpha11*(*v); 2609 y[18*(*idx)+11] += alpha12*(*v); 2610 y[18*(*idx)+12] += alpha13*(*v); 2611 y[18*(*idx)+13] += alpha14*(*v); 2612 y[18*(*idx)+14] += alpha15*(*v); 2613 y[18*(*idx)+15] += alpha16*(*v); 2614 y[18*(*idx)+16] += alpha17*(*v); 2615 y[18*(*idx)+17] += alpha18*(*v); 2616 idx++; v++; 2617 } 2618 } 2619 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2620 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2621 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2622 PetscFunctionReturn(0); 2623 } 2624 2625 #undef __FUNCT__ 2626 #define __FUNCT__ "MatMultAdd_SeqMAIJ_18" 2627 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2628 { 2629 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2630 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2631 const PetscScalar *x,*v; 2632 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2633 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2634 PetscErrorCode ierr; 2635 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2636 PetscInt n,i,jrow,j; 2637 2638 PetscFunctionBegin; 2639 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2640 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2641 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2642 idx = a->j; 2643 v = a->a; 2644 ii = a->i; 2645 2646 for (i=0; i<m; i++) { 2647 jrow = ii[i]; 2648 n = ii[i+1] - jrow; 2649 sum1 = 0.0; 2650 sum2 = 0.0; 2651 sum3 = 0.0; 2652 sum4 = 0.0; 2653 sum5 = 0.0; 2654 sum6 = 0.0; 2655 sum7 = 0.0; 2656 sum8 = 0.0; 2657 sum9 = 0.0; 2658 sum10 = 0.0; 2659 sum11 = 0.0; 2660 sum12 = 0.0; 2661 sum13 = 0.0; 2662 sum14 = 0.0; 2663 sum15 = 0.0; 2664 sum16 = 0.0; 2665 sum17 = 0.0; 2666 sum18 = 0.0; 2667 for (j=0; j<n; j++) { 2668 sum1 += v[jrow]*x[18*idx[jrow]]; 2669 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2670 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2671 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2672 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2673 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2674 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2675 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2676 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2677 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2678 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2679 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2680 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2681 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2682 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2683 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2684 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2685 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2686 jrow++; 2687 } 2688 y[18*i] += sum1; 2689 y[18*i+1] += sum2; 2690 y[18*i+2] += sum3; 2691 y[18*i+3] += sum4; 2692 y[18*i+4] += sum5; 2693 y[18*i+5] += sum6; 2694 y[18*i+6] += sum7; 2695 y[18*i+7] += sum8; 2696 y[18*i+8] += sum9; 2697 y[18*i+9] += sum10; 2698 y[18*i+10] += sum11; 2699 y[18*i+11] += sum12; 2700 y[18*i+12] += sum13; 2701 y[18*i+13] += sum14; 2702 y[18*i+14] += sum15; 2703 y[18*i+15] += sum16; 2704 y[18*i+16] += sum17; 2705 y[18*i+17] += sum18; 2706 } 2707 2708 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2709 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2710 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2711 PetscFunctionReturn(0); 2712 } 2713 2714 #undef __FUNCT__ 2715 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18" 2716 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2717 { 2718 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2719 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2720 const PetscScalar *x,*v; 2721 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2722 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2723 PetscErrorCode ierr; 2724 const PetscInt m = b->AIJ->rmap->n,*idx; 2725 PetscInt n,i; 2726 2727 PetscFunctionBegin; 2728 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2729 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2730 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2731 for (i=0; i<m; i++) { 2732 idx = a->j + a->i[i]; 2733 v = a->a + a->i[i]; 2734 n = a->i[i+1] - a->i[i]; 2735 alpha1 = x[18*i]; 2736 alpha2 = x[18*i+1]; 2737 alpha3 = x[18*i+2]; 2738 alpha4 = x[18*i+3]; 2739 alpha5 = x[18*i+4]; 2740 alpha6 = x[18*i+5]; 2741 alpha7 = x[18*i+6]; 2742 alpha8 = x[18*i+7]; 2743 alpha9 = x[18*i+8]; 2744 alpha10 = x[18*i+9]; 2745 alpha11 = x[18*i+10]; 2746 alpha12 = x[18*i+11]; 2747 alpha13 = x[18*i+12]; 2748 alpha14 = x[18*i+13]; 2749 alpha15 = x[18*i+14]; 2750 alpha16 = x[18*i+15]; 2751 alpha17 = x[18*i+16]; 2752 alpha18 = x[18*i+17]; 2753 while (n-->0) { 2754 y[18*(*idx)] += alpha1*(*v); 2755 y[18*(*idx)+1] += alpha2*(*v); 2756 y[18*(*idx)+2] += alpha3*(*v); 2757 y[18*(*idx)+3] += alpha4*(*v); 2758 y[18*(*idx)+4] += alpha5*(*v); 2759 y[18*(*idx)+5] += alpha6*(*v); 2760 y[18*(*idx)+6] += alpha7*(*v); 2761 y[18*(*idx)+7] += alpha8*(*v); 2762 y[18*(*idx)+8] += alpha9*(*v); 2763 y[18*(*idx)+9] += alpha10*(*v); 2764 y[18*(*idx)+10] += alpha11*(*v); 2765 y[18*(*idx)+11] += alpha12*(*v); 2766 y[18*(*idx)+12] += alpha13*(*v); 2767 y[18*(*idx)+13] += alpha14*(*v); 2768 y[18*(*idx)+14] += alpha15*(*v); 2769 y[18*(*idx)+15] += alpha16*(*v); 2770 y[18*(*idx)+16] += alpha17*(*v); 2771 y[18*(*idx)+17] += alpha18*(*v); 2772 idx++; v++; 2773 } 2774 } 2775 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2776 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2777 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2778 PetscFunctionReturn(0); 2779 } 2780 2781 #undef __FUNCT__ 2782 #define __FUNCT__ "MatMult_SeqMAIJ_N" 2783 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2784 { 2785 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2786 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2787 const PetscScalar *x,*v; 2788 PetscScalar *y,*sums; 2789 PetscErrorCode ierr; 2790 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2791 PetscInt n,i,jrow,j,dof = b->dof,k; 2792 2793 PetscFunctionBegin; 2794 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2795 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2796 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2797 idx = a->j; 2798 v = a->a; 2799 ii = a->i; 2800 2801 for (i=0; i<m; i++) { 2802 jrow = ii[i]; 2803 n = ii[i+1] - jrow; 2804 sums = y + dof*i; 2805 for (j=0; j<n; j++) { 2806 for (k=0; k<dof; k++) { 2807 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2808 } 2809 jrow++; 2810 } 2811 } 2812 2813 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2814 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2815 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2816 PetscFunctionReturn(0); 2817 } 2818 2819 #undef __FUNCT__ 2820 #define __FUNCT__ "MatMultAdd_SeqMAIJ_N" 2821 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2822 { 2823 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2824 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2825 const PetscScalar *x,*v; 2826 PetscScalar *y,*sums; 2827 PetscErrorCode ierr; 2828 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2829 PetscInt n,i,jrow,j,dof = b->dof,k; 2830 2831 PetscFunctionBegin; 2832 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2833 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2834 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2835 idx = a->j; 2836 v = a->a; 2837 ii = a->i; 2838 2839 for (i=0; i<m; i++) { 2840 jrow = ii[i]; 2841 n = ii[i+1] - jrow; 2842 sums = y + dof*i; 2843 for (j=0; j<n; j++) { 2844 for (k=0; k<dof; k++) { 2845 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2846 } 2847 jrow++; 2848 } 2849 } 2850 2851 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2852 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2853 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2854 PetscFunctionReturn(0); 2855 } 2856 2857 #undef __FUNCT__ 2858 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N" 2859 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2860 { 2861 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2862 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2863 const PetscScalar *x,*v,*alpha; 2864 PetscScalar *y; 2865 PetscErrorCode ierr; 2866 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2867 PetscInt n,i,k; 2868 2869 PetscFunctionBegin; 2870 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2871 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2872 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2873 for (i=0; i<m; i++) { 2874 idx = a->j + a->i[i]; 2875 v = a->a + a->i[i]; 2876 n = a->i[i+1] - a->i[i]; 2877 alpha = x + dof*i; 2878 while (n-->0) { 2879 for (k=0; k<dof; k++) { 2880 y[dof*(*idx)+k] += alpha[k]*(*v); 2881 } 2882 idx++; v++; 2883 } 2884 } 2885 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2886 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2887 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2888 PetscFunctionReturn(0); 2889 } 2890 2891 #undef __FUNCT__ 2892 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N" 2893 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2894 { 2895 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2896 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2897 const PetscScalar *x,*v,*alpha; 2898 PetscScalar *y; 2899 PetscErrorCode ierr; 2900 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2901 PetscInt n,i,k; 2902 2903 PetscFunctionBegin; 2904 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2905 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2906 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2907 for (i=0; i<m; i++) { 2908 idx = a->j + a->i[i]; 2909 v = a->a + a->i[i]; 2910 n = a->i[i+1] - a->i[i]; 2911 alpha = x + dof*i; 2912 while (n-->0) { 2913 for (k=0; k<dof; k++) { 2914 y[dof*(*idx)+k] += alpha[k]*(*v); 2915 } 2916 idx++; v++; 2917 } 2918 } 2919 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2920 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2921 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2922 PetscFunctionReturn(0); 2923 } 2924 2925 /*===================================================================================*/ 2926 #undef __FUNCT__ 2927 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2928 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2929 { 2930 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2931 PetscErrorCode ierr; 2932 2933 PetscFunctionBegin; 2934 /* start the scatter */ 2935 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2936 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2937 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2938 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2939 PetscFunctionReturn(0); 2940 } 2941 2942 #undef __FUNCT__ 2943 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2944 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2945 { 2946 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2947 PetscErrorCode ierr; 2948 2949 PetscFunctionBegin; 2950 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2951 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2952 ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2953 ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2954 PetscFunctionReturn(0); 2955 } 2956 2957 #undef __FUNCT__ 2958 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2959 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2960 { 2961 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2962 PetscErrorCode ierr; 2963 2964 PetscFunctionBegin; 2965 /* start the scatter */ 2966 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2967 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2968 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2969 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2970 PetscFunctionReturn(0); 2971 } 2972 2973 #undef __FUNCT__ 2974 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2975 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2976 { 2977 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2978 PetscErrorCode ierr; 2979 2980 PetscFunctionBegin; 2981 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2982 ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2983 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2984 ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2985 PetscFunctionReturn(0); 2986 } 2987 2988 /* ----------------------------------------------------------------*/ 2989 #undef __FUNCT__ 2990 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 2991 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2992 { 2993 PetscErrorCode ierr; 2994 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2995 Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data; 2996 Mat P =pp->AIJ; 2997 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2998 PetscInt *pti,*ptj,*ptJ; 2999 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 3000 const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 3001 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 3002 MatScalar *ca; 3003 const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 3004 3005 PetscFunctionBegin; 3006 /* Get ij structure of P^T */ 3007 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 3008 3009 cn = pn*ppdof; 3010 /* Allocate ci array, arrays for fill computation and */ 3011 /* free space for accumulating nonzero column info */ 3012 ierr = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr); 3013 ci[0] = 0; 3014 3015 /* Work arrays for rows of P^T*A */ 3016 ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr); 3017 ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr); 3018 ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr); 3019 3020 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 3021 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 3022 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 3023 ierr = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr); 3024 current_space = free_space; 3025 3026 /* Determine symbolic info for each row of C: */ 3027 for (i=0; i<pn; i++) { 3028 ptnzi = pti[i+1] - pti[i]; 3029 ptJ = ptj + pti[i]; 3030 for (dof=0; dof<ppdof; dof++) { 3031 ptanzi = 0; 3032 /* Determine symbolic row of PtA: */ 3033 for (j=0; j<ptnzi; j++) { 3034 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 3035 arow = ptJ[j]*ppdof + dof; 3036 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 3037 anzj = ai[arow+1] - ai[arow]; 3038 ajj = aj + ai[arow]; 3039 for (k=0; k<anzj; k++) { 3040 if (!ptadenserow[ajj[k]]) { 3041 ptadenserow[ajj[k]] = -1; 3042 ptasparserow[ptanzi++] = ajj[k]; 3043 } 3044 } 3045 } 3046 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 3047 ptaj = ptasparserow; 3048 cnzi = 0; 3049 for (j=0; j<ptanzi; j++) { 3050 /* Get offset within block of P */ 3051 pshift = *ptaj%ppdof; 3052 /* Get block row of P */ 3053 prow = (*ptaj++)/ppdof; /* integer division */ 3054 /* P has same number of nonzeros per row as the compressed form */ 3055 pnzj = pi[prow+1] - pi[prow]; 3056 pjj = pj + pi[prow]; 3057 for (k=0;k<pnzj;k++) { 3058 /* Locations in C are shifted by the offset within the block */ 3059 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 3060 if (!denserow[pjj[k]*ppdof+pshift]) { 3061 denserow[pjj[k]*ppdof+pshift] = -1; 3062 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 3063 } 3064 } 3065 } 3066 3067 /* sort sparserow */ 3068 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 3069 3070 /* If free space is not available, make more free space */ 3071 /* Double the amount of total space in the list */ 3072 if (current_space->local_remaining<cnzi) { 3073 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3074 } 3075 3076 /* Copy data into free space, and zero out denserows */ 3077 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 3078 3079 current_space->array += cnzi; 3080 current_space->local_used += cnzi; 3081 current_space->local_remaining -= cnzi; 3082 3083 for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 3084 for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0; 3085 3086 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 3087 /* For now, we will recompute what is needed. */ 3088 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 3089 } 3090 } 3091 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 3092 /* Allocate space for cj, initialize cj, and */ 3093 /* destroy list of free space and other temporary array(s) */ 3094 ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr); 3095 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 3096 ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr); 3097 3098 /* Allocate space for ca */ 3099 ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr); 3100 3101 /* put together the new matrix */ 3102 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 3103 ierr = MatSetBlockSize(*C,pp->dof);CHKERRQ(ierr); 3104 3105 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3106 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3107 c = (Mat_SeqAIJ*)((*C)->data); 3108 c->free_a = PETSC_TRUE; 3109 c->free_ij = PETSC_TRUE; 3110 c->nonew = 0; 3111 3112 (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 3113 3114 /* Clean up. */ 3115 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 3116 PetscFunctionReturn(0); 3117 } 3118 3119 #undef __FUNCT__ 3120 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 3121 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 3122 { 3123 /* This routine requires testing -- first draft only */ 3124 PetscErrorCode ierr; 3125 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 3126 Mat P =pp->AIJ; 3127 Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data; 3128 Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data; 3129 Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data; 3130 const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj; 3131 const PetscInt *ci=c->i,*cj=c->j,*cjj; 3132 const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3133 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3134 const MatScalar *aa=a->a,*pa=p->a,*pA,*paj; 3135 MatScalar *ca=c->a,*caj,*apa; 3136 3137 PetscFunctionBegin; 3138 /* Allocate temporary array for storage of one row of A*P */ 3139 ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr); 3140 3141 /* Clear old values in C */ 3142 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 3143 3144 for (i=0; i<am; i++) { 3145 /* Form sparse row of A*P */ 3146 anzi = ai[i+1] - ai[i]; 3147 apnzj = 0; 3148 for (j=0; j<anzi; j++) { 3149 /* Get offset within block of P */ 3150 pshift = *aj%ppdof; 3151 /* Get block row of P */ 3152 prow = *aj++/ppdof; /* integer division */ 3153 pnzj = pi[prow+1] - pi[prow]; 3154 pjj = pj + pi[prow]; 3155 paj = pa + pi[prow]; 3156 for (k=0; k<pnzj; k++) { 3157 poffset = pjj[k]*ppdof+pshift; 3158 if (!apjdense[poffset]) { 3159 apjdense[poffset] = -1; 3160 apj[apnzj++] = poffset; 3161 } 3162 apa[poffset] += (*aa)*paj[k]; 3163 } 3164 ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 3165 aa++; 3166 } 3167 3168 /* Sort the j index array for quick sparse axpy. */ 3169 /* Note: a array does not need sorting as it is in dense storage locations. */ 3170 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 3171 3172 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 3173 prow = i/ppdof; /* integer division */ 3174 pshift = i%ppdof; 3175 poffset = pi[prow]; 3176 pnzi = pi[prow+1] - poffset; 3177 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 3178 pJ = pj+poffset; 3179 pA = pa+poffset; 3180 for (j=0; j<pnzi; j++) { 3181 crow = (*pJ)*ppdof+pshift; 3182 cjj = cj + ci[crow]; 3183 caj = ca + ci[crow]; 3184 pJ++; 3185 /* Perform sparse axpy operation. Note cjj includes apj. */ 3186 for (k=0,nextap=0; nextap<apnzj; k++) { 3187 if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]]; 3188 } 3189 ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 3190 pA++; 3191 } 3192 3193 /* Zero the current row info for A*P */ 3194 for (j=0; j<apnzj; j++) { 3195 apa[apj[j]] = 0.; 3196 apjdense[apj[j]] = 0; 3197 } 3198 } 3199 3200 /* Assemble the final matrix and clean up */ 3201 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3202 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3203 ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr); 3204 PetscFunctionReturn(0); 3205 } 3206 3207 #undef __FUNCT__ 3208 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqMAIJ" 3209 PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3210 { 3211 PetscErrorCode ierr; 3212 3213 PetscFunctionBegin; 3214 if (scall == MAT_INITIAL_MATRIX) { 3215 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3216 ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr); 3217 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3218 } 3219 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3220 ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr); 3221 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3222 PetscFunctionReturn(0); 3223 } 3224 3225 #undef __FUNCT__ 3226 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ" 3227 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 3228 { 3229 PetscErrorCode ierr; 3230 3231 PetscFunctionBegin; 3232 /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 3233 ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr); 3234 ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 3235 PetscFunctionReturn(0); 3236 } 3237 3238 #undef __FUNCT__ 3239 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ" 3240 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 3241 { 3242 PetscFunctionBegin; 3243 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 3244 PetscFunctionReturn(0); 3245 } 3246 3247 #undef __FUNCT__ 3248 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIMAIJ" 3249 PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3250 { 3251 PetscErrorCode ierr; 3252 3253 PetscFunctionBegin; 3254 if (scall == MAT_INITIAL_MATRIX) { 3255 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3256 ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);CHKERRQ(ierr); 3257 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3258 } 3259 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3260 ierr = ((*C)->ops->ptapnumeric)(A,P,*C);CHKERRQ(ierr); 3261 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3262 PetscFunctionReturn(0); 3263 } 3264 3265 #undef __FUNCT__ 3266 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 3267 PETSC_EXTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3268 { 3269 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3270 Mat a = b->AIJ,B; 3271 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 3272 PetscErrorCode ierr; 3273 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3274 PetscInt *cols; 3275 PetscScalar *vals; 3276 3277 PetscFunctionBegin; 3278 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 3279 ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr); 3280 for (i=0; i<m; i++) { 3281 nmax = PetscMax(nmax,aij->ilen[i]); 3282 for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i]; 3283 } 3284 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 3285 ierr = PetscFree(ilen);CHKERRQ(ierr); 3286 ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr); 3287 ii = 0; 3288 for (i=0; i<m; i++) { 3289 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3290 for (j=0; j<dof; j++) { 3291 for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j; 3292 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3293 ii++; 3294 } 3295 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3296 } 3297 ierr = PetscFree(icols);CHKERRQ(ierr); 3298 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3299 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3300 3301 if (reuse == MAT_REUSE_MATRIX) { 3302 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3303 } else { 3304 *newmat = B; 3305 } 3306 PetscFunctionReturn(0); 3307 } 3308 3309 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3310 3311 #undef __FUNCT__ 3312 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 3313 PETSC_EXTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3314 { 3315 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3316 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 3317 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 3318 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 3319 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 3320 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 3321 PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0; 3322 PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL; 3323 PetscInt rstart,cstart,*garray,ii,k; 3324 PetscErrorCode ierr; 3325 PetscScalar *vals,*ovals; 3326 3327 PetscFunctionBegin; 3328 ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr); 3329 for (i=0; i<A->rmap->n/dof; i++) { 3330 nmax = PetscMax(nmax,AIJ->ilen[i]); 3331 onmax = PetscMax(onmax,OAIJ->ilen[i]); 3332 for (j=0; j<dof; j++) { 3333 dnz[dof*i+j] = AIJ->ilen[i]; 3334 onz[dof*i+j] = OAIJ->ilen[i]; 3335 } 3336 } 3337 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 3338 ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr); 3339 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 3340 3341 ierr = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr); 3342 rstart = dof*maij->A->rmap->rstart; 3343 cstart = dof*maij->A->cmap->rstart; 3344 garray = mpiaij->garray; 3345 3346 ii = rstart; 3347 for (i=0; i<A->rmap->n/dof; i++) { 3348 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3349 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3350 for (j=0; j<dof; j++) { 3351 for (k=0; k<ncols; k++) { 3352 icols[k] = cstart + dof*cols[k]+j; 3353 } 3354 for (k=0; k<oncols; k++) { 3355 oicols[k] = dof*garray[ocols[k]]+j; 3356 } 3357 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3358 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 3359 ii++; 3360 } 3361 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3362 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3363 } 3364 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 3365 3366 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3367 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3368 3369 if (reuse == MAT_REUSE_MATRIX) { 3370 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 3371 ((PetscObject)A)->refct = 1; 3372 3373 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3374 3375 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3376 } else { 3377 *newmat = B; 3378 } 3379 PetscFunctionReturn(0); 3380 } 3381 3382 #undef __FUNCT__ 3383 #define __FUNCT__ "MatGetSubMatrix_MAIJ" 3384 PetscErrorCode MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 3385 { 3386 PetscErrorCode ierr; 3387 Mat A; 3388 3389 PetscFunctionBegin; 3390 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 3391 ierr = MatGetSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 3392 ierr = MatDestroy(&A);CHKERRQ(ierr); 3393 PetscFunctionReturn(0); 3394 } 3395 3396 /* ---------------------------------------------------------------------------------- */ 3397 #undef __FUNCT__ 3398 #define __FUNCT__ "MatCreateMAIJ" 3399 /*@C 3400 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 3401 operations for multicomponent problems. It interpolates each component the same 3402 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 3403 and MATMPIAIJ for distributed matrices. 3404 3405 Collective 3406 3407 Input Parameters: 3408 + A - the AIJ matrix describing the action on blocks 3409 - dof - the block size (number of components per node) 3410 3411 Output Parameter: 3412 . maij - the new MAIJ matrix 3413 3414 Operations provided: 3415 + MatMult 3416 . MatMultTranspose 3417 . MatMultAdd 3418 . MatMultTransposeAdd 3419 - MatView 3420 3421 Level: advanced 3422 3423 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ 3424 @*/ 3425 PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 3426 { 3427 PetscErrorCode ierr; 3428 PetscMPIInt size; 3429 PetscInt n; 3430 Mat B; 3431 3432 PetscFunctionBegin; 3433 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3434 3435 if (dof == 1) *maij = A; 3436 else { 3437 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3438 ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3439 ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr); 3440 ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr); 3441 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3442 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3443 3444 B->assembled = PETSC_TRUE; 3445 3446 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3447 if (size == 1) { 3448 Mat_SeqMAIJ *b; 3449 3450 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 3451 3452 B->ops->setup = NULL; 3453 B->ops->destroy = MatDestroy_SeqMAIJ; 3454 B->ops->view = MatView_SeqMAIJ; 3455 b = (Mat_SeqMAIJ*)B->data; 3456 b->dof = dof; 3457 b->AIJ = A; 3458 3459 if (dof == 2) { 3460 B->ops->mult = MatMult_SeqMAIJ_2; 3461 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3462 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3463 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3464 } else if (dof == 3) { 3465 B->ops->mult = MatMult_SeqMAIJ_3; 3466 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3467 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3468 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3469 } else if (dof == 4) { 3470 B->ops->mult = MatMult_SeqMAIJ_4; 3471 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3472 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3473 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3474 } else if (dof == 5) { 3475 B->ops->mult = MatMult_SeqMAIJ_5; 3476 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3477 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3478 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 3479 } else if (dof == 6) { 3480 B->ops->mult = MatMult_SeqMAIJ_6; 3481 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 3482 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 3483 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3484 } else if (dof == 7) { 3485 B->ops->mult = MatMult_SeqMAIJ_7; 3486 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3487 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3488 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 3489 } else if (dof == 8) { 3490 B->ops->mult = MatMult_SeqMAIJ_8; 3491 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 3492 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 3493 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 3494 } else if (dof == 9) { 3495 B->ops->mult = MatMult_SeqMAIJ_9; 3496 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 3497 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 3498 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3499 } else if (dof == 10) { 3500 B->ops->mult = MatMult_SeqMAIJ_10; 3501 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3502 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3503 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3504 } else if (dof == 11) { 3505 B->ops->mult = MatMult_SeqMAIJ_11; 3506 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3507 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3508 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 3509 } else if (dof == 16) { 3510 B->ops->mult = MatMult_SeqMAIJ_16; 3511 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 3512 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 3513 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3514 } else if (dof == 18) { 3515 B->ops->mult = MatMult_SeqMAIJ_18; 3516 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3517 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3518 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3519 } else { 3520 B->ops->mult = MatMult_SeqMAIJ_N; 3521 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 3522 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 3523 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 3524 } 3525 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3526 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 3527 } else { 3528 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data; 3529 Mat_MPIMAIJ *b; 3530 IS from,to; 3531 Vec gvec; 3532 3533 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 3534 3535 B->ops->setup = NULL; 3536 B->ops->destroy = MatDestroy_MPIMAIJ; 3537 B->ops->view = MatView_MPIMAIJ; 3538 3539 b = (Mat_MPIMAIJ*)B->data; 3540 b->dof = dof; 3541 b->A = A; 3542 3543 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 3544 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 3545 3546 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3547 ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr); 3548 ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr); 3549 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3550 ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr); 3551 3552 /* create two temporary Index sets for build scatter gather */ 3553 ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 3554 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3555 3556 /* create temporary global vector to generate scatter context */ 3557 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 3558 3559 /* generate the scatter context */ 3560 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3561 3562 ierr = ISDestroy(&from);CHKERRQ(ierr); 3563 ierr = ISDestroy(&to);CHKERRQ(ierr); 3564 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 3565 3566 B->ops->mult = MatMult_MPIMAIJ_dof; 3567 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3568 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3569 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3570 3571 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3572 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);CHKERRQ(ierr); 3573 } 3574 B->ops->getsubmatrix = MatGetSubMatrix_MAIJ; 3575 ierr = MatSetUp(B);CHKERRQ(ierr); 3576 *maij = B; 3577 ierr = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr); 3578 } 3579 PetscFunctionReturn(0); 3580 } 3581