1 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20 #include <../src/mat/utils/freespace.h> 21 #include <petsc/private/vecimpl.h> 22 23 #undef __FUNCT__ 24 #define __FUNCT__ "MatMAIJGetAIJ" 25 /*@ 26 MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 27 28 Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 29 30 Input Parameter: 31 . A - the MAIJ matrix 32 33 Output Parameter: 34 . B - the AIJ matrix 35 36 Level: advanced 37 38 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 39 40 .seealso: MatCreateMAIJ() 41 @*/ 42 PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 43 { 44 PetscErrorCode ierr; 45 PetscBool ismpimaij,isseqmaij; 46 47 PetscFunctionBegin; 48 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 49 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 50 if (ismpimaij) { 51 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 52 53 *B = b->A; 54 } else if (isseqmaij) { 55 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 56 57 *B = b->AIJ; 58 } else { 59 *B = A; 60 } 61 PetscFunctionReturn(0); 62 } 63 64 #undef __FUNCT__ 65 #define __FUNCT__ "MatMAIJRedimension" 66 /*@C 67 MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 68 69 Logically Collective 70 71 Input Parameter: 72 + A - the MAIJ matrix 73 - dof - the block size for the new matrix 74 75 Output Parameter: 76 . B - the new MAIJ matrix 77 78 Level: advanced 79 80 .seealso: MatCreateMAIJ() 81 @*/ 82 PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 83 { 84 PetscErrorCode ierr; 85 Mat Aij = NULL; 86 87 PetscFunctionBegin; 88 PetscValidLogicalCollectiveInt(A,dof,2); 89 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 90 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatDestroy_SeqMAIJ" 96 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 97 { 98 PetscErrorCode ierr; 99 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 100 101 PetscFunctionBegin; 102 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 103 ierr = PetscFree(A->data);CHKERRQ(ierr); 104 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr); 105 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);CHKERRQ(ierr); 106 PetscFunctionReturn(0); 107 } 108 109 #undef __FUNCT__ 110 #define __FUNCT__ "MatSetUp_MAIJ" 111 PetscErrorCode MatSetUp_MAIJ(Mat A) 112 { 113 PetscFunctionBegin; 114 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices"); 115 PetscFunctionReturn(0); 116 } 117 118 #undef __FUNCT__ 119 #define __FUNCT__ "MatView_SeqMAIJ" 120 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 121 { 122 PetscErrorCode ierr; 123 Mat B; 124 125 PetscFunctionBegin; 126 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 127 ierr = MatView(B,viewer);CHKERRQ(ierr); 128 ierr = MatDestroy(&B);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatView_MPIMAIJ" 134 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 135 { 136 PetscErrorCode ierr; 137 Mat B; 138 139 PetscFunctionBegin; 140 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 141 ierr = MatView(B,viewer);CHKERRQ(ierr); 142 ierr = MatDestroy(&B);CHKERRQ(ierr); 143 PetscFunctionReturn(0); 144 } 145 146 #undef __FUNCT__ 147 #define __FUNCT__ "MatDestroy_MPIMAIJ" 148 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 149 { 150 PetscErrorCode ierr; 151 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 152 153 PetscFunctionBegin; 154 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 155 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 156 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 157 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 158 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 159 ierr = PetscFree(A->data);CHKERRQ(ierr); 160 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr); 161 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr); 162 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 163 PetscFunctionReturn(0); 164 } 165 166 /*MC 167 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 168 multicomponent problems, interpolating or restricting each component the same way independently. 169 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 170 171 Operations provided: 172 . MatMult 173 . MatMultTranspose 174 . MatMultAdd 175 . MatMultTransposeAdd 176 177 Level: advanced 178 179 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ() 180 M*/ 181 182 #undef __FUNCT__ 183 #define __FUNCT__ "MatCreate_MAIJ" 184 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 185 { 186 PetscErrorCode ierr; 187 Mat_MPIMAIJ *b; 188 PetscMPIInt size; 189 190 PetscFunctionBegin; 191 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 192 A->data = (void*)b; 193 194 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 195 196 A->ops->setup = MatSetUp_MAIJ; 197 198 b->AIJ = 0; 199 b->dof = 0; 200 b->OAIJ = 0; 201 b->ctx = 0; 202 b->w = 0; 203 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 204 if (size == 1) { 205 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 206 } else { 207 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 208 } 209 A->preallocated = PETSC_TRUE; 210 A->assembled = PETSC_TRUE; 211 PetscFunctionReturn(0); 212 } 213 214 /* --------------------------------------------------------------------------------------*/ 215 #undef __FUNCT__ 216 #define __FUNCT__ "MatMult_SeqMAIJ_2" 217 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 218 { 219 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 220 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 221 const PetscScalar *x,*v; 222 PetscScalar *y, sum1, sum2; 223 PetscErrorCode ierr; 224 PetscInt nonzerorow=0,n,i,jrow,j; 225 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 226 227 PetscFunctionBegin; 228 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 229 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 230 idx = a->j; 231 v = a->a; 232 ii = a->i; 233 234 for (i=0; i<m; i++) { 235 jrow = ii[i]; 236 n = ii[i+1] - jrow; 237 sum1 = 0.0; 238 sum2 = 0.0; 239 240 nonzerorow += (n>0); 241 for (j=0; j<n; j++) { 242 sum1 += v[jrow]*x[2*idx[jrow]]; 243 sum2 += v[jrow]*x[2*idx[jrow]+1]; 244 jrow++; 245 } 246 y[2*i] = sum1; 247 y[2*i+1] = sum2; 248 } 249 250 ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 251 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 252 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 253 PetscFunctionReturn(0); 254 } 255 256 #undef __FUNCT__ 257 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 258 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 259 { 260 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 261 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 262 const PetscScalar *x,*v; 263 PetscScalar *y,alpha1,alpha2; 264 PetscErrorCode ierr; 265 PetscInt n,i; 266 const PetscInt m = b->AIJ->rmap->n,*idx; 267 268 PetscFunctionBegin; 269 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 270 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 271 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 272 273 for (i=0; i<m; i++) { 274 idx = a->j + a->i[i]; 275 v = a->a + a->i[i]; 276 n = a->i[i+1] - a->i[i]; 277 alpha1 = x[2*i]; 278 alpha2 = x[2*i+1]; 279 while (n-->0) { 280 y[2*(*idx)] += alpha1*(*v); 281 y[2*(*idx)+1] += alpha2*(*v); 282 idx++; v++; 283 } 284 } 285 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 286 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 287 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 288 PetscFunctionReturn(0); 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 293 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 294 { 295 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 296 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 297 const PetscScalar *x,*v; 298 PetscScalar *y,sum1, sum2; 299 PetscErrorCode ierr; 300 PetscInt n,i,jrow,j; 301 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 302 303 PetscFunctionBegin; 304 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 305 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 306 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 307 idx = a->j; 308 v = a->a; 309 ii = a->i; 310 311 for (i=0; i<m; i++) { 312 jrow = ii[i]; 313 n = ii[i+1] - jrow; 314 sum1 = 0.0; 315 sum2 = 0.0; 316 for (j=0; j<n; j++) { 317 sum1 += v[jrow]*x[2*idx[jrow]]; 318 sum2 += v[jrow]*x[2*idx[jrow]+1]; 319 jrow++; 320 } 321 y[2*i] += sum1; 322 y[2*i+1] += sum2; 323 } 324 325 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 326 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 327 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 328 PetscFunctionReturn(0); 329 } 330 #undef __FUNCT__ 331 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 332 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 333 { 334 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 335 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 336 const PetscScalar *x,*v; 337 PetscScalar *y,alpha1,alpha2; 338 PetscErrorCode ierr; 339 PetscInt n,i; 340 const PetscInt m = b->AIJ->rmap->n,*idx; 341 342 PetscFunctionBegin; 343 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 344 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 345 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 346 347 for (i=0; i<m; i++) { 348 idx = a->j + a->i[i]; 349 v = a->a + a->i[i]; 350 n = a->i[i+1] - a->i[i]; 351 alpha1 = x[2*i]; 352 alpha2 = x[2*i+1]; 353 while (n-->0) { 354 y[2*(*idx)] += alpha1*(*v); 355 y[2*(*idx)+1] += alpha2*(*v); 356 idx++; v++; 357 } 358 } 359 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 360 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 361 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 362 PetscFunctionReturn(0); 363 } 364 /* --------------------------------------------------------------------------------------*/ 365 #undef __FUNCT__ 366 #define __FUNCT__ "MatMult_SeqMAIJ_3" 367 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 368 { 369 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 370 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 371 const PetscScalar *x,*v; 372 PetscScalar *y,sum1, sum2, sum3; 373 PetscErrorCode ierr; 374 PetscInt nonzerorow=0,n,i,jrow,j; 375 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 376 377 PetscFunctionBegin; 378 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 379 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 380 idx = a->j; 381 v = a->a; 382 ii = a->i; 383 384 for (i=0; i<m; i++) { 385 jrow = ii[i]; 386 n = ii[i+1] - jrow; 387 sum1 = 0.0; 388 sum2 = 0.0; 389 sum3 = 0.0; 390 391 nonzerorow += (n>0); 392 for (j=0; j<n; j++) { 393 sum1 += v[jrow]*x[3*idx[jrow]]; 394 sum2 += v[jrow]*x[3*idx[jrow]+1]; 395 sum3 += v[jrow]*x[3*idx[jrow]+2]; 396 jrow++; 397 } 398 y[3*i] = sum1; 399 y[3*i+1] = sum2; 400 y[3*i+2] = sum3; 401 } 402 403 ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 404 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 405 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 406 PetscFunctionReturn(0); 407 } 408 409 #undef __FUNCT__ 410 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 411 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 412 { 413 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 414 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 415 const PetscScalar *x,*v; 416 PetscScalar *y,alpha1,alpha2,alpha3; 417 PetscErrorCode ierr; 418 PetscInt n,i; 419 const PetscInt m = b->AIJ->rmap->n,*idx; 420 421 PetscFunctionBegin; 422 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 423 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 424 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 425 426 for (i=0; i<m; i++) { 427 idx = a->j + a->i[i]; 428 v = a->a + a->i[i]; 429 n = a->i[i+1] - a->i[i]; 430 alpha1 = x[3*i]; 431 alpha2 = x[3*i+1]; 432 alpha3 = x[3*i+2]; 433 while (n-->0) { 434 y[3*(*idx)] += alpha1*(*v); 435 y[3*(*idx)+1] += alpha2*(*v); 436 y[3*(*idx)+2] += alpha3*(*v); 437 idx++; v++; 438 } 439 } 440 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 441 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 442 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 443 PetscFunctionReturn(0); 444 } 445 446 #undef __FUNCT__ 447 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 448 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 449 { 450 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 451 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 452 const PetscScalar *x,*v; 453 PetscScalar *y,sum1, sum2, sum3; 454 PetscErrorCode ierr; 455 PetscInt n,i,jrow,j; 456 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 457 458 PetscFunctionBegin; 459 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 460 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 461 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 462 idx = a->j; 463 v = a->a; 464 ii = a->i; 465 466 for (i=0; i<m; i++) { 467 jrow = ii[i]; 468 n = ii[i+1] - jrow; 469 sum1 = 0.0; 470 sum2 = 0.0; 471 sum3 = 0.0; 472 for (j=0; j<n; j++) { 473 sum1 += v[jrow]*x[3*idx[jrow]]; 474 sum2 += v[jrow]*x[3*idx[jrow]+1]; 475 sum3 += v[jrow]*x[3*idx[jrow]+2]; 476 jrow++; 477 } 478 y[3*i] += sum1; 479 y[3*i+1] += sum2; 480 y[3*i+2] += sum3; 481 } 482 483 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 484 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 485 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 486 PetscFunctionReturn(0); 487 } 488 #undef __FUNCT__ 489 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 490 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 491 { 492 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 493 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 494 const PetscScalar *x,*v; 495 PetscScalar *y,alpha1,alpha2,alpha3; 496 PetscErrorCode ierr; 497 PetscInt n,i; 498 const PetscInt m = b->AIJ->rmap->n,*idx; 499 500 PetscFunctionBegin; 501 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 502 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 503 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 504 for (i=0; i<m; i++) { 505 idx = a->j + a->i[i]; 506 v = a->a + a->i[i]; 507 n = a->i[i+1] - a->i[i]; 508 alpha1 = x[3*i]; 509 alpha2 = x[3*i+1]; 510 alpha3 = x[3*i+2]; 511 while (n-->0) { 512 y[3*(*idx)] += alpha1*(*v); 513 y[3*(*idx)+1] += alpha2*(*v); 514 y[3*(*idx)+2] += alpha3*(*v); 515 idx++; v++; 516 } 517 } 518 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 519 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 520 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 521 PetscFunctionReturn(0); 522 } 523 524 /* ------------------------------------------------------------------------------*/ 525 #undef __FUNCT__ 526 #define __FUNCT__ "MatMult_SeqMAIJ_4" 527 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 528 { 529 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 530 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 531 const PetscScalar *x,*v; 532 PetscScalar *y,sum1, sum2, sum3, sum4; 533 PetscErrorCode ierr; 534 PetscInt nonzerorow=0,n,i,jrow,j; 535 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 536 537 PetscFunctionBegin; 538 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 539 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 540 idx = a->j; 541 v = a->a; 542 ii = a->i; 543 544 for (i=0; i<m; i++) { 545 jrow = ii[i]; 546 n = ii[i+1] - jrow; 547 sum1 = 0.0; 548 sum2 = 0.0; 549 sum3 = 0.0; 550 sum4 = 0.0; 551 nonzerorow += (n>0); 552 for (j=0; j<n; j++) { 553 sum1 += v[jrow]*x[4*idx[jrow]]; 554 sum2 += v[jrow]*x[4*idx[jrow]+1]; 555 sum3 += v[jrow]*x[4*idx[jrow]+2]; 556 sum4 += v[jrow]*x[4*idx[jrow]+3]; 557 jrow++; 558 } 559 y[4*i] = sum1; 560 y[4*i+1] = sum2; 561 y[4*i+2] = sum3; 562 y[4*i+3] = sum4; 563 } 564 565 ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 566 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 567 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 568 PetscFunctionReturn(0); 569 } 570 571 #undef __FUNCT__ 572 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 573 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 574 { 575 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 576 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 577 const PetscScalar *x,*v; 578 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 579 PetscErrorCode ierr; 580 PetscInt n,i; 581 const PetscInt m = b->AIJ->rmap->n,*idx; 582 583 PetscFunctionBegin; 584 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 585 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 586 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 587 for (i=0; i<m; i++) { 588 idx = a->j + a->i[i]; 589 v = a->a + a->i[i]; 590 n = a->i[i+1] - a->i[i]; 591 alpha1 = x[4*i]; 592 alpha2 = x[4*i+1]; 593 alpha3 = x[4*i+2]; 594 alpha4 = x[4*i+3]; 595 while (n-->0) { 596 y[4*(*idx)] += alpha1*(*v); 597 y[4*(*idx)+1] += alpha2*(*v); 598 y[4*(*idx)+2] += alpha3*(*v); 599 y[4*(*idx)+3] += alpha4*(*v); 600 idx++; v++; 601 } 602 } 603 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 604 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 605 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 606 PetscFunctionReturn(0); 607 } 608 609 #undef __FUNCT__ 610 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 611 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 612 { 613 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 614 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 615 const PetscScalar *x,*v; 616 PetscScalar *y,sum1, sum2, sum3, sum4; 617 PetscErrorCode ierr; 618 PetscInt n,i,jrow,j; 619 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 620 621 PetscFunctionBegin; 622 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 623 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 624 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 625 idx = a->j; 626 v = a->a; 627 ii = a->i; 628 629 for (i=0; i<m; i++) { 630 jrow = ii[i]; 631 n = ii[i+1] - jrow; 632 sum1 = 0.0; 633 sum2 = 0.0; 634 sum3 = 0.0; 635 sum4 = 0.0; 636 for (j=0; j<n; j++) { 637 sum1 += v[jrow]*x[4*idx[jrow]]; 638 sum2 += v[jrow]*x[4*idx[jrow]+1]; 639 sum3 += v[jrow]*x[4*idx[jrow]+2]; 640 sum4 += v[jrow]*x[4*idx[jrow]+3]; 641 jrow++; 642 } 643 y[4*i] += sum1; 644 y[4*i+1] += sum2; 645 y[4*i+2] += sum3; 646 y[4*i+3] += sum4; 647 } 648 649 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 650 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 651 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 652 PetscFunctionReturn(0); 653 } 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 656 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 657 { 658 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 659 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 660 const PetscScalar *x,*v; 661 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 662 PetscErrorCode ierr; 663 PetscInt n,i; 664 const PetscInt m = b->AIJ->rmap->n,*idx; 665 666 PetscFunctionBegin; 667 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 668 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 669 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 670 671 for (i=0; i<m; i++) { 672 idx = a->j + a->i[i]; 673 v = a->a + a->i[i]; 674 n = a->i[i+1] - a->i[i]; 675 alpha1 = x[4*i]; 676 alpha2 = x[4*i+1]; 677 alpha3 = x[4*i+2]; 678 alpha4 = x[4*i+3]; 679 while (n-->0) { 680 y[4*(*idx)] += alpha1*(*v); 681 y[4*(*idx)+1] += alpha2*(*v); 682 y[4*(*idx)+2] += alpha3*(*v); 683 y[4*(*idx)+3] += alpha4*(*v); 684 idx++; v++; 685 } 686 } 687 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 688 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 689 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 690 PetscFunctionReturn(0); 691 } 692 /* ------------------------------------------------------------------------------*/ 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatMult_SeqMAIJ_5" 696 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 697 { 698 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 699 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 700 const PetscScalar *x,*v; 701 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 702 PetscErrorCode ierr; 703 PetscInt nonzerorow=0,n,i,jrow,j; 704 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 705 706 PetscFunctionBegin; 707 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 708 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 709 idx = a->j; 710 v = a->a; 711 ii = a->i; 712 713 for (i=0; i<m; i++) { 714 jrow = ii[i]; 715 n = ii[i+1] - jrow; 716 sum1 = 0.0; 717 sum2 = 0.0; 718 sum3 = 0.0; 719 sum4 = 0.0; 720 sum5 = 0.0; 721 722 nonzerorow += (n>0); 723 for (j=0; j<n; j++) { 724 sum1 += v[jrow]*x[5*idx[jrow]]; 725 sum2 += v[jrow]*x[5*idx[jrow]+1]; 726 sum3 += v[jrow]*x[5*idx[jrow]+2]; 727 sum4 += v[jrow]*x[5*idx[jrow]+3]; 728 sum5 += v[jrow]*x[5*idx[jrow]+4]; 729 jrow++; 730 } 731 y[5*i] = sum1; 732 y[5*i+1] = sum2; 733 y[5*i+2] = sum3; 734 y[5*i+3] = sum4; 735 y[5*i+4] = sum5; 736 } 737 738 ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 739 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 740 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 746 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 747 { 748 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 749 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 750 const PetscScalar *x,*v; 751 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 752 PetscErrorCode ierr; 753 PetscInt n,i; 754 const PetscInt m = b->AIJ->rmap->n,*idx; 755 756 PetscFunctionBegin; 757 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 758 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 759 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 760 761 for (i=0; i<m; i++) { 762 idx = a->j + a->i[i]; 763 v = a->a + a->i[i]; 764 n = a->i[i+1] - a->i[i]; 765 alpha1 = x[5*i]; 766 alpha2 = x[5*i+1]; 767 alpha3 = x[5*i+2]; 768 alpha4 = x[5*i+3]; 769 alpha5 = x[5*i+4]; 770 while (n-->0) { 771 y[5*(*idx)] += alpha1*(*v); 772 y[5*(*idx)+1] += alpha2*(*v); 773 y[5*(*idx)+2] += alpha3*(*v); 774 y[5*(*idx)+3] += alpha4*(*v); 775 y[5*(*idx)+4] += alpha5*(*v); 776 idx++; v++; 777 } 778 } 779 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 780 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 781 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 782 PetscFunctionReturn(0); 783 } 784 785 #undef __FUNCT__ 786 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 787 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 788 { 789 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 790 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 791 const PetscScalar *x,*v; 792 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 793 PetscErrorCode ierr; 794 PetscInt n,i,jrow,j; 795 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 796 797 PetscFunctionBegin; 798 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 799 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 800 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 801 idx = a->j; 802 v = a->a; 803 ii = a->i; 804 805 for (i=0; i<m; i++) { 806 jrow = ii[i]; 807 n = ii[i+1] - jrow; 808 sum1 = 0.0; 809 sum2 = 0.0; 810 sum3 = 0.0; 811 sum4 = 0.0; 812 sum5 = 0.0; 813 for (j=0; j<n; j++) { 814 sum1 += v[jrow]*x[5*idx[jrow]]; 815 sum2 += v[jrow]*x[5*idx[jrow]+1]; 816 sum3 += v[jrow]*x[5*idx[jrow]+2]; 817 sum4 += v[jrow]*x[5*idx[jrow]+3]; 818 sum5 += v[jrow]*x[5*idx[jrow]+4]; 819 jrow++; 820 } 821 y[5*i] += sum1; 822 y[5*i+1] += sum2; 823 y[5*i+2] += sum3; 824 y[5*i+3] += sum4; 825 y[5*i+4] += sum5; 826 } 827 828 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 829 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 830 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 831 PetscFunctionReturn(0); 832 } 833 834 #undef __FUNCT__ 835 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 836 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 837 { 838 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 839 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 840 const PetscScalar *x,*v; 841 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 842 PetscErrorCode ierr; 843 PetscInt n,i; 844 const PetscInt m = b->AIJ->rmap->n,*idx; 845 846 PetscFunctionBegin; 847 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 848 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 849 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 850 851 for (i=0; i<m; i++) { 852 idx = a->j + a->i[i]; 853 v = a->a + a->i[i]; 854 n = a->i[i+1] - a->i[i]; 855 alpha1 = x[5*i]; 856 alpha2 = x[5*i+1]; 857 alpha3 = x[5*i+2]; 858 alpha4 = x[5*i+3]; 859 alpha5 = x[5*i+4]; 860 while (n-->0) { 861 y[5*(*idx)] += alpha1*(*v); 862 y[5*(*idx)+1] += alpha2*(*v); 863 y[5*(*idx)+2] += alpha3*(*v); 864 y[5*(*idx)+3] += alpha4*(*v); 865 y[5*(*idx)+4] += alpha5*(*v); 866 idx++; v++; 867 } 868 } 869 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 870 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 871 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 872 PetscFunctionReturn(0); 873 } 874 875 /* ------------------------------------------------------------------------------*/ 876 #undef __FUNCT__ 877 #define __FUNCT__ "MatMult_SeqMAIJ_6" 878 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 879 { 880 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 881 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 882 const PetscScalar *x,*v; 883 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 884 PetscErrorCode ierr; 885 PetscInt nonzerorow=0,n,i,jrow,j; 886 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 887 888 PetscFunctionBegin; 889 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 890 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 891 idx = a->j; 892 v = a->a; 893 ii = a->i; 894 895 for (i=0; i<m; i++) { 896 jrow = ii[i]; 897 n = ii[i+1] - jrow; 898 sum1 = 0.0; 899 sum2 = 0.0; 900 sum3 = 0.0; 901 sum4 = 0.0; 902 sum5 = 0.0; 903 sum6 = 0.0; 904 905 nonzerorow += (n>0); 906 for (j=0; j<n; j++) { 907 sum1 += v[jrow]*x[6*idx[jrow]]; 908 sum2 += v[jrow]*x[6*idx[jrow]+1]; 909 sum3 += v[jrow]*x[6*idx[jrow]+2]; 910 sum4 += v[jrow]*x[6*idx[jrow]+3]; 911 sum5 += v[jrow]*x[6*idx[jrow]+4]; 912 sum6 += v[jrow]*x[6*idx[jrow]+5]; 913 jrow++; 914 } 915 y[6*i] = sum1; 916 y[6*i+1] = sum2; 917 y[6*i+2] = sum3; 918 y[6*i+3] = sum4; 919 y[6*i+4] = sum5; 920 y[6*i+5] = sum6; 921 } 922 923 ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 924 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 925 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 926 PetscFunctionReturn(0); 927 } 928 929 #undef __FUNCT__ 930 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 931 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 932 { 933 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 934 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 935 const PetscScalar *x,*v; 936 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 937 PetscErrorCode ierr; 938 PetscInt n,i; 939 const PetscInt m = b->AIJ->rmap->n,*idx; 940 941 PetscFunctionBegin; 942 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 943 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 944 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 945 946 for (i=0; i<m; i++) { 947 idx = a->j + a->i[i]; 948 v = a->a + a->i[i]; 949 n = a->i[i+1] - a->i[i]; 950 alpha1 = x[6*i]; 951 alpha2 = x[6*i+1]; 952 alpha3 = x[6*i+2]; 953 alpha4 = x[6*i+3]; 954 alpha5 = x[6*i+4]; 955 alpha6 = x[6*i+5]; 956 while (n-->0) { 957 y[6*(*idx)] += alpha1*(*v); 958 y[6*(*idx)+1] += alpha2*(*v); 959 y[6*(*idx)+2] += alpha3*(*v); 960 y[6*(*idx)+3] += alpha4*(*v); 961 y[6*(*idx)+4] += alpha5*(*v); 962 y[6*(*idx)+5] += alpha6*(*v); 963 idx++; v++; 964 } 965 } 966 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 967 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 968 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } 971 972 #undef __FUNCT__ 973 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 974 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 975 { 976 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 977 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 978 const PetscScalar *x,*v; 979 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 980 PetscErrorCode ierr; 981 PetscInt n,i,jrow,j; 982 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 983 984 PetscFunctionBegin; 985 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 986 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 987 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 988 idx = a->j; 989 v = a->a; 990 ii = a->i; 991 992 for (i=0; i<m; i++) { 993 jrow = ii[i]; 994 n = ii[i+1] - jrow; 995 sum1 = 0.0; 996 sum2 = 0.0; 997 sum3 = 0.0; 998 sum4 = 0.0; 999 sum5 = 0.0; 1000 sum6 = 0.0; 1001 for (j=0; j<n; j++) { 1002 sum1 += v[jrow]*x[6*idx[jrow]]; 1003 sum2 += v[jrow]*x[6*idx[jrow]+1]; 1004 sum3 += v[jrow]*x[6*idx[jrow]+2]; 1005 sum4 += v[jrow]*x[6*idx[jrow]+3]; 1006 sum5 += v[jrow]*x[6*idx[jrow]+4]; 1007 sum6 += v[jrow]*x[6*idx[jrow]+5]; 1008 jrow++; 1009 } 1010 y[6*i] += sum1; 1011 y[6*i+1] += sum2; 1012 y[6*i+2] += sum3; 1013 y[6*i+3] += sum4; 1014 y[6*i+4] += sum5; 1015 y[6*i+5] += sum6; 1016 } 1017 1018 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1019 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1020 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1021 PetscFunctionReturn(0); 1022 } 1023 1024 #undef __FUNCT__ 1025 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 1026 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 1027 { 1028 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1029 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1030 const PetscScalar *x,*v; 1031 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 1032 PetscErrorCode ierr; 1033 PetscInt n,i; 1034 const PetscInt m = b->AIJ->rmap->n,*idx; 1035 1036 PetscFunctionBegin; 1037 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1038 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1039 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1040 1041 for (i=0; i<m; i++) { 1042 idx = a->j + a->i[i]; 1043 v = a->a + a->i[i]; 1044 n = a->i[i+1] - a->i[i]; 1045 alpha1 = x[6*i]; 1046 alpha2 = x[6*i+1]; 1047 alpha3 = x[6*i+2]; 1048 alpha4 = x[6*i+3]; 1049 alpha5 = x[6*i+4]; 1050 alpha6 = x[6*i+5]; 1051 while (n-->0) { 1052 y[6*(*idx)] += alpha1*(*v); 1053 y[6*(*idx)+1] += alpha2*(*v); 1054 y[6*(*idx)+2] += alpha3*(*v); 1055 y[6*(*idx)+3] += alpha4*(*v); 1056 y[6*(*idx)+4] += alpha5*(*v); 1057 y[6*(*idx)+5] += alpha6*(*v); 1058 idx++; v++; 1059 } 1060 } 1061 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1062 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1063 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 /* ------------------------------------------------------------------------------*/ 1068 #undef __FUNCT__ 1069 #define __FUNCT__ "MatMult_SeqMAIJ_7" 1070 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1071 { 1072 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1073 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1074 const PetscScalar *x,*v; 1075 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1076 PetscErrorCode ierr; 1077 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1078 PetscInt nonzerorow=0,n,i,jrow,j; 1079 1080 PetscFunctionBegin; 1081 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1082 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1083 idx = a->j; 1084 v = a->a; 1085 ii = a->i; 1086 1087 for (i=0; i<m; i++) { 1088 jrow = ii[i]; 1089 n = ii[i+1] - jrow; 1090 sum1 = 0.0; 1091 sum2 = 0.0; 1092 sum3 = 0.0; 1093 sum4 = 0.0; 1094 sum5 = 0.0; 1095 sum6 = 0.0; 1096 sum7 = 0.0; 1097 1098 nonzerorow += (n>0); 1099 for (j=0; j<n; j++) { 1100 sum1 += v[jrow]*x[7*idx[jrow]]; 1101 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1102 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1103 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1104 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1105 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1106 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1107 jrow++; 1108 } 1109 y[7*i] = sum1; 1110 y[7*i+1] = sum2; 1111 y[7*i+2] = sum3; 1112 y[7*i+3] = sum4; 1113 y[7*i+4] = sum5; 1114 y[7*i+5] = sum6; 1115 y[7*i+6] = sum7; 1116 } 1117 1118 ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 1119 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1120 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1121 PetscFunctionReturn(0); 1122 } 1123 1124 #undef __FUNCT__ 1125 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1126 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1127 { 1128 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1129 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1130 const PetscScalar *x,*v; 1131 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1132 PetscErrorCode ierr; 1133 const PetscInt m = b->AIJ->rmap->n,*idx; 1134 PetscInt n,i; 1135 1136 PetscFunctionBegin; 1137 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1138 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1139 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1140 1141 for (i=0; i<m; i++) { 1142 idx = a->j + a->i[i]; 1143 v = a->a + a->i[i]; 1144 n = a->i[i+1] - a->i[i]; 1145 alpha1 = x[7*i]; 1146 alpha2 = x[7*i+1]; 1147 alpha3 = x[7*i+2]; 1148 alpha4 = x[7*i+3]; 1149 alpha5 = x[7*i+4]; 1150 alpha6 = x[7*i+5]; 1151 alpha7 = x[7*i+6]; 1152 while (n-->0) { 1153 y[7*(*idx)] += alpha1*(*v); 1154 y[7*(*idx)+1] += alpha2*(*v); 1155 y[7*(*idx)+2] += alpha3*(*v); 1156 y[7*(*idx)+3] += alpha4*(*v); 1157 y[7*(*idx)+4] += alpha5*(*v); 1158 y[7*(*idx)+5] += alpha6*(*v); 1159 y[7*(*idx)+6] += alpha7*(*v); 1160 idx++; v++; 1161 } 1162 } 1163 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1164 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1165 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1166 PetscFunctionReturn(0); 1167 } 1168 1169 #undef __FUNCT__ 1170 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1171 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1172 { 1173 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1174 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1175 const PetscScalar *x,*v; 1176 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1177 PetscErrorCode ierr; 1178 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1179 PetscInt n,i,jrow,j; 1180 1181 PetscFunctionBegin; 1182 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1183 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1184 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1185 idx = a->j; 1186 v = a->a; 1187 ii = a->i; 1188 1189 for (i=0; i<m; i++) { 1190 jrow = ii[i]; 1191 n = ii[i+1] - jrow; 1192 sum1 = 0.0; 1193 sum2 = 0.0; 1194 sum3 = 0.0; 1195 sum4 = 0.0; 1196 sum5 = 0.0; 1197 sum6 = 0.0; 1198 sum7 = 0.0; 1199 for (j=0; j<n; j++) { 1200 sum1 += v[jrow]*x[7*idx[jrow]]; 1201 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1202 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1203 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1204 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1205 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1206 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1207 jrow++; 1208 } 1209 y[7*i] += sum1; 1210 y[7*i+1] += sum2; 1211 y[7*i+2] += sum3; 1212 y[7*i+3] += sum4; 1213 y[7*i+4] += sum5; 1214 y[7*i+5] += sum6; 1215 y[7*i+6] += sum7; 1216 } 1217 1218 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1219 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1220 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1221 PetscFunctionReturn(0); 1222 } 1223 1224 #undef __FUNCT__ 1225 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1226 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1227 { 1228 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1229 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1230 const PetscScalar *x,*v; 1231 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1232 PetscErrorCode ierr; 1233 const PetscInt m = b->AIJ->rmap->n,*idx; 1234 PetscInt n,i; 1235 1236 PetscFunctionBegin; 1237 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1238 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1239 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1240 for (i=0; i<m; i++) { 1241 idx = a->j + a->i[i]; 1242 v = a->a + a->i[i]; 1243 n = a->i[i+1] - a->i[i]; 1244 alpha1 = x[7*i]; 1245 alpha2 = x[7*i+1]; 1246 alpha3 = x[7*i+2]; 1247 alpha4 = x[7*i+3]; 1248 alpha5 = x[7*i+4]; 1249 alpha6 = x[7*i+5]; 1250 alpha7 = x[7*i+6]; 1251 while (n-->0) { 1252 y[7*(*idx)] += alpha1*(*v); 1253 y[7*(*idx)+1] += alpha2*(*v); 1254 y[7*(*idx)+2] += alpha3*(*v); 1255 y[7*(*idx)+3] += alpha4*(*v); 1256 y[7*(*idx)+4] += alpha5*(*v); 1257 y[7*(*idx)+5] += alpha6*(*v); 1258 y[7*(*idx)+6] += alpha7*(*v); 1259 idx++; v++; 1260 } 1261 } 1262 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1263 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1264 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatMult_SeqMAIJ_8" 1270 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1271 { 1272 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1273 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1274 const PetscScalar *x,*v; 1275 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1276 PetscErrorCode ierr; 1277 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1278 PetscInt nonzerorow=0,n,i,jrow,j; 1279 1280 PetscFunctionBegin; 1281 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1282 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1283 idx = a->j; 1284 v = a->a; 1285 ii = a->i; 1286 1287 for (i=0; i<m; i++) { 1288 jrow = ii[i]; 1289 n = ii[i+1] - jrow; 1290 sum1 = 0.0; 1291 sum2 = 0.0; 1292 sum3 = 0.0; 1293 sum4 = 0.0; 1294 sum5 = 0.0; 1295 sum6 = 0.0; 1296 sum7 = 0.0; 1297 sum8 = 0.0; 1298 1299 nonzerorow += (n>0); 1300 for (j=0; j<n; j++) { 1301 sum1 += v[jrow]*x[8*idx[jrow]]; 1302 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1303 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1304 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1305 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1306 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1307 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1308 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1309 jrow++; 1310 } 1311 y[8*i] = sum1; 1312 y[8*i+1] = sum2; 1313 y[8*i+2] = sum3; 1314 y[8*i+3] = sum4; 1315 y[8*i+4] = sum5; 1316 y[8*i+5] = sum6; 1317 y[8*i+6] = sum7; 1318 y[8*i+7] = sum8; 1319 } 1320 1321 ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 1322 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1323 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1324 PetscFunctionReturn(0); 1325 } 1326 1327 #undef __FUNCT__ 1328 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1329 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1330 { 1331 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1332 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1333 const PetscScalar *x,*v; 1334 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1335 PetscErrorCode ierr; 1336 const PetscInt m = b->AIJ->rmap->n,*idx; 1337 PetscInt n,i; 1338 1339 PetscFunctionBegin; 1340 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1341 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1342 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1343 1344 for (i=0; i<m; i++) { 1345 idx = a->j + a->i[i]; 1346 v = a->a + a->i[i]; 1347 n = a->i[i+1] - a->i[i]; 1348 alpha1 = x[8*i]; 1349 alpha2 = x[8*i+1]; 1350 alpha3 = x[8*i+2]; 1351 alpha4 = x[8*i+3]; 1352 alpha5 = x[8*i+4]; 1353 alpha6 = x[8*i+5]; 1354 alpha7 = x[8*i+6]; 1355 alpha8 = x[8*i+7]; 1356 while (n-->0) { 1357 y[8*(*idx)] += alpha1*(*v); 1358 y[8*(*idx)+1] += alpha2*(*v); 1359 y[8*(*idx)+2] += alpha3*(*v); 1360 y[8*(*idx)+3] += alpha4*(*v); 1361 y[8*(*idx)+4] += alpha5*(*v); 1362 y[8*(*idx)+5] += alpha6*(*v); 1363 y[8*(*idx)+6] += alpha7*(*v); 1364 y[8*(*idx)+7] += alpha8*(*v); 1365 idx++; v++; 1366 } 1367 } 1368 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1369 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1370 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1376 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1377 { 1378 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1379 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1380 const PetscScalar *x,*v; 1381 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1382 PetscErrorCode ierr; 1383 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1384 PetscInt n,i,jrow,j; 1385 1386 PetscFunctionBegin; 1387 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1388 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1389 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1390 idx = a->j; 1391 v = a->a; 1392 ii = a->i; 1393 1394 for (i=0; i<m; i++) { 1395 jrow = ii[i]; 1396 n = ii[i+1] - jrow; 1397 sum1 = 0.0; 1398 sum2 = 0.0; 1399 sum3 = 0.0; 1400 sum4 = 0.0; 1401 sum5 = 0.0; 1402 sum6 = 0.0; 1403 sum7 = 0.0; 1404 sum8 = 0.0; 1405 for (j=0; j<n; j++) { 1406 sum1 += v[jrow]*x[8*idx[jrow]]; 1407 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1408 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1409 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1410 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1411 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1412 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1413 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1414 jrow++; 1415 } 1416 y[8*i] += sum1; 1417 y[8*i+1] += sum2; 1418 y[8*i+2] += sum3; 1419 y[8*i+3] += sum4; 1420 y[8*i+4] += sum5; 1421 y[8*i+5] += sum6; 1422 y[8*i+6] += sum7; 1423 y[8*i+7] += sum8; 1424 } 1425 1426 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1427 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1428 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1434 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1435 { 1436 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1437 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1438 const PetscScalar *x,*v; 1439 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1440 PetscErrorCode ierr; 1441 const PetscInt m = b->AIJ->rmap->n,*idx; 1442 PetscInt n,i; 1443 1444 PetscFunctionBegin; 1445 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1446 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1447 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1448 for (i=0; i<m; i++) { 1449 idx = a->j + a->i[i]; 1450 v = a->a + a->i[i]; 1451 n = a->i[i+1] - a->i[i]; 1452 alpha1 = x[8*i]; 1453 alpha2 = x[8*i+1]; 1454 alpha3 = x[8*i+2]; 1455 alpha4 = x[8*i+3]; 1456 alpha5 = x[8*i+4]; 1457 alpha6 = x[8*i+5]; 1458 alpha7 = x[8*i+6]; 1459 alpha8 = x[8*i+7]; 1460 while (n-->0) { 1461 y[8*(*idx)] += alpha1*(*v); 1462 y[8*(*idx)+1] += alpha2*(*v); 1463 y[8*(*idx)+2] += alpha3*(*v); 1464 y[8*(*idx)+3] += alpha4*(*v); 1465 y[8*(*idx)+4] += alpha5*(*v); 1466 y[8*(*idx)+5] += alpha6*(*v); 1467 y[8*(*idx)+6] += alpha7*(*v); 1468 y[8*(*idx)+7] += alpha8*(*v); 1469 idx++; v++; 1470 } 1471 } 1472 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1473 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1474 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1475 PetscFunctionReturn(0); 1476 } 1477 1478 /* ------------------------------------------------------------------------------*/ 1479 #undef __FUNCT__ 1480 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1481 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1482 { 1483 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1484 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1485 const PetscScalar *x,*v; 1486 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1487 PetscErrorCode ierr; 1488 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1489 PetscInt nonzerorow=0,n,i,jrow,j; 1490 1491 PetscFunctionBegin; 1492 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1493 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1494 idx = a->j; 1495 v = a->a; 1496 ii = a->i; 1497 1498 for (i=0; i<m; i++) { 1499 jrow = ii[i]; 1500 n = ii[i+1] - jrow; 1501 sum1 = 0.0; 1502 sum2 = 0.0; 1503 sum3 = 0.0; 1504 sum4 = 0.0; 1505 sum5 = 0.0; 1506 sum6 = 0.0; 1507 sum7 = 0.0; 1508 sum8 = 0.0; 1509 sum9 = 0.0; 1510 1511 nonzerorow += (n>0); 1512 for (j=0; j<n; j++) { 1513 sum1 += v[jrow]*x[9*idx[jrow]]; 1514 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1515 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1516 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1517 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1518 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1519 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1520 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1521 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1522 jrow++; 1523 } 1524 y[9*i] = sum1; 1525 y[9*i+1] = sum2; 1526 y[9*i+2] = sum3; 1527 y[9*i+3] = sum4; 1528 y[9*i+4] = sum5; 1529 y[9*i+5] = sum6; 1530 y[9*i+6] = sum7; 1531 y[9*i+7] = sum8; 1532 y[9*i+8] = sum9; 1533 } 1534 1535 ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 1536 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1537 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1538 PetscFunctionReturn(0); 1539 } 1540 1541 /* ------------------------------------------------------------------------------*/ 1542 1543 #undef __FUNCT__ 1544 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1545 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1546 { 1547 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1548 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1549 const PetscScalar *x,*v; 1550 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1551 PetscErrorCode ierr; 1552 const PetscInt m = b->AIJ->rmap->n,*idx; 1553 PetscInt n,i; 1554 1555 PetscFunctionBegin; 1556 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1557 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1558 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1559 1560 for (i=0; i<m; i++) { 1561 idx = a->j + a->i[i]; 1562 v = a->a + a->i[i]; 1563 n = a->i[i+1] - a->i[i]; 1564 alpha1 = x[9*i]; 1565 alpha2 = x[9*i+1]; 1566 alpha3 = x[9*i+2]; 1567 alpha4 = x[9*i+3]; 1568 alpha5 = x[9*i+4]; 1569 alpha6 = x[9*i+5]; 1570 alpha7 = x[9*i+6]; 1571 alpha8 = x[9*i+7]; 1572 alpha9 = x[9*i+8]; 1573 while (n-->0) { 1574 y[9*(*idx)] += alpha1*(*v); 1575 y[9*(*idx)+1] += alpha2*(*v); 1576 y[9*(*idx)+2] += alpha3*(*v); 1577 y[9*(*idx)+3] += alpha4*(*v); 1578 y[9*(*idx)+4] += alpha5*(*v); 1579 y[9*(*idx)+5] += alpha6*(*v); 1580 y[9*(*idx)+6] += alpha7*(*v); 1581 y[9*(*idx)+7] += alpha8*(*v); 1582 y[9*(*idx)+8] += alpha9*(*v); 1583 idx++; v++; 1584 } 1585 } 1586 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1587 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1588 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1589 PetscFunctionReturn(0); 1590 } 1591 1592 #undef __FUNCT__ 1593 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1594 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1595 { 1596 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1597 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1598 const PetscScalar *x,*v; 1599 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1600 PetscErrorCode ierr; 1601 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1602 PetscInt n,i,jrow,j; 1603 1604 PetscFunctionBegin; 1605 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1606 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1607 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1608 idx = a->j; 1609 v = a->a; 1610 ii = a->i; 1611 1612 for (i=0; i<m; i++) { 1613 jrow = ii[i]; 1614 n = ii[i+1] - jrow; 1615 sum1 = 0.0; 1616 sum2 = 0.0; 1617 sum3 = 0.0; 1618 sum4 = 0.0; 1619 sum5 = 0.0; 1620 sum6 = 0.0; 1621 sum7 = 0.0; 1622 sum8 = 0.0; 1623 sum9 = 0.0; 1624 for (j=0; j<n; j++) { 1625 sum1 += v[jrow]*x[9*idx[jrow]]; 1626 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1627 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1628 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1629 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1630 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1631 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1632 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1633 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1634 jrow++; 1635 } 1636 y[9*i] += sum1; 1637 y[9*i+1] += sum2; 1638 y[9*i+2] += sum3; 1639 y[9*i+3] += sum4; 1640 y[9*i+4] += sum5; 1641 y[9*i+5] += sum6; 1642 y[9*i+6] += sum7; 1643 y[9*i+7] += sum8; 1644 y[9*i+8] += sum9; 1645 } 1646 1647 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1648 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1649 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1650 PetscFunctionReturn(0); 1651 } 1652 1653 #undef __FUNCT__ 1654 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1655 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1656 { 1657 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1658 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1659 const PetscScalar *x,*v; 1660 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1661 PetscErrorCode ierr; 1662 const PetscInt m = b->AIJ->rmap->n,*idx; 1663 PetscInt n,i; 1664 1665 PetscFunctionBegin; 1666 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1667 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1668 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1669 for (i=0; i<m; i++) { 1670 idx = a->j + a->i[i]; 1671 v = a->a + a->i[i]; 1672 n = a->i[i+1] - a->i[i]; 1673 alpha1 = x[9*i]; 1674 alpha2 = x[9*i+1]; 1675 alpha3 = x[9*i+2]; 1676 alpha4 = x[9*i+3]; 1677 alpha5 = x[9*i+4]; 1678 alpha6 = x[9*i+5]; 1679 alpha7 = x[9*i+6]; 1680 alpha8 = x[9*i+7]; 1681 alpha9 = x[9*i+8]; 1682 while (n-->0) { 1683 y[9*(*idx)] += alpha1*(*v); 1684 y[9*(*idx)+1] += alpha2*(*v); 1685 y[9*(*idx)+2] += alpha3*(*v); 1686 y[9*(*idx)+3] += alpha4*(*v); 1687 y[9*(*idx)+4] += alpha5*(*v); 1688 y[9*(*idx)+5] += alpha6*(*v); 1689 y[9*(*idx)+6] += alpha7*(*v); 1690 y[9*(*idx)+7] += alpha8*(*v); 1691 y[9*(*idx)+8] += alpha9*(*v); 1692 idx++; v++; 1693 } 1694 } 1695 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1696 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1697 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1698 PetscFunctionReturn(0); 1699 } 1700 #undef __FUNCT__ 1701 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1702 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1703 { 1704 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1705 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1706 const PetscScalar *x,*v; 1707 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1708 PetscErrorCode ierr; 1709 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1710 PetscInt nonzerorow=0,n,i,jrow,j; 1711 1712 PetscFunctionBegin; 1713 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1714 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1715 idx = a->j; 1716 v = a->a; 1717 ii = a->i; 1718 1719 for (i=0; i<m; i++) { 1720 jrow = ii[i]; 1721 n = ii[i+1] - jrow; 1722 sum1 = 0.0; 1723 sum2 = 0.0; 1724 sum3 = 0.0; 1725 sum4 = 0.0; 1726 sum5 = 0.0; 1727 sum6 = 0.0; 1728 sum7 = 0.0; 1729 sum8 = 0.0; 1730 sum9 = 0.0; 1731 sum10 = 0.0; 1732 1733 nonzerorow += (n>0); 1734 for (j=0; j<n; j++) { 1735 sum1 += v[jrow]*x[10*idx[jrow]]; 1736 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1737 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1738 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1739 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1740 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1741 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1742 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1743 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1744 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1745 jrow++; 1746 } 1747 y[10*i] = sum1; 1748 y[10*i+1] = sum2; 1749 y[10*i+2] = sum3; 1750 y[10*i+3] = sum4; 1751 y[10*i+4] = sum5; 1752 y[10*i+5] = sum6; 1753 y[10*i+6] = sum7; 1754 y[10*i+7] = sum8; 1755 y[10*i+8] = sum9; 1756 y[10*i+9] = sum10; 1757 } 1758 1759 ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 1760 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1761 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1762 PetscFunctionReturn(0); 1763 } 1764 1765 #undef __FUNCT__ 1766 #define __FUNCT__ "MatMultAdd_SeqMAIJ_10" 1767 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1768 { 1769 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1770 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1771 const PetscScalar *x,*v; 1772 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1773 PetscErrorCode ierr; 1774 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1775 PetscInt n,i,jrow,j; 1776 1777 PetscFunctionBegin; 1778 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1779 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1780 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1781 idx = a->j; 1782 v = a->a; 1783 ii = a->i; 1784 1785 for (i=0; i<m; i++) { 1786 jrow = ii[i]; 1787 n = ii[i+1] - jrow; 1788 sum1 = 0.0; 1789 sum2 = 0.0; 1790 sum3 = 0.0; 1791 sum4 = 0.0; 1792 sum5 = 0.0; 1793 sum6 = 0.0; 1794 sum7 = 0.0; 1795 sum8 = 0.0; 1796 sum9 = 0.0; 1797 sum10 = 0.0; 1798 for (j=0; j<n; j++) { 1799 sum1 += v[jrow]*x[10*idx[jrow]]; 1800 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1801 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1802 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1803 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1804 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1805 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1806 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1807 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1808 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1809 jrow++; 1810 } 1811 y[10*i] += sum1; 1812 y[10*i+1] += sum2; 1813 y[10*i+2] += sum3; 1814 y[10*i+3] += sum4; 1815 y[10*i+4] += sum5; 1816 y[10*i+5] += sum6; 1817 y[10*i+6] += sum7; 1818 y[10*i+7] += sum8; 1819 y[10*i+8] += sum9; 1820 y[10*i+9] += sum10; 1821 } 1822 1823 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1824 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1825 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1826 PetscFunctionReturn(0); 1827 } 1828 1829 #undef __FUNCT__ 1830 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1831 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1832 { 1833 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1834 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1835 const PetscScalar *x,*v; 1836 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1837 PetscErrorCode ierr; 1838 const PetscInt m = b->AIJ->rmap->n,*idx; 1839 PetscInt n,i; 1840 1841 PetscFunctionBegin; 1842 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1843 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1844 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1845 1846 for (i=0; i<m; i++) { 1847 idx = a->j + a->i[i]; 1848 v = a->a + a->i[i]; 1849 n = a->i[i+1] - a->i[i]; 1850 alpha1 = x[10*i]; 1851 alpha2 = x[10*i+1]; 1852 alpha3 = x[10*i+2]; 1853 alpha4 = x[10*i+3]; 1854 alpha5 = x[10*i+4]; 1855 alpha6 = x[10*i+5]; 1856 alpha7 = x[10*i+6]; 1857 alpha8 = x[10*i+7]; 1858 alpha9 = x[10*i+8]; 1859 alpha10 = x[10*i+9]; 1860 while (n-->0) { 1861 y[10*(*idx)] += alpha1*(*v); 1862 y[10*(*idx)+1] += alpha2*(*v); 1863 y[10*(*idx)+2] += alpha3*(*v); 1864 y[10*(*idx)+3] += alpha4*(*v); 1865 y[10*(*idx)+4] += alpha5*(*v); 1866 y[10*(*idx)+5] += alpha6*(*v); 1867 y[10*(*idx)+6] += alpha7*(*v); 1868 y[10*(*idx)+7] += alpha8*(*v); 1869 y[10*(*idx)+8] += alpha9*(*v); 1870 y[10*(*idx)+9] += alpha10*(*v); 1871 idx++; v++; 1872 } 1873 } 1874 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1875 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1876 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1877 PetscFunctionReturn(0); 1878 } 1879 1880 #undef __FUNCT__ 1881 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1882 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1883 { 1884 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1885 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1886 const PetscScalar *x,*v; 1887 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1888 PetscErrorCode ierr; 1889 const PetscInt m = b->AIJ->rmap->n,*idx; 1890 PetscInt n,i; 1891 1892 PetscFunctionBegin; 1893 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1894 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1895 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1896 for (i=0; i<m; i++) { 1897 idx = a->j + a->i[i]; 1898 v = a->a + a->i[i]; 1899 n = a->i[i+1] - a->i[i]; 1900 alpha1 = x[10*i]; 1901 alpha2 = x[10*i+1]; 1902 alpha3 = x[10*i+2]; 1903 alpha4 = x[10*i+3]; 1904 alpha5 = x[10*i+4]; 1905 alpha6 = x[10*i+5]; 1906 alpha7 = x[10*i+6]; 1907 alpha8 = x[10*i+7]; 1908 alpha9 = x[10*i+8]; 1909 alpha10 = x[10*i+9]; 1910 while (n-->0) { 1911 y[10*(*idx)] += alpha1*(*v); 1912 y[10*(*idx)+1] += alpha2*(*v); 1913 y[10*(*idx)+2] += alpha3*(*v); 1914 y[10*(*idx)+3] += alpha4*(*v); 1915 y[10*(*idx)+4] += alpha5*(*v); 1916 y[10*(*idx)+5] += alpha6*(*v); 1917 y[10*(*idx)+6] += alpha7*(*v); 1918 y[10*(*idx)+7] += alpha8*(*v); 1919 y[10*(*idx)+8] += alpha9*(*v); 1920 y[10*(*idx)+9] += alpha10*(*v); 1921 idx++; v++; 1922 } 1923 } 1924 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1925 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1926 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1927 PetscFunctionReturn(0); 1928 } 1929 1930 1931 /*--------------------------------------------------------------------------------------------*/ 1932 #undef __FUNCT__ 1933 #define __FUNCT__ "MatMult_SeqMAIJ_11" 1934 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1935 { 1936 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1937 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1938 const PetscScalar *x,*v; 1939 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1940 PetscErrorCode ierr; 1941 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1942 PetscInt nonzerorow=0,n,i,jrow,j; 1943 1944 PetscFunctionBegin; 1945 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1946 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1947 idx = a->j; 1948 v = a->a; 1949 ii = a->i; 1950 1951 for (i=0; i<m; i++) { 1952 jrow = ii[i]; 1953 n = ii[i+1] - jrow; 1954 sum1 = 0.0; 1955 sum2 = 0.0; 1956 sum3 = 0.0; 1957 sum4 = 0.0; 1958 sum5 = 0.0; 1959 sum6 = 0.0; 1960 sum7 = 0.0; 1961 sum8 = 0.0; 1962 sum9 = 0.0; 1963 sum10 = 0.0; 1964 sum11 = 0.0; 1965 1966 nonzerorow += (n>0); 1967 for (j=0; j<n; j++) { 1968 sum1 += v[jrow]*x[11*idx[jrow]]; 1969 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1970 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1971 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1972 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1973 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1974 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1975 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1976 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1977 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1978 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1979 jrow++; 1980 } 1981 y[11*i] = sum1; 1982 y[11*i+1] = sum2; 1983 y[11*i+2] = sum3; 1984 y[11*i+3] = sum4; 1985 y[11*i+4] = sum5; 1986 y[11*i+5] = sum6; 1987 y[11*i+6] = sum7; 1988 y[11*i+7] = sum8; 1989 y[11*i+8] = sum9; 1990 y[11*i+9] = sum10; 1991 y[11*i+10] = sum11; 1992 } 1993 1994 ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr); 1995 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1996 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1997 PetscFunctionReturn(0); 1998 } 1999 2000 #undef __FUNCT__ 2001 #define __FUNCT__ "MatMultAdd_SeqMAIJ_11" 2002 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2003 { 2004 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2005 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2006 const PetscScalar *x,*v; 2007 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 2008 PetscErrorCode ierr; 2009 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2010 PetscInt n,i,jrow,j; 2011 2012 PetscFunctionBegin; 2013 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2014 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2015 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2016 idx = a->j; 2017 v = a->a; 2018 ii = a->i; 2019 2020 for (i=0; i<m; i++) { 2021 jrow = ii[i]; 2022 n = ii[i+1] - jrow; 2023 sum1 = 0.0; 2024 sum2 = 0.0; 2025 sum3 = 0.0; 2026 sum4 = 0.0; 2027 sum5 = 0.0; 2028 sum6 = 0.0; 2029 sum7 = 0.0; 2030 sum8 = 0.0; 2031 sum9 = 0.0; 2032 sum10 = 0.0; 2033 sum11 = 0.0; 2034 for (j=0; j<n; j++) { 2035 sum1 += v[jrow]*x[11*idx[jrow]]; 2036 sum2 += v[jrow]*x[11*idx[jrow]+1]; 2037 sum3 += v[jrow]*x[11*idx[jrow]+2]; 2038 sum4 += v[jrow]*x[11*idx[jrow]+3]; 2039 sum5 += v[jrow]*x[11*idx[jrow]+4]; 2040 sum6 += v[jrow]*x[11*idx[jrow]+5]; 2041 sum7 += v[jrow]*x[11*idx[jrow]+6]; 2042 sum8 += v[jrow]*x[11*idx[jrow]+7]; 2043 sum9 += v[jrow]*x[11*idx[jrow]+8]; 2044 sum10 += v[jrow]*x[11*idx[jrow]+9]; 2045 sum11 += v[jrow]*x[11*idx[jrow]+10]; 2046 jrow++; 2047 } 2048 y[11*i] += sum1; 2049 y[11*i+1] += sum2; 2050 y[11*i+2] += sum3; 2051 y[11*i+3] += sum4; 2052 y[11*i+4] += sum5; 2053 y[11*i+5] += sum6; 2054 y[11*i+6] += sum7; 2055 y[11*i+7] += sum8; 2056 y[11*i+8] += sum9; 2057 y[11*i+9] += sum10; 2058 y[11*i+10] += sum11; 2059 } 2060 2061 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2062 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2063 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2064 PetscFunctionReturn(0); 2065 } 2066 2067 #undef __FUNCT__ 2068 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11" 2069 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 2070 { 2071 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2072 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2073 const PetscScalar *x,*v; 2074 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2075 PetscErrorCode ierr; 2076 const PetscInt m = b->AIJ->rmap->n,*idx; 2077 PetscInt n,i; 2078 2079 PetscFunctionBegin; 2080 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2081 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2082 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2083 2084 for (i=0; i<m; i++) { 2085 idx = a->j + a->i[i]; 2086 v = a->a + a->i[i]; 2087 n = a->i[i+1] - a->i[i]; 2088 alpha1 = x[11*i]; 2089 alpha2 = x[11*i+1]; 2090 alpha3 = x[11*i+2]; 2091 alpha4 = x[11*i+3]; 2092 alpha5 = x[11*i+4]; 2093 alpha6 = x[11*i+5]; 2094 alpha7 = x[11*i+6]; 2095 alpha8 = x[11*i+7]; 2096 alpha9 = x[11*i+8]; 2097 alpha10 = x[11*i+9]; 2098 alpha11 = x[11*i+10]; 2099 while (n-->0) { 2100 y[11*(*idx)] += alpha1*(*v); 2101 y[11*(*idx)+1] += alpha2*(*v); 2102 y[11*(*idx)+2] += alpha3*(*v); 2103 y[11*(*idx)+3] += alpha4*(*v); 2104 y[11*(*idx)+4] += alpha5*(*v); 2105 y[11*(*idx)+5] += alpha6*(*v); 2106 y[11*(*idx)+6] += alpha7*(*v); 2107 y[11*(*idx)+7] += alpha8*(*v); 2108 y[11*(*idx)+8] += alpha9*(*v); 2109 y[11*(*idx)+9] += alpha10*(*v); 2110 y[11*(*idx)+10] += alpha11*(*v); 2111 idx++; v++; 2112 } 2113 } 2114 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2115 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2116 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2117 PetscFunctionReturn(0); 2118 } 2119 2120 #undef __FUNCT__ 2121 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11" 2122 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2123 { 2124 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2125 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2126 const PetscScalar *x,*v; 2127 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2128 PetscErrorCode ierr; 2129 const PetscInt m = b->AIJ->rmap->n,*idx; 2130 PetscInt n,i; 2131 2132 PetscFunctionBegin; 2133 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2134 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2135 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2136 for (i=0; i<m; i++) { 2137 idx = a->j + a->i[i]; 2138 v = a->a + a->i[i]; 2139 n = a->i[i+1] - a->i[i]; 2140 alpha1 = x[11*i]; 2141 alpha2 = x[11*i+1]; 2142 alpha3 = x[11*i+2]; 2143 alpha4 = x[11*i+3]; 2144 alpha5 = x[11*i+4]; 2145 alpha6 = x[11*i+5]; 2146 alpha7 = x[11*i+6]; 2147 alpha8 = x[11*i+7]; 2148 alpha9 = x[11*i+8]; 2149 alpha10 = x[11*i+9]; 2150 alpha11 = x[11*i+10]; 2151 while (n-->0) { 2152 y[11*(*idx)] += alpha1*(*v); 2153 y[11*(*idx)+1] += alpha2*(*v); 2154 y[11*(*idx)+2] += alpha3*(*v); 2155 y[11*(*idx)+3] += alpha4*(*v); 2156 y[11*(*idx)+4] += alpha5*(*v); 2157 y[11*(*idx)+5] += alpha6*(*v); 2158 y[11*(*idx)+6] += alpha7*(*v); 2159 y[11*(*idx)+7] += alpha8*(*v); 2160 y[11*(*idx)+8] += alpha9*(*v); 2161 y[11*(*idx)+9] += alpha10*(*v); 2162 y[11*(*idx)+10] += alpha11*(*v); 2163 idx++; v++; 2164 } 2165 } 2166 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2167 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2168 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2169 PetscFunctionReturn(0); 2170 } 2171 2172 2173 /*--------------------------------------------------------------------------------------------*/ 2174 #undef __FUNCT__ 2175 #define __FUNCT__ "MatMult_SeqMAIJ_16" 2176 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2177 { 2178 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2179 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2180 const PetscScalar *x,*v; 2181 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2182 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2183 PetscErrorCode ierr; 2184 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2185 PetscInt nonzerorow=0,n,i,jrow,j; 2186 2187 PetscFunctionBegin; 2188 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2189 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2190 idx = a->j; 2191 v = a->a; 2192 ii = a->i; 2193 2194 for (i=0; i<m; i++) { 2195 jrow = ii[i]; 2196 n = ii[i+1] - jrow; 2197 sum1 = 0.0; 2198 sum2 = 0.0; 2199 sum3 = 0.0; 2200 sum4 = 0.0; 2201 sum5 = 0.0; 2202 sum6 = 0.0; 2203 sum7 = 0.0; 2204 sum8 = 0.0; 2205 sum9 = 0.0; 2206 sum10 = 0.0; 2207 sum11 = 0.0; 2208 sum12 = 0.0; 2209 sum13 = 0.0; 2210 sum14 = 0.0; 2211 sum15 = 0.0; 2212 sum16 = 0.0; 2213 2214 nonzerorow += (n>0); 2215 for (j=0; j<n; j++) { 2216 sum1 += v[jrow]*x[16*idx[jrow]]; 2217 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2218 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2219 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2220 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2221 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2222 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2223 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2224 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2225 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2226 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2227 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2228 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2229 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2230 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2231 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2232 jrow++; 2233 } 2234 y[16*i] = sum1; 2235 y[16*i+1] = sum2; 2236 y[16*i+2] = sum3; 2237 y[16*i+3] = sum4; 2238 y[16*i+4] = sum5; 2239 y[16*i+5] = sum6; 2240 y[16*i+6] = sum7; 2241 y[16*i+7] = sum8; 2242 y[16*i+8] = sum9; 2243 y[16*i+9] = sum10; 2244 y[16*i+10] = sum11; 2245 y[16*i+11] = sum12; 2246 y[16*i+12] = sum13; 2247 y[16*i+13] = sum14; 2248 y[16*i+14] = sum15; 2249 y[16*i+15] = sum16; 2250 } 2251 2252 ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 2253 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2254 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2255 PetscFunctionReturn(0); 2256 } 2257 2258 #undef __FUNCT__ 2259 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 2260 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2261 { 2262 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2263 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2264 const PetscScalar *x,*v; 2265 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2266 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2267 PetscErrorCode ierr; 2268 const PetscInt m = b->AIJ->rmap->n,*idx; 2269 PetscInt n,i; 2270 2271 PetscFunctionBegin; 2272 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2273 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2274 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2275 2276 for (i=0; i<m; i++) { 2277 idx = a->j + a->i[i]; 2278 v = a->a + a->i[i]; 2279 n = a->i[i+1] - a->i[i]; 2280 alpha1 = x[16*i]; 2281 alpha2 = x[16*i+1]; 2282 alpha3 = x[16*i+2]; 2283 alpha4 = x[16*i+3]; 2284 alpha5 = x[16*i+4]; 2285 alpha6 = x[16*i+5]; 2286 alpha7 = x[16*i+6]; 2287 alpha8 = x[16*i+7]; 2288 alpha9 = x[16*i+8]; 2289 alpha10 = x[16*i+9]; 2290 alpha11 = x[16*i+10]; 2291 alpha12 = x[16*i+11]; 2292 alpha13 = x[16*i+12]; 2293 alpha14 = x[16*i+13]; 2294 alpha15 = x[16*i+14]; 2295 alpha16 = x[16*i+15]; 2296 while (n-->0) { 2297 y[16*(*idx)] += alpha1*(*v); 2298 y[16*(*idx)+1] += alpha2*(*v); 2299 y[16*(*idx)+2] += alpha3*(*v); 2300 y[16*(*idx)+3] += alpha4*(*v); 2301 y[16*(*idx)+4] += alpha5*(*v); 2302 y[16*(*idx)+5] += alpha6*(*v); 2303 y[16*(*idx)+6] += alpha7*(*v); 2304 y[16*(*idx)+7] += alpha8*(*v); 2305 y[16*(*idx)+8] += alpha9*(*v); 2306 y[16*(*idx)+9] += alpha10*(*v); 2307 y[16*(*idx)+10] += alpha11*(*v); 2308 y[16*(*idx)+11] += alpha12*(*v); 2309 y[16*(*idx)+12] += alpha13*(*v); 2310 y[16*(*idx)+13] += alpha14*(*v); 2311 y[16*(*idx)+14] += alpha15*(*v); 2312 y[16*(*idx)+15] += alpha16*(*v); 2313 idx++; v++; 2314 } 2315 } 2316 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2317 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2318 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2319 PetscFunctionReturn(0); 2320 } 2321 2322 #undef __FUNCT__ 2323 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 2324 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2325 { 2326 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2327 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2328 const PetscScalar *x,*v; 2329 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2330 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2331 PetscErrorCode ierr; 2332 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2333 PetscInt n,i,jrow,j; 2334 2335 PetscFunctionBegin; 2336 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2337 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2338 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2339 idx = a->j; 2340 v = a->a; 2341 ii = a->i; 2342 2343 for (i=0; i<m; i++) { 2344 jrow = ii[i]; 2345 n = ii[i+1] - jrow; 2346 sum1 = 0.0; 2347 sum2 = 0.0; 2348 sum3 = 0.0; 2349 sum4 = 0.0; 2350 sum5 = 0.0; 2351 sum6 = 0.0; 2352 sum7 = 0.0; 2353 sum8 = 0.0; 2354 sum9 = 0.0; 2355 sum10 = 0.0; 2356 sum11 = 0.0; 2357 sum12 = 0.0; 2358 sum13 = 0.0; 2359 sum14 = 0.0; 2360 sum15 = 0.0; 2361 sum16 = 0.0; 2362 for (j=0; j<n; j++) { 2363 sum1 += v[jrow]*x[16*idx[jrow]]; 2364 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2365 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2366 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2367 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2368 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2369 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2370 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2371 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2372 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2373 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2374 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2375 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2376 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2377 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2378 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2379 jrow++; 2380 } 2381 y[16*i] += sum1; 2382 y[16*i+1] += sum2; 2383 y[16*i+2] += sum3; 2384 y[16*i+3] += sum4; 2385 y[16*i+4] += sum5; 2386 y[16*i+5] += sum6; 2387 y[16*i+6] += sum7; 2388 y[16*i+7] += sum8; 2389 y[16*i+8] += sum9; 2390 y[16*i+9] += sum10; 2391 y[16*i+10] += sum11; 2392 y[16*i+11] += sum12; 2393 y[16*i+12] += sum13; 2394 y[16*i+13] += sum14; 2395 y[16*i+14] += sum15; 2396 y[16*i+15] += sum16; 2397 } 2398 2399 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2400 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2401 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2402 PetscFunctionReturn(0); 2403 } 2404 2405 #undef __FUNCT__ 2406 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2407 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2408 { 2409 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2410 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2411 const PetscScalar *x,*v; 2412 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2413 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2414 PetscErrorCode ierr; 2415 const PetscInt m = b->AIJ->rmap->n,*idx; 2416 PetscInt n,i; 2417 2418 PetscFunctionBegin; 2419 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2420 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2421 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2422 for (i=0; i<m; i++) { 2423 idx = a->j + a->i[i]; 2424 v = a->a + a->i[i]; 2425 n = a->i[i+1] - a->i[i]; 2426 alpha1 = x[16*i]; 2427 alpha2 = x[16*i+1]; 2428 alpha3 = x[16*i+2]; 2429 alpha4 = x[16*i+3]; 2430 alpha5 = x[16*i+4]; 2431 alpha6 = x[16*i+5]; 2432 alpha7 = x[16*i+6]; 2433 alpha8 = x[16*i+7]; 2434 alpha9 = x[16*i+8]; 2435 alpha10 = x[16*i+9]; 2436 alpha11 = x[16*i+10]; 2437 alpha12 = x[16*i+11]; 2438 alpha13 = x[16*i+12]; 2439 alpha14 = x[16*i+13]; 2440 alpha15 = x[16*i+14]; 2441 alpha16 = x[16*i+15]; 2442 while (n-->0) { 2443 y[16*(*idx)] += alpha1*(*v); 2444 y[16*(*idx)+1] += alpha2*(*v); 2445 y[16*(*idx)+2] += alpha3*(*v); 2446 y[16*(*idx)+3] += alpha4*(*v); 2447 y[16*(*idx)+4] += alpha5*(*v); 2448 y[16*(*idx)+5] += alpha6*(*v); 2449 y[16*(*idx)+6] += alpha7*(*v); 2450 y[16*(*idx)+7] += alpha8*(*v); 2451 y[16*(*idx)+8] += alpha9*(*v); 2452 y[16*(*idx)+9] += alpha10*(*v); 2453 y[16*(*idx)+10] += alpha11*(*v); 2454 y[16*(*idx)+11] += alpha12*(*v); 2455 y[16*(*idx)+12] += alpha13*(*v); 2456 y[16*(*idx)+13] += alpha14*(*v); 2457 y[16*(*idx)+14] += alpha15*(*v); 2458 y[16*(*idx)+15] += alpha16*(*v); 2459 idx++; v++; 2460 } 2461 } 2462 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2463 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2464 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2465 PetscFunctionReturn(0); 2466 } 2467 2468 /*--------------------------------------------------------------------------------------------*/ 2469 #undef __FUNCT__ 2470 #define __FUNCT__ "MatMult_SeqMAIJ_18" 2471 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2472 { 2473 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2474 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2475 const PetscScalar *x,*v; 2476 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2477 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2478 PetscErrorCode ierr; 2479 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2480 PetscInt nonzerorow=0,n,i,jrow,j; 2481 2482 PetscFunctionBegin; 2483 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2484 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2485 idx = a->j; 2486 v = a->a; 2487 ii = a->i; 2488 2489 for (i=0; i<m; i++) { 2490 jrow = ii[i]; 2491 n = ii[i+1] - jrow; 2492 sum1 = 0.0; 2493 sum2 = 0.0; 2494 sum3 = 0.0; 2495 sum4 = 0.0; 2496 sum5 = 0.0; 2497 sum6 = 0.0; 2498 sum7 = 0.0; 2499 sum8 = 0.0; 2500 sum9 = 0.0; 2501 sum10 = 0.0; 2502 sum11 = 0.0; 2503 sum12 = 0.0; 2504 sum13 = 0.0; 2505 sum14 = 0.0; 2506 sum15 = 0.0; 2507 sum16 = 0.0; 2508 sum17 = 0.0; 2509 sum18 = 0.0; 2510 2511 nonzerorow += (n>0); 2512 for (j=0; j<n; j++) { 2513 sum1 += v[jrow]*x[18*idx[jrow]]; 2514 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2515 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2516 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2517 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2518 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2519 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2520 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2521 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2522 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2523 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2524 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2525 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2526 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2527 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2528 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2529 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2530 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2531 jrow++; 2532 } 2533 y[18*i] = sum1; 2534 y[18*i+1] = sum2; 2535 y[18*i+2] = sum3; 2536 y[18*i+3] = sum4; 2537 y[18*i+4] = sum5; 2538 y[18*i+5] = sum6; 2539 y[18*i+6] = sum7; 2540 y[18*i+7] = sum8; 2541 y[18*i+8] = sum9; 2542 y[18*i+9] = sum10; 2543 y[18*i+10] = sum11; 2544 y[18*i+11] = sum12; 2545 y[18*i+12] = sum13; 2546 y[18*i+13] = sum14; 2547 y[18*i+14] = sum15; 2548 y[18*i+15] = sum16; 2549 y[18*i+16] = sum17; 2550 y[18*i+17] = sum18; 2551 } 2552 2553 ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 2554 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2555 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2556 PetscFunctionReturn(0); 2557 } 2558 2559 #undef __FUNCT__ 2560 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18" 2561 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2562 { 2563 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2564 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2565 const PetscScalar *x,*v; 2566 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2567 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2568 PetscErrorCode ierr; 2569 const PetscInt m = b->AIJ->rmap->n,*idx; 2570 PetscInt n,i; 2571 2572 PetscFunctionBegin; 2573 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2574 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2575 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2576 2577 for (i=0; i<m; i++) { 2578 idx = a->j + a->i[i]; 2579 v = a->a + a->i[i]; 2580 n = a->i[i+1] - a->i[i]; 2581 alpha1 = x[18*i]; 2582 alpha2 = x[18*i+1]; 2583 alpha3 = x[18*i+2]; 2584 alpha4 = x[18*i+3]; 2585 alpha5 = x[18*i+4]; 2586 alpha6 = x[18*i+5]; 2587 alpha7 = x[18*i+6]; 2588 alpha8 = x[18*i+7]; 2589 alpha9 = x[18*i+8]; 2590 alpha10 = x[18*i+9]; 2591 alpha11 = x[18*i+10]; 2592 alpha12 = x[18*i+11]; 2593 alpha13 = x[18*i+12]; 2594 alpha14 = x[18*i+13]; 2595 alpha15 = x[18*i+14]; 2596 alpha16 = x[18*i+15]; 2597 alpha17 = x[18*i+16]; 2598 alpha18 = x[18*i+17]; 2599 while (n-->0) { 2600 y[18*(*idx)] += alpha1*(*v); 2601 y[18*(*idx)+1] += alpha2*(*v); 2602 y[18*(*idx)+2] += alpha3*(*v); 2603 y[18*(*idx)+3] += alpha4*(*v); 2604 y[18*(*idx)+4] += alpha5*(*v); 2605 y[18*(*idx)+5] += alpha6*(*v); 2606 y[18*(*idx)+6] += alpha7*(*v); 2607 y[18*(*idx)+7] += alpha8*(*v); 2608 y[18*(*idx)+8] += alpha9*(*v); 2609 y[18*(*idx)+9] += alpha10*(*v); 2610 y[18*(*idx)+10] += alpha11*(*v); 2611 y[18*(*idx)+11] += alpha12*(*v); 2612 y[18*(*idx)+12] += alpha13*(*v); 2613 y[18*(*idx)+13] += alpha14*(*v); 2614 y[18*(*idx)+14] += alpha15*(*v); 2615 y[18*(*idx)+15] += alpha16*(*v); 2616 y[18*(*idx)+16] += alpha17*(*v); 2617 y[18*(*idx)+17] += alpha18*(*v); 2618 idx++; v++; 2619 } 2620 } 2621 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2622 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2623 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2624 PetscFunctionReturn(0); 2625 } 2626 2627 #undef __FUNCT__ 2628 #define __FUNCT__ "MatMultAdd_SeqMAIJ_18" 2629 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2630 { 2631 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2632 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2633 const PetscScalar *x,*v; 2634 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2635 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2636 PetscErrorCode ierr; 2637 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2638 PetscInt n,i,jrow,j; 2639 2640 PetscFunctionBegin; 2641 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2642 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2643 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2644 idx = a->j; 2645 v = a->a; 2646 ii = a->i; 2647 2648 for (i=0; i<m; i++) { 2649 jrow = ii[i]; 2650 n = ii[i+1] - jrow; 2651 sum1 = 0.0; 2652 sum2 = 0.0; 2653 sum3 = 0.0; 2654 sum4 = 0.0; 2655 sum5 = 0.0; 2656 sum6 = 0.0; 2657 sum7 = 0.0; 2658 sum8 = 0.0; 2659 sum9 = 0.0; 2660 sum10 = 0.0; 2661 sum11 = 0.0; 2662 sum12 = 0.0; 2663 sum13 = 0.0; 2664 sum14 = 0.0; 2665 sum15 = 0.0; 2666 sum16 = 0.0; 2667 sum17 = 0.0; 2668 sum18 = 0.0; 2669 for (j=0; j<n; j++) { 2670 sum1 += v[jrow]*x[18*idx[jrow]]; 2671 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2672 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2673 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2674 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2675 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2676 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2677 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2678 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2679 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2680 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2681 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2682 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2683 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2684 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2685 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2686 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2687 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2688 jrow++; 2689 } 2690 y[18*i] += sum1; 2691 y[18*i+1] += sum2; 2692 y[18*i+2] += sum3; 2693 y[18*i+3] += sum4; 2694 y[18*i+4] += sum5; 2695 y[18*i+5] += sum6; 2696 y[18*i+6] += sum7; 2697 y[18*i+7] += sum8; 2698 y[18*i+8] += sum9; 2699 y[18*i+9] += sum10; 2700 y[18*i+10] += sum11; 2701 y[18*i+11] += sum12; 2702 y[18*i+12] += sum13; 2703 y[18*i+13] += sum14; 2704 y[18*i+14] += sum15; 2705 y[18*i+15] += sum16; 2706 y[18*i+16] += sum17; 2707 y[18*i+17] += sum18; 2708 } 2709 2710 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2711 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2712 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2713 PetscFunctionReturn(0); 2714 } 2715 2716 #undef __FUNCT__ 2717 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18" 2718 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2719 { 2720 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2721 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2722 const PetscScalar *x,*v; 2723 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2724 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2725 PetscErrorCode ierr; 2726 const PetscInt m = b->AIJ->rmap->n,*idx; 2727 PetscInt n,i; 2728 2729 PetscFunctionBegin; 2730 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2731 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2732 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2733 for (i=0; i<m; i++) { 2734 idx = a->j + a->i[i]; 2735 v = a->a + a->i[i]; 2736 n = a->i[i+1] - a->i[i]; 2737 alpha1 = x[18*i]; 2738 alpha2 = x[18*i+1]; 2739 alpha3 = x[18*i+2]; 2740 alpha4 = x[18*i+3]; 2741 alpha5 = x[18*i+4]; 2742 alpha6 = x[18*i+5]; 2743 alpha7 = x[18*i+6]; 2744 alpha8 = x[18*i+7]; 2745 alpha9 = x[18*i+8]; 2746 alpha10 = x[18*i+9]; 2747 alpha11 = x[18*i+10]; 2748 alpha12 = x[18*i+11]; 2749 alpha13 = x[18*i+12]; 2750 alpha14 = x[18*i+13]; 2751 alpha15 = x[18*i+14]; 2752 alpha16 = x[18*i+15]; 2753 alpha17 = x[18*i+16]; 2754 alpha18 = x[18*i+17]; 2755 while (n-->0) { 2756 y[18*(*idx)] += alpha1*(*v); 2757 y[18*(*idx)+1] += alpha2*(*v); 2758 y[18*(*idx)+2] += alpha3*(*v); 2759 y[18*(*idx)+3] += alpha4*(*v); 2760 y[18*(*idx)+4] += alpha5*(*v); 2761 y[18*(*idx)+5] += alpha6*(*v); 2762 y[18*(*idx)+6] += alpha7*(*v); 2763 y[18*(*idx)+7] += alpha8*(*v); 2764 y[18*(*idx)+8] += alpha9*(*v); 2765 y[18*(*idx)+9] += alpha10*(*v); 2766 y[18*(*idx)+10] += alpha11*(*v); 2767 y[18*(*idx)+11] += alpha12*(*v); 2768 y[18*(*idx)+12] += alpha13*(*v); 2769 y[18*(*idx)+13] += alpha14*(*v); 2770 y[18*(*idx)+14] += alpha15*(*v); 2771 y[18*(*idx)+15] += alpha16*(*v); 2772 y[18*(*idx)+16] += alpha17*(*v); 2773 y[18*(*idx)+17] += alpha18*(*v); 2774 idx++; v++; 2775 } 2776 } 2777 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2778 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2779 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2780 PetscFunctionReturn(0); 2781 } 2782 2783 #undef __FUNCT__ 2784 #define __FUNCT__ "MatMult_SeqMAIJ_N" 2785 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2786 { 2787 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2788 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2789 const PetscScalar *x,*v; 2790 PetscScalar *y,*sums; 2791 PetscErrorCode ierr; 2792 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2793 PetscInt n,i,jrow,j,dof = b->dof,k; 2794 2795 PetscFunctionBegin; 2796 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2797 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2798 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2799 idx = a->j; 2800 v = a->a; 2801 ii = a->i; 2802 2803 for (i=0; i<m; i++) { 2804 jrow = ii[i]; 2805 n = ii[i+1] - jrow; 2806 sums = y + dof*i; 2807 for (j=0; j<n; j++) { 2808 for (k=0; k<dof; k++) { 2809 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2810 } 2811 jrow++; 2812 } 2813 } 2814 2815 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2816 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2817 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2818 PetscFunctionReturn(0); 2819 } 2820 2821 #undef __FUNCT__ 2822 #define __FUNCT__ "MatMultAdd_SeqMAIJ_N" 2823 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2824 { 2825 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2826 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2827 const PetscScalar *x,*v; 2828 PetscScalar *y,*sums; 2829 PetscErrorCode ierr; 2830 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2831 PetscInt n,i,jrow,j,dof = b->dof,k; 2832 2833 PetscFunctionBegin; 2834 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2835 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2836 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2837 idx = a->j; 2838 v = a->a; 2839 ii = a->i; 2840 2841 for (i=0; i<m; i++) { 2842 jrow = ii[i]; 2843 n = ii[i+1] - jrow; 2844 sums = y + dof*i; 2845 for (j=0; j<n; j++) { 2846 for (k=0; k<dof; k++) { 2847 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2848 } 2849 jrow++; 2850 } 2851 } 2852 2853 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2854 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2855 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2856 PetscFunctionReturn(0); 2857 } 2858 2859 #undef __FUNCT__ 2860 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N" 2861 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2862 { 2863 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2864 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2865 const PetscScalar *x,*v,*alpha; 2866 PetscScalar *y; 2867 PetscErrorCode ierr; 2868 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2869 PetscInt n,i,k; 2870 2871 PetscFunctionBegin; 2872 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2873 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2874 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2875 for (i=0; i<m; i++) { 2876 idx = a->j + a->i[i]; 2877 v = a->a + a->i[i]; 2878 n = a->i[i+1] - a->i[i]; 2879 alpha = x + dof*i; 2880 while (n-->0) { 2881 for (k=0; k<dof; k++) { 2882 y[dof*(*idx)+k] += alpha[k]*(*v); 2883 } 2884 idx++; v++; 2885 } 2886 } 2887 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2888 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2889 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2890 PetscFunctionReturn(0); 2891 } 2892 2893 #undef __FUNCT__ 2894 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N" 2895 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2896 { 2897 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2898 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2899 const PetscScalar *x,*v,*alpha; 2900 PetscScalar *y; 2901 PetscErrorCode ierr; 2902 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2903 PetscInt n,i,k; 2904 2905 PetscFunctionBegin; 2906 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2907 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2908 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2909 for (i=0; i<m; i++) { 2910 idx = a->j + a->i[i]; 2911 v = a->a + a->i[i]; 2912 n = a->i[i+1] - a->i[i]; 2913 alpha = x + dof*i; 2914 while (n-->0) { 2915 for (k=0; k<dof; k++) { 2916 y[dof*(*idx)+k] += alpha[k]*(*v); 2917 } 2918 idx++; v++; 2919 } 2920 } 2921 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2922 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2923 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2924 PetscFunctionReturn(0); 2925 } 2926 2927 /*===================================================================================*/ 2928 #undef __FUNCT__ 2929 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2930 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2931 { 2932 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2933 PetscErrorCode ierr; 2934 2935 PetscFunctionBegin; 2936 /* start the scatter */ 2937 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2938 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2939 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2940 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2941 PetscFunctionReturn(0); 2942 } 2943 2944 #undef __FUNCT__ 2945 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2946 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2947 { 2948 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2949 PetscErrorCode ierr; 2950 2951 PetscFunctionBegin; 2952 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2953 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2954 ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2955 ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2956 PetscFunctionReturn(0); 2957 } 2958 2959 #undef __FUNCT__ 2960 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2961 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2962 { 2963 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2964 PetscErrorCode ierr; 2965 2966 PetscFunctionBegin; 2967 /* start the scatter */ 2968 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2969 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2970 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2971 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2972 PetscFunctionReturn(0); 2973 } 2974 2975 #undef __FUNCT__ 2976 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2977 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2978 { 2979 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2980 PetscErrorCode ierr; 2981 2982 PetscFunctionBegin; 2983 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2984 ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2985 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2986 ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2987 PetscFunctionReturn(0); 2988 } 2989 2990 /* ----------------------------------------------------------------*/ 2991 #undef __FUNCT__ 2992 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 2993 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2994 { 2995 PetscErrorCode ierr; 2996 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2997 Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data; 2998 Mat P =pp->AIJ; 2999 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 3000 PetscInt *pti,*ptj,*ptJ; 3001 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 3002 const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 3003 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 3004 MatScalar *ca; 3005 const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 3006 3007 PetscFunctionBegin; 3008 /* Get ij structure of P^T */ 3009 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 3010 3011 cn = pn*ppdof; 3012 /* Allocate ci array, arrays for fill computation and */ 3013 /* free space for accumulating nonzero column info */ 3014 ierr = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr); 3015 ci[0] = 0; 3016 3017 /* Work arrays for rows of P^T*A */ 3018 ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr); 3019 ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr); 3020 ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr); 3021 3022 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 3023 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 3024 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 3025 ierr = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr); 3026 current_space = free_space; 3027 3028 /* Determine symbolic info for each row of C: */ 3029 for (i=0; i<pn; i++) { 3030 ptnzi = pti[i+1] - pti[i]; 3031 ptJ = ptj + pti[i]; 3032 for (dof=0; dof<ppdof; dof++) { 3033 ptanzi = 0; 3034 /* Determine symbolic row of PtA: */ 3035 for (j=0; j<ptnzi; j++) { 3036 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 3037 arow = ptJ[j]*ppdof + dof; 3038 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 3039 anzj = ai[arow+1] - ai[arow]; 3040 ajj = aj + ai[arow]; 3041 for (k=0; k<anzj; k++) { 3042 if (!ptadenserow[ajj[k]]) { 3043 ptadenserow[ajj[k]] = -1; 3044 ptasparserow[ptanzi++] = ajj[k]; 3045 } 3046 } 3047 } 3048 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 3049 ptaj = ptasparserow; 3050 cnzi = 0; 3051 for (j=0; j<ptanzi; j++) { 3052 /* Get offset within block of P */ 3053 pshift = *ptaj%ppdof; 3054 /* Get block row of P */ 3055 prow = (*ptaj++)/ppdof; /* integer division */ 3056 /* P has same number of nonzeros per row as the compressed form */ 3057 pnzj = pi[prow+1] - pi[prow]; 3058 pjj = pj + pi[prow]; 3059 for (k=0;k<pnzj;k++) { 3060 /* Locations in C are shifted by the offset within the block */ 3061 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 3062 if (!denserow[pjj[k]*ppdof+pshift]) { 3063 denserow[pjj[k]*ppdof+pshift] = -1; 3064 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 3065 } 3066 } 3067 } 3068 3069 /* sort sparserow */ 3070 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 3071 3072 /* If free space is not available, make more free space */ 3073 /* Double the amount of total space in the list */ 3074 if (current_space->local_remaining<cnzi) { 3075 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3076 } 3077 3078 /* Copy data into free space, and zero out denserows */ 3079 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 3080 3081 current_space->array += cnzi; 3082 current_space->local_used += cnzi; 3083 current_space->local_remaining -= cnzi; 3084 3085 for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 3086 for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0; 3087 3088 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 3089 /* For now, we will recompute what is needed. */ 3090 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 3091 } 3092 } 3093 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 3094 /* Allocate space for cj, initialize cj, and */ 3095 /* destroy list of free space and other temporary array(s) */ 3096 ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr); 3097 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 3098 ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr); 3099 3100 /* Allocate space for ca */ 3101 ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr); 3102 3103 /* put together the new matrix */ 3104 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 3105 ierr = MatSetBlockSize(*C,pp->dof);CHKERRQ(ierr); 3106 3107 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3108 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3109 c = (Mat_SeqAIJ*)((*C)->data); 3110 c->free_a = PETSC_TRUE; 3111 c->free_ij = PETSC_TRUE; 3112 c->nonew = 0; 3113 3114 (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 3115 3116 /* Clean up. */ 3117 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 3118 PetscFunctionReturn(0); 3119 } 3120 3121 #undef __FUNCT__ 3122 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 3123 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 3124 { 3125 /* This routine requires testing -- first draft only */ 3126 PetscErrorCode ierr; 3127 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 3128 Mat P =pp->AIJ; 3129 Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data; 3130 Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data; 3131 Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data; 3132 const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj; 3133 const PetscInt *ci=c->i,*cj=c->j,*cjj; 3134 const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3135 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3136 const MatScalar *aa=a->a,*pa=p->a,*pA,*paj; 3137 MatScalar *ca=c->a,*caj,*apa; 3138 3139 PetscFunctionBegin; 3140 /* Allocate temporary array for storage of one row of A*P */ 3141 ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr); 3142 3143 /* Clear old values in C */ 3144 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 3145 3146 for (i=0; i<am; i++) { 3147 /* Form sparse row of A*P */ 3148 anzi = ai[i+1] - ai[i]; 3149 apnzj = 0; 3150 for (j=0; j<anzi; j++) { 3151 /* Get offset within block of P */ 3152 pshift = *aj%ppdof; 3153 /* Get block row of P */ 3154 prow = *aj++/ppdof; /* integer division */ 3155 pnzj = pi[prow+1] - pi[prow]; 3156 pjj = pj + pi[prow]; 3157 paj = pa + pi[prow]; 3158 for (k=0; k<pnzj; k++) { 3159 poffset = pjj[k]*ppdof+pshift; 3160 if (!apjdense[poffset]) { 3161 apjdense[poffset] = -1; 3162 apj[apnzj++] = poffset; 3163 } 3164 apa[poffset] += (*aa)*paj[k]; 3165 } 3166 ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 3167 aa++; 3168 } 3169 3170 /* Sort the j index array for quick sparse axpy. */ 3171 /* Note: a array does not need sorting as it is in dense storage locations. */ 3172 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 3173 3174 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 3175 prow = i/ppdof; /* integer division */ 3176 pshift = i%ppdof; 3177 poffset = pi[prow]; 3178 pnzi = pi[prow+1] - poffset; 3179 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 3180 pJ = pj+poffset; 3181 pA = pa+poffset; 3182 for (j=0; j<pnzi; j++) { 3183 crow = (*pJ)*ppdof+pshift; 3184 cjj = cj + ci[crow]; 3185 caj = ca + ci[crow]; 3186 pJ++; 3187 /* Perform sparse axpy operation. Note cjj includes apj. */ 3188 for (k=0,nextap=0; nextap<apnzj; k++) { 3189 if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]]; 3190 } 3191 ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 3192 pA++; 3193 } 3194 3195 /* Zero the current row info for A*P */ 3196 for (j=0; j<apnzj; j++) { 3197 apa[apj[j]] = 0.; 3198 apjdense[apj[j]] = 0; 3199 } 3200 } 3201 3202 /* Assemble the final matrix and clean up */ 3203 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3204 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3205 ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr); 3206 PetscFunctionReturn(0); 3207 } 3208 3209 #undef __FUNCT__ 3210 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqMAIJ" 3211 PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3212 { 3213 PetscErrorCode ierr; 3214 3215 PetscFunctionBegin; 3216 if (scall == MAT_INITIAL_MATRIX) { 3217 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3218 ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr); 3219 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3220 } 3221 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3222 ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr); 3223 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3224 PetscFunctionReturn(0); 3225 } 3226 3227 #undef __FUNCT__ 3228 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ" 3229 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 3230 { 3231 PetscErrorCode ierr; 3232 3233 PetscFunctionBegin; 3234 /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 3235 ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr); 3236 ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 3237 PetscFunctionReturn(0); 3238 } 3239 3240 #undef __FUNCT__ 3241 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ" 3242 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 3243 { 3244 PetscFunctionBegin; 3245 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 3246 PetscFunctionReturn(0); 3247 } 3248 3249 #undef __FUNCT__ 3250 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIMAIJ" 3251 PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3252 { 3253 PetscErrorCode ierr; 3254 3255 PetscFunctionBegin; 3256 if (scall == MAT_INITIAL_MATRIX) { 3257 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3258 ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);CHKERRQ(ierr); 3259 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3260 } 3261 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3262 ierr = ((*C)->ops->ptapnumeric)(A,P,*C);CHKERRQ(ierr); 3263 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3264 PetscFunctionReturn(0); 3265 } 3266 3267 #undef __FUNCT__ 3268 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 3269 PETSC_EXTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3270 { 3271 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3272 Mat a = b->AIJ,B; 3273 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 3274 PetscErrorCode ierr; 3275 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3276 PetscInt *cols; 3277 PetscScalar *vals; 3278 3279 PetscFunctionBegin; 3280 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 3281 ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr); 3282 for (i=0; i<m; i++) { 3283 nmax = PetscMax(nmax,aij->ilen[i]); 3284 for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i]; 3285 } 3286 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 3287 ierr = PetscFree(ilen);CHKERRQ(ierr); 3288 ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr); 3289 ii = 0; 3290 for (i=0; i<m; i++) { 3291 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3292 for (j=0; j<dof; j++) { 3293 for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j; 3294 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3295 ii++; 3296 } 3297 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3298 } 3299 ierr = PetscFree(icols);CHKERRQ(ierr); 3300 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3301 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3302 3303 if (reuse == MAT_REUSE_MATRIX) { 3304 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3305 } else { 3306 *newmat = B; 3307 } 3308 PetscFunctionReturn(0); 3309 } 3310 3311 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3312 3313 #undef __FUNCT__ 3314 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 3315 PETSC_EXTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3316 { 3317 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3318 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 3319 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 3320 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 3321 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 3322 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 3323 PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0; 3324 PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL; 3325 PetscInt rstart,cstart,*garray,ii,k; 3326 PetscErrorCode ierr; 3327 PetscScalar *vals,*ovals; 3328 3329 PetscFunctionBegin; 3330 ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr); 3331 for (i=0; i<A->rmap->n/dof; i++) { 3332 nmax = PetscMax(nmax,AIJ->ilen[i]); 3333 onmax = PetscMax(onmax,OAIJ->ilen[i]); 3334 for (j=0; j<dof; j++) { 3335 dnz[dof*i+j] = AIJ->ilen[i]; 3336 onz[dof*i+j] = OAIJ->ilen[i]; 3337 } 3338 } 3339 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 3340 ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr); 3341 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 3342 3343 ierr = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr); 3344 rstart = dof*maij->A->rmap->rstart; 3345 cstart = dof*maij->A->cmap->rstart; 3346 garray = mpiaij->garray; 3347 3348 ii = rstart; 3349 for (i=0; i<A->rmap->n/dof; i++) { 3350 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3351 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3352 for (j=0; j<dof; j++) { 3353 for (k=0; k<ncols; k++) { 3354 icols[k] = cstart + dof*cols[k]+j; 3355 } 3356 for (k=0; k<oncols; k++) { 3357 oicols[k] = dof*garray[ocols[k]]+j; 3358 } 3359 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3360 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 3361 ii++; 3362 } 3363 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3364 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3365 } 3366 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 3367 3368 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3369 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3370 3371 if (reuse == MAT_REUSE_MATRIX) { 3372 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 3373 ((PetscObject)A)->refct = 1; 3374 3375 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3376 3377 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3378 } else { 3379 *newmat = B; 3380 } 3381 PetscFunctionReturn(0); 3382 } 3383 3384 #undef __FUNCT__ 3385 #define __FUNCT__ "MatGetSubMatrix_MAIJ" 3386 PetscErrorCode MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 3387 { 3388 PetscErrorCode ierr; 3389 Mat A; 3390 3391 PetscFunctionBegin; 3392 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 3393 ierr = MatGetSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 3394 ierr = MatDestroy(&A);CHKERRQ(ierr); 3395 PetscFunctionReturn(0); 3396 } 3397 3398 /* ---------------------------------------------------------------------------------- */ 3399 #undef __FUNCT__ 3400 #define __FUNCT__ "MatCreateMAIJ" 3401 /*@C 3402 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 3403 operations for multicomponent problems. It interpolates each component the same 3404 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 3405 and MATMPIAIJ for distributed matrices. 3406 3407 Collective 3408 3409 Input Parameters: 3410 + A - the AIJ matrix describing the action on blocks 3411 - dof - the block size (number of components per node) 3412 3413 Output Parameter: 3414 . maij - the new MAIJ matrix 3415 3416 Operations provided: 3417 + MatMult 3418 . MatMultTranspose 3419 . MatMultAdd 3420 . MatMultTransposeAdd 3421 - MatView 3422 3423 Level: advanced 3424 3425 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ 3426 @*/ 3427 PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 3428 { 3429 PetscErrorCode ierr; 3430 PetscMPIInt size; 3431 PetscInt n; 3432 Mat B; 3433 3434 PetscFunctionBegin; 3435 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3436 3437 if (dof == 1) *maij = A; 3438 else { 3439 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3440 ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3441 ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr); 3442 ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr); 3443 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3444 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3445 3446 B->assembled = PETSC_TRUE; 3447 3448 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3449 if (size == 1) { 3450 Mat_SeqMAIJ *b; 3451 3452 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 3453 3454 B->ops->setup = NULL; 3455 B->ops->destroy = MatDestroy_SeqMAIJ; 3456 B->ops->view = MatView_SeqMAIJ; 3457 b = (Mat_SeqMAIJ*)B->data; 3458 b->dof = dof; 3459 b->AIJ = A; 3460 3461 if (dof == 2) { 3462 B->ops->mult = MatMult_SeqMAIJ_2; 3463 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3464 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3465 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3466 } else if (dof == 3) { 3467 B->ops->mult = MatMult_SeqMAIJ_3; 3468 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3469 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3470 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3471 } else if (dof == 4) { 3472 B->ops->mult = MatMult_SeqMAIJ_4; 3473 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3474 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3475 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3476 } else if (dof == 5) { 3477 B->ops->mult = MatMult_SeqMAIJ_5; 3478 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3479 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3480 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 3481 } else if (dof == 6) { 3482 B->ops->mult = MatMult_SeqMAIJ_6; 3483 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 3484 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 3485 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3486 } else if (dof == 7) { 3487 B->ops->mult = MatMult_SeqMAIJ_7; 3488 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3489 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3490 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 3491 } else if (dof == 8) { 3492 B->ops->mult = MatMult_SeqMAIJ_8; 3493 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 3494 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 3495 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 3496 } else if (dof == 9) { 3497 B->ops->mult = MatMult_SeqMAIJ_9; 3498 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 3499 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 3500 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3501 } else if (dof == 10) { 3502 B->ops->mult = MatMult_SeqMAIJ_10; 3503 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3504 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3505 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3506 } else if (dof == 11) { 3507 B->ops->mult = MatMult_SeqMAIJ_11; 3508 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3509 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3510 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 3511 } else if (dof == 16) { 3512 B->ops->mult = MatMult_SeqMAIJ_16; 3513 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 3514 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 3515 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3516 } else if (dof == 18) { 3517 B->ops->mult = MatMult_SeqMAIJ_18; 3518 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3519 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3520 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3521 } else { 3522 B->ops->mult = MatMult_SeqMAIJ_N; 3523 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 3524 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 3525 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 3526 } 3527 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3528 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 3529 } else { 3530 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data; 3531 Mat_MPIMAIJ *b; 3532 IS from,to; 3533 Vec gvec; 3534 3535 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 3536 3537 B->ops->setup = NULL; 3538 B->ops->destroy = MatDestroy_MPIMAIJ; 3539 B->ops->view = MatView_MPIMAIJ; 3540 3541 b = (Mat_MPIMAIJ*)B->data; 3542 b->dof = dof; 3543 b->A = A; 3544 3545 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 3546 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 3547 3548 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3549 ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr); 3550 ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr); 3551 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3552 ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr); 3553 3554 /* create two temporary Index sets for build scatter gather */ 3555 ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 3556 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3557 3558 /* create temporary global vector to generate scatter context */ 3559 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 3560 3561 /* generate the scatter context */ 3562 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3563 3564 ierr = ISDestroy(&from);CHKERRQ(ierr); 3565 ierr = ISDestroy(&to);CHKERRQ(ierr); 3566 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 3567 3568 B->ops->mult = MatMult_MPIMAIJ_dof; 3569 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3570 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3571 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3572 3573 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3574 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);CHKERRQ(ierr); 3575 } 3576 B->ops->getsubmatrix = MatGetSubMatrix_MAIJ; 3577 ierr = MatSetUp(B);CHKERRQ(ierr); 3578 *maij = B; 3579 ierr = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr); 3580 } 3581 PetscFunctionReturn(0); 3582 } 3583