1 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20 #include <../src/mat/utils/freespace.h> 21 22 #undef __FUNCT__ 23 #define __FUNCT__ "MatMAIJGetAIJ" 24 /*@ 25 MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 26 27 Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 28 29 Input Parameter: 30 . A - the MAIJ matrix 31 32 Output Parameter: 33 . B - the AIJ matrix 34 35 Level: advanced 36 37 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 38 39 .seealso: MatCreateMAIJ() 40 @*/ 41 PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 42 { 43 PetscErrorCode ierr; 44 PetscBool ismpimaij,isseqmaij; 45 46 PetscFunctionBegin; 47 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 48 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 49 if (ismpimaij) { 50 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 51 52 *B = b->A; 53 } else if (isseqmaij) { 54 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 55 56 *B = b->AIJ; 57 } else { 58 *B = A; 59 } 60 PetscFunctionReturn(0); 61 } 62 63 #undef __FUNCT__ 64 #define __FUNCT__ "MatMAIJRedimension" 65 /*@C 66 MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 67 68 Logically Collective 69 70 Input Parameter: 71 + A - the MAIJ matrix 72 - dof - the block size for the new matrix 73 74 Output Parameter: 75 . B - the new MAIJ matrix 76 77 Level: advanced 78 79 .seealso: MatCreateMAIJ() 80 @*/ 81 PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 82 { 83 PetscErrorCode ierr; 84 Mat Aij = NULL; 85 86 PetscFunctionBegin; 87 PetscValidLogicalCollectiveInt(A,dof,2); 88 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 89 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 90 PetscFunctionReturn(0); 91 } 92 93 #undef __FUNCT__ 94 #define __FUNCT__ "MatDestroy_SeqMAIJ" 95 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 96 { 97 PetscErrorCode ierr; 98 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 99 100 PetscFunctionBegin; 101 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 102 ierr = PetscFree(A->data);CHKERRQ(ierr); 103 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr); 104 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);CHKERRQ(ierr); 105 PetscFunctionReturn(0); 106 } 107 108 #undef __FUNCT__ 109 #define __FUNCT__ "MatSetUp_MAIJ" 110 PetscErrorCode MatSetUp_MAIJ(Mat A) 111 { 112 PetscFunctionBegin; 113 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices"); 114 PetscFunctionReturn(0); 115 } 116 117 #undef __FUNCT__ 118 #define __FUNCT__ "MatView_SeqMAIJ" 119 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 120 { 121 PetscErrorCode ierr; 122 Mat B; 123 124 PetscFunctionBegin; 125 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 126 ierr = MatView(B,viewer);CHKERRQ(ierr); 127 ierr = MatDestroy(&B);CHKERRQ(ierr); 128 PetscFunctionReturn(0); 129 } 130 131 #undef __FUNCT__ 132 #define __FUNCT__ "MatView_MPIMAIJ" 133 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 134 { 135 PetscErrorCode ierr; 136 Mat B; 137 138 PetscFunctionBegin; 139 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 140 ierr = MatView(B,viewer);CHKERRQ(ierr); 141 ierr = MatDestroy(&B);CHKERRQ(ierr); 142 PetscFunctionReturn(0); 143 } 144 145 #undef __FUNCT__ 146 #define __FUNCT__ "MatDestroy_MPIMAIJ" 147 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 148 { 149 PetscErrorCode ierr; 150 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 151 152 PetscFunctionBegin; 153 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 154 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 155 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 156 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 157 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 158 ierr = PetscFree(A->data);CHKERRQ(ierr); 159 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr); 160 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr); 161 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 162 PetscFunctionReturn(0); 163 } 164 165 /*MC 166 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 167 multicomponent problems, interpolating or restricting each component the same way independently. 168 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 169 170 Operations provided: 171 . MatMult 172 . MatMultTranspose 173 . MatMultAdd 174 . MatMultTransposeAdd 175 176 Level: advanced 177 178 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ() 179 M*/ 180 181 #undef __FUNCT__ 182 #define __FUNCT__ "MatCreate_MAIJ" 183 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 184 { 185 PetscErrorCode ierr; 186 Mat_MPIMAIJ *b; 187 PetscMPIInt size; 188 189 PetscFunctionBegin; 190 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 191 A->data = (void*)b; 192 193 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 194 195 A->ops->setup = MatSetUp_MAIJ; 196 197 b->AIJ = 0; 198 b->dof = 0; 199 b->OAIJ = 0; 200 b->ctx = 0; 201 b->w = 0; 202 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 203 if (size == 1) { 204 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 205 } else { 206 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 207 } 208 A->preallocated = PETSC_TRUE; 209 A->assembled = PETSC_TRUE; 210 PetscFunctionReturn(0); 211 } 212 213 /* --------------------------------------------------------------------------------------*/ 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatMult_SeqMAIJ_2" 216 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 217 { 218 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 219 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 220 const PetscScalar *x,*v; 221 PetscScalar *y, sum1, sum2; 222 PetscErrorCode ierr; 223 PetscInt nonzerorow=0,n,i,jrow,j; 224 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 225 226 PetscFunctionBegin; 227 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 228 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 229 idx = a->j; 230 v = a->a; 231 ii = a->i; 232 233 for (i=0; i<m; i++) { 234 jrow = ii[i]; 235 n = ii[i+1] - jrow; 236 sum1 = 0.0; 237 sum2 = 0.0; 238 239 nonzerorow += (n>0); 240 for (j=0; j<n; j++) { 241 sum1 += v[jrow]*x[2*idx[jrow]]; 242 sum2 += v[jrow]*x[2*idx[jrow]+1]; 243 jrow++; 244 } 245 y[2*i] = sum1; 246 y[2*i+1] = sum2; 247 } 248 249 ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 250 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 251 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 252 PetscFunctionReturn(0); 253 } 254 255 #undef __FUNCT__ 256 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 257 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 258 { 259 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 260 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 261 const PetscScalar *x,*v; 262 PetscScalar *y,alpha1,alpha2; 263 PetscErrorCode ierr; 264 PetscInt n,i; 265 const PetscInt m = b->AIJ->rmap->n,*idx; 266 267 PetscFunctionBegin; 268 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 269 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 270 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 271 272 for (i=0; i<m; i++) { 273 idx = a->j + a->i[i]; 274 v = a->a + a->i[i]; 275 n = a->i[i+1] - a->i[i]; 276 alpha1 = x[2*i]; 277 alpha2 = x[2*i+1]; 278 while (n-->0) { 279 y[2*(*idx)] += alpha1*(*v); 280 y[2*(*idx)+1] += alpha2*(*v); 281 idx++; v++; 282 } 283 } 284 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 285 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 286 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 287 PetscFunctionReturn(0); 288 } 289 290 #undef __FUNCT__ 291 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 292 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 293 { 294 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 295 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 296 const PetscScalar *x,*v; 297 PetscScalar *y,sum1, sum2; 298 PetscErrorCode ierr; 299 PetscInt n,i,jrow,j; 300 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 301 302 PetscFunctionBegin; 303 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 304 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 305 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 306 idx = a->j; 307 v = a->a; 308 ii = a->i; 309 310 for (i=0; i<m; i++) { 311 jrow = ii[i]; 312 n = ii[i+1] - jrow; 313 sum1 = 0.0; 314 sum2 = 0.0; 315 for (j=0; j<n; j++) { 316 sum1 += v[jrow]*x[2*idx[jrow]]; 317 sum2 += v[jrow]*x[2*idx[jrow]+1]; 318 jrow++; 319 } 320 y[2*i] += sum1; 321 y[2*i+1] += sum2; 322 } 323 324 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 325 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 326 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 327 PetscFunctionReturn(0); 328 } 329 #undef __FUNCT__ 330 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 331 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 332 { 333 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 334 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 335 const PetscScalar *x,*v; 336 PetscScalar *y,alpha1,alpha2; 337 PetscErrorCode ierr; 338 PetscInt n,i; 339 const PetscInt m = b->AIJ->rmap->n,*idx; 340 341 PetscFunctionBegin; 342 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 343 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 344 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 345 346 for (i=0; i<m; i++) { 347 idx = a->j + a->i[i]; 348 v = a->a + a->i[i]; 349 n = a->i[i+1] - a->i[i]; 350 alpha1 = x[2*i]; 351 alpha2 = x[2*i+1]; 352 while (n-->0) { 353 y[2*(*idx)] += alpha1*(*v); 354 y[2*(*idx)+1] += alpha2*(*v); 355 idx++; v++; 356 } 357 } 358 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 359 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 360 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 361 PetscFunctionReturn(0); 362 } 363 /* --------------------------------------------------------------------------------------*/ 364 #undef __FUNCT__ 365 #define __FUNCT__ "MatMult_SeqMAIJ_3" 366 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 367 { 368 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 369 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 370 const PetscScalar *x,*v; 371 PetscScalar *y,sum1, sum2, sum3; 372 PetscErrorCode ierr; 373 PetscInt nonzerorow=0,n,i,jrow,j; 374 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 375 376 PetscFunctionBegin; 377 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 378 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 379 idx = a->j; 380 v = a->a; 381 ii = a->i; 382 383 for (i=0; i<m; i++) { 384 jrow = ii[i]; 385 n = ii[i+1] - jrow; 386 sum1 = 0.0; 387 sum2 = 0.0; 388 sum3 = 0.0; 389 390 nonzerorow += (n>0); 391 for (j=0; j<n; j++) { 392 sum1 += v[jrow]*x[3*idx[jrow]]; 393 sum2 += v[jrow]*x[3*idx[jrow]+1]; 394 sum3 += v[jrow]*x[3*idx[jrow]+2]; 395 jrow++; 396 } 397 y[3*i] = sum1; 398 y[3*i+1] = sum2; 399 y[3*i+2] = sum3; 400 } 401 402 ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 403 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 404 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 410 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 411 { 412 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 413 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 414 const PetscScalar *x,*v; 415 PetscScalar *y,alpha1,alpha2,alpha3; 416 PetscErrorCode ierr; 417 PetscInt n,i; 418 const PetscInt m = b->AIJ->rmap->n,*idx; 419 420 PetscFunctionBegin; 421 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 422 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 423 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 424 425 for (i=0; i<m; i++) { 426 idx = a->j + a->i[i]; 427 v = a->a + a->i[i]; 428 n = a->i[i+1] - a->i[i]; 429 alpha1 = x[3*i]; 430 alpha2 = x[3*i+1]; 431 alpha3 = x[3*i+2]; 432 while (n-->0) { 433 y[3*(*idx)] += alpha1*(*v); 434 y[3*(*idx)+1] += alpha2*(*v); 435 y[3*(*idx)+2] += alpha3*(*v); 436 idx++; v++; 437 } 438 } 439 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 440 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 441 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 442 PetscFunctionReturn(0); 443 } 444 445 #undef __FUNCT__ 446 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 447 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 448 { 449 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 450 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 451 const PetscScalar *x,*v; 452 PetscScalar *y,sum1, sum2, sum3; 453 PetscErrorCode ierr; 454 PetscInt n,i,jrow,j; 455 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 456 457 PetscFunctionBegin; 458 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 459 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 460 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 461 idx = a->j; 462 v = a->a; 463 ii = a->i; 464 465 for (i=0; i<m; i++) { 466 jrow = ii[i]; 467 n = ii[i+1] - jrow; 468 sum1 = 0.0; 469 sum2 = 0.0; 470 sum3 = 0.0; 471 for (j=0; j<n; j++) { 472 sum1 += v[jrow]*x[3*idx[jrow]]; 473 sum2 += v[jrow]*x[3*idx[jrow]+1]; 474 sum3 += v[jrow]*x[3*idx[jrow]+2]; 475 jrow++; 476 } 477 y[3*i] += sum1; 478 y[3*i+1] += sum2; 479 y[3*i+2] += sum3; 480 } 481 482 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 483 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 484 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 485 PetscFunctionReturn(0); 486 } 487 #undef __FUNCT__ 488 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 489 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 490 { 491 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 492 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 493 const PetscScalar *x,*v; 494 PetscScalar *y,alpha1,alpha2,alpha3; 495 PetscErrorCode ierr; 496 PetscInt n,i; 497 const PetscInt m = b->AIJ->rmap->n,*idx; 498 499 PetscFunctionBegin; 500 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 501 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 502 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 503 for (i=0; i<m; i++) { 504 idx = a->j + a->i[i]; 505 v = a->a + a->i[i]; 506 n = a->i[i+1] - a->i[i]; 507 alpha1 = x[3*i]; 508 alpha2 = x[3*i+1]; 509 alpha3 = x[3*i+2]; 510 while (n-->0) { 511 y[3*(*idx)] += alpha1*(*v); 512 y[3*(*idx)+1] += alpha2*(*v); 513 y[3*(*idx)+2] += alpha3*(*v); 514 idx++; v++; 515 } 516 } 517 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 518 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 519 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 520 PetscFunctionReturn(0); 521 } 522 523 /* ------------------------------------------------------------------------------*/ 524 #undef __FUNCT__ 525 #define __FUNCT__ "MatMult_SeqMAIJ_4" 526 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 527 { 528 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 529 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 530 const PetscScalar *x,*v; 531 PetscScalar *y,sum1, sum2, sum3, sum4; 532 PetscErrorCode ierr; 533 PetscInt nonzerorow=0,n,i,jrow,j; 534 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 535 536 PetscFunctionBegin; 537 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 538 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 539 idx = a->j; 540 v = a->a; 541 ii = a->i; 542 543 for (i=0; i<m; i++) { 544 jrow = ii[i]; 545 n = ii[i+1] - jrow; 546 sum1 = 0.0; 547 sum2 = 0.0; 548 sum3 = 0.0; 549 sum4 = 0.0; 550 nonzerorow += (n>0); 551 for (j=0; j<n; j++) { 552 sum1 += v[jrow]*x[4*idx[jrow]]; 553 sum2 += v[jrow]*x[4*idx[jrow]+1]; 554 sum3 += v[jrow]*x[4*idx[jrow]+2]; 555 sum4 += v[jrow]*x[4*idx[jrow]+3]; 556 jrow++; 557 } 558 y[4*i] = sum1; 559 y[4*i+1] = sum2; 560 y[4*i+2] = sum3; 561 y[4*i+3] = sum4; 562 } 563 564 ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 565 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 566 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 567 PetscFunctionReturn(0); 568 } 569 570 #undef __FUNCT__ 571 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 572 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 573 { 574 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 575 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 576 const PetscScalar *x,*v; 577 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 578 PetscErrorCode ierr; 579 PetscInt n,i; 580 const PetscInt m = b->AIJ->rmap->n,*idx; 581 582 PetscFunctionBegin; 583 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 584 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 585 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 586 for (i=0; i<m; i++) { 587 idx = a->j + a->i[i]; 588 v = a->a + a->i[i]; 589 n = a->i[i+1] - a->i[i]; 590 alpha1 = x[4*i]; 591 alpha2 = x[4*i+1]; 592 alpha3 = x[4*i+2]; 593 alpha4 = x[4*i+3]; 594 while (n-->0) { 595 y[4*(*idx)] += alpha1*(*v); 596 y[4*(*idx)+1] += alpha2*(*v); 597 y[4*(*idx)+2] += alpha3*(*v); 598 y[4*(*idx)+3] += alpha4*(*v); 599 idx++; v++; 600 } 601 } 602 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 603 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 604 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 605 PetscFunctionReturn(0); 606 } 607 608 #undef __FUNCT__ 609 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 610 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 611 { 612 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 613 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 614 const PetscScalar *x,*v; 615 PetscScalar *y,sum1, sum2, sum3, sum4; 616 PetscErrorCode ierr; 617 PetscInt n,i,jrow,j; 618 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 619 620 PetscFunctionBegin; 621 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 622 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 623 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 624 idx = a->j; 625 v = a->a; 626 ii = a->i; 627 628 for (i=0; i<m; i++) { 629 jrow = ii[i]; 630 n = ii[i+1] - jrow; 631 sum1 = 0.0; 632 sum2 = 0.0; 633 sum3 = 0.0; 634 sum4 = 0.0; 635 for (j=0; j<n; j++) { 636 sum1 += v[jrow]*x[4*idx[jrow]]; 637 sum2 += v[jrow]*x[4*idx[jrow]+1]; 638 sum3 += v[jrow]*x[4*idx[jrow]+2]; 639 sum4 += v[jrow]*x[4*idx[jrow]+3]; 640 jrow++; 641 } 642 y[4*i] += sum1; 643 y[4*i+1] += sum2; 644 y[4*i+2] += sum3; 645 y[4*i+3] += sum4; 646 } 647 648 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 649 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 650 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 651 PetscFunctionReturn(0); 652 } 653 #undef __FUNCT__ 654 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 655 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 656 { 657 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 658 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 659 const PetscScalar *x,*v; 660 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 661 PetscErrorCode ierr; 662 PetscInt n,i; 663 const PetscInt m = b->AIJ->rmap->n,*idx; 664 665 PetscFunctionBegin; 666 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 667 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 668 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 669 670 for (i=0; i<m; i++) { 671 idx = a->j + a->i[i]; 672 v = a->a + a->i[i]; 673 n = a->i[i+1] - a->i[i]; 674 alpha1 = x[4*i]; 675 alpha2 = x[4*i+1]; 676 alpha3 = x[4*i+2]; 677 alpha4 = x[4*i+3]; 678 while (n-->0) { 679 y[4*(*idx)] += alpha1*(*v); 680 y[4*(*idx)+1] += alpha2*(*v); 681 y[4*(*idx)+2] += alpha3*(*v); 682 y[4*(*idx)+3] += alpha4*(*v); 683 idx++; v++; 684 } 685 } 686 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 687 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 688 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 689 PetscFunctionReturn(0); 690 } 691 /* ------------------------------------------------------------------------------*/ 692 693 #undef __FUNCT__ 694 #define __FUNCT__ "MatMult_SeqMAIJ_5" 695 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 696 { 697 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 698 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 699 const PetscScalar *x,*v; 700 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 701 PetscErrorCode ierr; 702 PetscInt nonzerorow=0,n,i,jrow,j; 703 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 704 705 PetscFunctionBegin; 706 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 707 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 708 idx = a->j; 709 v = a->a; 710 ii = a->i; 711 712 for (i=0; i<m; i++) { 713 jrow = ii[i]; 714 n = ii[i+1] - jrow; 715 sum1 = 0.0; 716 sum2 = 0.0; 717 sum3 = 0.0; 718 sum4 = 0.0; 719 sum5 = 0.0; 720 721 nonzerorow += (n>0); 722 for (j=0; j<n; j++) { 723 sum1 += v[jrow]*x[5*idx[jrow]]; 724 sum2 += v[jrow]*x[5*idx[jrow]+1]; 725 sum3 += v[jrow]*x[5*idx[jrow]+2]; 726 sum4 += v[jrow]*x[5*idx[jrow]+3]; 727 sum5 += v[jrow]*x[5*idx[jrow]+4]; 728 jrow++; 729 } 730 y[5*i] = sum1; 731 y[5*i+1] = sum2; 732 y[5*i+2] = sum3; 733 y[5*i+3] = sum4; 734 y[5*i+4] = sum5; 735 } 736 737 ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 738 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 739 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 745 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 746 { 747 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 748 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 749 const PetscScalar *x,*v; 750 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 751 PetscErrorCode ierr; 752 PetscInt n,i; 753 const PetscInt m = b->AIJ->rmap->n,*idx; 754 755 PetscFunctionBegin; 756 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 757 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 758 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 759 760 for (i=0; i<m; i++) { 761 idx = a->j + a->i[i]; 762 v = a->a + a->i[i]; 763 n = a->i[i+1] - a->i[i]; 764 alpha1 = x[5*i]; 765 alpha2 = x[5*i+1]; 766 alpha3 = x[5*i+2]; 767 alpha4 = x[5*i+3]; 768 alpha5 = x[5*i+4]; 769 while (n-->0) { 770 y[5*(*idx)] += alpha1*(*v); 771 y[5*(*idx)+1] += alpha2*(*v); 772 y[5*(*idx)+2] += alpha3*(*v); 773 y[5*(*idx)+3] += alpha4*(*v); 774 y[5*(*idx)+4] += alpha5*(*v); 775 idx++; v++; 776 } 777 } 778 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 779 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 780 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 781 PetscFunctionReturn(0); 782 } 783 784 #undef __FUNCT__ 785 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 786 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 787 { 788 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 789 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 790 const PetscScalar *x,*v; 791 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 792 PetscErrorCode ierr; 793 PetscInt n,i,jrow,j; 794 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 795 796 PetscFunctionBegin; 797 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 798 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 799 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 800 idx = a->j; 801 v = a->a; 802 ii = a->i; 803 804 for (i=0; i<m; i++) { 805 jrow = ii[i]; 806 n = ii[i+1] - jrow; 807 sum1 = 0.0; 808 sum2 = 0.0; 809 sum3 = 0.0; 810 sum4 = 0.0; 811 sum5 = 0.0; 812 for (j=0; j<n; j++) { 813 sum1 += v[jrow]*x[5*idx[jrow]]; 814 sum2 += v[jrow]*x[5*idx[jrow]+1]; 815 sum3 += v[jrow]*x[5*idx[jrow]+2]; 816 sum4 += v[jrow]*x[5*idx[jrow]+3]; 817 sum5 += v[jrow]*x[5*idx[jrow]+4]; 818 jrow++; 819 } 820 y[5*i] += sum1; 821 y[5*i+1] += sum2; 822 y[5*i+2] += sum3; 823 y[5*i+3] += sum4; 824 y[5*i+4] += sum5; 825 } 826 827 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 828 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 829 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 830 PetscFunctionReturn(0); 831 } 832 833 #undef __FUNCT__ 834 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 835 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 836 { 837 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 838 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 839 const PetscScalar *x,*v; 840 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 841 PetscErrorCode ierr; 842 PetscInt n,i; 843 const PetscInt m = b->AIJ->rmap->n,*idx; 844 845 PetscFunctionBegin; 846 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 847 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 848 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 849 850 for (i=0; i<m; i++) { 851 idx = a->j + a->i[i]; 852 v = a->a + a->i[i]; 853 n = a->i[i+1] - a->i[i]; 854 alpha1 = x[5*i]; 855 alpha2 = x[5*i+1]; 856 alpha3 = x[5*i+2]; 857 alpha4 = x[5*i+3]; 858 alpha5 = x[5*i+4]; 859 while (n-->0) { 860 y[5*(*idx)] += alpha1*(*v); 861 y[5*(*idx)+1] += alpha2*(*v); 862 y[5*(*idx)+2] += alpha3*(*v); 863 y[5*(*idx)+3] += alpha4*(*v); 864 y[5*(*idx)+4] += alpha5*(*v); 865 idx++; v++; 866 } 867 } 868 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 869 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 870 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 871 PetscFunctionReturn(0); 872 } 873 874 /* ------------------------------------------------------------------------------*/ 875 #undef __FUNCT__ 876 #define __FUNCT__ "MatMult_SeqMAIJ_6" 877 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 878 { 879 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 880 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 881 const PetscScalar *x,*v; 882 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 883 PetscErrorCode ierr; 884 PetscInt nonzerorow=0,n,i,jrow,j; 885 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 886 887 PetscFunctionBegin; 888 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 889 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 890 idx = a->j; 891 v = a->a; 892 ii = a->i; 893 894 for (i=0; i<m; i++) { 895 jrow = ii[i]; 896 n = ii[i+1] - jrow; 897 sum1 = 0.0; 898 sum2 = 0.0; 899 sum3 = 0.0; 900 sum4 = 0.0; 901 sum5 = 0.0; 902 sum6 = 0.0; 903 904 nonzerorow += (n>0); 905 for (j=0; j<n; j++) { 906 sum1 += v[jrow]*x[6*idx[jrow]]; 907 sum2 += v[jrow]*x[6*idx[jrow]+1]; 908 sum3 += v[jrow]*x[6*idx[jrow]+2]; 909 sum4 += v[jrow]*x[6*idx[jrow]+3]; 910 sum5 += v[jrow]*x[6*idx[jrow]+4]; 911 sum6 += v[jrow]*x[6*idx[jrow]+5]; 912 jrow++; 913 } 914 y[6*i] = sum1; 915 y[6*i+1] = sum2; 916 y[6*i+2] = sum3; 917 y[6*i+3] = sum4; 918 y[6*i+4] = sum5; 919 y[6*i+5] = sum6; 920 } 921 922 ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 923 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 924 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 925 PetscFunctionReturn(0); 926 } 927 928 #undef __FUNCT__ 929 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 930 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 931 { 932 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 933 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 934 const PetscScalar *x,*v; 935 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 936 PetscErrorCode ierr; 937 PetscInt n,i; 938 const PetscInt m = b->AIJ->rmap->n,*idx; 939 940 PetscFunctionBegin; 941 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 942 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 943 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 944 945 for (i=0; i<m; i++) { 946 idx = a->j + a->i[i]; 947 v = a->a + a->i[i]; 948 n = a->i[i+1] - a->i[i]; 949 alpha1 = x[6*i]; 950 alpha2 = x[6*i+1]; 951 alpha3 = x[6*i+2]; 952 alpha4 = x[6*i+3]; 953 alpha5 = x[6*i+4]; 954 alpha6 = x[6*i+5]; 955 while (n-->0) { 956 y[6*(*idx)] += alpha1*(*v); 957 y[6*(*idx)+1] += alpha2*(*v); 958 y[6*(*idx)+2] += alpha3*(*v); 959 y[6*(*idx)+3] += alpha4*(*v); 960 y[6*(*idx)+4] += alpha5*(*v); 961 y[6*(*idx)+5] += alpha6*(*v); 962 idx++; v++; 963 } 964 } 965 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 966 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 967 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 968 PetscFunctionReturn(0); 969 } 970 971 #undef __FUNCT__ 972 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 973 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 974 { 975 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 976 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 977 const PetscScalar *x,*v; 978 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 979 PetscErrorCode ierr; 980 PetscInt n,i,jrow,j; 981 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 982 983 PetscFunctionBegin; 984 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 985 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 986 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 987 idx = a->j; 988 v = a->a; 989 ii = a->i; 990 991 for (i=0; i<m; i++) { 992 jrow = ii[i]; 993 n = ii[i+1] - jrow; 994 sum1 = 0.0; 995 sum2 = 0.0; 996 sum3 = 0.0; 997 sum4 = 0.0; 998 sum5 = 0.0; 999 sum6 = 0.0; 1000 for (j=0; j<n; j++) { 1001 sum1 += v[jrow]*x[6*idx[jrow]]; 1002 sum2 += v[jrow]*x[6*idx[jrow]+1]; 1003 sum3 += v[jrow]*x[6*idx[jrow]+2]; 1004 sum4 += v[jrow]*x[6*idx[jrow]+3]; 1005 sum5 += v[jrow]*x[6*idx[jrow]+4]; 1006 sum6 += v[jrow]*x[6*idx[jrow]+5]; 1007 jrow++; 1008 } 1009 y[6*i] += sum1; 1010 y[6*i+1] += sum2; 1011 y[6*i+2] += sum3; 1012 y[6*i+3] += sum4; 1013 y[6*i+4] += sum5; 1014 y[6*i+5] += sum6; 1015 } 1016 1017 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1018 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1019 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1020 PetscFunctionReturn(0); 1021 } 1022 1023 #undef __FUNCT__ 1024 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 1025 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 1026 { 1027 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1028 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1029 const PetscScalar *x,*v; 1030 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 1031 PetscErrorCode ierr; 1032 PetscInt n,i; 1033 const PetscInt m = b->AIJ->rmap->n,*idx; 1034 1035 PetscFunctionBegin; 1036 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1037 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1038 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1039 1040 for (i=0; i<m; i++) { 1041 idx = a->j + a->i[i]; 1042 v = a->a + a->i[i]; 1043 n = a->i[i+1] - a->i[i]; 1044 alpha1 = x[6*i]; 1045 alpha2 = x[6*i+1]; 1046 alpha3 = x[6*i+2]; 1047 alpha4 = x[6*i+3]; 1048 alpha5 = x[6*i+4]; 1049 alpha6 = x[6*i+5]; 1050 while (n-->0) { 1051 y[6*(*idx)] += alpha1*(*v); 1052 y[6*(*idx)+1] += alpha2*(*v); 1053 y[6*(*idx)+2] += alpha3*(*v); 1054 y[6*(*idx)+3] += alpha4*(*v); 1055 y[6*(*idx)+4] += alpha5*(*v); 1056 y[6*(*idx)+5] += alpha6*(*v); 1057 idx++; v++; 1058 } 1059 } 1060 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1061 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1062 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1063 PetscFunctionReturn(0); 1064 } 1065 1066 /* ------------------------------------------------------------------------------*/ 1067 #undef __FUNCT__ 1068 #define __FUNCT__ "MatMult_SeqMAIJ_7" 1069 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1070 { 1071 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1072 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1073 const PetscScalar *x,*v; 1074 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1075 PetscErrorCode ierr; 1076 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1077 PetscInt nonzerorow=0,n,i,jrow,j; 1078 1079 PetscFunctionBegin; 1080 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1081 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1082 idx = a->j; 1083 v = a->a; 1084 ii = a->i; 1085 1086 for (i=0; i<m; i++) { 1087 jrow = ii[i]; 1088 n = ii[i+1] - jrow; 1089 sum1 = 0.0; 1090 sum2 = 0.0; 1091 sum3 = 0.0; 1092 sum4 = 0.0; 1093 sum5 = 0.0; 1094 sum6 = 0.0; 1095 sum7 = 0.0; 1096 1097 nonzerorow += (n>0); 1098 for (j=0; j<n; j++) { 1099 sum1 += v[jrow]*x[7*idx[jrow]]; 1100 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1101 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1102 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1103 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1104 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1105 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1106 jrow++; 1107 } 1108 y[7*i] = sum1; 1109 y[7*i+1] = sum2; 1110 y[7*i+2] = sum3; 1111 y[7*i+3] = sum4; 1112 y[7*i+4] = sum5; 1113 y[7*i+5] = sum6; 1114 y[7*i+6] = sum7; 1115 } 1116 1117 ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 1118 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1119 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1125 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1126 { 1127 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1128 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1129 const PetscScalar *x,*v; 1130 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1131 PetscErrorCode ierr; 1132 const PetscInt m = b->AIJ->rmap->n,*idx; 1133 PetscInt n,i; 1134 1135 PetscFunctionBegin; 1136 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1137 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1138 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1139 1140 for (i=0; i<m; i++) { 1141 idx = a->j + a->i[i]; 1142 v = a->a + a->i[i]; 1143 n = a->i[i+1] - a->i[i]; 1144 alpha1 = x[7*i]; 1145 alpha2 = x[7*i+1]; 1146 alpha3 = x[7*i+2]; 1147 alpha4 = x[7*i+3]; 1148 alpha5 = x[7*i+4]; 1149 alpha6 = x[7*i+5]; 1150 alpha7 = x[7*i+6]; 1151 while (n-->0) { 1152 y[7*(*idx)] += alpha1*(*v); 1153 y[7*(*idx)+1] += alpha2*(*v); 1154 y[7*(*idx)+2] += alpha3*(*v); 1155 y[7*(*idx)+3] += alpha4*(*v); 1156 y[7*(*idx)+4] += alpha5*(*v); 1157 y[7*(*idx)+5] += alpha6*(*v); 1158 y[7*(*idx)+6] += alpha7*(*v); 1159 idx++; v++; 1160 } 1161 } 1162 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1163 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1164 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1165 PetscFunctionReturn(0); 1166 } 1167 1168 #undef __FUNCT__ 1169 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1170 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1171 { 1172 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1173 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1174 const PetscScalar *x,*v; 1175 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1176 PetscErrorCode ierr; 1177 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1178 PetscInt n,i,jrow,j; 1179 1180 PetscFunctionBegin; 1181 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1182 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1183 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1184 idx = a->j; 1185 v = a->a; 1186 ii = a->i; 1187 1188 for (i=0; i<m; i++) { 1189 jrow = ii[i]; 1190 n = ii[i+1] - jrow; 1191 sum1 = 0.0; 1192 sum2 = 0.0; 1193 sum3 = 0.0; 1194 sum4 = 0.0; 1195 sum5 = 0.0; 1196 sum6 = 0.0; 1197 sum7 = 0.0; 1198 for (j=0; j<n; j++) { 1199 sum1 += v[jrow]*x[7*idx[jrow]]; 1200 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1201 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1202 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1203 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1204 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1205 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1206 jrow++; 1207 } 1208 y[7*i] += sum1; 1209 y[7*i+1] += sum2; 1210 y[7*i+2] += sum3; 1211 y[7*i+3] += sum4; 1212 y[7*i+4] += sum5; 1213 y[7*i+5] += sum6; 1214 y[7*i+6] += sum7; 1215 } 1216 1217 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1218 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1219 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1220 PetscFunctionReturn(0); 1221 } 1222 1223 #undef __FUNCT__ 1224 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1225 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1226 { 1227 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1228 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1229 const PetscScalar *x,*v; 1230 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1231 PetscErrorCode ierr; 1232 const PetscInt m = b->AIJ->rmap->n,*idx; 1233 PetscInt n,i; 1234 1235 PetscFunctionBegin; 1236 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1237 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1238 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1239 for (i=0; i<m; i++) { 1240 idx = a->j + a->i[i]; 1241 v = a->a + a->i[i]; 1242 n = a->i[i+1] - a->i[i]; 1243 alpha1 = x[7*i]; 1244 alpha2 = x[7*i+1]; 1245 alpha3 = x[7*i+2]; 1246 alpha4 = x[7*i+3]; 1247 alpha5 = x[7*i+4]; 1248 alpha6 = x[7*i+5]; 1249 alpha7 = x[7*i+6]; 1250 while (n-->0) { 1251 y[7*(*idx)] += alpha1*(*v); 1252 y[7*(*idx)+1] += alpha2*(*v); 1253 y[7*(*idx)+2] += alpha3*(*v); 1254 y[7*(*idx)+3] += alpha4*(*v); 1255 y[7*(*idx)+4] += alpha5*(*v); 1256 y[7*(*idx)+5] += alpha6*(*v); 1257 y[7*(*idx)+6] += alpha7*(*v); 1258 idx++; v++; 1259 } 1260 } 1261 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1262 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1263 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1264 PetscFunctionReturn(0); 1265 } 1266 1267 #undef __FUNCT__ 1268 #define __FUNCT__ "MatMult_SeqMAIJ_8" 1269 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1270 { 1271 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1272 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1273 const PetscScalar *x,*v; 1274 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1275 PetscErrorCode ierr; 1276 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1277 PetscInt nonzerorow=0,n,i,jrow,j; 1278 1279 PetscFunctionBegin; 1280 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1281 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1282 idx = a->j; 1283 v = a->a; 1284 ii = a->i; 1285 1286 for (i=0; i<m; i++) { 1287 jrow = ii[i]; 1288 n = ii[i+1] - jrow; 1289 sum1 = 0.0; 1290 sum2 = 0.0; 1291 sum3 = 0.0; 1292 sum4 = 0.0; 1293 sum5 = 0.0; 1294 sum6 = 0.0; 1295 sum7 = 0.0; 1296 sum8 = 0.0; 1297 1298 nonzerorow += (n>0); 1299 for (j=0; j<n; j++) { 1300 sum1 += v[jrow]*x[8*idx[jrow]]; 1301 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1302 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1303 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1304 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1305 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1306 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1307 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1308 jrow++; 1309 } 1310 y[8*i] = sum1; 1311 y[8*i+1] = sum2; 1312 y[8*i+2] = sum3; 1313 y[8*i+3] = sum4; 1314 y[8*i+4] = sum5; 1315 y[8*i+5] = sum6; 1316 y[8*i+6] = sum7; 1317 y[8*i+7] = sum8; 1318 } 1319 1320 ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 1321 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1322 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1323 PetscFunctionReturn(0); 1324 } 1325 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1328 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1329 { 1330 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1331 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1332 const PetscScalar *x,*v; 1333 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1334 PetscErrorCode ierr; 1335 const PetscInt m = b->AIJ->rmap->n,*idx; 1336 PetscInt n,i; 1337 1338 PetscFunctionBegin; 1339 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1340 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1341 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1342 1343 for (i=0; i<m; i++) { 1344 idx = a->j + a->i[i]; 1345 v = a->a + a->i[i]; 1346 n = a->i[i+1] - a->i[i]; 1347 alpha1 = x[8*i]; 1348 alpha2 = x[8*i+1]; 1349 alpha3 = x[8*i+2]; 1350 alpha4 = x[8*i+3]; 1351 alpha5 = x[8*i+4]; 1352 alpha6 = x[8*i+5]; 1353 alpha7 = x[8*i+6]; 1354 alpha8 = x[8*i+7]; 1355 while (n-->0) { 1356 y[8*(*idx)] += alpha1*(*v); 1357 y[8*(*idx)+1] += alpha2*(*v); 1358 y[8*(*idx)+2] += alpha3*(*v); 1359 y[8*(*idx)+3] += alpha4*(*v); 1360 y[8*(*idx)+4] += alpha5*(*v); 1361 y[8*(*idx)+5] += alpha6*(*v); 1362 y[8*(*idx)+6] += alpha7*(*v); 1363 y[8*(*idx)+7] += alpha8*(*v); 1364 idx++; v++; 1365 } 1366 } 1367 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1368 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1369 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1370 PetscFunctionReturn(0); 1371 } 1372 1373 #undef __FUNCT__ 1374 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1375 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1376 { 1377 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1378 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1379 const PetscScalar *x,*v; 1380 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1381 PetscErrorCode ierr; 1382 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1383 PetscInt n,i,jrow,j; 1384 1385 PetscFunctionBegin; 1386 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1387 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1388 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1389 idx = a->j; 1390 v = a->a; 1391 ii = a->i; 1392 1393 for (i=0; i<m; i++) { 1394 jrow = ii[i]; 1395 n = ii[i+1] - jrow; 1396 sum1 = 0.0; 1397 sum2 = 0.0; 1398 sum3 = 0.0; 1399 sum4 = 0.0; 1400 sum5 = 0.0; 1401 sum6 = 0.0; 1402 sum7 = 0.0; 1403 sum8 = 0.0; 1404 for (j=0; j<n; j++) { 1405 sum1 += v[jrow]*x[8*idx[jrow]]; 1406 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1407 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1408 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1409 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1410 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1411 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1412 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1413 jrow++; 1414 } 1415 y[8*i] += sum1; 1416 y[8*i+1] += sum2; 1417 y[8*i+2] += sum3; 1418 y[8*i+3] += sum4; 1419 y[8*i+4] += sum5; 1420 y[8*i+5] += sum6; 1421 y[8*i+6] += sum7; 1422 y[8*i+7] += sum8; 1423 } 1424 1425 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1426 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1427 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1428 PetscFunctionReturn(0); 1429 } 1430 1431 #undef __FUNCT__ 1432 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1433 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1434 { 1435 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1436 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1437 const PetscScalar *x,*v; 1438 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1439 PetscErrorCode ierr; 1440 const PetscInt m = b->AIJ->rmap->n,*idx; 1441 PetscInt n,i; 1442 1443 PetscFunctionBegin; 1444 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1445 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1446 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1447 for (i=0; i<m; i++) { 1448 idx = a->j + a->i[i]; 1449 v = a->a + a->i[i]; 1450 n = a->i[i+1] - a->i[i]; 1451 alpha1 = x[8*i]; 1452 alpha2 = x[8*i+1]; 1453 alpha3 = x[8*i+2]; 1454 alpha4 = x[8*i+3]; 1455 alpha5 = x[8*i+4]; 1456 alpha6 = x[8*i+5]; 1457 alpha7 = x[8*i+6]; 1458 alpha8 = x[8*i+7]; 1459 while (n-->0) { 1460 y[8*(*idx)] += alpha1*(*v); 1461 y[8*(*idx)+1] += alpha2*(*v); 1462 y[8*(*idx)+2] += alpha3*(*v); 1463 y[8*(*idx)+3] += alpha4*(*v); 1464 y[8*(*idx)+4] += alpha5*(*v); 1465 y[8*(*idx)+5] += alpha6*(*v); 1466 y[8*(*idx)+6] += alpha7*(*v); 1467 y[8*(*idx)+7] += alpha8*(*v); 1468 idx++; v++; 1469 } 1470 } 1471 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1472 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1473 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1474 PetscFunctionReturn(0); 1475 } 1476 1477 /* ------------------------------------------------------------------------------*/ 1478 #undef __FUNCT__ 1479 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1480 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1481 { 1482 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1483 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1484 const PetscScalar *x,*v; 1485 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1486 PetscErrorCode ierr; 1487 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1488 PetscInt nonzerorow=0,n,i,jrow,j; 1489 1490 PetscFunctionBegin; 1491 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1492 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1493 idx = a->j; 1494 v = a->a; 1495 ii = a->i; 1496 1497 for (i=0; i<m; i++) { 1498 jrow = ii[i]; 1499 n = ii[i+1] - jrow; 1500 sum1 = 0.0; 1501 sum2 = 0.0; 1502 sum3 = 0.0; 1503 sum4 = 0.0; 1504 sum5 = 0.0; 1505 sum6 = 0.0; 1506 sum7 = 0.0; 1507 sum8 = 0.0; 1508 sum9 = 0.0; 1509 1510 nonzerorow += (n>0); 1511 for (j=0; j<n; j++) { 1512 sum1 += v[jrow]*x[9*idx[jrow]]; 1513 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1514 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1515 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1516 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1517 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1518 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1519 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1520 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1521 jrow++; 1522 } 1523 y[9*i] = sum1; 1524 y[9*i+1] = sum2; 1525 y[9*i+2] = sum3; 1526 y[9*i+3] = sum4; 1527 y[9*i+4] = sum5; 1528 y[9*i+5] = sum6; 1529 y[9*i+6] = sum7; 1530 y[9*i+7] = sum8; 1531 y[9*i+8] = sum9; 1532 } 1533 1534 ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 1535 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1536 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1537 PetscFunctionReturn(0); 1538 } 1539 1540 /* ------------------------------------------------------------------------------*/ 1541 1542 #undef __FUNCT__ 1543 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1544 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1545 { 1546 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1547 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1548 const PetscScalar *x,*v; 1549 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1550 PetscErrorCode ierr; 1551 const PetscInt m = b->AIJ->rmap->n,*idx; 1552 PetscInt n,i; 1553 1554 PetscFunctionBegin; 1555 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1556 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1557 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1558 1559 for (i=0; i<m; i++) { 1560 idx = a->j + a->i[i]; 1561 v = a->a + a->i[i]; 1562 n = a->i[i+1] - a->i[i]; 1563 alpha1 = x[9*i]; 1564 alpha2 = x[9*i+1]; 1565 alpha3 = x[9*i+2]; 1566 alpha4 = x[9*i+3]; 1567 alpha5 = x[9*i+4]; 1568 alpha6 = x[9*i+5]; 1569 alpha7 = x[9*i+6]; 1570 alpha8 = x[9*i+7]; 1571 alpha9 = x[9*i+8]; 1572 while (n-->0) { 1573 y[9*(*idx)] += alpha1*(*v); 1574 y[9*(*idx)+1] += alpha2*(*v); 1575 y[9*(*idx)+2] += alpha3*(*v); 1576 y[9*(*idx)+3] += alpha4*(*v); 1577 y[9*(*idx)+4] += alpha5*(*v); 1578 y[9*(*idx)+5] += alpha6*(*v); 1579 y[9*(*idx)+6] += alpha7*(*v); 1580 y[9*(*idx)+7] += alpha8*(*v); 1581 y[9*(*idx)+8] += alpha9*(*v); 1582 idx++; v++; 1583 } 1584 } 1585 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1586 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1587 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1588 PetscFunctionReturn(0); 1589 } 1590 1591 #undef __FUNCT__ 1592 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1593 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1594 { 1595 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1596 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1597 const PetscScalar *x,*v; 1598 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1599 PetscErrorCode ierr; 1600 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1601 PetscInt n,i,jrow,j; 1602 1603 PetscFunctionBegin; 1604 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1605 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1606 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1607 idx = a->j; 1608 v = a->a; 1609 ii = a->i; 1610 1611 for (i=0; i<m; i++) { 1612 jrow = ii[i]; 1613 n = ii[i+1] - jrow; 1614 sum1 = 0.0; 1615 sum2 = 0.0; 1616 sum3 = 0.0; 1617 sum4 = 0.0; 1618 sum5 = 0.0; 1619 sum6 = 0.0; 1620 sum7 = 0.0; 1621 sum8 = 0.0; 1622 sum9 = 0.0; 1623 for (j=0; j<n; j++) { 1624 sum1 += v[jrow]*x[9*idx[jrow]]; 1625 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1626 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1627 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1628 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1629 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1630 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1631 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1632 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1633 jrow++; 1634 } 1635 y[9*i] += sum1; 1636 y[9*i+1] += sum2; 1637 y[9*i+2] += sum3; 1638 y[9*i+3] += sum4; 1639 y[9*i+4] += sum5; 1640 y[9*i+5] += sum6; 1641 y[9*i+6] += sum7; 1642 y[9*i+7] += sum8; 1643 y[9*i+8] += sum9; 1644 } 1645 1646 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1647 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1648 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1649 PetscFunctionReturn(0); 1650 } 1651 1652 #undef __FUNCT__ 1653 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1654 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1655 { 1656 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1657 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1658 const PetscScalar *x,*v; 1659 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1660 PetscErrorCode ierr; 1661 const PetscInt m = b->AIJ->rmap->n,*idx; 1662 PetscInt n,i; 1663 1664 PetscFunctionBegin; 1665 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1666 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1667 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1668 for (i=0; i<m; i++) { 1669 idx = a->j + a->i[i]; 1670 v = a->a + a->i[i]; 1671 n = a->i[i+1] - a->i[i]; 1672 alpha1 = x[9*i]; 1673 alpha2 = x[9*i+1]; 1674 alpha3 = x[9*i+2]; 1675 alpha4 = x[9*i+3]; 1676 alpha5 = x[9*i+4]; 1677 alpha6 = x[9*i+5]; 1678 alpha7 = x[9*i+6]; 1679 alpha8 = x[9*i+7]; 1680 alpha9 = x[9*i+8]; 1681 while (n-->0) { 1682 y[9*(*idx)] += alpha1*(*v); 1683 y[9*(*idx)+1] += alpha2*(*v); 1684 y[9*(*idx)+2] += alpha3*(*v); 1685 y[9*(*idx)+3] += alpha4*(*v); 1686 y[9*(*idx)+4] += alpha5*(*v); 1687 y[9*(*idx)+5] += alpha6*(*v); 1688 y[9*(*idx)+6] += alpha7*(*v); 1689 y[9*(*idx)+7] += alpha8*(*v); 1690 y[9*(*idx)+8] += alpha9*(*v); 1691 idx++; v++; 1692 } 1693 } 1694 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1695 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1696 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1697 PetscFunctionReturn(0); 1698 } 1699 #undef __FUNCT__ 1700 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1701 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1702 { 1703 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1704 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1705 const PetscScalar *x,*v; 1706 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1707 PetscErrorCode ierr; 1708 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1709 PetscInt nonzerorow=0,n,i,jrow,j; 1710 1711 PetscFunctionBegin; 1712 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1713 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1714 idx = a->j; 1715 v = a->a; 1716 ii = a->i; 1717 1718 for (i=0; i<m; i++) { 1719 jrow = ii[i]; 1720 n = ii[i+1] - jrow; 1721 sum1 = 0.0; 1722 sum2 = 0.0; 1723 sum3 = 0.0; 1724 sum4 = 0.0; 1725 sum5 = 0.0; 1726 sum6 = 0.0; 1727 sum7 = 0.0; 1728 sum8 = 0.0; 1729 sum9 = 0.0; 1730 sum10 = 0.0; 1731 1732 nonzerorow += (n>0); 1733 for (j=0; j<n; j++) { 1734 sum1 += v[jrow]*x[10*idx[jrow]]; 1735 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1736 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1737 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1738 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1739 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1740 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1741 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1742 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1743 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1744 jrow++; 1745 } 1746 y[10*i] = sum1; 1747 y[10*i+1] = sum2; 1748 y[10*i+2] = sum3; 1749 y[10*i+3] = sum4; 1750 y[10*i+4] = sum5; 1751 y[10*i+5] = sum6; 1752 y[10*i+6] = sum7; 1753 y[10*i+7] = sum8; 1754 y[10*i+8] = sum9; 1755 y[10*i+9] = sum10; 1756 } 1757 1758 ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 1759 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1760 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1761 PetscFunctionReturn(0); 1762 } 1763 1764 #undef __FUNCT__ 1765 #define __FUNCT__ "MatMultAdd_SeqMAIJ_10" 1766 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1767 { 1768 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1769 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1770 const PetscScalar *x,*v; 1771 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1772 PetscErrorCode ierr; 1773 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1774 PetscInt n,i,jrow,j; 1775 1776 PetscFunctionBegin; 1777 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1778 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1779 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1780 idx = a->j; 1781 v = a->a; 1782 ii = a->i; 1783 1784 for (i=0; i<m; i++) { 1785 jrow = ii[i]; 1786 n = ii[i+1] - jrow; 1787 sum1 = 0.0; 1788 sum2 = 0.0; 1789 sum3 = 0.0; 1790 sum4 = 0.0; 1791 sum5 = 0.0; 1792 sum6 = 0.0; 1793 sum7 = 0.0; 1794 sum8 = 0.0; 1795 sum9 = 0.0; 1796 sum10 = 0.0; 1797 for (j=0; j<n; j++) { 1798 sum1 += v[jrow]*x[10*idx[jrow]]; 1799 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1800 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1801 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1802 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1803 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1804 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1805 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1806 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1807 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1808 jrow++; 1809 } 1810 y[10*i] += sum1; 1811 y[10*i+1] += sum2; 1812 y[10*i+2] += sum3; 1813 y[10*i+3] += sum4; 1814 y[10*i+4] += sum5; 1815 y[10*i+5] += sum6; 1816 y[10*i+6] += sum7; 1817 y[10*i+7] += sum8; 1818 y[10*i+8] += sum9; 1819 y[10*i+9] += sum10; 1820 } 1821 1822 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1823 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1824 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1825 PetscFunctionReturn(0); 1826 } 1827 1828 #undef __FUNCT__ 1829 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1830 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1831 { 1832 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1833 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1834 const PetscScalar *x,*v; 1835 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1836 PetscErrorCode ierr; 1837 const PetscInt m = b->AIJ->rmap->n,*idx; 1838 PetscInt n,i; 1839 1840 PetscFunctionBegin; 1841 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1842 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1843 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1844 1845 for (i=0; i<m; i++) { 1846 idx = a->j + a->i[i]; 1847 v = a->a + a->i[i]; 1848 n = a->i[i+1] - a->i[i]; 1849 alpha1 = x[10*i]; 1850 alpha2 = x[10*i+1]; 1851 alpha3 = x[10*i+2]; 1852 alpha4 = x[10*i+3]; 1853 alpha5 = x[10*i+4]; 1854 alpha6 = x[10*i+5]; 1855 alpha7 = x[10*i+6]; 1856 alpha8 = x[10*i+7]; 1857 alpha9 = x[10*i+8]; 1858 alpha10 = x[10*i+9]; 1859 while (n-->0) { 1860 y[10*(*idx)] += alpha1*(*v); 1861 y[10*(*idx)+1] += alpha2*(*v); 1862 y[10*(*idx)+2] += alpha3*(*v); 1863 y[10*(*idx)+3] += alpha4*(*v); 1864 y[10*(*idx)+4] += alpha5*(*v); 1865 y[10*(*idx)+5] += alpha6*(*v); 1866 y[10*(*idx)+6] += alpha7*(*v); 1867 y[10*(*idx)+7] += alpha8*(*v); 1868 y[10*(*idx)+8] += alpha9*(*v); 1869 y[10*(*idx)+9] += alpha10*(*v); 1870 idx++; v++; 1871 } 1872 } 1873 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1874 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1875 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1876 PetscFunctionReturn(0); 1877 } 1878 1879 #undef __FUNCT__ 1880 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1881 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1882 { 1883 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1884 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1885 const PetscScalar *x,*v; 1886 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1887 PetscErrorCode ierr; 1888 const PetscInt m = b->AIJ->rmap->n,*idx; 1889 PetscInt n,i; 1890 1891 PetscFunctionBegin; 1892 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1893 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1894 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1895 for (i=0; i<m; i++) { 1896 idx = a->j + a->i[i]; 1897 v = a->a + a->i[i]; 1898 n = a->i[i+1] - a->i[i]; 1899 alpha1 = x[10*i]; 1900 alpha2 = x[10*i+1]; 1901 alpha3 = x[10*i+2]; 1902 alpha4 = x[10*i+3]; 1903 alpha5 = x[10*i+4]; 1904 alpha6 = x[10*i+5]; 1905 alpha7 = x[10*i+6]; 1906 alpha8 = x[10*i+7]; 1907 alpha9 = x[10*i+8]; 1908 alpha10 = x[10*i+9]; 1909 while (n-->0) { 1910 y[10*(*idx)] += alpha1*(*v); 1911 y[10*(*idx)+1] += alpha2*(*v); 1912 y[10*(*idx)+2] += alpha3*(*v); 1913 y[10*(*idx)+3] += alpha4*(*v); 1914 y[10*(*idx)+4] += alpha5*(*v); 1915 y[10*(*idx)+5] += alpha6*(*v); 1916 y[10*(*idx)+6] += alpha7*(*v); 1917 y[10*(*idx)+7] += alpha8*(*v); 1918 y[10*(*idx)+8] += alpha9*(*v); 1919 y[10*(*idx)+9] += alpha10*(*v); 1920 idx++; v++; 1921 } 1922 } 1923 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1924 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1925 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1926 PetscFunctionReturn(0); 1927 } 1928 1929 1930 /*--------------------------------------------------------------------------------------------*/ 1931 #undef __FUNCT__ 1932 #define __FUNCT__ "MatMult_SeqMAIJ_11" 1933 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1934 { 1935 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1936 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1937 const PetscScalar *x,*v; 1938 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1939 PetscErrorCode ierr; 1940 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1941 PetscInt nonzerorow=0,n,i,jrow,j; 1942 1943 PetscFunctionBegin; 1944 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1945 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1946 idx = a->j; 1947 v = a->a; 1948 ii = a->i; 1949 1950 for (i=0; i<m; i++) { 1951 jrow = ii[i]; 1952 n = ii[i+1] - jrow; 1953 sum1 = 0.0; 1954 sum2 = 0.0; 1955 sum3 = 0.0; 1956 sum4 = 0.0; 1957 sum5 = 0.0; 1958 sum6 = 0.0; 1959 sum7 = 0.0; 1960 sum8 = 0.0; 1961 sum9 = 0.0; 1962 sum10 = 0.0; 1963 sum11 = 0.0; 1964 1965 nonzerorow += (n>0); 1966 for (j=0; j<n; j++) { 1967 sum1 += v[jrow]*x[11*idx[jrow]]; 1968 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1969 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1970 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1971 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1972 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1973 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1974 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1975 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1976 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1977 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1978 jrow++; 1979 } 1980 y[11*i] = sum1; 1981 y[11*i+1] = sum2; 1982 y[11*i+2] = sum3; 1983 y[11*i+3] = sum4; 1984 y[11*i+4] = sum5; 1985 y[11*i+5] = sum6; 1986 y[11*i+6] = sum7; 1987 y[11*i+7] = sum8; 1988 y[11*i+8] = sum9; 1989 y[11*i+9] = sum10; 1990 y[11*i+10] = sum11; 1991 } 1992 1993 ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr); 1994 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1995 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1996 PetscFunctionReturn(0); 1997 } 1998 1999 #undef __FUNCT__ 2000 #define __FUNCT__ "MatMultAdd_SeqMAIJ_11" 2001 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2002 { 2003 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2004 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2005 const PetscScalar *x,*v; 2006 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 2007 PetscErrorCode ierr; 2008 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2009 PetscInt n,i,jrow,j; 2010 2011 PetscFunctionBegin; 2012 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2013 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2014 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2015 idx = a->j; 2016 v = a->a; 2017 ii = a->i; 2018 2019 for (i=0; i<m; i++) { 2020 jrow = ii[i]; 2021 n = ii[i+1] - jrow; 2022 sum1 = 0.0; 2023 sum2 = 0.0; 2024 sum3 = 0.0; 2025 sum4 = 0.0; 2026 sum5 = 0.0; 2027 sum6 = 0.0; 2028 sum7 = 0.0; 2029 sum8 = 0.0; 2030 sum9 = 0.0; 2031 sum10 = 0.0; 2032 sum11 = 0.0; 2033 for (j=0; j<n; j++) { 2034 sum1 += v[jrow]*x[11*idx[jrow]]; 2035 sum2 += v[jrow]*x[11*idx[jrow]+1]; 2036 sum3 += v[jrow]*x[11*idx[jrow]+2]; 2037 sum4 += v[jrow]*x[11*idx[jrow]+3]; 2038 sum5 += v[jrow]*x[11*idx[jrow]+4]; 2039 sum6 += v[jrow]*x[11*idx[jrow]+5]; 2040 sum7 += v[jrow]*x[11*idx[jrow]+6]; 2041 sum8 += v[jrow]*x[11*idx[jrow]+7]; 2042 sum9 += v[jrow]*x[11*idx[jrow]+8]; 2043 sum10 += v[jrow]*x[11*idx[jrow]+9]; 2044 sum11 += v[jrow]*x[11*idx[jrow]+10]; 2045 jrow++; 2046 } 2047 y[11*i] += sum1; 2048 y[11*i+1] += sum2; 2049 y[11*i+2] += sum3; 2050 y[11*i+3] += sum4; 2051 y[11*i+4] += sum5; 2052 y[11*i+5] += sum6; 2053 y[11*i+6] += sum7; 2054 y[11*i+7] += sum8; 2055 y[11*i+8] += sum9; 2056 y[11*i+9] += sum10; 2057 y[11*i+10] += sum11; 2058 } 2059 2060 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2061 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2062 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2063 PetscFunctionReturn(0); 2064 } 2065 2066 #undef __FUNCT__ 2067 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11" 2068 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 2069 { 2070 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2071 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2072 const PetscScalar *x,*v; 2073 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2074 PetscErrorCode ierr; 2075 const PetscInt m = b->AIJ->rmap->n,*idx; 2076 PetscInt n,i; 2077 2078 PetscFunctionBegin; 2079 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2080 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2081 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2082 2083 for (i=0; i<m; i++) { 2084 idx = a->j + a->i[i]; 2085 v = a->a + a->i[i]; 2086 n = a->i[i+1] - a->i[i]; 2087 alpha1 = x[11*i]; 2088 alpha2 = x[11*i+1]; 2089 alpha3 = x[11*i+2]; 2090 alpha4 = x[11*i+3]; 2091 alpha5 = x[11*i+4]; 2092 alpha6 = x[11*i+5]; 2093 alpha7 = x[11*i+6]; 2094 alpha8 = x[11*i+7]; 2095 alpha9 = x[11*i+8]; 2096 alpha10 = x[11*i+9]; 2097 alpha11 = x[11*i+10]; 2098 while (n-->0) { 2099 y[11*(*idx)] += alpha1*(*v); 2100 y[11*(*idx)+1] += alpha2*(*v); 2101 y[11*(*idx)+2] += alpha3*(*v); 2102 y[11*(*idx)+3] += alpha4*(*v); 2103 y[11*(*idx)+4] += alpha5*(*v); 2104 y[11*(*idx)+5] += alpha6*(*v); 2105 y[11*(*idx)+6] += alpha7*(*v); 2106 y[11*(*idx)+7] += alpha8*(*v); 2107 y[11*(*idx)+8] += alpha9*(*v); 2108 y[11*(*idx)+9] += alpha10*(*v); 2109 y[11*(*idx)+10] += alpha11*(*v); 2110 idx++; v++; 2111 } 2112 } 2113 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2114 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2115 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2116 PetscFunctionReturn(0); 2117 } 2118 2119 #undef __FUNCT__ 2120 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11" 2121 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2122 { 2123 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2124 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2125 const PetscScalar *x,*v; 2126 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2127 PetscErrorCode ierr; 2128 const PetscInt m = b->AIJ->rmap->n,*idx; 2129 PetscInt n,i; 2130 2131 PetscFunctionBegin; 2132 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2133 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2134 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2135 for (i=0; i<m; i++) { 2136 idx = a->j + a->i[i]; 2137 v = a->a + a->i[i]; 2138 n = a->i[i+1] - a->i[i]; 2139 alpha1 = x[11*i]; 2140 alpha2 = x[11*i+1]; 2141 alpha3 = x[11*i+2]; 2142 alpha4 = x[11*i+3]; 2143 alpha5 = x[11*i+4]; 2144 alpha6 = x[11*i+5]; 2145 alpha7 = x[11*i+6]; 2146 alpha8 = x[11*i+7]; 2147 alpha9 = x[11*i+8]; 2148 alpha10 = x[11*i+9]; 2149 alpha11 = x[11*i+10]; 2150 while (n-->0) { 2151 y[11*(*idx)] += alpha1*(*v); 2152 y[11*(*idx)+1] += alpha2*(*v); 2153 y[11*(*idx)+2] += alpha3*(*v); 2154 y[11*(*idx)+3] += alpha4*(*v); 2155 y[11*(*idx)+4] += alpha5*(*v); 2156 y[11*(*idx)+5] += alpha6*(*v); 2157 y[11*(*idx)+6] += alpha7*(*v); 2158 y[11*(*idx)+7] += alpha8*(*v); 2159 y[11*(*idx)+8] += alpha9*(*v); 2160 y[11*(*idx)+9] += alpha10*(*v); 2161 y[11*(*idx)+10] += alpha11*(*v); 2162 idx++; v++; 2163 } 2164 } 2165 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2166 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2167 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2168 PetscFunctionReturn(0); 2169 } 2170 2171 2172 /*--------------------------------------------------------------------------------------------*/ 2173 #undef __FUNCT__ 2174 #define __FUNCT__ "MatMult_SeqMAIJ_16" 2175 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2176 { 2177 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2178 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2179 const PetscScalar *x,*v; 2180 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2181 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2182 PetscErrorCode ierr; 2183 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2184 PetscInt nonzerorow=0,n,i,jrow,j; 2185 2186 PetscFunctionBegin; 2187 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2188 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2189 idx = a->j; 2190 v = a->a; 2191 ii = a->i; 2192 2193 for (i=0; i<m; i++) { 2194 jrow = ii[i]; 2195 n = ii[i+1] - jrow; 2196 sum1 = 0.0; 2197 sum2 = 0.0; 2198 sum3 = 0.0; 2199 sum4 = 0.0; 2200 sum5 = 0.0; 2201 sum6 = 0.0; 2202 sum7 = 0.0; 2203 sum8 = 0.0; 2204 sum9 = 0.0; 2205 sum10 = 0.0; 2206 sum11 = 0.0; 2207 sum12 = 0.0; 2208 sum13 = 0.0; 2209 sum14 = 0.0; 2210 sum15 = 0.0; 2211 sum16 = 0.0; 2212 2213 nonzerorow += (n>0); 2214 for (j=0; j<n; j++) { 2215 sum1 += v[jrow]*x[16*idx[jrow]]; 2216 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2217 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2218 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2219 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2220 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2221 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2222 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2223 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2224 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2225 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2226 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2227 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2228 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2229 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2230 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2231 jrow++; 2232 } 2233 y[16*i] = sum1; 2234 y[16*i+1] = sum2; 2235 y[16*i+2] = sum3; 2236 y[16*i+3] = sum4; 2237 y[16*i+4] = sum5; 2238 y[16*i+5] = sum6; 2239 y[16*i+6] = sum7; 2240 y[16*i+7] = sum8; 2241 y[16*i+8] = sum9; 2242 y[16*i+9] = sum10; 2243 y[16*i+10] = sum11; 2244 y[16*i+11] = sum12; 2245 y[16*i+12] = sum13; 2246 y[16*i+13] = sum14; 2247 y[16*i+14] = sum15; 2248 y[16*i+15] = sum16; 2249 } 2250 2251 ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 2252 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2253 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2254 PetscFunctionReturn(0); 2255 } 2256 2257 #undef __FUNCT__ 2258 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 2259 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2260 { 2261 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2262 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2263 const PetscScalar *x,*v; 2264 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2265 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2266 PetscErrorCode ierr; 2267 const PetscInt m = b->AIJ->rmap->n,*idx; 2268 PetscInt n,i; 2269 2270 PetscFunctionBegin; 2271 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2272 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2273 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2274 2275 for (i=0; i<m; i++) { 2276 idx = a->j + a->i[i]; 2277 v = a->a + a->i[i]; 2278 n = a->i[i+1] - a->i[i]; 2279 alpha1 = x[16*i]; 2280 alpha2 = x[16*i+1]; 2281 alpha3 = x[16*i+2]; 2282 alpha4 = x[16*i+3]; 2283 alpha5 = x[16*i+4]; 2284 alpha6 = x[16*i+5]; 2285 alpha7 = x[16*i+6]; 2286 alpha8 = x[16*i+7]; 2287 alpha9 = x[16*i+8]; 2288 alpha10 = x[16*i+9]; 2289 alpha11 = x[16*i+10]; 2290 alpha12 = x[16*i+11]; 2291 alpha13 = x[16*i+12]; 2292 alpha14 = x[16*i+13]; 2293 alpha15 = x[16*i+14]; 2294 alpha16 = x[16*i+15]; 2295 while (n-->0) { 2296 y[16*(*idx)] += alpha1*(*v); 2297 y[16*(*idx)+1] += alpha2*(*v); 2298 y[16*(*idx)+2] += alpha3*(*v); 2299 y[16*(*idx)+3] += alpha4*(*v); 2300 y[16*(*idx)+4] += alpha5*(*v); 2301 y[16*(*idx)+5] += alpha6*(*v); 2302 y[16*(*idx)+6] += alpha7*(*v); 2303 y[16*(*idx)+7] += alpha8*(*v); 2304 y[16*(*idx)+8] += alpha9*(*v); 2305 y[16*(*idx)+9] += alpha10*(*v); 2306 y[16*(*idx)+10] += alpha11*(*v); 2307 y[16*(*idx)+11] += alpha12*(*v); 2308 y[16*(*idx)+12] += alpha13*(*v); 2309 y[16*(*idx)+13] += alpha14*(*v); 2310 y[16*(*idx)+14] += alpha15*(*v); 2311 y[16*(*idx)+15] += alpha16*(*v); 2312 idx++; v++; 2313 } 2314 } 2315 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2316 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2317 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2318 PetscFunctionReturn(0); 2319 } 2320 2321 #undef __FUNCT__ 2322 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 2323 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2324 { 2325 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2326 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2327 const PetscScalar *x,*v; 2328 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2329 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2330 PetscErrorCode ierr; 2331 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2332 PetscInt n,i,jrow,j; 2333 2334 PetscFunctionBegin; 2335 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2336 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2337 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2338 idx = a->j; 2339 v = a->a; 2340 ii = a->i; 2341 2342 for (i=0; i<m; i++) { 2343 jrow = ii[i]; 2344 n = ii[i+1] - jrow; 2345 sum1 = 0.0; 2346 sum2 = 0.0; 2347 sum3 = 0.0; 2348 sum4 = 0.0; 2349 sum5 = 0.0; 2350 sum6 = 0.0; 2351 sum7 = 0.0; 2352 sum8 = 0.0; 2353 sum9 = 0.0; 2354 sum10 = 0.0; 2355 sum11 = 0.0; 2356 sum12 = 0.0; 2357 sum13 = 0.0; 2358 sum14 = 0.0; 2359 sum15 = 0.0; 2360 sum16 = 0.0; 2361 for (j=0; j<n; j++) { 2362 sum1 += v[jrow]*x[16*idx[jrow]]; 2363 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2364 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2365 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2366 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2367 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2368 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2369 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2370 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2371 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2372 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2373 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2374 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2375 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2376 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2377 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2378 jrow++; 2379 } 2380 y[16*i] += sum1; 2381 y[16*i+1] += sum2; 2382 y[16*i+2] += sum3; 2383 y[16*i+3] += sum4; 2384 y[16*i+4] += sum5; 2385 y[16*i+5] += sum6; 2386 y[16*i+6] += sum7; 2387 y[16*i+7] += sum8; 2388 y[16*i+8] += sum9; 2389 y[16*i+9] += sum10; 2390 y[16*i+10] += sum11; 2391 y[16*i+11] += sum12; 2392 y[16*i+12] += sum13; 2393 y[16*i+13] += sum14; 2394 y[16*i+14] += sum15; 2395 y[16*i+15] += sum16; 2396 } 2397 2398 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2399 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2400 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2401 PetscFunctionReturn(0); 2402 } 2403 2404 #undef __FUNCT__ 2405 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2406 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2407 { 2408 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2409 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2410 const PetscScalar *x,*v; 2411 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2412 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2413 PetscErrorCode ierr; 2414 const PetscInt m = b->AIJ->rmap->n,*idx; 2415 PetscInt n,i; 2416 2417 PetscFunctionBegin; 2418 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2419 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2420 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2421 for (i=0; i<m; i++) { 2422 idx = a->j + a->i[i]; 2423 v = a->a + a->i[i]; 2424 n = a->i[i+1] - a->i[i]; 2425 alpha1 = x[16*i]; 2426 alpha2 = x[16*i+1]; 2427 alpha3 = x[16*i+2]; 2428 alpha4 = x[16*i+3]; 2429 alpha5 = x[16*i+4]; 2430 alpha6 = x[16*i+5]; 2431 alpha7 = x[16*i+6]; 2432 alpha8 = x[16*i+7]; 2433 alpha9 = x[16*i+8]; 2434 alpha10 = x[16*i+9]; 2435 alpha11 = x[16*i+10]; 2436 alpha12 = x[16*i+11]; 2437 alpha13 = x[16*i+12]; 2438 alpha14 = x[16*i+13]; 2439 alpha15 = x[16*i+14]; 2440 alpha16 = x[16*i+15]; 2441 while (n-->0) { 2442 y[16*(*idx)] += alpha1*(*v); 2443 y[16*(*idx)+1] += alpha2*(*v); 2444 y[16*(*idx)+2] += alpha3*(*v); 2445 y[16*(*idx)+3] += alpha4*(*v); 2446 y[16*(*idx)+4] += alpha5*(*v); 2447 y[16*(*idx)+5] += alpha6*(*v); 2448 y[16*(*idx)+6] += alpha7*(*v); 2449 y[16*(*idx)+7] += alpha8*(*v); 2450 y[16*(*idx)+8] += alpha9*(*v); 2451 y[16*(*idx)+9] += alpha10*(*v); 2452 y[16*(*idx)+10] += alpha11*(*v); 2453 y[16*(*idx)+11] += alpha12*(*v); 2454 y[16*(*idx)+12] += alpha13*(*v); 2455 y[16*(*idx)+13] += alpha14*(*v); 2456 y[16*(*idx)+14] += alpha15*(*v); 2457 y[16*(*idx)+15] += alpha16*(*v); 2458 idx++; v++; 2459 } 2460 } 2461 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2462 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2463 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2464 PetscFunctionReturn(0); 2465 } 2466 2467 /*--------------------------------------------------------------------------------------------*/ 2468 #undef __FUNCT__ 2469 #define __FUNCT__ "MatMult_SeqMAIJ_18" 2470 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2471 { 2472 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2473 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2474 const PetscScalar *x,*v; 2475 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2476 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2477 PetscErrorCode ierr; 2478 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2479 PetscInt nonzerorow=0,n,i,jrow,j; 2480 2481 PetscFunctionBegin; 2482 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2483 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2484 idx = a->j; 2485 v = a->a; 2486 ii = a->i; 2487 2488 for (i=0; i<m; i++) { 2489 jrow = ii[i]; 2490 n = ii[i+1] - jrow; 2491 sum1 = 0.0; 2492 sum2 = 0.0; 2493 sum3 = 0.0; 2494 sum4 = 0.0; 2495 sum5 = 0.0; 2496 sum6 = 0.0; 2497 sum7 = 0.0; 2498 sum8 = 0.0; 2499 sum9 = 0.0; 2500 sum10 = 0.0; 2501 sum11 = 0.0; 2502 sum12 = 0.0; 2503 sum13 = 0.0; 2504 sum14 = 0.0; 2505 sum15 = 0.0; 2506 sum16 = 0.0; 2507 sum17 = 0.0; 2508 sum18 = 0.0; 2509 2510 nonzerorow += (n>0); 2511 for (j=0; j<n; j++) { 2512 sum1 += v[jrow]*x[18*idx[jrow]]; 2513 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2514 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2515 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2516 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2517 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2518 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2519 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2520 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2521 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2522 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2523 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2524 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2525 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2526 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2527 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2528 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2529 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2530 jrow++; 2531 } 2532 y[18*i] = sum1; 2533 y[18*i+1] = sum2; 2534 y[18*i+2] = sum3; 2535 y[18*i+3] = sum4; 2536 y[18*i+4] = sum5; 2537 y[18*i+5] = sum6; 2538 y[18*i+6] = sum7; 2539 y[18*i+7] = sum8; 2540 y[18*i+8] = sum9; 2541 y[18*i+9] = sum10; 2542 y[18*i+10] = sum11; 2543 y[18*i+11] = sum12; 2544 y[18*i+12] = sum13; 2545 y[18*i+13] = sum14; 2546 y[18*i+14] = sum15; 2547 y[18*i+15] = sum16; 2548 y[18*i+16] = sum17; 2549 y[18*i+17] = sum18; 2550 } 2551 2552 ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 2553 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2554 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2555 PetscFunctionReturn(0); 2556 } 2557 2558 #undef __FUNCT__ 2559 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18" 2560 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2561 { 2562 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2563 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2564 const PetscScalar *x,*v; 2565 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2566 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2567 PetscErrorCode ierr; 2568 const PetscInt m = b->AIJ->rmap->n,*idx; 2569 PetscInt n,i; 2570 2571 PetscFunctionBegin; 2572 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2573 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2574 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2575 2576 for (i=0; i<m; i++) { 2577 idx = a->j + a->i[i]; 2578 v = a->a + a->i[i]; 2579 n = a->i[i+1] - a->i[i]; 2580 alpha1 = x[18*i]; 2581 alpha2 = x[18*i+1]; 2582 alpha3 = x[18*i+2]; 2583 alpha4 = x[18*i+3]; 2584 alpha5 = x[18*i+4]; 2585 alpha6 = x[18*i+5]; 2586 alpha7 = x[18*i+6]; 2587 alpha8 = x[18*i+7]; 2588 alpha9 = x[18*i+8]; 2589 alpha10 = x[18*i+9]; 2590 alpha11 = x[18*i+10]; 2591 alpha12 = x[18*i+11]; 2592 alpha13 = x[18*i+12]; 2593 alpha14 = x[18*i+13]; 2594 alpha15 = x[18*i+14]; 2595 alpha16 = x[18*i+15]; 2596 alpha17 = x[18*i+16]; 2597 alpha18 = x[18*i+17]; 2598 while (n-->0) { 2599 y[18*(*idx)] += alpha1*(*v); 2600 y[18*(*idx)+1] += alpha2*(*v); 2601 y[18*(*idx)+2] += alpha3*(*v); 2602 y[18*(*idx)+3] += alpha4*(*v); 2603 y[18*(*idx)+4] += alpha5*(*v); 2604 y[18*(*idx)+5] += alpha6*(*v); 2605 y[18*(*idx)+6] += alpha7*(*v); 2606 y[18*(*idx)+7] += alpha8*(*v); 2607 y[18*(*idx)+8] += alpha9*(*v); 2608 y[18*(*idx)+9] += alpha10*(*v); 2609 y[18*(*idx)+10] += alpha11*(*v); 2610 y[18*(*idx)+11] += alpha12*(*v); 2611 y[18*(*idx)+12] += alpha13*(*v); 2612 y[18*(*idx)+13] += alpha14*(*v); 2613 y[18*(*idx)+14] += alpha15*(*v); 2614 y[18*(*idx)+15] += alpha16*(*v); 2615 y[18*(*idx)+16] += alpha17*(*v); 2616 y[18*(*idx)+17] += alpha18*(*v); 2617 idx++; v++; 2618 } 2619 } 2620 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2621 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2622 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2623 PetscFunctionReturn(0); 2624 } 2625 2626 #undef __FUNCT__ 2627 #define __FUNCT__ "MatMultAdd_SeqMAIJ_18" 2628 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2629 { 2630 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2631 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2632 const PetscScalar *x,*v; 2633 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2634 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2635 PetscErrorCode ierr; 2636 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2637 PetscInt n,i,jrow,j; 2638 2639 PetscFunctionBegin; 2640 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2641 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2642 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2643 idx = a->j; 2644 v = a->a; 2645 ii = a->i; 2646 2647 for (i=0; i<m; i++) { 2648 jrow = ii[i]; 2649 n = ii[i+1] - jrow; 2650 sum1 = 0.0; 2651 sum2 = 0.0; 2652 sum3 = 0.0; 2653 sum4 = 0.0; 2654 sum5 = 0.0; 2655 sum6 = 0.0; 2656 sum7 = 0.0; 2657 sum8 = 0.0; 2658 sum9 = 0.0; 2659 sum10 = 0.0; 2660 sum11 = 0.0; 2661 sum12 = 0.0; 2662 sum13 = 0.0; 2663 sum14 = 0.0; 2664 sum15 = 0.0; 2665 sum16 = 0.0; 2666 sum17 = 0.0; 2667 sum18 = 0.0; 2668 for (j=0; j<n; j++) { 2669 sum1 += v[jrow]*x[18*idx[jrow]]; 2670 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2671 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2672 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2673 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2674 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2675 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2676 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2677 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2678 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2679 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2680 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2681 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2682 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2683 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2684 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2685 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2686 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2687 jrow++; 2688 } 2689 y[18*i] += sum1; 2690 y[18*i+1] += sum2; 2691 y[18*i+2] += sum3; 2692 y[18*i+3] += sum4; 2693 y[18*i+4] += sum5; 2694 y[18*i+5] += sum6; 2695 y[18*i+6] += sum7; 2696 y[18*i+7] += sum8; 2697 y[18*i+8] += sum9; 2698 y[18*i+9] += sum10; 2699 y[18*i+10] += sum11; 2700 y[18*i+11] += sum12; 2701 y[18*i+12] += sum13; 2702 y[18*i+13] += sum14; 2703 y[18*i+14] += sum15; 2704 y[18*i+15] += sum16; 2705 y[18*i+16] += sum17; 2706 y[18*i+17] += sum18; 2707 } 2708 2709 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2710 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2711 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2712 PetscFunctionReturn(0); 2713 } 2714 2715 #undef __FUNCT__ 2716 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18" 2717 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2718 { 2719 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2720 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2721 const PetscScalar *x,*v; 2722 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2723 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2724 PetscErrorCode ierr; 2725 const PetscInt m = b->AIJ->rmap->n,*idx; 2726 PetscInt n,i; 2727 2728 PetscFunctionBegin; 2729 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2730 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2731 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2732 for (i=0; i<m; i++) { 2733 idx = a->j + a->i[i]; 2734 v = a->a + a->i[i]; 2735 n = a->i[i+1] - a->i[i]; 2736 alpha1 = x[18*i]; 2737 alpha2 = x[18*i+1]; 2738 alpha3 = x[18*i+2]; 2739 alpha4 = x[18*i+3]; 2740 alpha5 = x[18*i+4]; 2741 alpha6 = x[18*i+5]; 2742 alpha7 = x[18*i+6]; 2743 alpha8 = x[18*i+7]; 2744 alpha9 = x[18*i+8]; 2745 alpha10 = x[18*i+9]; 2746 alpha11 = x[18*i+10]; 2747 alpha12 = x[18*i+11]; 2748 alpha13 = x[18*i+12]; 2749 alpha14 = x[18*i+13]; 2750 alpha15 = x[18*i+14]; 2751 alpha16 = x[18*i+15]; 2752 alpha17 = x[18*i+16]; 2753 alpha18 = x[18*i+17]; 2754 while (n-->0) { 2755 y[18*(*idx)] += alpha1*(*v); 2756 y[18*(*idx)+1] += alpha2*(*v); 2757 y[18*(*idx)+2] += alpha3*(*v); 2758 y[18*(*idx)+3] += alpha4*(*v); 2759 y[18*(*idx)+4] += alpha5*(*v); 2760 y[18*(*idx)+5] += alpha6*(*v); 2761 y[18*(*idx)+6] += alpha7*(*v); 2762 y[18*(*idx)+7] += alpha8*(*v); 2763 y[18*(*idx)+8] += alpha9*(*v); 2764 y[18*(*idx)+9] += alpha10*(*v); 2765 y[18*(*idx)+10] += alpha11*(*v); 2766 y[18*(*idx)+11] += alpha12*(*v); 2767 y[18*(*idx)+12] += alpha13*(*v); 2768 y[18*(*idx)+13] += alpha14*(*v); 2769 y[18*(*idx)+14] += alpha15*(*v); 2770 y[18*(*idx)+15] += alpha16*(*v); 2771 y[18*(*idx)+16] += alpha17*(*v); 2772 y[18*(*idx)+17] += alpha18*(*v); 2773 idx++; v++; 2774 } 2775 } 2776 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2777 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2778 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2779 PetscFunctionReturn(0); 2780 } 2781 2782 #undef __FUNCT__ 2783 #define __FUNCT__ "MatMult_SeqMAIJ_N" 2784 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2785 { 2786 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2787 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2788 const PetscScalar *x,*v; 2789 PetscScalar *y,*sums; 2790 PetscErrorCode ierr; 2791 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2792 PetscInt n,i,jrow,j,dof = b->dof,k; 2793 2794 PetscFunctionBegin; 2795 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2796 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2797 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2798 idx = a->j; 2799 v = a->a; 2800 ii = a->i; 2801 2802 for (i=0; i<m; i++) { 2803 jrow = ii[i]; 2804 n = ii[i+1] - jrow; 2805 sums = y + dof*i; 2806 for (j=0; j<n; j++) { 2807 for (k=0; k<dof; k++) { 2808 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2809 } 2810 jrow++; 2811 } 2812 } 2813 2814 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2815 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2816 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2817 PetscFunctionReturn(0); 2818 } 2819 2820 #undef __FUNCT__ 2821 #define __FUNCT__ "MatMultAdd_SeqMAIJ_N" 2822 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2823 { 2824 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2825 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2826 const PetscScalar *x,*v; 2827 PetscScalar *y,*sums; 2828 PetscErrorCode ierr; 2829 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2830 PetscInt n,i,jrow,j,dof = b->dof,k; 2831 2832 PetscFunctionBegin; 2833 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2834 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2835 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2836 idx = a->j; 2837 v = a->a; 2838 ii = a->i; 2839 2840 for (i=0; i<m; i++) { 2841 jrow = ii[i]; 2842 n = ii[i+1] - jrow; 2843 sums = y + dof*i; 2844 for (j=0; j<n; j++) { 2845 for (k=0; k<dof; k++) { 2846 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2847 } 2848 jrow++; 2849 } 2850 } 2851 2852 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2853 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2854 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2855 PetscFunctionReturn(0); 2856 } 2857 2858 #undef __FUNCT__ 2859 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N" 2860 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2861 { 2862 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2863 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2864 const PetscScalar *x,*v,*alpha; 2865 PetscScalar *y; 2866 PetscErrorCode ierr; 2867 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2868 PetscInt n,i,k; 2869 2870 PetscFunctionBegin; 2871 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2872 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2873 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2874 for (i=0; i<m; i++) { 2875 idx = a->j + a->i[i]; 2876 v = a->a + a->i[i]; 2877 n = a->i[i+1] - a->i[i]; 2878 alpha = x + dof*i; 2879 while (n-->0) { 2880 for (k=0; k<dof; k++) { 2881 y[dof*(*idx)+k] += alpha[k]*(*v); 2882 } 2883 idx++; v++; 2884 } 2885 } 2886 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2887 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2888 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2889 PetscFunctionReturn(0); 2890 } 2891 2892 #undef __FUNCT__ 2893 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N" 2894 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2895 { 2896 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2897 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2898 const PetscScalar *x,*v,*alpha; 2899 PetscScalar *y; 2900 PetscErrorCode ierr; 2901 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2902 PetscInt n,i,k; 2903 2904 PetscFunctionBegin; 2905 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2906 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2907 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2908 for (i=0; i<m; i++) { 2909 idx = a->j + a->i[i]; 2910 v = a->a + a->i[i]; 2911 n = a->i[i+1] - a->i[i]; 2912 alpha = x + dof*i; 2913 while (n-->0) { 2914 for (k=0; k<dof; k++) { 2915 y[dof*(*idx)+k] += alpha[k]*(*v); 2916 } 2917 idx++; v++; 2918 } 2919 } 2920 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2921 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2922 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2923 PetscFunctionReturn(0); 2924 } 2925 2926 /*===================================================================================*/ 2927 #undef __FUNCT__ 2928 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2929 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2930 { 2931 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2932 PetscErrorCode ierr; 2933 2934 PetscFunctionBegin; 2935 /* start the scatter */ 2936 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2937 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2938 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2939 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2940 PetscFunctionReturn(0); 2941 } 2942 2943 #undef __FUNCT__ 2944 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2945 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2946 { 2947 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2948 PetscErrorCode ierr; 2949 2950 PetscFunctionBegin; 2951 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2952 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2953 ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2954 ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2955 PetscFunctionReturn(0); 2956 } 2957 2958 #undef __FUNCT__ 2959 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2960 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2961 { 2962 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2963 PetscErrorCode ierr; 2964 2965 PetscFunctionBegin; 2966 /* start the scatter */ 2967 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2968 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2969 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2970 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2971 PetscFunctionReturn(0); 2972 } 2973 2974 #undef __FUNCT__ 2975 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2976 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2977 { 2978 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2979 PetscErrorCode ierr; 2980 2981 PetscFunctionBegin; 2982 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2983 ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2984 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2985 ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2986 PetscFunctionReturn(0); 2987 } 2988 2989 /* ----------------------------------------------------------------*/ 2990 #undef __FUNCT__ 2991 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 2992 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2993 { 2994 PetscErrorCode ierr; 2995 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2996 Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data; 2997 Mat P =pp->AIJ; 2998 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2999 PetscInt *pti,*ptj,*ptJ; 3000 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 3001 const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 3002 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 3003 MatScalar *ca; 3004 const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 3005 3006 PetscFunctionBegin; 3007 /* Get ij structure of P^T */ 3008 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 3009 3010 cn = pn*ppdof; 3011 /* Allocate ci array, arrays for fill computation and */ 3012 /* free space for accumulating nonzero column info */ 3013 ierr = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr); 3014 ci[0] = 0; 3015 3016 /* Work arrays for rows of P^T*A */ 3017 ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr); 3018 ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr); 3019 ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr); 3020 3021 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 3022 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 3023 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 3024 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr); 3025 current_space = free_space; 3026 3027 /* Determine symbolic info for each row of C: */ 3028 for (i=0; i<pn; i++) { 3029 ptnzi = pti[i+1] - pti[i]; 3030 ptJ = ptj + pti[i]; 3031 for (dof=0; dof<ppdof; dof++) { 3032 ptanzi = 0; 3033 /* Determine symbolic row of PtA: */ 3034 for (j=0; j<ptnzi; j++) { 3035 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 3036 arow = ptJ[j]*ppdof + dof; 3037 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 3038 anzj = ai[arow+1] - ai[arow]; 3039 ajj = aj + ai[arow]; 3040 for (k=0; k<anzj; k++) { 3041 if (!ptadenserow[ajj[k]]) { 3042 ptadenserow[ajj[k]] = -1; 3043 ptasparserow[ptanzi++] = ajj[k]; 3044 } 3045 } 3046 } 3047 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 3048 ptaj = ptasparserow; 3049 cnzi = 0; 3050 for (j=0; j<ptanzi; j++) { 3051 /* Get offset within block of P */ 3052 pshift = *ptaj%ppdof; 3053 /* Get block row of P */ 3054 prow = (*ptaj++)/ppdof; /* integer division */ 3055 /* P has same number of nonzeros per row as the compressed form */ 3056 pnzj = pi[prow+1] - pi[prow]; 3057 pjj = pj + pi[prow]; 3058 for (k=0;k<pnzj;k++) { 3059 /* Locations in C are shifted by the offset within the block */ 3060 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 3061 if (!denserow[pjj[k]*ppdof+pshift]) { 3062 denserow[pjj[k]*ppdof+pshift] = -1; 3063 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 3064 } 3065 } 3066 } 3067 3068 /* sort sparserow */ 3069 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 3070 3071 /* If free space is not available, make more free space */ 3072 /* Double the amount of total space in the list */ 3073 if (current_space->local_remaining<cnzi) { 3074 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 3075 } 3076 3077 /* Copy data into free space, and zero out denserows */ 3078 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 3079 3080 current_space->array += cnzi; 3081 current_space->local_used += cnzi; 3082 current_space->local_remaining -= cnzi; 3083 3084 for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 3085 for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0; 3086 3087 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 3088 /* For now, we will recompute what is needed. */ 3089 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 3090 } 3091 } 3092 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 3093 /* Allocate space for cj, initialize cj, and */ 3094 /* destroy list of free space and other temporary array(s) */ 3095 ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr); 3096 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 3097 ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr); 3098 3099 /* Allocate space for ca */ 3100 ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr); 3101 3102 /* put together the new matrix */ 3103 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 3104 ierr = MatSetBlockSize(*C,pp->dof);CHKERRQ(ierr); 3105 3106 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3107 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3108 c = (Mat_SeqAIJ*)((*C)->data); 3109 c->free_a = PETSC_TRUE; 3110 c->free_ij = PETSC_TRUE; 3111 c->nonew = 0; 3112 3113 (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 3114 3115 /* Clean up. */ 3116 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 3117 PetscFunctionReturn(0); 3118 } 3119 3120 #undef __FUNCT__ 3121 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 3122 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 3123 { 3124 /* This routine requires testing -- first draft only */ 3125 PetscErrorCode ierr; 3126 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 3127 Mat P =pp->AIJ; 3128 Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data; 3129 Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data; 3130 Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data; 3131 const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj; 3132 const PetscInt *ci=c->i,*cj=c->j,*cjj; 3133 const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3134 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3135 const MatScalar *aa=a->a,*pa=p->a,*pA,*paj; 3136 MatScalar *ca=c->a,*caj,*apa; 3137 3138 PetscFunctionBegin; 3139 /* Allocate temporary array for storage of one row of A*P */ 3140 ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr); 3141 3142 /* Clear old values in C */ 3143 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 3144 3145 for (i=0; i<am; i++) { 3146 /* Form sparse row of A*P */ 3147 anzi = ai[i+1] - ai[i]; 3148 apnzj = 0; 3149 for (j=0; j<anzi; j++) { 3150 /* Get offset within block of P */ 3151 pshift = *aj%ppdof; 3152 /* Get block row of P */ 3153 prow = *aj++/ppdof; /* integer division */ 3154 pnzj = pi[prow+1] - pi[prow]; 3155 pjj = pj + pi[prow]; 3156 paj = pa + pi[prow]; 3157 for (k=0; k<pnzj; k++) { 3158 poffset = pjj[k]*ppdof+pshift; 3159 if (!apjdense[poffset]) { 3160 apjdense[poffset] = -1; 3161 apj[apnzj++] = poffset; 3162 } 3163 apa[poffset] += (*aa)*paj[k]; 3164 } 3165 ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 3166 aa++; 3167 } 3168 3169 /* Sort the j index array for quick sparse axpy. */ 3170 /* Note: a array does not need sorting as it is in dense storage locations. */ 3171 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 3172 3173 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 3174 prow = i/ppdof; /* integer division */ 3175 pshift = i%ppdof; 3176 poffset = pi[prow]; 3177 pnzi = pi[prow+1] - poffset; 3178 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 3179 pJ = pj+poffset; 3180 pA = pa+poffset; 3181 for (j=0; j<pnzi; j++) { 3182 crow = (*pJ)*ppdof+pshift; 3183 cjj = cj + ci[crow]; 3184 caj = ca + ci[crow]; 3185 pJ++; 3186 /* Perform sparse axpy operation. Note cjj includes apj. */ 3187 for (k=0,nextap=0; nextap<apnzj; k++) { 3188 if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]]; 3189 } 3190 ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 3191 pA++; 3192 } 3193 3194 /* Zero the current row info for A*P */ 3195 for (j=0; j<apnzj; j++) { 3196 apa[apj[j]] = 0.; 3197 apjdense[apj[j]] = 0; 3198 } 3199 } 3200 3201 /* Assemble the final matrix and clean up */ 3202 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3203 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3204 ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr); 3205 PetscFunctionReturn(0); 3206 } 3207 3208 #undef __FUNCT__ 3209 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqMAIJ" 3210 PETSC_INTERN PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3211 { 3212 PetscErrorCode ierr; 3213 3214 PetscFunctionBegin; 3215 if (scall == MAT_INITIAL_MATRIX) { 3216 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3217 ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr); 3218 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3219 } 3220 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3221 ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr); 3222 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3223 PetscFunctionReturn(0); 3224 } 3225 3226 #undef __FUNCT__ 3227 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ" 3228 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 3229 { 3230 PetscErrorCode ierr; 3231 3232 PetscFunctionBegin; 3233 /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 3234 ierr = MatConvert(PP,MATMPIAIJ,MAT_INPLACE_MATRIX,&PP);CHKERRQ(ierr); 3235 ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 3236 PetscFunctionReturn(0); 3237 } 3238 3239 #undef __FUNCT__ 3240 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ" 3241 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 3242 { 3243 PetscFunctionBegin; 3244 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 3245 PetscFunctionReturn(0); 3246 } 3247 3248 #undef __FUNCT__ 3249 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIMAIJ" 3250 PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3251 { 3252 PetscErrorCode ierr; 3253 3254 PetscFunctionBegin; 3255 if (scall == MAT_INITIAL_MATRIX) { 3256 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3257 ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);CHKERRQ(ierr); 3258 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3259 } 3260 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3261 ierr = ((*C)->ops->ptapnumeric)(A,P,*C);CHKERRQ(ierr); 3262 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3263 PetscFunctionReturn(0); 3264 } 3265 3266 #undef __FUNCT__ 3267 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 3268 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3269 { 3270 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3271 Mat a = b->AIJ,B; 3272 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 3273 PetscErrorCode ierr; 3274 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3275 PetscInt *cols; 3276 PetscScalar *vals; 3277 3278 PetscFunctionBegin; 3279 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 3280 ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr); 3281 for (i=0; i<m; i++) { 3282 nmax = PetscMax(nmax,aij->ilen[i]); 3283 for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i]; 3284 } 3285 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 3286 ierr = PetscFree(ilen);CHKERRQ(ierr); 3287 ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr); 3288 ii = 0; 3289 for (i=0; i<m; i++) { 3290 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3291 for (j=0; j<dof; j++) { 3292 for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j; 3293 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3294 ii++; 3295 } 3296 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3297 } 3298 ierr = PetscFree(icols);CHKERRQ(ierr); 3299 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3300 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3301 3302 if (reuse == MAT_INPLACE_MATRIX) { 3303 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 3304 } else { 3305 *newmat = B; 3306 } 3307 PetscFunctionReturn(0); 3308 } 3309 3310 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3311 3312 #undef __FUNCT__ 3313 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 3314 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3315 { 3316 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3317 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 3318 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 3319 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 3320 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 3321 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 3322 PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0; 3323 PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL; 3324 PetscInt rstart,cstart,*garray,ii,k; 3325 PetscErrorCode ierr; 3326 PetscScalar *vals,*ovals; 3327 3328 PetscFunctionBegin; 3329 ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr); 3330 for (i=0; i<A->rmap->n/dof; i++) { 3331 nmax = PetscMax(nmax,AIJ->ilen[i]); 3332 onmax = PetscMax(onmax,OAIJ->ilen[i]); 3333 for (j=0; j<dof; j++) { 3334 dnz[dof*i+j] = AIJ->ilen[i]; 3335 onz[dof*i+j] = OAIJ->ilen[i]; 3336 } 3337 } 3338 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 3339 ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr); 3340 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 3341 3342 ierr = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr); 3343 rstart = dof*maij->A->rmap->rstart; 3344 cstart = dof*maij->A->cmap->rstart; 3345 garray = mpiaij->garray; 3346 3347 ii = rstart; 3348 for (i=0; i<A->rmap->n/dof; i++) { 3349 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3350 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3351 for (j=0; j<dof; j++) { 3352 for (k=0; k<ncols; k++) { 3353 icols[k] = cstart + dof*cols[k]+j; 3354 } 3355 for (k=0; k<oncols; k++) { 3356 oicols[k] = dof*garray[ocols[k]]+j; 3357 } 3358 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3359 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 3360 ii++; 3361 } 3362 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3363 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3364 } 3365 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 3366 3367 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3368 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3369 3370 if (reuse == MAT_INPLACE_MATRIX) { 3371 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 3372 ((PetscObject)A)->refct = 1; 3373 3374 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 3375 3376 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3377 } else { 3378 *newmat = B; 3379 } 3380 PetscFunctionReturn(0); 3381 } 3382 3383 #undef __FUNCT__ 3384 #define __FUNCT__ "MatGetSubMatrix_MAIJ" 3385 PetscErrorCode MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 3386 { 3387 PetscErrorCode ierr; 3388 Mat A; 3389 3390 PetscFunctionBegin; 3391 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 3392 ierr = MatGetSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 3393 ierr = MatDestroy(&A);CHKERRQ(ierr); 3394 PetscFunctionReturn(0); 3395 } 3396 3397 /* ---------------------------------------------------------------------------------- */ 3398 #undef __FUNCT__ 3399 #define __FUNCT__ "MatCreateMAIJ" 3400 /*@C 3401 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 3402 operations for multicomponent problems. It interpolates each component the same 3403 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 3404 and MATMPIAIJ for distributed matrices. 3405 3406 Collective 3407 3408 Input Parameters: 3409 + A - the AIJ matrix describing the action on blocks 3410 - dof - the block size (number of components per node) 3411 3412 Output Parameter: 3413 . maij - the new MAIJ matrix 3414 3415 Operations provided: 3416 + MatMult 3417 . MatMultTranspose 3418 . MatMultAdd 3419 . MatMultTransposeAdd 3420 - MatView 3421 3422 Level: advanced 3423 3424 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ 3425 @*/ 3426 PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 3427 { 3428 PetscErrorCode ierr; 3429 PetscMPIInt size; 3430 PetscInt n; 3431 Mat B; 3432 3433 PetscFunctionBegin; 3434 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3435 3436 if (dof == 1) *maij = A; 3437 else { 3438 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3439 ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3440 ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr); 3441 ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr); 3442 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3443 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3444 3445 B->assembled = PETSC_TRUE; 3446 3447 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3448 if (size == 1) { 3449 Mat_SeqMAIJ *b; 3450 3451 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 3452 3453 B->ops->setup = NULL; 3454 B->ops->destroy = MatDestroy_SeqMAIJ; 3455 B->ops->view = MatView_SeqMAIJ; 3456 b = (Mat_SeqMAIJ*)B->data; 3457 b->dof = dof; 3458 b->AIJ = A; 3459 3460 if (dof == 2) { 3461 B->ops->mult = MatMult_SeqMAIJ_2; 3462 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3463 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3464 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3465 } else if (dof == 3) { 3466 B->ops->mult = MatMult_SeqMAIJ_3; 3467 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3468 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3469 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3470 } else if (dof == 4) { 3471 B->ops->mult = MatMult_SeqMAIJ_4; 3472 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3473 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3474 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3475 } else if (dof == 5) { 3476 B->ops->mult = MatMult_SeqMAIJ_5; 3477 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3478 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3479 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 3480 } else if (dof == 6) { 3481 B->ops->mult = MatMult_SeqMAIJ_6; 3482 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 3483 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 3484 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3485 } else if (dof == 7) { 3486 B->ops->mult = MatMult_SeqMAIJ_7; 3487 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3488 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3489 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 3490 } else if (dof == 8) { 3491 B->ops->mult = MatMult_SeqMAIJ_8; 3492 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 3493 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 3494 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 3495 } else if (dof == 9) { 3496 B->ops->mult = MatMult_SeqMAIJ_9; 3497 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 3498 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 3499 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3500 } else if (dof == 10) { 3501 B->ops->mult = MatMult_SeqMAIJ_10; 3502 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3503 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3504 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3505 } else if (dof == 11) { 3506 B->ops->mult = MatMult_SeqMAIJ_11; 3507 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3508 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3509 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 3510 } else if (dof == 16) { 3511 B->ops->mult = MatMult_SeqMAIJ_16; 3512 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 3513 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 3514 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3515 } else if (dof == 18) { 3516 B->ops->mult = MatMult_SeqMAIJ_18; 3517 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3518 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3519 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3520 } else { 3521 B->ops->mult = MatMult_SeqMAIJ_N; 3522 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 3523 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 3524 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 3525 } 3526 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3527 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 3528 } else { 3529 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data; 3530 Mat_MPIMAIJ *b; 3531 IS from,to; 3532 Vec gvec; 3533 3534 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 3535 3536 B->ops->setup = NULL; 3537 B->ops->destroy = MatDestroy_MPIMAIJ; 3538 B->ops->view = MatView_MPIMAIJ; 3539 3540 b = (Mat_MPIMAIJ*)B->data; 3541 b->dof = dof; 3542 b->A = A; 3543 3544 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 3545 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 3546 3547 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3548 ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr); 3549 ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr); 3550 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3551 ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr); 3552 3553 /* create two temporary Index sets for build scatter gather */ 3554 ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 3555 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3556 3557 /* create temporary global vector to generate scatter context */ 3558 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 3559 3560 /* generate the scatter context */ 3561 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3562 3563 ierr = ISDestroy(&from);CHKERRQ(ierr); 3564 ierr = ISDestroy(&to);CHKERRQ(ierr); 3565 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 3566 3567 B->ops->mult = MatMult_MPIMAIJ_dof; 3568 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3569 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3570 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3571 3572 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3573 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);CHKERRQ(ierr); 3574 } 3575 B->ops->getsubmatrix = MatGetSubMatrix_MAIJ; 3576 ierr = MatSetUp(B);CHKERRQ(ierr); 3577 *maij = B; 3578 ierr = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr); 3579 } 3580 PetscFunctionReturn(0); 3581 } 3582