1 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20 #include <../src/mat/utils/freespace.h> 21 #include <petsc-private/vecimpl.h> 22 23 #undef __FUNCT__ 24 #define __FUNCT__ "MatMAIJGetAIJ" 25 /*@C 26 MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 27 28 Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 29 30 Input Parameter: 31 . A - the MAIJ matrix 32 33 Output Parameter: 34 . B - the AIJ matrix 35 36 Level: advanced 37 38 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 39 40 .seealso: MatCreateMAIJ() 41 @*/ 42 PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 43 { 44 PetscErrorCode ierr; 45 PetscBool ismpimaij,isseqmaij; 46 47 PetscFunctionBegin; 48 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 49 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 50 if (ismpimaij) { 51 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 52 53 *B = b->A; 54 } else if (isseqmaij) { 55 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 56 57 *B = b->AIJ; 58 } else { 59 *B = A; 60 } 61 PetscFunctionReturn(0); 62 } 63 64 #undef __FUNCT__ 65 #define __FUNCT__ "MatMAIJRedimension" 66 /*@C 67 MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 68 69 Logically Collective 70 71 Input Parameter: 72 + A - the MAIJ matrix 73 - dof - the block size for the new matrix 74 75 Output Parameter: 76 . B - the new MAIJ matrix 77 78 Level: advanced 79 80 .seealso: MatCreateMAIJ() 81 @*/ 82 PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 83 { 84 PetscErrorCode ierr; 85 Mat Aij = PETSC_NULL; 86 87 PetscFunctionBegin; 88 PetscValidLogicalCollectiveInt(A,dof,2); 89 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 90 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatDestroy_SeqMAIJ" 96 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 97 { 98 PetscErrorCode ierr; 99 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 100 101 PetscFunctionBegin; 102 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 103 ierr = PetscFree(A->data);CHKERRQ(ierr); 104 ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatConvert_seqmaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 105 ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatPtAP_seqaij_seqmaij_C","",PETSC_NULL);CHKERRQ(ierr); 106 PetscFunctionReturn(0); 107 } 108 109 #undef __FUNCT__ 110 #define __FUNCT__ "MatSetUp_MAIJ" 111 PetscErrorCode MatSetUp_MAIJ(Mat A) 112 { 113 PetscFunctionBegin; 114 SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices"); 115 PetscFunctionReturn(0); 116 } 117 118 #undef __FUNCT__ 119 #define __FUNCT__ "MatView_SeqMAIJ" 120 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 121 { 122 PetscErrorCode ierr; 123 Mat B; 124 125 PetscFunctionBegin; 126 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 127 ierr = MatView(B,viewer);CHKERRQ(ierr); 128 ierr = MatDestroy(&B);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatView_MPIMAIJ" 134 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 135 { 136 PetscErrorCode ierr; 137 Mat B; 138 139 PetscFunctionBegin; 140 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 141 ierr = MatView(B,viewer);CHKERRQ(ierr); 142 ierr = MatDestroy(&B);CHKERRQ(ierr); 143 PetscFunctionReturn(0); 144 } 145 146 #undef __FUNCT__ 147 #define __FUNCT__ "MatDestroy_MPIMAIJ" 148 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 149 { 150 PetscErrorCode ierr; 151 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 152 153 PetscFunctionBegin; 154 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 155 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 156 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 157 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 158 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 159 ierr = PetscFree(A->data);CHKERRQ(ierr); 160 ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatConvert_mpimaij_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 161 ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C","",PETSC_NULL);CHKERRQ(ierr); 162 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 163 PetscFunctionReturn(0); 164 } 165 166 /*MC 167 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 168 multicomponent problems, interpolating or restricting each component the same way independently. 169 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 170 171 Operations provided: 172 . MatMult 173 . MatMultTranspose 174 . MatMultAdd 175 . MatMultTransposeAdd 176 177 Level: advanced 178 179 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ() 180 M*/ 181 182 #undef __FUNCT__ 183 #define __FUNCT__ "MatCreate_MAIJ" 184 PETSC_EXTERN_C PetscErrorCode MatCreate_MAIJ(Mat A) 185 { 186 PetscErrorCode ierr; 187 Mat_MPIMAIJ *b; 188 PetscMPIInt size; 189 190 PetscFunctionBegin; 191 ierr = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr); 192 A->data = (void*)b; 193 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 194 A->ops->setup = MatSetUp_MAIJ; 195 196 b->AIJ = 0; 197 b->dof = 0; 198 b->OAIJ = 0; 199 b->ctx = 0; 200 b->w = 0; 201 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 202 if (size == 1) { 203 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 204 } else { 205 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 206 } 207 PetscFunctionReturn(0); 208 } 209 210 /* --------------------------------------------------------------------------------------*/ 211 #undef __FUNCT__ 212 #define __FUNCT__ "MatMult_SeqMAIJ_2" 213 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 214 { 215 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 216 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 217 const PetscScalar *x,*v; 218 PetscScalar *y, sum1, sum2; 219 PetscErrorCode ierr; 220 PetscInt nonzerorow=0,n,i,jrow,j; 221 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 222 223 PetscFunctionBegin; 224 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 225 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 226 idx = a->j; 227 v = a->a; 228 ii = a->i; 229 230 for (i=0; i<m; i++) { 231 jrow = ii[i]; 232 n = ii[i+1] - jrow; 233 sum1 = 0.0; 234 sum2 = 0.0; 235 nonzerorow += (n>0); 236 for (j=0; j<n; j++) { 237 sum1 += v[jrow]*x[2*idx[jrow]]; 238 sum2 += v[jrow]*x[2*idx[jrow]+1]; 239 jrow++; 240 } 241 y[2*i] = sum1; 242 y[2*i+1] = sum2; 243 } 244 245 ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 246 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 247 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 248 PetscFunctionReturn(0); 249 } 250 251 #undef __FUNCT__ 252 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 253 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 254 { 255 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 256 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 257 const PetscScalar *x,*v; 258 PetscScalar *y,alpha1,alpha2; 259 PetscErrorCode ierr; 260 PetscInt n,i; 261 const PetscInt m = b->AIJ->rmap->n,*idx; 262 263 PetscFunctionBegin; 264 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 265 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 266 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 267 268 for (i=0; i<m; i++) { 269 idx = a->j + a->i[i] ; 270 v = a->a + a->i[i] ; 271 n = a->i[i+1] - a->i[i]; 272 alpha1 = x[2*i]; 273 alpha2 = x[2*i+1]; 274 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 275 } 276 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 277 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 278 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 284 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 285 { 286 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 287 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 288 const PetscScalar *x,*v; 289 PetscScalar *y,sum1, sum2; 290 PetscErrorCode ierr; 291 PetscInt n,i,jrow,j; 292 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 293 294 PetscFunctionBegin; 295 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 296 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 297 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 298 idx = a->j; 299 v = a->a; 300 ii = a->i; 301 302 for (i=0; i<m; i++) { 303 jrow = ii[i]; 304 n = ii[i+1] - jrow; 305 sum1 = 0.0; 306 sum2 = 0.0; 307 for (j=0; j<n; j++) { 308 sum1 += v[jrow]*x[2*idx[jrow]]; 309 sum2 += v[jrow]*x[2*idx[jrow]+1]; 310 jrow++; 311 } 312 y[2*i] += sum1; 313 y[2*i+1] += sum2; 314 } 315 316 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 317 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 318 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 319 PetscFunctionReturn(0); 320 } 321 #undef __FUNCT__ 322 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 323 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 324 { 325 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 326 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 327 const PetscScalar *x,*v; 328 PetscScalar *y,alpha1,alpha2; 329 PetscErrorCode ierr; 330 PetscInt n,i; 331 const PetscInt m = b->AIJ->rmap->n,*idx; 332 333 PetscFunctionBegin; 334 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 335 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 336 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 337 338 for (i=0; i<m; i++) { 339 idx = a->j + a->i[i] ; 340 v = a->a + a->i[i] ; 341 n = a->i[i+1] - a->i[i]; 342 alpha1 = x[2*i]; 343 alpha2 = x[2*i+1]; 344 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 345 } 346 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 347 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 348 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 349 PetscFunctionReturn(0); 350 } 351 /* --------------------------------------------------------------------------------------*/ 352 #undef __FUNCT__ 353 #define __FUNCT__ "MatMult_SeqMAIJ_3" 354 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 355 { 356 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 357 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 358 const PetscScalar *x,*v; 359 PetscScalar *y,sum1, sum2, sum3; 360 PetscErrorCode ierr; 361 PetscInt nonzerorow=0,n,i,jrow,j; 362 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 363 364 PetscFunctionBegin; 365 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 366 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 367 idx = a->j; 368 v = a->a; 369 ii = a->i; 370 371 for (i=0; i<m; i++) { 372 jrow = ii[i]; 373 n = ii[i+1] - jrow; 374 sum1 = 0.0; 375 sum2 = 0.0; 376 sum3 = 0.0; 377 nonzerorow += (n>0); 378 for (j=0; j<n; j++) { 379 sum1 += v[jrow]*x[3*idx[jrow]]; 380 sum2 += v[jrow]*x[3*idx[jrow]+1]; 381 sum3 += v[jrow]*x[3*idx[jrow]+2]; 382 jrow++; 383 } 384 y[3*i] = sum1; 385 y[3*i+1] = sum2; 386 y[3*i+2] = sum3; 387 } 388 389 ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 390 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 391 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 392 PetscFunctionReturn(0); 393 } 394 395 #undef __FUNCT__ 396 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 397 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 398 { 399 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 400 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 401 const PetscScalar *x,*v; 402 PetscScalar *y,alpha1,alpha2,alpha3; 403 PetscErrorCode ierr; 404 PetscInt n,i; 405 const PetscInt m = b->AIJ->rmap->n,*idx; 406 407 PetscFunctionBegin; 408 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 409 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 410 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 411 412 for (i=0; i<m; i++) { 413 idx = a->j + a->i[i]; 414 v = a->a + a->i[i]; 415 n = a->i[i+1] - a->i[i]; 416 alpha1 = x[3*i]; 417 alpha2 = x[3*i+1]; 418 alpha3 = x[3*i+2]; 419 while (n-->0) { 420 y[3*(*idx)] += alpha1*(*v); 421 y[3*(*idx)+1] += alpha2*(*v); 422 y[3*(*idx)+2] += alpha3*(*v); 423 idx++; v++; 424 } 425 } 426 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 427 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 428 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 429 PetscFunctionReturn(0); 430 } 431 432 #undef __FUNCT__ 433 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 434 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 435 { 436 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 437 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 438 const PetscScalar *x,*v; 439 PetscScalar *y,sum1, sum2, sum3; 440 PetscErrorCode ierr; 441 PetscInt n,i,jrow,j; 442 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 443 444 PetscFunctionBegin; 445 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 446 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 447 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 448 idx = a->j; 449 v = a->a; 450 ii = a->i; 451 452 for (i=0; i<m; i++) { 453 jrow = ii[i]; 454 n = ii[i+1] - jrow; 455 sum1 = 0.0; 456 sum2 = 0.0; 457 sum3 = 0.0; 458 for (j=0; j<n; j++) { 459 sum1 += v[jrow]*x[3*idx[jrow]]; 460 sum2 += v[jrow]*x[3*idx[jrow]+1]; 461 sum3 += v[jrow]*x[3*idx[jrow]+2]; 462 jrow++; 463 } 464 y[3*i] += sum1; 465 y[3*i+1] += sum2; 466 y[3*i+2] += sum3; 467 } 468 469 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 470 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 471 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 472 PetscFunctionReturn(0); 473 } 474 #undef __FUNCT__ 475 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 476 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 477 { 478 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 479 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 480 const PetscScalar *x,*v; 481 PetscScalar *y,alpha1,alpha2,alpha3; 482 PetscErrorCode ierr; 483 PetscInt n,i; 484 const PetscInt m = b->AIJ->rmap->n,*idx; 485 486 PetscFunctionBegin; 487 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 488 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 489 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 490 for (i=0; i<m; i++) { 491 idx = a->j + a->i[i] ; 492 v = a->a + a->i[i] ; 493 n = a->i[i+1] - a->i[i]; 494 alpha1 = x[3*i]; 495 alpha2 = x[3*i+1]; 496 alpha3 = x[3*i+2]; 497 while (n-->0) { 498 y[3*(*idx)] += alpha1*(*v); 499 y[3*(*idx)+1] += alpha2*(*v); 500 y[3*(*idx)+2] += alpha3*(*v); 501 idx++; v++; 502 } 503 } 504 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 505 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 506 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 507 PetscFunctionReturn(0); 508 } 509 510 /* ------------------------------------------------------------------------------*/ 511 #undef __FUNCT__ 512 #define __FUNCT__ "MatMult_SeqMAIJ_4" 513 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 514 { 515 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 516 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 517 const PetscScalar *x,*v; 518 PetscScalar *y,sum1, sum2, sum3, sum4; 519 PetscErrorCode ierr; 520 PetscInt nonzerorow=0,n,i,jrow,j; 521 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 522 523 PetscFunctionBegin; 524 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 525 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 526 idx = a->j; 527 v = a->a; 528 ii = a->i; 529 530 for (i=0; i<m; i++) { 531 jrow = ii[i]; 532 n = ii[i+1] - jrow; 533 sum1 = 0.0; 534 sum2 = 0.0; 535 sum3 = 0.0; 536 sum4 = 0.0; 537 nonzerorow += (n>0); 538 for (j=0; j<n; j++) { 539 sum1 += v[jrow]*x[4*idx[jrow]]; 540 sum2 += v[jrow]*x[4*idx[jrow]+1]; 541 sum3 += v[jrow]*x[4*idx[jrow]+2]; 542 sum4 += v[jrow]*x[4*idx[jrow]+3]; 543 jrow++; 544 } 545 y[4*i] = sum1; 546 y[4*i+1] = sum2; 547 y[4*i+2] = sum3; 548 y[4*i+3] = sum4; 549 } 550 551 ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 552 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 553 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 554 PetscFunctionReturn(0); 555 } 556 557 #undef __FUNCT__ 558 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 559 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 560 { 561 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 562 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 563 const PetscScalar *x,*v; 564 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 565 PetscErrorCode ierr; 566 PetscInt n,i; 567 const PetscInt m = b->AIJ->rmap->n,*idx; 568 569 PetscFunctionBegin; 570 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 571 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 572 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 573 for (i=0; i<m; i++) { 574 idx = a->j + a->i[i] ; 575 v = a->a + a->i[i] ; 576 n = a->i[i+1] - a->i[i]; 577 alpha1 = x[4*i]; 578 alpha2 = x[4*i+1]; 579 alpha3 = x[4*i+2]; 580 alpha4 = x[4*i+3]; 581 while (n-->0) { 582 y[4*(*idx)] += alpha1*(*v); 583 y[4*(*idx)+1] += alpha2*(*v); 584 y[4*(*idx)+2] += alpha3*(*v); 585 y[4*(*idx)+3] += alpha4*(*v); 586 idx++; v++; 587 } 588 } 589 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 590 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 591 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 592 PetscFunctionReturn(0); 593 } 594 595 #undef __FUNCT__ 596 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 597 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 598 { 599 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 600 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 601 const PetscScalar *x,*v; 602 PetscScalar *y,sum1, sum2, sum3, sum4; 603 PetscErrorCode ierr; 604 PetscInt n,i,jrow,j; 605 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 606 607 PetscFunctionBegin; 608 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 609 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 610 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 611 idx = a->j; 612 v = a->a; 613 ii = a->i; 614 615 for (i=0; i<m; i++) { 616 jrow = ii[i]; 617 n = ii[i+1] - jrow; 618 sum1 = 0.0; 619 sum2 = 0.0; 620 sum3 = 0.0; 621 sum4 = 0.0; 622 for (j=0; j<n; j++) { 623 sum1 += v[jrow]*x[4*idx[jrow]]; 624 sum2 += v[jrow]*x[4*idx[jrow]+1]; 625 sum3 += v[jrow]*x[4*idx[jrow]+2]; 626 sum4 += v[jrow]*x[4*idx[jrow]+3]; 627 jrow++; 628 } 629 y[4*i] += sum1; 630 y[4*i+1] += sum2; 631 y[4*i+2] += sum3; 632 y[4*i+3] += sum4; 633 } 634 635 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 636 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 637 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 638 PetscFunctionReturn(0); 639 } 640 #undef __FUNCT__ 641 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 642 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 643 { 644 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 645 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 646 const PetscScalar *x,*v; 647 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 648 PetscErrorCode ierr; 649 PetscInt n,i; 650 const PetscInt m = b->AIJ->rmap->n,*idx; 651 652 PetscFunctionBegin; 653 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 654 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 655 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 656 657 for (i=0; i<m; i++) { 658 idx = a->j + a->i[i] ; 659 v = a->a + a->i[i] ; 660 n = a->i[i+1] - a->i[i]; 661 alpha1 = x[4*i]; 662 alpha2 = x[4*i+1]; 663 alpha3 = x[4*i+2]; 664 alpha4 = x[4*i+3]; 665 while (n-->0) { 666 y[4*(*idx)] += alpha1*(*v); 667 y[4*(*idx)+1] += alpha2*(*v); 668 y[4*(*idx)+2] += alpha3*(*v); 669 y[4*(*idx)+3] += alpha4*(*v); 670 idx++; v++; 671 } 672 } 673 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 674 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 675 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 676 PetscFunctionReturn(0); 677 } 678 /* ------------------------------------------------------------------------------*/ 679 680 #undef __FUNCT__ 681 #define __FUNCT__ "MatMult_SeqMAIJ_5" 682 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 683 { 684 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 685 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 686 const PetscScalar *x,*v; 687 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 688 PetscErrorCode ierr; 689 PetscInt nonzerorow=0,n,i,jrow,j; 690 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 691 692 PetscFunctionBegin; 693 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 694 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 695 idx = a->j; 696 v = a->a; 697 ii = a->i; 698 699 for (i=0; i<m; i++) { 700 jrow = ii[i]; 701 n = ii[i+1] - jrow; 702 sum1 = 0.0; 703 sum2 = 0.0; 704 sum3 = 0.0; 705 sum4 = 0.0; 706 sum5 = 0.0; 707 nonzerorow += (n>0); 708 for (j=0; j<n; j++) { 709 sum1 += v[jrow]*x[5*idx[jrow]]; 710 sum2 += v[jrow]*x[5*idx[jrow]+1]; 711 sum3 += v[jrow]*x[5*idx[jrow]+2]; 712 sum4 += v[jrow]*x[5*idx[jrow]+3]; 713 sum5 += v[jrow]*x[5*idx[jrow]+4]; 714 jrow++; 715 } 716 y[5*i] = sum1; 717 y[5*i+1] = sum2; 718 y[5*i+2] = sum3; 719 y[5*i+3] = sum4; 720 y[5*i+4] = sum5; 721 } 722 723 ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 724 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 725 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 726 PetscFunctionReturn(0); 727 } 728 729 #undef __FUNCT__ 730 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 731 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 732 { 733 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 734 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 735 const PetscScalar *x,*v; 736 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 737 PetscErrorCode ierr; 738 PetscInt n,i; 739 const PetscInt m = b->AIJ->rmap->n,*idx; 740 741 PetscFunctionBegin; 742 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 743 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 744 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 745 746 for (i=0; i<m; i++) { 747 idx = a->j + a->i[i] ; 748 v = a->a + a->i[i] ; 749 n = a->i[i+1] - a->i[i]; 750 alpha1 = x[5*i]; 751 alpha2 = x[5*i+1]; 752 alpha3 = x[5*i+2]; 753 alpha4 = x[5*i+3]; 754 alpha5 = x[5*i+4]; 755 while (n-->0) { 756 y[5*(*idx)] += alpha1*(*v); 757 y[5*(*idx)+1] += alpha2*(*v); 758 y[5*(*idx)+2] += alpha3*(*v); 759 y[5*(*idx)+3] += alpha4*(*v); 760 y[5*(*idx)+4] += alpha5*(*v); 761 idx++; v++; 762 } 763 } 764 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 765 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 766 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 767 PetscFunctionReturn(0); 768 } 769 770 #undef __FUNCT__ 771 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 772 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 773 { 774 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 775 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 776 const PetscScalar *x,*v; 777 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 778 PetscErrorCode ierr; 779 PetscInt n,i,jrow,j; 780 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 781 782 PetscFunctionBegin; 783 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 784 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 785 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 786 idx = a->j; 787 v = a->a; 788 ii = a->i; 789 790 for (i=0; i<m; i++) { 791 jrow = ii[i]; 792 n = ii[i+1] - jrow; 793 sum1 = 0.0; 794 sum2 = 0.0; 795 sum3 = 0.0; 796 sum4 = 0.0; 797 sum5 = 0.0; 798 for (j=0; j<n; j++) { 799 sum1 += v[jrow]*x[5*idx[jrow]]; 800 sum2 += v[jrow]*x[5*idx[jrow]+1]; 801 sum3 += v[jrow]*x[5*idx[jrow]+2]; 802 sum4 += v[jrow]*x[5*idx[jrow]+3]; 803 sum5 += v[jrow]*x[5*idx[jrow]+4]; 804 jrow++; 805 } 806 y[5*i] += sum1; 807 y[5*i+1] += sum2; 808 y[5*i+2] += sum3; 809 y[5*i+3] += sum4; 810 y[5*i+4] += sum5; 811 } 812 813 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 814 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 815 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 816 PetscFunctionReturn(0); 817 } 818 819 #undef __FUNCT__ 820 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 821 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 822 { 823 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 824 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 825 const PetscScalar *x,*v; 826 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 827 PetscErrorCode ierr; 828 PetscInt n,i; 829 const PetscInt m = b->AIJ->rmap->n,*idx; 830 831 PetscFunctionBegin; 832 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 833 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 834 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 835 836 for (i=0; i<m; i++) { 837 idx = a->j + a->i[i] ; 838 v = a->a + a->i[i] ; 839 n = a->i[i+1] - a->i[i]; 840 alpha1 = x[5*i]; 841 alpha2 = x[5*i+1]; 842 alpha3 = x[5*i+2]; 843 alpha4 = x[5*i+3]; 844 alpha5 = x[5*i+4]; 845 while (n-->0) { 846 y[5*(*idx)] += alpha1*(*v); 847 y[5*(*idx)+1] += alpha2*(*v); 848 y[5*(*idx)+2] += alpha3*(*v); 849 y[5*(*idx)+3] += alpha4*(*v); 850 y[5*(*idx)+4] += alpha5*(*v); 851 idx++; v++; 852 } 853 } 854 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 855 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 856 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 857 PetscFunctionReturn(0); 858 } 859 860 /* ------------------------------------------------------------------------------*/ 861 #undef __FUNCT__ 862 #define __FUNCT__ "MatMult_SeqMAIJ_6" 863 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 864 { 865 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 866 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 867 const PetscScalar *x,*v; 868 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 869 PetscErrorCode ierr; 870 PetscInt nonzerorow=0,n,i,jrow,j; 871 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 872 873 PetscFunctionBegin; 874 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 875 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 876 idx = a->j; 877 v = a->a; 878 ii = a->i; 879 880 for (i=0; i<m; i++) { 881 jrow = ii[i]; 882 n = ii[i+1] - jrow; 883 sum1 = 0.0; 884 sum2 = 0.0; 885 sum3 = 0.0; 886 sum4 = 0.0; 887 sum5 = 0.0; 888 sum6 = 0.0; 889 nonzerorow += (n>0); 890 for (j=0; j<n; j++) { 891 sum1 += v[jrow]*x[6*idx[jrow]]; 892 sum2 += v[jrow]*x[6*idx[jrow]+1]; 893 sum3 += v[jrow]*x[6*idx[jrow]+2]; 894 sum4 += v[jrow]*x[6*idx[jrow]+3]; 895 sum5 += v[jrow]*x[6*idx[jrow]+4]; 896 sum6 += v[jrow]*x[6*idx[jrow]+5]; 897 jrow++; 898 } 899 y[6*i] = sum1; 900 y[6*i+1] = sum2; 901 y[6*i+2] = sum3; 902 y[6*i+3] = sum4; 903 y[6*i+4] = sum5; 904 y[6*i+5] = sum6; 905 } 906 907 ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 908 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 909 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 910 PetscFunctionReturn(0); 911 } 912 913 #undef __FUNCT__ 914 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 915 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 916 { 917 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 918 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 919 const PetscScalar *x,*v; 920 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 921 PetscErrorCode ierr; 922 PetscInt n,i; 923 const PetscInt m = b->AIJ->rmap->n,*idx; 924 925 PetscFunctionBegin; 926 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 927 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 928 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 929 930 for (i=0; i<m; i++) { 931 idx = a->j + a->i[i] ; 932 v = a->a + a->i[i] ; 933 n = a->i[i+1] - a->i[i]; 934 alpha1 = x[6*i]; 935 alpha2 = x[6*i+1]; 936 alpha3 = x[6*i+2]; 937 alpha4 = x[6*i+3]; 938 alpha5 = x[6*i+4]; 939 alpha6 = x[6*i+5]; 940 while (n-->0) { 941 y[6*(*idx)] += alpha1*(*v); 942 y[6*(*idx)+1] += alpha2*(*v); 943 y[6*(*idx)+2] += alpha3*(*v); 944 y[6*(*idx)+3] += alpha4*(*v); 945 y[6*(*idx)+4] += alpha5*(*v); 946 y[6*(*idx)+5] += alpha6*(*v); 947 idx++; v++; 948 } 949 } 950 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 951 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 952 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 953 PetscFunctionReturn(0); 954 } 955 956 #undef __FUNCT__ 957 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 958 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 959 { 960 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 961 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 962 const PetscScalar *x,*v; 963 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 964 PetscErrorCode ierr; 965 PetscInt n,i,jrow,j; 966 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 967 968 PetscFunctionBegin; 969 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 970 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 971 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 972 idx = a->j; 973 v = a->a; 974 ii = a->i; 975 976 for (i=0; i<m; i++) { 977 jrow = ii[i]; 978 n = ii[i+1] - jrow; 979 sum1 = 0.0; 980 sum2 = 0.0; 981 sum3 = 0.0; 982 sum4 = 0.0; 983 sum5 = 0.0; 984 sum6 = 0.0; 985 for (j=0; j<n; j++) { 986 sum1 += v[jrow]*x[6*idx[jrow]]; 987 sum2 += v[jrow]*x[6*idx[jrow]+1]; 988 sum3 += v[jrow]*x[6*idx[jrow]+2]; 989 sum4 += v[jrow]*x[6*idx[jrow]+3]; 990 sum5 += v[jrow]*x[6*idx[jrow]+4]; 991 sum6 += v[jrow]*x[6*idx[jrow]+5]; 992 jrow++; 993 } 994 y[6*i] += sum1; 995 y[6*i+1] += sum2; 996 y[6*i+2] += sum3; 997 y[6*i+3] += sum4; 998 y[6*i+4] += sum5; 999 y[6*i+5] += sum6; 1000 } 1001 1002 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1003 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1004 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1005 PetscFunctionReturn(0); 1006 } 1007 1008 #undef __FUNCT__ 1009 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 1010 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 1011 { 1012 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1013 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1014 const PetscScalar *x,*v; 1015 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 1016 PetscErrorCode ierr; 1017 PetscInt n,i; 1018 const PetscInt m = b->AIJ->rmap->n,*idx; 1019 1020 PetscFunctionBegin; 1021 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1022 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1023 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1024 1025 for (i=0; i<m; i++) { 1026 idx = a->j + a->i[i] ; 1027 v = a->a + a->i[i] ; 1028 n = a->i[i+1] - a->i[i]; 1029 alpha1 = x[6*i]; 1030 alpha2 = x[6*i+1]; 1031 alpha3 = x[6*i+2]; 1032 alpha4 = x[6*i+3]; 1033 alpha5 = x[6*i+4]; 1034 alpha6 = x[6*i+5]; 1035 while (n-->0) { 1036 y[6*(*idx)] += alpha1*(*v); 1037 y[6*(*idx)+1] += alpha2*(*v); 1038 y[6*(*idx)+2] += alpha3*(*v); 1039 y[6*(*idx)+3] += alpha4*(*v); 1040 y[6*(*idx)+4] += alpha5*(*v); 1041 y[6*(*idx)+5] += alpha6*(*v); 1042 idx++; v++; 1043 } 1044 } 1045 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1046 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1047 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1048 PetscFunctionReturn(0); 1049 } 1050 1051 /* ------------------------------------------------------------------------------*/ 1052 #undef __FUNCT__ 1053 #define __FUNCT__ "MatMult_SeqMAIJ_7" 1054 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1055 { 1056 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1057 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1058 const PetscScalar *x,*v; 1059 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1060 PetscErrorCode ierr; 1061 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1062 PetscInt nonzerorow=0,n,i,jrow,j; 1063 1064 PetscFunctionBegin; 1065 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1066 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1067 idx = a->j; 1068 v = a->a; 1069 ii = a->i; 1070 1071 for (i=0; i<m; i++) { 1072 jrow = ii[i]; 1073 n = ii[i+1] - jrow; 1074 sum1 = 0.0; 1075 sum2 = 0.0; 1076 sum3 = 0.0; 1077 sum4 = 0.0; 1078 sum5 = 0.0; 1079 sum6 = 0.0; 1080 sum7 = 0.0; 1081 nonzerorow += (n>0); 1082 for (j=0; j<n; j++) { 1083 sum1 += v[jrow]*x[7*idx[jrow]]; 1084 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1085 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1086 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1087 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1088 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1089 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1090 jrow++; 1091 } 1092 y[7*i] = sum1; 1093 y[7*i+1] = sum2; 1094 y[7*i+2] = sum3; 1095 y[7*i+3] = sum4; 1096 y[7*i+4] = sum5; 1097 y[7*i+5] = sum6; 1098 y[7*i+6] = sum7; 1099 } 1100 1101 ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 1102 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1103 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1109 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1110 { 1111 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1112 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1113 const PetscScalar *x,*v; 1114 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1115 PetscErrorCode ierr; 1116 const PetscInt m = b->AIJ->rmap->n,*idx; 1117 PetscInt n,i; 1118 1119 PetscFunctionBegin; 1120 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1121 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1122 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1123 1124 for (i=0; i<m; i++) { 1125 idx = a->j + a->i[i] ; 1126 v = a->a + a->i[i] ; 1127 n = a->i[i+1] - a->i[i]; 1128 alpha1 = x[7*i]; 1129 alpha2 = x[7*i+1]; 1130 alpha3 = x[7*i+2]; 1131 alpha4 = x[7*i+3]; 1132 alpha5 = x[7*i+4]; 1133 alpha6 = x[7*i+5]; 1134 alpha7 = x[7*i+6]; 1135 while (n-->0) { 1136 y[7*(*idx)] += alpha1*(*v); 1137 y[7*(*idx)+1] += alpha2*(*v); 1138 y[7*(*idx)+2] += alpha3*(*v); 1139 y[7*(*idx)+3] += alpha4*(*v); 1140 y[7*(*idx)+4] += alpha5*(*v); 1141 y[7*(*idx)+5] += alpha6*(*v); 1142 y[7*(*idx)+6] += alpha7*(*v); 1143 idx++; v++; 1144 } 1145 } 1146 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1147 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1148 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1149 PetscFunctionReturn(0); 1150 } 1151 1152 #undef __FUNCT__ 1153 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1154 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1155 { 1156 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1157 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1158 const PetscScalar *x,*v; 1159 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1160 PetscErrorCode ierr; 1161 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1162 PetscInt n,i,jrow,j; 1163 1164 PetscFunctionBegin; 1165 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1166 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1167 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1168 idx = a->j; 1169 v = a->a; 1170 ii = a->i; 1171 1172 for (i=0; i<m; i++) { 1173 jrow = ii[i]; 1174 n = ii[i+1] - jrow; 1175 sum1 = 0.0; 1176 sum2 = 0.0; 1177 sum3 = 0.0; 1178 sum4 = 0.0; 1179 sum5 = 0.0; 1180 sum6 = 0.0; 1181 sum7 = 0.0; 1182 for (j=0; j<n; j++) { 1183 sum1 += v[jrow]*x[7*idx[jrow]]; 1184 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1185 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1186 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1187 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1188 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1189 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1190 jrow++; 1191 } 1192 y[7*i] += sum1; 1193 y[7*i+1] += sum2; 1194 y[7*i+2] += sum3; 1195 y[7*i+3] += sum4; 1196 y[7*i+4] += sum5; 1197 y[7*i+5] += sum6; 1198 y[7*i+6] += sum7; 1199 } 1200 1201 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1202 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1203 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1204 PetscFunctionReturn(0); 1205 } 1206 1207 #undef __FUNCT__ 1208 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1209 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1210 { 1211 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1212 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1213 const PetscScalar *x,*v; 1214 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1215 PetscErrorCode ierr; 1216 const PetscInt m = b->AIJ->rmap->n,*idx; 1217 PetscInt n,i; 1218 1219 PetscFunctionBegin; 1220 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1221 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1222 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1223 for (i=0; i<m; i++) { 1224 idx = a->j + a->i[i] ; 1225 v = a->a + a->i[i] ; 1226 n = a->i[i+1] - a->i[i]; 1227 alpha1 = x[7*i]; 1228 alpha2 = x[7*i+1]; 1229 alpha3 = x[7*i+2]; 1230 alpha4 = x[7*i+3]; 1231 alpha5 = x[7*i+4]; 1232 alpha6 = x[7*i+5]; 1233 alpha7 = x[7*i+6]; 1234 while (n-->0) { 1235 y[7*(*idx)] += alpha1*(*v); 1236 y[7*(*idx)+1] += alpha2*(*v); 1237 y[7*(*idx)+2] += alpha3*(*v); 1238 y[7*(*idx)+3] += alpha4*(*v); 1239 y[7*(*idx)+4] += alpha5*(*v); 1240 y[7*(*idx)+5] += alpha6*(*v); 1241 y[7*(*idx)+6] += alpha7*(*v); 1242 idx++; v++; 1243 } 1244 } 1245 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1246 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1247 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1248 PetscFunctionReturn(0); 1249 } 1250 1251 #undef __FUNCT__ 1252 #define __FUNCT__ "MatMult_SeqMAIJ_8" 1253 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1254 { 1255 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1256 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1257 const PetscScalar *x,*v; 1258 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1259 PetscErrorCode ierr; 1260 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1261 PetscInt nonzerorow=0,n,i,jrow,j; 1262 1263 PetscFunctionBegin; 1264 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1265 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1266 idx = a->j; 1267 v = a->a; 1268 ii = a->i; 1269 1270 for (i=0; i<m; i++) { 1271 jrow = ii[i]; 1272 n = ii[i+1] - jrow; 1273 sum1 = 0.0; 1274 sum2 = 0.0; 1275 sum3 = 0.0; 1276 sum4 = 0.0; 1277 sum5 = 0.0; 1278 sum6 = 0.0; 1279 sum7 = 0.0; 1280 sum8 = 0.0; 1281 nonzerorow += (n>0); 1282 for (j=0; j<n; j++) { 1283 sum1 += v[jrow]*x[8*idx[jrow]]; 1284 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1285 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1286 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1287 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1288 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1289 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1290 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1291 jrow++; 1292 } 1293 y[8*i] = sum1; 1294 y[8*i+1] = sum2; 1295 y[8*i+2] = sum3; 1296 y[8*i+3] = sum4; 1297 y[8*i+4] = sum5; 1298 y[8*i+5] = sum6; 1299 y[8*i+6] = sum7; 1300 y[8*i+7] = sum8; 1301 } 1302 1303 ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 1304 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1305 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1306 PetscFunctionReturn(0); 1307 } 1308 1309 #undef __FUNCT__ 1310 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1311 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1312 { 1313 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1314 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1315 const PetscScalar *x,*v; 1316 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1317 PetscErrorCode ierr; 1318 const PetscInt m = b->AIJ->rmap->n,*idx; 1319 PetscInt n,i; 1320 1321 PetscFunctionBegin; 1322 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1323 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1324 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1325 1326 for (i=0; i<m; i++) { 1327 idx = a->j + a->i[i] ; 1328 v = a->a + a->i[i] ; 1329 n = a->i[i+1] - a->i[i]; 1330 alpha1 = x[8*i]; 1331 alpha2 = x[8*i+1]; 1332 alpha3 = x[8*i+2]; 1333 alpha4 = x[8*i+3]; 1334 alpha5 = x[8*i+4]; 1335 alpha6 = x[8*i+5]; 1336 alpha7 = x[8*i+6]; 1337 alpha8 = x[8*i+7]; 1338 while (n-->0) { 1339 y[8*(*idx)] += alpha1*(*v); 1340 y[8*(*idx)+1] += alpha2*(*v); 1341 y[8*(*idx)+2] += alpha3*(*v); 1342 y[8*(*idx)+3] += alpha4*(*v); 1343 y[8*(*idx)+4] += alpha5*(*v); 1344 y[8*(*idx)+5] += alpha6*(*v); 1345 y[8*(*idx)+6] += alpha7*(*v); 1346 y[8*(*idx)+7] += alpha8*(*v); 1347 idx++; v++; 1348 } 1349 } 1350 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1351 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1352 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1353 PetscFunctionReturn(0); 1354 } 1355 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1358 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1359 { 1360 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1361 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1362 const PetscScalar *x,*v; 1363 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1364 PetscErrorCode ierr; 1365 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1366 PetscInt n,i,jrow,j; 1367 1368 PetscFunctionBegin; 1369 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1370 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1371 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1372 idx = a->j; 1373 v = a->a; 1374 ii = a->i; 1375 1376 for (i=0; i<m; i++) { 1377 jrow = ii[i]; 1378 n = ii[i+1] - jrow; 1379 sum1 = 0.0; 1380 sum2 = 0.0; 1381 sum3 = 0.0; 1382 sum4 = 0.0; 1383 sum5 = 0.0; 1384 sum6 = 0.0; 1385 sum7 = 0.0; 1386 sum8 = 0.0; 1387 for (j=0; j<n; j++) { 1388 sum1 += v[jrow]*x[8*idx[jrow]]; 1389 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1390 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1391 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1392 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1393 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1394 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1395 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1396 jrow++; 1397 } 1398 y[8*i] += sum1; 1399 y[8*i+1] += sum2; 1400 y[8*i+2] += sum3; 1401 y[8*i+3] += sum4; 1402 y[8*i+4] += sum5; 1403 y[8*i+5] += sum6; 1404 y[8*i+6] += sum7; 1405 y[8*i+7] += sum8; 1406 } 1407 1408 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1409 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1410 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1411 PetscFunctionReturn(0); 1412 } 1413 1414 #undef __FUNCT__ 1415 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1416 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1417 { 1418 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1419 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1420 const PetscScalar *x,*v; 1421 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1422 PetscErrorCode ierr; 1423 const PetscInt m = b->AIJ->rmap->n,*idx; 1424 PetscInt n,i; 1425 1426 PetscFunctionBegin; 1427 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1428 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1429 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1430 for (i=0; i<m; i++) { 1431 idx = a->j + a->i[i] ; 1432 v = a->a + a->i[i] ; 1433 n = a->i[i+1] - a->i[i]; 1434 alpha1 = x[8*i]; 1435 alpha2 = x[8*i+1]; 1436 alpha3 = x[8*i+2]; 1437 alpha4 = x[8*i+3]; 1438 alpha5 = x[8*i+4]; 1439 alpha6 = x[8*i+5]; 1440 alpha7 = x[8*i+6]; 1441 alpha8 = x[8*i+7]; 1442 while (n-->0) { 1443 y[8*(*idx)] += alpha1*(*v); 1444 y[8*(*idx)+1] += alpha2*(*v); 1445 y[8*(*idx)+2] += alpha3*(*v); 1446 y[8*(*idx)+3] += alpha4*(*v); 1447 y[8*(*idx)+4] += alpha5*(*v); 1448 y[8*(*idx)+5] += alpha6*(*v); 1449 y[8*(*idx)+6] += alpha7*(*v); 1450 y[8*(*idx)+7] += alpha8*(*v); 1451 idx++; v++; 1452 } 1453 } 1454 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1455 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1456 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1457 PetscFunctionReturn(0); 1458 } 1459 1460 /* ------------------------------------------------------------------------------*/ 1461 #undef __FUNCT__ 1462 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1463 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1464 { 1465 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1466 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1467 const PetscScalar *x,*v; 1468 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1469 PetscErrorCode ierr; 1470 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1471 PetscInt nonzerorow=0,n,i,jrow,j; 1472 1473 PetscFunctionBegin; 1474 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1475 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1476 idx = a->j; 1477 v = a->a; 1478 ii = a->i; 1479 1480 for (i=0; i<m; i++) { 1481 jrow = ii[i]; 1482 n = ii[i+1] - jrow; 1483 sum1 = 0.0; 1484 sum2 = 0.0; 1485 sum3 = 0.0; 1486 sum4 = 0.0; 1487 sum5 = 0.0; 1488 sum6 = 0.0; 1489 sum7 = 0.0; 1490 sum8 = 0.0; 1491 sum9 = 0.0; 1492 nonzerorow += (n>0); 1493 for (j=0; j<n; j++) { 1494 sum1 += v[jrow]*x[9*idx[jrow]]; 1495 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1496 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1497 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1498 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1499 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1500 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1501 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1502 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1503 jrow++; 1504 } 1505 y[9*i] = sum1; 1506 y[9*i+1] = sum2; 1507 y[9*i+2] = sum3; 1508 y[9*i+3] = sum4; 1509 y[9*i+4] = sum5; 1510 y[9*i+5] = sum6; 1511 y[9*i+6] = sum7; 1512 y[9*i+7] = sum8; 1513 y[9*i+8] = sum9; 1514 } 1515 1516 ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 1517 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1518 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1519 PetscFunctionReturn(0); 1520 } 1521 1522 /* ------------------------------------------------------------------------------*/ 1523 1524 #undef __FUNCT__ 1525 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1526 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1527 { 1528 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1529 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1530 const PetscScalar *x,*v; 1531 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1532 PetscErrorCode ierr; 1533 const PetscInt m = b->AIJ->rmap->n,*idx; 1534 PetscInt n,i; 1535 1536 PetscFunctionBegin; 1537 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1538 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1539 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1540 1541 for (i=0; i<m; i++) { 1542 idx = a->j + a->i[i] ; 1543 v = a->a + a->i[i] ; 1544 n = a->i[i+1] - a->i[i]; 1545 alpha1 = x[9*i]; 1546 alpha2 = x[9*i+1]; 1547 alpha3 = x[9*i+2]; 1548 alpha4 = x[9*i+3]; 1549 alpha5 = x[9*i+4]; 1550 alpha6 = x[9*i+5]; 1551 alpha7 = x[9*i+6]; 1552 alpha8 = x[9*i+7]; 1553 alpha9 = x[9*i+8]; 1554 while (n-->0) { 1555 y[9*(*idx)] += alpha1*(*v); 1556 y[9*(*idx)+1] += alpha2*(*v); 1557 y[9*(*idx)+2] += alpha3*(*v); 1558 y[9*(*idx)+3] += alpha4*(*v); 1559 y[9*(*idx)+4] += alpha5*(*v); 1560 y[9*(*idx)+5] += alpha6*(*v); 1561 y[9*(*idx)+6] += alpha7*(*v); 1562 y[9*(*idx)+7] += alpha8*(*v); 1563 y[9*(*idx)+8] += alpha9*(*v); 1564 idx++; v++; 1565 } 1566 } 1567 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1568 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1569 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1570 PetscFunctionReturn(0); 1571 } 1572 1573 #undef __FUNCT__ 1574 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1575 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1576 { 1577 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1578 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1579 const PetscScalar *x,*v; 1580 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1581 PetscErrorCode ierr; 1582 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1583 PetscInt n,i,jrow,j; 1584 1585 PetscFunctionBegin; 1586 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1587 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1588 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1589 idx = a->j; 1590 v = a->a; 1591 ii = a->i; 1592 1593 for (i=0; i<m; i++) { 1594 jrow = ii[i]; 1595 n = ii[i+1] - jrow; 1596 sum1 = 0.0; 1597 sum2 = 0.0; 1598 sum3 = 0.0; 1599 sum4 = 0.0; 1600 sum5 = 0.0; 1601 sum6 = 0.0; 1602 sum7 = 0.0; 1603 sum8 = 0.0; 1604 sum9 = 0.0; 1605 for (j=0; j<n; j++) { 1606 sum1 += v[jrow]*x[9*idx[jrow]]; 1607 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1608 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1609 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1610 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1611 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1612 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1613 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1614 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1615 jrow++; 1616 } 1617 y[9*i] += sum1; 1618 y[9*i+1] += sum2; 1619 y[9*i+2] += sum3; 1620 y[9*i+3] += sum4; 1621 y[9*i+4] += sum5; 1622 y[9*i+5] += sum6; 1623 y[9*i+6] += sum7; 1624 y[9*i+7] += sum8; 1625 y[9*i+8] += sum9; 1626 } 1627 1628 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1629 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1630 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1631 PetscFunctionReturn(0); 1632 } 1633 1634 #undef __FUNCT__ 1635 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1636 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1637 { 1638 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1639 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1640 const PetscScalar *x,*v; 1641 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1642 PetscErrorCode ierr; 1643 const PetscInt m = b->AIJ->rmap->n,*idx; 1644 PetscInt n,i; 1645 1646 PetscFunctionBegin; 1647 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1648 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1649 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1650 for (i=0; i<m; i++) { 1651 idx = a->j + a->i[i] ; 1652 v = a->a + a->i[i] ; 1653 n = a->i[i+1] - a->i[i]; 1654 alpha1 = x[9*i]; 1655 alpha2 = x[9*i+1]; 1656 alpha3 = x[9*i+2]; 1657 alpha4 = x[9*i+3]; 1658 alpha5 = x[9*i+4]; 1659 alpha6 = x[9*i+5]; 1660 alpha7 = x[9*i+6]; 1661 alpha8 = x[9*i+7]; 1662 alpha9 = x[9*i+8]; 1663 while (n-->0) { 1664 y[9*(*idx)] += alpha1*(*v); 1665 y[9*(*idx)+1] += alpha2*(*v); 1666 y[9*(*idx)+2] += alpha3*(*v); 1667 y[9*(*idx)+3] += alpha4*(*v); 1668 y[9*(*idx)+4] += alpha5*(*v); 1669 y[9*(*idx)+5] += alpha6*(*v); 1670 y[9*(*idx)+6] += alpha7*(*v); 1671 y[9*(*idx)+7] += alpha8*(*v); 1672 y[9*(*idx)+8] += alpha9*(*v); 1673 idx++; v++; 1674 } 1675 } 1676 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1677 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1678 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1679 PetscFunctionReturn(0); 1680 } 1681 #undef __FUNCT__ 1682 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1683 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1684 { 1685 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1686 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1687 const PetscScalar *x,*v; 1688 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1689 PetscErrorCode ierr; 1690 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1691 PetscInt nonzerorow=0,n,i,jrow,j; 1692 1693 PetscFunctionBegin; 1694 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1695 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1696 idx = a->j; 1697 v = a->a; 1698 ii = a->i; 1699 1700 for (i=0; i<m; i++) { 1701 jrow = ii[i]; 1702 n = ii[i+1] - jrow; 1703 sum1 = 0.0; 1704 sum2 = 0.0; 1705 sum3 = 0.0; 1706 sum4 = 0.0; 1707 sum5 = 0.0; 1708 sum6 = 0.0; 1709 sum7 = 0.0; 1710 sum8 = 0.0; 1711 sum9 = 0.0; 1712 sum10 = 0.0; 1713 nonzerorow += (n>0); 1714 for (j=0; j<n; j++) { 1715 sum1 += v[jrow]*x[10*idx[jrow]]; 1716 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1717 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1718 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1719 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1720 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1721 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1722 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1723 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1724 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1725 jrow++; 1726 } 1727 y[10*i] = sum1; 1728 y[10*i+1] = sum2; 1729 y[10*i+2] = sum3; 1730 y[10*i+3] = sum4; 1731 y[10*i+4] = sum5; 1732 y[10*i+5] = sum6; 1733 y[10*i+6] = sum7; 1734 y[10*i+7] = sum8; 1735 y[10*i+8] = sum9; 1736 y[10*i+9] = sum10; 1737 } 1738 1739 ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 1740 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1741 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1742 PetscFunctionReturn(0); 1743 } 1744 1745 #undef __FUNCT__ 1746 #define __FUNCT__ "MatMultAdd_SeqMAIJ_10" 1747 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1748 { 1749 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1750 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1751 const PetscScalar *x,*v; 1752 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1753 PetscErrorCode ierr; 1754 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1755 PetscInt n,i,jrow,j; 1756 1757 PetscFunctionBegin; 1758 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1759 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1760 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1761 idx = a->j; 1762 v = a->a; 1763 ii = a->i; 1764 1765 for (i=0; i<m; i++) { 1766 jrow = ii[i]; 1767 n = ii[i+1] - jrow; 1768 sum1 = 0.0; 1769 sum2 = 0.0; 1770 sum3 = 0.0; 1771 sum4 = 0.0; 1772 sum5 = 0.0; 1773 sum6 = 0.0; 1774 sum7 = 0.0; 1775 sum8 = 0.0; 1776 sum9 = 0.0; 1777 sum10 = 0.0; 1778 for (j=0; j<n; j++) { 1779 sum1 += v[jrow]*x[10*idx[jrow]]; 1780 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1781 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1782 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1783 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1784 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1785 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1786 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1787 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1788 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1789 jrow++; 1790 } 1791 y[10*i] += sum1; 1792 y[10*i+1] += sum2; 1793 y[10*i+2] += sum3; 1794 y[10*i+3] += sum4; 1795 y[10*i+4] += sum5; 1796 y[10*i+5] += sum6; 1797 y[10*i+6] += sum7; 1798 y[10*i+7] += sum8; 1799 y[10*i+8] += sum9; 1800 y[10*i+9] += sum10; 1801 } 1802 1803 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1804 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1805 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1806 PetscFunctionReturn(0); 1807 } 1808 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1811 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1812 { 1813 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1814 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1815 const PetscScalar *x,*v; 1816 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1817 PetscErrorCode ierr; 1818 const PetscInt m = b->AIJ->rmap->n,*idx; 1819 PetscInt n,i; 1820 1821 PetscFunctionBegin; 1822 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1823 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1824 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1825 1826 for (i=0; i<m; i++) { 1827 idx = a->j + a->i[i] ; 1828 v = a->a + a->i[i] ; 1829 n = a->i[i+1] - a->i[i]; 1830 alpha1 = x[10*i]; 1831 alpha2 = x[10*i+1]; 1832 alpha3 = x[10*i+2]; 1833 alpha4 = x[10*i+3]; 1834 alpha5 = x[10*i+4]; 1835 alpha6 = x[10*i+5]; 1836 alpha7 = x[10*i+6]; 1837 alpha8 = x[10*i+7]; 1838 alpha9 = x[10*i+8]; 1839 alpha10 = x[10*i+9]; 1840 while (n-->0) { 1841 y[10*(*idx)] += alpha1*(*v); 1842 y[10*(*idx)+1] += alpha2*(*v); 1843 y[10*(*idx)+2] += alpha3*(*v); 1844 y[10*(*idx)+3] += alpha4*(*v); 1845 y[10*(*idx)+4] += alpha5*(*v); 1846 y[10*(*idx)+5] += alpha6*(*v); 1847 y[10*(*idx)+6] += alpha7*(*v); 1848 y[10*(*idx)+7] += alpha8*(*v); 1849 y[10*(*idx)+8] += alpha9*(*v); 1850 y[10*(*idx)+9] += alpha10*(*v); 1851 idx++; v++; 1852 } 1853 } 1854 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1855 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1856 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1857 PetscFunctionReturn(0); 1858 } 1859 1860 #undef __FUNCT__ 1861 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1862 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1863 { 1864 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1865 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1866 const PetscScalar *x,*v; 1867 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1868 PetscErrorCode ierr; 1869 const PetscInt m = b->AIJ->rmap->n,*idx; 1870 PetscInt n,i; 1871 1872 PetscFunctionBegin; 1873 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1874 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1875 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1876 for (i=0; i<m; i++) { 1877 idx = a->j + a->i[i] ; 1878 v = a->a + a->i[i] ; 1879 n = a->i[i+1] - a->i[i]; 1880 alpha1 = x[10*i]; 1881 alpha2 = x[10*i+1]; 1882 alpha3 = x[10*i+2]; 1883 alpha4 = x[10*i+3]; 1884 alpha5 = x[10*i+4]; 1885 alpha6 = x[10*i+5]; 1886 alpha7 = x[10*i+6]; 1887 alpha8 = x[10*i+7]; 1888 alpha9 = x[10*i+8]; 1889 alpha10 = x[10*i+9]; 1890 while (n-->0) { 1891 y[10*(*idx)] += alpha1*(*v); 1892 y[10*(*idx)+1] += alpha2*(*v); 1893 y[10*(*idx)+2] += alpha3*(*v); 1894 y[10*(*idx)+3] += alpha4*(*v); 1895 y[10*(*idx)+4] += alpha5*(*v); 1896 y[10*(*idx)+5] += alpha6*(*v); 1897 y[10*(*idx)+6] += alpha7*(*v); 1898 y[10*(*idx)+7] += alpha8*(*v); 1899 y[10*(*idx)+8] += alpha9*(*v); 1900 y[10*(*idx)+9] += alpha10*(*v); 1901 idx++; v++; 1902 } 1903 } 1904 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1905 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1906 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1907 PetscFunctionReturn(0); 1908 } 1909 1910 1911 /*--------------------------------------------------------------------------------------------*/ 1912 #undef __FUNCT__ 1913 #define __FUNCT__ "MatMult_SeqMAIJ_11" 1914 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1915 { 1916 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1917 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1918 const PetscScalar *x,*v; 1919 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1920 PetscErrorCode ierr; 1921 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1922 PetscInt nonzerorow=0,n,i,jrow,j; 1923 1924 PetscFunctionBegin; 1925 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1926 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1927 idx = a->j; 1928 v = a->a; 1929 ii = a->i; 1930 1931 for (i=0; i<m; i++) { 1932 jrow = ii[i]; 1933 n = ii[i+1] - jrow; 1934 sum1 = 0.0; 1935 sum2 = 0.0; 1936 sum3 = 0.0; 1937 sum4 = 0.0; 1938 sum5 = 0.0; 1939 sum6 = 0.0; 1940 sum7 = 0.0; 1941 sum8 = 0.0; 1942 sum9 = 0.0; 1943 sum10 = 0.0; 1944 sum11 = 0.0; 1945 nonzerorow += (n>0); 1946 for (j=0; j<n; j++) { 1947 sum1 += v[jrow]*x[11*idx[jrow]]; 1948 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1949 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1950 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1951 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1952 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1953 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1954 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1955 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1956 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1957 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1958 jrow++; 1959 } 1960 y[11*i] = sum1; 1961 y[11*i+1] = sum2; 1962 y[11*i+2] = sum3; 1963 y[11*i+3] = sum4; 1964 y[11*i+4] = sum5; 1965 y[11*i+5] = sum6; 1966 y[11*i+6] = sum7; 1967 y[11*i+7] = sum8; 1968 y[11*i+8] = sum9; 1969 y[11*i+9] = sum10; 1970 y[11*i+10] = sum11; 1971 } 1972 1973 ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr); 1974 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1975 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1976 PetscFunctionReturn(0); 1977 } 1978 1979 #undef __FUNCT__ 1980 #define __FUNCT__ "MatMultAdd_SeqMAIJ_11" 1981 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1982 { 1983 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1984 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1985 const PetscScalar *x,*v; 1986 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1987 PetscErrorCode ierr; 1988 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1989 PetscInt n,i,jrow,j; 1990 1991 PetscFunctionBegin; 1992 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1993 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1994 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1995 idx = a->j; 1996 v = a->a; 1997 ii = a->i; 1998 1999 for (i=0; i<m; i++) { 2000 jrow = ii[i]; 2001 n = ii[i+1] - jrow; 2002 sum1 = 0.0; 2003 sum2 = 0.0; 2004 sum3 = 0.0; 2005 sum4 = 0.0; 2006 sum5 = 0.0; 2007 sum6 = 0.0; 2008 sum7 = 0.0; 2009 sum8 = 0.0; 2010 sum9 = 0.0; 2011 sum10 = 0.0; 2012 sum11 = 0.0; 2013 for (j=0; j<n; j++) { 2014 sum1 += v[jrow]*x[11*idx[jrow]]; 2015 sum2 += v[jrow]*x[11*idx[jrow]+1]; 2016 sum3 += v[jrow]*x[11*idx[jrow]+2]; 2017 sum4 += v[jrow]*x[11*idx[jrow]+3]; 2018 sum5 += v[jrow]*x[11*idx[jrow]+4]; 2019 sum6 += v[jrow]*x[11*idx[jrow]+5]; 2020 sum7 += v[jrow]*x[11*idx[jrow]+6]; 2021 sum8 += v[jrow]*x[11*idx[jrow]+7]; 2022 sum9 += v[jrow]*x[11*idx[jrow]+8]; 2023 sum10 += v[jrow]*x[11*idx[jrow]+9]; 2024 sum11 += v[jrow]*x[11*idx[jrow]+10]; 2025 jrow++; 2026 } 2027 y[11*i] += sum1; 2028 y[11*i+1] += sum2; 2029 y[11*i+2] += sum3; 2030 y[11*i+3] += sum4; 2031 y[11*i+4] += sum5; 2032 y[11*i+5] += sum6; 2033 y[11*i+6] += sum7; 2034 y[11*i+7] += sum8; 2035 y[11*i+8] += sum9; 2036 y[11*i+9] += sum10; 2037 y[11*i+10] += sum11; 2038 } 2039 2040 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2041 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2042 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2043 PetscFunctionReturn(0); 2044 } 2045 2046 #undef __FUNCT__ 2047 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_11" 2048 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 2049 { 2050 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2051 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2052 const PetscScalar *x,*v; 2053 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2054 PetscErrorCode ierr; 2055 const PetscInt m = b->AIJ->rmap->n,*idx; 2056 PetscInt n,i; 2057 2058 PetscFunctionBegin; 2059 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2060 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2061 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2062 2063 for (i=0; i<m; i++) { 2064 idx = a->j + a->i[i] ; 2065 v = a->a + a->i[i] ; 2066 n = a->i[i+1] - a->i[i]; 2067 alpha1 = x[11*i]; 2068 alpha2 = x[11*i+1]; 2069 alpha3 = x[11*i+2]; 2070 alpha4 = x[11*i+3]; 2071 alpha5 = x[11*i+4]; 2072 alpha6 = x[11*i+5]; 2073 alpha7 = x[11*i+6]; 2074 alpha8 = x[11*i+7]; 2075 alpha9 = x[11*i+8]; 2076 alpha10 = x[11*i+9]; 2077 alpha11 = x[11*i+10]; 2078 while (n-->0) { 2079 y[11*(*idx)] += alpha1*(*v); 2080 y[11*(*idx)+1] += alpha2*(*v); 2081 y[11*(*idx)+2] += alpha3*(*v); 2082 y[11*(*idx)+3] += alpha4*(*v); 2083 y[11*(*idx)+4] += alpha5*(*v); 2084 y[11*(*idx)+5] += alpha6*(*v); 2085 y[11*(*idx)+6] += alpha7*(*v); 2086 y[11*(*idx)+7] += alpha8*(*v); 2087 y[11*(*idx)+8] += alpha9*(*v); 2088 y[11*(*idx)+9] += alpha10*(*v); 2089 y[11*(*idx)+10] += alpha11*(*v); 2090 idx++; v++; 2091 } 2092 } 2093 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2094 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2095 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2096 PetscFunctionReturn(0); 2097 } 2098 2099 #undef __FUNCT__ 2100 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_11" 2101 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2102 { 2103 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2104 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2105 const PetscScalar *x,*v; 2106 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2107 PetscErrorCode ierr; 2108 const PetscInt m = b->AIJ->rmap->n,*idx; 2109 PetscInt n,i; 2110 2111 PetscFunctionBegin; 2112 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2113 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2114 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2115 for (i=0; i<m; i++) { 2116 idx = a->j + a->i[i] ; 2117 v = a->a + a->i[i] ; 2118 n = a->i[i+1] - a->i[i]; 2119 alpha1 = x[11*i]; 2120 alpha2 = x[11*i+1]; 2121 alpha3 = x[11*i+2]; 2122 alpha4 = x[11*i+3]; 2123 alpha5 = x[11*i+4]; 2124 alpha6 = x[11*i+5]; 2125 alpha7 = x[11*i+6]; 2126 alpha8 = x[11*i+7]; 2127 alpha9 = x[11*i+8]; 2128 alpha10 = x[11*i+9]; 2129 alpha11 = x[11*i+10]; 2130 while (n-->0) { 2131 y[11*(*idx)] += alpha1*(*v); 2132 y[11*(*idx)+1] += alpha2*(*v); 2133 y[11*(*idx)+2] += alpha3*(*v); 2134 y[11*(*idx)+3] += alpha4*(*v); 2135 y[11*(*idx)+4] += alpha5*(*v); 2136 y[11*(*idx)+5] += alpha6*(*v); 2137 y[11*(*idx)+6] += alpha7*(*v); 2138 y[11*(*idx)+7] += alpha8*(*v); 2139 y[11*(*idx)+8] += alpha9*(*v); 2140 y[11*(*idx)+9] += alpha10*(*v); 2141 y[11*(*idx)+10] += alpha11*(*v); 2142 idx++; v++; 2143 } 2144 } 2145 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2146 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2147 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2148 PetscFunctionReturn(0); 2149 } 2150 2151 2152 /*--------------------------------------------------------------------------------------------*/ 2153 #undef __FUNCT__ 2154 #define __FUNCT__ "MatMult_SeqMAIJ_16" 2155 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2156 { 2157 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2158 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2159 const PetscScalar *x,*v; 2160 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2161 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2162 PetscErrorCode ierr; 2163 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2164 PetscInt nonzerorow=0,n,i,jrow,j; 2165 2166 PetscFunctionBegin; 2167 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2168 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2169 idx = a->j; 2170 v = a->a; 2171 ii = a->i; 2172 2173 for (i=0; i<m; i++) { 2174 jrow = ii[i]; 2175 n = ii[i+1] - jrow; 2176 sum1 = 0.0; 2177 sum2 = 0.0; 2178 sum3 = 0.0; 2179 sum4 = 0.0; 2180 sum5 = 0.0; 2181 sum6 = 0.0; 2182 sum7 = 0.0; 2183 sum8 = 0.0; 2184 sum9 = 0.0; 2185 sum10 = 0.0; 2186 sum11 = 0.0; 2187 sum12 = 0.0; 2188 sum13 = 0.0; 2189 sum14 = 0.0; 2190 sum15 = 0.0; 2191 sum16 = 0.0; 2192 nonzerorow += (n>0); 2193 for (j=0; j<n; j++) { 2194 sum1 += v[jrow]*x[16*idx[jrow]]; 2195 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2196 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2197 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2198 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2199 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2200 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2201 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2202 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2203 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2204 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2205 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2206 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2207 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2208 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2209 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2210 jrow++; 2211 } 2212 y[16*i] = sum1; 2213 y[16*i+1] = sum2; 2214 y[16*i+2] = sum3; 2215 y[16*i+3] = sum4; 2216 y[16*i+4] = sum5; 2217 y[16*i+5] = sum6; 2218 y[16*i+6] = sum7; 2219 y[16*i+7] = sum8; 2220 y[16*i+8] = sum9; 2221 y[16*i+9] = sum10; 2222 y[16*i+10] = sum11; 2223 y[16*i+11] = sum12; 2224 y[16*i+12] = sum13; 2225 y[16*i+13] = sum14; 2226 y[16*i+14] = sum15; 2227 y[16*i+15] = sum16; 2228 } 2229 2230 ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 2231 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2232 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2233 PetscFunctionReturn(0); 2234 } 2235 2236 #undef __FUNCT__ 2237 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 2238 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2239 { 2240 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2241 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2242 const PetscScalar *x,*v; 2243 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2244 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2245 PetscErrorCode ierr; 2246 const PetscInt m = b->AIJ->rmap->n,*idx; 2247 PetscInt n,i; 2248 2249 PetscFunctionBegin; 2250 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2251 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2252 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2253 2254 for (i=0; i<m; i++) { 2255 idx = a->j + a->i[i] ; 2256 v = a->a + a->i[i] ; 2257 n = a->i[i+1] - a->i[i]; 2258 alpha1 = x[16*i]; 2259 alpha2 = x[16*i+1]; 2260 alpha3 = x[16*i+2]; 2261 alpha4 = x[16*i+3]; 2262 alpha5 = x[16*i+4]; 2263 alpha6 = x[16*i+5]; 2264 alpha7 = x[16*i+6]; 2265 alpha8 = x[16*i+7]; 2266 alpha9 = x[16*i+8]; 2267 alpha10 = x[16*i+9]; 2268 alpha11 = x[16*i+10]; 2269 alpha12 = x[16*i+11]; 2270 alpha13 = x[16*i+12]; 2271 alpha14 = x[16*i+13]; 2272 alpha15 = x[16*i+14]; 2273 alpha16 = x[16*i+15]; 2274 while (n-->0) { 2275 y[16*(*idx)] += alpha1*(*v); 2276 y[16*(*idx)+1] += alpha2*(*v); 2277 y[16*(*idx)+2] += alpha3*(*v); 2278 y[16*(*idx)+3] += alpha4*(*v); 2279 y[16*(*idx)+4] += alpha5*(*v); 2280 y[16*(*idx)+5] += alpha6*(*v); 2281 y[16*(*idx)+6] += alpha7*(*v); 2282 y[16*(*idx)+7] += alpha8*(*v); 2283 y[16*(*idx)+8] += alpha9*(*v); 2284 y[16*(*idx)+9] += alpha10*(*v); 2285 y[16*(*idx)+10] += alpha11*(*v); 2286 y[16*(*idx)+11] += alpha12*(*v); 2287 y[16*(*idx)+12] += alpha13*(*v); 2288 y[16*(*idx)+13] += alpha14*(*v); 2289 y[16*(*idx)+14] += alpha15*(*v); 2290 y[16*(*idx)+15] += alpha16*(*v); 2291 idx++; v++; 2292 } 2293 } 2294 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2295 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2296 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2297 PetscFunctionReturn(0); 2298 } 2299 2300 #undef __FUNCT__ 2301 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 2302 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2303 { 2304 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2305 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2306 const PetscScalar *x,*v; 2307 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2308 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2309 PetscErrorCode ierr; 2310 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2311 PetscInt n,i,jrow,j; 2312 2313 PetscFunctionBegin; 2314 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2315 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2316 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2317 idx = a->j; 2318 v = a->a; 2319 ii = a->i; 2320 2321 for (i=0; i<m; i++) { 2322 jrow = ii[i]; 2323 n = ii[i+1] - jrow; 2324 sum1 = 0.0; 2325 sum2 = 0.0; 2326 sum3 = 0.0; 2327 sum4 = 0.0; 2328 sum5 = 0.0; 2329 sum6 = 0.0; 2330 sum7 = 0.0; 2331 sum8 = 0.0; 2332 sum9 = 0.0; 2333 sum10 = 0.0; 2334 sum11 = 0.0; 2335 sum12 = 0.0; 2336 sum13 = 0.0; 2337 sum14 = 0.0; 2338 sum15 = 0.0; 2339 sum16 = 0.0; 2340 for (j=0; j<n; j++) { 2341 sum1 += v[jrow]*x[16*idx[jrow]]; 2342 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2343 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2344 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2345 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2346 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2347 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2348 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2349 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2350 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2351 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2352 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2353 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2354 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2355 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2356 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2357 jrow++; 2358 } 2359 y[16*i] += sum1; 2360 y[16*i+1] += sum2; 2361 y[16*i+2] += sum3; 2362 y[16*i+3] += sum4; 2363 y[16*i+4] += sum5; 2364 y[16*i+5] += sum6; 2365 y[16*i+6] += sum7; 2366 y[16*i+7] += sum8; 2367 y[16*i+8] += sum9; 2368 y[16*i+9] += sum10; 2369 y[16*i+10] += sum11; 2370 y[16*i+11] += sum12; 2371 y[16*i+12] += sum13; 2372 y[16*i+13] += sum14; 2373 y[16*i+14] += sum15; 2374 y[16*i+15] += sum16; 2375 } 2376 2377 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2378 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2379 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2385 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2386 { 2387 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2388 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2389 const PetscScalar *x,*v; 2390 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2391 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2392 PetscErrorCode ierr; 2393 const PetscInt m = b->AIJ->rmap->n,*idx; 2394 PetscInt n,i; 2395 2396 PetscFunctionBegin; 2397 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2398 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2399 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2400 for (i=0; i<m; i++) { 2401 idx = a->j + a->i[i] ; 2402 v = a->a + a->i[i] ; 2403 n = a->i[i+1] - a->i[i]; 2404 alpha1 = x[16*i]; 2405 alpha2 = x[16*i+1]; 2406 alpha3 = x[16*i+2]; 2407 alpha4 = x[16*i+3]; 2408 alpha5 = x[16*i+4]; 2409 alpha6 = x[16*i+5]; 2410 alpha7 = x[16*i+6]; 2411 alpha8 = x[16*i+7]; 2412 alpha9 = x[16*i+8]; 2413 alpha10 = x[16*i+9]; 2414 alpha11 = x[16*i+10]; 2415 alpha12 = x[16*i+11]; 2416 alpha13 = x[16*i+12]; 2417 alpha14 = x[16*i+13]; 2418 alpha15 = x[16*i+14]; 2419 alpha16 = x[16*i+15]; 2420 while (n-->0) { 2421 y[16*(*idx)] += alpha1*(*v); 2422 y[16*(*idx)+1] += alpha2*(*v); 2423 y[16*(*idx)+2] += alpha3*(*v); 2424 y[16*(*idx)+3] += alpha4*(*v); 2425 y[16*(*idx)+4] += alpha5*(*v); 2426 y[16*(*idx)+5] += alpha6*(*v); 2427 y[16*(*idx)+6] += alpha7*(*v); 2428 y[16*(*idx)+7] += alpha8*(*v); 2429 y[16*(*idx)+8] += alpha9*(*v); 2430 y[16*(*idx)+9] += alpha10*(*v); 2431 y[16*(*idx)+10] += alpha11*(*v); 2432 y[16*(*idx)+11] += alpha12*(*v); 2433 y[16*(*idx)+12] += alpha13*(*v); 2434 y[16*(*idx)+13] += alpha14*(*v); 2435 y[16*(*idx)+14] += alpha15*(*v); 2436 y[16*(*idx)+15] += alpha16*(*v); 2437 idx++; v++; 2438 } 2439 } 2440 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2441 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2442 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2443 PetscFunctionReturn(0); 2444 } 2445 2446 /*--------------------------------------------------------------------------------------------*/ 2447 #undef __FUNCT__ 2448 #define __FUNCT__ "MatMult_SeqMAIJ_18" 2449 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2450 { 2451 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2452 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2453 const PetscScalar *x,*v; 2454 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2455 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2456 PetscErrorCode ierr; 2457 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2458 PetscInt nonzerorow=0,n,i,jrow,j; 2459 2460 PetscFunctionBegin; 2461 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2462 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2463 idx = a->j; 2464 v = a->a; 2465 ii = a->i; 2466 2467 for (i=0; i<m; i++) { 2468 jrow = ii[i]; 2469 n = ii[i+1] - jrow; 2470 sum1 = 0.0; 2471 sum2 = 0.0; 2472 sum3 = 0.0; 2473 sum4 = 0.0; 2474 sum5 = 0.0; 2475 sum6 = 0.0; 2476 sum7 = 0.0; 2477 sum8 = 0.0; 2478 sum9 = 0.0; 2479 sum10 = 0.0; 2480 sum11 = 0.0; 2481 sum12 = 0.0; 2482 sum13 = 0.0; 2483 sum14 = 0.0; 2484 sum15 = 0.0; 2485 sum16 = 0.0; 2486 sum17 = 0.0; 2487 sum18 = 0.0; 2488 nonzerorow += (n>0); 2489 for (j=0; j<n; j++) { 2490 sum1 += v[jrow]*x[18*idx[jrow]]; 2491 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2492 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2493 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2494 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2495 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2496 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2497 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2498 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2499 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2500 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2501 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2502 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2503 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2504 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2505 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2506 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2507 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2508 jrow++; 2509 } 2510 y[18*i] = sum1; 2511 y[18*i+1] = sum2; 2512 y[18*i+2] = sum3; 2513 y[18*i+3] = sum4; 2514 y[18*i+4] = sum5; 2515 y[18*i+5] = sum6; 2516 y[18*i+6] = sum7; 2517 y[18*i+7] = sum8; 2518 y[18*i+8] = sum9; 2519 y[18*i+9] = sum10; 2520 y[18*i+10] = sum11; 2521 y[18*i+11] = sum12; 2522 y[18*i+12] = sum13; 2523 y[18*i+13] = sum14; 2524 y[18*i+14] = sum15; 2525 y[18*i+15] = sum16; 2526 y[18*i+16] = sum17; 2527 y[18*i+17] = sum18; 2528 } 2529 2530 ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 2531 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2532 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2533 PetscFunctionReturn(0); 2534 } 2535 2536 #undef __FUNCT__ 2537 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18" 2538 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2539 { 2540 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2541 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2542 const PetscScalar *x,*v; 2543 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2544 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2545 PetscErrorCode ierr; 2546 const PetscInt m = b->AIJ->rmap->n,*idx; 2547 PetscInt n,i; 2548 2549 PetscFunctionBegin; 2550 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2551 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2552 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2553 2554 for (i=0; i<m; i++) { 2555 idx = a->j + a->i[i] ; 2556 v = a->a + a->i[i] ; 2557 n = a->i[i+1] - a->i[i]; 2558 alpha1 = x[18*i]; 2559 alpha2 = x[18*i+1]; 2560 alpha3 = x[18*i+2]; 2561 alpha4 = x[18*i+3]; 2562 alpha5 = x[18*i+4]; 2563 alpha6 = x[18*i+5]; 2564 alpha7 = x[18*i+6]; 2565 alpha8 = x[18*i+7]; 2566 alpha9 = x[18*i+8]; 2567 alpha10 = x[18*i+9]; 2568 alpha11 = x[18*i+10]; 2569 alpha12 = x[18*i+11]; 2570 alpha13 = x[18*i+12]; 2571 alpha14 = x[18*i+13]; 2572 alpha15 = x[18*i+14]; 2573 alpha16 = x[18*i+15]; 2574 alpha17 = x[18*i+16]; 2575 alpha18 = x[18*i+17]; 2576 while (n-->0) { 2577 y[18*(*idx)] += alpha1*(*v); 2578 y[18*(*idx)+1] += alpha2*(*v); 2579 y[18*(*idx)+2] += alpha3*(*v); 2580 y[18*(*idx)+3] += alpha4*(*v); 2581 y[18*(*idx)+4] += alpha5*(*v); 2582 y[18*(*idx)+5] += alpha6*(*v); 2583 y[18*(*idx)+6] += alpha7*(*v); 2584 y[18*(*idx)+7] += alpha8*(*v); 2585 y[18*(*idx)+8] += alpha9*(*v); 2586 y[18*(*idx)+9] += alpha10*(*v); 2587 y[18*(*idx)+10] += alpha11*(*v); 2588 y[18*(*idx)+11] += alpha12*(*v); 2589 y[18*(*idx)+12] += alpha13*(*v); 2590 y[18*(*idx)+13] += alpha14*(*v); 2591 y[18*(*idx)+14] += alpha15*(*v); 2592 y[18*(*idx)+15] += alpha16*(*v); 2593 y[18*(*idx)+16] += alpha17*(*v); 2594 y[18*(*idx)+17] += alpha18*(*v); 2595 idx++; v++; 2596 } 2597 } 2598 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2599 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2600 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2601 PetscFunctionReturn(0); 2602 } 2603 2604 #undef __FUNCT__ 2605 #define __FUNCT__ "MatMultAdd_SeqMAIJ_18" 2606 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2607 { 2608 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2609 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2610 const PetscScalar *x,*v; 2611 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2612 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2613 PetscErrorCode ierr; 2614 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2615 PetscInt n,i,jrow,j; 2616 2617 PetscFunctionBegin; 2618 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2619 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2620 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2621 idx = a->j; 2622 v = a->a; 2623 ii = a->i; 2624 2625 for (i=0; i<m; i++) { 2626 jrow = ii[i]; 2627 n = ii[i+1] - jrow; 2628 sum1 = 0.0; 2629 sum2 = 0.0; 2630 sum3 = 0.0; 2631 sum4 = 0.0; 2632 sum5 = 0.0; 2633 sum6 = 0.0; 2634 sum7 = 0.0; 2635 sum8 = 0.0; 2636 sum9 = 0.0; 2637 sum10 = 0.0; 2638 sum11 = 0.0; 2639 sum12 = 0.0; 2640 sum13 = 0.0; 2641 sum14 = 0.0; 2642 sum15 = 0.0; 2643 sum16 = 0.0; 2644 sum17 = 0.0; 2645 sum18 = 0.0; 2646 for (j=0; j<n; j++) { 2647 sum1 += v[jrow]*x[18*idx[jrow]]; 2648 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2649 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2650 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2651 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2652 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2653 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2654 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2655 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2656 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2657 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2658 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2659 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2660 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2661 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2662 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2663 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2664 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2665 jrow++; 2666 } 2667 y[18*i] += sum1; 2668 y[18*i+1] += sum2; 2669 y[18*i+2] += sum3; 2670 y[18*i+3] += sum4; 2671 y[18*i+4] += sum5; 2672 y[18*i+5] += sum6; 2673 y[18*i+6] += sum7; 2674 y[18*i+7] += sum8; 2675 y[18*i+8] += sum9; 2676 y[18*i+9] += sum10; 2677 y[18*i+10] += sum11; 2678 y[18*i+11] += sum12; 2679 y[18*i+12] += sum13; 2680 y[18*i+13] += sum14; 2681 y[18*i+14] += sum15; 2682 y[18*i+15] += sum16; 2683 y[18*i+16] += sum17; 2684 y[18*i+17] += sum18; 2685 } 2686 2687 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2688 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2689 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2690 PetscFunctionReturn(0); 2691 } 2692 2693 #undef __FUNCT__ 2694 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18" 2695 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2696 { 2697 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2698 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2699 const PetscScalar *x,*v; 2700 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2701 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2702 PetscErrorCode ierr; 2703 const PetscInt m = b->AIJ->rmap->n,*idx; 2704 PetscInt n,i; 2705 2706 PetscFunctionBegin; 2707 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2708 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2709 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2710 for (i=0; i<m; i++) { 2711 idx = a->j + a->i[i] ; 2712 v = a->a + a->i[i] ; 2713 n = a->i[i+1] - a->i[i]; 2714 alpha1 = x[18*i]; 2715 alpha2 = x[18*i+1]; 2716 alpha3 = x[18*i+2]; 2717 alpha4 = x[18*i+3]; 2718 alpha5 = x[18*i+4]; 2719 alpha6 = x[18*i+5]; 2720 alpha7 = x[18*i+6]; 2721 alpha8 = x[18*i+7]; 2722 alpha9 = x[18*i+8]; 2723 alpha10 = x[18*i+9]; 2724 alpha11 = x[18*i+10]; 2725 alpha12 = x[18*i+11]; 2726 alpha13 = x[18*i+12]; 2727 alpha14 = x[18*i+13]; 2728 alpha15 = x[18*i+14]; 2729 alpha16 = x[18*i+15]; 2730 alpha17 = x[18*i+16]; 2731 alpha18 = x[18*i+17]; 2732 while (n-->0) { 2733 y[18*(*idx)] += alpha1*(*v); 2734 y[18*(*idx)+1] += alpha2*(*v); 2735 y[18*(*idx)+2] += alpha3*(*v); 2736 y[18*(*idx)+3] += alpha4*(*v); 2737 y[18*(*idx)+4] += alpha5*(*v); 2738 y[18*(*idx)+5] += alpha6*(*v); 2739 y[18*(*idx)+6] += alpha7*(*v); 2740 y[18*(*idx)+7] += alpha8*(*v); 2741 y[18*(*idx)+8] += alpha9*(*v); 2742 y[18*(*idx)+9] += alpha10*(*v); 2743 y[18*(*idx)+10] += alpha11*(*v); 2744 y[18*(*idx)+11] += alpha12*(*v); 2745 y[18*(*idx)+12] += alpha13*(*v); 2746 y[18*(*idx)+13] += alpha14*(*v); 2747 y[18*(*idx)+14] += alpha15*(*v); 2748 y[18*(*idx)+15] += alpha16*(*v); 2749 y[18*(*idx)+16] += alpha17*(*v); 2750 y[18*(*idx)+17] += alpha18*(*v); 2751 idx++; v++; 2752 } 2753 } 2754 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2755 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2756 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2757 PetscFunctionReturn(0); 2758 } 2759 2760 #undef __FUNCT__ 2761 #define __FUNCT__ "MatMult_SeqMAIJ_N" 2762 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2763 { 2764 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2765 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2766 const PetscScalar *x,*v; 2767 PetscScalar *y,*sums; 2768 PetscErrorCode ierr; 2769 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2770 PetscInt n,i,jrow,j,dof = b->dof,k; 2771 2772 PetscFunctionBegin; 2773 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2774 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2775 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2776 idx = a->j; 2777 v = a->a; 2778 ii = a->i; 2779 2780 for (i=0; i<m; i++) { 2781 jrow = ii[i]; 2782 n = ii[i+1] - jrow; 2783 sums = y + dof*i; 2784 for (j=0; j<n; j++) { 2785 for (k=0; k<dof; k++) { 2786 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2787 } 2788 jrow++; 2789 } 2790 } 2791 2792 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2793 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2794 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2795 PetscFunctionReturn(0); 2796 } 2797 2798 #undef __FUNCT__ 2799 #define __FUNCT__ "MatMultAdd_SeqMAIJ_N" 2800 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2801 { 2802 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2803 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2804 const PetscScalar *x,*v; 2805 PetscScalar *y,*sums; 2806 PetscErrorCode ierr; 2807 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2808 PetscInt n,i,jrow,j,dof = b->dof,k; 2809 2810 PetscFunctionBegin; 2811 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2812 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2813 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2814 idx = a->j; 2815 v = a->a; 2816 ii = a->i; 2817 2818 for (i=0; i<m; i++) { 2819 jrow = ii[i]; 2820 n = ii[i+1] - jrow; 2821 sums = y + dof*i; 2822 for (j=0; j<n; j++) { 2823 for (k=0; k<dof; k++) { 2824 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2825 } 2826 jrow++; 2827 } 2828 } 2829 2830 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2831 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2832 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2833 PetscFunctionReturn(0); 2834 } 2835 2836 #undef __FUNCT__ 2837 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_N" 2838 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2839 { 2840 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2841 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2842 const PetscScalar *x,*v,*alpha; 2843 PetscScalar *y; 2844 PetscErrorCode ierr; 2845 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2846 PetscInt n,i,k; 2847 2848 PetscFunctionBegin; 2849 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2850 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2851 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2852 for (i=0; i<m; i++) { 2853 idx = a->j + a->i[i] ; 2854 v = a->a + a->i[i] ; 2855 n = a->i[i+1] - a->i[i]; 2856 alpha = x + dof*i; 2857 while (n-->0) { 2858 for (k=0; k<dof; k++) { 2859 y[dof*(*idx)+k] += alpha[k]*(*v); 2860 } 2861 idx++; v++; 2862 } 2863 } 2864 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2865 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2866 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2867 PetscFunctionReturn(0); 2868 } 2869 2870 #undef __FUNCT__ 2871 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_N" 2872 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2873 { 2874 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2875 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2876 const PetscScalar *x,*v,*alpha; 2877 PetscScalar *y; 2878 PetscErrorCode ierr; 2879 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2880 PetscInt n,i,k; 2881 2882 PetscFunctionBegin; 2883 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2884 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2885 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2886 for (i=0; i<m; i++) { 2887 idx = a->j + a->i[i] ; 2888 v = a->a + a->i[i] ; 2889 n = a->i[i+1] - a->i[i]; 2890 alpha = x + dof*i; 2891 while (n-->0) { 2892 for (k=0; k<dof; k++) { 2893 y[dof*(*idx)+k] += alpha[k]*(*v); 2894 } 2895 idx++; v++; 2896 } 2897 } 2898 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2899 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2900 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2901 PetscFunctionReturn(0); 2902 } 2903 2904 /*===================================================================================*/ 2905 #undef __FUNCT__ 2906 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2907 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2908 { 2909 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2910 PetscErrorCode ierr; 2911 2912 PetscFunctionBegin; 2913 /* start the scatter */ 2914 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2915 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2916 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2917 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2918 PetscFunctionReturn(0); 2919 } 2920 2921 #undef __FUNCT__ 2922 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2923 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2924 { 2925 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2926 PetscErrorCode ierr; 2927 2928 PetscFunctionBegin; 2929 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2930 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2931 ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2932 ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2933 PetscFunctionReturn(0); 2934 } 2935 2936 #undef __FUNCT__ 2937 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2938 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2939 { 2940 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2941 PetscErrorCode ierr; 2942 2943 PetscFunctionBegin; 2944 /* start the scatter */ 2945 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2946 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2947 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2948 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2949 PetscFunctionReturn(0); 2950 } 2951 2952 #undef __FUNCT__ 2953 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2954 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2955 { 2956 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2957 PetscErrorCode ierr; 2958 2959 PetscFunctionBegin; 2960 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2961 ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2962 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2963 ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2964 PetscFunctionReturn(0); 2965 } 2966 2967 /* ----------------------------------------------------------------*/ 2968 #undef __FUNCT__ 2969 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 2970 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2971 { 2972 PetscErrorCode ierr; 2973 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2974 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2975 Mat P=pp->AIJ; 2976 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2977 PetscInt *pti,*ptj,*ptJ; 2978 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2979 const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 2980 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 2981 MatScalar *ca; 2982 const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 2983 2984 PetscFunctionBegin; 2985 /* Get ij structure of P^T */ 2986 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2987 2988 cn = pn*ppdof; 2989 /* Allocate ci array, arrays for fill computation and */ 2990 /* free space for accumulating nonzero column info */ 2991 ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 2992 ci[0] = 0; 2993 2994 /* Work arrays for rows of P^T*A */ 2995 ierr = PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);CHKERRQ(ierr); 2996 ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr); 2997 ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr); 2998 2999 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 3000 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 3001 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 3002 ierr = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);CHKERRQ(ierr); 3003 current_space = free_space; 3004 3005 /* Determine symbolic info for each row of C: */ 3006 for (i=0;i<pn;i++) { 3007 ptnzi = pti[i+1] - pti[i]; 3008 ptJ = ptj + pti[i]; 3009 for (dof=0;dof<ppdof;dof++) { 3010 ptanzi = 0; 3011 /* Determine symbolic row of PtA: */ 3012 for (j=0;j<ptnzi;j++) { 3013 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 3014 arow = ptJ[j]*ppdof + dof; 3015 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 3016 anzj = ai[arow+1] - ai[arow]; 3017 ajj = aj + ai[arow]; 3018 for (k=0;k<anzj;k++) { 3019 if (!ptadenserow[ajj[k]]) { 3020 ptadenserow[ajj[k]] = -1; 3021 ptasparserow[ptanzi++] = ajj[k]; 3022 } 3023 } 3024 } 3025 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 3026 ptaj = ptasparserow; 3027 cnzi = 0; 3028 for (j=0;j<ptanzi;j++) { 3029 /* Get offset within block of P */ 3030 pshift = *ptaj%ppdof; 3031 /* Get block row of P */ 3032 prow = (*ptaj++)/ppdof; /* integer division */ 3033 /* P has same number of nonzeros per row as the compressed form */ 3034 pnzj = pi[prow+1] - pi[prow]; 3035 pjj = pj + pi[prow]; 3036 for (k=0;k<pnzj;k++) { 3037 /* Locations in C are shifted by the offset within the block */ 3038 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 3039 if (!denserow[pjj[k]*ppdof+pshift]) { 3040 denserow[pjj[k]*ppdof+pshift] = -1; 3041 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 3042 } 3043 } 3044 } 3045 3046 /* sort sparserow */ 3047 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 3048 3049 /* If free space is not available, make more free space */ 3050 /* Double the amount of total space in the list */ 3051 if (current_space->local_remaining<cnzi) { 3052 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3053 } 3054 3055 /* Copy data into free space, and zero out denserows */ 3056 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 3057 current_space->array += cnzi; 3058 current_space->local_used += cnzi; 3059 current_space->local_remaining -= cnzi; 3060 3061 for (j=0;j<ptanzi;j++) { 3062 ptadenserow[ptasparserow[j]] = 0; 3063 } 3064 for (j=0;j<cnzi;j++) { 3065 denserow[sparserow[j]] = 0; 3066 } 3067 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 3068 /* For now, we will recompute what is needed. */ 3069 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 3070 } 3071 } 3072 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 3073 /* Allocate space for cj, initialize cj, and */ 3074 /* destroy list of free space and other temporary array(s) */ 3075 ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3076 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 3077 ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr); 3078 3079 /* Allocate space for ca */ 3080 ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 3081 ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 3082 3083 /* put together the new matrix */ 3084 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 3085 3086 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3087 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3088 c = (Mat_SeqAIJ *)((*C)->data); 3089 c->free_a = PETSC_TRUE; 3090 c->free_ij = PETSC_TRUE; 3091 c->nonew = 0; 3092 (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 3093 3094 /* Clean up. */ 3095 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 3096 PetscFunctionReturn(0); 3097 } 3098 3099 #undef __FUNCT__ 3100 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 3101 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 3102 { 3103 /* This routine requires testing -- first draft only */ 3104 PetscErrorCode ierr; 3105 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 3106 Mat P=pp->AIJ; 3107 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 3108 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 3109 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 3110 const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj; 3111 const PetscInt *ci=c->i,*cj=c->j,*cjj; 3112 const PetscInt am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3113 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3114 const MatScalar *aa=a->a,*pa=p->a,*pA,*paj; 3115 MatScalar *ca=c->a,*caj,*apa; 3116 3117 PetscFunctionBegin; 3118 /* Allocate temporary array for storage of one row of A*P */ 3119 ierr = PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);CHKERRQ(ierr); 3120 ierr = PetscMemzero(apa,cn*sizeof(MatScalar));CHKERRQ(ierr); 3121 ierr = PetscMemzero(apj,cn*sizeof(PetscInt));CHKERRQ(ierr); 3122 ierr = PetscMemzero(apjdense,cn*sizeof(PetscInt));CHKERRQ(ierr); 3123 3124 /* Clear old values in C */ 3125 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 3126 3127 for (i=0;i<am;i++) { 3128 /* Form sparse row of A*P */ 3129 anzi = ai[i+1] - ai[i]; 3130 apnzj = 0; 3131 for (j=0;j<anzi;j++) { 3132 /* Get offset within block of P */ 3133 pshift = *aj%ppdof; 3134 /* Get block row of P */ 3135 prow = *aj++/ppdof; /* integer division */ 3136 pnzj = pi[prow+1] - pi[prow]; 3137 pjj = pj + pi[prow]; 3138 paj = pa + pi[prow]; 3139 for (k=0;k<pnzj;k++) { 3140 poffset = pjj[k]*ppdof+pshift; 3141 if (!apjdense[poffset]) { 3142 apjdense[poffset] = -1; 3143 apj[apnzj++] = poffset; 3144 } 3145 apa[poffset] += (*aa)*paj[k]; 3146 } 3147 ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 3148 aa++; 3149 } 3150 3151 /* Sort the j index array for quick sparse axpy. */ 3152 /* Note: a array does not need sorting as it is in dense storage locations. */ 3153 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 3154 3155 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 3156 prow = i/ppdof; /* integer division */ 3157 pshift = i%ppdof; 3158 poffset = pi[prow]; 3159 pnzi = pi[prow+1] - poffset; 3160 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 3161 pJ = pj+poffset; 3162 pA = pa+poffset; 3163 for (j=0;j<pnzi;j++) { 3164 crow = (*pJ)*ppdof+pshift; 3165 cjj = cj + ci[crow]; 3166 caj = ca + ci[crow]; 3167 pJ++; 3168 /* Perform sparse axpy operation. Note cjj includes apj. */ 3169 for (k=0,nextap=0;nextap<apnzj;k++) { 3170 if (cjj[k]==apj[nextap]) { 3171 caj[k] += (*pA)*apa[apj[nextap++]]; 3172 } 3173 } 3174 ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 3175 pA++; 3176 } 3177 3178 /* Zero the current row info for A*P */ 3179 for (j=0;j<apnzj;j++) { 3180 apa[apj[j]] = 0.; 3181 apjdense[apj[j]] = 0; 3182 } 3183 } 3184 3185 /* Assemble the final matrix and clean up */ 3186 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3187 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3188 ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr); 3189 PetscFunctionReturn(0); 3190 } 3191 3192 #undef __FUNCT__ 3193 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqMAIJ" 3194 PETSC_EXTERN_C PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3195 { 3196 PetscErrorCode ierr; 3197 3198 PetscFunctionBegin; 3199 if (scall == MAT_INITIAL_MATRIX) { 3200 ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr); 3201 } 3202 ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr); 3203 PetscFunctionReturn(0); 3204 } 3205 3206 #undef __FUNCT__ 3207 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ" 3208 PETSC_EXTERN_C PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 3209 { 3210 PetscErrorCode ierr; 3211 3212 PetscFunctionBegin; 3213 /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 3214 ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr); 3215 ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 3216 PetscFunctionReturn(0); 3217 } 3218 3219 #undef __FUNCT__ 3220 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ" 3221 PETSC_EXTERN_C PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 3222 { 3223 PetscFunctionBegin; 3224 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 3225 PetscFunctionReturn(0); 3226 } 3227 3228 #undef __FUNCT__ 3229 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIMAIJ" 3230 PETSC_EXTERN_C PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3231 { 3232 PetscErrorCode ierr; 3233 3234 PetscFunctionBegin; 3235 if (scall == MAT_INITIAL_MATRIX) { 3236 ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);CHKERRQ(ierr); 3237 } 3238 ierr = ((*C)->ops->ptapnumeric)(A,P,*C);CHKERRQ(ierr); 3239 PetscFunctionReturn(0); 3240 } 3241 3242 #undef __FUNCT__ 3243 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 3244 PETSC_EXTERN_C PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3245 { 3246 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3247 Mat a = b->AIJ,B; 3248 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 3249 PetscErrorCode ierr; 3250 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3251 PetscInt *cols; 3252 PetscScalar *vals; 3253 3254 PetscFunctionBegin; 3255 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 3256 ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr); 3257 for (i=0; i<m; i++) { 3258 nmax = PetscMax(nmax,aij->ilen[i]); 3259 for (j=0; j<dof; j++) { 3260 ilen[dof*i+j] = aij->ilen[i]; 3261 } 3262 } 3263 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 3264 ierr = PetscFree(ilen);CHKERRQ(ierr); 3265 ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 3266 ii = 0; 3267 for (i=0; i<m; i++) { 3268 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3269 for (j=0; j<dof; j++) { 3270 for (k=0; k<ncols; k++) { 3271 icols[k] = dof*cols[k]+j; 3272 } 3273 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3274 ii++; 3275 } 3276 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3277 } 3278 ierr = PetscFree(icols);CHKERRQ(ierr); 3279 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3280 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3281 3282 if (reuse == MAT_REUSE_MATRIX) { 3283 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3284 } else { 3285 *newmat = B; 3286 } 3287 PetscFunctionReturn(0); 3288 } 3289 3290 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3291 3292 #undef __FUNCT__ 3293 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 3294 PETSC_EXTERN_C PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3295 { 3296 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3297 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 3298 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 3299 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 3300 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 3301 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 3302 PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0; 3303 PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL; 3304 PetscInt rstart,cstart,*garray,ii,k; 3305 PetscErrorCode ierr; 3306 PetscScalar *vals,*ovals; 3307 3308 PetscFunctionBegin; 3309 ierr = PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);CHKERRQ(ierr); 3310 for (i=0; i<A->rmap->n/dof; i++) { 3311 nmax = PetscMax(nmax,AIJ->ilen[i]); 3312 onmax = PetscMax(onmax,OAIJ->ilen[i]); 3313 for (j=0; j<dof; j++) { 3314 dnz[dof*i+j] = AIJ->ilen[i]; 3315 onz[dof*i+j] = OAIJ->ilen[i]; 3316 } 3317 } 3318 ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 3319 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 3320 3321 ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 3322 rstart = dof*maij->A->rmap->rstart; 3323 cstart = dof*maij->A->cmap->rstart; 3324 garray = mpiaij->garray; 3325 3326 ii = rstart; 3327 for (i=0; i<A->rmap->n/dof; i++) { 3328 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3329 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3330 for (j=0; j<dof; j++) { 3331 for (k=0; k<ncols; k++) { 3332 icols[k] = cstart + dof*cols[k]+j; 3333 } 3334 for (k=0; k<oncols; k++) { 3335 oicols[k] = dof*garray[ocols[k]]+j; 3336 } 3337 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3338 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 3339 ii++; 3340 } 3341 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3342 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3343 } 3344 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 3345 3346 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3347 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3348 3349 if (reuse == MAT_REUSE_MATRIX) { 3350 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 3351 ((PetscObject)A)->refct = 1; 3352 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3353 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3354 } else { 3355 *newmat = B; 3356 } 3357 PetscFunctionReturn(0); 3358 } 3359 3360 #undef __FUNCT__ 3361 #define __FUNCT__ "MatGetSubMatrix_MAIJ" 3362 PetscErrorCode MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 3363 { 3364 PetscErrorCode ierr; 3365 Mat A; 3366 3367 PetscFunctionBegin; 3368 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 3369 ierr = MatGetSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 3370 ierr = MatDestroy(&A);CHKERRQ(ierr); 3371 PetscFunctionReturn(0); 3372 } 3373 3374 /* ---------------------------------------------------------------------------------- */ 3375 #undef __FUNCT__ 3376 #define __FUNCT__ "MatCreateMAIJ" 3377 /*@C 3378 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 3379 operations for multicomponent problems. It interpolates each component the same 3380 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 3381 and MATMPIAIJ for distributed matrices. 3382 3383 Collective 3384 3385 Input Parameters: 3386 + A - the AIJ matrix describing the action on blocks 3387 - dof - the block size (number of components per node) 3388 3389 Output Parameter: 3390 . maij - the new MAIJ matrix 3391 3392 Operations provided: 3393 + MatMult 3394 . MatMultTranspose 3395 . MatMultAdd 3396 . MatMultTransposeAdd 3397 - MatView 3398 3399 Level: advanced 3400 3401 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ 3402 @*/ 3403 PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 3404 { 3405 PetscErrorCode ierr; 3406 PetscMPIInt size; 3407 PetscInt n; 3408 Mat B; 3409 3410 PetscFunctionBegin; 3411 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3412 3413 if (dof == 1) { 3414 *maij = A; 3415 } else { 3416 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3417 ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3418 ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr); 3419 ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr); 3420 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3421 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3422 B->assembled = PETSC_TRUE; 3423 3424 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3425 if (size == 1) { 3426 Mat_SeqMAIJ *b; 3427 3428 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 3429 B->ops->setup = PETSC_NULL; 3430 B->ops->destroy = MatDestroy_SeqMAIJ; 3431 B->ops->view = MatView_SeqMAIJ; 3432 b = (Mat_SeqMAIJ*)B->data; 3433 b->dof = dof; 3434 b->AIJ = A; 3435 if (dof == 2) { 3436 B->ops->mult = MatMult_SeqMAIJ_2; 3437 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3438 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3439 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3440 } else if (dof == 3) { 3441 B->ops->mult = MatMult_SeqMAIJ_3; 3442 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3443 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3444 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3445 } else if (dof == 4) { 3446 B->ops->mult = MatMult_SeqMAIJ_4; 3447 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3448 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3449 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3450 } else if (dof == 5) { 3451 B->ops->mult = MatMult_SeqMAIJ_5; 3452 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3453 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3454 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 3455 } else if (dof == 6) { 3456 B->ops->mult = MatMult_SeqMAIJ_6; 3457 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 3458 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 3459 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3460 } else if (dof == 7) { 3461 B->ops->mult = MatMult_SeqMAIJ_7; 3462 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3463 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3464 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 3465 } else if (dof == 8) { 3466 B->ops->mult = MatMult_SeqMAIJ_8; 3467 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 3468 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 3469 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 3470 } else if (dof == 9) { 3471 B->ops->mult = MatMult_SeqMAIJ_9; 3472 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 3473 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 3474 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3475 } else if (dof == 10) { 3476 B->ops->mult = MatMult_SeqMAIJ_10; 3477 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3478 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3479 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3480 } else if (dof == 11) { 3481 B->ops->mult = MatMult_SeqMAIJ_11; 3482 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3483 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3484 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 3485 } else if (dof == 16) { 3486 B->ops->mult = MatMult_SeqMAIJ_16; 3487 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 3488 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 3489 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3490 } else if (dof == 18) { 3491 B->ops->mult = MatMult_SeqMAIJ_18; 3492 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3493 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3494 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3495 } else { 3496 B->ops->mult = MatMult_SeqMAIJ_N; 3497 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 3498 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 3499 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 3500 } 3501 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3502 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatPtAP_seqaij_seqmaij_C","MatPtAP_SeqAIJ_SeqMAIJ",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 3503 } else { 3504 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 3505 Mat_MPIMAIJ *b; 3506 IS from,to; 3507 Vec gvec; 3508 3509 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 3510 B->ops->setup = PETSC_NULL; 3511 B->ops->destroy = MatDestroy_MPIMAIJ; 3512 B->ops->view = MatView_MPIMAIJ; 3513 b = (Mat_MPIMAIJ*)B->data; 3514 b->dof = dof; 3515 b->A = A; 3516 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 3517 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 3518 3519 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3520 ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr); 3521 ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr); 3522 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3523 ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr); 3524 3525 /* create two temporary Index sets for build scatter gather */ 3526 ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 3527 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3528 3529 /* create temporary global vector to generate scatter context */ 3530 ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 3531 3532 /* generate the scatter context */ 3533 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3534 3535 ierr = ISDestroy(&from);CHKERRQ(ierr); 3536 ierr = ISDestroy(&to);CHKERRQ(ierr); 3537 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 3538 3539 B->ops->mult = MatMult_MPIMAIJ_dof; 3540 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3541 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3542 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3543 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3544 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C","MatPtAP_MPIAIJ_MPIMAIJ",MatPtAP_MPIAIJ_MPIMAIJ);CHKERRQ(ierr); 3545 } 3546 B->ops->getsubmatrix = MatGetSubMatrix_MAIJ; 3547 ierr = MatSetUp(B);CHKERRQ(ierr); 3548 *maij = B; 3549 ierr = MatViewFromOptions(B,"-mat_view");CHKERRQ(ierr); 3550 } 3551 PetscFunctionReturn(0); 3552 } 3553