1 /* 2 Defines the basic matrix operations for the MAIJ matrix storage format. 3 This format is used for restriction and interpolation operations for 4 multicomponent problems. It interpolates each component the same way 5 independently. 6 7 We provide: 8 MatMult() 9 MatMultTranspose() 10 MatMultTransposeAdd() 11 MatMultAdd() 12 and 13 MatCreateMAIJ(Mat,dof,Mat*) 14 15 This single directory handles both the sequential and parallel codes 16 */ 17 18 #include "src/mat/impls/maij/maij.h" 19 #include "vecimpl.h" 20 21 #undef __FUNCT__ 22 #define __FUNCT__ "MatMAIJGetAIJ" 23 PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 24 { 25 PetscErrorCode ierr; 26 PetscTruth ismpimaij,isseqmaij; 27 28 PetscFunctionBegin; 29 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 30 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 31 if (ismpimaij) { 32 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 33 34 *B = b->A; 35 } else if (isseqmaij) { 36 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 37 38 *B = b->AIJ; 39 } else { 40 *B = A; 41 } 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "MatMAIJRedimension" 47 PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 48 { 49 PetscErrorCode ierr; 50 Mat Aij; 51 52 PetscFunctionBegin; 53 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 54 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 58 #undef __FUNCT__ 59 #define __FUNCT__ "MatDestroy_SeqMAIJ" 60 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 61 { 62 PetscErrorCode ierr; 63 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 64 65 PetscFunctionBegin; 66 if (b->AIJ) { 67 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 68 } 69 ierr = PetscFree(b);CHKERRQ(ierr); 70 PetscFunctionReturn(0); 71 } 72 73 #undef __FUNCT__ 74 #define __FUNCT__ "MatDestroy_MPIMAIJ" 75 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 76 { 77 PetscErrorCode ierr; 78 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 79 80 PetscFunctionBegin; 81 if (b->AIJ) { 82 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 83 } 84 if (b->OAIJ) { 85 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 86 } 87 if (b->A) { 88 ierr = MatDestroy(b->A);CHKERRQ(ierr); 89 } 90 if (b->ctx) { 91 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 92 } 93 if (b->w) { 94 ierr = VecDestroy(b->w);CHKERRQ(ierr); 95 } 96 ierr = PetscFree(b);CHKERRQ(ierr); 97 PetscFunctionReturn(0); 98 } 99 100 /*MC 101 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 102 multicomponent problems, interpolating or restricting each component the same way independently. 103 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 104 105 Operations provided: 106 . MatMult 107 . MatMultTranspose 108 . MatMultAdd 109 . MatMultTransposeAdd 110 111 Level: advanced 112 113 .seealso: MatCreateSeqDense 114 M*/ 115 116 EXTERN_C_BEGIN 117 #undef __FUNCT__ 118 #define __FUNCT__ "MatCreate_MAIJ" 119 PetscErrorCode MatCreate_MAIJ(Mat A) 120 { 121 PetscErrorCode ierr; 122 Mat_MPIMAIJ *b; 123 124 PetscFunctionBegin; 125 ierr = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr); 126 A->data = (void*)b; 127 ierr = PetscMemzero(b,sizeof(Mat_MPIMAIJ));CHKERRQ(ierr); 128 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 129 A->factor = 0; 130 A->mapping = 0; 131 132 b->AIJ = 0; 133 b->dof = 0; 134 b->OAIJ = 0; 135 b->ctx = 0; 136 b->w = 0; 137 PetscFunctionReturn(0); 138 } 139 EXTERN_C_END 140 141 /* --------------------------------------------------------------------------------------*/ 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatMult_SeqMAIJ_2" 144 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 145 { 146 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 147 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 148 PetscScalar *x,*y,*v,sum1, sum2; 149 PetscErrorCode ierr; 150 PetscInt m = b->AIJ->m,*idx,*ii; 151 PetscInt n,i,jrow,j; 152 153 PetscFunctionBegin; 154 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 155 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 156 idx = a->j; 157 v = a->a; 158 ii = a->i; 159 160 for (i=0; i<m; i++) { 161 jrow = ii[i]; 162 n = ii[i+1] - jrow; 163 sum1 = 0.0; 164 sum2 = 0.0; 165 for (j=0; j<n; j++) { 166 sum1 += v[jrow]*x[2*idx[jrow]]; 167 sum2 += v[jrow]*x[2*idx[jrow]+1]; 168 jrow++; 169 } 170 y[2*i] = sum1; 171 y[2*i+1] = sum2; 172 } 173 174 PetscLogFlops(4*a->nz - 2*m); 175 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 176 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 177 PetscFunctionReturn(0); 178 } 179 180 #undef __FUNCT__ 181 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 182 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 183 { 184 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 185 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 186 PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 187 PetscErrorCode ierr; 188 PetscInt m = b->AIJ->m,n,i,*idx; 189 190 PetscFunctionBegin; 191 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 192 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 193 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 194 195 for (i=0; i<m; i++) { 196 idx = a->j + a->i[i] ; 197 v = a->a + a->i[i] ; 198 n = a->i[i+1] - a->i[i]; 199 alpha1 = x[2*i]; 200 alpha2 = x[2*i+1]; 201 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 202 } 203 PetscLogFlops(4*a->nz - 2*b->AIJ->n); 204 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 205 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 206 PetscFunctionReturn(0); 207 } 208 209 #undef __FUNCT__ 210 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 211 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 212 { 213 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 214 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 215 PetscScalar *x,*y,*v,sum1, sum2; 216 PetscErrorCode ierr; 217 PetscInt m = b->AIJ->m,*idx,*ii; 218 PetscInt n,i,jrow,j; 219 220 PetscFunctionBegin; 221 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 222 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 223 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 224 idx = a->j; 225 v = a->a; 226 ii = a->i; 227 228 for (i=0; i<m; i++) { 229 jrow = ii[i]; 230 n = ii[i+1] - jrow; 231 sum1 = 0.0; 232 sum2 = 0.0; 233 for (j=0; j<n; j++) { 234 sum1 += v[jrow]*x[2*idx[jrow]]; 235 sum2 += v[jrow]*x[2*idx[jrow]+1]; 236 jrow++; 237 } 238 y[2*i] += sum1; 239 y[2*i+1] += sum2; 240 } 241 242 PetscLogFlops(4*a->nz - 2*m); 243 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 244 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 245 PetscFunctionReturn(0); 246 } 247 #undef __FUNCT__ 248 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 249 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 250 { 251 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 252 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 253 PetscScalar *x,*y,*v,alpha1,alpha2; 254 PetscErrorCode ierr; 255 PetscInt m = b->AIJ->m,n,i,*idx; 256 257 PetscFunctionBegin; 258 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 259 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 260 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 261 262 for (i=0; i<m; i++) { 263 idx = a->j + a->i[i] ; 264 v = a->a + a->i[i] ; 265 n = a->i[i+1] - a->i[i]; 266 alpha1 = x[2*i]; 267 alpha2 = x[2*i+1]; 268 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 269 } 270 PetscLogFlops(4*a->nz - 2*b->AIJ->n); 271 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 272 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 273 PetscFunctionReturn(0); 274 } 275 /* --------------------------------------------------------------------------------------*/ 276 #undef __FUNCT__ 277 #define __FUNCT__ "MatMult_SeqMAIJ_3" 278 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 279 { 280 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 281 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 282 PetscScalar *x,*y,*v,sum1, sum2, sum3; 283 PetscErrorCode ierr; 284 PetscInt m = b->AIJ->m,*idx,*ii; 285 PetscInt n,i,jrow,j; 286 287 PetscFunctionBegin; 288 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 289 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 290 idx = a->j; 291 v = a->a; 292 ii = a->i; 293 294 for (i=0; i<m; i++) { 295 jrow = ii[i]; 296 n = ii[i+1] - jrow; 297 sum1 = 0.0; 298 sum2 = 0.0; 299 sum3 = 0.0; 300 for (j=0; j<n; j++) { 301 sum1 += v[jrow]*x[3*idx[jrow]]; 302 sum2 += v[jrow]*x[3*idx[jrow]+1]; 303 sum3 += v[jrow]*x[3*idx[jrow]+2]; 304 jrow++; 305 } 306 y[3*i] = sum1; 307 y[3*i+1] = sum2; 308 y[3*i+2] = sum3; 309 } 310 311 PetscLogFlops(6*a->nz - 3*m); 312 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 313 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 314 PetscFunctionReturn(0); 315 } 316 317 #undef __FUNCT__ 318 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 319 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 320 { 321 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 322 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 323 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 324 PetscErrorCode ierr; 325 PetscInt m = b->AIJ->m,n,i,*idx; 326 327 PetscFunctionBegin; 328 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 329 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 330 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 331 332 for (i=0; i<m; i++) { 333 idx = a->j + a->i[i]; 334 v = a->a + a->i[i]; 335 n = a->i[i+1] - a->i[i]; 336 alpha1 = x[3*i]; 337 alpha2 = x[3*i+1]; 338 alpha3 = x[3*i+2]; 339 while (n-->0) { 340 y[3*(*idx)] += alpha1*(*v); 341 y[3*(*idx)+1] += alpha2*(*v); 342 y[3*(*idx)+2] += alpha3*(*v); 343 idx++; v++; 344 } 345 } 346 PetscLogFlops(6*a->nz - 3*b->AIJ->n); 347 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 348 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 349 PetscFunctionReturn(0); 350 } 351 352 #undef __FUNCT__ 353 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 354 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 355 { 356 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 357 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 358 PetscScalar *x,*y,*v,sum1, sum2, sum3; 359 PetscErrorCode ierr; 360 PetscInt m = b->AIJ->m,*idx,*ii; 361 PetscInt n,i,jrow,j; 362 363 PetscFunctionBegin; 364 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 365 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 366 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 367 idx = a->j; 368 v = a->a; 369 ii = a->i; 370 371 for (i=0; i<m; i++) { 372 jrow = ii[i]; 373 n = ii[i+1] - jrow; 374 sum1 = 0.0; 375 sum2 = 0.0; 376 sum3 = 0.0; 377 for (j=0; j<n; j++) { 378 sum1 += v[jrow]*x[3*idx[jrow]]; 379 sum2 += v[jrow]*x[3*idx[jrow]+1]; 380 sum3 += v[jrow]*x[3*idx[jrow]+2]; 381 jrow++; 382 } 383 y[3*i] += sum1; 384 y[3*i+1] += sum2; 385 y[3*i+2] += sum3; 386 } 387 388 PetscLogFlops(6*a->nz); 389 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 390 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 391 PetscFunctionReturn(0); 392 } 393 #undef __FUNCT__ 394 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 395 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 396 { 397 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 398 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 399 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 400 PetscErrorCode ierr; 401 PetscInt m = b->AIJ->m,n,i,*idx; 402 403 PetscFunctionBegin; 404 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 405 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 406 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 407 for (i=0; i<m; i++) { 408 idx = a->j + a->i[i] ; 409 v = a->a + a->i[i] ; 410 n = a->i[i+1] - a->i[i]; 411 alpha1 = x[3*i]; 412 alpha2 = x[3*i+1]; 413 alpha3 = x[3*i+2]; 414 while (n-->0) { 415 y[3*(*idx)] += alpha1*(*v); 416 y[3*(*idx)+1] += alpha2*(*v); 417 y[3*(*idx)+2] += alpha3*(*v); 418 idx++; v++; 419 } 420 } 421 PetscLogFlops(6*a->nz); 422 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 423 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 424 PetscFunctionReturn(0); 425 } 426 427 /* ------------------------------------------------------------------------------*/ 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatMult_SeqMAIJ_4" 430 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 431 { 432 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 433 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 434 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 435 PetscErrorCode ierr; 436 PetscInt m = b->AIJ->m,*idx,*ii; 437 PetscInt n,i,jrow,j; 438 439 PetscFunctionBegin; 440 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 441 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 442 idx = a->j; 443 v = a->a; 444 ii = a->i; 445 446 for (i=0; i<m; i++) { 447 jrow = ii[i]; 448 n = ii[i+1] - jrow; 449 sum1 = 0.0; 450 sum2 = 0.0; 451 sum3 = 0.0; 452 sum4 = 0.0; 453 for (j=0; j<n; j++) { 454 sum1 += v[jrow]*x[4*idx[jrow]]; 455 sum2 += v[jrow]*x[4*idx[jrow]+1]; 456 sum3 += v[jrow]*x[4*idx[jrow]+2]; 457 sum4 += v[jrow]*x[4*idx[jrow]+3]; 458 jrow++; 459 } 460 y[4*i] = sum1; 461 y[4*i+1] = sum2; 462 y[4*i+2] = sum3; 463 y[4*i+3] = sum4; 464 } 465 466 PetscLogFlops(8*a->nz - 4*m); 467 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 468 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 469 PetscFunctionReturn(0); 470 } 471 472 #undef __FUNCT__ 473 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 474 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 475 { 476 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 477 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 478 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 479 PetscErrorCode ierr; 480 PetscInt m = b->AIJ->m,n,i,*idx; 481 482 PetscFunctionBegin; 483 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 484 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 485 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 486 for (i=0; i<m; i++) { 487 idx = a->j + a->i[i] ; 488 v = a->a + a->i[i] ; 489 n = a->i[i+1] - a->i[i]; 490 alpha1 = x[4*i]; 491 alpha2 = x[4*i+1]; 492 alpha3 = x[4*i+2]; 493 alpha4 = x[4*i+3]; 494 while (n-->0) { 495 y[4*(*idx)] += alpha1*(*v); 496 y[4*(*idx)+1] += alpha2*(*v); 497 y[4*(*idx)+2] += alpha3*(*v); 498 y[4*(*idx)+3] += alpha4*(*v); 499 idx++; v++; 500 } 501 } 502 PetscLogFlops(8*a->nz - 4*b->AIJ->n); 503 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 504 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 505 PetscFunctionReturn(0); 506 } 507 508 #undef __FUNCT__ 509 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 510 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 511 { 512 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 513 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 514 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 515 PetscErrorCode ierr; 516 PetscInt m = b->AIJ->m,*idx,*ii; 517 PetscInt n,i,jrow,j; 518 519 PetscFunctionBegin; 520 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 521 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 522 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 523 idx = a->j; 524 v = a->a; 525 ii = a->i; 526 527 for (i=0; i<m; i++) { 528 jrow = ii[i]; 529 n = ii[i+1] - jrow; 530 sum1 = 0.0; 531 sum2 = 0.0; 532 sum3 = 0.0; 533 sum4 = 0.0; 534 for (j=0; j<n; j++) { 535 sum1 += v[jrow]*x[4*idx[jrow]]; 536 sum2 += v[jrow]*x[4*idx[jrow]+1]; 537 sum3 += v[jrow]*x[4*idx[jrow]+2]; 538 sum4 += v[jrow]*x[4*idx[jrow]+3]; 539 jrow++; 540 } 541 y[4*i] += sum1; 542 y[4*i+1] += sum2; 543 y[4*i+2] += sum3; 544 y[4*i+3] += sum4; 545 } 546 547 PetscLogFlops(8*a->nz - 4*m); 548 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 549 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 550 PetscFunctionReturn(0); 551 } 552 #undef __FUNCT__ 553 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 554 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 555 { 556 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 557 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 558 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 559 PetscErrorCode ierr; 560 PetscInt m = b->AIJ->m,n,i,*idx; 561 562 PetscFunctionBegin; 563 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 564 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 565 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 566 567 for (i=0; i<m; i++) { 568 idx = a->j + a->i[i] ; 569 v = a->a + a->i[i] ; 570 n = a->i[i+1] - a->i[i]; 571 alpha1 = x[4*i]; 572 alpha2 = x[4*i+1]; 573 alpha3 = x[4*i+2]; 574 alpha4 = x[4*i+3]; 575 while (n-->0) { 576 y[4*(*idx)] += alpha1*(*v); 577 y[4*(*idx)+1] += alpha2*(*v); 578 y[4*(*idx)+2] += alpha3*(*v); 579 y[4*(*idx)+3] += alpha4*(*v); 580 idx++; v++; 581 } 582 } 583 PetscLogFlops(8*a->nz - 4*b->AIJ->n); 584 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 585 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 586 PetscFunctionReturn(0); 587 } 588 /* ------------------------------------------------------------------------------*/ 589 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatMult_SeqMAIJ_5" 592 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 593 { 594 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 595 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 596 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 597 PetscErrorCode ierr; 598 PetscInt m = b->AIJ->m,*idx,*ii; 599 PetscInt n,i,jrow,j; 600 601 PetscFunctionBegin; 602 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 603 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 604 idx = a->j; 605 v = a->a; 606 ii = a->i; 607 608 for (i=0; i<m; i++) { 609 jrow = ii[i]; 610 n = ii[i+1] - jrow; 611 sum1 = 0.0; 612 sum2 = 0.0; 613 sum3 = 0.0; 614 sum4 = 0.0; 615 sum5 = 0.0; 616 for (j=0; j<n; j++) { 617 sum1 += v[jrow]*x[5*idx[jrow]]; 618 sum2 += v[jrow]*x[5*idx[jrow]+1]; 619 sum3 += v[jrow]*x[5*idx[jrow]+2]; 620 sum4 += v[jrow]*x[5*idx[jrow]+3]; 621 sum5 += v[jrow]*x[5*idx[jrow]+4]; 622 jrow++; 623 } 624 y[5*i] = sum1; 625 y[5*i+1] = sum2; 626 y[5*i+2] = sum3; 627 y[5*i+3] = sum4; 628 y[5*i+4] = sum5; 629 } 630 631 PetscLogFlops(10*a->nz - 5*m); 632 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 633 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 634 PetscFunctionReturn(0); 635 } 636 637 #undef __FUNCT__ 638 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 639 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 640 { 641 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 642 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 643 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 644 PetscErrorCode ierr; 645 PetscInt m = b->AIJ->m,n,i,*idx; 646 647 PetscFunctionBegin; 648 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 649 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 650 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 651 652 for (i=0; i<m; i++) { 653 idx = a->j + a->i[i] ; 654 v = a->a + a->i[i] ; 655 n = a->i[i+1] - a->i[i]; 656 alpha1 = x[5*i]; 657 alpha2 = x[5*i+1]; 658 alpha3 = x[5*i+2]; 659 alpha4 = x[5*i+3]; 660 alpha5 = x[5*i+4]; 661 while (n-->0) { 662 y[5*(*idx)] += alpha1*(*v); 663 y[5*(*idx)+1] += alpha2*(*v); 664 y[5*(*idx)+2] += alpha3*(*v); 665 y[5*(*idx)+3] += alpha4*(*v); 666 y[5*(*idx)+4] += alpha5*(*v); 667 idx++; v++; 668 } 669 } 670 PetscLogFlops(10*a->nz - 5*b->AIJ->n); 671 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 672 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 673 PetscFunctionReturn(0); 674 } 675 676 #undef __FUNCT__ 677 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 678 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 679 { 680 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 681 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 682 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 683 PetscErrorCode ierr; 684 PetscInt m = b->AIJ->m,*idx,*ii; 685 PetscInt n,i,jrow,j; 686 687 PetscFunctionBegin; 688 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 689 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 690 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 691 idx = a->j; 692 v = a->a; 693 ii = a->i; 694 695 for (i=0; i<m; i++) { 696 jrow = ii[i]; 697 n = ii[i+1] - jrow; 698 sum1 = 0.0; 699 sum2 = 0.0; 700 sum3 = 0.0; 701 sum4 = 0.0; 702 sum5 = 0.0; 703 for (j=0; j<n; j++) { 704 sum1 += v[jrow]*x[5*idx[jrow]]; 705 sum2 += v[jrow]*x[5*idx[jrow]+1]; 706 sum3 += v[jrow]*x[5*idx[jrow]+2]; 707 sum4 += v[jrow]*x[5*idx[jrow]+3]; 708 sum5 += v[jrow]*x[5*idx[jrow]+4]; 709 jrow++; 710 } 711 y[5*i] += sum1; 712 y[5*i+1] += sum2; 713 y[5*i+2] += sum3; 714 y[5*i+3] += sum4; 715 y[5*i+4] += sum5; 716 } 717 718 PetscLogFlops(10*a->nz); 719 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 720 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 721 PetscFunctionReturn(0); 722 } 723 724 #undef __FUNCT__ 725 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 726 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 727 { 728 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 729 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 730 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 731 PetscErrorCode ierr; 732 PetscInt m = b->AIJ->m,n,i,*idx; 733 734 PetscFunctionBegin; 735 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 736 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 737 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 738 739 for (i=0; i<m; i++) { 740 idx = a->j + a->i[i] ; 741 v = a->a + a->i[i] ; 742 n = a->i[i+1] - a->i[i]; 743 alpha1 = x[5*i]; 744 alpha2 = x[5*i+1]; 745 alpha3 = x[5*i+2]; 746 alpha4 = x[5*i+3]; 747 alpha5 = x[5*i+4]; 748 while (n-->0) { 749 y[5*(*idx)] += alpha1*(*v); 750 y[5*(*idx)+1] += alpha2*(*v); 751 y[5*(*idx)+2] += alpha3*(*v); 752 y[5*(*idx)+3] += alpha4*(*v); 753 y[5*(*idx)+4] += alpha5*(*v); 754 idx++; v++; 755 } 756 } 757 PetscLogFlops(10*a->nz); 758 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 759 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 760 PetscFunctionReturn(0); 761 } 762 763 /* ------------------------------------------------------------------------------*/ 764 #undef __FUNCT__ 765 #define __FUNCT__ "MatMult_SeqMAIJ_6" 766 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 767 { 768 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 769 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 770 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 771 PetscErrorCode ierr; 772 PetscInt m = b->AIJ->m,*idx,*ii; 773 PetscInt n,i,jrow,j; 774 775 PetscFunctionBegin; 776 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 777 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 778 idx = a->j; 779 v = a->a; 780 ii = a->i; 781 782 for (i=0; i<m; i++) { 783 jrow = ii[i]; 784 n = ii[i+1] - jrow; 785 sum1 = 0.0; 786 sum2 = 0.0; 787 sum3 = 0.0; 788 sum4 = 0.0; 789 sum5 = 0.0; 790 sum6 = 0.0; 791 for (j=0; j<n; j++) { 792 sum1 += v[jrow]*x[6*idx[jrow]]; 793 sum2 += v[jrow]*x[6*idx[jrow]+1]; 794 sum3 += v[jrow]*x[6*idx[jrow]+2]; 795 sum4 += v[jrow]*x[6*idx[jrow]+3]; 796 sum5 += v[jrow]*x[6*idx[jrow]+4]; 797 sum6 += v[jrow]*x[6*idx[jrow]+5]; 798 jrow++; 799 } 800 y[6*i] = sum1; 801 y[6*i+1] = sum2; 802 y[6*i+2] = sum3; 803 y[6*i+3] = sum4; 804 y[6*i+4] = sum5; 805 y[6*i+5] = sum6; 806 } 807 808 PetscLogFlops(12*a->nz - 6*m); 809 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 810 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 811 PetscFunctionReturn(0); 812 } 813 814 #undef __FUNCT__ 815 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 816 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 817 { 818 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 819 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 820 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 821 PetscErrorCode ierr; 822 PetscInt m = b->AIJ->m,n,i,*idx; 823 824 PetscFunctionBegin; 825 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 826 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 827 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 828 829 for (i=0; i<m; i++) { 830 idx = a->j + a->i[i] ; 831 v = a->a + a->i[i] ; 832 n = a->i[i+1] - a->i[i]; 833 alpha1 = x[6*i]; 834 alpha2 = x[6*i+1]; 835 alpha3 = x[6*i+2]; 836 alpha4 = x[6*i+3]; 837 alpha5 = x[6*i+4]; 838 alpha6 = x[6*i+5]; 839 while (n-->0) { 840 y[6*(*idx)] += alpha1*(*v); 841 y[6*(*idx)+1] += alpha2*(*v); 842 y[6*(*idx)+2] += alpha3*(*v); 843 y[6*(*idx)+3] += alpha4*(*v); 844 y[6*(*idx)+4] += alpha5*(*v); 845 y[6*(*idx)+5] += alpha6*(*v); 846 idx++; v++; 847 } 848 } 849 PetscLogFlops(12*a->nz - 6*b->AIJ->n); 850 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 851 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 852 PetscFunctionReturn(0); 853 } 854 855 #undef __FUNCT__ 856 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 857 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 858 { 859 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 860 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 861 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 862 PetscErrorCode ierr; 863 PetscInt m = b->AIJ->m,*idx,*ii; 864 PetscInt n,i,jrow,j; 865 866 PetscFunctionBegin; 867 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 868 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 869 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 870 idx = a->j; 871 v = a->a; 872 ii = a->i; 873 874 for (i=0; i<m; i++) { 875 jrow = ii[i]; 876 n = ii[i+1] - jrow; 877 sum1 = 0.0; 878 sum2 = 0.0; 879 sum3 = 0.0; 880 sum4 = 0.0; 881 sum5 = 0.0; 882 sum6 = 0.0; 883 for (j=0; j<n; j++) { 884 sum1 += v[jrow]*x[6*idx[jrow]]; 885 sum2 += v[jrow]*x[6*idx[jrow]+1]; 886 sum3 += v[jrow]*x[6*idx[jrow]+2]; 887 sum4 += v[jrow]*x[6*idx[jrow]+3]; 888 sum5 += v[jrow]*x[6*idx[jrow]+4]; 889 sum6 += v[jrow]*x[6*idx[jrow]+5]; 890 jrow++; 891 } 892 y[6*i] += sum1; 893 y[6*i+1] += sum2; 894 y[6*i+2] += sum3; 895 y[6*i+3] += sum4; 896 y[6*i+4] += sum5; 897 y[6*i+5] += sum6; 898 } 899 900 PetscLogFlops(12*a->nz); 901 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 902 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 903 PetscFunctionReturn(0); 904 } 905 906 #undef __FUNCT__ 907 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 908 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 909 { 910 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 911 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 912 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 913 PetscErrorCode ierr; 914 PetscInt m = b->AIJ->m,n,i,*idx; 915 916 PetscFunctionBegin; 917 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 918 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 919 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 920 921 for (i=0; i<m; i++) { 922 idx = a->j + a->i[i] ; 923 v = a->a + a->i[i] ; 924 n = a->i[i+1] - a->i[i]; 925 alpha1 = x[6*i]; 926 alpha2 = x[6*i+1]; 927 alpha3 = x[6*i+2]; 928 alpha4 = x[6*i+3]; 929 alpha5 = x[6*i+4]; 930 alpha6 = x[6*i+5]; 931 while (n-->0) { 932 y[6*(*idx)] += alpha1*(*v); 933 y[6*(*idx)+1] += alpha2*(*v); 934 y[6*(*idx)+2] += alpha3*(*v); 935 y[6*(*idx)+3] += alpha4*(*v); 936 y[6*(*idx)+4] += alpha5*(*v); 937 y[6*(*idx)+5] += alpha6*(*v); 938 idx++; v++; 939 } 940 } 941 PetscLogFlops(12*a->nz); 942 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 943 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 944 PetscFunctionReturn(0); 945 } 946 947 /* ------------------------------------------------------------------------------*/ 948 #undef __FUNCT__ 949 #define __FUNCT__ "MatMult_SeqMAIJ_8" 950 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 951 { 952 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 953 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 954 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 955 PetscErrorCode ierr; 956 PetscInt m = b->AIJ->m,*idx,*ii; 957 PetscInt n,i,jrow,j; 958 959 PetscFunctionBegin; 960 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 961 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 962 idx = a->j; 963 v = a->a; 964 ii = a->i; 965 966 for (i=0; i<m; i++) { 967 jrow = ii[i]; 968 n = ii[i+1] - jrow; 969 sum1 = 0.0; 970 sum2 = 0.0; 971 sum3 = 0.0; 972 sum4 = 0.0; 973 sum5 = 0.0; 974 sum6 = 0.0; 975 sum7 = 0.0; 976 sum8 = 0.0; 977 for (j=0; j<n; j++) { 978 sum1 += v[jrow]*x[8*idx[jrow]]; 979 sum2 += v[jrow]*x[8*idx[jrow]+1]; 980 sum3 += v[jrow]*x[8*idx[jrow]+2]; 981 sum4 += v[jrow]*x[8*idx[jrow]+3]; 982 sum5 += v[jrow]*x[8*idx[jrow]+4]; 983 sum6 += v[jrow]*x[8*idx[jrow]+5]; 984 sum7 += v[jrow]*x[8*idx[jrow]+6]; 985 sum8 += v[jrow]*x[8*idx[jrow]+7]; 986 jrow++; 987 } 988 y[8*i] = sum1; 989 y[8*i+1] = sum2; 990 y[8*i+2] = sum3; 991 y[8*i+3] = sum4; 992 y[8*i+4] = sum5; 993 y[8*i+5] = sum6; 994 y[8*i+6] = sum7; 995 y[8*i+7] = sum8; 996 } 997 998 PetscLogFlops(16*a->nz - 8*m); 999 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1000 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1001 PetscFunctionReturn(0); 1002 } 1003 1004 #undef __FUNCT__ 1005 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1006 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1007 { 1008 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1009 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1010 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1011 PetscErrorCode ierr; 1012 PetscInt m = b->AIJ->m,n,i,*idx; 1013 1014 PetscFunctionBegin; 1015 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1016 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1017 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1018 1019 for (i=0; i<m; i++) { 1020 idx = a->j + a->i[i] ; 1021 v = a->a + a->i[i] ; 1022 n = a->i[i+1] - a->i[i]; 1023 alpha1 = x[8*i]; 1024 alpha2 = x[8*i+1]; 1025 alpha3 = x[8*i+2]; 1026 alpha4 = x[8*i+3]; 1027 alpha5 = x[8*i+4]; 1028 alpha6 = x[8*i+5]; 1029 alpha7 = x[8*i+6]; 1030 alpha8 = x[8*i+7]; 1031 while (n-->0) { 1032 y[8*(*idx)] += alpha1*(*v); 1033 y[8*(*idx)+1] += alpha2*(*v); 1034 y[8*(*idx)+2] += alpha3*(*v); 1035 y[8*(*idx)+3] += alpha4*(*v); 1036 y[8*(*idx)+4] += alpha5*(*v); 1037 y[8*(*idx)+5] += alpha6*(*v); 1038 y[8*(*idx)+6] += alpha7*(*v); 1039 y[8*(*idx)+7] += alpha8*(*v); 1040 idx++; v++; 1041 } 1042 } 1043 PetscLogFlops(16*a->nz - 8*b->AIJ->n); 1044 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1045 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1046 PetscFunctionReturn(0); 1047 } 1048 1049 #undef __FUNCT__ 1050 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1051 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1052 { 1053 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1054 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1055 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1056 PetscErrorCode ierr; 1057 PetscInt m = b->AIJ->m,*idx,*ii; 1058 PetscInt n,i,jrow,j; 1059 1060 PetscFunctionBegin; 1061 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1062 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1063 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1064 idx = a->j; 1065 v = a->a; 1066 ii = a->i; 1067 1068 for (i=0; i<m; i++) { 1069 jrow = ii[i]; 1070 n = ii[i+1] - jrow; 1071 sum1 = 0.0; 1072 sum2 = 0.0; 1073 sum3 = 0.0; 1074 sum4 = 0.0; 1075 sum5 = 0.0; 1076 sum6 = 0.0; 1077 sum7 = 0.0; 1078 sum8 = 0.0; 1079 for (j=0; j<n; j++) { 1080 sum1 += v[jrow]*x[8*idx[jrow]]; 1081 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1082 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1083 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1084 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1085 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1086 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1087 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1088 jrow++; 1089 } 1090 y[8*i] += sum1; 1091 y[8*i+1] += sum2; 1092 y[8*i+2] += sum3; 1093 y[8*i+3] += sum4; 1094 y[8*i+4] += sum5; 1095 y[8*i+5] += sum6; 1096 y[8*i+6] += sum7; 1097 y[8*i+7] += sum8; 1098 } 1099 1100 PetscLogFlops(16*a->nz); 1101 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1102 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } 1105 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1108 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1109 { 1110 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1111 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1112 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1113 PetscErrorCode ierr; 1114 PetscInt m = b->AIJ->m,n,i,*idx; 1115 1116 PetscFunctionBegin; 1117 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1118 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1119 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1120 for (i=0; i<m; i++) { 1121 idx = a->j + a->i[i] ; 1122 v = a->a + a->i[i] ; 1123 n = a->i[i+1] - a->i[i]; 1124 alpha1 = x[8*i]; 1125 alpha2 = x[8*i+1]; 1126 alpha3 = x[8*i+2]; 1127 alpha4 = x[8*i+3]; 1128 alpha5 = x[8*i+4]; 1129 alpha6 = x[8*i+5]; 1130 alpha7 = x[8*i+6]; 1131 alpha8 = x[8*i+7]; 1132 while (n-->0) { 1133 y[8*(*idx)] += alpha1*(*v); 1134 y[8*(*idx)+1] += alpha2*(*v); 1135 y[8*(*idx)+2] += alpha3*(*v); 1136 y[8*(*idx)+3] += alpha4*(*v); 1137 y[8*(*idx)+4] += alpha5*(*v); 1138 y[8*(*idx)+5] += alpha6*(*v); 1139 y[8*(*idx)+6] += alpha7*(*v); 1140 y[8*(*idx)+7] += alpha8*(*v); 1141 idx++; v++; 1142 } 1143 } 1144 PetscLogFlops(16*a->nz); 1145 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1146 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1147 PetscFunctionReturn(0); 1148 } 1149 1150 /* ------------------------------------------------------------------------------*/ 1151 #undef __FUNCT__ 1152 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1153 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1154 { 1155 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1156 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1157 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1158 PetscErrorCode ierr; 1159 PetscInt m = b->AIJ->m,*idx,*ii; 1160 PetscInt n,i,jrow,j; 1161 1162 PetscFunctionBegin; 1163 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1164 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1165 idx = a->j; 1166 v = a->a; 1167 ii = a->i; 1168 1169 for (i=0; i<m; i++) { 1170 jrow = ii[i]; 1171 n = ii[i+1] - jrow; 1172 sum1 = 0.0; 1173 sum2 = 0.0; 1174 sum3 = 0.0; 1175 sum4 = 0.0; 1176 sum5 = 0.0; 1177 sum6 = 0.0; 1178 sum7 = 0.0; 1179 sum8 = 0.0; 1180 sum9 = 0.0; 1181 for (j=0; j<n; j++) { 1182 sum1 += v[jrow]*x[9*idx[jrow]]; 1183 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1184 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1185 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1186 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1187 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1188 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1189 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1190 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1191 jrow++; 1192 } 1193 y[9*i] = sum1; 1194 y[9*i+1] = sum2; 1195 y[9*i+2] = sum3; 1196 y[9*i+3] = sum4; 1197 y[9*i+4] = sum5; 1198 y[9*i+5] = sum6; 1199 y[9*i+6] = sum7; 1200 y[9*i+7] = sum8; 1201 y[9*i+8] = sum9; 1202 } 1203 1204 PetscLogFlops(18*a->nz - 9*m); 1205 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1206 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1207 PetscFunctionReturn(0); 1208 } 1209 1210 #undef __FUNCT__ 1211 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1212 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1213 { 1214 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1215 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1216 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0; 1217 PetscErrorCode ierr; 1218 PetscInt m = b->AIJ->m,n,i,*idx; 1219 1220 PetscFunctionBegin; 1221 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1222 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1223 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1224 1225 for (i=0; i<m; i++) { 1226 idx = a->j + a->i[i] ; 1227 v = a->a + a->i[i] ; 1228 n = a->i[i+1] - a->i[i]; 1229 alpha1 = x[9*i]; 1230 alpha2 = x[9*i+1]; 1231 alpha3 = x[9*i+2]; 1232 alpha4 = x[9*i+3]; 1233 alpha5 = x[9*i+4]; 1234 alpha6 = x[9*i+5]; 1235 alpha7 = x[9*i+6]; 1236 alpha8 = x[9*i+7]; 1237 alpha9 = x[9*i+8]; 1238 while (n-->0) { 1239 y[9*(*idx)] += alpha1*(*v); 1240 y[9*(*idx)+1] += alpha2*(*v); 1241 y[9*(*idx)+2] += alpha3*(*v); 1242 y[9*(*idx)+3] += alpha4*(*v); 1243 y[9*(*idx)+4] += alpha5*(*v); 1244 y[9*(*idx)+5] += alpha6*(*v); 1245 y[9*(*idx)+6] += alpha7*(*v); 1246 y[9*(*idx)+7] += alpha8*(*v); 1247 y[9*(*idx)+8] += alpha9*(*v); 1248 idx++; v++; 1249 } 1250 } 1251 PetscLogFlops(18*a->nz - 9*b->AIJ->n); 1252 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1253 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #undef __FUNCT__ 1258 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1259 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1260 { 1261 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1262 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1263 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1264 PetscErrorCode ierr; 1265 PetscInt m = b->AIJ->m,*idx,*ii; 1266 PetscInt n,i,jrow,j; 1267 1268 PetscFunctionBegin; 1269 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1270 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1271 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1272 idx = a->j; 1273 v = a->a; 1274 ii = a->i; 1275 1276 for (i=0; i<m; i++) { 1277 jrow = ii[i]; 1278 n = ii[i+1] - jrow; 1279 sum1 = 0.0; 1280 sum2 = 0.0; 1281 sum3 = 0.0; 1282 sum4 = 0.0; 1283 sum5 = 0.0; 1284 sum6 = 0.0; 1285 sum7 = 0.0; 1286 sum8 = 0.0; 1287 sum9 = 0.0; 1288 for (j=0; j<n; j++) { 1289 sum1 += v[jrow]*x[9*idx[jrow]]; 1290 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1291 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1292 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1293 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1294 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1295 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1296 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1297 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1298 jrow++; 1299 } 1300 y[9*i] += sum1; 1301 y[9*i+1] += sum2; 1302 y[9*i+2] += sum3; 1303 y[9*i+3] += sum4; 1304 y[9*i+4] += sum5; 1305 y[9*i+5] += sum6; 1306 y[9*i+6] += sum7; 1307 y[9*i+7] += sum8; 1308 y[9*i+8] += sum9; 1309 } 1310 1311 PetscLogFlops(18*a->nz); 1312 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1313 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1314 PetscFunctionReturn(0); 1315 } 1316 1317 #undef __FUNCT__ 1318 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1319 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1320 { 1321 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1322 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1323 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1324 PetscErrorCode ierr; 1325 PetscInt m = b->AIJ->m,n,i,*idx; 1326 1327 PetscFunctionBegin; 1328 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1329 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1330 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1331 for (i=0; i<m; i++) { 1332 idx = a->j + a->i[i] ; 1333 v = a->a + a->i[i] ; 1334 n = a->i[i+1] - a->i[i]; 1335 alpha1 = x[9*i]; 1336 alpha2 = x[9*i+1]; 1337 alpha3 = x[9*i+2]; 1338 alpha4 = x[9*i+3]; 1339 alpha5 = x[9*i+4]; 1340 alpha6 = x[9*i+5]; 1341 alpha7 = x[9*i+6]; 1342 alpha8 = x[9*i+7]; 1343 alpha9 = x[9*i+8]; 1344 while (n-->0) { 1345 y[9*(*idx)] += alpha1*(*v); 1346 y[9*(*idx)+1] += alpha2*(*v); 1347 y[9*(*idx)+2] += alpha3*(*v); 1348 y[9*(*idx)+3] += alpha4*(*v); 1349 y[9*(*idx)+4] += alpha5*(*v); 1350 y[9*(*idx)+5] += alpha6*(*v); 1351 y[9*(*idx)+6] += alpha7*(*v); 1352 y[9*(*idx)+7] += alpha8*(*v); 1353 y[9*(*idx)+8] += alpha9*(*v); 1354 idx++; v++; 1355 } 1356 } 1357 PetscLogFlops(18*a->nz); 1358 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1359 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1360 PetscFunctionReturn(0); 1361 } 1362 1363 /*--------------------------------------------------------------------------------------------*/ 1364 #undef __FUNCT__ 1365 #define __FUNCT__ "MatMult_SeqMAIJ_16" 1366 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1367 { 1368 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1369 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1370 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1371 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1372 PetscErrorCode ierr; 1373 PetscInt m = b->AIJ->m,*idx,*ii; 1374 PetscInt n,i,jrow,j; 1375 1376 PetscFunctionBegin; 1377 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1378 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1379 idx = a->j; 1380 v = a->a; 1381 ii = a->i; 1382 1383 for (i=0; i<m; i++) { 1384 jrow = ii[i]; 1385 n = ii[i+1] - jrow; 1386 sum1 = 0.0; 1387 sum2 = 0.0; 1388 sum3 = 0.0; 1389 sum4 = 0.0; 1390 sum5 = 0.0; 1391 sum6 = 0.0; 1392 sum7 = 0.0; 1393 sum8 = 0.0; 1394 sum9 = 0.0; 1395 sum10 = 0.0; 1396 sum11 = 0.0; 1397 sum12 = 0.0; 1398 sum13 = 0.0; 1399 sum14 = 0.0; 1400 sum15 = 0.0; 1401 sum16 = 0.0; 1402 for (j=0; j<n; j++) { 1403 sum1 += v[jrow]*x[16*idx[jrow]]; 1404 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1405 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1406 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1407 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1408 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1409 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1410 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1411 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1412 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1413 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1414 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1415 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1416 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1417 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1418 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1419 jrow++; 1420 } 1421 y[16*i] = sum1; 1422 y[16*i+1] = sum2; 1423 y[16*i+2] = sum3; 1424 y[16*i+3] = sum4; 1425 y[16*i+4] = sum5; 1426 y[16*i+5] = sum6; 1427 y[16*i+6] = sum7; 1428 y[16*i+7] = sum8; 1429 y[16*i+8] = sum9; 1430 y[16*i+9] = sum10; 1431 y[16*i+10] = sum11; 1432 y[16*i+11] = sum12; 1433 y[16*i+12] = sum13; 1434 y[16*i+13] = sum14; 1435 y[16*i+14] = sum15; 1436 y[16*i+15] = sum16; 1437 } 1438 1439 PetscLogFlops(32*a->nz - 16*m); 1440 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1441 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1442 PetscFunctionReturn(0); 1443 } 1444 1445 #undef __FUNCT__ 1446 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 1447 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1448 { 1449 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1450 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1451 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1452 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1453 PetscErrorCode ierr; 1454 PetscInt m = b->AIJ->m,n,i,*idx; 1455 1456 PetscFunctionBegin; 1457 ierr = VecSet(&zero,yy);CHKERRQ(ierr); 1458 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1459 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1460 1461 for (i=0; i<m; i++) { 1462 idx = a->j + a->i[i] ; 1463 v = a->a + a->i[i] ; 1464 n = a->i[i+1] - a->i[i]; 1465 alpha1 = x[16*i]; 1466 alpha2 = x[16*i+1]; 1467 alpha3 = x[16*i+2]; 1468 alpha4 = x[16*i+3]; 1469 alpha5 = x[16*i+4]; 1470 alpha6 = x[16*i+5]; 1471 alpha7 = x[16*i+6]; 1472 alpha8 = x[16*i+7]; 1473 alpha9 = x[16*i+8]; 1474 alpha10 = x[16*i+9]; 1475 alpha11 = x[16*i+10]; 1476 alpha12 = x[16*i+11]; 1477 alpha13 = x[16*i+12]; 1478 alpha14 = x[16*i+13]; 1479 alpha15 = x[16*i+14]; 1480 alpha16 = x[16*i+15]; 1481 while (n-->0) { 1482 y[16*(*idx)] += alpha1*(*v); 1483 y[16*(*idx)+1] += alpha2*(*v); 1484 y[16*(*idx)+2] += alpha3*(*v); 1485 y[16*(*idx)+3] += alpha4*(*v); 1486 y[16*(*idx)+4] += alpha5*(*v); 1487 y[16*(*idx)+5] += alpha6*(*v); 1488 y[16*(*idx)+6] += alpha7*(*v); 1489 y[16*(*idx)+7] += alpha8*(*v); 1490 y[16*(*idx)+8] += alpha9*(*v); 1491 y[16*(*idx)+9] += alpha10*(*v); 1492 y[16*(*idx)+10] += alpha11*(*v); 1493 y[16*(*idx)+11] += alpha12*(*v); 1494 y[16*(*idx)+12] += alpha13*(*v); 1495 y[16*(*idx)+13] += alpha14*(*v); 1496 y[16*(*idx)+14] += alpha15*(*v); 1497 y[16*(*idx)+15] += alpha16*(*v); 1498 idx++; v++; 1499 } 1500 } 1501 PetscLogFlops(32*a->nz - 16*b->AIJ->n); 1502 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1503 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1504 PetscFunctionReturn(0); 1505 } 1506 1507 #undef __FUNCT__ 1508 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 1509 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1510 { 1511 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1512 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1513 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1514 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1515 PetscErrorCode ierr; 1516 PetscInt m = b->AIJ->m,*idx,*ii; 1517 PetscInt n,i,jrow,j; 1518 1519 PetscFunctionBegin; 1520 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1521 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1522 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1523 idx = a->j; 1524 v = a->a; 1525 ii = a->i; 1526 1527 for (i=0; i<m; i++) { 1528 jrow = ii[i]; 1529 n = ii[i+1] - jrow; 1530 sum1 = 0.0; 1531 sum2 = 0.0; 1532 sum3 = 0.0; 1533 sum4 = 0.0; 1534 sum5 = 0.0; 1535 sum6 = 0.0; 1536 sum7 = 0.0; 1537 sum8 = 0.0; 1538 sum9 = 0.0; 1539 sum10 = 0.0; 1540 sum11 = 0.0; 1541 sum12 = 0.0; 1542 sum13 = 0.0; 1543 sum14 = 0.0; 1544 sum15 = 0.0; 1545 sum16 = 0.0; 1546 for (j=0; j<n; j++) { 1547 sum1 += v[jrow]*x[16*idx[jrow]]; 1548 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1549 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1550 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1551 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1552 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1553 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1554 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1555 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1556 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1557 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1558 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1559 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1560 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1561 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1562 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1563 jrow++; 1564 } 1565 y[16*i] += sum1; 1566 y[16*i+1] += sum2; 1567 y[16*i+2] += sum3; 1568 y[16*i+3] += sum4; 1569 y[16*i+4] += sum5; 1570 y[16*i+5] += sum6; 1571 y[16*i+6] += sum7; 1572 y[16*i+7] += sum8; 1573 y[16*i+8] += sum9; 1574 y[16*i+9] += sum10; 1575 y[16*i+10] += sum11; 1576 y[16*i+11] += sum12; 1577 y[16*i+12] += sum13; 1578 y[16*i+13] += sum14; 1579 y[16*i+14] += sum15; 1580 y[16*i+15] += sum16; 1581 } 1582 1583 PetscLogFlops(32*a->nz); 1584 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1585 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #undef __FUNCT__ 1590 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 1591 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1592 { 1593 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1594 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1595 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1596 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1597 PetscErrorCode ierr; 1598 PetscInt m = b->AIJ->m,n,i,*idx; 1599 1600 PetscFunctionBegin; 1601 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1602 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1603 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1604 for (i=0; i<m; i++) { 1605 idx = a->j + a->i[i] ; 1606 v = a->a + a->i[i] ; 1607 n = a->i[i+1] - a->i[i]; 1608 alpha1 = x[16*i]; 1609 alpha2 = x[16*i+1]; 1610 alpha3 = x[16*i+2]; 1611 alpha4 = x[16*i+3]; 1612 alpha5 = x[16*i+4]; 1613 alpha6 = x[16*i+5]; 1614 alpha7 = x[16*i+6]; 1615 alpha8 = x[16*i+7]; 1616 alpha9 = x[16*i+8]; 1617 alpha10 = x[16*i+9]; 1618 alpha11 = x[16*i+10]; 1619 alpha12 = x[16*i+11]; 1620 alpha13 = x[16*i+12]; 1621 alpha14 = x[16*i+13]; 1622 alpha15 = x[16*i+14]; 1623 alpha16 = x[16*i+15]; 1624 while (n-->0) { 1625 y[16*(*idx)] += alpha1*(*v); 1626 y[16*(*idx)+1] += alpha2*(*v); 1627 y[16*(*idx)+2] += alpha3*(*v); 1628 y[16*(*idx)+3] += alpha4*(*v); 1629 y[16*(*idx)+4] += alpha5*(*v); 1630 y[16*(*idx)+5] += alpha6*(*v); 1631 y[16*(*idx)+6] += alpha7*(*v); 1632 y[16*(*idx)+7] += alpha8*(*v); 1633 y[16*(*idx)+8] += alpha9*(*v); 1634 y[16*(*idx)+9] += alpha10*(*v); 1635 y[16*(*idx)+10] += alpha11*(*v); 1636 y[16*(*idx)+11] += alpha12*(*v); 1637 y[16*(*idx)+12] += alpha13*(*v); 1638 y[16*(*idx)+13] += alpha14*(*v); 1639 y[16*(*idx)+14] += alpha15*(*v); 1640 y[16*(*idx)+15] += alpha16*(*v); 1641 idx++; v++; 1642 } 1643 } 1644 PetscLogFlops(32*a->nz); 1645 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1646 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1647 PetscFunctionReturn(0); 1648 } 1649 1650 /*===================================================================================*/ 1651 #undef __FUNCT__ 1652 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 1653 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1654 { 1655 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1656 PetscErrorCode ierr; 1657 1658 PetscFunctionBegin; 1659 /* start the scatter */ 1660 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1661 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 1662 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1663 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 1664 PetscFunctionReturn(0); 1665 } 1666 1667 #undef __FUNCT__ 1668 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 1669 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1670 { 1671 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1672 PetscErrorCode ierr; 1673 1674 PetscFunctionBegin; 1675 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1676 ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1677 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 1678 ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1679 PetscFunctionReturn(0); 1680 } 1681 1682 #undef __FUNCT__ 1683 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 1684 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 1685 { 1686 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1687 PetscErrorCode ierr; 1688 1689 PetscFunctionBegin; 1690 /* start the scatter */ 1691 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1692 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1693 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1694 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);CHKERRQ(ierr); 1695 PetscFunctionReturn(0); 1696 } 1697 1698 #undef __FUNCT__ 1699 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 1700 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 1701 { 1702 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1703 PetscErrorCode ierr; 1704 1705 PetscFunctionBegin; 1706 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1707 ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1708 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1709 ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1710 PetscFunctionReturn(0); 1711 } 1712 1713 #include "src/mat/impls/aij/seq/aij.h" 1714 #undef __FUNCT__ 1715 #define __FUNCT__ "MatConvert_MAIJ_SeqAIJ" 1716 PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A,const MatType newtype,Mat *B) 1717 { 1718 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1719 Mat a = b->AIJ; 1720 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 1721 PetscErrorCode ierr; 1722 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii; 1723 const PetscInt *cols; 1724 const PetscScalar *vals; 1725 1726 PetscFunctionBegin; 1727 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 1728 ierr = PetscMalloc(4*m*sizeof(int),&ilen);CHKERRQ(ierr); 1729 for (i=0; i<m; i++) { 1730 nmax = PetscMax(nmax,aij->ilen[i]); 1731 for (j=0; j<4; j++) { 1732 ilen[4*i+j] = aij->ilen[i]; 1733 } 1734 } 1735 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,4*m,4*n,0,ilen,B);CHKERRQ(ierr); 1736 ierr = MatSetOption(*B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1737 ierr = PetscFree(ilen);CHKERRQ(ierr); 1738 ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 1739 ii = 0; 1740 for (i=0; i<m; i++) { 1741 ierr = MatGetRow(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 1742 for (j=0; j<4; j++) { 1743 for (k=0; k<ncols; k++) { 1744 icols[k] = 4*cols[k]+j; 1745 } 1746 ierr = MatSetValues_SeqAIJ(*B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 1747 ii++; 1748 } 1749 ierr = MatRestoreRow(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 1750 } 1751 ierr = PetscFree(icols);CHKERRQ(ierr); 1752 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1753 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1754 PetscFunctionReturn(0); 1755 } 1756 1757 /* ---------------------------------------------------------------------------------- */ 1758 /*MC 1759 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 1760 operations for multicomponent problems. It interpolates each component the same 1761 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 1762 and MATMPIAIJ for distributed matrices. 1763 1764 Operations provided: 1765 . MatMult 1766 . MatMultTranspose 1767 . MatMultAdd 1768 . MatMultTransposeAdd 1769 1770 Level: advanced 1771 1772 M*/ 1773 #undef __FUNCT__ 1774 #define __FUNCT__ "MatCreateMAIJ" 1775 PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 1776 { 1777 PetscErrorCode ierr; 1778 PetscMPIInt size; 1779 PetscInt n; 1780 Mat_MPIMAIJ *b; 1781 Mat B; 1782 1783 PetscFunctionBegin; 1784 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 1785 1786 if (dof == 1) { 1787 *maij = A; 1788 } else { 1789 ierr = MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);CHKERRQ(ierr); 1790 B->assembled = PETSC_TRUE; 1791 1792 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1793 if (size == 1) { 1794 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 1795 B->ops->destroy = MatDestroy_SeqMAIJ; 1796 b = (Mat_MPIMAIJ*)B->data; 1797 b->dof = dof; 1798 b->AIJ = A; 1799 if (dof == 2) { 1800 B->ops->mult = MatMult_SeqMAIJ_2; 1801 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 1802 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 1803 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 1804 } else if (dof == 3) { 1805 B->ops->mult = MatMult_SeqMAIJ_3; 1806 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 1807 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 1808 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 1809 } else if (dof == 4) { 1810 B->ops->mult = MatMult_SeqMAIJ_4; 1811 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 1812 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 1813 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 1814 } else if (dof == 5) { 1815 B->ops->mult = MatMult_SeqMAIJ_5; 1816 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 1817 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 1818 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 1819 } else if (dof == 6) { 1820 B->ops->mult = MatMult_SeqMAIJ_6; 1821 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 1822 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 1823 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 1824 } else if (dof == 8) { 1825 B->ops->mult = MatMult_SeqMAIJ_8; 1826 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 1827 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 1828 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 1829 } else if (dof == 9) { 1830 B->ops->mult = MatMult_SeqMAIJ_9; 1831 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 1832 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 1833 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 1834 } else if (dof == 16) { 1835 B->ops->mult = MatMult_SeqMAIJ_16; 1836 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 1837 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 1838 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 1839 } else { 1840 SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 1841 } 1842 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 1843 } else { 1844 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 1845 IS from,to; 1846 Vec gvec; 1847 PetscInt *garray,i; 1848 1849 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 1850 B->ops->destroy = MatDestroy_MPIMAIJ; 1851 b = (Mat_MPIMAIJ*)B->data; 1852 b->dof = dof; 1853 b->A = A; 1854 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 1855 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 1856 1857 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 1858 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 1859 1860 /* create two temporary Index sets for build scatter gather */ 1861 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 1862 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 1863 ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 1864 ierr = PetscFree(garray);CHKERRQ(ierr); 1865 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 1866 1867 /* create temporary global vector to generate scatter context */ 1868 ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr); 1869 1870 /* generate the scatter context */ 1871 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 1872 1873 ierr = ISDestroy(from);CHKERRQ(ierr); 1874 ierr = ISDestroy(to);CHKERRQ(ierr); 1875 ierr = VecDestroy(gvec);CHKERRQ(ierr); 1876 1877 B->ops->mult = MatMult_MPIMAIJ_dof; 1878 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 1879 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 1880 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 1881 } 1882 *maij = B; 1883 } 1884 PetscFunctionReturn(0); 1885 } 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898