1 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20 #include <../src/mat/utils/freespace.h> 21 22 /*@ 23 MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 24 25 Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 26 27 Input Parameter: 28 . A - the MAIJ matrix 29 30 Output Parameter: 31 . B - the AIJ matrix 32 33 Level: advanced 34 35 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 36 37 .seealso: MatCreateMAIJ() 38 @*/ 39 PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 40 { 41 PetscErrorCode ierr; 42 PetscBool ismpimaij,isseqmaij; 43 44 PetscFunctionBegin; 45 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 46 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 47 if (ismpimaij) { 48 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 49 50 *B = b->A; 51 } else if (isseqmaij) { 52 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 53 54 *B = b->AIJ; 55 } else { 56 *B = A; 57 } 58 PetscFunctionReturn(0); 59 } 60 61 /*@C 62 MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 63 64 Logically Collective 65 66 Input Parameter: 67 + A - the MAIJ matrix 68 - dof - the block size for the new matrix 69 70 Output Parameter: 71 . B - the new MAIJ matrix 72 73 Level: advanced 74 75 .seealso: MatCreateMAIJ() 76 @*/ 77 PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 78 { 79 PetscErrorCode ierr; 80 Mat Aij = NULL; 81 82 PetscFunctionBegin; 83 PetscValidLogicalCollectiveInt(A,dof,2); 84 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 85 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 86 PetscFunctionReturn(0); 87 } 88 89 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 90 { 91 PetscErrorCode ierr; 92 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 93 94 PetscFunctionBegin; 95 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 96 ierr = PetscFree(A->data);CHKERRQ(ierr); 97 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr); 98 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);CHKERRQ(ierr); 99 PetscFunctionReturn(0); 100 } 101 102 PetscErrorCode MatSetUp_MAIJ(Mat A) 103 { 104 PetscFunctionBegin; 105 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices"); 106 PetscFunctionReturn(0); 107 } 108 109 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 110 { 111 PetscErrorCode ierr; 112 Mat B; 113 114 PetscFunctionBegin; 115 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 116 ierr = MatView(B,viewer);CHKERRQ(ierr); 117 ierr = MatDestroy(&B);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 122 { 123 PetscErrorCode ierr; 124 Mat B; 125 126 PetscFunctionBegin; 127 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 128 ierr = MatView(B,viewer);CHKERRQ(ierr); 129 ierr = MatDestroy(&B);CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 134 { 135 PetscErrorCode ierr; 136 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 137 138 PetscFunctionBegin; 139 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 140 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 141 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 142 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 143 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 144 ierr = PetscFree(A->data);CHKERRQ(ierr); 145 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr); 146 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr); 147 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 148 PetscFunctionReturn(0); 149 } 150 151 /*MC 152 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 153 multicomponent problems, interpolating or restricting each component the same way independently. 154 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 155 156 Operations provided: 157 . MatMult 158 . MatMultTranspose 159 . MatMultAdd 160 . MatMultTransposeAdd 161 162 Level: advanced 163 164 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ() 165 M*/ 166 167 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 168 { 169 PetscErrorCode ierr; 170 Mat_MPIMAIJ *b; 171 PetscMPIInt size; 172 173 PetscFunctionBegin; 174 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 175 A->data = (void*)b; 176 177 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 178 179 A->ops->setup = MatSetUp_MAIJ; 180 181 b->AIJ = 0; 182 b->dof = 0; 183 b->OAIJ = 0; 184 b->ctx = 0; 185 b->w = 0; 186 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 187 if (size == 1) { 188 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 189 } else { 190 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 191 } 192 A->preallocated = PETSC_TRUE; 193 A->assembled = PETSC_TRUE; 194 PetscFunctionReturn(0); 195 } 196 197 /* --------------------------------------------------------------------------------------*/ 198 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 199 { 200 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 201 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 202 const PetscScalar *x,*v; 203 PetscScalar *y, sum1, sum2; 204 PetscErrorCode ierr; 205 PetscInt nonzerorow=0,n,i,jrow,j; 206 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 207 208 PetscFunctionBegin; 209 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 210 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 211 idx = a->j; 212 v = a->a; 213 ii = a->i; 214 215 for (i=0; i<m; i++) { 216 jrow = ii[i]; 217 n = ii[i+1] - jrow; 218 sum1 = 0.0; 219 sum2 = 0.0; 220 221 nonzerorow += (n>0); 222 for (j=0; j<n; j++) { 223 sum1 += v[jrow]*x[2*idx[jrow]]; 224 sum2 += v[jrow]*x[2*idx[jrow]+1]; 225 jrow++; 226 } 227 y[2*i] = sum1; 228 y[2*i+1] = sum2; 229 } 230 231 ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 232 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 233 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 234 PetscFunctionReturn(0); 235 } 236 237 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 238 { 239 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 240 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 241 const PetscScalar *x,*v; 242 PetscScalar *y,alpha1,alpha2; 243 PetscErrorCode ierr; 244 PetscInt n,i; 245 const PetscInt m = b->AIJ->rmap->n,*idx; 246 247 PetscFunctionBegin; 248 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 249 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 250 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 251 252 for (i=0; i<m; i++) { 253 idx = a->j + a->i[i]; 254 v = a->a + a->i[i]; 255 n = a->i[i+1] - a->i[i]; 256 alpha1 = x[2*i]; 257 alpha2 = x[2*i+1]; 258 while (n-->0) { 259 y[2*(*idx)] += alpha1*(*v); 260 y[2*(*idx)+1] += alpha2*(*v); 261 idx++; v++; 262 } 263 } 264 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 265 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 266 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 267 PetscFunctionReturn(0); 268 } 269 270 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 271 { 272 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 273 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 274 const PetscScalar *x,*v; 275 PetscScalar *y,sum1, sum2; 276 PetscErrorCode ierr; 277 PetscInt n,i,jrow,j; 278 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 279 280 PetscFunctionBegin; 281 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 282 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 283 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 284 idx = a->j; 285 v = a->a; 286 ii = a->i; 287 288 for (i=0; i<m; i++) { 289 jrow = ii[i]; 290 n = ii[i+1] - jrow; 291 sum1 = 0.0; 292 sum2 = 0.0; 293 for (j=0; j<n; j++) { 294 sum1 += v[jrow]*x[2*idx[jrow]]; 295 sum2 += v[jrow]*x[2*idx[jrow]+1]; 296 jrow++; 297 } 298 y[2*i] += sum1; 299 y[2*i+1] += sum2; 300 } 301 302 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 303 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 304 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 305 PetscFunctionReturn(0); 306 } 307 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 308 { 309 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 310 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 311 const PetscScalar *x,*v; 312 PetscScalar *y,alpha1,alpha2; 313 PetscErrorCode ierr; 314 PetscInt n,i; 315 const PetscInt m = b->AIJ->rmap->n,*idx; 316 317 PetscFunctionBegin; 318 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 319 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 320 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 321 322 for (i=0; i<m; i++) { 323 idx = a->j + a->i[i]; 324 v = a->a + a->i[i]; 325 n = a->i[i+1] - a->i[i]; 326 alpha1 = x[2*i]; 327 alpha2 = x[2*i+1]; 328 while (n-->0) { 329 y[2*(*idx)] += alpha1*(*v); 330 y[2*(*idx)+1] += alpha2*(*v); 331 idx++; v++; 332 } 333 } 334 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 335 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 336 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 337 PetscFunctionReturn(0); 338 } 339 /* --------------------------------------------------------------------------------------*/ 340 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 341 { 342 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 343 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 344 const PetscScalar *x,*v; 345 PetscScalar *y,sum1, sum2, sum3; 346 PetscErrorCode ierr; 347 PetscInt nonzerorow=0,n,i,jrow,j; 348 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 349 350 PetscFunctionBegin; 351 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 352 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 353 idx = a->j; 354 v = a->a; 355 ii = a->i; 356 357 for (i=0; i<m; i++) { 358 jrow = ii[i]; 359 n = ii[i+1] - jrow; 360 sum1 = 0.0; 361 sum2 = 0.0; 362 sum3 = 0.0; 363 364 nonzerorow += (n>0); 365 for (j=0; j<n; j++) { 366 sum1 += v[jrow]*x[3*idx[jrow]]; 367 sum2 += v[jrow]*x[3*idx[jrow]+1]; 368 sum3 += v[jrow]*x[3*idx[jrow]+2]; 369 jrow++; 370 } 371 y[3*i] = sum1; 372 y[3*i+1] = sum2; 373 y[3*i+2] = sum3; 374 } 375 376 ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 377 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 378 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 379 PetscFunctionReturn(0); 380 } 381 382 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 383 { 384 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 385 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 386 const PetscScalar *x,*v; 387 PetscScalar *y,alpha1,alpha2,alpha3; 388 PetscErrorCode ierr; 389 PetscInt n,i; 390 const PetscInt m = b->AIJ->rmap->n,*idx; 391 392 PetscFunctionBegin; 393 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 394 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 395 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 396 397 for (i=0; i<m; i++) { 398 idx = a->j + a->i[i]; 399 v = a->a + a->i[i]; 400 n = a->i[i+1] - a->i[i]; 401 alpha1 = x[3*i]; 402 alpha2 = x[3*i+1]; 403 alpha3 = x[3*i+2]; 404 while (n-->0) { 405 y[3*(*idx)] += alpha1*(*v); 406 y[3*(*idx)+1] += alpha2*(*v); 407 y[3*(*idx)+2] += alpha3*(*v); 408 idx++; v++; 409 } 410 } 411 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 412 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 413 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 414 PetscFunctionReturn(0); 415 } 416 417 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 418 { 419 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 420 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 421 const PetscScalar *x,*v; 422 PetscScalar *y,sum1, sum2, sum3; 423 PetscErrorCode ierr; 424 PetscInt n,i,jrow,j; 425 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 426 427 PetscFunctionBegin; 428 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 429 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 430 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 431 idx = a->j; 432 v = a->a; 433 ii = a->i; 434 435 for (i=0; i<m; i++) { 436 jrow = ii[i]; 437 n = ii[i+1] - jrow; 438 sum1 = 0.0; 439 sum2 = 0.0; 440 sum3 = 0.0; 441 for (j=0; j<n; j++) { 442 sum1 += v[jrow]*x[3*idx[jrow]]; 443 sum2 += v[jrow]*x[3*idx[jrow]+1]; 444 sum3 += v[jrow]*x[3*idx[jrow]+2]; 445 jrow++; 446 } 447 y[3*i] += sum1; 448 y[3*i+1] += sum2; 449 y[3*i+2] += sum3; 450 } 451 452 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 453 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 454 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 455 PetscFunctionReturn(0); 456 } 457 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 458 { 459 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 460 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 461 const PetscScalar *x,*v; 462 PetscScalar *y,alpha1,alpha2,alpha3; 463 PetscErrorCode ierr; 464 PetscInt n,i; 465 const PetscInt m = b->AIJ->rmap->n,*idx; 466 467 PetscFunctionBegin; 468 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 469 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 470 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 471 for (i=0; i<m; i++) { 472 idx = a->j + a->i[i]; 473 v = a->a + a->i[i]; 474 n = a->i[i+1] - a->i[i]; 475 alpha1 = x[3*i]; 476 alpha2 = x[3*i+1]; 477 alpha3 = x[3*i+2]; 478 while (n-->0) { 479 y[3*(*idx)] += alpha1*(*v); 480 y[3*(*idx)+1] += alpha2*(*v); 481 y[3*(*idx)+2] += alpha3*(*v); 482 idx++; v++; 483 } 484 } 485 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 486 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 487 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 488 PetscFunctionReturn(0); 489 } 490 491 /* ------------------------------------------------------------------------------*/ 492 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 493 { 494 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 495 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 496 const PetscScalar *x,*v; 497 PetscScalar *y,sum1, sum2, sum3, sum4; 498 PetscErrorCode ierr; 499 PetscInt nonzerorow=0,n,i,jrow,j; 500 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 501 502 PetscFunctionBegin; 503 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 504 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 505 idx = a->j; 506 v = a->a; 507 ii = a->i; 508 509 for (i=0; i<m; i++) { 510 jrow = ii[i]; 511 n = ii[i+1] - jrow; 512 sum1 = 0.0; 513 sum2 = 0.0; 514 sum3 = 0.0; 515 sum4 = 0.0; 516 nonzerorow += (n>0); 517 for (j=0; j<n; j++) { 518 sum1 += v[jrow]*x[4*idx[jrow]]; 519 sum2 += v[jrow]*x[4*idx[jrow]+1]; 520 sum3 += v[jrow]*x[4*idx[jrow]+2]; 521 sum4 += v[jrow]*x[4*idx[jrow]+3]; 522 jrow++; 523 } 524 y[4*i] = sum1; 525 y[4*i+1] = sum2; 526 y[4*i+2] = sum3; 527 y[4*i+3] = sum4; 528 } 529 530 ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 531 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 532 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 533 PetscFunctionReturn(0); 534 } 535 536 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 537 { 538 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 539 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 540 const PetscScalar *x,*v; 541 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 542 PetscErrorCode ierr; 543 PetscInt n,i; 544 const PetscInt m = b->AIJ->rmap->n,*idx; 545 546 PetscFunctionBegin; 547 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 548 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 549 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 550 for (i=0; i<m; i++) { 551 idx = a->j + a->i[i]; 552 v = a->a + a->i[i]; 553 n = a->i[i+1] - a->i[i]; 554 alpha1 = x[4*i]; 555 alpha2 = x[4*i+1]; 556 alpha3 = x[4*i+2]; 557 alpha4 = x[4*i+3]; 558 while (n-->0) { 559 y[4*(*idx)] += alpha1*(*v); 560 y[4*(*idx)+1] += alpha2*(*v); 561 y[4*(*idx)+2] += alpha3*(*v); 562 y[4*(*idx)+3] += alpha4*(*v); 563 idx++; v++; 564 } 565 } 566 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 567 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 568 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 569 PetscFunctionReturn(0); 570 } 571 572 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 573 { 574 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 575 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 576 const PetscScalar *x,*v; 577 PetscScalar *y,sum1, sum2, sum3, sum4; 578 PetscErrorCode ierr; 579 PetscInt n,i,jrow,j; 580 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 581 582 PetscFunctionBegin; 583 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 584 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 585 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 586 idx = a->j; 587 v = a->a; 588 ii = a->i; 589 590 for (i=0; i<m; i++) { 591 jrow = ii[i]; 592 n = ii[i+1] - jrow; 593 sum1 = 0.0; 594 sum2 = 0.0; 595 sum3 = 0.0; 596 sum4 = 0.0; 597 for (j=0; j<n; j++) { 598 sum1 += v[jrow]*x[4*idx[jrow]]; 599 sum2 += v[jrow]*x[4*idx[jrow]+1]; 600 sum3 += v[jrow]*x[4*idx[jrow]+2]; 601 sum4 += v[jrow]*x[4*idx[jrow]+3]; 602 jrow++; 603 } 604 y[4*i] += sum1; 605 y[4*i+1] += sum2; 606 y[4*i+2] += sum3; 607 y[4*i+3] += sum4; 608 } 609 610 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 611 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 612 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 613 PetscFunctionReturn(0); 614 } 615 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 616 { 617 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 618 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 619 const PetscScalar *x,*v; 620 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 621 PetscErrorCode ierr; 622 PetscInt n,i; 623 const PetscInt m = b->AIJ->rmap->n,*idx; 624 625 PetscFunctionBegin; 626 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 627 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 628 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 629 630 for (i=0; i<m; i++) { 631 idx = a->j + a->i[i]; 632 v = a->a + a->i[i]; 633 n = a->i[i+1] - a->i[i]; 634 alpha1 = x[4*i]; 635 alpha2 = x[4*i+1]; 636 alpha3 = x[4*i+2]; 637 alpha4 = x[4*i+3]; 638 while (n-->0) { 639 y[4*(*idx)] += alpha1*(*v); 640 y[4*(*idx)+1] += alpha2*(*v); 641 y[4*(*idx)+2] += alpha3*(*v); 642 y[4*(*idx)+3] += alpha4*(*v); 643 idx++; v++; 644 } 645 } 646 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 647 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 648 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 649 PetscFunctionReturn(0); 650 } 651 /* ------------------------------------------------------------------------------*/ 652 653 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 654 { 655 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 656 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 657 const PetscScalar *x,*v; 658 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 659 PetscErrorCode ierr; 660 PetscInt nonzerorow=0,n,i,jrow,j; 661 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 662 663 PetscFunctionBegin; 664 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 665 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 666 idx = a->j; 667 v = a->a; 668 ii = a->i; 669 670 for (i=0; i<m; i++) { 671 jrow = ii[i]; 672 n = ii[i+1] - jrow; 673 sum1 = 0.0; 674 sum2 = 0.0; 675 sum3 = 0.0; 676 sum4 = 0.0; 677 sum5 = 0.0; 678 679 nonzerorow += (n>0); 680 for (j=0; j<n; j++) { 681 sum1 += v[jrow]*x[5*idx[jrow]]; 682 sum2 += v[jrow]*x[5*idx[jrow]+1]; 683 sum3 += v[jrow]*x[5*idx[jrow]+2]; 684 sum4 += v[jrow]*x[5*idx[jrow]+3]; 685 sum5 += v[jrow]*x[5*idx[jrow]+4]; 686 jrow++; 687 } 688 y[5*i] = sum1; 689 y[5*i+1] = sum2; 690 y[5*i+2] = sum3; 691 y[5*i+3] = sum4; 692 y[5*i+4] = sum5; 693 } 694 695 ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 696 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 697 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 698 PetscFunctionReturn(0); 699 } 700 701 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 702 { 703 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 704 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 705 const PetscScalar *x,*v; 706 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 707 PetscErrorCode ierr; 708 PetscInt n,i; 709 const PetscInt m = b->AIJ->rmap->n,*idx; 710 711 PetscFunctionBegin; 712 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 713 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 714 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 715 716 for (i=0; i<m; i++) { 717 idx = a->j + a->i[i]; 718 v = a->a + a->i[i]; 719 n = a->i[i+1] - a->i[i]; 720 alpha1 = x[5*i]; 721 alpha2 = x[5*i+1]; 722 alpha3 = x[5*i+2]; 723 alpha4 = x[5*i+3]; 724 alpha5 = x[5*i+4]; 725 while (n-->0) { 726 y[5*(*idx)] += alpha1*(*v); 727 y[5*(*idx)+1] += alpha2*(*v); 728 y[5*(*idx)+2] += alpha3*(*v); 729 y[5*(*idx)+3] += alpha4*(*v); 730 y[5*(*idx)+4] += alpha5*(*v); 731 idx++; v++; 732 } 733 } 734 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 735 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 736 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 741 { 742 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 743 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 744 const PetscScalar *x,*v; 745 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 746 PetscErrorCode ierr; 747 PetscInt n,i,jrow,j; 748 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 749 750 PetscFunctionBegin; 751 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 752 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 753 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 754 idx = a->j; 755 v = a->a; 756 ii = a->i; 757 758 for (i=0; i<m; i++) { 759 jrow = ii[i]; 760 n = ii[i+1] - jrow; 761 sum1 = 0.0; 762 sum2 = 0.0; 763 sum3 = 0.0; 764 sum4 = 0.0; 765 sum5 = 0.0; 766 for (j=0; j<n; j++) { 767 sum1 += v[jrow]*x[5*idx[jrow]]; 768 sum2 += v[jrow]*x[5*idx[jrow]+1]; 769 sum3 += v[jrow]*x[5*idx[jrow]+2]; 770 sum4 += v[jrow]*x[5*idx[jrow]+3]; 771 sum5 += v[jrow]*x[5*idx[jrow]+4]; 772 jrow++; 773 } 774 y[5*i] += sum1; 775 y[5*i+1] += sum2; 776 y[5*i+2] += sum3; 777 y[5*i+3] += sum4; 778 y[5*i+4] += sum5; 779 } 780 781 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 782 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 783 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 784 PetscFunctionReturn(0); 785 } 786 787 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 788 { 789 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 790 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 791 const PetscScalar *x,*v; 792 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 793 PetscErrorCode ierr; 794 PetscInt n,i; 795 const PetscInt m = b->AIJ->rmap->n,*idx; 796 797 PetscFunctionBegin; 798 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 799 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 800 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 801 802 for (i=0; i<m; i++) { 803 idx = a->j + a->i[i]; 804 v = a->a + a->i[i]; 805 n = a->i[i+1] - a->i[i]; 806 alpha1 = x[5*i]; 807 alpha2 = x[5*i+1]; 808 alpha3 = x[5*i+2]; 809 alpha4 = x[5*i+3]; 810 alpha5 = x[5*i+4]; 811 while (n-->0) { 812 y[5*(*idx)] += alpha1*(*v); 813 y[5*(*idx)+1] += alpha2*(*v); 814 y[5*(*idx)+2] += alpha3*(*v); 815 y[5*(*idx)+3] += alpha4*(*v); 816 y[5*(*idx)+4] += alpha5*(*v); 817 idx++; v++; 818 } 819 } 820 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 821 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 822 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 823 PetscFunctionReturn(0); 824 } 825 826 /* ------------------------------------------------------------------------------*/ 827 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 828 { 829 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 830 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 831 const PetscScalar *x,*v; 832 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 833 PetscErrorCode ierr; 834 PetscInt nonzerorow=0,n,i,jrow,j; 835 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 836 837 PetscFunctionBegin; 838 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 839 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 840 idx = a->j; 841 v = a->a; 842 ii = a->i; 843 844 for (i=0; i<m; i++) { 845 jrow = ii[i]; 846 n = ii[i+1] - jrow; 847 sum1 = 0.0; 848 sum2 = 0.0; 849 sum3 = 0.0; 850 sum4 = 0.0; 851 sum5 = 0.0; 852 sum6 = 0.0; 853 854 nonzerorow += (n>0); 855 for (j=0; j<n; j++) { 856 sum1 += v[jrow]*x[6*idx[jrow]]; 857 sum2 += v[jrow]*x[6*idx[jrow]+1]; 858 sum3 += v[jrow]*x[6*idx[jrow]+2]; 859 sum4 += v[jrow]*x[6*idx[jrow]+3]; 860 sum5 += v[jrow]*x[6*idx[jrow]+4]; 861 sum6 += v[jrow]*x[6*idx[jrow]+5]; 862 jrow++; 863 } 864 y[6*i] = sum1; 865 y[6*i+1] = sum2; 866 y[6*i+2] = sum3; 867 y[6*i+3] = sum4; 868 y[6*i+4] = sum5; 869 y[6*i+5] = sum6; 870 } 871 872 ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 873 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 874 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 875 PetscFunctionReturn(0); 876 } 877 878 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 879 { 880 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 881 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 882 const PetscScalar *x,*v; 883 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 884 PetscErrorCode ierr; 885 PetscInt n,i; 886 const PetscInt m = b->AIJ->rmap->n,*idx; 887 888 PetscFunctionBegin; 889 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 890 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 891 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 892 893 for (i=0; i<m; i++) { 894 idx = a->j + a->i[i]; 895 v = a->a + a->i[i]; 896 n = a->i[i+1] - a->i[i]; 897 alpha1 = x[6*i]; 898 alpha2 = x[6*i+1]; 899 alpha3 = x[6*i+2]; 900 alpha4 = x[6*i+3]; 901 alpha5 = x[6*i+4]; 902 alpha6 = x[6*i+5]; 903 while (n-->0) { 904 y[6*(*idx)] += alpha1*(*v); 905 y[6*(*idx)+1] += alpha2*(*v); 906 y[6*(*idx)+2] += alpha3*(*v); 907 y[6*(*idx)+3] += alpha4*(*v); 908 y[6*(*idx)+4] += alpha5*(*v); 909 y[6*(*idx)+5] += alpha6*(*v); 910 idx++; v++; 911 } 912 } 913 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 914 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 915 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 916 PetscFunctionReturn(0); 917 } 918 919 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 920 { 921 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 922 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 923 const PetscScalar *x,*v; 924 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 925 PetscErrorCode ierr; 926 PetscInt n,i,jrow,j; 927 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 928 929 PetscFunctionBegin; 930 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 931 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 932 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 933 idx = a->j; 934 v = a->a; 935 ii = a->i; 936 937 for (i=0; i<m; i++) { 938 jrow = ii[i]; 939 n = ii[i+1] - jrow; 940 sum1 = 0.0; 941 sum2 = 0.0; 942 sum3 = 0.0; 943 sum4 = 0.0; 944 sum5 = 0.0; 945 sum6 = 0.0; 946 for (j=0; j<n; j++) { 947 sum1 += v[jrow]*x[6*idx[jrow]]; 948 sum2 += v[jrow]*x[6*idx[jrow]+1]; 949 sum3 += v[jrow]*x[6*idx[jrow]+2]; 950 sum4 += v[jrow]*x[6*idx[jrow]+3]; 951 sum5 += v[jrow]*x[6*idx[jrow]+4]; 952 sum6 += v[jrow]*x[6*idx[jrow]+5]; 953 jrow++; 954 } 955 y[6*i] += sum1; 956 y[6*i+1] += sum2; 957 y[6*i+2] += sum3; 958 y[6*i+3] += sum4; 959 y[6*i+4] += sum5; 960 y[6*i+5] += sum6; 961 } 962 963 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 964 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 965 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } 968 969 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 970 { 971 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 972 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 973 const PetscScalar *x,*v; 974 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 975 PetscErrorCode ierr; 976 PetscInt n,i; 977 const PetscInt m = b->AIJ->rmap->n,*idx; 978 979 PetscFunctionBegin; 980 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 981 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 982 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 983 984 for (i=0; i<m; i++) { 985 idx = a->j + a->i[i]; 986 v = a->a + a->i[i]; 987 n = a->i[i+1] - a->i[i]; 988 alpha1 = x[6*i]; 989 alpha2 = x[6*i+1]; 990 alpha3 = x[6*i+2]; 991 alpha4 = x[6*i+3]; 992 alpha5 = x[6*i+4]; 993 alpha6 = x[6*i+5]; 994 while (n-->0) { 995 y[6*(*idx)] += alpha1*(*v); 996 y[6*(*idx)+1] += alpha2*(*v); 997 y[6*(*idx)+2] += alpha3*(*v); 998 y[6*(*idx)+3] += alpha4*(*v); 999 y[6*(*idx)+4] += alpha5*(*v); 1000 y[6*(*idx)+5] += alpha6*(*v); 1001 idx++; v++; 1002 } 1003 } 1004 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1005 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1006 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1007 PetscFunctionReturn(0); 1008 } 1009 1010 /* ------------------------------------------------------------------------------*/ 1011 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1012 { 1013 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1014 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1015 const PetscScalar *x,*v; 1016 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1017 PetscErrorCode ierr; 1018 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1019 PetscInt nonzerorow=0,n,i,jrow,j; 1020 1021 PetscFunctionBegin; 1022 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1023 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1024 idx = a->j; 1025 v = a->a; 1026 ii = a->i; 1027 1028 for (i=0; i<m; i++) { 1029 jrow = ii[i]; 1030 n = ii[i+1] - jrow; 1031 sum1 = 0.0; 1032 sum2 = 0.0; 1033 sum3 = 0.0; 1034 sum4 = 0.0; 1035 sum5 = 0.0; 1036 sum6 = 0.0; 1037 sum7 = 0.0; 1038 1039 nonzerorow += (n>0); 1040 for (j=0; j<n; j++) { 1041 sum1 += v[jrow]*x[7*idx[jrow]]; 1042 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1043 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1044 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1045 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1046 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1047 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1048 jrow++; 1049 } 1050 y[7*i] = sum1; 1051 y[7*i+1] = sum2; 1052 y[7*i+2] = sum3; 1053 y[7*i+3] = sum4; 1054 y[7*i+4] = sum5; 1055 y[7*i+5] = sum6; 1056 y[7*i+6] = sum7; 1057 } 1058 1059 ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 1060 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1061 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1062 PetscFunctionReturn(0); 1063 } 1064 1065 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1066 { 1067 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1068 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1069 const PetscScalar *x,*v; 1070 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1071 PetscErrorCode ierr; 1072 const PetscInt m = b->AIJ->rmap->n,*idx; 1073 PetscInt n,i; 1074 1075 PetscFunctionBegin; 1076 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1077 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1078 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1079 1080 for (i=0; i<m; i++) { 1081 idx = a->j + a->i[i]; 1082 v = a->a + a->i[i]; 1083 n = a->i[i+1] - a->i[i]; 1084 alpha1 = x[7*i]; 1085 alpha2 = x[7*i+1]; 1086 alpha3 = x[7*i+2]; 1087 alpha4 = x[7*i+3]; 1088 alpha5 = x[7*i+4]; 1089 alpha6 = x[7*i+5]; 1090 alpha7 = x[7*i+6]; 1091 while (n-->0) { 1092 y[7*(*idx)] += alpha1*(*v); 1093 y[7*(*idx)+1] += alpha2*(*v); 1094 y[7*(*idx)+2] += alpha3*(*v); 1095 y[7*(*idx)+3] += alpha4*(*v); 1096 y[7*(*idx)+4] += alpha5*(*v); 1097 y[7*(*idx)+5] += alpha6*(*v); 1098 y[7*(*idx)+6] += alpha7*(*v); 1099 idx++; v++; 1100 } 1101 } 1102 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1103 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1104 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1105 PetscFunctionReturn(0); 1106 } 1107 1108 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1109 { 1110 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1111 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1112 const PetscScalar *x,*v; 1113 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1114 PetscErrorCode ierr; 1115 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1116 PetscInt n,i,jrow,j; 1117 1118 PetscFunctionBegin; 1119 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1120 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1121 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1122 idx = a->j; 1123 v = a->a; 1124 ii = a->i; 1125 1126 for (i=0; i<m; i++) { 1127 jrow = ii[i]; 1128 n = ii[i+1] - jrow; 1129 sum1 = 0.0; 1130 sum2 = 0.0; 1131 sum3 = 0.0; 1132 sum4 = 0.0; 1133 sum5 = 0.0; 1134 sum6 = 0.0; 1135 sum7 = 0.0; 1136 for (j=0; j<n; j++) { 1137 sum1 += v[jrow]*x[7*idx[jrow]]; 1138 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1139 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1140 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1141 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1142 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1143 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1144 jrow++; 1145 } 1146 y[7*i] += sum1; 1147 y[7*i+1] += sum2; 1148 y[7*i+2] += sum3; 1149 y[7*i+3] += sum4; 1150 y[7*i+4] += sum5; 1151 y[7*i+5] += sum6; 1152 y[7*i+6] += sum7; 1153 } 1154 1155 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1156 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1157 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1158 PetscFunctionReturn(0); 1159 } 1160 1161 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1162 { 1163 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1164 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1165 const PetscScalar *x,*v; 1166 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1167 PetscErrorCode ierr; 1168 const PetscInt m = b->AIJ->rmap->n,*idx; 1169 PetscInt n,i; 1170 1171 PetscFunctionBegin; 1172 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1173 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1174 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1175 for (i=0; i<m; i++) { 1176 idx = a->j + a->i[i]; 1177 v = a->a + a->i[i]; 1178 n = a->i[i+1] - a->i[i]; 1179 alpha1 = x[7*i]; 1180 alpha2 = x[7*i+1]; 1181 alpha3 = x[7*i+2]; 1182 alpha4 = x[7*i+3]; 1183 alpha5 = x[7*i+4]; 1184 alpha6 = x[7*i+5]; 1185 alpha7 = x[7*i+6]; 1186 while (n-->0) { 1187 y[7*(*idx)] += alpha1*(*v); 1188 y[7*(*idx)+1] += alpha2*(*v); 1189 y[7*(*idx)+2] += alpha3*(*v); 1190 y[7*(*idx)+3] += alpha4*(*v); 1191 y[7*(*idx)+4] += alpha5*(*v); 1192 y[7*(*idx)+5] += alpha6*(*v); 1193 y[7*(*idx)+6] += alpha7*(*v); 1194 idx++; v++; 1195 } 1196 } 1197 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1198 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1199 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1200 PetscFunctionReturn(0); 1201 } 1202 1203 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1204 { 1205 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1206 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1207 const PetscScalar *x,*v; 1208 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1209 PetscErrorCode ierr; 1210 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1211 PetscInt nonzerorow=0,n,i,jrow,j; 1212 1213 PetscFunctionBegin; 1214 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1215 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1216 idx = a->j; 1217 v = a->a; 1218 ii = a->i; 1219 1220 for (i=0; i<m; i++) { 1221 jrow = ii[i]; 1222 n = ii[i+1] - jrow; 1223 sum1 = 0.0; 1224 sum2 = 0.0; 1225 sum3 = 0.0; 1226 sum4 = 0.0; 1227 sum5 = 0.0; 1228 sum6 = 0.0; 1229 sum7 = 0.0; 1230 sum8 = 0.0; 1231 1232 nonzerorow += (n>0); 1233 for (j=0; j<n; j++) { 1234 sum1 += v[jrow]*x[8*idx[jrow]]; 1235 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1236 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1237 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1238 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1239 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1240 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1241 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1242 jrow++; 1243 } 1244 y[8*i] = sum1; 1245 y[8*i+1] = sum2; 1246 y[8*i+2] = sum3; 1247 y[8*i+3] = sum4; 1248 y[8*i+4] = sum5; 1249 y[8*i+5] = sum6; 1250 y[8*i+6] = sum7; 1251 y[8*i+7] = sum8; 1252 } 1253 1254 ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 1255 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1256 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1257 PetscFunctionReturn(0); 1258 } 1259 1260 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1261 { 1262 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1263 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1264 const PetscScalar *x,*v; 1265 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1266 PetscErrorCode ierr; 1267 const PetscInt m = b->AIJ->rmap->n,*idx; 1268 PetscInt n,i; 1269 1270 PetscFunctionBegin; 1271 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1272 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1273 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1274 1275 for (i=0; i<m; i++) { 1276 idx = a->j + a->i[i]; 1277 v = a->a + a->i[i]; 1278 n = a->i[i+1] - a->i[i]; 1279 alpha1 = x[8*i]; 1280 alpha2 = x[8*i+1]; 1281 alpha3 = x[8*i+2]; 1282 alpha4 = x[8*i+3]; 1283 alpha5 = x[8*i+4]; 1284 alpha6 = x[8*i+5]; 1285 alpha7 = x[8*i+6]; 1286 alpha8 = x[8*i+7]; 1287 while (n-->0) { 1288 y[8*(*idx)] += alpha1*(*v); 1289 y[8*(*idx)+1] += alpha2*(*v); 1290 y[8*(*idx)+2] += alpha3*(*v); 1291 y[8*(*idx)+3] += alpha4*(*v); 1292 y[8*(*idx)+4] += alpha5*(*v); 1293 y[8*(*idx)+5] += alpha6*(*v); 1294 y[8*(*idx)+6] += alpha7*(*v); 1295 y[8*(*idx)+7] += alpha8*(*v); 1296 idx++; v++; 1297 } 1298 } 1299 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1300 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1301 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1302 PetscFunctionReturn(0); 1303 } 1304 1305 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1306 { 1307 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1308 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1309 const PetscScalar *x,*v; 1310 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1311 PetscErrorCode ierr; 1312 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1313 PetscInt n,i,jrow,j; 1314 1315 PetscFunctionBegin; 1316 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1317 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1318 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1319 idx = a->j; 1320 v = a->a; 1321 ii = a->i; 1322 1323 for (i=0; i<m; i++) { 1324 jrow = ii[i]; 1325 n = ii[i+1] - jrow; 1326 sum1 = 0.0; 1327 sum2 = 0.0; 1328 sum3 = 0.0; 1329 sum4 = 0.0; 1330 sum5 = 0.0; 1331 sum6 = 0.0; 1332 sum7 = 0.0; 1333 sum8 = 0.0; 1334 for (j=0; j<n; j++) { 1335 sum1 += v[jrow]*x[8*idx[jrow]]; 1336 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1337 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1338 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1339 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1340 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1341 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1342 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1343 jrow++; 1344 } 1345 y[8*i] += sum1; 1346 y[8*i+1] += sum2; 1347 y[8*i+2] += sum3; 1348 y[8*i+3] += sum4; 1349 y[8*i+4] += sum5; 1350 y[8*i+5] += sum6; 1351 y[8*i+6] += sum7; 1352 y[8*i+7] += sum8; 1353 } 1354 1355 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1356 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1357 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1358 PetscFunctionReturn(0); 1359 } 1360 1361 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1362 { 1363 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1364 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1365 const PetscScalar *x,*v; 1366 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1367 PetscErrorCode ierr; 1368 const PetscInt m = b->AIJ->rmap->n,*idx; 1369 PetscInt n,i; 1370 1371 PetscFunctionBegin; 1372 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1373 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1374 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1375 for (i=0; i<m; i++) { 1376 idx = a->j + a->i[i]; 1377 v = a->a + a->i[i]; 1378 n = a->i[i+1] - a->i[i]; 1379 alpha1 = x[8*i]; 1380 alpha2 = x[8*i+1]; 1381 alpha3 = x[8*i+2]; 1382 alpha4 = x[8*i+3]; 1383 alpha5 = x[8*i+4]; 1384 alpha6 = x[8*i+5]; 1385 alpha7 = x[8*i+6]; 1386 alpha8 = x[8*i+7]; 1387 while (n-->0) { 1388 y[8*(*idx)] += alpha1*(*v); 1389 y[8*(*idx)+1] += alpha2*(*v); 1390 y[8*(*idx)+2] += alpha3*(*v); 1391 y[8*(*idx)+3] += alpha4*(*v); 1392 y[8*(*idx)+4] += alpha5*(*v); 1393 y[8*(*idx)+5] += alpha6*(*v); 1394 y[8*(*idx)+6] += alpha7*(*v); 1395 y[8*(*idx)+7] += alpha8*(*v); 1396 idx++; v++; 1397 } 1398 } 1399 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1400 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1401 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1402 PetscFunctionReturn(0); 1403 } 1404 1405 /* ------------------------------------------------------------------------------*/ 1406 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1407 { 1408 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1409 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1410 const PetscScalar *x,*v; 1411 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1412 PetscErrorCode ierr; 1413 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1414 PetscInt nonzerorow=0,n,i,jrow,j; 1415 1416 PetscFunctionBegin; 1417 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1418 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1419 idx = a->j; 1420 v = a->a; 1421 ii = a->i; 1422 1423 for (i=0; i<m; i++) { 1424 jrow = ii[i]; 1425 n = ii[i+1] - jrow; 1426 sum1 = 0.0; 1427 sum2 = 0.0; 1428 sum3 = 0.0; 1429 sum4 = 0.0; 1430 sum5 = 0.0; 1431 sum6 = 0.0; 1432 sum7 = 0.0; 1433 sum8 = 0.0; 1434 sum9 = 0.0; 1435 1436 nonzerorow += (n>0); 1437 for (j=0; j<n; j++) { 1438 sum1 += v[jrow]*x[9*idx[jrow]]; 1439 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1440 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1441 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1442 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1443 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1444 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1445 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1446 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1447 jrow++; 1448 } 1449 y[9*i] = sum1; 1450 y[9*i+1] = sum2; 1451 y[9*i+2] = sum3; 1452 y[9*i+3] = sum4; 1453 y[9*i+4] = sum5; 1454 y[9*i+5] = sum6; 1455 y[9*i+6] = sum7; 1456 y[9*i+7] = sum8; 1457 y[9*i+8] = sum9; 1458 } 1459 1460 ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 1461 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1462 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1463 PetscFunctionReturn(0); 1464 } 1465 1466 /* ------------------------------------------------------------------------------*/ 1467 1468 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1469 { 1470 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1471 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1472 const PetscScalar *x,*v; 1473 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1474 PetscErrorCode ierr; 1475 const PetscInt m = b->AIJ->rmap->n,*idx; 1476 PetscInt n,i; 1477 1478 PetscFunctionBegin; 1479 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1480 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1481 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1482 1483 for (i=0; i<m; i++) { 1484 idx = a->j + a->i[i]; 1485 v = a->a + a->i[i]; 1486 n = a->i[i+1] - a->i[i]; 1487 alpha1 = x[9*i]; 1488 alpha2 = x[9*i+1]; 1489 alpha3 = x[9*i+2]; 1490 alpha4 = x[9*i+3]; 1491 alpha5 = x[9*i+4]; 1492 alpha6 = x[9*i+5]; 1493 alpha7 = x[9*i+6]; 1494 alpha8 = x[9*i+7]; 1495 alpha9 = x[9*i+8]; 1496 while (n-->0) { 1497 y[9*(*idx)] += alpha1*(*v); 1498 y[9*(*idx)+1] += alpha2*(*v); 1499 y[9*(*idx)+2] += alpha3*(*v); 1500 y[9*(*idx)+3] += alpha4*(*v); 1501 y[9*(*idx)+4] += alpha5*(*v); 1502 y[9*(*idx)+5] += alpha6*(*v); 1503 y[9*(*idx)+6] += alpha7*(*v); 1504 y[9*(*idx)+7] += alpha8*(*v); 1505 y[9*(*idx)+8] += alpha9*(*v); 1506 idx++; v++; 1507 } 1508 } 1509 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1510 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1511 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1512 PetscFunctionReturn(0); 1513 } 1514 1515 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1516 { 1517 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1518 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1519 const PetscScalar *x,*v; 1520 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1521 PetscErrorCode ierr; 1522 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1523 PetscInt n,i,jrow,j; 1524 1525 PetscFunctionBegin; 1526 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1527 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1528 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1529 idx = a->j; 1530 v = a->a; 1531 ii = a->i; 1532 1533 for (i=0; i<m; i++) { 1534 jrow = ii[i]; 1535 n = ii[i+1] - jrow; 1536 sum1 = 0.0; 1537 sum2 = 0.0; 1538 sum3 = 0.0; 1539 sum4 = 0.0; 1540 sum5 = 0.0; 1541 sum6 = 0.0; 1542 sum7 = 0.0; 1543 sum8 = 0.0; 1544 sum9 = 0.0; 1545 for (j=0; j<n; j++) { 1546 sum1 += v[jrow]*x[9*idx[jrow]]; 1547 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1548 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1549 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1550 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1551 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1552 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1553 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1554 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1555 jrow++; 1556 } 1557 y[9*i] += sum1; 1558 y[9*i+1] += sum2; 1559 y[9*i+2] += sum3; 1560 y[9*i+3] += sum4; 1561 y[9*i+4] += sum5; 1562 y[9*i+5] += sum6; 1563 y[9*i+6] += sum7; 1564 y[9*i+7] += sum8; 1565 y[9*i+8] += sum9; 1566 } 1567 1568 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1569 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1570 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1571 PetscFunctionReturn(0); 1572 } 1573 1574 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1575 { 1576 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1577 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1578 const PetscScalar *x,*v; 1579 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1580 PetscErrorCode ierr; 1581 const PetscInt m = b->AIJ->rmap->n,*idx; 1582 PetscInt n,i; 1583 1584 PetscFunctionBegin; 1585 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1586 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1587 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1588 for (i=0; i<m; i++) { 1589 idx = a->j + a->i[i]; 1590 v = a->a + a->i[i]; 1591 n = a->i[i+1] - a->i[i]; 1592 alpha1 = x[9*i]; 1593 alpha2 = x[9*i+1]; 1594 alpha3 = x[9*i+2]; 1595 alpha4 = x[9*i+3]; 1596 alpha5 = x[9*i+4]; 1597 alpha6 = x[9*i+5]; 1598 alpha7 = x[9*i+6]; 1599 alpha8 = x[9*i+7]; 1600 alpha9 = x[9*i+8]; 1601 while (n-->0) { 1602 y[9*(*idx)] += alpha1*(*v); 1603 y[9*(*idx)+1] += alpha2*(*v); 1604 y[9*(*idx)+2] += alpha3*(*v); 1605 y[9*(*idx)+3] += alpha4*(*v); 1606 y[9*(*idx)+4] += alpha5*(*v); 1607 y[9*(*idx)+5] += alpha6*(*v); 1608 y[9*(*idx)+6] += alpha7*(*v); 1609 y[9*(*idx)+7] += alpha8*(*v); 1610 y[9*(*idx)+8] += alpha9*(*v); 1611 idx++; v++; 1612 } 1613 } 1614 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1615 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1616 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1617 PetscFunctionReturn(0); 1618 } 1619 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1620 { 1621 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1622 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1623 const PetscScalar *x,*v; 1624 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1625 PetscErrorCode ierr; 1626 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1627 PetscInt nonzerorow=0,n,i,jrow,j; 1628 1629 PetscFunctionBegin; 1630 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1631 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1632 idx = a->j; 1633 v = a->a; 1634 ii = a->i; 1635 1636 for (i=0; i<m; i++) { 1637 jrow = ii[i]; 1638 n = ii[i+1] - jrow; 1639 sum1 = 0.0; 1640 sum2 = 0.0; 1641 sum3 = 0.0; 1642 sum4 = 0.0; 1643 sum5 = 0.0; 1644 sum6 = 0.0; 1645 sum7 = 0.0; 1646 sum8 = 0.0; 1647 sum9 = 0.0; 1648 sum10 = 0.0; 1649 1650 nonzerorow += (n>0); 1651 for (j=0; j<n; j++) { 1652 sum1 += v[jrow]*x[10*idx[jrow]]; 1653 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1654 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1655 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1656 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1657 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1658 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1659 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1660 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1661 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1662 jrow++; 1663 } 1664 y[10*i] = sum1; 1665 y[10*i+1] = sum2; 1666 y[10*i+2] = sum3; 1667 y[10*i+3] = sum4; 1668 y[10*i+4] = sum5; 1669 y[10*i+5] = sum6; 1670 y[10*i+6] = sum7; 1671 y[10*i+7] = sum8; 1672 y[10*i+8] = sum9; 1673 y[10*i+9] = sum10; 1674 } 1675 1676 ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 1677 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1678 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1679 PetscFunctionReturn(0); 1680 } 1681 1682 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1683 { 1684 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1685 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1686 const PetscScalar *x,*v; 1687 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1688 PetscErrorCode ierr; 1689 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1690 PetscInt n,i,jrow,j; 1691 1692 PetscFunctionBegin; 1693 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1694 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1695 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1696 idx = a->j; 1697 v = a->a; 1698 ii = a->i; 1699 1700 for (i=0; i<m; i++) { 1701 jrow = ii[i]; 1702 n = ii[i+1] - jrow; 1703 sum1 = 0.0; 1704 sum2 = 0.0; 1705 sum3 = 0.0; 1706 sum4 = 0.0; 1707 sum5 = 0.0; 1708 sum6 = 0.0; 1709 sum7 = 0.0; 1710 sum8 = 0.0; 1711 sum9 = 0.0; 1712 sum10 = 0.0; 1713 for (j=0; j<n; j++) { 1714 sum1 += v[jrow]*x[10*idx[jrow]]; 1715 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1716 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1717 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1718 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1719 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1720 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1721 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1722 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1723 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1724 jrow++; 1725 } 1726 y[10*i] += sum1; 1727 y[10*i+1] += sum2; 1728 y[10*i+2] += sum3; 1729 y[10*i+3] += sum4; 1730 y[10*i+4] += sum5; 1731 y[10*i+5] += sum6; 1732 y[10*i+6] += sum7; 1733 y[10*i+7] += sum8; 1734 y[10*i+8] += sum9; 1735 y[10*i+9] += sum10; 1736 } 1737 1738 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1739 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1740 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1741 PetscFunctionReturn(0); 1742 } 1743 1744 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1745 { 1746 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1747 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1748 const PetscScalar *x,*v; 1749 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1750 PetscErrorCode ierr; 1751 const PetscInt m = b->AIJ->rmap->n,*idx; 1752 PetscInt n,i; 1753 1754 PetscFunctionBegin; 1755 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1756 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1757 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1758 1759 for (i=0; i<m; i++) { 1760 idx = a->j + a->i[i]; 1761 v = a->a + a->i[i]; 1762 n = a->i[i+1] - a->i[i]; 1763 alpha1 = x[10*i]; 1764 alpha2 = x[10*i+1]; 1765 alpha3 = x[10*i+2]; 1766 alpha4 = x[10*i+3]; 1767 alpha5 = x[10*i+4]; 1768 alpha6 = x[10*i+5]; 1769 alpha7 = x[10*i+6]; 1770 alpha8 = x[10*i+7]; 1771 alpha9 = x[10*i+8]; 1772 alpha10 = x[10*i+9]; 1773 while (n-->0) { 1774 y[10*(*idx)] += alpha1*(*v); 1775 y[10*(*idx)+1] += alpha2*(*v); 1776 y[10*(*idx)+2] += alpha3*(*v); 1777 y[10*(*idx)+3] += alpha4*(*v); 1778 y[10*(*idx)+4] += alpha5*(*v); 1779 y[10*(*idx)+5] += alpha6*(*v); 1780 y[10*(*idx)+6] += alpha7*(*v); 1781 y[10*(*idx)+7] += alpha8*(*v); 1782 y[10*(*idx)+8] += alpha9*(*v); 1783 y[10*(*idx)+9] += alpha10*(*v); 1784 idx++; v++; 1785 } 1786 } 1787 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1788 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1789 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1790 PetscFunctionReturn(0); 1791 } 1792 1793 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1794 { 1795 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1796 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1797 const PetscScalar *x,*v; 1798 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1799 PetscErrorCode ierr; 1800 const PetscInt m = b->AIJ->rmap->n,*idx; 1801 PetscInt n,i; 1802 1803 PetscFunctionBegin; 1804 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1805 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1806 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1807 for (i=0; i<m; i++) { 1808 idx = a->j + a->i[i]; 1809 v = a->a + a->i[i]; 1810 n = a->i[i+1] - a->i[i]; 1811 alpha1 = x[10*i]; 1812 alpha2 = x[10*i+1]; 1813 alpha3 = x[10*i+2]; 1814 alpha4 = x[10*i+3]; 1815 alpha5 = x[10*i+4]; 1816 alpha6 = x[10*i+5]; 1817 alpha7 = x[10*i+6]; 1818 alpha8 = x[10*i+7]; 1819 alpha9 = x[10*i+8]; 1820 alpha10 = x[10*i+9]; 1821 while (n-->0) { 1822 y[10*(*idx)] += alpha1*(*v); 1823 y[10*(*idx)+1] += alpha2*(*v); 1824 y[10*(*idx)+2] += alpha3*(*v); 1825 y[10*(*idx)+3] += alpha4*(*v); 1826 y[10*(*idx)+4] += alpha5*(*v); 1827 y[10*(*idx)+5] += alpha6*(*v); 1828 y[10*(*idx)+6] += alpha7*(*v); 1829 y[10*(*idx)+7] += alpha8*(*v); 1830 y[10*(*idx)+8] += alpha9*(*v); 1831 y[10*(*idx)+9] += alpha10*(*v); 1832 idx++; v++; 1833 } 1834 } 1835 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1836 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1837 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1838 PetscFunctionReturn(0); 1839 } 1840 1841 1842 /*--------------------------------------------------------------------------------------------*/ 1843 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1844 { 1845 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1846 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1847 const PetscScalar *x,*v; 1848 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1849 PetscErrorCode ierr; 1850 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1851 PetscInt nonzerorow=0,n,i,jrow,j; 1852 1853 PetscFunctionBegin; 1854 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1855 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1856 idx = a->j; 1857 v = a->a; 1858 ii = a->i; 1859 1860 for (i=0; i<m; i++) { 1861 jrow = ii[i]; 1862 n = ii[i+1] - jrow; 1863 sum1 = 0.0; 1864 sum2 = 0.0; 1865 sum3 = 0.0; 1866 sum4 = 0.0; 1867 sum5 = 0.0; 1868 sum6 = 0.0; 1869 sum7 = 0.0; 1870 sum8 = 0.0; 1871 sum9 = 0.0; 1872 sum10 = 0.0; 1873 sum11 = 0.0; 1874 1875 nonzerorow += (n>0); 1876 for (j=0; j<n; j++) { 1877 sum1 += v[jrow]*x[11*idx[jrow]]; 1878 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1879 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1880 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1881 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1882 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1883 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1884 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1885 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1886 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1887 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1888 jrow++; 1889 } 1890 y[11*i] = sum1; 1891 y[11*i+1] = sum2; 1892 y[11*i+2] = sum3; 1893 y[11*i+3] = sum4; 1894 y[11*i+4] = sum5; 1895 y[11*i+5] = sum6; 1896 y[11*i+6] = sum7; 1897 y[11*i+7] = sum8; 1898 y[11*i+8] = sum9; 1899 y[11*i+9] = sum10; 1900 y[11*i+10] = sum11; 1901 } 1902 1903 ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr); 1904 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1905 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1906 PetscFunctionReturn(0); 1907 } 1908 1909 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1910 { 1911 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1912 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1913 const PetscScalar *x,*v; 1914 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1915 PetscErrorCode ierr; 1916 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1917 PetscInt n,i,jrow,j; 1918 1919 PetscFunctionBegin; 1920 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1921 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1922 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1923 idx = a->j; 1924 v = a->a; 1925 ii = a->i; 1926 1927 for (i=0; i<m; i++) { 1928 jrow = ii[i]; 1929 n = ii[i+1] - jrow; 1930 sum1 = 0.0; 1931 sum2 = 0.0; 1932 sum3 = 0.0; 1933 sum4 = 0.0; 1934 sum5 = 0.0; 1935 sum6 = 0.0; 1936 sum7 = 0.0; 1937 sum8 = 0.0; 1938 sum9 = 0.0; 1939 sum10 = 0.0; 1940 sum11 = 0.0; 1941 for (j=0; j<n; j++) { 1942 sum1 += v[jrow]*x[11*idx[jrow]]; 1943 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1944 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1945 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1946 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1947 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1948 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1949 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1950 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1951 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1952 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1953 jrow++; 1954 } 1955 y[11*i] += sum1; 1956 y[11*i+1] += sum2; 1957 y[11*i+2] += sum3; 1958 y[11*i+3] += sum4; 1959 y[11*i+4] += sum5; 1960 y[11*i+5] += sum6; 1961 y[11*i+6] += sum7; 1962 y[11*i+7] += sum8; 1963 y[11*i+8] += sum9; 1964 y[11*i+9] += sum10; 1965 y[11*i+10] += sum11; 1966 } 1967 1968 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 1969 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1970 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1971 PetscFunctionReturn(0); 1972 } 1973 1974 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1975 { 1976 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1977 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1978 const PetscScalar *x,*v; 1979 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 1980 PetscErrorCode ierr; 1981 const PetscInt m = b->AIJ->rmap->n,*idx; 1982 PetscInt n,i; 1983 1984 PetscFunctionBegin; 1985 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1986 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1987 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1988 1989 for (i=0; i<m; i++) { 1990 idx = a->j + a->i[i]; 1991 v = a->a + a->i[i]; 1992 n = a->i[i+1] - a->i[i]; 1993 alpha1 = x[11*i]; 1994 alpha2 = x[11*i+1]; 1995 alpha3 = x[11*i+2]; 1996 alpha4 = x[11*i+3]; 1997 alpha5 = x[11*i+4]; 1998 alpha6 = x[11*i+5]; 1999 alpha7 = x[11*i+6]; 2000 alpha8 = x[11*i+7]; 2001 alpha9 = x[11*i+8]; 2002 alpha10 = x[11*i+9]; 2003 alpha11 = x[11*i+10]; 2004 while (n-->0) { 2005 y[11*(*idx)] += alpha1*(*v); 2006 y[11*(*idx)+1] += alpha2*(*v); 2007 y[11*(*idx)+2] += alpha3*(*v); 2008 y[11*(*idx)+3] += alpha4*(*v); 2009 y[11*(*idx)+4] += alpha5*(*v); 2010 y[11*(*idx)+5] += alpha6*(*v); 2011 y[11*(*idx)+6] += alpha7*(*v); 2012 y[11*(*idx)+7] += alpha8*(*v); 2013 y[11*(*idx)+8] += alpha9*(*v); 2014 y[11*(*idx)+9] += alpha10*(*v); 2015 y[11*(*idx)+10] += alpha11*(*v); 2016 idx++; v++; 2017 } 2018 } 2019 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2020 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2021 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2022 PetscFunctionReturn(0); 2023 } 2024 2025 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2026 { 2027 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2028 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2029 const PetscScalar *x,*v; 2030 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2031 PetscErrorCode ierr; 2032 const PetscInt m = b->AIJ->rmap->n,*idx; 2033 PetscInt n,i; 2034 2035 PetscFunctionBegin; 2036 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2037 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2038 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2039 for (i=0; i<m; i++) { 2040 idx = a->j + a->i[i]; 2041 v = a->a + a->i[i]; 2042 n = a->i[i+1] - a->i[i]; 2043 alpha1 = x[11*i]; 2044 alpha2 = x[11*i+1]; 2045 alpha3 = x[11*i+2]; 2046 alpha4 = x[11*i+3]; 2047 alpha5 = x[11*i+4]; 2048 alpha6 = x[11*i+5]; 2049 alpha7 = x[11*i+6]; 2050 alpha8 = x[11*i+7]; 2051 alpha9 = x[11*i+8]; 2052 alpha10 = x[11*i+9]; 2053 alpha11 = x[11*i+10]; 2054 while (n-->0) { 2055 y[11*(*idx)] += alpha1*(*v); 2056 y[11*(*idx)+1] += alpha2*(*v); 2057 y[11*(*idx)+2] += alpha3*(*v); 2058 y[11*(*idx)+3] += alpha4*(*v); 2059 y[11*(*idx)+4] += alpha5*(*v); 2060 y[11*(*idx)+5] += alpha6*(*v); 2061 y[11*(*idx)+6] += alpha7*(*v); 2062 y[11*(*idx)+7] += alpha8*(*v); 2063 y[11*(*idx)+8] += alpha9*(*v); 2064 y[11*(*idx)+9] += alpha10*(*v); 2065 y[11*(*idx)+10] += alpha11*(*v); 2066 idx++; v++; 2067 } 2068 } 2069 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2070 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2071 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2072 PetscFunctionReturn(0); 2073 } 2074 2075 2076 /*--------------------------------------------------------------------------------------------*/ 2077 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2078 { 2079 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2080 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2081 const PetscScalar *x,*v; 2082 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2083 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2084 PetscErrorCode ierr; 2085 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2086 PetscInt nonzerorow=0,n,i,jrow,j; 2087 2088 PetscFunctionBegin; 2089 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2090 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2091 idx = a->j; 2092 v = a->a; 2093 ii = a->i; 2094 2095 for (i=0; i<m; i++) { 2096 jrow = ii[i]; 2097 n = ii[i+1] - jrow; 2098 sum1 = 0.0; 2099 sum2 = 0.0; 2100 sum3 = 0.0; 2101 sum4 = 0.0; 2102 sum5 = 0.0; 2103 sum6 = 0.0; 2104 sum7 = 0.0; 2105 sum8 = 0.0; 2106 sum9 = 0.0; 2107 sum10 = 0.0; 2108 sum11 = 0.0; 2109 sum12 = 0.0; 2110 sum13 = 0.0; 2111 sum14 = 0.0; 2112 sum15 = 0.0; 2113 sum16 = 0.0; 2114 2115 nonzerorow += (n>0); 2116 for (j=0; j<n; j++) { 2117 sum1 += v[jrow]*x[16*idx[jrow]]; 2118 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2119 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2120 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2121 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2122 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2123 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2124 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2125 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2126 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2127 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2128 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2129 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2130 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2131 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2132 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2133 jrow++; 2134 } 2135 y[16*i] = sum1; 2136 y[16*i+1] = sum2; 2137 y[16*i+2] = sum3; 2138 y[16*i+3] = sum4; 2139 y[16*i+4] = sum5; 2140 y[16*i+5] = sum6; 2141 y[16*i+6] = sum7; 2142 y[16*i+7] = sum8; 2143 y[16*i+8] = sum9; 2144 y[16*i+9] = sum10; 2145 y[16*i+10] = sum11; 2146 y[16*i+11] = sum12; 2147 y[16*i+12] = sum13; 2148 y[16*i+13] = sum14; 2149 y[16*i+14] = sum15; 2150 y[16*i+15] = sum16; 2151 } 2152 2153 ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 2154 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2155 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2156 PetscFunctionReturn(0); 2157 } 2158 2159 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2160 { 2161 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2162 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2163 const PetscScalar *x,*v; 2164 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2165 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2166 PetscErrorCode ierr; 2167 const PetscInt m = b->AIJ->rmap->n,*idx; 2168 PetscInt n,i; 2169 2170 PetscFunctionBegin; 2171 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2172 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2173 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2174 2175 for (i=0; i<m; i++) { 2176 idx = a->j + a->i[i]; 2177 v = a->a + a->i[i]; 2178 n = a->i[i+1] - a->i[i]; 2179 alpha1 = x[16*i]; 2180 alpha2 = x[16*i+1]; 2181 alpha3 = x[16*i+2]; 2182 alpha4 = x[16*i+3]; 2183 alpha5 = x[16*i+4]; 2184 alpha6 = x[16*i+5]; 2185 alpha7 = x[16*i+6]; 2186 alpha8 = x[16*i+7]; 2187 alpha9 = x[16*i+8]; 2188 alpha10 = x[16*i+9]; 2189 alpha11 = x[16*i+10]; 2190 alpha12 = x[16*i+11]; 2191 alpha13 = x[16*i+12]; 2192 alpha14 = x[16*i+13]; 2193 alpha15 = x[16*i+14]; 2194 alpha16 = x[16*i+15]; 2195 while (n-->0) { 2196 y[16*(*idx)] += alpha1*(*v); 2197 y[16*(*idx)+1] += alpha2*(*v); 2198 y[16*(*idx)+2] += alpha3*(*v); 2199 y[16*(*idx)+3] += alpha4*(*v); 2200 y[16*(*idx)+4] += alpha5*(*v); 2201 y[16*(*idx)+5] += alpha6*(*v); 2202 y[16*(*idx)+6] += alpha7*(*v); 2203 y[16*(*idx)+7] += alpha8*(*v); 2204 y[16*(*idx)+8] += alpha9*(*v); 2205 y[16*(*idx)+9] += alpha10*(*v); 2206 y[16*(*idx)+10] += alpha11*(*v); 2207 y[16*(*idx)+11] += alpha12*(*v); 2208 y[16*(*idx)+12] += alpha13*(*v); 2209 y[16*(*idx)+13] += alpha14*(*v); 2210 y[16*(*idx)+14] += alpha15*(*v); 2211 y[16*(*idx)+15] += alpha16*(*v); 2212 idx++; v++; 2213 } 2214 } 2215 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2216 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2217 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2218 PetscFunctionReturn(0); 2219 } 2220 2221 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2222 { 2223 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2224 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2225 const PetscScalar *x,*v; 2226 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2227 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2228 PetscErrorCode ierr; 2229 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2230 PetscInt n,i,jrow,j; 2231 2232 PetscFunctionBegin; 2233 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2234 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2235 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2236 idx = a->j; 2237 v = a->a; 2238 ii = a->i; 2239 2240 for (i=0; i<m; i++) { 2241 jrow = ii[i]; 2242 n = ii[i+1] - jrow; 2243 sum1 = 0.0; 2244 sum2 = 0.0; 2245 sum3 = 0.0; 2246 sum4 = 0.0; 2247 sum5 = 0.0; 2248 sum6 = 0.0; 2249 sum7 = 0.0; 2250 sum8 = 0.0; 2251 sum9 = 0.0; 2252 sum10 = 0.0; 2253 sum11 = 0.0; 2254 sum12 = 0.0; 2255 sum13 = 0.0; 2256 sum14 = 0.0; 2257 sum15 = 0.0; 2258 sum16 = 0.0; 2259 for (j=0; j<n; j++) { 2260 sum1 += v[jrow]*x[16*idx[jrow]]; 2261 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2262 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2263 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2264 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2265 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2266 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2267 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2268 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2269 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2270 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2271 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2272 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2273 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2274 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2275 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2276 jrow++; 2277 } 2278 y[16*i] += sum1; 2279 y[16*i+1] += sum2; 2280 y[16*i+2] += sum3; 2281 y[16*i+3] += sum4; 2282 y[16*i+4] += sum5; 2283 y[16*i+5] += sum6; 2284 y[16*i+6] += sum7; 2285 y[16*i+7] += sum8; 2286 y[16*i+8] += sum9; 2287 y[16*i+9] += sum10; 2288 y[16*i+10] += sum11; 2289 y[16*i+11] += sum12; 2290 y[16*i+12] += sum13; 2291 y[16*i+13] += sum14; 2292 y[16*i+14] += sum15; 2293 y[16*i+15] += sum16; 2294 } 2295 2296 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2297 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2298 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2299 PetscFunctionReturn(0); 2300 } 2301 2302 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2303 { 2304 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2305 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2306 const PetscScalar *x,*v; 2307 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2308 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2309 PetscErrorCode ierr; 2310 const PetscInt m = b->AIJ->rmap->n,*idx; 2311 PetscInt n,i; 2312 2313 PetscFunctionBegin; 2314 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2315 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2316 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2317 for (i=0; i<m; i++) { 2318 idx = a->j + a->i[i]; 2319 v = a->a + a->i[i]; 2320 n = a->i[i+1] - a->i[i]; 2321 alpha1 = x[16*i]; 2322 alpha2 = x[16*i+1]; 2323 alpha3 = x[16*i+2]; 2324 alpha4 = x[16*i+3]; 2325 alpha5 = x[16*i+4]; 2326 alpha6 = x[16*i+5]; 2327 alpha7 = x[16*i+6]; 2328 alpha8 = x[16*i+7]; 2329 alpha9 = x[16*i+8]; 2330 alpha10 = x[16*i+9]; 2331 alpha11 = x[16*i+10]; 2332 alpha12 = x[16*i+11]; 2333 alpha13 = x[16*i+12]; 2334 alpha14 = x[16*i+13]; 2335 alpha15 = x[16*i+14]; 2336 alpha16 = x[16*i+15]; 2337 while (n-->0) { 2338 y[16*(*idx)] += alpha1*(*v); 2339 y[16*(*idx)+1] += alpha2*(*v); 2340 y[16*(*idx)+2] += alpha3*(*v); 2341 y[16*(*idx)+3] += alpha4*(*v); 2342 y[16*(*idx)+4] += alpha5*(*v); 2343 y[16*(*idx)+5] += alpha6*(*v); 2344 y[16*(*idx)+6] += alpha7*(*v); 2345 y[16*(*idx)+7] += alpha8*(*v); 2346 y[16*(*idx)+8] += alpha9*(*v); 2347 y[16*(*idx)+9] += alpha10*(*v); 2348 y[16*(*idx)+10] += alpha11*(*v); 2349 y[16*(*idx)+11] += alpha12*(*v); 2350 y[16*(*idx)+12] += alpha13*(*v); 2351 y[16*(*idx)+13] += alpha14*(*v); 2352 y[16*(*idx)+14] += alpha15*(*v); 2353 y[16*(*idx)+15] += alpha16*(*v); 2354 idx++; v++; 2355 } 2356 } 2357 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2358 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2359 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2360 PetscFunctionReturn(0); 2361 } 2362 2363 /*--------------------------------------------------------------------------------------------*/ 2364 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2365 { 2366 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2367 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2368 const PetscScalar *x,*v; 2369 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2370 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2371 PetscErrorCode ierr; 2372 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2373 PetscInt nonzerorow=0,n,i,jrow,j; 2374 2375 PetscFunctionBegin; 2376 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2377 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2378 idx = a->j; 2379 v = a->a; 2380 ii = a->i; 2381 2382 for (i=0; i<m; i++) { 2383 jrow = ii[i]; 2384 n = ii[i+1] - jrow; 2385 sum1 = 0.0; 2386 sum2 = 0.0; 2387 sum3 = 0.0; 2388 sum4 = 0.0; 2389 sum5 = 0.0; 2390 sum6 = 0.0; 2391 sum7 = 0.0; 2392 sum8 = 0.0; 2393 sum9 = 0.0; 2394 sum10 = 0.0; 2395 sum11 = 0.0; 2396 sum12 = 0.0; 2397 sum13 = 0.0; 2398 sum14 = 0.0; 2399 sum15 = 0.0; 2400 sum16 = 0.0; 2401 sum17 = 0.0; 2402 sum18 = 0.0; 2403 2404 nonzerorow += (n>0); 2405 for (j=0; j<n; j++) { 2406 sum1 += v[jrow]*x[18*idx[jrow]]; 2407 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2408 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2409 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2410 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2411 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2412 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2413 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2414 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2415 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2416 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2417 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2418 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2419 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2420 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2421 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2422 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2423 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2424 jrow++; 2425 } 2426 y[18*i] = sum1; 2427 y[18*i+1] = sum2; 2428 y[18*i+2] = sum3; 2429 y[18*i+3] = sum4; 2430 y[18*i+4] = sum5; 2431 y[18*i+5] = sum6; 2432 y[18*i+6] = sum7; 2433 y[18*i+7] = sum8; 2434 y[18*i+8] = sum9; 2435 y[18*i+9] = sum10; 2436 y[18*i+10] = sum11; 2437 y[18*i+11] = sum12; 2438 y[18*i+12] = sum13; 2439 y[18*i+13] = sum14; 2440 y[18*i+14] = sum15; 2441 y[18*i+15] = sum16; 2442 y[18*i+16] = sum17; 2443 y[18*i+17] = sum18; 2444 } 2445 2446 ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 2447 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2448 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2449 PetscFunctionReturn(0); 2450 } 2451 2452 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2453 { 2454 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2455 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2456 const PetscScalar *x,*v; 2457 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2458 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2459 PetscErrorCode ierr; 2460 const PetscInt m = b->AIJ->rmap->n,*idx; 2461 PetscInt n,i; 2462 2463 PetscFunctionBegin; 2464 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2465 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2466 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2467 2468 for (i=0; i<m; i++) { 2469 idx = a->j + a->i[i]; 2470 v = a->a + a->i[i]; 2471 n = a->i[i+1] - a->i[i]; 2472 alpha1 = x[18*i]; 2473 alpha2 = x[18*i+1]; 2474 alpha3 = x[18*i+2]; 2475 alpha4 = x[18*i+3]; 2476 alpha5 = x[18*i+4]; 2477 alpha6 = x[18*i+5]; 2478 alpha7 = x[18*i+6]; 2479 alpha8 = x[18*i+7]; 2480 alpha9 = x[18*i+8]; 2481 alpha10 = x[18*i+9]; 2482 alpha11 = x[18*i+10]; 2483 alpha12 = x[18*i+11]; 2484 alpha13 = x[18*i+12]; 2485 alpha14 = x[18*i+13]; 2486 alpha15 = x[18*i+14]; 2487 alpha16 = x[18*i+15]; 2488 alpha17 = x[18*i+16]; 2489 alpha18 = x[18*i+17]; 2490 while (n-->0) { 2491 y[18*(*idx)] += alpha1*(*v); 2492 y[18*(*idx)+1] += alpha2*(*v); 2493 y[18*(*idx)+2] += alpha3*(*v); 2494 y[18*(*idx)+3] += alpha4*(*v); 2495 y[18*(*idx)+4] += alpha5*(*v); 2496 y[18*(*idx)+5] += alpha6*(*v); 2497 y[18*(*idx)+6] += alpha7*(*v); 2498 y[18*(*idx)+7] += alpha8*(*v); 2499 y[18*(*idx)+8] += alpha9*(*v); 2500 y[18*(*idx)+9] += alpha10*(*v); 2501 y[18*(*idx)+10] += alpha11*(*v); 2502 y[18*(*idx)+11] += alpha12*(*v); 2503 y[18*(*idx)+12] += alpha13*(*v); 2504 y[18*(*idx)+13] += alpha14*(*v); 2505 y[18*(*idx)+14] += alpha15*(*v); 2506 y[18*(*idx)+15] += alpha16*(*v); 2507 y[18*(*idx)+16] += alpha17*(*v); 2508 y[18*(*idx)+17] += alpha18*(*v); 2509 idx++; v++; 2510 } 2511 } 2512 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2513 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2514 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2515 PetscFunctionReturn(0); 2516 } 2517 2518 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2519 { 2520 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2521 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2522 const PetscScalar *x,*v; 2523 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2524 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2525 PetscErrorCode ierr; 2526 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2527 PetscInt n,i,jrow,j; 2528 2529 PetscFunctionBegin; 2530 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2531 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2532 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2533 idx = a->j; 2534 v = a->a; 2535 ii = a->i; 2536 2537 for (i=0; i<m; i++) { 2538 jrow = ii[i]; 2539 n = ii[i+1] - jrow; 2540 sum1 = 0.0; 2541 sum2 = 0.0; 2542 sum3 = 0.0; 2543 sum4 = 0.0; 2544 sum5 = 0.0; 2545 sum6 = 0.0; 2546 sum7 = 0.0; 2547 sum8 = 0.0; 2548 sum9 = 0.0; 2549 sum10 = 0.0; 2550 sum11 = 0.0; 2551 sum12 = 0.0; 2552 sum13 = 0.0; 2553 sum14 = 0.0; 2554 sum15 = 0.0; 2555 sum16 = 0.0; 2556 sum17 = 0.0; 2557 sum18 = 0.0; 2558 for (j=0; j<n; j++) { 2559 sum1 += v[jrow]*x[18*idx[jrow]]; 2560 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2561 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2562 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2563 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2564 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2565 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2566 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2567 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2568 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2569 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2570 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2571 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2572 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2573 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2574 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2575 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2576 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2577 jrow++; 2578 } 2579 y[18*i] += sum1; 2580 y[18*i+1] += sum2; 2581 y[18*i+2] += sum3; 2582 y[18*i+3] += sum4; 2583 y[18*i+4] += sum5; 2584 y[18*i+5] += sum6; 2585 y[18*i+6] += sum7; 2586 y[18*i+7] += sum8; 2587 y[18*i+8] += sum9; 2588 y[18*i+9] += sum10; 2589 y[18*i+10] += sum11; 2590 y[18*i+11] += sum12; 2591 y[18*i+12] += sum13; 2592 y[18*i+13] += sum14; 2593 y[18*i+14] += sum15; 2594 y[18*i+15] += sum16; 2595 y[18*i+16] += sum17; 2596 y[18*i+17] += sum18; 2597 } 2598 2599 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2600 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2601 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2602 PetscFunctionReturn(0); 2603 } 2604 2605 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2606 { 2607 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2608 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2609 const PetscScalar *x,*v; 2610 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2611 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2612 PetscErrorCode ierr; 2613 const PetscInt m = b->AIJ->rmap->n,*idx; 2614 PetscInt n,i; 2615 2616 PetscFunctionBegin; 2617 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2618 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2619 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2620 for (i=0; i<m; i++) { 2621 idx = a->j + a->i[i]; 2622 v = a->a + a->i[i]; 2623 n = a->i[i+1] - a->i[i]; 2624 alpha1 = x[18*i]; 2625 alpha2 = x[18*i+1]; 2626 alpha3 = x[18*i+2]; 2627 alpha4 = x[18*i+3]; 2628 alpha5 = x[18*i+4]; 2629 alpha6 = x[18*i+5]; 2630 alpha7 = x[18*i+6]; 2631 alpha8 = x[18*i+7]; 2632 alpha9 = x[18*i+8]; 2633 alpha10 = x[18*i+9]; 2634 alpha11 = x[18*i+10]; 2635 alpha12 = x[18*i+11]; 2636 alpha13 = x[18*i+12]; 2637 alpha14 = x[18*i+13]; 2638 alpha15 = x[18*i+14]; 2639 alpha16 = x[18*i+15]; 2640 alpha17 = x[18*i+16]; 2641 alpha18 = x[18*i+17]; 2642 while (n-->0) { 2643 y[18*(*idx)] += alpha1*(*v); 2644 y[18*(*idx)+1] += alpha2*(*v); 2645 y[18*(*idx)+2] += alpha3*(*v); 2646 y[18*(*idx)+3] += alpha4*(*v); 2647 y[18*(*idx)+4] += alpha5*(*v); 2648 y[18*(*idx)+5] += alpha6*(*v); 2649 y[18*(*idx)+6] += alpha7*(*v); 2650 y[18*(*idx)+7] += alpha8*(*v); 2651 y[18*(*idx)+8] += alpha9*(*v); 2652 y[18*(*idx)+9] += alpha10*(*v); 2653 y[18*(*idx)+10] += alpha11*(*v); 2654 y[18*(*idx)+11] += alpha12*(*v); 2655 y[18*(*idx)+12] += alpha13*(*v); 2656 y[18*(*idx)+13] += alpha14*(*v); 2657 y[18*(*idx)+14] += alpha15*(*v); 2658 y[18*(*idx)+15] += alpha16*(*v); 2659 y[18*(*idx)+16] += alpha17*(*v); 2660 y[18*(*idx)+17] += alpha18*(*v); 2661 idx++; v++; 2662 } 2663 } 2664 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2665 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2666 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2667 PetscFunctionReturn(0); 2668 } 2669 2670 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2671 { 2672 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2673 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2674 const PetscScalar *x,*v; 2675 PetscScalar *y,*sums; 2676 PetscErrorCode ierr; 2677 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2678 PetscInt n,i,jrow,j,dof = b->dof,k; 2679 2680 PetscFunctionBegin; 2681 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2682 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2683 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2684 idx = a->j; 2685 v = a->a; 2686 ii = a->i; 2687 2688 for (i=0; i<m; i++) { 2689 jrow = ii[i]; 2690 n = ii[i+1] - jrow; 2691 sums = y + dof*i; 2692 for (j=0; j<n; j++) { 2693 for (k=0; k<dof; k++) { 2694 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2695 } 2696 jrow++; 2697 } 2698 } 2699 2700 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2701 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2702 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2703 PetscFunctionReturn(0); 2704 } 2705 2706 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2707 { 2708 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2709 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2710 const PetscScalar *x,*v; 2711 PetscScalar *y,*sums; 2712 PetscErrorCode ierr; 2713 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2714 PetscInt n,i,jrow,j,dof = b->dof,k; 2715 2716 PetscFunctionBegin; 2717 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2718 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2719 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2720 idx = a->j; 2721 v = a->a; 2722 ii = a->i; 2723 2724 for (i=0; i<m; i++) { 2725 jrow = ii[i]; 2726 n = ii[i+1] - jrow; 2727 sums = y + dof*i; 2728 for (j=0; j<n; j++) { 2729 for (k=0; k<dof; k++) { 2730 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2731 } 2732 jrow++; 2733 } 2734 } 2735 2736 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2737 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2738 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2739 PetscFunctionReturn(0); 2740 } 2741 2742 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2743 { 2744 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2745 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2746 const PetscScalar *x,*v,*alpha; 2747 PetscScalar *y; 2748 PetscErrorCode ierr; 2749 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2750 PetscInt n,i,k; 2751 2752 PetscFunctionBegin; 2753 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2754 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2755 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2756 for (i=0; i<m; i++) { 2757 idx = a->j + a->i[i]; 2758 v = a->a + a->i[i]; 2759 n = a->i[i+1] - a->i[i]; 2760 alpha = x + dof*i; 2761 while (n-->0) { 2762 for (k=0; k<dof; k++) { 2763 y[dof*(*idx)+k] += alpha[k]*(*v); 2764 } 2765 idx++; v++; 2766 } 2767 } 2768 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2769 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2770 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2771 PetscFunctionReturn(0); 2772 } 2773 2774 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2775 { 2776 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2777 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2778 const PetscScalar *x,*v,*alpha; 2779 PetscScalar *y; 2780 PetscErrorCode ierr; 2781 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2782 PetscInt n,i,k; 2783 2784 PetscFunctionBegin; 2785 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2786 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2787 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2788 for (i=0; i<m; i++) { 2789 idx = a->j + a->i[i]; 2790 v = a->a + a->i[i]; 2791 n = a->i[i+1] - a->i[i]; 2792 alpha = x + dof*i; 2793 while (n-->0) { 2794 for (k=0; k<dof; k++) { 2795 y[dof*(*idx)+k] += alpha[k]*(*v); 2796 } 2797 idx++; v++; 2798 } 2799 } 2800 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2801 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2802 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2803 PetscFunctionReturn(0); 2804 } 2805 2806 /*===================================================================================*/ 2807 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2808 { 2809 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2810 PetscErrorCode ierr; 2811 2812 PetscFunctionBegin; 2813 /* start the scatter */ 2814 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2815 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2816 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2817 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2818 PetscFunctionReturn(0); 2819 } 2820 2821 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2822 { 2823 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2824 PetscErrorCode ierr; 2825 2826 PetscFunctionBegin; 2827 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2828 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2829 ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2830 ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2831 PetscFunctionReturn(0); 2832 } 2833 2834 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2835 { 2836 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2837 PetscErrorCode ierr; 2838 2839 PetscFunctionBegin; 2840 /* start the scatter */ 2841 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2842 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2843 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2844 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2845 PetscFunctionReturn(0); 2846 } 2847 2848 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2849 { 2850 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2851 PetscErrorCode ierr; 2852 2853 PetscFunctionBegin; 2854 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2855 ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2856 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2857 ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2858 PetscFunctionReturn(0); 2859 } 2860 2861 /* ----------------------------------------------------------------*/ 2862 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2863 { 2864 PetscErrorCode ierr; 2865 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2866 Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data; 2867 Mat P =pp->AIJ; 2868 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2869 PetscInt *pti,*ptj,*ptJ; 2870 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2871 const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 2872 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 2873 MatScalar *ca; 2874 const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 2875 2876 PetscFunctionBegin; 2877 /* Get ij structure of P^T */ 2878 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2879 2880 cn = pn*ppdof; 2881 /* Allocate ci array, arrays for fill computation and */ 2882 /* free space for accumulating nonzero column info */ 2883 ierr = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr); 2884 ci[0] = 0; 2885 2886 /* Work arrays for rows of P^T*A */ 2887 ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr); 2888 ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr); 2889 ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr); 2890 2891 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 2892 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 2893 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2894 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr); 2895 current_space = free_space; 2896 2897 /* Determine symbolic info for each row of C: */ 2898 for (i=0; i<pn; i++) { 2899 ptnzi = pti[i+1] - pti[i]; 2900 ptJ = ptj + pti[i]; 2901 for (dof=0; dof<ppdof; dof++) { 2902 ptanzi = 0; 2903 /* Determine symbolic row of PtA: */ 2904 for (j=0; j<ptnzi; j++) { 2905 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 2906 arow = ptJ[j]*ppdof + dof; 2907 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 2908 anzj = ai[arow+1] - ai[arow]; 2909 ajj = aj + ai[arow]; 2910 for (k=0; k<anzj; k++) { 2911 if (!ptadenserow[ajj[k]]) { 2912 ptadenserow[ajj[k]] = -1; 2913 ptasparserow[ptanzi++] = ajj[k]; 2914 } 2915 } 2916 } 2917 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 2918 ptaj = ptasparserow; 2919 cnzi = 0; 2920 for (j=0; j<ptanzi; j++) { 2921 /* Get offset within block of P */ 2922 pshift = *ptaj%ppdof; 2923 /* Get block row of P */ 2924 prow = (*ptaj++)/ppdof; /* integer division */ 2925 /* P has same number of nonzeros per row as the compressed form */ 2926 pnzj = pi[prow+1] - pi[prow]; 2927 pjj = pj + pi[prow]; 2928 for (k=0;k<pnzj;k++) { 2929 /* Locations in C are shifted by the offset within the block */ 2930 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 2931 if (!denserow[pjj[k]*ppdof+pshift]) { 2932 denserow[pjj[k]*ppdof+pshift] = -1; 2933 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 2934 } 2935 } 2936 } 2937 2938 /* sort sparserow */ 2939 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 2940 2941 /* If free space is not available, make more free space */ 2942 /* Double the amount of total space in the list */ 2943 if (current_space->local_remaining<cnzi) { 2944 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 2945 } 2946 2947 /* Copy data into free space, and zero out denserows */ 2948 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 2949 2950 current_space->array += cnzi; 2951 current_space->local_used += cnzi; 2952 current_space->local_remaining -= cnzi; 2953 2954 for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 2955 for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0; 2956 2957 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 2958 /* For now, we will recompute what is needed. */ 2959 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 2960 } 2961 } 2962 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 2963 /* Allocate space for cj, initialize cj, and */ 2964 /* destroy list of free space and other temporary array(s) */ 2965 ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr); 2966 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 2967 ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr); 2968 2969 /* Allocate space for ca */ 2970 ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr); 2971 2972 /* put together the new matrix */ 2973 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 2974 ierr = MatSetBlockSize(*C,pp->dof);CHKERRQ(ierr); 2975 2976 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 2977 /* Since these are PETSc arrays, change flags to free them as necessary. */ 2978 c = (Mat_SeqAIJ*)((*C)->data); 2979 c->free_a = PETSC_TRUE; 2980 c->free_ij = PETSC_TRUE; 2981 c->nonew = 0; 2982 2983 (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 2984 2985 /* Clean up. */ 2986 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2987 PetscFunctionReturn(0); 2988 } 2989 2990 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 2991 { 2992 /* This routine requires testing -- first draft only */ 2993 PetscErrorCode ierr; 2994 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2995 Mat P =pp->AIJ; 2996 Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data; 2997 Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data; 2998 Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data; 2999 const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj; 3000 const PetscInt *ci=c->i,*cj=c->j,*cjj; 3001 const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3002 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3003 const MatScalar *aa=a->a,*pa=p->a,*pA,*paj; 3004 MatScalar *ca=c->a,*caj,*apa; 3005 3006 PetscFunctionBegin; 3007 /* Allocate temporary array for storage of one row of A*P */ 3008 ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr); 3009 3010 /* Clear old values in C */ 3011 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 3012 3013 for (i=0; i<am; i++) { 3014 /* Form sparse row of A*P */ 3015 anzi = ai[i+1] - ai[i]; 3016 apnzj = 0; 3017 for (j=0; j<anzi; j++) { 3018 /* Get offset within block of P */ 3019 pshift = *aj%ppdof; 3020 /* Get block row of P */ 3021 prow = *aj++/ppdof; /* integer division */ 3022 pnzj = pi[prow+1] - pi[prow]; 3023 pjj = pj + pi[prow]; 3024 paj = pa + pi[prow]; 3025 for (k=0; k<pnzj; k++) { 3026 poffset = pjj[k]*ppdof+pshift; 3027 if (!apjdense[poffset]) { 3028 apjdense[poffset] = -1; 3029 apj[apnzj++] = poffset; 3030 } 3031 apa[poffset] += (*aa)*paj[k]; 3032 } 3033 ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 3034 aa++; 3035 } 3036 3037 /* Sort the j index array for quick sparse axpy. */ 3038 /* Note: a array does not need sorting as it is in dense storage locations. */ 3039 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 3040 3041 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 3042 prow = i/ppdof; /* integer division */ 3043 pshift = i%ppdof; 3044 poffset = pi[prow]; 3045 pnzi = pi[prow+1] - poffset; 3046 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 3047 pJ = pj+poffset; 3048 pA = pa+poffset; 3049 for (j=0; j<pnzi; j++) { 3050 crow = (*pJ)*ppdof+pshift; 3051 cjj = cj + ci[crow]; 3052 caj = ca + ci[crow]; 3053 pJ++; 3054 /* Perform sparse axpy operation. Note cjj includes apj. */ 3055 for (k=0,nextap=0; nextap<apnzj; k++) { 3056 if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]]; 3057 } 3058 ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 3059 pA++; 3060 } 3061 3062 /* Zero the current row info for A*P */ 3063 for (j=0; j<apnzj; j++) { 3064 apa[apj[j]] = 0.; 3065 apjdense[apj[j]] = 0; 3066 } 3067 } 3068 3069 /* Assemble the final matrix and clean up */ 3070 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3071 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3072 ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr); 3073 PetscFunctionReturn(0); 3074 } 3075 3076 PETSC_INTERN PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3077 { 3078 PetscErrorCode ierr; 3079 3080 PetscFunctionBegin; 3081 if (scall == MAT_INITIAL_MATRIX) { 3082 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3083 ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr); 3084 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3085 } 3086 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3087 ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr); 3088 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3089 PetscFunctionReturn(0); 3090 } 3091 3092 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 3093 { 3094 PetscErrorCode ierr; 3095 3096 PetscFunctionBegin; 3097 /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 3098 ierr = MatConvert(PP,MATMPIAIJ,MAT_INPLACE_MATRIX,&PP);CHKERRQ(ierr); 3099 ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 3100 PetscFunctionReturn(0); 3101 } 3102 3103 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 3104 { 3105 PetscFunctionBegin; 3106 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 3107 PetscFunctionReturn(0); 3108 } 3109 3110 PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3111 { 3112 PetscErrorCode ierr; 3113 3114 PetscFunctionBegin; 3115 if (scall == MAT_INITIAL_MATRIX) { 3116 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3117 ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);CHKERRQ(ierr); 3118 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3119 } 3120 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3121 ierr = ((*C)->ops->ptapnumeric)(A,P,*C);CHKERRQ(ierr); 3122 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3123 PetscFunctionReturn(0); 3124 } 3125 3126 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3127 { 3128 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3129 Mat a = b->AIJ,B; 3130 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 3131 PetscErrorCode ierr; 3132 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3133 PetscInt *cols; 3134 PetscScalar *vals; 3135 3136 PetscFunctionBegin; 3137 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 3138 ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr); 3139 for (i=0; i<m; i++) { 3140 nmax = PetscMax(nmax,aij->ilen[i]); 3141 for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i]; 3142 } 3143 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 3144 ierr = PetscFree(ilen);CHKERRQ(ierr); 3145 ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr); 3146 ii = 0; 3147 for (i=0; i<m; i++) { 3148 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3149 for (j=0; j<dof; j++) { 3150 for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j; 3151 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3152 ii++; 3153 } 3154 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3155 } 3156 ierr = PetscFree(icols);CHKERRQ(ierr); 3157 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3158 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3159 3160 if (reuse == MAT_INPLACE_MATRIX) { 3161 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 3162 } else { 3163 *newmat = B; 3164 } 3165 PetscFunctionReturn(0); 3166 } 3167 3168 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3169 3170 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3171 { 3172 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3173 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 3174 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 3175 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 3176 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 3177 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 3178 PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0; 3179 PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL; 3180 PetscInt rstart,cstart,*garray,ii,k; 3181 PetscErrorCode ierr; 3182 PetscScalar *vals,*ovals; 3183 3184 PetscFunctionBegin; 3185 ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr); 3186 for (i=0; i<A->rmap->n/dof; i++) { 3187 nmax = PetscMax(nmax,AIJ->ilen[i]); 3188 onmax = PetscMax(onmax,OAIJ->ilen[i]); 3189 for (j=0; j<dof; j++) { 3190 dnz[dof*i+j] = AIJ->ilen[i]; 3191 onz[dof*i+j] = OAIJ->ilen[i]; 3192 } 3193 } 3194 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 3195 ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr); 3196 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 3197 3198 ierr = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr); 3199 rstart = dof*maij->A->rmap->rstart; 3200 cstart = dof*maij->A->cmap->rstart; 3201 garray = mpiaij->garray; 3202 3203 ii = rstart; 3204 for (i=0; i<A->rmap->n/dof; i++) { 3205 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3206 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3207 for (j=0; j<dof; j++) { 3208 for (k=0; k<ncols; k++) { 3209 icols[k] = cstart + dof*cols[k]+j; 3210 } 3211 for (k=0; k<oncols; k++) { 3212 oicols[k] = dof*garray[ocols[k]]+j; 3213 } 3214 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3215 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 3216 ii++; 3217 } 3218 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3219 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3220 } 3221 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 3222 3223 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3224 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3225 3226 if (reuse == MAT_INPLACE_MATRIX) { 3227 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 3228 ((PetscObject)A)->refct = 1; 3229 3230 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 3231 3232 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3233 } else { 3234 *newmat = B; 3235 } 3236 PetscFunctionReturn(0); 3237 } 3238 3239 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 3240 { 3241 PetscErrorCode ierr; 3242 Mat A; 3243 3244 PetscFunctionBegin; 3245 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 3246 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 3247 ierr = MatDestroy(&A);CHKERRQ(ierr); 3248 PetscFunctionReturn(0); 3249 } 3250 3251 /* ---------------------------------------------------------------------------------- */ 3252 /*@C 3253 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 3254 operations for multicomponent problems. It interpolates each component the same 3255 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 3256 and MATMPIAIJ for distributed matrices. 3257 3258 Collective 3259 3260 Input Parameters: 3261 + A - the AIJ matrix describing the action on blocks 3262 - dof - the block size (number of components per node) 3263 3264 Output Parameter: 3265 . maij - the new MAIJ matrix 3266 3267 Operations provided: 3268 + MatMult 3269 . MatMultTranspose 3270 . MatMultAdd 3271 . MatMultTransposeAdd 3272 - MatView 3273 3274 Level: advanced 3275 3276 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ 3277 @*/ 3278 PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 3279 { 3280 PetscErrorCode ierr; 3281 PetscMPIInt size; 3282 PetscInt n; 3283 Mat B; 3284 3285 PetscFunctionBegin; 3286 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3287 3288 if (dof == 1) *maij = A; 3289 else { 3290 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3291 ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3292 ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr); 3293 ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr); 3294 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3295 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3296 3297 B->assembled = PETSC_TRUE; 3298 3299 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3300 if (size == 1) { 3301 Mat_SeqMAIJ *b; 3302 3303 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 3304 3305 B->ops->setup = NULL; 3306 B->ops->destroy = MatDestroy_SeqMAIJ; 3307 B->ops->view = MatView_SeqMAIJ; 3308 b = (Mat_SeqMAIJ*)B->data; 3309 b->dof = dof; 3310 b->AIJ = A; 3311 3312 if (dof == 2) { 3313 B->ops->mult = MatMult_SeqMAIJ_2; 3314 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3315 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3316 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3317 } else if (dof == 3) { 3318 B->ops->mult = MatMult_SeqMAIJ_3; 3319 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3320 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3321 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3322 } else if (dof == 4) { 3323 B->ops->mult = MatMult_SeqMAIJ_4; 3324 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3325 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3326 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3327 } else if (dof == 5) { 3328 B->ops->mult = MatMult_SeqMAIJ_5; 3329 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3330 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3331 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 3332 } else if (dof == 6) { 3333 B->ops->mult = MatMult_SeqMAIJ_6; 3334 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 3335 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 3336 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3337 } else if (dof == 7) { 3338 B->ops->mult = MatMult_SeqMAIJ_7; 3339 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3340 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3341 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 3342 } else if (dof == 8) { 3343 B->ops->mult = MatMult_SeqMAIJ_8; 3344 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 3345 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 3346 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 3347 } else if (dof == 9) { 3348 B->ops->mult = MatMult_SeqMAIJ_9; 3349 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 3350 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 3351 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3352 } else if (dof == 10) { 3353 B->ops->mult = MatMult_SeqMAIJ_10; 3354 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3355 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3356 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3357 } else if (dof == 11) { 3358 B->ops->mult = MatMult_SeqMAIJ_11; 3359 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3360 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3361 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 3362 } else if (dof == 16) { 3363 B->ops->mult = MatMult_SeqMAIJ_16; 3364 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 3365 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 3366 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3367 } else if (dof == 18) { 3368 B->ops->mult = MatMult_SeqMAIJ_18; 3369 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3370 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3371 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3372 } else { 3373 B->ops->mult = MatMult_SeqMAIJ_N; 3374 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 3375 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 3376 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 3377 } 3378 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3379 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 3380 } else { 3381 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data; 3382 Mat_MPIMAIJ *b; 3383 IS from,to; 3384 Vec gvec; 3385 3386 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 3387 3388 B->ops->setup = NULL; 3389 B->ops->destroy = MatDestroy_MPIMAIJ; 3390 B->ops->view = MatView_MPIMAIJ; 3391 3392 b = (Mat_MPIMAIJ*)B->data; 3393 b->dof = dof; 3394 b->A = A; 3395 3396 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 3397 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 3398 3399 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3400 ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr); 3401 ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr); 3402 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3403 ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr); 3404 3405 /* create two temporary Index sets for build scatter gather */ 3406 ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 3407 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3408 3409 /* create temporary global vector to generate scatter context */ 3410 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 3411 3412 /* generate the scatter context */ 3413 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3414 3415 ierr = ISDestroy(&from);CHKERRQ(ierr); 3416 ierr = ISDestroy(&to);CHKERRQ(ierr); 3417 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 3418 3419 B->ops->mult = MatMult_MPIMAIJ_dof; 3420 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3421 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3422 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3423 3424 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3425 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);CHKERRQ(ierr); 3426 } 3427 B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 3428 ierr = MatSetUp(B);CHKERRQ(ierr); 3429 *maij = B; 3430 ierr = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr); 3431 } 3432 PetscFunctionReturn(0); 3433 } 3434