1 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20 #include <../src/mat/utils/freespace.h> 21 22 /*@ 23 MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 24 25 Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 26 27 Input Parameter: 28 . A - the MAIJ matrix 29 30 Output Parameter: 31 . B - the AIJ matrix 32 33 Level: advanced 34 35 Notes: 36 The reference count on the AIJ matrix is not increased so you should not destroy it. 37 38 .seealso: MatCreateMAIJ() 39 @*/ 40 PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 41 { 42 PetscErrorCode ierr; 43 PetscBool ismpimaij,isseqmaij; 44 45 PetscFunctionBegin; 46 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 47 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 48 if (ismpimaij) { 49 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 50 51 *B = b->A; 52 } else if (isseqmaij) { 53 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 54 55 *B = b->AIJ; 56 } else { 57 *B = A; 58 } 59 PetscFunctionReturn(0); 60 } 61 62 /*@ 63 MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 64 65 Logically Collective 66 67 Input Parameter: 68 + A - the MAIJ matrix 69 - dof - the block size for the new matrix 70 71 Output Parameter: 72 . B - the new MAIJ matrix 73 74 Level: advanced 75 76 .seealso: MatCreateMAIJ() 77 @*/ 78 PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 79 { 80 PetscErrorCode ierr; 81 Mat Aij = NULL; 82 83 PetscFunctionBegin; 84 PetscValidLogicalCollectiveInt(A,dof,2); 85 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 86 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 87 PetscFunctionReturn(0); 88 } 89 90 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 91 { 92 PetscErrorCode ierr; 93 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 94 95 PetscFunctionBegin; 96 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 97 ierr = PetscFree(A->data);CHKERRQ(ierr); 98 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr); 99 ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL);CHKERRQ(ierr); 100 PetscFunctionReturn(0); 101 } 102 103 PetscErrorCode MatSetUp_MAIJ(Mat A) 104 { 105 PetscFunctionBegin; 106 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices"); 107 PetscFunctionReturn(0); 108 } 109 110 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 111 { 112 PetscErrorCode ierr; 113 Mat B; 114 115 PetscFunctionBegin; 116 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 117 ierr = MatView(B,viewer);CHKERRQ(ierr); 118 ierr = MatDestroy(&B);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 122 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 123 { 124 PetscErrorCode ierr; 125 Mat B; 126 127 PetscFunctionBegin; 128 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 129 ierr = MatView(B,viewer);CHKERRQ(ierr); 130 ierr = MatDestroy(&B);CHKERRQ(ierr); 131 PetscFunctionReturn(0); 132 } 133 134 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 135 { 136 PetscErrorCode ierr; 137 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 138 139 PetscFunctionBegin; 140 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 141 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 142 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 143 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 144 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 145 ierr = PetscFree(A->data);CHKERRQ(ierr); 146 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr); 147 ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr); 148 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 /*MC 153 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 154 multicomponent problems, interpolating or restricting each component the same way independently. 155 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 156 157 Operations provided: 158 + MatMult 159 . MatMultTranspose 160 . MatMultAdd 161 - MatMultTransposeAdd 162 163 Level: advanced 164 165 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ() 166 M*/ 167 168 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 169 { 170 PetscErrorCode ierr; 171 Mat_MPIMAIJ *b; 172 PetscMPIInt size; 173 174 PetscFunctionBegin; 175 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 176 A->data = (void*)b; 177 178 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 179 180 A->ops->setup = MatSetUp_MAIJ; 181 182 b->AIJ = 0; 183 b->dof = 0; 184 b->OAIJ = 0; 185 b->ctx = 0; 186 b->w = 0; 187 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 188 if (size == 1) { 189 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 190 } else { 191 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 192 } 193 A->preallocated = PETSC_TRUE; 194 A->assembled = PETSC_TRUE; 195 PetscFunctionReturn(0); 196 } 197 198 /* --------------------------------------------------------------------------------------*/ 199 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 200 { 201 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 202 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 203 const PetscScalar *x,*v; 204 PetscScalar *y, sum1, sum2; 205 PetscErrorCode ierr; 206 PetscInt nonzerorow=0,n,i,jrow,j; 207 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 208 209 PetscFunctionBegin; 210 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 211 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 212 idx = a->j; 213 v = a->a; 214 ii = a->i; 215 216 for (i=0; i<m; i++) { 217 jrow = ii[i]; 218 n = ii[i+1] - jrow; 219 sum1 = 0.0; 220 sum2 = 0.0; 221 222 nonzerorow += (n>0); 223 for (j=0; j<n; j++) { 224 sum1 += v[jrow]*x[2*idx[jrow]]; 225 sum2 += v[jrow]*x[2*idx[jrow]+1]; 226 jrow++; 227 } 228 y[2*i] = sum1; 229 y[2*i+1] = sum2; 230 } 231 232 ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 233 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 234 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 235 PetscFunctionReturn(0); 236 } 237 238 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 239 { 240 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 241 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 242 const PetscScalar *x,*v; 243 PetscScalar *y,alpha1,alpha2; 244 PetscErrorCode ierr; 245 PetscInt n,i; 246 const PetscInt m = b->AIJ->rmap->n,*idx; 247 248 PetscFunctionBegin; 249 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 250 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 251 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 252 253 for (i=0; i<m; i++) { 254 idx = a->j + a->i[i]; 255 v = a->a + a->i[i]; 256 n = a->i[i+1] - a->i[i]; 257 alpha1 = x[2*i]; 258 alpha2 = x[2*i+1]; 259 while (n-->0) { 260 y[2*(*idx)] += alpha1*(*v); 261 y[2*(*idx)+1] += alpha2*(*v); 262 idx++; v++; 263 } 264 } 265 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 266 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 267 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 268 PetscFunctionReturn(0); 269 } 270 271 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 272 { 273 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 274 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 275 const PetscScalar *x,*v; 276 PetscScalar *y,sum1, sum2; 277 PetscErrorCode ierr; 278 PetscInt n,i,jrow,j; 279 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 280 281 PetscFunctionBegin; 282 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 283 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 284 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 285 idx = a->j; 286 v = a->a; 287 ii = a->i; 288 289 for (i=0; i<m; i++) { 290 jrow = ii[i]; 291 n = ii[i+1] - jrow; 292 sum1 = 0.0; 293 sum2 = 0.0; 294 for (j=0; j<n; j++) { 295 sum1 += v[jrow]*x[2*idx[jrow]]; 296 sum2 += v[jrow]*x[2*idx[jrow]+1]; 297 jrow++; 298 } 299 y[2*i] += sum1; 300 y[2*i+1] += sum2; 301 } 302 303 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 304 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 305 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 306 PetscFunctionReturn(0); 307 } 308 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 309 { 310 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 311 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 312 const PetscScalar *x,*v; 313 PetscScalar *y,alpha1,alpha2; 314 PetscErrorCode ierr; 315 PetscInt n,i; 316 const PetscInt m = b->AIJ->rmap->n,*idx; 317 318 PetscFunctionBegin; 319 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 320 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 321 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 322 323 for (i=0; i<m; i++) { 324 idx = a->j + a->i[i]; 325 v = a->a + a->i[i]; 326 n = a->i[i+1] - a->i[i]; 327 alpha1 = x[2*i]; 328 alpha2 = x[2*i+1]; 329 while (n-->0) { 330 y[2*(*idx)] += alpha1*(*v); 331 y[2*(*idx)+1] += alpha2*(*v); 332 idx++; v++; 333 } 334 } 335 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 336 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 337 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 338 PetscFunctionReturn(0); 339 } 340 /* --------------------------------------------------------------------------------------*/ 341 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 342 { 343 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 344 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 345 const PetscScalar *x,*v; 346 PetscScalar *y,sum1, sum2, sum3; 347 PetscErrorCode ierr; 348 PetscInt nonzerorow=0,n,i,jrow,j; 349 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 350 351 PetscFunctionBegin; 352 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 353 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 354 idx = a->j; 355 v = a->a; 356 ii = a->i; 357 358 for (i=0; i<m; i++) { 359 jrow = ii[i]; 360 n = ii[i+1] - jrow; 361 sum1 = 0.0; 362 sum2 = 0.0; 363 sum3 = 0.0; 364 365 nonzerorow += (n>0); 366 for (j=0; j<n; j++) { 367 sum1 += v[jrow]*x[3*idx[jrow]]; 368 sum2 += v[jrow]*x[3*idx[jrow]+1]; 369 sum3 += v[jrow]*x[3*idx[jrow]+2]; 370 jrow++; 371 } 372 y[3*i] = sum1; 373 y[3*i+1] = sum2; 374 y[3*i+2] = sum3; 375 } 376 377 ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 378 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 379 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 380 PetscFunctionReturn(0); 381 } 382 383 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 384 { 385 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 386 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 387 const PetscScalar *x,*v; 388 PetscScalar *y,alpha1,alpha2,alpha3; 389 PetscErrorCode ierr; 390 PetscInt n,i; 391 const PetscInt m = b->AIJ->rmap->n,*idx; 392 393 PetscFunctionBegin; 394 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 395 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 396 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 397 398 for (i=0; i<m; i++) { 399 idx = a->j + a->i[i]; 400 v = a->a + a->i[i]; 401 n = a->i[i+1] - a->i[i]; 402 alpha1 = x[3*i]; 403 alpha2 = x[3*i+1]; 404 alpha3 = x[3*i+2]; 405 while (n-->0) { 406 y[3*(*idx)] += alpha1*(*v); 407 y[3*(*idx)+1] += alpha2*(*v); 408 y[3*(*idx)+2] += alpha3*(*v); 409 idx++; v++; 410 } 411 } 412 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 413 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 414 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 415 PetscFunctionReturn(0); 416 } 417 418 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 419 { 420 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 421 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 422 const PetscScalar *x,*v; 423 PetscScalar *y,sum1, sum2, sum3; 424 PetscErrorCode ierr; 425 PetscInt n,i,jrow,j; 426 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 427 428 PetscFunctionBegin; 429 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 430 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 431 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 432 idx = a->j; 433 v = a->a; 434 ii = a->i; 435 436 for (i=0; i<m; i++) { 437 jrow = ii[i]; 438 n = ii[i+1] - jrow; 439 sum1 = 0.0; 440 sum2 = 0.0; 441 sum3 = 0.0; 442 for (j=0; j<n; j++) { 443 sum1 += v[jrow]*x[3*idx[jrow]]; 444 sum2 += v[jrow]*x[3*idx[jrow]+1]; 445 sum3 += v[jrow]*x[3*idx[jrow]+2]; 446 jrow++; 447 } 448 y[3*i] += sum1; 449 y[3*i+1] += sum2; 450 y[3*i+2] += sum3; 451 } 452 453 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 454 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 455 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 456 PetscFunctionReturn(0); 457 } 458 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 459 { 460 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 461 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 462 const PetscScalar *x,*v; 463 PetscScalar *y,alpha1,alpha2,alpha3; 464 PetscErrorCode ierr; 465 PetscInt n,i; 466 const PetscInt m = b->AIJ->rmap->n,*idx; 467 468 PetscFunctionBegin; 469 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 470 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 471 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 472 for (i=0; i<m; i++) { 473 idx = a->j + a->i[i]; 474 v = a->a + a->i[i]; 475 n = a->i[i+1] - a->i[i]; 476 alpha1 = x[3*i]; 477 alpha2 = x[3*i+1]; 478 alpha3 = x[3*i+2]; 479 while (n-->0) { 480 y[3*(*idx)] += alpha1*(*v); 481 y[3*(*idx)+1] += alpha2*(*v); 482 y[3*(*idx)+2] += alpha3*(*v); 483 idx++; v++; 484 } 485 } 486 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 487 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 488 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 489 PetscFunctionReturn(0); 490 } 491 492 /* ------------------------------------------------------------------------------*/ 493 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 494 { 495 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 496 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 497 const PetscScalar *x,*v; 498 PetscScalar *y,sum1, sum2, sum3, sum4; 499 PetscErrorCode ierr; 500 PetscInt nonzerorow=0,n,i,jrow,j; 501 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 502 503 PetscFunctionBegin; 504 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 505 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 506 idx = a->j; 507 v = a->a; 508 ii = a->i; 509 510 for (i=0; i<m; i++) { 511 jrow = ii[i]; 512 n = ii[i+1] - jrow; 513 sum1 = 0.0; 514 sum2 = 0.0; 515 sum3 = 0.0; 516 sum4 = 0.0; 517 nonzerorow += (n>0); 518 for (j=0; j<n; j++) { 519 sum1 += v[jrow]*x[4*idx[jrow]]; 520 sum2 += v[jrow]*x[4*idx[jrow]+1]; 521 sum3 += v[jrow]*x[4*idx[jrow]+2]; 522 sum4 += v[jrow]*x[4*idx[jrow]+3]; 523 jrow++; 524 } 525 y[4*i] = sum1; 526 y[4*i+1] = sum2; 527 y[4*i+2] = sum3; 528 y[4*i+3] = sum4; 529 } 530 531 ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 532 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 533 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 534 PetscFunctionReturn(0); 535 } 536 537 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 538 { 539 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 540 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 541 const PetscScalar *x,*v; 542 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 543 PetscErrorCode ierr; 544 PetscInt n,i; 545 const PetscInt m = b->AIJ->rmap->n,*idx; 546 547 PetscFunctionBegin; 548 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 549 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 550 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 551 for (i=0; i<m; i++) { 552 idx = a->j + a->i[i]; 553 v = a->a + a->i[i]; 554 n = a->i[i+1] - a->i[i]; 555 alpha1 = x[4*i]; 556 alpha2 = x[4*i+1]; 557 alpha3 = x[4*i+2]; 558 alpha4 = x[4*i+3]; 559 while (n-->0) { 560 y[4*(*idx)] += alpha1*(*v); 561 y[4*(*idx)+1] += alpha2*(*v); 562 y[4*(*idx)+2] += alpha3*(*v); 563 y[4*(*idx)+3] += alpha4*(*v); 564 idx++; v++; 565 } 566 } 567 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 568 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 569 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 570 PetscFunctionReturn(0); 571 } 572 573 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 574 { 575 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 576 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 577 const PetscScalar *x,*v; 578 PetscScalar *y,sum1, sum2, sum3, sum4; 579 PetscErrorCode ierr; 580 PetscInt n,i,jrow,j; 581 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 582 583 PetscFunctionBegin; 584 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 585 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 586 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 587 idx = a->j; 588 v = a->a; 589 ii = a->i; 590 591 for (i=0; i<m; i++) { 592 jrow = ii[i]; 593 n = ii[i+1] - jrow; 594 sum1 = 0.0; 595 sum2 = 0.0; 596 sum3 = 0.0; 597 sum4 = 0.0; 598 for (j=0; j<n; j++) { 599 sum1 += v[jrow]*x[4*idx[jrow]]; 600 sum2 += v[jrow]*x[4*idx[jrow]+1]; 601 sum3 += v[jrow]*x[4*idx[jrow]+2]; 602 sum4 += v[jrow]*x[4*idx[jrow]+3]; 603 jrow++; 604 } 605 y[4*i] += sum1; 606 y[4*i+1] += sum2; 607 y[4*i+2] += sum3; 608 y[4*i+3] += sum4; 609 } 610 611 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 612 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 613 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 614 PetscFunctionReturn(0); 615 } 616 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 617 { 618 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 619 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 620 const PetscScalar *x,*v; 621 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 622 PetscErrorCode ierr; 623 PetscInt n,i; 624 const PetscInt m = b->AIJ->rmap->n,*idx; 625 626 PetscFunctionBegin; 627 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 628 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 629 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 630 631 for (i=0; i<m; i++) { 632 idx = a->j + a->i[i]; 633 v = a->a + a->i[i]; 634 n = a->i[i+1] - a->i[i]; 635 alpha1 = x[4*i]; 636 alpha2 = x[4*i+1]; 637 alpha3 = x[4*i+2]; 638 alpha4 = x[4*i+3]; 639 while (n-->0) { 640 y[4*(*idx)] += alpha1*(*v); 641 y[4*(*idx)+1] += alpha2*(*v); 642 y[4*(*idx)+2] += alpha3*(*v); 643 y[4*(*idx)+3] += alpha4*(*v); 644 idx++; v++; 645 } 646 } 647 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 648 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 649 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 650 PetscFunctionReturn(0); 651 } 652 /* ------------------------------------------------------------------------------*/ 653 654 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 655 { 656 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 657 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 658 const PetscScalar *x,*v; 659 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 660 PetscErrorCode ierr; 661 PetscInt nonzerorow=0,n,i,jrow,j; 662 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 663 664 PetscFunctionBegin; 665 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 666 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 667 idx = a->j; 668 v = a->a; 669 ii = a->i; 670 671 for (i=0; i<m; i++) { 672 jrow = ii[i]; 673 n = ii[i+1] - jrow; 674 sum1 = 0.0; 675 sum2 = 0.0; 676 sum3 = 0.0; 677 sum4 = 0.0; 678 sum5 = 0.0; 679 680 nonzerorow += (n>0); 681 for (j=0; j<n; j++) { 682 sum1 += v[jrow]*x[5*idx[jrow]]; 683 sum2 += v[jrow]*x[5*idx[jrow]+1]; 684 sum3 += v[jrow]*x[5*idx[jrow]+2]; 685 sum4 += v[jrow]*x[5*idx[jrow]+3]; 686 sum5 += v[jrow]*x[5*idx[jrow]+4]; 687 jrow++; 688 } 689 y[5*i] = sum1; 690 y[5*i+1] = sum2; 691 y[5*i+2] = sum3; 692 y[5*i+3] = sum4; 693 y[5*i+4] = sum5; 694 } 695 696 ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 697 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 698 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 699 PetscFunctionReturn(0); 700 } 701 702 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 703 { 704 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 705 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 706 const PetscScalar *x,*v; 707 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 708 PetscErrorCode ierr; 709 PetscInt n,i; 710 const PetscInt m = b->AIJ->rmap->n,*idx; 711 712 PetscFunctionBegin; 713 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 714 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 715 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 716 717 for (i=0; i<m; i++) { 718 idx = a->j + a->i[i]; 719 v = a->a + a->i[i]; 720 n = a->i[i+1] - a->i[i]; 721 alpha1 = x[5*i]; 722 alpha2 = x[5*i+1]; 723 alpha3 = x[5*i+2]; 724 alpha4 = x[5*i+3]; 725 alpha5 = x[5*i+4]; 726 while (n-->0) { 727 y[5*(*idx)] += alpha1*(*v); 728 y[5*(*idx)+1] += alpha2*(*v); 729 y[5*(*idx)+2] += alpha3*(*v); 730 y[5*(*idx)+3] += alpha4*(*v); 731 y[5*(*idx)+4] += alpha5*(*v); 732 idx++; v++; 733 } 734 } 735 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 736 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 737 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 738 PetscFunctionReturn(0); 739 } 740 741 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 742 { 743 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 744 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 745 const PetscScalar *x,*v; 746 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 747 PetscErrorCode ierr; 748 PetscInt n,i,jrow,j; 749 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 750 751 PetscFunctionBegin; 752 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 753 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 754 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 755 idx = a->j; 756 v = a->a; 757 ii = a->i; 758 759 for (i=0; i<m; i++) { 760 jrow = ii[i]; 761 n = ii[i+1] - jrow; 762 sum1 = 0.0; 763 sum2 = 0.0; 764 sum3 = 0.0; 765 sum4 = 0.0; 766 sum5 = 0.0; 767 for (j=0; j<n; j++) { 768 sum1 += v[jrow]*x[5*idx[jrow]]; 769 sum2 += v[jrow]*x[5*idx[jrow]+1]; 770 sum3 += v[jrow]*x[5*idx[jrow]+2]; 771 sum4 += v[jrow]*x[5*idx[jrow]+3]; 772 sum5 += v[jrow]*x[5*idx[jrow]+4]; 773 jrow++; 774 } 775 y[5*i] += sum1; 776 y[5*i+1] += sum2; 777 y[5*i+2] += sum3; 778 y[5*i+3] += sum4; 779 y[5*i+4] += sum5; 780 } 781 782 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 783 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 784 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 785 PetscFunctionReturn(0); 786 } 787 788 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 789 { 790 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 791 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 792 const PetscScalar *x,*v; 793 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 794 PetscErrorCode ierr; 795 PetscInt n,i; 796 const PetscInt m = b->AIJ->rmap->n,*idx; 797 798 PetscFunctionBegin; 799 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 800 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 801 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 802 803 for (i=0; i<m; i++) { 804 idx = a->j + a->i[i]; 805 v = a->a + a->i[i]; 806 n = a->i[i+1] - a->i[i]; 807 alpha1 = x[5*i]; 808 alpha2 = x[5*i+1]; 809 alpha3 = x[5*i+2]; 810 alpha4 = x[5*i+3]; 811 alpha5 = x[5*i+4]; 812 while (n-->0) { 813 y[5*(*idx)] += alpha1*(*v); 814 y[5*(*idx)+1] += alpha2*(*v); 815 y[5*(*idx)+2] += alpha3*(*v); 816 y[5*(*idx)+3] += alpha4*(*v); 817 y[5*(*idx)+4] += alpha5*(*v); 818 idx++; v++; 819 } 820 } 821 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 822 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 823 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 824 PetscFunctionReturn(0); 825 } 826 827 /* ------------------------------------------------------------------------------*/ 828 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 829 { 830 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 831 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 832 const PetscScalar *x,*v; 833 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 834 PetscErrorCode ierr; 835 PetscInt nonzerorow=0,n,i,jrow,j; 836 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 837 838 PetscFunctionBegin; 839 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 840 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 841 idx = a->j; 842 v = a->a; 843 ii = a->i; 844 845 for (i=0; i<m; i++) { 846 jrow = ii[i]; 847 n = ii[i+1] - jrow; 848 sum1 = 0.0; 849 sum2 = 0.0; 850 sum3 = 0.0; 851 sum4 = 0.0; 852 sum5 = 0.0; 853 sum6 = 0.0; 854 855 nonzerorow += (n>0); 856 for (j=0; j<n; j++) { 857 sum1 += v[jrow]*x[6*idx[jrow]]; 858 sum2 += v[jrow]*x[6*idx[jrow]+1]; 859 sum3 += v[jrow]*x[6*idx[jrow]+2]; 860 sum4 += v[jrow]*x[6*idx[jrow]+3]; 861 sum5 += v[jrow]*x[6*idx[jrow]+4]; 862 sum6 += v[jrow]*x[6*idx[jrow]+5]; 863 jrow++; 864 } 865 y[6*i] = sum1; 866 y[6*i+1] = sum2; 867 y[6*i+2] = sum3; 868 y[6*i+3] = sum4; 869 y[6*i+4] = sum5; 870 y[6*i+5] = sum6; 871 } 872 873 ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 874 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 875 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 876 PetscFunctionReturn(0); 877 } 878 879 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 880 { 881 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 882 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 883 const PetscScalar *x,*v; 884 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 885 PetscErrorCode ierr; 886 PetscInt n,i; 887 const PetscInt m = b->AIJ->rmap->n,*idx; 888 889 PetscFunctionBegin; 890 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 891 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 892 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 893 894 for (i=0; i<m; i++) { 895 idx = a->j + a->i[i]; 896 v = a->a + a->i[i]; 897 n = a->i[i+1] - a->i[i]; 898 alpha1 = x[6*i]; 899 alpha2 = x[6*i+1]; 900 alpha3 = x[6*i+2]; 901 alpha4 = x[6*i+3]; 902 alpha5 = x[6*i+4]; 903 alpha6 = x[6*i+5]; 904 while (n-->0) { 905 y[6*(*idx)] += alpha1*(*v); 906 y[6*(*idx)+1] += alpha2*(*v); 907 y[6*(*idx)+2] += alpha3*(*v); 908 y[6*(*idx)+3] += alpha4*(*v); 909 y[6*(*idx)+4] += alpha5*(*v); 910 y[6*(*idx)+5] += alpha6*(*v); 911 idx++; v++; 912 } 913 } 914 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 915 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 916 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 917 PetscFunctionReturn(0); 918 } 919 920 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 921 { 922 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 923 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 924 const PetscScalar *x,*v; 925 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 926 PetscErrorCode ierr; 927 PetscInt n,i,jrow,j; 928 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 929 930 PetscFunctionBegin; 931 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 932 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 933 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 934 idx = a->j; 935 v = a->a; 936 ii = a->i; 937 938 for (i=0; i<m; i++) { 939 jrow = ii[i]; 940 n = ii[i+1] - jrow; 941 sum1 = 0.0; 942 sum2 = 0.0; 943 sum3 = 0.0; 944 sum4 = 0.0; 945 sum5 = 0.0; 946 sum6 = 0.0; 947 for (j=0; j<n; j++) { 948 sum1 += v[jrow]*x[6*idx[jrow]]; 949 sum2 += v[jrow]*x[6*idx[jrow]+1]; 950 sum3 += v[jrow]*x[6*idx[jrow]+2]; 951 sum4 += v[jrow]*x[6*idx[jrow]+3]; 952 sum5 += v[jrow]*x[6*idx[jrow]+4]; 953 sum6 += v[jrow]*x[6*idx[jrow]+5]; 954 jrow++; 955 } 956 y[6*i] += sum1; 957 y[6*i+1] += sum2; 958 y[6*i+2] += sum3; 959 y[6*i+3] += sum4; 960 y[6*i+4] += sum5; 961 y[6*i+5] += sum6; 962 } 963 964 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 965 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 966 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 967 PetscFunctionReturn(0); 968 } 969 970 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 971 { 972 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 973 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 974 const PetscScalar *x,*v; 975 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 976 PetscErrorCode ierr; 977 PetscInt n,i; 978 const PetscInt m = b->AIJ->rmap->n,*idx; 979 980 PetscFunctionBegin; 981 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 982 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 983 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 984 985 for (i=0; i<m; i++) { 986 idx = a->j + a->i[i]; 987 v = a->a + a->i[i]; 988 n = a->i[i+1] - a->i[i]; 989 alpha1 = x[6*i]; 990 alpha2 = x[6*i+1]; 991 alpha3 = x[6*i+2]; 992 alpha4 = x[6*i+3]; 993 alpha5 = x[6*i+4]; 994 alpha6 = x[6*i+5]; 995 while (n-->0) { 996 y[6*(*idx)] += alpha1*(*v); 997 y[6*(*idx)+1] += alpha2*(*v); 998 y[6*(*idx)+2] += alpha3*(*v); 999 y[6*(*idx)+3] += alpha4*(*v); 1000 y[6*(*idx)+4] += alpha5*(*v); 1001 y[6*(*idx)+5] += alpha6*(*v); 1002 idx++; v++; 1003 } 1004 } 1005 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1006 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1007 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 /* ------------------------------------------------------------------------------*/ 1012 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1013 { 1014 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1015 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1016 const PetscScalar *x,*v; 1017 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1018 PetscErrorCode ierr; 1019 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1020 PetscInt nonzerorow=0,n,i,jrow,j; 1021 1022 PetscFunctionBegin; 1023 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1024 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1025 idx = a->j; 1026 v = a->a; 1027 ii = a->i; 1028 1029 for (i=0; i<m; i++) { 1030 jrow = ii[i]; 1031 n = ii[i+1] - jrow; 1032 sum1 = 0.0; 1033 sum2 = 0.0; 1034 sum3 = 0.0; 1035 sum4 = 0.0; 1036 sum5 = 0.0; 1037 sum6 = 0.0; 1038 sum7 = 0.0; 1039 1040 nonzerorow += (n>0); 1041 for (j=0; j<n; j++) { 1042 sum1 += v[jrow]*x[7*idx[jrow]]; 1043 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1044 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1045 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1046 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1047 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1048 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1049 jrow++; 1050 } 1051 y[7*i] = sum1; 1052 y[7*i+1] = sum2; 1053 y[7*i+2] = sum3; 1054 y[7*i+3] = sum4; 1055 y[7*i+4] = sum5; 1056 y[7*i+5] = sum6; 1057 y[7*i+6] = sum7; 1058 } 1059 1060 ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 1061 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1062 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1063 PetscFunctionReturn(0); 1064 } 1065 1066 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1067 { 1068 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1069 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1070 const PetscScalar *x,*v; 1071 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1072 PetscErrorCode ierr; 1073 const PetscInt m = b->AIJ->rmap->n,*idx; 1074 PetscInt n,i; 1075 1076 PetscFunctionBegin; 1077 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1078 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1079 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1080 1081 for (i=0; i<m; i++) { 1082 idx = a->j + a->i[i]; 1083 v = a->a + a->i[i]; 1084 n = a->i[i+1] - a->i[i]; 1085 alpha1 = x[7*i]; 1086 alpha2 = x[7*i+1]; 1087 alpha3 = x[7*i+2]; 1088 alpha4 = x[7*i+3]; 1089 alpha5 = x[7*i+4]; 1090 alpha6 = x[7*i+5]; 1091 alpha7 = x[7*i+6]; 1092 while (n-->0) { 1093 y[7*(*idx)] += alpha1*(*v); 1094 y[7*(*idx)+1] += alpha2*(*v); 1095 y[7*(*idx)+2] += alpha3*(*v); 1096 y[7*(*idx)+3] += alpha4*(*v); 1097 y[7*(*idx)+4] += alpha5*(*v); 1098 y[7*(*idx)+5] += alpha6*(*v); 1099 y[7*(*idx)+6] += alpha7*(*v); 1100 idx++; v++; 1101 } 1102 } 1103 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1104 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1105 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1106 PetscFunctionReturn(0); 1107 } 1108 1109 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1110 { 1111 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1112 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1113 const PetscScalar *x,*v; 1114 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1115 PetscErrorCode ierr; 1116 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1117 PetscInt n,i,jrow,j; 1118 1119 PetscFunctionBegin; 1120 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1121 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1122 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1123 idx = a->j; 1124 v = a->a; 1125 ii = a->i; 1126 1127 for (i=0; i<m; i++) { 1128 jrow = ii[i]; 1129 n = ii[i+1] - jrow; 1130 sum1 = 0.0; 1131 sum2 = 0.0; 1132 sum3 = 0.0; 1133 sum4 = 0.0; 1134 sum5 = 0.0; 1135 sum6 = 0.0; 1136 sum7 = 0.0; 1137 for (j=0; j<n; j++) { 1138 sum1 += v[jrow]*x[7*idx[jrow]]; 1139 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1140 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1141 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1142 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1143 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1144 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1145 jrow++; 1146 } 1147 y[7*i] += sum1; 1148 y[7*i+1] += sum2; 1149 y[7*i+2] += sum3; 1150 y[7*i+3] += sum4; 1151 y[7*i+4] += sum5; 1152 y[7*i+5] += sum6; 1153 y[7*i+6] += sum7; 1154 } 1155 1156 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1157 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1158 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1159 PetscFunctionReturn(0); 1160 } 1161 1162 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1163 { 1164 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1165 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1166 const PetscScalar *x,*v; 1167 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1168 PetscErrorCode ierr; 1169 const PetscInt m = b->AIJ->rmap->n,*idx; 1170 PetscInt n,i; 1171 1172 PetscFunctionBegin; 1173 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1174 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1175 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1176 for (i=0; i<m; i++) { 1177 idx = a->j + a->i[i]; 1178 v = a->a + a->i[i]; 1179 n = a->i[i+1] - a->i[i]; 1180 alpha1 = x[7*i]; 1181 alpha2 = x[7*i+1]; 1182 alpha3 = x[7*i+2]; 1183 alpha4 = x[7*i+3]; 1184 alpha5 = x[7*i+4]; 1185 alpha6 = x[7*i+5]; 1186 alpha7 = x[7*i+6]; 1187 while (n-->0) { 1188 y[7*(*idx)] += alpha1*(*v); 1189 y[7*(*idx)+1] += alpha2*(*v); 1190 y[7*(*idx)+2] += alpha3*(*v); 1191 y[7*(*idx)+3] += alpha4*(*v); 1192 y[7*(*idx)+4] += alpha5*(*v); 1193 y[7*(*idx)+5] += alpha6*(*v); 1194 y[7*(*idx)+6] += alpha7*(*v); 1195 idx++; v++; 1196 } 1197 } 1198 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1199 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1200 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1201 PetscFunctionReturn(0); 1202 } 1203 1204 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1205 { 1206 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1207 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1208 const PetscScalar *x,*v; 1209 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1210 PetscErrorCode ierr; 1211 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1212 PetscInt nonzerorow=0,n,i,jrow,j; 1213 1214 PetscFunctionBegin; 1215 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1216 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1217 idx = a->j; 1218 v = a->a; 1219 ii = a->i; 1220 1221 for (i=0; i<m; i++) { 1222 jrow = ii[i]; 1223 n = ii[i+1] - jrow; 1224 sum1 = 0.0; 1225 sum2 = 0.0; 1226 sum3 = 0.0; 1227 sum4 = 0.0; 1228 sum5 = 0.0; 1229 sum6 = 0.0; 1230 sum7 = 0.0; 1231 sum8 = 0.0; 1232 1233 nonzerorow += (n>0); 1234 for (j=0; j<n; j++) { 1235 sum1 += v[jrow]*x[8*idx[jrow]]; 1236 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1237 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1238 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1239 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1240 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1241 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1242 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1243 jrow++; 1244 } 1245 y[8*i] = sum1; 1246 y[8*i+1] = sum2; 1247 y[8*i+2] = sum3; 1248 y[8*i+3] = sum4; 1249 y[8*i+4] = sum5; 1250 y[8*i+5] = sum6; 1251 y[8*i+6] = sum7; 1252 y[8*i+7] = sum8; 1253 } 1254 1255 ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 1256 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1257 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1258 PetscFunctionReturn(0); 1259 } 1260 1261 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1262 { 1263 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1264 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1265 const PetscScalar *x,*v; 1266 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1267 PetscErrorCode ierr; 1268 const PetscInt m = b->AIJ->rmap->n,*idx; 1269 PetscInt n,i; 1270 1271 PetscFunctionBegin; 1272 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1273 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1274 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1275 1276 for (i=0; i<m; i++) { 1277 idx = a->j + a->i[i]; 1278 v = a->a + a->i[i]; 1279 n = a->i[i+1] - a->i[i]; 1280 alpha1 = x[8*i]; 1281 alpha2 = x[8*i+1]; 1282 alpha3 = x[8*i+2]; 1283 alpha4 = x[8*i+3]; 1284 alpha5 = x[8*i+4]; 1285 alpha6 = x[8*i+5]; 1286 alpha7 = x[8*i+6]; 1287 alpha8 = x[8*i+7]; 1288 while (n-->0) { 1289 y[8*(*idx)] += alpha1*(*v); 1290 y[8*(*idx)+1] += alpha2*(*v); 1291 y[8*(*idx)+2] += alpha3*(*v); 1292 y[8*(*idx)+3] += alpha4*(*v); 1293 y[8*(*idx)+4] += alpha5*(*v); 1294 y[8*(*idx)+5] += alpha6*(*v); 1295 y[8*(*idx)+6] += alpha7*(*v); 1296 y[8*(*idx)+7] += alpha8*(*v); 1297 idx++; v++; 1298 } 1299 } 1300 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1301 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1302 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1303 PetscFunctionReturn(0); 1304 } 1305 1306 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1307 { 1308 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1309 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1310 const PetscScalar *x,*v; 1311 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1312 PetscErrorCode ierr; 1313 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1314 PetscInt n,i,jrow,j; 1315 1316 PetscFunctionBegin; 1317 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1318 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1319 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1320 idx = a->j; 1321 v = a->a; 1322 ii = a->i; 1323 1324 for (i=0; i<m; i++) { 1325 jrow = ii[i]; 1326 n = ii[i+1] - jrow; 1327 sum1 = 0.0; 1328 sum2 = 0.0; 1329 sum3 = 0.0; 1330 sum4 = 0.0; 1331 sum5 = 0.0; 1332 sum6 = 0.0; 1333 sum7 = 0.0; 1334 sum8 = 0.0; 1335 for (j=0; j<n; j++) { 1336 sum1 += v[jrow]*x[8*idx[jrow]]; 1337 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1338 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1339 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1340 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1341 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1342 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1343 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1344 jrow++; 1345 } 1346 y[8*i] += sum1; 1347 y[8*i+1] += sum2; 1348 y[8*i+2] += sum3; 1349 y[8*i+3] += sum4; 1350 y[8*i+4] += sum5; 1351 y[8*i+5] += sum6; 1352 y[8*i+6] += sum7; 1353 y[8*i+7] += sum8; 1354 } 1355 1356 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1357 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1358 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1359 PetscFunctionReturn(0); 1360 } 1361 1362 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1363 { 1364 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1365 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1366 const PetscScalar *x,*v; 1367 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1368 PetscErrorCode ierr; 1369 const PetscInt m = b->AIJ->rmap->n,*idx; 1370 PetscInt n,i; 1371 1372 PetscFunctionBegin; 1373 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1374 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1375 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1376 for (i=0; i<m; i++) { 1377 idx = a->j + a->i[i]; 1378 v = a->a + a->i[i]; 1379 n = a->i[i+1] - a->i[i]; 1380 alpha1 = x[8*i]; 1381 alpha2 = x[8*i+1]; 1382 alpha3 = x[8*i+2]; 1383 alpha4 = x[8*i+3]; 1384 alpha5 = x[8*i+4]; 1385 alpha6 = x[8*i+5]; 1386 alpha7 = x[8*i+6]; 1387 alpha8 = x[8*i+7]; 1388 while (n-->0) { 1389 y[8*(*idx)] += alpha1*(*v); 1390 y[8*(*idx)+1] += alpha2*(*v); 1391 y[8*(*idx)+2] += alpha3*(*v); 1392 y[8*(*idx)+3] += alpha4*(*v); 1393 y[8*(*idx)+4] += alpha5*(*v); 1394 y[8*(*idx)+5] += alpha6*(*v); 1395 y[8*(*idx)+6] += alpha7*(*v); 1396 y[8*(*idx)+7] += alpha8*(*v); 1397 idx++; v++; 1398 } 1399 } 1400 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1401 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1402 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1403 PetscFunctionReturn(0); 1404 } 1405 1406 /* ------------------------------------------------------------------------------*/ 1407 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1408 { 1409 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1410 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1411 const PetscScalar *x,*v; 1412 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1413 PetscErrorCode ierr; 1414 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1415 PetscInt nonzerorow=0,n,i,jrow,j; 1416 1417 PetscFunctionBegin; 1418 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1419 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1420 idx = a->j; 1421 v = a->a; 1422 ii = a->i; 1423 1424 for (i=0; i<m; i++) { 1425 jrow = ii[i]; 1426 n = ii[i+1] - jrow; 1427 sum1 = 0.0; 1428 sum2 = 0.0; 1429 sum3 = 0.0; 1430 sum4 = 0.0; 1431 sum5 = 0.0; 1432 sum6 = 0.0; 1433 sum7 = 0.0; 1434 sum8 = 0.0; 1435 sum9 = 0.0; 1436 1437 nonzerorow += (n>0); 1438 for (j=0; j<n; j++) { 1439 sum1 += v[jrow]*x[9*idx[jrow]]; 1440 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1441 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1442 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1443 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1444 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1445 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1446 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1447 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1448 jrow++; 1449 } 1450 y[9*i] = sum1; 1451 y[9*i+1] = sum2; 1452 y[9*i+2] = sum3; 1453 y[9*i+3] = sum4; 1454 y[9*i+4] = sum5; 1455 y[9*i+5] = sum6; 1456 y[9*i+6] = sum7; 1457 y[9*i+7] = sum8; 1458 y[9*i+8] = sum9; 1459 } 1460 1461 ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 1462 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1463 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1464 PetscFunctionReturn(0); 1465 } 1466 1467 /* ------------------------------------------------------------------------------*/ 1468 1469 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1470 { 1471 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1472 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1473 const PetscScalar *x,*v; 1474 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1475 PetscErrorCode ierr; 1476 const PetscInt m = b->AIJ->rmap->n,*idx; 1477 PetscInt n,i; 1478 1479 PetscFunctionBegin; 1480 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1481 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1482 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1483 1484 for (i=0; i<m; i++) { 1485 idx = a->j + a->i[i]; 1486 v = a->a + a->i[i]; 1487 n = a->i[i+1] - a->i[i]; 1488 alpha1 = x[9*i]; 1489 alpha2 = x[9*i+1]; 1490 alpha3 = x[9*i+2]; 1491 alpha4 = x[9*i+3]; 1492 alpha5 = x[9*i+4]; 1493 alpha6 = x[9*i+5]; 1494 alpha7 = x[9*i+6]; 1495 alpha8 = x[9*i+7]; 1496 alpha9 = x[9*i+8]; 1497 while (n-->0) { 1498 y[9*(*idx)] += alpha1*(*v); 1499 y[9*(*idx)+1] += alpha2*(*v); 1500 y[9*(*idx)+2] += alpha3*(*v); 1501 y[9*(*idx)+3] += alpha4*(*v); 1502 y[9*(*idx)+4] += alpha5*(*v); 1503 y[9*(*idx)+5] += alpha6*(*v); 1504 y[9*(*idx)+6] += alpha7*(*v); 1505 y[9*(*idx)+7] += alpha8*(*v); 1506 y[9*(*idx)+8] += alpha9*(*v); 1507 idx++; v++; 1508 } 1509 } 1510 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1511 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1512 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1513 PetscFunctionReturn(0); 1514 } 1515 1516 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1517 { 1518 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1519 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1520 const PetscScalar *x,*v; 1521 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1522 PetscErrorCode ierr; 1523 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1524 PetscInt n,i,jrow,j; 1525 1526 PetscFunctionBegin; 1527 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1528 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1529 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1530 idx = a->j; 1531 v = a->a; 1532 ii = a->i; 1533 1534 for (i=0; i<m; i++) { 1535 jrow = ii[i]; 1536 n = ii[i+1] - jrow; 1537 sum1 = 0.0; 1538 sum2 = 0.0; 1539 sum3 = 0.0; 1540 sum4 = 0.0; 1541 sum5 = 0.0; 1542 sum6 = 0.0; 1543 sum7 = 0.0; 1544 sum8 = 0.0; 1545 sum9 = 0.0; 1546 for (j=0; j<n; j++) { 1547 sum1 += v[jrow]*x[9*idx[jrow]]; 1548 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1549 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1550 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1551 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1552 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1553 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1554 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1555 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1556 jrow++; 1557 } 1558 y[9*i] += sum1; 1559 y[9*i+1] += sum2; 1560 y[9*i+2] += sum3; 1561 y[9*i+3] += sum4; 1562 y[9*i+4] += sum5; 1563 y[9*i+5] += sum6; 1564 y[9*i+6] += sum7; 1565 y[9*i+7] += sum8; 1566 y[9*i+8] += sum9; 1567 } 1568 1569 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1570 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1571 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1572 PetscFunctionReturn(0); 1573 } 1574 1575 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1576 { 1577 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1578 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1579 const PetscScalar *x,*v; 1580 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1581 PetscErrorCode ierr; 1582 const PetscInt m = b->AIJ->rmap->n,*idx; 1583 PetscInt n,i; 1584 1585 PetscFunctionBegin; 1586 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1587 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1588 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1589 for (i=0; i<m; i++) { 1590 idx = a->j + a->i[i]; 1591 v = a->a + a->i[i]; 1592 n = a->i[i+1] - a->i[i]; 1593 alpha1 = x[9*i]; 1594 alpha2 = x[9*i+1]; 1595 alpha3 = x[9*i+2]; 1596 alpha4 = x[9*i+3]; 1597 alpha5 = x[9*i+4]; 1598 alpha6 = x[9*i+5]; 1599 alpha7 = x[9*i+6]; 1600 alpha8 = x[9*i+7]; 1601 alpha9 = x[9*i+8]; 1602 while (n-->0) { 1603 y[9*(*idx)] += alpha1*(*v); 1604 y[9*(*idx)+1] += alpha2*(*v); 1605 y[9*(*idx)+2] += alpha3*(*v); 1606 y[9*(*idx)+3] += alpha4*(*v); 1607 y[9*(*idx)+4] += alpha5*(*v); 1608 y[9*(*idx)+5] += alpha6*(*v); 1609 y[9*(*idx)+6] += alpha7*(*v); 1610 y[9*(*idx)+7] += alpha8*(*v); 1611 y[9*(*idx)+8] += alpha9*(*v); 1612 idx++; v++; 1613 } 1614 } 1615 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1616 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1617 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1618 PetscFunctionReturn(0); 1619 } 1620 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1621 { 1622 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1623 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1624 const PetscScalar *x,*v; 1625 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1626 PetscErrorCode ierr; 1627 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1628 PetscInt nonzerorow=0,n,i,jrow,j; 1629 1630 PetscFunctionBegin; 1631 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1632 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1633 idx = a->j; 1634 v = a->a; 1635 ii = a->i; 1636 1637 for (i=0; i<m; i++) { 1638 jrow = ii[i]; 1639 n = ii[i+1] - jrow; 1640 sum1 = 0.0; 1641 sum2 = 0.0; 1642 sum3 = 0.0; 1643 sum4 = 0.0; 1644 sum5 = 0.0; 1645 sum6 = 0.0; 1646 sum7 = 0.0; 1647 sum8 = 0.0; 1648 sum9 = 0.0; 1649 sum10 = 0.0; 1650 1651 nonzerorow += (n>0); 1652 for (j=0; j<n; j++) { 1653 sum1 += v[jrow]*x[10*idx[jrow]]; 1654 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1655 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1656 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1657 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1658 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1659 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1660 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1661 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1662 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1663 jrow++; 1664 } 1665 y[10*i] = sum1; 1666 y[10*i+1] = sum2; 1667 y[10*i+2] = sum3; 1668 y[10*i+3] = sum4; 1669 y[10*i+4] = sum5; 1670 y[10*i+5] = sum6; 1671 y[10*i+6] = sum7; 1672 y[10*i+7] = sum8; 1673 y[10*i+8] = sum9; 1674 y[10*i+9] = sum10; 1675 } 1676 1677 ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 1678 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1679 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1680 PetscFunctionReturn(0); 1681 } 1682 1683 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1684 { 1685 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1686 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1687 const PetscScalar *x,*v; 1688 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1689 PetscErrorCode ierr; 1690 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1691 PetscInt n,i,jrow,j; 1692 1693 PetscFunctionBegin; 1694 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1695 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1696 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1697 idx = a->j; 1698 v = a->a; 1699 ii = a->i; 1700 1701 for (i=0; i<m; i++) { 1702 jrow = ii[i]; 1703 n = ii[i+1] - jrow; 1704 sum1 = 0.0; 1705 sum2 = 0.0; 1706 sum3 = 0.0; 1707 sum4 = 0.0; 1708 sum5 = 0.0; 1709 sum6 = 0.0; 1710 sum7 = 0.0; 1711 sum8 = 0.0; 1712 sum9 = 0.0; 1713 sum10 = 0.0; 1714 for (j=0; j<n; j++) { 1715 sum1 += v[jrow]*x[10*idx[jrow]]; 1716 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1717 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1718 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1719 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1720 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1721 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1722 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1723 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1724 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1725 jrow++; 1726 } 1727 y[10*i] += sum1; 1728 y[10*i+1] += sum2; 1729 y[10*i+2] += sum3; 1730 y[10*i+3] += sum4; 1731 y[10*i+4] += sum5; 1732 y[10*i+5] += sum6; 1733 y[10*i+6] += sum7; 1734 y[10*i+7] += sum8; 1735 y[10*i+8] += sum9; 1736 y[10*i+9] += sum10; 1737 } 1738 1739 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1740 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1741 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1742 PetscFunctionReturn(0); 1743 } 1744 1745 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1746 { 1747 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1748 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1749 const PetscScalar *x,*v; 1750 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1751 PetscErrorCode ierr; 1752 const PetscInt m = b->AIJ->rmap->n,*idx; 1753 PetscInt n,i; 1754 1755 PetscFunctionBegin; 1756 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1757 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1758 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1759 1760 for (i=0; i<m; i++) { 1761 idx = a->j + a->i[i]; 1762 v = a->a + a->i[i]; 1763 n = a->i[i+1] - a->i[i]; 1764 alpha1 = x[10*i]; 1765 alpha2 = x[10*i+1]; 1766 alpha3 = x[10*i+2]; 1767 alpha4 = x[10*i+3]; 1768 alpha5 = x[10*i+4]; 1769 alpha6 = x[10*i+5]; 1770 alpha7 = x[10*i+6]; 1771 alpha8 = x[10*i+7]; 1772 alpha9 = x[10*i+8]; 1773 alpha10 = x[10*i+9]; 1774 while (n-->0) { 1775 y[10*(*idx)] += alpha1*(*v); 1776 y[10*(*idx)+1] += alpha2*(*v); 1777 y[10*(*idx)+2] += alpha3*(*v); 1778 y[10*(*idx)+3] += alpha4*(*v); 1779 y[10*(*idx)+4] += alpha5*(*v); 1780 y[10*(*idx)+5] += alpha6*(*v); 1781 y[10*(*idx)+6] += alpha7*(*v); 1782 y[10*(*idx)+7] += alpha8*(*v); 1783 y[10*(*idx)+8] += alpha9*(*v); 1784 y[10*(*idx)+9] += alpha10*(*v); 1785 idx++; v++; 1786 } 1787 } 1788 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1789 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1790 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1791 PetscFunctionReturn(0); 1792 } 1793 1794 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1795 { 1796 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1797 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1798 const PetscScalar *x,*v; 1799 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1800 PetscErrorCode ierr; 1801 const PetscInt m = b->AIJ->rmap->n,*idx; 1802 PetscInt n,i; 1803 1804 PetscFunctionBegin; 1805 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1806 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1807 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1808 for (i=0; i<m; i++) { 1809 idx = a->j + a->i[i]; 1810 v = a->a + a->i[i]; 1811 n = a->i[i+1] - a->i[i]; 1812 alpha1 = x[10*i]; 1813 alpha2 = x[10*i+1]; 1814 alpha3 = x[10*i+2]; 1815 alpha4 = x[10*i+3]; 1816 alpha5 = x[10*i+4]; 1817 alpha6 = x[10*i+5]; 1818 alpha7 = x[10*i+6]; 1819 alpha8 = x[10*i+7]; 1820 alpha9 = x[10*i+8]; 1821 alpha10 = x[10*i+9]; 1822 while (n-->0) { 1823 y[10*(*idx)] += alpha1*(*v); 1824 y[10*(*idx)+1] += alpha2*(*v); 1825 y[10*(*idx)+2] += alpha3*(*v); 1826 y[10*(*idx)+3] += alpha4*(*v); 1827 y[10*(*idx)+4] += alpha5*(*v); 1828 y[10*(*idx)+5] += alpha6*(*v); 1829 y[10*(*idx)+6] += alpha7*(*v); 1830 y[10*(*idx)+7] += alpha8*(*v); 1831 y[10*(*idx)+8] += alpha9*(*v); 1832 y[10*(*idx)+9] += alpha10*(*v); 1833 idx++; v++; 1834 } 1835 } 1836 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1837 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1838 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1839 PetscFunctionReturn(0); 1840 } 1841 1842 1843 /*--------------------------------------------------------------------------------------------*/ 1844 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1845 { 1846 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1847 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1848 const PetscScalar *x,*v; 1849 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1850 PetscErrorCode ierr; 1851 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1852 PetscInt nonzerorow=0,n,i,jrow,j; 1853 1854 PetscFunctionBegin; 1855 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1856 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1857 idx = a->j; 1858 v = a->a; 1859 ii = a->i; 1860 1861 for (i=0; i<m; i++) { 1862 jrow = ii[i]; 1863 n = ii[i+1] - jrow; 1864 sum1 = 0.0; 1865 sum2 = 0.0; 1866 sum3 = 0.0; 1867 sum4 = 0.0; 1868 sum5 = 0.0; 1869 sum6 = 0.0; 1870 sum7 = 0.0; 1871 sum8 = 0.0; 1872 sum9 = 0.0; 1873 sum10 = 0.0; 1874 sum11 = 0.0; 1875 1876 nonzerorow += (n>0); 1877 for (j=0; j<n; j++) { 1878 sum1 += v[jrow]*x[11*idx[jrow]]; 1879 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1880 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1881 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1882 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1883 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1884 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1885 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1886 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1887 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1888 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1889 jrow++; 1890 } 1891 y[11*i] = sum1; 1892 y[11*i+1] = sum2; 1893 y[11*i+2] = sum3; 1894 y[11*i+3] = sum4; 1895 y[11*i+4] = sum5; 1896 y[11*i+5] = sum6; 1897 y[11*i+6] = sum7; 1898 y[11*i+7] = sum8; 1899 y[11*i+8] = sum9; 1900 y[11*i+9] = sum10; 1901 y[11*i+10] = sum11; 1902 } 1903 1904 ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr); 1905 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1906 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1907 PetscFunctionReturn(0); 1908 } 1909 1910 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1911 { 1912 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1913 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1914 const PetscScalar *x,*v; 1915 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1916 PetscErrorCode ierr; 1917 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1918 PetscInt n,i,jrow,j; 1919 1920 PetscFunctionBegin; 1921 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1922 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1923 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1924 idx = a->j; 1925 v = a->a; 1926 ii = a->i; 1927 1928 for (i=0; i<m; i++) { 1929 jrow = ii[i]; 1930 n = ii[i+1] - jrow; 1931 sum1 = 0.0; 1932 sum2 = 0.0; 1933 sum3 = 0.0; 1934 sum4 = 0.0; 1935 sum5 = 0.0; 1936 sum6 = 0.0; 1937 sum7 = 0.0; 1938 sum8 = 0.0; 1939 sum9 = 0.0; 1940 sum10 = 0.0; 1941 sum11 = 0.0; 1942 for (j=0; j<n; j++) { 1943 sum1 += v[jrow]*x[11*idx[jrow]]; 1944 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1945 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1946 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1947 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1948 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1949 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1950 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1951 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1952 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1953 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1954 jrow++; 1955 } 1956 y[11*i] += sum1; 1957 y[11*i+1] += sum2; 1958 y[11*i+2] += sum3; 1959 y[11*i+3] += sum4; 1960 y[11*i+4] += sum5; 1961 y[11*i+5] += sum6; 1962 y[11*i+6] += sum7; 1963 y[11*i+7] += sum8; 1964 y[11*i+8] += sum9; 1965 y[11*i+9] += sum10; 1966 y[11*i+10] += sum11; 1967 } 1968 1969 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 1970 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1971 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1972 PetscFunctionReturn(0); 1973 } 1974 1975 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1976 { 1977 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1978 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1979 const PetscScalar *x,*v; 1980 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 1981 PetscErrorCode ierr; 1982 const PetscInt m = b->AIJ->rmap->n,*idx; 1983 PetscInt n,i; 1984 1985 PetscFunctionBegin; 1986 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1987 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1988 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1989 1990 for (i=0; i<m; i++) { 1991 idx = a->j + a->i[i]; 1992 v = a->a + a->i[i]; 1993 n = a->i[i+1] - a->i[i]; 1994 alpha1 = x[11*i]; 1995 alpha2 = x[11*i+1]; 1996 alpha3 = x[11*i+2]; 1997 alpha4 = x[11*i+3]; 1998 alpha5 = x[11*i+4]; 1999 alpha6 = x[11*i+5]; 2000 alpha7 = x[11*i+6]; 2001 alpha8 = x[11*i+7]; 2002 alpha9 = x[11*i+8]; 2003 alpha10 = x[11*i+9]; 2004 alpha11 = x[11*i+10]; 2005 while (n-->0) { 2006 y[11*(*idx)] += alpha1*(*v); 2007 y[11*(*idx)+1] += alpha2*(*v); 2008 y[11*(*idx)+2] += alpha3*(*v); 2009 y[11*(*idx)+3] += alpha4*(*v); 2010 y[11*(*idx)+4] += alpha5*(*v); 2011 y[11*(*idx)+5] += alpha6*(*v); 2012 y[11*(*idx)+6] += alpha7*(*v); 2013 y[11*(*idx)+7] += alpha8*(*v); 2014 y[11*(*idx)+8] += alpha9*(*v); 2015 y[11*(*idx)+9] += alpha10*(*v); 2016 y[11*(*idx)+10] += alpha11*(*v); 2017 idx++; v++; 2018 } 2019 } 2020 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2021 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2022 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2023 PetscFunctionReturn(0); 2024 } 2025 2026 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2027 { 2028 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2029 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2030 const PetscScalar *x,*v; 2031 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2032 PetscErrorCode ierr; 2033 const PetscInt m = b->AIJ->rmap->n,*idx; 2034 PetscInt n,i; 2035 2036 PetscFunctionBegin; 2037 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2038 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2039 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2040 for (i=0; i<m; i++) { 2041 idx = a->j + a->i[i]; 2042 v = a->a + a->i[i]; 2043 n = a->i[i+1] - a->i[i]; 2044 alpha1 = x[11*i]; 2045 alpha2 = x[11*i+1]; 2046 alpha3 = x[11*i+2]; 2047 alpha4 = x[11*i+3]; 2048 alpha5 = x[11*i+4]; 2049 alpha6 = x[11*i+5]; 2050 alpha7 = x[11*i+6]; 2051 alpha8 = x[11*i+7]; 2052 alpha9 = x[11*i+8]; 2053 alpha10 = x[11*i+9]; 2054 alpha11 = x[11*i+10]; 2055 while (n-->0) { 2056 y[11*(*idx)] += alpha1*(*v); 2057 y[11*(*idx)+1] += alpha2*(*v); 2058 y[11*(*idx)+2] += alpha3*(*v); 2059 y[11*(*idx)+3] += alpha4*(*v); 2060 y[11*(*idx)+4] += alpha5*(*v); 2061 y[11*(*idx)+5] += alpha6*(*v); 2062 y[11*(*idx)+6] += alpha7*(*v); 2063 y[11*(*idx)+7] += alpha8*(*v); 2064 y[11*(*idx)+8] += alpha9*(*v); 2065 y[11*(*idx)+9] += alpha10*(*v); 2066 y[11*(*idx)+10] += alpha11*(*v); 2067 idx++; v++; 2068 } 2069 } 2070 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2071 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2072 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2073 PetscFunctionReturn(0); 2074 } 2075 2076 2077 /*--------------------------------------------------------------------------------------------*/ 2078 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2079 { 2080 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2081 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2082 const PetscScalar *x,*v; 2083 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2084 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2085 PetscErrorCode ierr; 2086 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2087 PetscInt nonzerorow=0,n,i,jrow,j; 2088 2089 PetscFunctionBegin; 2090 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2091 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2092 idx = a->j; 2093 v = a->a; 2094 ii = a->i; 2095 2096 for (i=0; i<m; i++) { 2097 jrow = ii[i]; 2098 n = ii[i+1] - jrow; 2099 sum1 = 0.0; 2100 sum2 = 0.0; 2101 sum3 = 0.0; 2102 sum4 = 0.0; 2103 sum5 = 0.0; 2104 sum6 = 0.0; 2105 sum7 = 0.0; 2106 sum8 = 0.0; 2107 sum9 = 0.0; 2108 sum10 = 0.0; 2109 sum11 = 0.0; 2110 sum12 = 0.0; 2111 sum13 = 0.0; 2112 sum14 = 0.0; 2113 sum15 = 0.0; 2114 sum16 = 0.0; 2115 2116 nonzerorow += (n>0); 2117 for (j=0; j<n; j++) { 2118 sum1 += v[jrow]*x[16*idx[jrow]]; 2119 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2120 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2121 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2122 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2123 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2124 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2125 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2126 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2127 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2128 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2129 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2130 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2131 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2132 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2133 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2134 jrow++; 2135 } 2136 y[16*i] = sum1; 2137 y[16*i+1] = sum2; 2138 y[16*i+2] = sum3; 2139 y[16*i+3] = sum4; 2140 y[16*i+4] = sum5; 2141 y[16*i+5] = sum6; 2142 y[16*i+6] = sum7; 2143 y[16*i+7] = sum8; 2144 y[16*i+8] = sum9; 2145 y[16*i+9] = sum10; 2146 y[16*i+10] = sum11; 2147 y[16*i+11] = sum12; 2148 y[16*i+12] = sum13; 2149 y[16*i+13] = sum14; 2150 y[16*i+14] = sum15; 2151 y[16*i+15] = sum16; 2152 } 2153 2154 ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 2155 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2156 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2157 PetscFunctionReturn(0); 2158 } 2159 2160 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2161 { 2162 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2163 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2164 const PetscScalar *x,*v; 2165 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2166 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2167 PetscErrorCode ierr; 2168 const PetscInt m = b->AIJ->rmap->n,*idx; 2169 PetscInt n,i; 2170 2171 PetscFunctionBegin; 2172 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2173 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2174 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2175 2176 for (i=0; i<m; i++) { 2177 idx = a->j + a->i[i]; 2178 v = a->a + a->i[i]; 2179 n = a->i[i+1] - a->i[i]; 2180 alpha1 = x[16*i]; 2181 alpha2 = x[16*i+1]; 2182 alpha3 = x[16*i+2]; 2183 alpha4 = x[16*i+3]; 2184 alpha5 = x[16*i+4]; 2185 alpha6 = x[16*i+5]; 2186 alpha7 = x[16*i+6]; 2187 alpha8 = x[16*i+7]; 2188 alpha9 = x[16*i+8]; 2189 alpha10 = x[16*i+9]; 2190 alpha11 = x[16*i+10]; 2191 alpha12 = x[16*i+11]; 2192 alpha13 = x[16*i+12]; 2193 alpha14 = x[16*i+13]; 2194 alpha15 = x[16*i+14]; 2195 alpha16 = x[16*i+15]; 2196 while (n-->0) { 2197 y[16*(*idx)] += alpha1*(*v); 2198 y[16*(*idx)+1] += alpha2*(*v); 2199 y[16*(*idx)+2] += alpha3*(*v); 2200 y[16*(*idx)+3] += alpha4*(*v); 2201 y[16*(*idx)+4] += alpha5*(*v); 2202 y[16*(*idx)+5] += alpha6*(*v); 2203 y[16*(*idx)+6] += alpha7*(*v); 2204 y[16*(*idx)+7] += alpha8*(*v); 2205 y[16*(*idx)+8] += alpha9*(*v); 2206 y[16*(*idx)+9] += alpha10*(*v); 2207 y[16*(*idx)+10] += alpha11*(*v); 2208 y[16*(*idx)+11] += alpha12*(*v); 2209 y[16*(*idx)+12] += alpha13*(*v); 2210 y[16*(*idx)+13] += alpha14*(*v); 2211 y[16*(*idx)+14] += alpha15*(*v); 2212 y[16*(*idx)+15] += alpha16*(*v); 2213 idx++; v++; 2214 } 2215 } 2216 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2217 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2218 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2219 PetscFunctionReturn(0); 2220 } 2221 2222 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2223 { 2224 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2225 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2226 const PetscScalar *x,*v; 2227 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2228 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2229 PetscErrorCode ierr; 2230 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2231 PetscInt n,i,jrow,j; 2232 2233 PetscFunctionBegin; 2234 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2235 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2236 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2237 idx = a->j; 2238 v = a->a; 2239 ii = a->i; 2240 2241 for (i=0; i<m; i++) { 2242 jrow = ii[i]; 2243 n = ii[i+1] - jrow; 2244 sum1 = 0.0; 2245 sum2 = 0.0; 2246 sum3 = 0.0; 2247 sum4 = 0.0; 2248 sum5 = 0.0; 2249 sum6 = 0.0; 2250 sum7 = 0.0; 2251 sum8 = 0.0; 2252 sum9 = 0.0; 2253 sum10 = 0.0; 2254 sum11 = 0.0; 2255 sum12 = 0.0; 2256 sum13 = 0.0; 2257 sum14 = 0.0; 2258 sum15 = 0.0; 2259 sum16 = 0.0; 2260 for (j=0; j<n; j++) { 2261 sum1 += v[jrow]*x[16*idx[jrow]]; 2262 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2263 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2264 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2265 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2266 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2267 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2268 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2269 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2270 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2271 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2272 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2273 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2274 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2275 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2276 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2277 jrow++; 2278 } 2279 y[16*i] += sum1; 2280 y[16*i+1] += sum2; 2281 y[16*i+2] += sum3; 2282 y[16*i+3] += sum4; 2283 y[16*i+4] += sum5; 2284 y[16*i+5] += sum6; 2285 y[16*i+6] += sum7; 2286 y[16*i+7] += sum8; 2287 y[16*i+8] += sum9; 2288 y[16*i+9] += sum10; 2289 y[16*i+10] += sum11; 2290 y[16*i+11] += sum12; 2291 y[16*i+12] += sum13; 2292 y[16*i+13] += sum14; 2293 y[16*i+14] += sum15; 2294 y[16*i+15] += sum16; 2295 } 2296 2297 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2298 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2299 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2300 PetscFunctionReturn(0); 2301 } 2302 2303 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2304 { 2305 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2306 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2307 const PetscScalar *x,*v; 2308 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2309 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2310 PetscErrorCode ierr; 2311 const PetscInt m = b->AIJ->rmap->n,*idx; 2312 PetscInt n,i; 2313 2314 PetscFunctionBegin; 2315 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2316 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2317 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2318 for (i=0; i<m; i++) { 2319 idx = a->j + a->i[i]; 2320 v = a->a + a->i[i]; 2321 n = a->i[i+1] - a->i[i]; 2322 alpha1 = x[16*i]; 2323 alpha2 = x[16*i+1]; 2324 alpha3 = x[16*i+2]; 2325 alpha4 = x[16*i+3]; 2326 alpha5 = x[16*i+4]; 2327 alpha6 = x[16*i+5]; 2328 alpha7 = x[16*i+6]; 2329 alpha8 = x[16*i+7]; 2330 alpha9 = x[16*i+8]; 2331 alpha10 = x[16*i+9]; 2332 alpha11 = x[16*i+10]; 2333 alpha12 = x[16*i+11]; 2334 alpha13 = x[16*i+12]; 2335 alpha14 = x[16*i+13]; 2336 alpha15 = x[16*i+14]; 2337 alpha16 = x[16*i+15]; 2338 while (n-->0) { 2339 y[16*(*idx)] += alpha1*(*v); 2340 y[16*(*idx)+1] += alpha2*(*v); 2341 y[16*(*idx)+2] += alpha3*(*v); 2342 y[16*(*idx)+3] += alpha4*(*v); 2343 y[16*(*idx)+4] += alpha5*(*v); 2344 y[16*(*idx)+5] += alpha6*(*v); 2345 y[16*(*idx)+6] += alpha7*(*v); 2346 y[16*(*idx)+7] += alpha8*(*v); 2347 y[16*(*idx)+8] += alpha9*(*v); 2348 y[16*(*idx)+9] += alpha10*(*v); 2349 y[16*(*idx)+10] += alpha11*(*v); 2350 y[16*(*idx)+11] += alpha12*(*v); 2351 y[16*(*idx)+12] += alpha13*(*v); 2352 y[16*(*idx)+13] += alpha14*(*v); 2353 y[16*(*idx)+14] += alpha15*(*v); 2354 y[16*(*idx)+15] += alpha16*(*v); 2355 idx++; v++; 2356 } 2357 } 2358 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2359 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2360 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2361 PetscFunctionReturn(0); 2362 } 2363 2364 /*--------------------------------------------------------------------------------------------*/ 2365 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2366 { 2367 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2368 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2369 const PetscScalar *x,*v; 2370 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2371 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2372 PetscErrorCode ierr; 2373 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2374 PetscInt nonzerorow=0,n,i,jrow,j; 2375 2376 PetscFunctionBegin; 2377 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2378 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2379 idx = a->j; 2380 v = a->a; 2381 ii = a->i; 2382 2383 for (i=0; i<m; i++) { 2384 jrow = ii[i]; 2385 n = ii[i+1] - jrow; 2386 sum1 = 0.0; 2387 sum2 = 0.0; 2388 sum3 = 0.0; 2389 sum4 = 0.0; 2390 sum5 = 0.0; 2391 sum6 = 0.0; 2392 sum7 = 0.0; 2393 sum8 = 0.0; 2394 sum9 = 0.0; 2395 sum10 = 0.0; 2396 sum11 = 0.0; 2397 sum12 = 0.0; 2398 sum13 = 0.0; 2399 sum14 = 0.0; 2400 sum15 = 0.0; 2401 sum16 = 0.0; 2402 sum17 = 0.0; 2403 sum18 = 0.0; 2404 2405 nonzerorow += (n>0); 2406 for (j=0; j<n; j++) { 2407 sum1 += v[jrow]*x[18*idx[jrow]]; 2408 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2409 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2410 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2411 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2412 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2413 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2414 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2415 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2416 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2417 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2418 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2419 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2420 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2421 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2422 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2423 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2424 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2425 jrow++; 2426 } 2427 y[18*i] = sum1; 2428 y[18*i+1] = sum2; 2429 y[18*i+2] = sum3; 2430 y[18*i+3] = sum4; 2431 y[18*i+4] = sum5; 2432 y[18*i+5] = sum6; 2433 y[18*i+6] = sum7; 2434 y[18*i+7] = sum8; 2435 y[18*i+8] = sum9; 2436 y[18*i+9] = sum10; 2437 y[18*i+10] = sum11; 2438 y[18*i+11] = sum12; 2439 y[18*i+12] = sum13; 2440 y[18*i+13] = sum14; 2441 y[18*i+14] = sum15; 2442 y[18*i+15] = sum16; 2443 y[18*i+16] = sum17; 2444 y[18*i+17] = sum18; 2445 } 2446 2447 ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 2448 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2449 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2450 PetscFunctionReturn(0); 2451 } 2452 2453 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2454 { 2455 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2456 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2457 const PetscScalar *x,*v; 2458 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2459 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2460 PetscErrorCode ierr; 2461 const PetscInt m = b->AIJ->rmap->n,*idx; 2462 PetscInt n,i; 2463 2464 PetscFunctionBegin; 2465 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2466 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2467 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2468 2469 for (i=0; i<m; i++) { 2470 idx = a->j + a->i[i]; 2471 v = a->a + a->i[i]; 2472 n = a->i[i+1] - a->i[i]; 2473 alpha1 = x[18*i]; 2474 alpha2 = x[18*i+1]; 2475 alpha3 = x[18*i+2]; 2476 alpha4 = x[18*i+3]; 2477 alpha5 = x[18*i+4]; 2478 alpha6 = x[18*i+5]; 2479 alpha7 = x[18*i+6]; 2480 alpha8 = x[18*i+7]; 2481 alpha9 = x[18*i+8]; 2482 alpha10 = x[18*i+9]; 2483 alpha11 = x[18*i+10]; 2484 alpha12 = x[18*i+11]; 2485 alpha13 = x[18*i+12]; 2486 alpha14 = x[18*i+13]; 2487 alpha15 = x[18*i+14]; 2488 alpha16 = x[18*i+15]; 2489 alpha17 = x[18*i+16]; 2490 alpha18 = x[18*i+17]; 2491 while (n-->0) { 2492 y[18*(*idx)] += alpha1*(*v); 2493 y[18*(*idx)+1] += alpha2*(*v); 2494 y[18*(*idx)+2] += alpha3*(*v); 2495 y[18*(*idx)+3] += alpha4*(*v); 2496 y[18*(*idx)+4] += alpha5*(*v); 2497 y[18*(*idx)+5] += alpha6*(*v); 2498 y[18*(*idx)+6] += alpha7*(*v); 2499 y[18*(*idx)+7] += alpha8*(*v); 2500 y[18*(*idx)+8] += alpha9*(*v); 2501 y[18*(*idx)+9] += alpha10*(*v); 2502 y[18*(*idx)+10] += alpha11*(*v); 2503 y[18*(*idx)+11] += alpha12*(*v); 2504 y[18*(*idx)+12] += alpha13*(*v); 2505 y[18*(*idx)+13] += alpha14*(*v); 2506 y[18*(*idx)+14] += alpha15*(*v); 2507 y[18*(*idx)+15] += alpha16*(*v); 2508 y[18*(*idx)+16] += alpha17*(*v); 2509 y[18*(*idx)+17] += alpha18*(*v); 2510 idx++; v++; 2511 } 2512 } 2513 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2514 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2515 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2516 PetscFunctionReturn(0); 2517 } 2518 2519 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2520 { 2521 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2522 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2523 const PetscScalar *x,*v; 2524 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2525 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2526 PetscErrorCode ierr; 2527 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2528 PetscInt n,i,jrow,j; 2529 2530 PetscFunctionBegin; 2531 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2532 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2533 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2534 idx = a->j; 2535 v = a->a; 2536 ii = a->i; 2537 2538 for (i=0; i<m; i++) { 2539 jrow = ii[i]; 2540 n = ii[i+1] - jrow; 2541 sum1 = 0.0; 2542 sum2 = 0.0; 2543 sum3 = 0.0; 2544 sum4 = 0.0; 2545 sum5 = 0.0; 2546 sum6 = 0.0; 2547 sum7 = 0.0; 2548 sum8 = 0.0; 2549 sum9 = 0.0; 2550 sum10 = 0.0; 2551 sum11 = 0.0; 2552 sum12 = 0.0; 2553 sum13 = 0.0; 2554 sum14 = 0.0; 2555 sum15 = 0.0; 2556 sum16 = 0.0; 2557 sum17 = 0.0; 2558 sum18 = 0.0; 2559 for (j=0; j<n; j++) { 2560 sum1 += v[jrow]*x[18*idx[jrow]]; 2561 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2562 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2563 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2564 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2565 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2566 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2567 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2568 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2569 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2570 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2571 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2572 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2573 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2574 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2575 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2576 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2577 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2578 jrow++; 2579 } 2580 y[18*i] += sum1; 2581 y[18*i+1] += sum2; 2582 y[18*i+2] += sum3; 2583 y[18*i+3] += sum4; 2584 y[18*i+4] += sum5; 2585 y[18*i+5] += sum6; 2586 y[18*i+6] += sum7; 2587 y[18*i+7] += sum8; 2588 y[18*i+8] += sum9; 2589 y[18*i+9] += sum10; 2590 y[18*i+10] += sum11; 2591 y[18*i+11] += sum12; 2592 y[18*i+12] += sum13; 2593 y[18*i+13] += sum14; 2594 y[18*i+14] += sum15; 2595 y[18*i+15] += sum16; 2596 y[18*i+16] += sum17; 2597 y[18*i+17] += sum18; 2598 } 2599 2600 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2601 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2602 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2603 PetscFunctionReturn(0); 2604 } 2605 2606 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2607 { 2608 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2609 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2610 const PetscScalar *x,*v; 2611 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2612 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2613 PetscErrorCode ierr; 2614 const PetscInt m = b->AIJ->rmap->n,*idx; 2615 PetscInt n,i; 2616 2617 PetscFunctionBegin; 2618 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2619 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2620 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2621 for (i=0; i<m; i++) { 2622 idx = a->j + a->i[i]; 2623 v = a->a + a->i[i]; 2624 n = a->i[i+1] - a->i[i]; 2625 alpha1 = x[18*i]; 2626 alpha2 = x[18*i+1]; 2627 alpha3 = x[18*i+2]; 2628 alpha4 = x[18*i+3]; 2629 alpha5 = x[18*i+4]; 2630 alpha6 = x[18*i+5]; 2631 alpha7 = x[18*i+6]; 2632 alpha8 = x[18*i+7]; 2633 alpha9 = x[18*i+8]; 2634 alpha10 = x[18*i+9]; 2635 alpha11 = x[18*i+10]; 2636 alpha12 = x[18*i+11]; 2637 alpha13 = x[18*i+12]; 2638 alpha14 = x[18*i+13]; 2639 alpha15 = x[18*i+14]; 2640 alpha16 = x[18*i+15]; 2641 alpha17 = x[18*i+16]; 2642 alpha18 = x[18*i+17]; 2643 while (n-->0) { 2644 y[18*(*idx)] += alpha1*(*v); 2645 y[18*(*idx)+1] += alpha2*(*v); 2646 y[18*(*idx)+2] += alpha3*(*v); 2647 y[18*(*idx)+3] += alpha4*(*v); 2648 y[18*(*idx)+4] += alpha5*(*v); 2649 y[18*(*idx)+5] += alpha6*(*v); 2650 y[18*(*idx)+6] += alpha7*(*v); 2651 y[18*(*idx)+7] += alpha8*(*v); 2652 y[18*(*idx)+8] += alpha9*(*v); 2653 y[18*(*idx)+9] += alpha10*(*v); 2654 y[18*(*idx)+10] += alpha11*(*v); 2655 y[18*(*idx)+11] += alpha12*(*v); 2656 y[18*(*idx)+12] += alpha13*(*v); 2657 y[18*(*idx)+13] += alpha14*(*v); 2658 y[18*(*idx)+14] += alpha15*(*v); 2659 y[18*(*idx)+15] += alpha16*(*v); 2660 y[18*(*idx)+16] += alpha17*(*v); 2661 y[18*(*idx)+17] += alpha18*(*v); 2662 idx++; v++; 2663 } 2664 } 2665 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2666 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2667 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2668 PetscFunctionReturn(0); 2669 } 2670 2671 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2672 { 2673 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2674 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2675 const PetscScalar *x,*v; 2676 PetscScalar *y,*sums; 2677 PetscErrorCode ierr; 2678 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2679 PetscInt n,i,jrow,j,dof = b->dof,k; 2680 2681 PetscFunctionBegin; 2682 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2683 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2684 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2685 idx = a->j; 2686 v = a->a; 2687 ii = a->i; 2688 2689 for (i=0; i<m; i++) { 2690 jrow = ii[i]; 2691 n = ii[i+1] - jrow; 2692 sums = y + dof*i; 2693 for (j=0; j<n; j++) { 2694 for (k=0; k<dof; k++) { 2695 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2696 } 2697 jrow++; 2698 } 2699 } 2700 2701 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2702 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2703 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2704 PetscFunctionReturn(0); 2705 } 2706 2707 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2708 { 2709 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2710 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2711 const PetscScalar *x,*v; 2712 PetscScalar *y,*sums; 2713 PetscErrorCode ierr; 2714 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2715 PetscInt n,i,jrow,j,dof = b->dof,k; 2716 2717 PetscFunctionBegin; 2718 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2719 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2720 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2721 idx = a->j; 2722 v = a->a; 2723 ii = a->i; 2724 2725 for (i=0; i<m; i++) { 2726 jrow = ii[i]; 2727 n = ii[i+1] - jrow; 2728 sums = y + dof*i; 2729 for (j=0; j<n; j++) { 2730 for (k=0; k<dof; k++) { 2731 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2732 } 2733 jrow++; 2734 } 2735 } 2736 2737 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2738 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2739 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2740 PetscFunctionReturn(0); 2741 } 2742 2743 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2744 { 2745 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2746 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2747 const PetscScalar *x,*v,*alpha; 2748 PetscScalar *y; 2749 PetscErrorCode ierr; 2750 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2751 PetscInt n,i,k; 2752 2753 PetscFunctionBegin; 2754 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2755 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2756 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2757 for (i=0; i<m; i++) { 2758 idx = a->j + a->i[i]; 2759 v = a->a + a->i[i]; 2760 n = a->i[i+1] - a->i[i]; 2761 alpha = x + dof*i; 2762 while (n-->0) { 2763 for (k=0; k<dof; k++) { 2764 y[dof*(*idx)+k] += alpha[k]*(*v); 2765 } 2766 idx++; v++; 2767 } 2768 } 2769 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2770 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2771 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2772 PetscFunctionReturn(0); 2773 } 2774 2775 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2776 { 2777 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2778 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2779 const PetscScalar *x,*v,*alpha; 2780 PetscScalar *y; 2781 PetscErrorCode ierr; 2782 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2783 PetscInt n,i,k; 2784 2785 PetscFunctionBegin; 2786 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2787 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2788 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2789 for (i=0; i<m; i++) { 2790 idx = a->j + a->i[i]; 2791 v = a->a + a->i[i]; 2792 n = a->i[i+1] - a->i[i]; 2793 alpha = x + dof*i; 2794 while (n-->0) { 2795 for (k=0; k<dof; k++) { 2796 y[dof*(*idx)+k] += alpha[k]*(*v); 2797 } 2798 idx++; v++; 2799 } 2800 } 2801 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2802 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2803 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2804 PetscFunctionReturn(0); 2805 } 2806 2807 /*===================================================================================*/ 2808 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2809 { 2810 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2811 PetscErrorCode ierr; 2812 2813 PetscFunctionBegin; 2814 /* start the scatter */ 2815 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2816 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2817 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2818 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2819 PetscFunctionReturn(0); 2820 } 2821 2822 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2823 { 2824 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2825 PetscErrorCode ierr; 2826 2827 PetscFunctionBegin; 2828 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2829 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2830 ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2831 ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2832 PetscFunctionReturn(0); 2833 } 2834 2835 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2836 { 2837 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2838 PetscErrorCode ierr; 2839 2840 PetscFunctionBegin; 2841 /* start the scatter */ 2842 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2843 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2844 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2845 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2846 PetscFunctionReturn(0); 2847 } 2848 2849 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2850 { 2851 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2852 PetscErrorCode ierr; 2853 2854 PetscFunctionBegin; 2855 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2856 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2857 ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2858 ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2859 PetscFunctionReturn(0); 2860 } 2861 2862 /* ----------------------------------------------------------------*/ 2863 PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) 2864 { 2865 Mat_Product *product = C->product; 2866 2867 PetscFunctionBegin; 2868 if (product->type == MATPRODUCT_PtAP) { 2869 C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; 2870 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]); 2871 PetscFunctionReturn(0); 2872 } 2873 2874 PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) 2875 { 2876 PetscErrorCode ierr; 2877 Mat_Product *product = C->product; 2878 PetscBool flg = PETSC_FALSE; 2879 Mat A=product->A,P=product->B; 2880 PetscInt alg=1; /* set default algorithm */ 2881 #if !defined(PETSC_HAVE_HYPRE) 2882 const char *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"}; 2883 PetscInt nalg=4; 2884 #else 2885 const char *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"}; 2886 PetscInt nalg=5; 2887 #endif 2888 2889 PetscFunctionBegin; 2890 if (product->type != MATPRODUCT_PtAP) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices",MatProductTypes[product->type]); 2891 2892 /* PtAP */ 2893 /* Check matrix local sizes */ 2894 if (A->rmap->rstart != P->rmap->rstart || A->rmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%D, %D) != Prow (%D,%D)",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend); 2895 if (A->cmap->rstart != P->rmap->rstart || A->cmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%D, %D) != Prow (%D,%D)",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend); 2896 2897 /* Set the default algorithm */ 2898 ierr = PetscStrcmp(C->product->alg,"default",&flg);CHKERRQ(ierr); 2899 if (flg) { 2900 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2901 } 2902 2903 /* Get runtime option */ 2904 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 2905 ierr = PetscOptionsEList("-matproduct_ptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2906 if (flg) { 2907 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2908 } 2909 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2910 2911 ierr = PetscStrcmp(C->product->alg,"allatonce",&flg);CHKERRQ(ierr); 2912 if (flg) { 2913 C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 2914 PetscFunctionReturn(0); 2915 } 2916 2917 ierr = PetscStrcmp(C->product->alg,"allatonce_merged",&flg);CHKERRQ(ierr); 2918 if (flg) { 2919 C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 2920 PetscFunctionReturn(0); 2921 } 2922 2923 /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ 2924 ierr = PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n");CHKERRQ(ierr); 2925 ierr = MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P);CHKERRQ(ierr); 2926 ierr = MatProductSetFromOptions(C);CHKERRQ(ierr); 2927 PetscFunctionReturn(0); 2928 } 2929 2930 /* ----------------------------------------------------------------*/ 2931 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C) 2932 { 2933 PetscErrorCode ierr; 2934 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2935 Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data; 2936 Mat P =pp->AIJ; 2937 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2938 PetscInt *pti,*ptj,*ptJ; 2939 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2940 const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 2941 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 2942 MatScalar *ca; 2943 const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 2944 2945 PetscFunctionBegin; 2946 /* Get ij structure of P^T */ 2947 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2948 2949 cn = pn*ppdof; 2950 /* Allocate ci array, arrays for fill computation and */ 2951 /* free space for accumulating nonzero column info */ 2952 ierr = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr); 2953 ci[0] = 0; 2954 2955 /* Work arrays for rows of P^T*A */ 2956 ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr); 2957 ierr = PetscArrayzero(ptadenserow,an);CHKERRQ(ierr); 2958 ierr = PetscArrayzero(denserow,cn);CHKERRQ(ierr); 2959 2960 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 2961 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 2962 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2963 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr); 2964 current_space = free_space; 2965 2966 /* Determine symbolic info for each row of C: */ 2967 for (i=0; i<pn; i++) { 2968 ptnzi = pti[i+1] - pti[i]; 2969 ptJ = ptj + pti[i]; 2970 for (dof=0; dof<ppdof; dof++) { 2971 ptanzi = 0; 2972 /* Determine symbolic row of PtA: */ 2973 for (j=0; j<ptnzi; j++) { 2974 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 2975 arow = ptJ[j]*ppdof + dof; 2976 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 2977 anzj = ai[arow+1] - ai[arow]; 2978 ajj = aj + ai[arow]; 2979 for (k=0; k<anzj; k++) { 2980 if (!ptadenserow[ajj[k]]) { 2981 ptadenserow[ajj[k]] = -1; 2982 ptasparserow[ptanzi++] = ajj[k]; 2983 } 2984 } 2985 } 2986 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 2987 ptaj = ptasparserow; 2988 cnzi = 0; 2989 for (j=0; j<ptanzi; j++) { 2990 /* Get offset within block of P */ 2991 pshift = *ptaj%ppdof; 2992 /* Get block row of P */ 2993 prow = (*ptaj++)/ppdof; /* integer division */ 2994 /* P has same number of nonzeros per row as the compressed form */ 2995 pnzj = pi[prow+1] - pi[prow]; 2996 pjj = pj + pi[prow]; 2997 for (k=0;k<pnzj;k++) { 2998 /* Locations in C are shifted by the offset within the block */ 2999 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 3000 if (!denserow[pjj[k]*ppdof+pshift]) { 3001 denserow[pjj[k]*ppdof+pshift] = -1; 3002 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 3003 } 3004 } 3005 } 3006 3007 /* sort sparserow */ 3008 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 3009 3010 /* If free space is not available, make more free space */ 3011 /* Double the amount of total space in the list */ 3012 if (current_space->local_remaining<cnzi) { 3013 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 3014 } 3015 3016 /* Copy data into free space, and zero out denserows */ 3017 ierr = PetscArraycpy(current_space->array,sparserow,cnzi);CHKERRQ(ierr); 3018 3019 current_space->array += cnzi; 3020 current_space->local_used += cnzi; 3021 current_space->local_remaining -= cnzi; 3022 3023 for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 3024 for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0; 3025 3026 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 3027 /* For now, we will recompute what is needed. */ 3028 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 3029 } 3030 } 3031 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 3032 /* Allocate space for cj, initialize cj, and */ 3033 /* destroy list of free space and other temporary array(s) */ 3034 ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr); 3035 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 3036 ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr); 3037 3038 /* Allocate space for ca */ 3039 ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr); 3040 3041 /* put together the new matrix */ 3042 ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,NULL,C);CHKERRQ(ierr); 3043 ierr = MatSetBlockSize(C,pp->dof);CHKERRQ(ierr); 3044 3045 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3046 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3047 c = (Mat_SeqAIJ*)(C->data); 3048 c->free_a = PETSC_TRUE; 3049 c->free_ij = PETSC_TRUE; 3050 c->nonew = 0; 3051 3052 C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 3053 C->ops->productnumeric = MatProductNumeric_PtAP; 3054 3055 /* Clean up. */ 3056 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 3057 PetscFunctionReturn(0); 3058 } 3059 3060 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 3061 { 3062 /* This routine requires testing -- first draft only */ 3063 PetscErrorCode ierr; 3064 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 3065 Mat P =pp->AIJ; 3066 Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data; 3067 Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data; 3068 Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data; 3069 const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj; 3070 const PetscInt *ci=c->i,*cj=c->j,*cjj; 3071 const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3072 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3073 const MatScalar *aa=a->a,*pa=p->a,*pA,*paj; 3074 MatScalar *ca=c->a,*caj,*apa; 3075 3076 PetscFunctionBegin; 3077 /* Allocate temporary array for storage of one row of A*P */ 3078 ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr); 3079 3080 /* Clear old values in C */ 3081 ierr = PetscArrayzero(ca,ci[cm]);CHKERRQ(ierr); 3082 3083 for (i=0; i<am; i++) { 3084 /* Form sparse row of A*P */ 3085 anzi = ai[i+1] - ai[i]; 3086 apnzj = 0; 3087 for (j=0; j<anzi; j++) { 3088 /* Get offset within block of P */ 3089 pshift = *aj%ppdof; 3090 /* Get block row of P */ 3091 prow = *aj++/ppdof; /* integer division */ 3092 pnzj = pi[prow+1] - pi[prow]; 3093 pjj = pj + pi[prow]; 3094 paj = pa + pi[prow]; 3095 for (k=0; k<pnzj; k++) { 3096 poffset = pjj[k]*ppdof+pshift; 3097 if (!apjdense[poffset]) { 3098 apjdense[poffset] = -1; 3099 apj[apnzj++] = poffset; 3100 } 3101 apa[poffset] += (*aa)*paj[k]; 3102 } 3103 ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 3104 aa++; 3105 } 3106 3107 /* Sort the j index array for quick sparse axpy. */ 3108 /* Note: a array does not need sorting as it is in dense storage locations. */ 3109 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 3110 3111 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 3112 prow = i/ppdof; /* integer division */ 3113 pshift = i%ppdof; 3114 poffset = pi[prow]; 3115 pnzi = pi[prow+1] - poffset; 3116 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 3117 pJ = pj+poffset; 3118 pA = pa+poffset; 3119 for (j=0; j<pnzi; j++) { 3120 crow = (*pJ)*ppdof+pshift; 3121 cjj = cj + ci[crow]; 3122 caj = ca + ci[crow]; 3123 pJ++; 3124 /* Perform sparse axpy operation. Note cjj includes apj. */ 3125 for (k=0,nextap=0; nextap<apnzj; k++) { 3126 if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]]; 3127 } 3128 ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 3129 pA++; 3130 } 3131 3132 /* Zero the current row info for A*P */ 3133 for (j=0; j<apnzj; j++) { 3134 apa[apj[j]] = 0.; 3135 apjdense[apj[j]] = 0; 3136 } 3137 } 3138 3139 /* Assemble the final matrix and clean up */ 3140 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3141 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3142 ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr); 3143 PetscFunctionReturn(0); 3144 } 3145 3146 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) 3147 { 3148 PetscErrorCode ierr; 3149 Mat_Product *product = C->product; 3150 Mat A=product->A,P=product->B; 3151 3152 PetscFunctionBegin; 3153 ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C);CHKERRQ(ierr); 3154 PetscFunctionReturn(0); 3155 } 3156 3157 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 3158 { 3159 PetscFunctionBegin; 3160 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet"); 3161 PetscFunctionReturn(0); 3162 } 3163 3164 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 3165 { 3166 PetscFunctionBegin; 3167 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 3168 PetscFunctionReturn(0); 3169 } 3170 3171 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat); 3172 3173 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C) 3174 { 3175 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3176 PetscErrorCode ierr; 3177 3178 PetscFunctionBegin; 3179 3180 ierr = MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C);CHKERRQ(ierr); 3181 PetscFunctionReturn(0); 3182 } 3183 3184 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat); 3185 3186 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C) 3187 { 3188 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3189 PetscErrorCode ierr; 3190 3191 PetscFunctionBegin; 3192 ierr = MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C);CHKERRQ(ierr); 3193 C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; 3194 PetscFunctionReturn(0); 3195 } 3196 3197 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat); 3198 3199 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C) 3200 { 3201 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3202 PetscErrorCode ierr; 3203 3204 PetscFunctionBegin; 3205 3206 ierr = MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C);CHKERRQ(ierr); 3207 PetscFunctionReturn(0); 3208 } 3209 3210 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat); 3211 3212 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C) 3213 { 3214 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3215 PetscErrorCode ierr; 3216 3217 PetscFunctionBegin; 3218 3219 ierr = MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C);CHKERRQ(ierr); 3220 C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; 3221 PetscFunctionReturn(0); 3222 } 3223 3224 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) 3225 { 3226 PetscErrorCode ierr; 3227 Mat_Product *product = C->product; 3228 Mat A=product->A,P=product->B; 3229 PetscBool flg; 3230 3231 PetscFunctionBegin; 3232 ierr = PetscStrcmp(product->alg,"allatonce",&flg);CHKERRQ(ierr); 3233 if (flg) { 3234 ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C);CHKERRQ(ierr); 3235 C->ops->productnumeric = MatProductNumeric_PtAP; 3236 PetscFunctionReturn(0); 3237 } 3238 3239 ierr = PetscStrcmp(product->alg,"allatonce_merged",&flg);CHKERRQ(ierr); 3240 if (flg) { 3241 ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C);CHKERRQ(ierr); 3242 C->ops->productnumeric = MatProductNumeric_PtAP; 3243 PetscFunctionReturn(0); 3244 } 3245 3246 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported"); 3247 PetscFunctionReturn(0); 3248 } 3249 3250 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3251 { 3252 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3253 Mat a = b->AIJ,B; 3254 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 3255 PetscErrorCode ierr; 3256 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3257 PetscInt *cols; 3258 PetscScalar *vals; 3259 3260 PetscFunctionBegin; 3261 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 3262 ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr); 3263 for (i=0; i<m; i++) { 3264 nmax = PetscMax(nmax,aij->ilen[i]); 3265 for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i]; 3266 } 3267 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3268 ierr = MatSetSizes(B,dof*m,dof*n,dof*m,dof*n);CHKERRQ(ierr); 3269 ierr = MatSetType(B,newtype);CHKERRQ(ierr); 3270 ierr = MatSeqAIJSetPreallocation(B,0,ilen);CHKERRQ(ierr); 3271 ierr = PetscFree(ilen);CHKERRQ(ierr); 3272 ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr); 3273 ii = 0; 3274 for (i=0; i<m; i++) { 3275 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3276 for (j=0; j<dof; j++) { 3277 for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j; 3278 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3279 ii++; 3280 } 3281 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3282 } 3283 ierr = PetscFree(icols);CHKERRQ(ierr); 3284 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3285 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3286 3287 if (reuse == MAT_INPLACE_MATRIX) { 3288 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 3289 } else { 3290 *newmat = B; 3291 } 3292 PetscFunctionReturn(0); 3293 } 3294 3295 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3296 3297 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3298 { 3299 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3300 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 3301 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 3302 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 3303 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 3304 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 3305 PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0; 3306 PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL; 3307 PetscInt rstart,cstart,*garray,ii,k; 3308 PetscErrorCode ierr; 3309 PetscScalar *vals,*ovals; 3310 3311 PetscFunctionBegin; 3312 ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr); 3313 for (i=0; i<A->rmap->n/dof; i++) { 3314 nmax = PetscMax(nmax,AIJ->ilen[i]); 3315 onmax = PetscMax(onmax,OAIJ->ilen[i]); 3316 for (j=0; j<dof; j++) { 3317 dnz[dof*i+j] = AIJ->ilen[i]; 3318 onz[dof*i+j] = OAIJ->ilen[i]; 3319 } 3320 } 3321 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3322 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3323 ierr = MatSetType(B,newtype);CHKERRQ(ierr); 3324 ierr = MatMPIAIJSetPreallocation(B,0,dnz,0,onz);CHKERRQ(ierr); 3325 ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr); 3326 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 3327 3328 ierr = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr); 3329 rstart = dof*maij->A->rmap->rstart; 3330 cstart = dof*maij->A->cmap->rstart; 3331 garray = mpiaij->garray; 3332 3333 ii = rstart; 3334 for (i=0; i<A->rmap->n/dof; i++) { 3335 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3336 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3337 for (j=0; j<dof; j++) { 3338 for (k=0; k<ncols; k++) { 3339 icols[k] = cstart + dof*cols[k]+j; 3340 } 3341 for (k=0; k<oncols; k++) { 3342 oicols[k] = dof*garray[ocols[k]]+j; 3343 } 3344 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3345 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 3346 ii++; 3347 } 3348 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3349 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3350 } 3351 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 3352 3353 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3354 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3355 3356 if (reuse == MAT_INPLACE_MATRIX) { 3357 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 3358 ((PetscObject)A)->refct = 1; 3359 3360 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 3361 3362 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3363 } else { 3364 *newmat = B; 3365 } 3366 PetscFunctionReturn(0); 3367 } 3368 3369 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 3370 { 3371 PetscErrorCode ierr; 3372 Mat A; 3373 3374 PetscFunctionBegin; 3375 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 3376 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 3377 ierr = MatDestroy(&A);CHKERRQ(ierr); 3378 PetscFunctionReturn(0); 3379 } 3380 3381 PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[]) 3382 { 3383 PetscErrorCode ierr; 3384 Mat A; 3385 3386 PetscFunctionBegin; 3387 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 3388 ierr = MatCreateSubMatrices(A,n,irow,icol,scall,submat);CHKERRQ(ierr); 3389 ierr = MatDestroy(&A);CHKERRQ(ierr); 3390 PetscFunctionReturn(0); 3391 } 3392 3393 /* ---------------------------------------------------------------------------------- */ 3394 /*@ 3395 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 3396 operations for multicomponent problems. It interpolates each component the same 3397 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 3398 and MATMPIAIJ for distributed matrices. 3399 3400 Collective 3401 3402 Input Parameters: 3403 + A - the AIJ matrix describing the action on blocks 3404 - dof - the block size (number of components per node) 3405 3406 Output Parameter: 3407 . maij - the new MAIJ matrix 3408 3409 Operations provided: 3410 + MatMult 3411 . MatMultTranspose 3412 . MatMultAdd 3413 . MatMultTransposeAdd 3414 - MatView 3415 3416 Level: advanced 3417 3418 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ 3419 @*/ 3420 PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 3421 { 3422 PetscErrorCode ierr; 3423 PetscMPIInt size; 3424 PetscInt n; 3425 Mat B; 3426 #if defined(PETSC_HAVE_CUDA) 3427 /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 3428 PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; 3429 #endif 3430 3431 PetscFunctionBegin; 3432 dof = PetscAbs(dof); 3433 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3434 3435 if (dof == 1) *maij = A; 3436 else { 3437 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3438 /* propagate vec type */ 3439 ierr = MatSetVecType(B,A->defaultvectype);CHKERRQ(ierr); 3440 ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3441 ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr); 3442 ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr); 3443 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3444 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3445 3446 B->assembled = PETSC_TRUE; 3447 3448 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3449 if (size == 1) { 3450 Mat_SeqMAIJ *b; 3451 3452 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 3453 3454 B->ops->setup = NULL; 3455 B->ops->destroy = MatDestroy_SeqMAIJ; 3456 B->ops->view = MatView_SeqMAIJ; 3457 3458 b = (Mat_SeqMAIJ*)B->data; 3459 b->dof = dof; 3460 b->AIJ = A; 3461 3462 if (dof == 2) { 3463 B->ops->mult = MatMult_SeqMAIJ_2; 3464 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3465 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3466 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3467 } else if (dof == 3) { 3468 B->ops->mult = MatMult_SeqMAIJ_3; 3469 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3470 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3471 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3472 } else if (dof == 4) { 3473 B->ops->mult = MatMult_SeqMAIJ_4; 3474 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3475 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3476 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3477 } else if (dof == 5) { 3478 B->ops->mult = MatMult_SeqMAIJ_5; 3479 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3480 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3481 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 3482 } else if (dof == 6) { 3483 B->ops->mult = MatMult_SeqMAIJ_6; 3484 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 3485 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 3486 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3487 } else if (dof == 7) { 3488 B->ops->mult = MatMult_SeqMAIJ_7; 3489 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3490 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3491 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 3492 } else if (dof == 8) { 3493 B->ops->mult = MatMult_SeqMAIJ_8; 3494 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 3495 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 3496 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 3497 } else if (dof == 9) { 3498 B->ops->mult = MatMult_SeqMAIJ_9; 3499 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 3500 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 3501 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3502 } else if (dof == 10) { 3503 B->ops->mult = MatMult_SeqMAIJ_10; 3504 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3505 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3506 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3507 } else if (dof == 11) { 3508 B->ops->mult = MatMult_SeqMAIJ_11; 3509 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3510 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3511 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 3512 } else if (dof == 16) { 3513 B->ops->mult = MatMult_SeqMAIJ_16; 3514 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 3515 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 3516 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3517 } else if (dof == 18) { 3518 B->ops->mult = MatMult_SeqMAIJ_18; 3519 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3520 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3521 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3522 } else { 3523 B->ops->mult = MatMult_SeqMAIJ_N; 3524 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 3525 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 3526 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 3527 } 3528 #if defined(PETSC_HAVE_CUDA) 3529 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3530 #endif 3531 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3532 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 3533 } else { 3534 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data; 3535 Mat_MPIMAIJ *b; 3536 IS from,to; 3537 Vec gvec; 3538 3539 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 3540 3541 B->ops->setup = NULL; 3542 B->ops->destroy = MatDestroy_MPIMAIJ; 3543 B->ops->view = MatView_MPIMAIJ; 3544 3545 b = (Mat_MPIMAIJ*)B->data; 3546 b->dof = dof; 3547 b->A = A; 3548 3549 ierr = MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ);CHKERRQ(ierr); 3550 ierr = MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ);CHKERRQ(ierr); 3551 3552 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3553 ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr); 3554 ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr); 3555 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3556 ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr); 3557 3558 /* create two temporary Index sets for build scatter gather */ 3559 ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 3560 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3561 3562 /* create temporary global vector to generate scatter context */ 3563 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 3564 3565 /* generate the scatter context */ 3566 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3567 3568 ierr = ISDestroy(&from);CHKERRQ(ierr); 3569 ierr = ISDestroy(&to);CHKERRQ(ierr); 3570 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 3571 3572 B->ops->mult = MatMult_MPIMAIJ_dof; 3573 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3574 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3575 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3576 3577 #if defined(PETSC_HAVE_CUDA) 3578 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3579 #endif 3580 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3581 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ);CHKERRQ(ierr); 3582 } 3583 B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 3584 B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 3585 ierr = MatSetFromOptions(B);CHKERRQ(ierr); 3586 ierr = MatSetUp(B);CHKERRQ(ierr); 3587 #if defined(PETSC_HAVE_CUDA) 3588 /* temporary until we have CUDA implementation of MAIJ */ 3589 { 3590 PetscBool flg; 3591 if (convert) { 3592 ierr = PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"");CHKERRQ(ierr); 3593 if (flg) { 3594 ierr = MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 3595 } 3596 } 3597 } 3598 #endif 3599 *maij = B; 3600 ierr = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr); 3601 } 3602 PetscFunctionReturn(0); 3603 } 3604