1 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20 #include <../src/mat/utils/freespace.h> 21 22 /*@ 23 MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 24 25 Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 26 27 Input Parameter: 28 . A - the MAIJ matrix 29 30 Output Parameter: 31 . B - the AIJ matrix 32 33 Level: advanced 34 35 Notes: 36 The reference count on the AIJ matrix is not increased so you should not destroy it. 37 38 .seealso: MatCreateMAIJ() 39 @*/ 40 PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 41 { 42 PetscErrorCode ierr; 43 PetscBool ismpimaij,isseqmaij; 44 45 PetscFunctionBegin; 46 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 47 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 48 if (ismpimaij) { 49 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 50 51 *B = b->A; 52 } else if (isseqmaij) { 53 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 54 55 *B = b->AIJ; 56 } else { 57 *B = A; 58 } 59 PetscFunctionReturn(0); 60 } 61 62 /*@ 63 MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 64 65 Logically Collective 66 67 Input Parameters: 68 + A - the MAIJ matrix 69 - dof - the block size for the new matrix 70 71 Output Parameter: 72 . B - the new MAIJ matrix 73 74 Level: advanced 75 76 .seealso: MatCreateMAIJ() 77 @*/ 78 PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 79 { 80 PetscErrorCode ierr; 81 Mat Aij = NULL; 82 83 PetscFunctionBegin; 84 PetscValidLogicalCollectiveInt(A,dof,2); 85 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 86 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 87 PetscFunctionReturn(0); 88 } 89 90 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 91 { 92 PetscErrorCode ierr; 93 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 94 95 PetscFunctionBegin; 96 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 97 ierr = PetscFree(A->data);CHKERRQ(ierr); 98 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr); 99 ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL);CHKERRQ(ierr); 100 PetscFunctionReturn(0); 101 } 102 103 PetscErrorCode MatSetUp_MAIJ(Mat A) 104 { 105 PetscFunctionBegin; 106 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices"); 107 } 108 109 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 110 { 111 PetscErrorCode ierr; 112 Mat B; 113 114 PetscFunctionBegin; 115 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 116 ierr = MatView(B,viewer);CHKERRQ(ierr); 117 ierr = MatDestroy(&B);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 122 { 123 PetscErrorCode ierr; 124 Mat B; 125 126 PetscFunctionBegin; 127 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 128 ierr = MatView(B,viewer);CHKERRQ(ierr); 129 ierr = MatDestroy(&B);CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 134 { 135 PetscErrorCode ierr; 136 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 137 138 PetscFunctionBegin; 139 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 140 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 141 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 142 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 143 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 144 ierr = PetscFree(A->data);CHKERRQ(ierr); 145 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr); 146 ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr); 147 ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr); 148 PetscFunctionReturn(0); 149 } 150 151 /*MC 152 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 153 multicomponent problems, interpolating or restricting each component the same way independently. 154 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 155 156 Operations provided: 157 .vb 158 MatMult 159 MatMultTranspose 160 MatMultAdd 161 MatMultTransposeAdd 162 .ve 163 164 Level: advanced 165 166 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ() 167 M*/ 168 169 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 170 { 171 PetscErrorCode ierr; 172 Mat_MPIMAIJ *b; 173 PetscMPIInt size; 174 175 PetscFunctionBegin; 176 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 177 A->data = (void*)b; 178 179 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 180 181 A->ops->setup = MatSetUp_MAIJ; 182 183 b->AIJ = NULL; 184 b->dof = 0; 185 b->OAIJ = NULL; 186 b->ctx = NULL; 187 b->w = NULL; 188 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 189 if (size == 1) { 190 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 191 } else { 192 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 193 } 194 A->preallocated = PETSC_TRUE; 195 A->assembled = PETSC_TRUE; 196 PetscFunctionReturn(0); 197 } 198 199 /* --------------------------------------------------------------------------------------*/ 200 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 201 { 202 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 203 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 204 const PetscScalar *x,*v; 205 PetscScalar *y, sum1, sum2; 206 PetscErrorCode ierr; 207 PetscInt nonzerorow=0,n,i,jrow,j; 208 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 209 210 PetscFunctionBegin; 211 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 212 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 213 idx = a->j; 214 v = a->a; 215 ii = a->i; 216 217 for (i=0; i<m; i++) { 218 jrow = ii[i]; 219 n = ii[i+1] - jrow; 220 sum1 = 0.0; 221 sum2 = 0.0; 222 223 nonzerorow += (n>0); 224 for (j=0; j<n; j++) { 225 sum1 += v[jrow]*x[2*idx[jrow]]; 226 sum2 += v[jrow]*x[2*idx[jrow]+1]; 227 jrow++; 228 } 229 y[2*i] = sum1; 230 y[2*i+1] = sum2; 231 } 232 233 ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 234 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 235 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 236 PetscFunctionReturn(0); 237 } 238 239 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 240 { 241 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 242 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 243 const PetscScalar *x,*v; 244 PetscScalar *y,alpha1,alpha2; 245 PetscErrorCode ierr; 246 PetscInt n,i; 247 const PetscInt m = b->AIJ->rmap->n,*idx; 248 249 PetscFunctionBegin; 250 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 251 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 252 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 253 254 for (i=0; i<m; i++) { 255 idx = a->j + a->i[i]; 256 v = a->a + a->i[i]; 257 n = a->i[i+1] - a->i[i]; 258 alpha1 = x[2*i]; 259 alpha2 = x[2*i+1]; 260 while (n-->0) { 261 y[2*(*idx)] += alpha1*(*v); 262 y[2*(*idx)+1] += alpha2*(*v); 263 idx++; v++; 264 } 265 } 266 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 267 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 268 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 269 PetscFunctionReturn(0); 270 } 271 272 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 273 { 274 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 275 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 276 const PetscScalar *x,*v; 277 PetscScalar *y,sum1, sum2; 278 PetscErrorCode ierr; 279 PetscInt n,i,jrow,j; 280 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 281 282 PetscFunctionBegin; 283 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 284 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 285 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 286 idx = a->j; 287 v = a->a; 288 ii = a->i; 289 290 for (i=0; i<m; i++) { 291 jrow = ii[i]; 292 n = ii[i+1] - jrow; 293 sum1 = 0.0; 294 sum2 = 0.0; 295 for (j=0; j<n; j++) { 296 sum1 += v[jrow]*x[2*idx[jrow]]; 297 sum2 += v[jrow]*x[2*idx[jrow]+1]; 298 jrow++; 299 } 300 y[2*i] += sum1; 301 y[2*i+1] += sum2; 302 } 303 304 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 305 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 306 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 307 PetscFunctionReturn(0); 308 } 309 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 310 { 311 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 312 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 313 const PetscScalar *x,*v; 314 PetscScalar *y,alpha1,alpha2; 315 PetscErrorCode ierr; 316 PetscInt n,i; 317 const PetscInt m = b->AIJ->rmap->n,*idx; 318 319 PetscFunctionBegin; 320 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 321 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 322 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 323 324 for (i=0; i<m; i++) { 325 idx = a->j + a->i[i]; 326 v = a->a + a->i[i]; 327 n = a->i[i+1] - a->i[i]; 328 alpha1 = x[2*i]; 329 alpha2 = x[2*i+1]; 330 while (n-->0) { 331 y[2*(*idx)] += alpha1*(*v); 332 y[2*(*idx)+1] += alpha2*(*v); 333 idx++; v++; 334 } 335 } 336 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 337 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 338 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 339 PetscFunctionReturn(0); 340 } 341 /* --------------------------------------------------------------------------------------*/ 342 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 343 { 344 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 345 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 346 const PetscScalar *x,*v; 347 PetscScalar *y,sum1, sum2, sum3; 348 PetscErrorCode ierr; 349 PetscInt nonzerorow=0,n,i,jrow,j; 350 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 351 352 PetscFunctionBegin; 353 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 354 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 355 idx = a->j; 356 v = a->a; 357 ii = a->i; 358 359 for (i=0; i<m; i++) { 360 jrow = ii[i]; 361 n = ii[i+1] - jrow; 362 sum1 = 0.0; 363 sum2 = 0.0; 364 sum3 = 0.0; 365 366 nonzerorow += (n>0); 367 for (j=0; j<n; j++) { 368 sum1 += v[jrow]*x[3*idx[jrow]]; 369 sum2 += v[jrow]*x[3*idx[jrow]+1]; 370 sum3 += v[jrow]*x[3*idx[jrow]+2]; 371 jrow++; 372 } 373 y[3*i] = sum1; 374 y[3*i+1] = sum2; 375 y[3*i+2] = sum3; 376 } 377 378 ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 379 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 380 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 381 PetscFunctionReturn(0); 382 } 383 384 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 385 { 386 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 387 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 388 const PetscScalar *x,*v; 389 PetscScalar *y,alpha1,alpha2,alpha3; 390 PetscErrorCode ierr; 391 PetscInt n,i; 392 const PetscInt m = b->AIJ->rmap->n,*idx; 393 394 PetscFunctionBegin; 395 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 396 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 397 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 398 399 for (i=0; i<m; i++) { 400 idx = a->j + a->i[i]; 401 v = a->a + a->i[i]; 402 n = a->i[i+1] - a->i[i]; 403 alpha1 = x[3*i]; 404 alpha2 = x[3*i+1]; 405 alpha3 = x[3*i+2]; 406 while (n-->0) { 407 y[3*(*idx)] += alpha1*(*v); 408 y[3*(*idx)+1] += alpha2*(*v); 409 y[3*(*idx)+2] += alpha3*(*v); 410 idx++; v++; 411 } 412 } 413 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 414 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 415 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 416 PetscFunctionReturn(0); 417 } 418 419 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 420 { 421 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 422 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 423 const PetscScalar *x,*v; 424 PetscScalar *y,sum1, sum2, sum3; 425 PetscErrorCode ierr; 426 PetscInt n,i,jrow,j; 427 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 428 429 PetscFunctionBegin; 430 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 431 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 432 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 433 idx = a->j; 434 v = a->a; 435 ii = a->i; 436 437 for (i=0; i<m; i++) { 438 jrow = ii[i]; 439 n = ii[i+1] - jrow; 440 sum1 = 0.0; 441 sum2 = 0.0; 442 sum3 = 0.0; 443 for (j=0; j<n; j++) { 444 sum1 += v[jrow]*x[3*idx[jrow]]; 445 sum2 += v[jrow]*x[3*idx[jrow]+1]; 446 sum3 += v[jrow]*x[3*idx[jrow]+2]; 447 jrow++; 448 } 449 y[3*i] += sum1; 450 y[3*i+1] += sum2; 451 y[3*i+2] += sum3; 452 } 453 454 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 455 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 456 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 457 PetscFunctionReturn(0); 458 } 459 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 460 { 461 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 462 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 463 const PetscScalar *x,*v; 464 PetscScalar *y,alpha1,alpha2,alpha3; 465 PetscErrorCode ierr; 466 PetscInt n,i; 467 const PetscInt m = b->AIJ->rmap->n,*idx; 468 469 PetscFunctionBegin; 470 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 471 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 472 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 473 for (i=0; i<m; i++) { 474 idx = a->j + a->i[i]; 475 v = a->a + a->i[i]; 476 n = a->i[i+1] - a->i[i]; 477 alpha1 = x[3*i]; 478 alpha2 = x[3*i+1]; 479 alpha3 = x[3*i+2]; 480 while (n-->0) { 481 y[3*(*idx)] += alpha1*(*v); 482 y[3*(*idx)+1] += alpha2*(*v); 483 y[3*(*idx)+2] += alpha3*(*v); 484 idx++; v++; 485 } 486 } 487 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 488 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 489 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 490 PetscFunctionReturn(0); 491 } 492 493 /* ------------------------------------------------------------------------------*/ 494 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 495 { 496 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 497 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 498 const PetscScalar *x,*v; 499 PetscScalar *y,sum1, sum2, sum3, sum4; 500 PetscErrorCode ierr; 501 PetscInt nonzerorow=0,n,i,jrow,j; 502 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 503 504 PetscFunctionBegin; 505 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 506 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 507 idx = a->j; 508 v = a->a; 509 ii = a->i; 510 511 for (i=0; i<m; i++) { 512 jrow = ii[i]; 513 n = ii[i+1] - jrow; 514 sum1 = 0.0; 515 sum2 = 0.0; 516 sum3 = 0.0; 517 sum4 = 0.0; 518 nonzerorow += (n>0); 519 for (j=0; j<n; j++) { 520 sum1 += v[jrow]*x[4*idx[jrow]]; 521 sum2 += v[jrow]*x[4*idx[jrow]+1]; 522 sum3 += v[jrow]*x[4*idx[jrow]+2]; 523 sum4 += v[jrow]*x[4*idx[jrow]+3]; 524 jrow++; 525 } 526 y[4*i] = sum1; 527 y[4*i+1] = sum2; 528 y[4*i+2] = sum3; 529 y[4*i+3] = sum4; 530 } 531 532 ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 533 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 534 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 535 PetscFunctionReturn(0); 536 } 537 538 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 539 { 540 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 541 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 542 const PetscScalar *x,*v; 543 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 544 PetscErrorCode ierr; 545 PetscInt n,i; 546 const PetscInt m = b->AIJ->rmap->n,*idx; 547 548 PetscFunctionBegin; 549 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 550 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 551 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 552 for (i=0; i<m; i++) { 553 idx = a->j + a->i[i]; 554 v = a->a + a->i[i]; 555 n = a->i[i+1] - a->i[i]; 556 alpha1 = x[4*i]; 557 alpha2 = x[4*i+1]; 558 alpha3 = x[4*i+2]; 559 alpha4 = x[4*i+3]; 560 while (n-->0) { 561 y[4*(*idx)] += alpha1*(*v); 562 y[4*(*idx)+1] += alpha2*(*v); 563 y[4*(*idx)+2] += alpha3*(*v); 564 y[4*(*idx)+3] += alpha4*(*v); 565 idx++; v++; 566 } 567 } 568 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 569 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 570 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 571 PetscFunctionReturn(0); 572 } 573 574 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 575 { 576 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 577 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 578 const PetscScalar *x,*v; 579 PetscScalar *y,sum1, sum2, sum3, sum4; 580 PetscErrorCode ierr; 581 PetscInt n,i,jrow,j; 582 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 583 584 PetscFunctionBegin; 585 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 586 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 587 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 588 idx = a->j; 589 v = a->a; 590 ii = a->i; 591 592 for (i=0; i<m; i++) { 593 jrow = ii[i]; 594 n = ii[i+1] - jrow; 595 sum1 = 0.0; 596 sum2 = 0.0; 597 sum3 = 0.0; 598 sum4 = 0.0; 599 for (j=0; j<n; j++) { 600 sum1 += v[jrow]*x[4*idx[jrow]]; 601 sum2 += v[jrow]*x[4*idx[jrow]+1]; 602 sum3 += v[jrow]*x[4*idx[jrow]+2]; 603 sum4 += v[jrow]*x[4*idx[jrow]+3]; 604 jrow++; 605 } 606 y[4*i] += sum1; 607 y[4*i+1] += sum2; 608 y[4*i+2] += sum3; 609 y[4*i+3] += sum4; 610 } 611 612 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 613 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 614 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 615 PetscFunctionReturn(0); 616 } 617 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 618 { 619 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 620 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 621 const PetscScalar *x,*v; 622 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 623 PetscErrorCode ierr; 624 PetscInt n,i; 625 const PetscInt m = b->AIJ->rmap->n,*idx; 626 627 PetscFunctionBegin; 628 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 629 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 630 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 631 632 for (i=0; i<m; i++) { 633 idx = a->j + a->i[i]; 634 v = a->a + a->i[i]; 635 n = a->i[i+1] - a->i[i]; 636 alpha1 = x[4*i]; 637 alpha2 = x[4*i+1]; 638 alpha3 = x[4*i+2]; 639 alpha4 = x[4*i+3]; 640 while (n-->0) { 641 y[4*(*idx)] += alpha1*(*v); 642 y[4*(*idx)+1] += alpha2*(*v); 643 y[4*(*idx)+2] += alpha3*(*v); 644 y[4*(*idx)+3] += alpha4*(*v); 645 idx++; v++; 646 } 647 } 648 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 649 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 650 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 651 PetscFunctionReturn(0); 652 } 653 /* ------------------------------------------------------------------------------*/ 654 655 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 656 { 657 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 658 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 659 const PetscScalar *x,*v; 660 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 661 PetscErrorCode ierr; 662 PetscInt nonzerorow=0,n,i,jrow,j; 663 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 664 665 PetscFunctionBegin; 666 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 667 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 668 idx = a->j; 669 v = a->a; 670 ii = a->i; 671 672 for (i=0; i<m; i++) { 673 jrow = ii[i]; 674 n = ii[i+1] - jrow; 675 sum1 = 0.0; 676 sum2 = 0.0; 677 sum3 = 0.0; 678 sum4 = 0.0; 679 sum5 = 0.0; 680 681 nonzerorow += (n>0); 682 for (j=0; j<n; j++) { 683 sum1 += v[jrow]*x[5*idx[jrow]]; 684 sum2 += v[jrow]*x[5*idx[jrow]+1]; 685 sum3 += v[jrow]*x[5*idx[jrow]+2]; 686 sum4 += v[jrow]*x[5*idx[jrow]+3]; 687 sum5 += v[jrow]*x[5*idx[jrow]+4]; 688 jrow++; 689 } 690 y[5*i] = sum1; 691 y[5*i+1] = sum2; 692 y[5*i+2] = sum3; 693 y[5*i+3] = sum4; 694 y[5*i+4] = sum5; 695 } 696 697 ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 698 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 699 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 704 { 705 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 706 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 707 const PetscScalar *x,*v; 708 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 709 PetscErrorCode ierr; 710 PetscInt n,i; 711 const PetscInt m = b->AIJ->rmap->n,*idx; 712 713 PetscFunctionBegin; 714 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 715 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 716 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 717 718 for (i=0; i<m; i++) { 719 idx = a->j + a->i[i]; 720 v = a->a + a->i[i]; 721 n = a->i[i+1] - a->i[i]; 722 alpha1 = x[5*i]; 723 alpha2 = x[5*i+1]; 724 alpha3 = x[5*i+2]; 725 alpha4 = x[5*i+3]; 726 alpha5 = x[5*i+4]; 727 while (n-->0) { 728 y[5*(*idx)] += alpha1*(*v); 729 y[5*(*idx)+1] += alpha2*(*v); 730 y[5*(*idx)+2] += alpha3*(*v); 731 y[5*(*idx)+3] += alpha4*(*v); 732 y[5*(*idx)+4] += alpha5*(*v); 733 idx++; v++; 734 } 735 } 736 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 737 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 738 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 739 PetscFunctionReturn(0); 740 } 741 742 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 743 { 744 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 745 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 746 const PetscScalar *x,*v; 747 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 748 PetscErrorCode ierr; 749 PetscInt n,i,jrow,j; 750 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 751 752 PetscFunctionBegin; 753 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 754 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 755 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 756 idx = a->j; 757 v = a->a; 758 ii = a->i; 759 760 for (i=0; i<m; i++) { 761 jrow = ii[i]; 762 n = ii[i+1] - jrow; 763 sum1 = 0.0; 764 sum2 = 0.0; 765 sum3 = 0.0; 766 sum4 = 0.0; 767 sum5 = 0.0; 768 for (j=0; j<n; j++) { 769 sum1 += v[jrow]*x[5*idx[jrow]]; 770 sum2 += v[jrow]*x[5*idx[jrow]+1]; 771 sum3 += v[jrow]*x[5*idx[jrow]+2]; 772 sum4 += v[jrow]*x[5*idx[jrow]+3]; 773 sum5 += v[jrow]*x[5*idx[jrow]+4]; 774 jrow++; 775 } 776 y[5*i] += sum1; 777 y[5*i+1] += sum2; 778 y[5*i+2] += sum3; 779 y[5*i+3] += sum4; 780 y[5*i+4] += sum5; 781 } 782 783 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 784 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 785 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 786 PetscFunctionReturn(0); 787 } 788 789 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 790 { 791 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 792 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 793 const PetscScalar *x,*v; 794 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 795 PetscErrorCode ierr; 796 PetscInt n,i; 797 const PetscInt m = b->AIJ->rmap->n,*idx; 798 799 PetscFunctionBegin; 800 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 801 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 802 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 803 804 for (i=0; i<m; i++) { 805 idx = a->j + a->i[i]; 806 v = a->a + a->i[i]; 807 n = a->i[i+1] - a->i[i]; 808 alpha1 = x[5*i]; 809 alpha2 = x[5*i+1]; 810 alpha3 = x[5*i+2]; 811 alpha4 = x[5*i+3]; 812 alpha5 = x[5*i+4]; 813 while (n-->0) { 814 y[5*(*idx)] += alpha1*(*v); 815 y[5*(*idx)+1] += alpha2*(*v); 816 y[5*(*idx)+2] += alpha3*(*v); 817 y[5*(*idx)+3] += alpha4*(*v); 818 y[5*(*idx)+4] += alpha5*(*v); 819 idx++; v++; 820 } 821 } 822 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 823 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 824 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 825 PetscFunctionReturn(0); 826 } 827 828 /* ------------------------------------------------------------------------------*/ 829 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 830 { 831 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 832 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 833 const PetscScalar *x,*v; 834 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 835 PetscErrorCode ierr; 836 PetscInt nonzerorow=0,n,i,jrow,j; 837 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 838 839 PetscFunctionBegin; 840 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 841 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 842 idx = a->j; 843 v = a->a; 844 ii = a->i; 845 846 for (i=0; i<m; i++) { 847 jrow = ii[i]; 848 n = ii[i+1] - jrow; 849 sum1 = 0.0; 850 sum2 = 0.0; 851 sum3 = 0.0; 852 sum4 = 0.0; 853 sum5 = 0.0; 854 sum6 = 0.0; 855 856 nonzerorow += (n>0); 857 for (j=0; j<n; j++) { 858 sum1 += v[jrow]*x[6*idx[jrow]]; 859 sum2 += v[jrow]*x[6*idx[jrow]+1]; 860 sum3 += v[jrow]*x[6*idx[jrow]+2]; 861 sum4 += v[jrow]*x[6*idx[jrow]+3]; 862 sum5 += v[jrow]*x[6*idx[jrow]+4]; 863 sum6 += v[jrow]*x[6*idx[jrow]+5]; 864 jrow++; 865 } 866 y[6*i] = sum1; 867 y[6*i+1] = sum2; 868 y[6*i+2] = sum3; 869 y[6*i+3] = sum4; 870 y[6*i+4] = sum5; 871 y[6*i+5] = sum6; 872 } 873 874 ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 875 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 876 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 877 PetscFunctionReturn(0); 878 } 879 880 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 881 { 882 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 883 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 884 const PetscScalar *x,*v; 885 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 886 PetscErrorCode ierr; 887 PetscInt n,i; 888 const PetscInt m = b->AIJ->rmap->n,*idx; 889 890 PetscFunctionBegin; 891 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 892 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 893 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 894 895 for (i=0; i<m; i++) { 896 idx = a->j + a->i[i]; 897 v = a->a + a->i[i]; 898 n = a->i[i+1] - a->i[i]; 899 alpha1 = x[6*i]; 900 alpha2 = x[6*i+1]; 901 alpha3 = x[6*i+2]; 902 alpha4 = x[6*i+3]; 903 alpha5 = x[6*i+4]; 904 alpha6 = x[6*i+5]; 905 while (n-->0) { 906 y[6*(*idx)] += alpha1*(*v); 907 y[6*(*idx)+1] += alpha2*(*v); 908 y[6*(*idx)+2] += alpha3*(*v); 909 y[6*(*idx)+3] += alpha4*(*v); 910 y[6*(*idx)+4] += alpha5*(*v); 911 y[6*(*idx)+5] += alpha6*(*v); 912 idx++; v++; 913 } 914 } 915 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 916 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 917 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 918 PetscFunctionReturn(0); 919 } 920 921 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 922 { 923 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 924 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 925 const PetscScalar *x,*v; 926 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 927 PetscErrorCode ierr; 928 PetscInt n,i,jrow,j; 929 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 930 931 PetscFunctionBegin; 932 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 933 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 934 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 935 idx = a->j; 936 v = a->a; 937 ii = a->i; 938 939 for (i=0; i<m; i++) { 940 jrow = ii[i]; 941 n = ii[i+1] - jrow; 942 sum1 = 0.0; 943 sum2 = 0.0; 944 sum3 = 0.0; 945 sum4 = 0.0; 946 sum5 = 0.0; 947 sum6 = 0.0; 948 for (j=0; j<n; j++) { 949 sum1 += v[jrow]*x[6*idx[jrow]]; 950 sum2 += v[jrow]*x[6*idx[jrow]+1]; 951 sum3 += v[jrow]*x[6*idx[jrow]+2]; 952 sum4 += v[jrow]*x[6*idx[jrow]+3]; 953 sum5 += v[jrow]*x[6*idx[jrow]+4]; 954 sum6 += v[jrow]*x[6*idx[jrow]+5]; 955 jrow++; 956 } 957 y[6*i] += sum1; 958 y[6*i+1] += sum2; 959 y[6*i+2] += sum3; 960 y[6*i+3] += sum4; 961 y[6*i+4] += sum5; 962 y[6*i+5] += sum6; 963 } 964 965 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 966 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 967 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 968 PetscFunctionReturn(0); 969 } 970 971 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 972 { 973 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 974 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 975 const PetscScalar *x,*v; 976 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 977 PetscErrorCode ierr; 978 PetscInt n,i; 979 const PetscInt m = b->AIJ->rmap->n,*idx; 980 981 PetscFunctionBegin; 982 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 983 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 984 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 985 986 for (i=0; i<m; i++) { 987 idx = a->j + a->i[i]; 988 v = a->a + a->i[i]; 989 n = a->i[i+1] - a->i[i]; 990 alpha1 = x[6*i]; 991 alpha2 = x[6*i+1]; 992 alpha3 = x[6*i+2]; 993 alpha4 = x[6*i+3]; 994 alpha5 = x[6*i+4]; 995 alpha6 = x[6*i+5]; 996 while (n-->0) { 997 y[6*(*idx)] += alpha1*(*v); 998 y[6*(*idx)+1] += alpha2*(*v); 999 y[6*(*idx)+2] += alpha3*(*v); 1000 y[6*(*idx)+3] += alpha4*(*v); 1001 y[6*(*idx)+4] += alpha5*(*v); 1002 y[6*(*idx)+5] += alpha6*(*v); 1003 idx++; v++; 1004 } 1005 } 1006 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1007 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1008 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1009 PetscFunctionReturn(0); 1010 } 1011 1012 /* ------------------------------------------------------------------------------*/ 1013 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1014 { 1015 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1016 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1017 const PetscScalar *x,*v; 1018 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1019 PetscErrorCode ierr; 1020 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1021 PetscInt nonzerorow=0,n,i,jrow,j; 1022 1023 PetscFunctionBegin; 1024 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1025 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1026 idx = a->j; 1027 v = a->a; 1028 ii = a->i; 1029 1030 for (i=0; i<m; i++) { 1031 jrow = ii[i]; 1032 n = ii[i+1] - jrow; 1033 sum1 = 0.0; 1034 sum2 = 0.0; 1035 sum3 = 0.0; 1036 sum4 = 0.0; 1037 sum5 = 0.0; 1038 sum6 = 0.0; 1039 sum7 = 0.0; 1040 1041 nonzerorow += (n>0); 1042 for (j=0; j<n; j++) { 1043 sum1 += v[jrow]*x[7*idx[jrow]]; 1044 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1045 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1046 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1047 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1048 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1049 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1050 jrow++; 1051 } 1052 y[7*i] = sum1; 1053 y[7*i+1] = sum2; 1054 y[7*i+2] = sum3; 1055 y[7*i+3] = sum4; 1056 y[7*i+4] = sum5; 1057 y[7*i+5] = sum6; 1058 y[7*i+6] = sum7; 1059 } 1060 1061 ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 1062 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1063 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1068 { 1069 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1070 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1071 const PetscScalar *x,*v; 1072 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1073 PetscErrorCode ierr; 1074 const PetscInt m = b->AIJ->rmap->n,*idx; 1075 PetscInt n,i; 1076 1077 PetscFunctionBegin; 1078 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1079 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1080 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1081 1082 for (i=0; i<m; i++) { 1083 idx = a->j + a->i[i]; 1084 v = a->a + a->i[i]; 1085 n = a->i[i+1] - a->i[i]; 1086 alpha1 = x[7*i]; 1087 alpha2 = x[7*i+1]; 1088 alpha3 = x[7*i+2]; 1089 alpha4 = x[7*i+3]; 1090 alpha5 = x[7*i+4]; 1091 alpha6 = x[7*i+5]; 1092 alpha7 = x[7*i+6]; 1093 while (n-->0) { 1094 y[7*(*idx)] += alpha1*(*v); 1095 y[7*(*idx)+1] += alpha2*(*v); 1096 y[7*(*idx)+2] += alpha3*(*v); 1097 y[7*(*idx)+3] += alpha4*(*v); 1098 y[7*(*idx)+4] += alpha5*(*v); 1099 y[7*(*idx)+5] += alpha6*(*v); 1100 y[7*(*idx)+6] += alpha7*(*v); 1101 idx++; v++; 1102 } 1103 } 1104 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1105 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1106 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1107 PetscFunctionReturn(0); 1108 } 1109 1110 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1111 { 1112 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1113 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1114 const PetscScalar *x,*v; 1115 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1116 PetscErrorCode ierr; 1117 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1118 PetscInt n,i,jrow,j; 1119 1120 PetscFunctionBegin; 1121 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1122 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1123 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1124 idx = a->j; 1125 v = a->a; 1126 ii = a->i; 1127 1128 for (i=0; i<m; i++) { 1129 jrow = ii[i]; 1130 n = ii[i+1] - jrow; 1131 sum1 = 0.0; 1132 sum2 = 0.0; 1133 sum3 = 0.0; 1134 sum4 = 0.0; 1135 sum5 = 0.0; 1136 sum6 = 0.0; 1137 sum7 = 0.0; 1138 for (j=0; j<n; j++) { 1139 sum1 += v[jrow]*x[7*idx[jrow]]; 1140 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1141 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1142 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1143 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1144 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1145 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1146 jrow++; 1147 } 1148 y[7*i] += sum1; 1149 y[7*i+1] += sum2; 1150 y[7*i+2] += sum3; 1151 y[7*i+3] += sum4; 1152 y[7*i+4] += sum5; 1153 y[7*i+5] += sum6; 1154 y[7*i+6] += sum7; 1155 } 1156 1157 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1158 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1159 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1160 PetscFunctionReturn(0); 1161 } 1162 1163 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1164 { 1165 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1166 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1167 const PetscScalar *x,*v; 1168 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1169 PetscErrorCode ierr; 1170 const PetscInt m = b->AIJ->rmap->n,*idx; 1171 PetscInt n,i; 1172 1173 PetscFunctionBegin; 1174 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1175 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1176 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1177 for (i=0; i<m; i++) { 1178 idx = a->j + a->i[i]; 1179 v = a->a + a->i[i]; 1180 n = a->i[i+1] - a->i[i]; 1181 alpha1 = x[7*i]; 1182 alpha2 = x[7*i+1]; 1183 alpha3 = x[7*i+2]; 1184 alpha4 = x[7*i+3]; 1185 alpha5 = x[7*i+4]; 1186 alpha6 = x[7*i+5]; 1187 alpha7 = x[7*i+6]; 1188 while (n-->0) { 1189 y[7*(*idx)] += alpha1*(*v); 1190 y[7*(*idx)+1] += alpha2*(*v); 1191 y[7*(*idx)+2] += alpha3*(*v); 1192 y[7*(*idx)+3] += alpha4*(*v); 1193 y[7*(*idx)+4] += alpha5*(*v); 1194 y[7*(*idx)+5] += alpha6*(*v); 1195 y[7*(*idx)+6] += alpha7*(*v); 1196 idx++; v++; 1197 } 1198 } 1199 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1200 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1201 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1202 PetscFunctionReturn(0); 1203 } 1204 1205 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1206 { 1207 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1208 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1209 const PetscScalar *x,*v; 1210 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1211 PetscErrorCode ierr; 1212 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1213 PetscInt nonzerorow=0,n,i,jrow,j; 1214 1215 PetscFunctionBegin; 1216 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1217 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1218 idx = a->j; 1219 v = a->a; 1220 ii = a->i; 1221 1222 for (i=0; i<m; i++) { 1223 jrow = ii[i]; 1224 n = ii[i+1] - jrow; 1225 sum1 = 0.0; 1226 sum2 = 0.0; 1227 sum3 = 0.0; 1228 sum4 = 0.0; 1229 sum5 = 0.0; 1230 sum6 = 0.0; 1231 sum7 = 0.0; 1232 sum8 = 0.0; 1233 1234 nonzerorow += (n>0); 1235 for (j=0; j<n; j++) { 1236 sum1 += v[jrow]*x[8*idx[jrow]]; 1237 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1238 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1239 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1240 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1241 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1242 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1243 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1244 jrow++; 1245 } 1246 y[8*i] = sum1; 1247 y[8*i+1] = sum2; 1248 y[8*i+2] = sum3; 1249 y[8*i+3] = sum4; 1250 y[8*i+4] = sum5; 1251 y[8*i+5] = sum6; 1252 y[8*i+6] = sum7; 1253 y[8*i+7] = sum8; 1254 } 1255 1256 ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 1257 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1258 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1263 { 1264 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1265 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1266 const PetscScalar *x,*v; 1267 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1268 PetscErrorCode ierr; 1269 const PetscInt m = b->AIJ->rmap->n,*idx; 1270 PetscInt n,i; 1271 1272 PetscFunctionBegin; 1273 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1274 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1275 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1276 1277 for (i=0; i<m; i++) { 1278 idx = a->j + a->i[i]; 1279 v = a->a + a->i[i]; 1280 n = a->i[i+1] - a->i[i]; 1281 alpha1 = x[8*i]; 1282 alpha2 = x[8*i+1]; 1283 alpha3 = x[8*i+2]; 1284 alpha4 = x[8*i+3]; 1285 alpha5 = x[8*i+4]; 1286 alpha6 = x[8*i+5]; 1287 alpha7 = x[8*i+6]; 1288 alpha8 = x[8*i+7]; 1289 while (n-->0) { 1290 y[8*(*idx)] += alpha1*(*v); 1291 y[8*(*idx)+1] += alpha2*(*v); 1292 y[8*(*idx)+2] += alpha3*(*v); 1293 y[8*(*idx)+3] += alpha4*(*v); 1294 y[8*(*idx)+4] += alpha5*(*v); 1295 y[8*(*idx)+5] += alpha6*(*v); 1296 y[8*(*idx)+6] += alpha7*(*v); 1297 y[8*(*idx)+7] += alpha8*(*v); 1298 idx++; v++; 1299 } 1300 } 1301 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1302 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1303 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1308 { 1309 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1310 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1311 const PetscScalar *x,*v; 1312 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1313 PetscErrorCode ierr; 1314 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1315 PetscInt n,i,jrow,j; 1316 1317 PetscFunctionBegin; 1318 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1319 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1320 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1321 idx = a->j; 1322 v = a->a; 1323 ii = a->i; 1324 1325 for (i=0; i<m; i++) { 1326 jrow = ii[i]; 1327 n = ii[i+1] - jrow; 1328 sum1 = 0.0; 1329 sum2 = 0.0; 1330 sum3 = 0.0; 1331 sum4 = 0.0; 1332 sum5 = 0.0; 1333 sum6 = 0.0; 1334 sum7 = 0.0; 1335 sum8 = 0.0; 1336 for (j=0; j<n; j++) { 1337 sum1 += v[jrow]*x[8*idx[jrow]]; 1338 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1339 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1340 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1341 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1342 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1343 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1344 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1345 jrow++; 1346 } 1347 y[8*i] += sum1; 1348 y[8*i+1] += sum2; 1349 y[8*i+2] += sum3; 1350 y[8*i+3] += sum4; 1351 y[8*i+4] += sum5; 1352 y[8*i+5] += sum6; 1353 y[8*i+6] += sum7; 1354 y[8*i+7] += sum8; 1355 } 1356 1357 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1358 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1359 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1360 PetscFunctionReturn(0); 1361 } 1362 1363 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1364 { 1365 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1366 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1367 const PetscScalar *x,*v; 1368 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1369 PetscErrorCode ierr; 1370 const PetscInt m = b->AIJ->rmap->n,*idx; 1371 PetscInt n,i; 1372 1373 PetscFunctionBegin; 1374 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1375 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1376 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1377 for (i=0; i<m; i++) { 1378 idx = a->j + a->i[i]; 1379 v = a->a + a->i[i]; 1380 n = a->i[i+1] - a->i[i]; 1381 alpha1 = x[8*i]; 1382 alpha2 = x[8*i+1]; 1383 alpha3 = x[8*i+2]; 1384 alpha4 = x[8*i+3]; 1385 alpha5 = x[8*i+4]; 1386 alpha6 = x[8*i+5]; 1387 alpha7 = x[8*i+6]; 1388 alpha8 = x[8*i+7]; 1389 while (n-->0) { 1390 y[8*(*idx)] += alpha1*(*v); 1391 y[8*(*idx)+1] += alpha2*(*v); 1392 y[8*(*idx)+2] += alpha3*(*v); 1393 y[8*(*idx)+3] += alpha4*(*v); 1394 y[8*(*idx)+4] += alpha5*(*v); 1395 y[8*(*idx)+5] += alpha6*(*v); 1396 y[8*(*idx)+6] += alpha7*(*v); 1397 y[8*(*idx)+7] += alpha8*(*v); 1398 idx++; v++; 1399 } 1400 } 1401 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1402 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1403 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1404 PetscFunctionReturn(0); 1405 } 1406 1407 /* ------------------------------------------------------------------------------*/ 1408 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1409 { 1410 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1411 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1412 const PetscScalar *x,*v; 1413 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1414 PetscErrorCode ierr; 1415 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1416 PetscInt nonzerorow=0,n,i,jrow,j; 1417 1418 PetscFunctionBegin; 1419 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1420 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1421 idx = a->j; 1422 v = a->a; 1423 ii = a->i; 1424 1425 for (i=0; i<m; i++) { 1426 jrow = ii[i]; 1427 n = ii[i+1] - jrow; 1428 sum1 = 0.0; 1429 sum2 = 0.0; 1430 sum3 = 0.0; 1431 sum4 = 0.0; 1432 sum5 = 0.0; 1433 sum6 = 0.0; 1434 sum7 = 0.0; 1435 sum8 = 0.0; 1436 sum9 = 0.0; 1437 1438 nonzerorow += (n>0); 1439 for (j=0; j<n; j++) { 1440 sum1 += v[jrow]*x[9*idx[jrow]]; 1441 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1442 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1443 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1444 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1445 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1446 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1447 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1448 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1449 jrow++; 1450 } 1451 y[9*i] = sum1; 1452 y[9*i+1] = sum2; 1453 y[9*i+2] = sum3; 1454 y[9*i+3] = sum4; 1455 y[9*i+4] = sum5; 1456 y[9*i+5] = sum6; 1457 y[9*i+6] = sum7; 1458 y[9*i+7] = sum8; 1459 y[9*i+8] = sum9; 1460 } 1461 1462 ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 1463 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1464 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1465 PetscFunctionReturn(0); 1466 } 1467 1468 /* ------------------------------------------------------------------------------*/ 1469 1470 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1471 { 1472 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1473 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1474 const PetscScalar *x,*v; 1475 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1476 PetscErrorCode ierr; 1477 const PetscInt m = b->AIJ->rmap->n,*idx; 1478 PetscInt n,i; 1479 1480 PetscFunctionBegin; 1481 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1482 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1483 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1484 1485 for (i=0; i<m; i++) { 1486 idx = a->j + a->i[i]; 1487 v = a->a + a->i[i]; 1488 n = a->i[i+1] - a->i[i]; 1489 alpha1 = x[9*i]; 1490 alpha2 = x[9*i+1]; 1491 alpha3 = x[9*i+2]; 1492 alpha4 = x[9*i+3]; 1493 alpha5 = x[9*i+4]; 1494 alpha6 = x[9*i+5]; 1495 alpha7 = x[9*i+6]; 1496 alpha8 = x[9*i+7]; 1497 alpha9 = x[9*i+8]; 1498 while (n-->0) { 1499 y[9*(*idx)] += alpha1*(*v); 1500 y[9*(*idx)+1] += alpha2*(*v); 1501 y[9*(*idx)+2] += alpha3*(*v); 1502 y[9*(*idx)+3] += alpha4*(*v); 1503 y[9*(*idx)+4] += alpha5*(*v); 1504 y[9*(*idx)+5] += alpha6*(*v); 1505 y[9*(*idx)+6] += alpha7*(*v); 1506 y[9*(*idx)+7] += alpha8*(*v); 1507 y[9*(*idx)+8] += alpha9*(*v); 1508 idx++; v++; 1509 } 1510 } 1511 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1512 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1513 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1514 PetscFunctionReturn(0); 1515 } 1516 1517 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1518 { 1519 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1520 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1521 const PetscScalar *x,*v; 1522 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1523 PetscErrorCode ierr; 1524 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1525 PetscInt n,i,jrow,j; 1526 1527 PetscFunctionBegin; 1528 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1529 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1530 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1531 idx = a->j; 1532 v = a->a; 1533 ii = a->i; 1534 1535 for (i=0; i<m; i++) { 1536 jrow = ii[i]; 1537 n = ii[i+1] - jrow; 1538 sum1 = 0.0; 1539 sum2 = 0.0; 1540 sum3 = 0.0; 1541 sum4 = 0.0; 1542 sum5 = 0.0; 1543 sum6 = 0.0; 1544 sum7 = 0.0; 1545 sum8 = 0.0; 1546 sum9 = 0.0; 1547 for (j=0; j<n; j++) { 1548 sum1 += v[jrow]*x[9*idx[jrow]]; 1549 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1550 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1551 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1552 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1553 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1554 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1555 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1556 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1557 jrow++; 1558 } 1559 y[9*i] += sum1; 1560 y[9*i+1] += sum2; 1561 y[9*i+2] += sum3; 1562 y[9*i+3] += sum4; 1563 y[9*i+4] += sum5; 1564 y[9*i+5] += sum6; 1565 y[9*i+6] += sum7; 1566 y[9*i+7] += sum8; 1567 y[9*i+8] += sum9; 1568 } 1569 1570 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1571 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1572 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1573 PetscFunctionReturn(0); 1574 } 1575 1576 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1577 { 1578 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1579 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1580 const PetscScalar *x,*v; 1581 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1582 PetscErrorCode ierr; 1583 const PetscInt m = b->AIJ->rmap->n,*idx; 1584 PetscInt n,i; 1585 1586 PetscFunctionBegin; 1587 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1588 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1589 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1590 for (i=0; i<m; i++) { 1591 idx = a->j + a->i[i]; 1592 v = a->a + a->i[i]; 1593 n = a->i[i+1] - a->i[i]; 1594 alpha1 = x[9*i]; 1595 alpha2 = x[9*i+1]; 1596 alpha3 = x[9*i+2]; 1597 alpha4 = x[9*i+3]; 1598 alpha5 = x[9*i+4]; 1599 alpha6 = x[9*i+5]; 1600 alpha7 = x[9*i+6]; 1601 alpha8 = x[9*i+7]; 1602 alpha9 = x[9*i+8]; 1603 while (n-->0) { 1604 y[9*(*idx)] += alpha1*(*v); 1605 y[9*(*idx)+1] += alpha2*(*v); 1606 y[9*(*idx)+2] += alpha3*(*v); 1607 y[9*(*idx)+3] += alpha4*(*v); 1608 y[9*(*idx)+4] += alpha5*(*v); 1609 y[9*(*idx)+5] += alpha6*(*v); 1610 y[9*(*idx)+6] += alpha7*(*v); 1611 y[9*(*idx)+7] += alpha8*(*v); 1612 y[9*(*idx)+8] += alpha9*(*v); 1613 idx++; v++; 1614 } 1615 } 1616 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1617 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1618 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1619 PetscFunctionReturn(0); 1620 } 1621 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1622 { 1623 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1624 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1625 const PetscScalar *x,*v; 1626 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1627 PetscErrorCode ierr; 1628 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1629 PetscInt nonzerorow=0,n,i,jrow,j; 1630 1631 PetscFunctionBegin; 1632 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1633 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1634 idx = a->j; 1635 v = a->a; 1636 ii = a->i; 1637 1638 for (i=0; i<m; i++) { 1639 jrow = ii[i]; 1640 n = ii[i+1] - jrow; 1641 sum1 = 0.0; 1642 sum2 = 0.0; 1643 sum3 = 0.0; 1644 sum4 = 0.0; 1645 sum5 = 0.0; 1646 sum6 = 0.0; 1647 sum7 = 0.0; 1648 sum8 = 0.0; 1649 sum9 = 0.0; 1650 sum10 = 0.0; 1651 1652 nonzerorow += (n>0); 1653 for (j=0; j<n; j++) { 1654 sum1 += v[jrow]*x[10*idx[jrow]]; 1655 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1656 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1657 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1658 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1659 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1660 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1661 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1662 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1663 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1664 jrow++; 1665 } 1666 y[10*i] = sum1; 1667 y[10*i+1] = sum2; 1668 y[10*i+2] = sum3; 1669 y[10*i+3] = sum4; 1670 y[10*i+4] = sum5; 1671 y[10*i+5] = sum6; 1672 y[10*i+6] = sum7; 1673 y[10*i+7] = sum8; 1674 y[10*i+8] = sum9; 1675 y[10*i+9] = sum10; 1676 } 1677 1678 ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 1679 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1680 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1681 PetscFunctionReturn(0); 1682 } 1683 1684 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1685 { 1686 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1687 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1688 const PetscScalar *x,*v; 1689 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1690 PetscErrorCode ierr; 1691 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1692 PetscInt n,i,jrow,j; 1693 1694 PetscFunctionBegin; 1695 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1696 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1697 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1698 idx = a->j; 1699 v = a->a; 1700 ii = a->i; 1701 1702 for (i=0; i<m; i++) { 1703 jrow = ii[i]; 1704 n = ii[i+1] - jrow; 1705 sum1 = 0.0; 1706 sum2 = 0.0; 1707 sum3 = 0.0; 1708 sum4 = 0.0; 1709 sum5 = 0.0; 1710 sum6 = 0.0; 1711 sum7 = 0.0; 1712 sum8 = 0.0; 1713 sum9 = 0.0; 1714 sum10 = 0.0; 1715 for (j=0; j<n; j++) { 1716 sum1 += v[jrow]*x[10*idx[jrow]]; 1717 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1718 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1719 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1720 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1721 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1722 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1723 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1724 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1725 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1726 jrow++; 1727 } 1728 y[10*i] += sum1; 1729 y[10*i+1] += sum2; 1730 y[10*i+2] += sum3; 1731 y[10*i+3] += sum4; 1732 y[10*i+4] += sum5; 1733 y[10*i+5] += sum6; 1734 y[10*i+6] += sum7; 1735 y[10*i+7] += sum8; 1736 y[10*i+8] += sum9; 1737 y[10*i+9] += sum10; 1738 } 1739 1740 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1741 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1742 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1743 PetscFunctionReturn(0); 1744 } 1745 1746 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1747 { 1748 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1749 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1750 const PetscScalar *x,*v; 1751 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1752 PetscErrorCode ierr; 1753 const PetscInt m = b->AIJ->rmap->n,*idx; 1754 PetscInt n,i; 1755 1756 PetscFunctionBegin; 1757 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1758 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1759 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1760 1761 for (i=0; i<m; i++) { 1762 idx = a->j + a->i[i]; 1763 v = a->a + a->i[i]; 1764 n = a->i[i+1] - a->i[i]; 1765 alpha1 = x[10*i]; 1766 alpha2 = x[10*i+1]; 1767 alpha3 = x[10*i+2]; 1768 alpha4 = x[10*i+3]; 1769 alpha5 = x[10*i+4]; 1770 alpha6 = x[10*i+5]; 1771 alpha7 = x[10*i+6]; 1772 alpha8 = x[10*i+7]; 1773 alpha9 = x[10*i+8]; 1774 alpha10 = x[10*i+9]; 1775 while (n-->0) { 1776 y[10*(*idx)] += alpha1*(*v); 1777 y[10*(*idx)+1] += alpha2*(*v); 1778 y[10*(*idx)+2] += alpha3*(*v); 1779 y[10*(*idx)+3] += alpha4*(*v); 1780 y[10*(*idx)+4] += alpha5*(*v); 1781 y[10*(*idx)+5] += alpha6*(*v); 1782 y[10*(*idx)+6] += alpha7*(*v); 1783 y[10*(*idx)+7] += alpha8*(*v); 1784 y[10*(*idx)+8] += alpha9*(*v); 1785 y[10*(*idx)+9] += alpha10*(*v); 1786 idx++; v++; 1787 } 1788 } 1789 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1790 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1791 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1792 PetscFunctionReturn(0); 1793 } 1794 1795 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1796 { 1797 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1798 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1799 const PetscScalar *x,*v; 1800 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1801 PetscErrorCode ierr; 1802 const PetscInt m = b->AIJ->rmap->n,*idx; 1803 PetscInt n,i; 1804 1805 PetscFunctionBegin; 1806 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1807 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1808 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1809 for (i=0; i<m; i++) { 1810 idx = a->j + a->i[i]; 1811 v = a->a + a->i[i]; 1812 n = a->i[i+1] - a->i[i]; 1813 alpha1 = x[10*i]; 1814 alpha2 = x[10*i+1]; 1815 alpha3 = x[10*i+2]; 1816 alpha4 = x[10*i+3]; 1817 alpha5 = x[10*i+4]; 1818 alpha6 = x[10*i+5]; 1819 alpha7 = x[10*i+6]; 1820 alpha8 = x[10*i+7]; 1821 alpha9 = x[10*i+8]; 1822 alpha10 = x[10*i+9]; 1823 while (n-->0) { 1824 y[10*(*idx)] += alpha1*(*v); 1825 y[10*(*idx)+1] += alpha2*(*v); 1826 y[10*(*idx)+2] += alpha3*(*v); 1827 y[10*(*idx)+3] += alpha4*(*v); 1828 y[10*(*idx)+4] += alpha5*(*v); 1829 y[10*(*idx)+5] += alpha6*(*v); 1830 y[10*(*idx)+6] += alpha7*(*v); 1831 y[10*(*idx)+7] += alpha8*(*v); 1832 y[10*(*idx)+8] += alpha9*(*v); 1833 y[10*(*idx)+9] += alpha10*(*v); 1834 idx++; v++; 1835 } 1836 } 1837 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1838 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1839 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1840 PetscFunctionReturn(0); 1841 } 1842 1843 /*--------------------------------------------------------------------------------------------*/ 1844 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1845 { 1846 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1847 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1848 const PetscScalar *x,*v; 1849 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1850 PetscErrorCode ierr; 1851 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1852 PetscInt nonzerorow=0,n,i,jrow,j; 1853 1854 PetscFunctionBegin; 1855 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1856 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1857 idx = a->j; 1858 v = a->a; 1859 ii = a->i; 1860 1861 for (i=0; i<m; i++) { 1862 jrow = ii[i]; 1863 n = ii[i+1] - jrow; 1864 sum1 = 0.0; 1865 sum2 = 0.0; 1866 sum3 = 0.0; 1867 sum4 = 0.0; 1868 sum5 = 0.0; 1869 sum6 = 0.0; 1870 sum7 = 0.0; 1871 sum8 = 0.0; 1872 sum9 = 0.0; 1873 sum10 = 0.0; 1874 sum11 = 0.0; 1875 1876 nonzerorow += (n>0); 1877 for (j=0; j<n; j++) { 1878 sum1 += v[jrow]*x[11*idx[jrow]]; 1879 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1880 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1881 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1882 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1883 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1884 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1885 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1886 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1887 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1888 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1889 jrow++; 1890 } 1891 y[11*i] = sum1; 1892 y[11*i+1] = sum2; 1893 y[11*i+2] = sum3; 1894 y[11*i+3] = sum4; 1895 y[11*i+4] = sum5; 1896 y[11*i+5] = sum6; 1897 y[11*i+6] = sum7; 1898 y[11*i+7] = sum8; 1899 y[11*i+8] = sum9; 1900 y[11*i+9] = sum10; 1901 y[11*i+10] = sum11; 1902 } 1903 1904 ierr = PetscLogFlops(22.0*a->nz - 11*nonzerorow);CHKERRQ(ierr); 1905 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1906 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1907 PetscFunctionReturn(0); 1908 } 1909 1910 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1911 { 1912 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1913 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1914 const PetscScalar *x,*v; 1915 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1916 PetscErrorCode ierr; 1917 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1918 PetscInt n,i,jrow,j; 1919 1920 PetscFunctionBegin; 1921 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1922 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1923 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1924 idx = a->j; 1925 v = a->a; 1926 ii = a->i; 1927 1928 for (i=0; i<m; i++) { 1929 jrow = ii[i]; 1930 n = ii[i+1] - jrow; 1931 sum1 = 0.0; 1932 sum2 = 0.0; 1933 sum3 = 0.0; 1934 sum4 = 0.0; 1935 sum5 = 0.0; 1936 sum6 = 0.0; 1937 sum7 = 0.0; 1938 sum8 = 0.0; 1939 sum9 = 0.0; 1940 sum10 = 0.0; 1941 sum11 = 0.0; 1942 for (j=0; j<n; j++) { 1943 sum1 += v[jrow]*x[11*idx[jrow]]; 1944 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1945 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1946 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1947 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1948 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1949 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1950 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1951 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1952 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1953 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1954 jrow++; 1955 } 1956 y[11*i] += sum1; 1957 y[11*i+1] += sum2; 1958 y[11*i+2] += sum3; 1959 y[11*i+3] += sum4; 1960 y[11*i+4] += sum5; 1961 y[11*i+5] += sum6; 1962 y[11*i+6] += sum7; 1963 y[11*i+7] += sum8; 1964 y[11*i+8] += sum9; 1965 y[11*i+9] += sum10; 1966 y[11*i+10] += sum11; 1967 } 1968 1969 ierr = PetscLogFlops(22.0*a->nz);CHKERRQ(ierr); 1970 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1971 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1972 PetscFunctionReturn(0); 1973 } 1974 1975 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1976 { 1977 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1978 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1979 const PetscScalar *x,*v; 1980 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 1981 PetscErrorCode ierr; 1982 const PetscInt m = b->AIJ->rmap->n,*idx; 1983 PetscInt n,i; 1984 1985 PetscFunctionBegin; 1986 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1987 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1988 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1989 1990 for (i=0; i<m; i++) { 1991 idx = a->j + a->i[i]; 1992 v = a->a + a->i[i]; 1993 n = a->i[i+1] - a->i[i]; 1994 alpha1 = x[11*i]; 1995 alpha2 = x[11*i+1]; 1996 alpha3 = x[11*i+2]; 1997 alpha4 = x[11*i+3]; 1998 alpha5 = x[11*i+4]; 1999 alpha6 = x[11*i+5]; 2000 alpha7 = x[11*i+6]; 2001 alpha8 = x[11*i+7]; 2002 alpha9 = x[11*i+8]; 2003 alpha10 = x[11*i+9]; 2004 alpha11 = x[11*i+10]; 2005 while (n-->0) { 2006 y[11*(*idx)] += alpha1*(*v); 2007 y[11*(*idx)+1] += alpha2*(*v); 2008 y[11*(*idx)+2] += alpha3*(*v); 2009 y[11*(*idx)+3] += alpha4*(*v); 2010 y[11*(*idx)+4] += alpha5*(*v); 2011 y[11*(*idx)+5] += alpha6*(*v); 2012 y[11*(*idx)+6] += alpha7*(*v); 2013 y[11*(*idx)+7] += alpha8*(*v); 2014 y[11*(*idx)+8] += alpha9*(*v); 2015 y[11*(*idx)+9] += alpha10*(*v); 2016 y[11*(*idx)+10] += alpha11*(*v); 2017 idx++; v++; 2018 } 2019 } 2020 ierr = PetscLogFlops(22.0*a->nz);CHKERRQ(ierr); 2021 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2022 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2023 PetscFunctionReturn(0); 2024 } 2025 2026 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2027 { 2028 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2029 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2030 const PetscScalar *x,*v; 2031 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2032 PetscErrorCode ierr; 2033 const PetscInt m = b->AIJ->rmap->n,*idx; 2034 PetscInt n,i; 2035 2036 PetscFunctionBegin; 2037 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2038 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2039 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2040 for (i=0; i<m; i++) { 2041 idx = a->j + a->i[i]; 2042 v = a->a + a->i[i]; 2043 n = a->i[i+1] - a->i[i]; 2044 alpha1 = x[11*i]; 2045 alpha2 = x[11*i+1]; 2046 alpha3 = x[11*i+2]; 2047 alpha4 = x[11*i+3]; 2048 alpha5 = x[11*i+4]; 2049 alpha6 = x[11*i+5]; 2050 alpha7 = x[11*i+6]; 2051 alpha8 = x[11*i+7]; 2052 alpha9 = x[11*i+8]; 2053 alpha10 = x[11*i+9]; 2054 alpha11 = x[11*i+10]; 2055 while (n-->0) { 2056 y[11*(*idx)] += alpha1*(*v); 2057 y[11*(*idx)+1] += alpha2*(*v); 2058 y[11*(*idx)+2] += alpha3*(*v); 2059 y[11*(*idx)+3] += alpha4*(*v); 2060 y[11*(*idx)+4] += alpha5*(*v); 2061 y[11*(*idx)+5] += alpha6*(*v); 2062 y[11*(*idx)+6] += alpha7*(*v); 2063 y[11*(*idx)+7] += alpha8*(*v); 2064 y[11*(*idx)+8] += alpha9*(*v); 2065 y[11*(*idx)+9] += alpha10*(*v); 2066 y[11*(*idx)+10] += alpha11*(*v); 2067 idx++; v++; 2068 } 2069 } 2070 ierr = PetscLogFlops(22.0*a->nz);CHKERRQ(ierr); 2071 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2072 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2073 PetscFunctionReturn(0); 2074 } 2075 2076 /*--------------------------------------------------------------------------------------------*/ 2077 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2078 { 2079 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2080 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2081 const PetscScalar *x,*v; 2082 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2083 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2084 PetscErrorCode ierr; 2085 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2086 PetscInt nonzerorow=0,n,i,jrow,j; 2087 2088 PetscFunctionBegin; 2089 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2090 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2091 idx = a->j; 2092 v = a->a; 2093 ii = a->i; 2094 2095 for (i=0; i<m; i++) { 2096 jrow = ii[i]; 2097 n = ii[i+1] - jrow; 2098 sum1 = 0.0; 2099 sum2 = 0.0; 2100 sum3 = 0.0; 2101 sum4 = 0.0; 2102 sum5 = 0.0; 2103 sum6 = 0.0; 2104 sum7 = 0.0; 2105 sum8 = 0.0; 2106 sum9 = 0.0; 2107 sum10 = 0.0; 2108 sum11 = 0.0; 2109 sum12 = 0.0; 2110 sum13 = 0.0; 2111 sum14 = 0.0; 2112 sum15 = 0.0; 2113 sum16 = 0.0; 2114 2115 nonzerorow += (n>0); 2116 for (j=0; j<n; j++) { 2117 sum1 += v[jrow]*x[16*idx[jrow]]; 2118 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2119 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2120 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2121 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2122 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2123 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2124 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2125 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2126 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2127 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2128 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2129 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2130 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2131 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2132 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2133 jrow++; 2134 } 2135 y[16*i] = sum1; 2136 y[16*i+1] = sum2; 2137 y[16*i+2] = sum3; 2138 y[16*i+3] = sum4; 2139 y[16*i+4] = sum5; 2140 y[16*i+5] = sum6; 2141 y[16*i+6] = sum7; 2142 y[16*i+7] = sum8; 2143 y[16*i+8] = sum9; 2144 y[16*i+9] = sum10; 2145 y[16*i+10] = sum11; 2146 y[16*i+11] = sum12; 2147 y[16*i+12] = sum13; 2148 y[16*i+13] = sum14; 2149 y[16*i+14] = sum15; 2150 y[16*i+15] = sum16; 2151 } 2152 2153 ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 2154 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2155 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2156 PetscFunctionReturn(0); 2157 } 2158 2159 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2160 { 2161 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2162 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2163 const PetscScalar *x,*v; 2164 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2165 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2166 PetscErrorCode ierr; 2167 const PetscInt m = b->AIJ->rmap->n,*idx; 2168 PetscInt n,i; 2169 2170 PetscFunctionBegin; 2171 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2172 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2173 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2174 2175 for (i=0; i<m; i++) { 2176 idx = a->j + a->i[i]; 2177 v = a->a + a->i[i]; 2178 n = a->i[i+1] - a->i[i]; 2179 alpha1 = x[16*i]; 2180 alpha2 = x[16*i+1]; 2181 alpha3 = x[16*i+2]; 2182 alpha4 = x[16*i+3]; 2183 alpha5 = x[16*i+4]; 2184 alpha6 = x[16*i+5]; 2185 alpha7 = x[16*i+6]; 2186 alpha8 = x[16*i+7]; 2187 alpha9 = x[16*i+8]; 2188 alpha10 = x[16*i+9]; 2189 alpha11 = x[16*i+10]; 2190 alpha12 = x[16*i+11]; 2191 alpha13 = x[16*i+12]; 2192 alpha14 = x[16*i+13]; 2193 alpha15 = x[16*i+14]; 2194 alpha16 = x[16*i+15]; 2195 while (n-->0) { 2196 y[16*(*idx)] += alpha1*(*v); 2197 y[16*(*idx)+1] += alpha2*(*v); 2198 y[16*(*idx)+2] += alpha3*(*v); 2199 y[16*(*idx)+3] += alpha4*(*v); 2200 y[16*(*idx)+4] += alpha5*(*v); 2201 y[16*(*idx)+5] += alpha6*(*v); 2202 y[16*(*idx)+6] += alpha7*(*v); 2203 y[16*(*idx)+7] += alpha8*(*v); 2204 y[16*(*idx)+8] += alpha9*(*v); 2205 y[16*(*idx)+9] += alpha10*(*v); 2206 y[16*(*idx)+10] += alpha11*(*v); 2207 y[16*(*idx)+11] += alpha12*(*v); 2208 y[16*(*idx)+12] += alpha13*(*v); 2209 y[16*(*idx)+13] += alpha14*(*v); 2210 y[16*(*idx)+14] += alpha15*(*v); 2211 y[16*(*idx)+15] += alpha16*(*v); 2212 idx++; v++; 2213 } 2214 } 2215 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2216 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2217 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2218 PetscFunctionReturn(0); 2219 } 2220 2221 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2222 { 2223 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2224 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2225 const PetscScalar *x,*v; 2226 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2227 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2228 PetscErrorCode ierr; 2229 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2230 PetscInt n,i,jrow,j; 2231 2232 PetscFunctionBegin; 2233 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2234 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2235 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2236 idx = a->j; 2237 v = a->a; 2238 ii = a->i; 2239 2240 for (i=0; i<m; i++) { 2241 jrow = ii[i]; 2242 n = ii[i+1] - jrow; 2243 sum1 = 0.0; 2244 sum2 = 0.0; 2245 sum3 = 0.0; 2246 sum4 = 0.0; 2247 sum5 = 0.0; 2248 sum6 = 0.0; 2249 sum7 = 0.0; 2250 sum8 = 0.0; 2251 sum9 = 0.0; 2252 sum10 = 0.0; 2253 sum11 = 0.0; 2254 sum12 = 0.0; 2255 sum13 = 0.0; 2256 sum14 = 0.0; 2257 sum15 = 0.0; 2258 sum16 = 0.0; 2259 for (j=0; j<n; j++) { 2260 sum1 += v[jrow]*x[16*idx[jrow]]; 2261 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2262 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2263 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2264 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2265 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2266 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2267 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2268 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2269 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2270 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2271 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2272 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2273 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2274 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2275 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2276 jrow++; 2277 } 2278 y[16*i] += sum1; 2279 y[16*i+1] += sum2; 2280 y[16*i+2] += sum3; 2281 y[16*i+3] += sum4; 2282 y[16*i+4] += sum5; 2283 y[16*i+5] += sum6; 2284 y[16*i+6] += sum7; 2285 y[16*i+7] += sum8; 2286 y[16*i+8] += sum9; 2287 y[16*i+9] += sum10; 2288 y[16*i+10] += sum11; 2289 y[16*i+11] += sum12; 2290 y[16*i+12] += sum13; 2291 y[16*i+13] += sum14; 2292 y[16*i+14] += sum15; 2293 y[16*i+15] += sum16; 2294 } 2295 2296 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2297 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2298 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2299 PetscFunctionReturn(0); 2300 } 2301 2302 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2303 { 2304 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2305 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2306 const PetscScalar *x,*v; 2307 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2308 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2309 PetscErrorCode ierr; 2310 const PetscInt m = b->AIJ->rmap->n,*idx; 2311 PetscInt n,i; 2312 2313 PetscFunctionBegin; 2314 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2315 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2316 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2317 for (i=0; i<m; i++) { 2318 idx = a->j + a->i[i]; 2319 v = a->a + a->i[i]; 2320 n = a->i[i+1] - a->i[i]; 2321 alpha1 = x[16*i]; 2322 alpha2 = x[16*i+1]; 2323 alpha3 = x[16*i+2]; 2324 alpha4 = x[16*i+3]; 2325 alpha5 = x[16*i+4]; 2326 alpha6 = x[16*i+5]; 2327 alpha7 = x[16*i+6]; 2328 alpha8 = x[16*i+7]; 2329 alpha9 = x[16*i+8]; 2330 alpha10 = x[16*i+9]; 2331 alpha11 = x[16*i+10]; 2332 alpha12 = x[16*i+11]; 2333 alpha13 = x[16*i+12]; 2334 alpha14 = x[16*i+13]; 2335 alpha15 = x[16*i+14]; 2336 alpha16 = x[16*i+15]; 2337 while (n-->0) { 2338 y[16*(*idx)] += alpha1*(*v); 2339 y[16*(*idx)+1] += alpha2*(*v); 2340 y[16*(*idx)+2] += alpha3*(*v); 2341 y[16*(*idx)+3] += alpha4*(*v); 2342 y[16*(*idx)+4] += alpha5*(*v); 2343 y[16*(*idx)+5] += alpha6*(*v); 2344 y[16*(*idx)+6] += alpha7*(*v); 2345 y[16*(*idx)+7] += alpha8*(*v); 2346 y[16*(*idx)+8] += alpha9*(*v); 2347 y[16*(*idx)+9] += alpha10*(*v); 2348 y[16*(*idx)+10] += alpha11*(*v); 2349 y[16*(*idx)+11] += alpha12*(*v); 2350 y[16*(*idx)+12] += alpha13*(*v); 2351 y[16*(*idx)+13] += alpha14*(*v); 2352 y[16*(*idx)+14] += alpha15*(*v); 2353 y[16*(*idx)+15] += alpha16*(*v); 2354 idx++; v++; 2355 } 2356 } 2357 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2358 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2359 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2360 PetscFunctionReturn(0); 2361 } 2362 2363 /*--------------------------------------------------------------------------------------------*/ 2364 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2365 { 2366 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2367 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2368 const PetscScalar *x,*v; 2369 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2370 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2371 PetscErrorCode ierr; 2372 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2373 PetscInt nonzerorow=0,n,i,jrow,j; 2374 2375 PetscFunctionBegin; 2376 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2377 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2378 idx = a->j; 2379 v = a->a; 2380 ii = a->i; 2381 2382 for (i=0; i<m; i++) { 2383 jrow = ii[i]; 2384 n = ii[i+1] - jrow; 2385 sum1 = 0.0; 2386 sum2 = 0.0; 2387 sum3 = 0.0; 2388 sum4 = 0.0; 2389 sum5 = 0.0; 2390 sum6 = 0.0; 2391 sum7 = 0.0; 2392 sum8 = 0.0; 2393 sum9 = 0.0; 2394 sum10 = 0.0; 2395 sum11 = 0.0; 2396 sum12 = 0.0; 2397 sum13 = 0.0; 2398 sum14 = 0.0; 2399 sum15 = 0.0; 2400 sum16 = 0.0; 2401 sum17 = 0.0; 2402 sum18 = 0.0; 2403 2404 nonzerorow += (n>0); 2405 for (j=0; j<n; j++) { 2406 sum1 += v[jrow]*x[18*idx[jrow]]; 2407 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2408 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2409 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2410 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2411 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2412 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2413 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2414 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2415 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2416 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2417 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2418 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2419 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2420 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2421 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2422 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2423 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2424 jrow++; 2425 } 2426 y[18*i] = sum1; 2427 y[18*i+1] = sum2; 2428 y[18*i+2] = sum3; 2429 y[18*i+3] = sum4; 2430 y[18*i+4] = sum5; 2431 y[18*i+5] = sum6; 2432 y[18*i+6] = sum7; 2433 y[18*i+7] = sum8; 2434 y[18*i+8] = sum9; 2435 y[18*i+9] = sum10; 2436 y[18*i+10] = sum11; 2437 y[18*i+11] = sum12; 2438 y[18*i+12] = sum13; 2439 y[18*i+13] = sum14; 2440 y[18*i+14] = sum15; 2441 y[18*i+15] = sum16; 2442 y[18*i+16] = sum17; 2443 y[18*i+17] = sum18; 2444 } 2445 2446 ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 2447 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2448 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2449 PetscFunctionReturn(0); 2450 } 2451 2452 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2453 { 2454 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2455 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2456 const PetscScalar *x,*v; 2457 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2458 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2459 PetscErrorCode ierr; 2460 const PetscInt m = b->AIJ->rmap->n,*idx; 2461 PetscInt n,i; 2462 2463 PetscFunctionBegin; 2464 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2465 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2466 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2467 2468 for (i=0; i<m; i++) { 2469 idx = a->j + a->i[i]; 2470 v = a->a + a->i[i]; 2471 n = a->i[i+1] - a->i[i]; 2472 alpha1 = x[18*i]; 2473 alpha2 = x[18*i+1]; 2474 alpha3 = x[18*i+2]; 2475 alpha4 = x[18*i+3]; 2476 alpha5 = x[18*i+4]; 2477 alpha6 = x[18*i+5]; 2478 alpha7 = x[18*i+6]; 2479 alpha8 = x[18*i+7]; 2480 alpha9 = x[18*i+8]; 2481 alpha10 = x[18*i+9]; 2482 alpha11 = x[18*i+10]; 2483 alpha12 = x[18*i+11]; 2484 alpha13 = x[18*i+12]; 2485 alpha14 = x[18*i+13]; 2486 alpha15 = x[18*i+14]; 2487 alpha16 = x[18*i+15]; 2488 alpha17 = x[18*i+16]; 2489 alpha18 = x[18*i+17]; 2490 while (n-->0) { 2491 y[18*(*idx)] += alpha1*(*v); 2492 y[18*(*idx)+1] += alpha2*(*v); 2493 y[18*(*idx)+2] += alpha3*(*v); 2494 y[18*(*idx)+3] += alpha4*(*v); 2495 y[18*(*idx)+4] += alpha5*(*v); 2496 y[18*(*idx)+5] += alpha6*(*v); 2497 y[18*(*idx)+6] += alpha7*(*v); 2498 y[18*(*idx)+7] += alpha8*(*v); 2499 y[18*(*idx)+8] += alpha9*(*v); 2500 y[18*(*idx)+9] += alpha10*(*v); 2501 y[18*(*idx)+10] += alpha11*(*v); 2502 y[18*(*idx)+11] += alpha12*(*v); 2503 y[18*(*idx)+12] += alpha13*(*v); 2504 y[18*(*idx)+13] += alpha14*(*v); 2505 y[18*(*idx)+14] += alpha15*(*v); 2506 y[18*(*idx)+15] += alpha16*(*v); 2507 y[18*(*idx)+16] += alpha17*(*v); 2508 y[18*(*idx)+17] += alpha18*(*v); 2509 idx++; v++; 2510 } 2511 } 2512 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2513 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2514 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2515 PetscFunctionReturn(0); 2516 } 2517 2518 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2519 { 2520 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2521 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2522 const PetscScalar *x,*v; 2523 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2524 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2525 PetscErrorCode ierr; 2526 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2527 PetscInt n,i,jrow,j; 2528 2529 PetscFunctionBegin; 2530 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2531 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2532 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2533 idx = a->j; 2534 v = a->a; 2535 ii = a->i; 2536 2537 for (i=0; i<m; i++) { 2538 jrow = ii[i]; 2539 n = ii[i+1] - jrow; 2540 sum1 = 0.0; 2541 sum2 = 0.0; 2542 sum3 = 0.0; 2543 sum4 = 0.0; 2544 sum5 = 0.0; 2545 sum6 = 0.0; 2546 sum7 = 0.0; 2547 sum8 = 0.0; 2548 sum9 = 0.0; 2549 sum10 = 0.0; 2550 sum11 = 0.0; 2551 sum12 = 0.0; 2552 sum13 = 0.0; 2553 sum14 = 0.0; 2554 sum15 = 0.0; 2555 sum16 = 0.0; 2556 sum17 = 0.0; 2557 sum18 = 0.0; 2558 for (j=0; j<n; j++) { 2559 sum1 += v[jrow]*x[18*idx[jrow]]; 2560 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2561 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2562 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2563 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2564 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2565 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2566 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2567 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2568 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2569 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2570 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2571 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2572 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2573 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2574 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2575 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2576 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2577 jrow++; 2578 } 2579 y[18*i] += sum1; 2580 y[18*i+1] += sum2; 2581 y[18*i+2] += sum3; 2582 y[18*i+3] += sum4; 2583 y[18*i+4] += sum5; 2584 y[18*i+5] += sum6; 2585 y[18*i+6] += sum7; 2586 y[18*i+7] += sum8; 2587 y[18*i+8] += sum9; 2588 y[18*i+9] += sum10; 2589 y[18*i+10] += sum11; 2590 y[18*i+11] += sum12; 2591 y[18*i+12] += sum13; 2592 y[18*i+13] += sum14; 2593 y[18*i+14] += sum15; 2594 y[18*i+15] += sum16; 2595 y[18*i+16] += sum17; 2596 y[18*i+17] += sum18; 2597 } 2598 2599 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2600 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2601 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2602 PetscFunctionReturn(0); 2603 } 2604 2605 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2606 { 2607 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2608 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2609 const PetscScalar *x,*v; 2610 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2611 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2612 PetscErrorCode ierr; 2613 const PetscInt m = b->AIJ->rmap->n,*idx; 2614 PetscInt n,i; 2615 2616 PetscFunctionBegin; 2617 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2618 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2619 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2620 for (i=0; i<m; i++) { 2621 idx = a->j + a->i[i]; 2622 v = a->a + a->i[i]; 2623 n = a->i[i+1] - a->i[i]; 2624 alpha1 = x[18*i]; 2625 alpha2 = x[18*i+1]; 2626 alpha3 = x[18*i+2]; 2627 alpha4 = x[18*i+3]; 2628 alpha5 = x[18*i+4]; 2629 alpha6 = x[18*i+5]; 2630 alpha7 = x[18*i+6]; 2631 alpha8 = x[18*i+7]; 2632 alpha9 = x[18*i+8]; 2633 alpha10 = x[18*i+9]; 2634 alpha11 = x[18*i+10]; 2635 alpha12 = x[18*i+11]; 2636 alpha13 = x[18*i+12]; 2637 alpha14 = x[18*i+13]; 2638 alpha15 = x[18*i+14]; 2639 alpha16 = x[18*i+15]; 2640 alpha17 = x[18*i+16]; 2641 alpha18 = x[18*i+17]; 2642 while (n-->0) { 2643 y[18*(*idx)] += alpha1*(*v); 2644 y[18*(*idx)+1] += alpha2*(*v); 2645 y[18*(*idx)+2] += alpha3*(*v); 2646 y[18*(*idx)+3] += alpha4*(*v); 2647 y[18*(*idx)+4] += alpha5*(*v); 2648 y[18*(*idx)+5] += alpha6*(*v); 2649 y[18*(*idx)+6] += alpha7*(*v); 2650 y[18*(*idx)+7] += alpha8*(*v); 2651 y[18*(*idx)+8] += alpha9*(*v); 2652 y[18*(*idx)+9] += alpha10*(*v); 2653 y[18*(*idx)+10] += alpha11*(*v); 2654 y[18*(*idx)+11] += alpha12*(*v); 2655 y[18*(*idx)+12] += alpha13*(*v); 2656 y[18*(*idx)+13] += alpha14*(*v); 2657 y[18*(*idx)+14] += alpha15*(*v); 2658 y[18*(*idx)+15] += alpha16*(*v); 2659 y[18*(*idx)+16] += alpha17*(*v); 2660 y[18*(*idx)+17] += alpha18*(*v); 2661 idx++; v++; 2662 } 2663 } 2664 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2665 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2666 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2667 PetscFunctionReturn(0); 2668 } 2669 2670 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2671 { 2672 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2673 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2674 const PetscScalar *x,*v; 2675 PetscScalar *y,*sums; 2676 PetscErrorCode ierr; 2677 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2678 PetscInt n,i,jrow,j,dof = b->dof,k; 2679 2680 PetscFunctionBegin; 2681 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2682 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2683 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2684 idx = a->j; 2685 v = a->a; 2686 ii = a->i; 2687 2688 for (i=0; i<m; i++) { 2689 jrow = ii[i]; 2690 n = ii[i+1] - jrow; 2691 sums = y + dof*i; 2692 for (j=0; j<n; j++) { 2693 for (k=0; k<dof; k++) { 2694 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2695 } 2696 jrow++; 2697 } 2698 } 2699 2700 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2701 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2702 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2703 PetscFunctionReturn(0); 2704 } 2705 2706 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2707 { 2708 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2709 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2710 const PetscScalar *x,*v; 2711 PetscScalar *y,*sums; 2712 PetscErrorCode ierr; 2713 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2714 PetscInt n,i,jrow,j,dof = b->dof,k; 2715 2716 PetscFunctionBegin; 2717 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2718 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2719 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2720 idx = a->j; 2721 v = a->a; 2722 ii = a->i; 2723 2724 for (i=0; i<m; i++) { 2725 jrow = ii[i]; 2726 n = ii[i+1] - jrow; 2727 sums = y + dof*i; 2728 for (j=0; j<n; j++) { 2729 for (k=0; k<dof; k++) { 2730 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2731 } 2732 jrow++; 2733 } 2734 } 2735 2736 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2737 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2738 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2739 PetscFunctionReturn(0); 2740 } 2741 2742 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2743 { 2744 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2745 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2746 const PetscScalar *x,*v,*alpha; 2747 PetscScalar *y; 2748 PetscErrorCode ierr; 2749 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2750 PetscInt n,i,k; 2751 2752 PetscFunctionBegin; 2753 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2754 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2755 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2756 for (i=0; i<m; i++) { 2757 idx = a->j + a->i[i]; 2758 v = a->a + a->i[i]; 2759 n = a->i[i+1] - a->i[i]; 2760 alpha = x + dof*i; 2761 while (n-->0) { 2762 for (k=0; k<dof; k++) { 2763 y[dof*(*idx)+k] += alpha[k]*(*v); 2764 } 2765 idx++; v++; 2766 } 2767 } 2768 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2769 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2770 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2771 PetscFunctionReturn(0); 2772 } 2773 2774 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2775 { 2776 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2777 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2778 const PetscScalar *x,*v,*alpha; 2779 PetscScalar *y; 2780 PetscErrorCode ierr; 2781 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2782 PetscInt n,i,k; 2783 2784 PetscFunctionBegin; 2785 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2786 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2787 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2788 for (i=0; i<m; i++) { 2789 idx = a->j + a->i[i]; 2790 v = a->a + a->i[i]; 2791 n = a->i[i+1] - a->i[i]; 2792 alpha = x + dof*i; 2793 while (n-->0) { 2794 for (k=0; k<dof; k++) { 2795 y[dof*(*idx)+k] += alpha[k]*(*v); 2796 } 2797 idx++; v++; 2798 } 2799 } 2800 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2801 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2802 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2803 PetscFunctionReturn(0); 2804 } 2805 2806 /*===================================================================================*/ 2807 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2808 { 2809 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2810 PetscErrorCode ierr; 2811 2812 PetscFunctionBegin; 2813 /* start the scatter */ 2814 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2815 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2816 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2817 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2818 PetscFunctionReturn(0); 2819 } 2820 2821 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2822 { 2823 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2824 PetscErrorCode ierr; 2825 2826 PetscFunctionBegin; 2827 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2828 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2829 ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2830 ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2831 PetscFunctionReturn(0); 2832 } 2833 2834 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2835 { 2836 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2837 PetscErrorCode ierr; 2838 2839 PetscFunctionBegin; 2840 /* start the scatter */ 2841 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2842 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2843 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2844 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2845 PetscFunctionReturn(0); 2846 } 2847 2848 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2849 { 2850 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2851 PetscErrorCode ierr; 2852 2853 PetscFunctionBegin; 2854 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2855 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2856 ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2857 ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2858 PetscFunctionReturn(0); 2859 } 2860 2861 /* ----------------------------------------------------------------*/ 2862 PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) 2863 { 2864 Mat_Product *product = C->product; 2865 2866 PetscFunctionBegin; 2867 if (product->type == MATPRODUCT_PtAP) { 2868 C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; 2869 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]); 2870 PetscFunctionReturn(0); 2871 } 2872 2873 PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) 2874 { 2875 PetscErrorCode ierr; 2876 Mat_Product *product = C->product; 2877 PetscBool flg = PETSC_FALSE; 2878 Mat A=product->A,P=product->B; 2879 PetscInt alg=1; /* set default algorithm */ 2880 #if !defined(PETSC_HAVE_HYPRE) 2881 const char *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"}; 2882 PetscInt nalg=4; 2883 #else 2884 const char *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"}; 2885 PetscInt nalg=5; 2886 #endif 2887 2888 PetscFunctionBegin; 2889 PetscCheckFalse(product->type != MATPRODUCT_PtAP,PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices",MatProductTypes[product->type]); 2890 2891 /* PtAP */ 2892 /* Check matrix local sizes */ 2893 PetscCheckFalse(A->rmap->rstart != P->rmap->rstart || A->rmap->rend != P->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend); 2894 PetscCheckFalse(A->cmap->rstart != P->rmap->rstart || A->cmap->rend != P->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend); 2895 2896 /* Set the default algorithm */ 2897 ierr = PetscStrcmp(C->product->alg,"default",&flg);CHKERRQ(ierr); 2898 if (flg) { 2899 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2900 } 2901 2902 /* Get runtime option */ 2903 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 2904 ierr = PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2905 if (flg) { 2906 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2907 } 2908 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2909 2910 ierr = PetscStrcmp(C->product->alg,"allatonce",&flg);CHKERRQ(ierr); 2911 if (flg) { 2912 C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 2913 PetscFunctionReturn(0); 2914 } 2915 2916 ierr = PetscStrcmp(C->product->alg,"allatonce_merged",&flg);CHKERRQ(ierr); 2917 if (flg) { 2918 C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 2919 PetscFunctionReturn(0); 2920 } 2921 2922 /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ 2923 ierr = PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n");CHKERRQ(ierr); 2924 ierr = MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P);CHKERRQ(ierr); 2925 ierr = MatProductSetFromOptions(C);CHKERRQ(ierr); 2926 PetscFunctionReturn(0); 2927 } 2928 2929 /* ----------------------------------------------------------------*/ 2930 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C) 2931 { 2932 PetscErrorCode ierr; 2933 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2934 Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data; 2935 Mat P =pp->AIJ; 2936 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2937 PetscInt *pti,*ptj,*ptJ; 2938 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2939 const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 2940 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 2941 MatScalar *ca; 2942 const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 2943 2944 PetscFunctionBegin; 2945 /* Get ij structure of P^T */ 2946 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2947 2948 cn = pn*ppdof; 2949 /* Allocate ci array, arrays for fill computation and */ 2950 /* free space for accumulating nonzero column info */ 2951 ierr = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr); 2952 ci[0] = 0; 2953 2954 /* Work arrays for rows of P^T*A */ 2955 ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr); 2956 ierr = PetscArrayzero(ptadenserow,an);CHKERRQ(ierr); 2957 ierr = PetscArrayzero(denserow,cn);CHKERRQ(ierr); 2958 2959 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 2960 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 2961 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2962 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr); 2963 current_space = free_space; 2964 2965 /* Determine symbolic info for each row of C: */ 2966 for (i=0; i<pn; i++) { 2967 ptnzi = pti[i+1] - pti[i]; 2968 ptJ = ptj + pti[i]; 2969 for (dof=0; dof<ppdof; dof++) { 2970 ptanzi = 0; 2971 /* Determine symbolic row of PtA: */ 2972 for (j=0; j<ptnzi; j++) { 2973 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 2974 arow = ptJ[j]*ppdof + dof; 2975 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 2976 anzj = ai[arow+1] - ai[arow]; 2977 ajj = aj + ai[arow]; 2978 for (k=0; k<anzj; k++) { 2979 if (!ptadenserow[ajj[k]]) { 2980 ptadenserow[ajj[k]] = -1; 2981 ptasparserow[ptanzi++] = ajj[k]; 2982 } 2983 } 2984 } 2985 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 2986 ptaj = ptasparserow; 2987 cnzi = 0; 2988 for (j=0; j<ptanzi; j++) { 2989 /* Get offset within block of P */ 2990 pshift = *ptaj%ppdof; 2991 /* Get block row of P */ 2992 prow = (*ptaj++)/ppdof; /* integer division */ 2993 /* P has same number of nonzeros per row as the compressed form */ 2994 pnzj = pi[prow+1] - pi[prow]; 2995 pjj = pj + pi[prow]; 2996 for (k=0;k<pnzj;k++) { 2997 /* Locations in C are shifted by the offset within the block */ 2998 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 2999 if (!denserow[pjj[k]*ppdof+pshift]) { 3000 denserow[pjj[k]*ppdof+pshift] = -1; 3001 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 3002 } 3003 } 3004 } 3005 3006 /* sort sparserow */ 3007 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 3008 3009 /* If free space is not available, make more free space */ 3010 /* Double the amount of total space in the list */ 3011 if (current_space->local_remaining<cnzi) { 3012 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 3013 } 3014 3015 /* Copy data into free space, and zero out denserows */ 3016 ierr = PetscArraycpy(current_space->array,sparserow,cnzi);CHKERRQ(ierr); 3017 3018 current_space->array += cnzi; 3019 current_space->local_used += cnzi; 3020 current_space->local_remaining -= cnzi; 3021 3022 for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 3023 for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0; 3024 3025 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 3026 /* For now, we will recompute what is needed. */ 3027 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 3028 } 3029 } 3030 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 3031 /* Allocate space for cj, initialize cj, and */ 3032 /* destroy list of free space and other temporary array(s) */ 3033 ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr); 3034 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 3035 ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr); 3036 3037 /* Allocate space for ca */ 3038 ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr); 3039 3040 /* put together the new matrix */ 3041 ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,NULL,C);CHKERRQ(ierr); 3042 ierr = MatSetBlockSize(C,pp->dof);CHKERRQ(ierr); 3043 3044 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3045 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3046 c = (Mat_SeqAIJ*)(C->data); 3047 c->free_a = PETSC_TRUE; 3048 c->free_ij = PETSC_TRUE; 3049 c->nonew = 0; 3050 3051 C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 3052 C->ops->productnumeric = MatProductNumeric_PtAP; 3053 3054 /* Clean up. */ 3055 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 3056 PetscFunctionReturn(0); 3057 } 3058 3059 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 3060 { 3061 /* This routine requires testing -- first draft only */ 3062 PetscErrorCode ierr; 3063 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 3064 Mat P =pp->AIJ; 3065 Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data; 3066 Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data; 3067 Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data; 3068 const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj; 3069 const PetscInt *ci=c->i,*cj=c->j,*cjj; 3070 const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3071 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3072 const MatScalar *aa=a->a,*pa=p->a,*pA,*paj; 3073 MatScalar *ca=c->a,*caj,*apa; 3074 3075 PetscFunctionBegin; 3076 /* Allocate temporary array for storage of one row of A*P */ 3077 ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr); 3078 3079 /* Clear old values in C */ 3080 ierr = PetscArrayzero(ca,ci[cm]);CHKERRQ(ierr); 3081 3082 for (i=0; i<am; i++) { 3083 /* Form sparse row of A*P */ 3084 anzi = ai[i+1] - ai[i]; 3085 apnzj = 0; 3086 for (j=0; j<anzi; j++) { 3087 /* Get offset within block of P */ 3088 pshift = *aj%ppdof; 3089 /* Get block row of P */ 3090 prow = *aj++/ppdof; /* integer division */ 3091 pnzj = pi[prow+1] - pi[prow]; 3092 pjj = pj + pi[prow]; 3093 paj = pa + pi[prow]; 3094 for (k=0; k<pnzj; k++) { 3095 poffset = pjj[k]*ppdof+pshift; 3096 if (!apjdense[poffset]) { 3097 apjdense[poffset] = -1; 3098 apj[apnzj++] = poffset; 3099 } 3100 apa[poffset] += (*aa)*paj[k]; 3101 } 3102 ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 3103 aa++; 3104 } 3105 3106 /* Sort the j index array for quick sparse axpy. */ 3107 /* Note: a array does not need sorting as it is in dense storage locations. */ 3108 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 3109 3110 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 3111 prow = i/ppdof; /* integer division */ 3112 pshift = i%ppdof; 3113 poffset = pi[prow]; 3114 pnzi = pi[prow+1] - poffset; 3115 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 3116 pJ = pj+poffset; 3117 pA = pa+poffset; 3118 for (j=0; j<pnzi; j++) { 3119 crow = (*pJ)*ppdof+pshift; 3120 cjj = cj + ci[crow]; 3121 caj = ca + ci[crow]; 3122 pJ++; 3123 /* Perform sparse axpy operation. Note cjj includes apj. */ 3124 for (k=0,nextap=0; nextap<apnzj; k++) { 3125 if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]]; 3126 } 3127 ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 3128 pA++; 3129 } 3130 3131 /* Zero the current row info for A*P */ 3132 for (j=0; j<apnzj; j++) { 3133 apa[apj[j]] = 0.; 3134 apjdense[apj[j]] = 0; 3135 } 3136 } 3137 3138 /* Assemble the final matrix and clean up */ 3139 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3140 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3141 ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr); 3142 PetscFunctionReturn(0); 3143 } 3144 3145 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) 3146 { 3147 PetscErrorCode ierr; 3148 Mat_Product *product = C->product; 3149 Mat A=product->A,P=product->B; 3150 3151 PetscFunctionBegin; 3152 ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C);CHKERRQ(ierr); 3153 PetscFunctionReturn(0); 3154 } 3155 3156 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 3157 { 3158 PetscFunctionBegin; 3159 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet"); 3160 } 3161 3162 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 3163 { 3164 PetscFunctionBegin; 3165 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 3166 } 3167 3168 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat); 3169 3170 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C) 3171 { 3172 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3173 PetscErrorCode ierr; 3174 3175 PetscFunctionBegin; 3176 3177 ierr = MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C);CHKERRQ(ierr); 3178 PetscFunctionReturn(0); 3179 } 3180 3181 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat); 3182 3183 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C) 3184 { 3185 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3186 PetscErrorCode ierr; 3187 3188 PetscFunctionBegin; 3189 ierr = MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C);CHKERRQ(ierr); 3190 C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; 3191 PetscFunctionReturn(0); 3192 } 3193 3194 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat); 3195 3196 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C) 3197 { 3198 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3199 PetscErrorCode ierr; 3200 3201 PetscFunctionBegin; 3202 3203 ierr = MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C);CHKERRQ(ierr); 3204 PetscFunctionReturn(0); 3205 } 3206 3207 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat); 3208 3209 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C) 3210 { 3211 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3212 PetscErrorCode ierr; 3213 3214 PetscFunctionBegin; 3215 3216 ierr = MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C);CHKERRQ(ierr); 3217 C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; 3218 PetscFunctionReturn(0); 3219 } 3220 3221 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) 3222 { 3223 PetscErrorCode ierr; 3224 Mat_Product *product = C->product; 3225 Mat A=product->A,P=product->B; 3226 PetscBool flg; 3227 3228 PetscFunctionBegin; 3229 ierr = PetscStrcmp(product->alg,"allatonce",&flg);CHKERRQ(ierr); 3230 if (flg) { 3231 ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C);CHKERRQ(ierr); 3232 C->ops->productnumeric = MatProductNumeric_PtAP; 3233 PetscFunctionReturn(0); 3234 } 3235 3236 ierr = PetscStrcmp(product->alg,"allatonce_merged",&flg);CHKERRQ(ierr); 3237 if (flg) { 3238 ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C);CHKERRQ(ierr); 3239 C->ops->productnumeric = MatProductNumeric_PtAP; 3240 PetscFunctionReturn(0); 3241 } 3242 3243 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported"); 3244 } 3245 3246 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3247 { 3248 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3249 Mat a = b->AIJ,B; 3250 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 3251 PetscErrorCode ierr; 3252 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3253 PetscInt *cols; 3254 PetscScalar *vals; 3255 3256 PetscFunctionBegin; 3257 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 3258 ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr); 3259 for (i=0; i<m; i++) { 3260 nmax = PetscMax(nmax,aij->ilen[i]); 3261 for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i]; 3262 } 3263 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3264 ierr = MatSetSizes(B,dof*m,dof*n,dof*m,dof*n);CHKERRQ(ierr); 3265 ierr = MatSetType(B,newtype);CHKERRQ(ierr); 3266 ierr = MatSeqAIJSetPreallocation(B,0,ilen);CHKERRQ(ierr); 3267 ierr = PetscFree(ilen);CHKERRQ(ierr); 3268 ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr); 3269 ii = 0; 3270 for (i=0; i<m; i++) { 3271 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3272 for (j=0; j<dof; j++) { 3273 for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j; 3274 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3275 ii++; 3276 } 3277 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3278 } 3279 ierr = PetscFree(icols);CHKERRQ(ierr); 3280 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3281 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3282 3283 if (reuse == MAT_INPLACE_MATRIX) { 3284 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 3285 } else { 3286 *newmat = B; 3287 } 3288 PetscFunctionReturn(0); 3289 } 3290 3291 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3292 3293 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3294 { 3295 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3296 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 3297 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 3298 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 3299 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 3300 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 3301 PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0; 3302 PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL; 3303 PetscInt rstart,cstart,*garray,ii,k; 3304 PetscErrorCode ierr; 3305 PetscScalar *vals,*ovals; 3306 3307 PetscFunctionBegin; 3308 ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr); 3309 for (i=0; i<A->rmap->n/dof; i++) { 3310 nmax = PetscMax(nmax,AIJ->ilen[i]); 3311 onmax = PetscMax(onmax,OAIJ->ilen[i]); 3312 for (j=0; j<dof; j++) { 3313 dnz[dof*i+j] = AIJ->ilen[i]; 3314 onz[dof*i+j] = OAIJ->ilen[i]; 3315 } 3316 } 3317 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3318 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3319 ierr = MatSetType(B,newtype);CHKERRQ(ierr); 3320 ierr = MatMPIAIJSetPreallocation(B,0,dnz,0,onz);CHKERRQ(ierr); 3321 ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr); 3322 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 3323 3324 ierr = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr); 3325 rstart = dof*maij->A->rmap->rstart; 3326 cstart = dof*maij->A->cmap->rstart; 3327 garray = mpiaij->garray; 3328 3329 ii = rstart; 3330 for (i=0; i<A->rmap->n/dof; i++) { 3331 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3332 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3333 for (j=0; j<dof; j++) { 3334 for (k=0; k<ncols; k++) { 3335 icols[k] = cstart + dof*cols[k]+j; 3336 } 3337 for (k=0; k<oncols; k++) { 3338 oicols[k] = dof*garray[ocols[k]]+j; 3339 } 3340 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3341 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 3342 ii++; 3343 } 3344 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3345 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3346 } 3347 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 3348 3349 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3350 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3351 3352 if (reuse == MAT_INPLACE_MATRIX) { 3353 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 3354 ((PetscObject)A)->refct = 1; 3355 3356 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 3357 3358 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3359 } else { 3360 *newmat = B; 3361 } 3362 PetscFunctionReturn(0); 3363 } 3364 3365 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 3366 { 3367 PetscErrorCode ierr; 3368 Mat A; 3369 3370 PetscFunctionBegin; 3371 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 3372 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 3373 ierr = MatDestroy(&A);CHKERRQ(ierr); 3374 PetscFunctionReturn(0); 3375 } 3376 3377 PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[]) 3378 { 3379 PetscErrorCode ierr; 3380 Mat A; 3381 3382 PetscFunctionBegin; 3383 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 3384 ierr = MatCreateSubMatrices(A,n,irow,icol,scall,submat);CHKERRQ(ierr); 3385 ierr = MatDestroy(&A);CHKERRQ(ierr); 3386 PetscFunctionReturn(0); 3387 } 3388 3389 /* ---------------------------------------------------------------------------------- */ 3390 /*@ 3391 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 3392 operations for multicomponent problems. It interpolates each component the same 3393 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 3394 and MATMPIAIJ for distributed matrices. 3395 3396 Collective 3397 3398 Input Parameters: 3399 + A - the AIJ matrix describing the action on blocks 3400 - dof - the block size (number of components per node) 3401 3402 Output Parameter: 3403 . maij - the new MAIJ matrix 3404 3405 Operations provided: 3406 .vb 3407 MatMult 3408 MatMultTranspose 3409 MatMultAdd 3410 MatMultTransposeAdd 3411 MatView 3412 .ve 3413 3414 Level: advanced 3415 3416 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ 3417 @*/ 3418 PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 3419 { 3420 PetscErrorCode ierr; 3421 PetscMPIInt size; 3422 PetscInt n; 3423 Mat B; 3424 #if defined(PETSC_HAVE_CUDA) 3425 /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 3426 PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; 3427 #endif 3428 3429 PetscFunctionBegin; 3430 dof = PetscAbs(dof); 3431 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3432 3433 if (dof == 1) *maij = A; 3434 else { 3435 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3436 /* propagate vec type */ 3437 ierr = MatSetVecType(B,A->defaultvectype);CHKERRQ(ierr); 3438 ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3439 ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr); 3440 ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr); 3441 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3442 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3443 3444 B->assembled = PETSC_TRUE; 3445 3446 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 3447 if (size == 1) { 3448 Mat_SeqMAIJ *b; 3449 3450 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 3451 3452 B->ops->setup = NULL; 3453 B->ops->destroy = MatDestroy_SeqMAIJ; 3454 B->ops->view = MatView_SeqMAIJ; 3455 3456 b = (Mat_SeqMAIJ*)B->data; 3457 b->dof = dof; 3458 b->AIJ = A; 3459 3460 if (dof == 2) { 3461 B->ops->mult = MatMult_SeqMAIJ_2; 3462 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3463 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3464 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3465 } else if (dof == 3) { 3466 B->ops->mult = MatMult_SeqMAIJ_3; 3467 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3468 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3469 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3470 } else if (dof == 4) { 3471 B->ops->mult = MatMult_SeqMAIJ_4; 3472 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3473 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3474 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3475 } else if (dof == 5) { 3476 B->ops->mult = MatMult_SeqMAIJ_5; 3477 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3478 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3479 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 3480 } else if (dof == 6) { 3481 B->ops->mult = MatMult_SeqMAIJ_6; 3482 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 3483 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 3484 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3485 } else if (dof == 7) { 3486 B->ops->mult = MatMult_SeqMAIJ_7; 3487 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3488 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3489 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 3490 } else if (dof == 8) { 3491 B->ops->mult = MatMult_SeqMAIJ_8; 3492 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 3493 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 3494 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 3495 } else if (dof == 9) { 3496 B->ops->mult = MatMult_SeqMAIJ_9; 3497 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 3498 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 3499 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3500 } else if (dof == 10) { 3501 B->ops->mult = MatMult_SeqMAIJ_10; 3502 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3503 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3504 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3505 } else if (dof == 11) { 3506 B->ops->mult = MatMult_SeqMAIJ_11; 3507 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3508 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3509 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 3510 } else if (dof == 16) { 3511 B->ops->mult = MatMult_SeqMAIJ_16; 3512 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 3513 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 3514 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3515 } else if (dof == 18) { 3516 B->ops->mult = MatMult_SeqMAIJ_18; 3517 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3518 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3519 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3520 } else { 3521 B->ops->mult = MatMult_SeqMAIJ_N; 3522 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 3523 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 3524 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 3525 } 3526 #if defined(PETSC_HAVE_CUDA) 3527 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3528 #endif 3529 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3530 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 3531 } else { 3532 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data; 3533 Mat_MPIMAIJ *b; 3534 IS from,to; 3535 Vec gvec; 3536 3537 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 3538 3539 B->ops->setup = NULL; 3540 B->ops->destroy = MatDestroy_MPIMAIJ; 3541 B->ops->view = MatView_MPIMAIJ; 3542 3543 b = (Mat_MPIMAIJ*)B->data; 3544 b->dof = dof; 3545 b->A = A; 3546 3547 ierr = MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ);CHKERRQ(ierr); 3548 ierr = MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ);CHKERRQ(ierr); 3549 3550 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3551 ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr); 3552 ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr); 3553 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3554 ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr); 3555 3556 /* create two temporary Index sets for build scatter gather */ 3557 ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 3558 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3559 3560 /* create temporary global vector to generate scatter context */ 3561 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 3562 3563 /* generate the scatter context */ 3564 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3565 3566 ierr = ISDestroy(&from);CHKERRQ(ierr); 3567 ierr = ISDestroy(&to);CHKERRQ(ierr); 3568 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 3569 3570 B->ops->mult = MatMult_MPIMAIJ_dof; 3571 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3572 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3573 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3574 3575 #if defined(PETSC_HAVE_CUDA) 3576 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3577 #endif 3578 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3579 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ);CHKERRQ(ierr); 3580 } 3581 B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 3582 B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 3583 ierr = MatSetUp(B);CHKERRQ(ierr); 3584 #if defined(PETSC_HAVE_CUDA) 3585 /* temporary until we have CUDA implementation of MAIJ */ 3586 { 3587 PetscBool flg; 3588 if (convert) { 3589 ierr = PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"");CHKERRQ(ierr); 3590 if (flg) { 3591 ierr = MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 3592 } 3593 } 3594 } 3595 #endif 3596 *maij = B; 3597 } 3598 PetscFunctionReturn(0); 3599 } 3600