1 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20 #include <../src/mat/utils/freespace.h> 21 22 /*@ 23 MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 24 25 Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 26 27 Input Parameter: 28 . A - the MAIJ matrix 29 30 Output Parameter: 31 . B - the AIJ matrix 32 33 Level: advanced 34 35 Notes: 36 The reference count on the AIJ matrix is not increased so you should not destroy it. 37 38 .seealso: MatCreateMAIJ() 39 @*/ 40 PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 41 { 42 PetscErrorCode ierr; 43 PetscBool ismpimaij,isseqmaij; 44 45 PetscFunctionBegin; 46 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 47 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 48 if (ismpimaij) { 49 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 50 51 *B = b->A; 52 } else if (isseqmaij) { 53 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 54 55 *B = b->AIJ; 56 } else { 57 *B = A; 58 } 59 PetscFunctionReturn(0); 60 } 61 62 /*@ 63 MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 64 65 Logically Collective 66 67 Input Parameters: 68 + A - the MAIJ matrix 69 - dof - the block size for the new matrix 70 71 Output Parameter: 72 . B - the new MAIJ matrix 73 74 Level: advanced 75 76 .seealso: MatCreateMAIJ() 77 @*/ 78 PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 79 { 80 PetscErrorCode ierr; 81 Mat Aij = NULL; 82 83 PetscFunctionBegin; 84 PetscValidLogicalCollectiveInt(A,dof,2); 85 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 86 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 87 PetscFunctionReturn(0); 88 } 89 90 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 91 { 92 PetscErrorCode ierr; 93 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 94 95 PetscFunctionBegin; 96 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 97 ierr = PetscFree(A->data);CHKERRQ(ierr); 98 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr); 99 ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL);CHKERRQ(ierr); 100 PetscFunctionReturn(0); 101 } 102 103 PetscErrorCode MatSetUp_MAIJ(Mat A) 104 { 105 PetscFunctionBegin; 106 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices"); 107 } 108 109 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 110 { 111 PetscErrorCode ierr; 112 Mat B; 113 114 PetscFunctionBegin; 115 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 116 ierr = MatView(B,viewer);CHKERRQ(ierr); 117 ierr = MatDestroy(&B);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 122 { 123 PetscErrorCode ierr; 124 Mat B; 125 126 PetscFunctionBegin; 127 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 128 ierr = MatView(B,viewer);CHKERRQ(ierr); 129 ierr = MatDestroy(&B);CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 134 { 135 PetscErrorCode ierr; 136 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 137 138 PetscFunctionBegin; 139 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 140 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 141 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 142 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 143 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 144 ierr = PetscFree(A->data);CHKERRQ(ierr); 145 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr); 146 ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr); 147 ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr); 148 PetscFunctionReturn(0); 149 } 150 151 /*MC 152 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 153 multicomponent problems, interpolating or restricting each component the same way independently. 154 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 155 156 Operations provided: 157 + MatMult 158 . MatMultTranspose 159 . MatMultAdd 160 - MatMultTransposeAdd 161 162 Level: advanced 163 164 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ() 165 M*/ 166 167 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 168 { 169 PetscErrorCode ierr; 170 Mat_MPIMAIJ *b; 171 PetscMPIInt size; 172 173 PetscFunctionBegin; 174 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 175 A->data = (void*)b; 176 177 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 178 179 A->ops->setup = MatSetUp_MAIJ; 180 181 b->AIJ = NULL; 182 b->dof = 0; 183 b->OAIJ = NULL; 184 b->ctx = NULL; 185 b->w = NULL; 186 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 187 if (size == 1) { 188 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 189 } else { 190 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 191 } 192 A->preallocated = PETSC_TRUE; 193 A->assembled = PETSC_TRUE; 194 PetscFunctionReturn(0); 195 } 196 197 /* --------------------------------------------------------------------------------------*/ 198 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 199 { 200 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 201 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 202 const PetscScalar *x,*v; 203 PetscScalar *y, sum1, sum2; 204 PetscErrorCode ierr; 205 PetscInt nonzerorow=0,n,i,jrow,j; 206 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 207 208 PetscFunctionBegin; 209 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 210 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 211 idx = a->j; 212 v = a->a; 213 ii = a->i; 214 215 for (i=0; i<m; i++) { 216 jrow = ii[i]; 217 n = ii[i+1] - jrow; 218 sum1 = 0.0; 219 sum2 = 0.0; 220 221 nonzerorow += (n>0); 222 for (j=0; j<n; j++) { 223 sum1 += v[jrow]*x[2*idx[jrow]]; 224 sum2 += v[jrow]*x[2*idx[jrow]+1]; 225 jrow++; 226 } 227 y[2*i] = sum1; 228 y[2*i+1] = sum2; 229 } 230 231 ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 232 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 233 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 234 PetscFunctionReturn(0); 235 } 236 237 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 238 { 239 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 240 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 241 const PetscScalar *x,*v; 242 PetscScalar *y,alpha1,alpha2; 243 PetscErrorCode ierr; 244 PetscInt n,i; 245 const PetscInt m = b->AIJ->rmap->n,*idx; 246 247 PetscFunctionBegin; 248 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 249 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 250 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 251 252 for (i=0; i<m; i++) { 253 idx = a->j + a->i[i]; 254 v = a->a + a->i[i]; 255 n = a->i[i+1] - a->i[i]; 256 alpha1 = x[2*i]; 257 alpha2 = x[2*i+1]; 258 while (n-->0) { 259 y[2*(*idx)] += alpha1*(*v); 260 y[2*(*idx)+1] += alpha2*(*v); 261 idx++; v++; 262 } 263 } 264 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 265 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 266 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 267 PetscFunctionReturn(0); 268 } 269 270 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 271 { 272 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 273 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 274 const PetscScalar *x,*v; 275 PetscScalar *y,sum1, sum2; 276 PetscErrorCode ierr; 277 PetscInt n,i,jrow,j; 278 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 279 280 PetscFunctionBegin; 281 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 282 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 283 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 284 idx = a->j; 285 v = a->a; 286 ii = a->i; 287 288 for (i=0; i<m; i++) { 289 jrow = ii[i]; 290 n = ii[i+1] - jrow; 291 sum1 = 0.0; 292 sum2 = 0.0; 293 for (j=0; j<n; j++) { 294 sum1 += v[jrow]*x[2*idx[jrow]]; 295 sum2 += v[jrow]*x[2*idx[jrow]+1]; 296 jrow++; 297 } 298 y[2*i] += sum1; 299 y[2*i+1] += sum2; 300 } 301 302 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 303 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 304 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 305 PetscFunctionReturn(0); 306 } 307 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 308 { 309 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 310 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 311 const PetscScalar *x,*v; 312 PetscScalar *y,alpha1,alpha2; 313 PetscErrorCode ierr; 314 PetscInt n,i; 315 const PetscInt m = b->AIJ->rmap->n,*idx; 316 317 PetscFunctionBegin; 318 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 319 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 320 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 321 322 for (i=0; i<m; i++) { 323 idx = a->j + a->i[i]; 324 v = a->a + a->i[i]; 325 n = a->i[i+1] - a->i[i]; 326 alpha1 = x[2*i]; 327 alpha2 = x[2*i+1]; 328 while (n-->0) { 329 y[2*(*idx)] += alpha1*(*v); 330 y[2*(*idx)+1] += alpha2*(*v); 331 idx++; v++; 332 } 333 } 334 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 335 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 336 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 337 PetscFunctionReturn(0); 338 } 339 /* --------------------------------------------------------------------------------------*/ 340 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 341 { 342 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 343 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 344 const PetscScalar *x,*v; 345 PetscScalar *y,sum1, sum2, sum3; 346 PetscErrorCode ierr; 347 PetscInt nonzerorow=0,n,i,jrow,j; 348 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 349 350 PetscFunctionBegin; 351 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 352 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 353 idx = a->j; 354 v = a->a; 355 ii = a->i; 356 357 for (i=0; i<m; i++) { 358 jrow = ii[i]; 359 n = ii[i+1] - jrow; 360 sum1 = 0.0; 361 sum2 = 0.0; 362 sum3 = 0.0; 363 364 nonzerorow += (n>0); 365 for (j=0; j<n; j++) { 366 sum1 += v[jrow]*x[3*idx[jrow]]; 367 sum2 += v[jrow]*x[3*idx[jrow]+1]; 368 sum3 += v[jrow]*x[3*idx[jrow]+2]; 369 jrow++; 370 } 371 y[3*i] = sum1; 372 y[3*i+1] = sum2; 373 y[3*i+2] = sum3; 374 } 375 376 ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 377 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 378 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 379 PetscFunctionReturn(0); 380 } 381 382 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 383 { 384 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 385 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 386 const PetscScalar *x,*v; 387 PetscScalar *y,alpha1,alpha2,alpha3; 388 PetscErrorCode ierr; 389 PetscInt n,i; 390 const PetscInt m = b->AIJ->rmap->n,*idx; 391 392 PetscFunctionBegin; 393 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 394 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 395 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 396 397 for (i=0; i<m; i++) { 398 idx = a->j + a->i[i]; 399 v = a->a + a->i[i]; 400 n = a->i[i+1] - a->i[i]; 401 alpha1 = x[3*i]; 402 alpha2 = x[3*i+1]; 403 alpha3 = x[3*i+2]; 404 while (n-->0) { 405 y[3*(*idx)] += alpha1*(*v); 406 y[3*(*idx)+1] += alpha2*(*v); 407 y[3*(*idx)+2] += alpha3*(*v); 408 idx++; v++; 409 } 410 } 411 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 412 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 413 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 414 PetscFunctionReturn(0); 415 } 416 417 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 418 { 419 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 420 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 421 const PetscScalar *x,*v; 422 PetscScalar *y,sum1, sum2, sum3; 423 PetscErrorCode ierr; 424 PetscInt n,i,jrow,j; 425 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 426 427 PetscFunctionBegin; 428 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 429 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 430 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 431 idx = a->j; 432 v = a->a; 433 ii = a->i; 434 435 for (i=0; i<m; i++) { 436 jrow = ii[i]; 437 n = ii[i+1] - jrow; 438 sum1 = 0.0; 439 sum2 = 0.0; 440 sum3 = 0.0; 441 for (j=0; j<n; j++) { 442 sum1 += v[jrow]*x[3*idx[jrow]]; 443 sum2 += v[jrow]*x[3*idx[jrow]+1]; 444 sum3 += v[jrow]*x[3*idx[jrow]+2]; 445 jrow++; 446 } 447 y[3*i] += sum1; 448 y[3*i+1] += sum2; 449 y[3*i+2] += sum3; 450 } 451 452 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 453 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 454 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 455 PetscFunctionReturn(0); 456 } 457 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 458 { 459 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 460 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 461 const PetscScalar *x,*v; 462 PetscScalar *y,alpha1,alpha2,alpha3; 463 PetscErrorCode ierr; 464 PetscInt n,i; 465 const PetscInt m = b->AIJ->rmap->n,*idx; 466 467 PetscFunctionBegin; 468 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 469 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 470 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 471 for (i=0; i<m; i++) { 472 idx = a->j + a->i[i]; 473 v = a->a + a->i[i]; 474 n = a->i[i+1] - a->i[i]; 475 alpha1 = x[3*i]; 476 alpha2 = x[3*i+1]; 477 alpha3 = x[3*i+2]; 478 while (n-->0) { 479 y[3*(*idx)] += alpha1*(*v); 480 y[3*(*idx)+1] += alpha2*(*v); 481 y[3*(*idx)+2] += alpha3*(*v); 482 idx++; v++; 483 } 484 } 485 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 486 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 487 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 488 PetscFunctionReturn(0); 489 } 490 491 /* ------------------------------------------------------------------------------*/ 492 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 493 { 494 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 495 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 496 const PetscScalar *x,*v; 497 PetscScalar *y,sum1, sum2, sum3, sum4; 498 PetscErrorCode ierr; 499 PetscInt nonzerorow=0,n,i,jrow,j; 500 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 501 502 PetscFunctionBegin; 503 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 504 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 505 idx = a->j; 506 v = a->a; 507 ii = a->i; 508 509 for (i=0; i<m; i++) { 510 jrow = ii[i]; 511 n = ii[i+1] - jrow; 512 sum1 = 0.0; 513 sum2 = 0.0; 514 sum3 = 0.0; 515 sum4 = 0.0; 516 nonzerorow += (n>0); 517 for (j=0; j<n; j++) { 518 sum1 += v[jrow]*x[4*idx[jrow]]; 519 sum2 += v[jrow]*x[4*idx[jrow]+1]; 520 sum3 += v[jrow]*x[4*idx[jrow]+2]; 521 sum4 += v[jrow]*x[4*idx[jrow]+3]; 522 jrow++; 523 } 524 y[4*i] = sum1; 525 y[4*i+1] = sum2; 526 y[4*i+2] = sum3; 527 y[4*i+3] = sum4; 528 } 529 530 ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 531 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 532 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 533 PetscFunctionReturn(0); 534 } 535 536 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 537 { 538 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 539 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 540 const PetscScalar *x,*v; 541 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 542 PetscErrorCode ierr; 543 PetscInt n,i; 544 const PetscInt m = b->AIJ->rmap->n,*idx; 545 546 PetscFunctionBegin; 547 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 548 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 549 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 550 for (i=0; i<m; i++) { 551 idx = a->j + a->i[i]; 552 v = a->a + a->i[i]; 553 n = a->i[i+1] - a->i[i]; 554 alpha1 = x[4*i]; 555 alpha2 = x[4*i+1]; 556 alpha3 = x[4*i+2]; 557 alpha4 = x[4*i+3]; 558 while (n-->0) { 559 y[4*(*idx)] += alpha1*(*v); 560 y[4*(*idx)+1] += alpha2*(*v); 561 y[4*(*idx)+2] += alpha3*(*v); 562 y[4*(*idx)+3] += alpha4*(*v); 563 idx++; v++; 564 } 565 } 566 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 567 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 568 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 569 PetscFunctionReturn(0); 570 } 571 572 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 573 { 574 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 575 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 576 const PetscScalar *x,*v; 577 PetscScalar *y,sum1, sum2, sum3, sum4; 578 PetscErrorCode ierr; 579 PetscInt n,i,jrow,j; 580 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 581 582 PetscFunctionBegin; 583 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 584 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 585 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 586 idx = a->j; 587 v = a->a; 588 ii = a->i; 589 590 for (i=0; i<m; i++) { 591 jrow = ii[i]; 592 n = ii[i+1] - jrow; 593 sum1 = 0.0; 594 sum2 = 0.0; 595 sum3 = 0.0; 596 sum4 = 0.0; 597 for (j=0; j<n; j++) { 598 sum1 += v[jrow]*x[4*idx[jrow]]; 599 sum2 += v[jrow]*x[4*idx[jrow]+1]; 600 sum3 += v[jrow]*x[4*idx[jrow]+2]; 601 sum4 += v[jrow]*x[4*idx[jrow]+3]; 602 jrow++; 603 } 604 y[4*i] += sum1; 605 y[4*i+1] += sum2; 606 y[4*i+2] += sum3; 607 y[4*i+3] += sum4; 608 } 609 610 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 611 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 612 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 613 PetscFunctionReturn(0); 614 } 615 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 616 { 617 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 618 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 619 const PetscScalar *x,*v; 620 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 621 PetscErrorCode ierr; 622 PetscInt n,i; 623 const PetscInt m = b->AIJ->rmap->n,*idx; 624 625 PetscFunctionBegin; 626 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 627 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 628 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 629 630 for (i=0; i<m; i++) { 631 idx = a->j + a->i[i]; 632 v = a->a + a->i[i]; 633 n = a->i[i+1] - a->i[i]; 634 alpha1 = x[4*i]; 635 alpha2 = x[4*i+1]; 636 alpha3 = x[4*i+2]; 637 alpha4 = x[4*i+3]; 638 while (n-->0) { 639 y[4*(*idx)] += alpha1*(*v); 640 y[4*(*idx)+1] += alpha2*(*v); 641 y[4*(*idx)+2] += alpha3*(*v); 642 y[4*(*idx)+3] += alpha4*(*v); 643 idx++; v++; 644 } 645 } 646 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 647 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 648 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 649 PetscFunctionReturn(0); 650 } 651 /* ------------------------------------------------------------------------------*/ 652 653 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 654 { 655 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 656 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 657 const PetscScalar *x,*v; 658 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 659 PetscErrorCode ierr; 660 PetscInt nonzerorow=0,n,i,jrow,j; 661 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 662 663 PetscFunctionBegin; 664 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 665 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 666 idx = a->j; 667 v = a->a; 668 ii = a->i; 669 670 for (i=0; i<m; i++) { 671 jrow = ii[i]; 672 n = ii[i+1] - jrow; 673 sum1 = 0.0; 674 sum2 = 0.0; 675 sum3 = 0.0; 676 sum4 = 0.0; 677 sum5 = 0.0; 678 679 nonzerorow += (n>0); 680 for (j=0; j<n; j++) { 681 sum1 += v[jrow]*x[5*idx[jrow]]; 682 sum2 += v[jrow]*x[5*idx[jrow]+1]; 683 sum3 += v[jrow]*x[5*idx[jrow]+2]; 684 sum4 += v[jrow]*x[5*idx[jrow]+3]; 685 sum5 += v[jrow]*x[5*idx[jrow]+4]; 686 jrow++; 687 } 688 y[5*i] = sum1; 689 y[5*i+1] = sum2; 690 y[5*i+2] = sum3; 691 y[5*i+3] = sum4; 692 y[5*i+4] = sum5; 693 } 694 695 ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 696 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 697 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 698 PetscFunctionReturn(0); 699 } 700 701 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 702 { 703 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 704 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 705 const PetscScalar *x,*v; 706 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 707 PetscErrorCode ierr; 708 PetscInt n,i; 709 const PetscInt m = b->AIJ->rmap->n,*idx; 710 711 PetscFunctionBegin; 712 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 713 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 714 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 715 716 for (i=0; i<m; i++) { 717 idx = a->j + a->i[i]; 718 v = a->a + a->i[i]; 719 n = a->i[i+1] - a->i[i]; 720 alpha1 = x[5*i]; 721 alpha2 = x[5*i+1]; 722 alpha3 = x[5*i+2]; 723 alpha4 = x[5*i+3]; 724 alpha5 = x[5*i+4]; 725 while (n-->0) { 726 y[5*(*idx)] += alpha1*(*v); 727 y[5*(*idx)+1] += alpha2*(*v); 728 y[5*(*idx)+2] += alpha3*(*v); 729 y[5*(*idx)+3] += alpha4*(*v); 730 y[5*(*idx)+4] += alpha5*(*v); 731 idx++; v++; 732 } 733 } 734 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 735 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 736 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 741 { 742 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 743 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 744 const PetscScalar *x,*v; 745 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 746 PetscErrorCode ierr; 747 PetscInt n,i,jrow,j; 748 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 749 750 PetscFunctionBegin; 751 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 752 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 753 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 754 idx = a->j; 755 v = a->a; 756 ii = a->i; 757 758 for (i=0; i<m; i++) { 759 jrow = ii[i]; 760 n = ii[i+1] - jrow; 761 sum1 = 0.0; 762 sum2 = 0.0; 763 sum3 = 0.0; 764 sum4 = 0.0; 765 sum5 = 0.0; 766 for (j=0; j<n; j++) { 767 sum1 += v[jrow]*x[5*idx[jrow]]; 768 sum2 += v[jrow]*x[5*idx[jrow]+1]; 769 sum3 += v[jrow]*x[5*idx[jrow]+2]; 770 sum4 += v[jrow]*x[5*idx[jrow]+3]; 771 sum5 += v[jrow]*x[5*idx[jrow]+4]; 772 jrow++; 773 } 774 y[5*i] += sum1; 775 y[5*i+1] += sum2; 776 y[5*i+2] += sum3; 777 y[5*i+3] += sum4; 778 y[5*i+4] += sum5; 779 } 780 781 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 782 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 783 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 784 PetscFunctionReturn(0); 785 } 786 787 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 788 { 789 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 790 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 791 const PetscScalar *x,*v; 792 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 793 PetscErrorCode ierr; 794 PetscInt n,i; 795 const PetscInt m = b->AIJ->rmap->n,*idx; 796 797 PetscFunctionBegin; 798 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 799 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 800 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 801 802 for (i=0; i<m; i++) { 803 idx = a->j + a->i[i]; 804 v = a->a + a->i[i]; 805 n = a->i[i+1] - a->i[i]; 806 alpha1 = x[5*i]; 807 alpha2 = x[5*i+1]; 808 alpha3 = x[5*i+2]; 809 alpha4 = x[5*i+3]; 810 alpha5 = x[5*i+4]; 811 while (n-->0) { 812 y[5*(*idx)] += alpha1*(*v); 813 y[5*(*idx)+1] += alpha2*(*v); 814 y[5*(*idx)+2] += alpha3*(*v); 815 y[5*(*idx)+3] += alpha4*(*v); 816 y[5*(*idx)+4] += alpha5*(*v); 817 idx++; v++; 818 } 819 } 820 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 821 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 822 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 823 PetscFunctionReturn(0); 824 } 825 826 /* ------------------------------------------------------------------------------*/ 827 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 828 { 829 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 830 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 831 const PetscScalar *x,*v; 832 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 833 PetscErrorCode ierr; 834 PetscInt nonzerorow=0,n,i,jrow,j; 835 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 836 837 PetscFunctionBegin; 838 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 839 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 840 idx = a->j; 841 v = a->a; 842 ii = a->i; 843 844 for (i=0; i<m; i++) { 845 jrow = ii[i]; 846 n = ii[i+1] - jrow; 847 sum1 = 0.0; 848 sum2 = 0.0; 849 sum3 = 0.0; 850 sum4 = 0.0; 851 sum5 = 0.0; 852 sum6 = 0.0; 853 854 nonzerorow += (n>0); 855 for (j=0; j<n; j++) { 856 sum1 += v[jrow]*x[6*idx[jrow]]; 857 sum2 += v[jrow]*x[6*idx[jrow]+1]; 858 sum3 += v[jrow]*x[6*idx[jrow]+2]; 859 sum4 += v[jrow]*x[6*idx[jrow]+3]; 860 sum5 += v[jrow]*x[6*idx[jrow]+4]; 861 sum6 += v[jrow]*x[6*idx[jrow]+5]; 862 jrow++; 863 } 864 y[6*i] = sum1; 865 y[6*i+1] = sum2; 866 y[6*i+2] = sum3; 867 y[6*i+3] = sum4; 868 y[6*i+4] = sum5; 869 y[6*i+5] = sum6; 870 } 871 872 ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 873 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 874 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 875 PetscFunctionReturn(0); 876 } 877 878 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 879 { 880 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 881 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 882 const PetscScalar *x,*v; 883 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 884 PetscErrorCode ierr; 885 PetscInt n,i; 886 const PetscInt m = b->AIJ->rmap->n,*idx; 887 888 PetscFunctionBegin; 889 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 890 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 891 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 892 893 for (i=0; i<m; i++) { 894 idx = a->j + a->i[i]; 895 v = a->a + a->i[i]; 896 n = a->i[i+1] - a->i[i]; 897 alpha1 = x[6*i]; 898 alpha2 = x[6*i+1]; 899 alpha3 = x[6*i+2]; 900 alpha4 = x[6*i+3]; 901 alpha5 = x[6*i+4]; 902 alpha6 = x[6*i+5]; 903 while (n-->0) { 904 y[6*(*idx)] += alpha1*(*v); 905 y[6*(*idx)+1] += alpha2*(*v); 906 y[6*(*idx)+2] += alpha3*(*v); 907 y[6*(*idx)+3] += alpha4*(*v); 908 y[6*(*idx)+4] += alpha5*(*v); 909 y[6*(*idx)+5] += alpha6*(*v); 910 idx++; v++; 911 } 912 } 913 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 914 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 915 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 916 PetscFunctionReturn(0); 917 } 918 919 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 920 { 921 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 922 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 923 const PetscScalar *x,*v; 924 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 925 PetscErrorCode ierr; 926 PetscInt n,i,jrow,j; 927 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 928 929 PetscFunctionBegin; 930 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 931 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 932 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 933 idx = a->j; 934 v = a->a; 935 ii = a->i; 936 937 for (i=0; i<m; i++) { 938 jrow = ii[i]; 939 n = ii[i+1] - jrow; 940 sum1 = 0.0; 941 sum2 = 0.0; 942 sum3 = 0.0; 943 sum4 = 0.0; 944 sum5 = 0.0; 945 sum6 = 0.0; 946 for (j=0; j<n; j++) { 947 sum1 += v[jrow]*x[6*idx[jrow]]; 948 sum2 += v[jrow]*x[6*idx[jrow]+1]; 949 sum3 += v[jrow]*x[6*idx[jrow]+2]; 950 sum4 += v[jrow]*x[6*idx[jrow]+3]; 951 sum5 += v[jrow]*x[6*idx[jrow]+4]; 952 sum6 += v[jrow]*x[6*idx[jrow]+5]; 953 jrow++; 954 } 955 y[6*i] += sum1; 956 y[6*i+1] += sum2; 957 y[6*i+2] += sum3; 958 y[6*i+3] += sum4; 959 y[6*i+4] += sum5; 960 y[6*i+5] += sum6; 961 } 962 963 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 964 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 965 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } 968 969 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 970 { 971 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 972 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 973 const PetscScalar *x,*v; 974 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 975 PetscErrorCode ierr; 976 PetscInt n,i; 977 const PetscInt m = b->AIJ->rmap->n,*idx; 978 979 PetscFunctionBegin; 980 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 981 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 982 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 983 984 for (i=0; i<m; i++) { 985 idx = a->j + a->i[i]; 986 v = a->a + a->i[i]; 987 n = a->i[i+1] - a->i[i]; 988 alpha1 = x[6*i]; 989 alpha2 = x[6*i+1]; 990 alpha3 = x[6*i+2]; 991 alpha4 = x[6*i+3]; 992 alpha5 = x[6*i+4]; 993 alpha6 = x[6*i+5]; 994 while (n-->0) { 995 y[6*(*idx)] += alpha1*(*v); 996 y[6*(*idx)+1] += alpha2*(*v); 997 y[6*(*idx)+2] += alpha3*(*v); 998 y[6*(*idx)+3] += alpha4*(*v); 999 y[6*(*idx)+4] += alpha5*(*v); 1000 y[6*(*idx)+5] += alpha6*(*v); 1001 idx++; v++; 1002 } 1003 } 1004 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1005 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1006 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1007 PetscFunctionReturn(0); 1008 } 1009 1010 /* ------------------------------------------------------------------------------*/ 1011 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1012 { 1013 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1014 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1015 const PetscScalar *x,*v; 1016 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1017 PetscErrorCode ierr; 1018 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1019 PetscInt nonzerorow=0,n,i,jrow,j; 1020 1021 PetscFunctionBegin; 1022 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1023 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1024 idx = a->j; 1025 v = a->a; 1026 ii = a->i; 1027 1028 for (i=0; i<m; i++) { 1029 jrow = ii[i]; 1030 n = ii[i+1] - jrow; 1031 sum1 = 0.0; 1032 sum2 = 0.0; 1033 sum3 = 0.0; 1034 sum4 = 0.0; 1035 sum5 = 0.0; 1036 sum6 = 0.0; 1037 sum7 = 0.0; 1038 1039 nonzerorow += (n>0); 1040 for (j=0; j<n; j++) { 1041 sum1 += v[jrow]*x[7*idx[jrow]]; 1042 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1043 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1044 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1045 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1046 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1047 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1048 jrow++; 1049 } 1050 y[7*i] = sum1; 1051 y[7*i+1] = sum2; 1052 y[7*i+2] = sum3; 1053 y[7*i+3] = sum4; 1054 y[7*i+4] = sum5; 1055 y[7*i+5] = sum6; 1056 y[7*i+6] = sum7; 1057 } 1058 1059 ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 1060 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1061 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1062 PetscFunctionReturn(0); 1063 } 1064 1065 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1066 { 1067 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1068 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1069 const PetscScalar *x,*v; 1070 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1071 PetscErrorCode ierr; 1072 const PetscInt m = b->AIJ->rmap->n,*idx; 1073 PetscInt n,i; 1074 1075 PetscFunctionBegin; 1076 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1077 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1078 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1079 1080 for (i=0; i<m; i++) { 1081 idx = a->j + a->i[i]; 1082 v = a->a + a->i[i]; 1083 n = a->i[i+1] - a->i[i]; 1084 alpha1 = x[7*i]; 1085 alpha2 = x[7*i+1]; 1086 alpha3 = x[7*i+2]; 1087 alpha4 = x[7*i+3]; 1088 alpha5 = x[7*i+4]; 1089 alpha6 = x[7*i+5]; 1090 alpha7 = x[7*i+6]; 1091 while (n-->0) { 1092 y[7*(*idx)] += alpha1*(*v); 1093 y[7*(*idx)+1] += alpha2*(*v); 1094 y[7*(*idx)+2] += alpha3*(*v); 1095 y[7*(*idx)+3] += alpha4*(*v); 1096 y[7*(*idx)+4] += alpha5*(*v); 1097 y[7*(*idx)+5] += alpha6*(*v); 1098 y[7*(*idx)+6] += alpha7*(*v); 1099 idx++; v++; 1100 } 1101 } 1102 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1103 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1104 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1105 PetscFunctionReturn(0); 1106 } 1107 1108 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1109 { 1110 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1111 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1112 const PetscScalar *x,*v; 1113 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1114 PetscErrorCode ierr; 1115 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1116 PetscInt n,i,jrow,j; 1117 1118 PetscFunctionBegin; 1119 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1120 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1121 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1122 idx = a->j; 1123 v = a->a; 1124 ii = a->i; 1125 1126 for (i=0; i<m; i++) { 1127 jrow = ii[i]; 1128 n = ii[i+1] - jrow; 1129 sum1 = 0.0; 1130 sum2 = 0.0; 1131 sum3 = 0.0; 1132 sum4 = 0.0; 1133 sum5 = 0.0; 1134 sum6 = 0.0; 1135 sum7 = 0.0; 1136 for (j=0; j<n; j++) { 1137 sum1 += v[jrow]*x[7*idx[jrow]]; 1138 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1139 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1140 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1141 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1142 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1143 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1144 jrow++; 1145 } 1146 y[7*i] += sum1; 1147 y[7*i+1] += sum2; 1148 y[7*i+2] += sum3; 1149 y[7*i+3] += sum4; 1150 y[7*i+4] += sum5; 1151 y[7*i+5] += sum6; 1152 y[7*i+6] += sum7; 1153 } 1154 1155 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1156 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1157 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1158 PetscFunctionReturn(0); 1159 } 1160 1161 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1162 { 1163 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1164 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1165 const PetscScalar *x,*v; 1166 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1167 PetscErrorCode ierr; 1168 const PetscInt m = b->AIJ->rmap->n,*idx; 1169 PetscInt n,i; 1170 1171 PetscFunctionBegin; 1172 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1173 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1174 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1175 for (i=0; i<m; i++) { 1176 idx = a->j + a->i[i]; 1177 v = a->a + a->i[i]; 1178 n = a->i[i+1] - a->i[i]; 1179 alpha1 = x[7*i]; 1180 alpha2 = x[7*i+1]; 1181 alpha3 = x[7*i+2]; 1182 alpha4 = x[7*i+3]; 1183 alpha5 = x[7*i+4]; 1184 alpha6 = x[7*i+5]; 1185 alpha7 = x[7*i+6]; 1186 while (n-->0) { 1187 y[7*(*idx)] += alpha1*(*v); 1188 y[7*(*idx)+1] += alpha2*(*v); 1189 y[7*(*idx)+2] += alpha3*(*v); 1190 y[7*(*idx)+3] += alpha4*(*v); 1191 y[7*(*idx)+4] += alpha5*(*v); 1192 y[7*(*idx)+5] += alpha6*(*v); 1193 y[7*(*idx)+6] += alpha7*(*v); 1194 idx++; v++; 1195 } 1196 } 1197 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1198 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1199 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1200 PetscFunctionReturn(0); 1201 } 1202 1203 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1204 { 1205 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1206 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1207 const PetscScalar *x,*v; 1208 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1209 PetscErrorCode ierr; 1210 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1211 PetscInt nonzerorow=0,n,i,jrow,j; 1212 1213 PetscFunctionBegin; 1214 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1215 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1216 idx = a->j; 1217 v = a->a; 1218 ii = a->i; 1219 1220 for (i=0; i<m; i++) { 1221 jrow = ii[i]; 1222 n = ii[i+1] - jrow; 1223 sum1 = 0.0; 1224 sum2 = 0.0; 1225 sum3 = 0.0; 1226 sum4 = 0.0; 1227 sum5 = 0.0; 1228 sum6 = 0.0; 1229 sum7 = 0.0; 1230 sum8 = 0.0; 1231 1232 nonzerorow += (n>0); 1233 for (j=0; j<n; j++) { 1234 sum1 += v[jrow]*x[8*idx[jrow]]; 1235 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1236 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1237 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1238 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1239 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1240 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1241 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1242 jrow++; 1243 } 1244 y[8*i] = sum1; 1245 y[8*i+1] = sum2; 1246 y[8*i+2] = sum3; 1247 y[8*i+3] = sum4; 1248 y[8*i+4] = sum5; 1249 y[8*i+5] = sum6; 1250 y[8*i+6] = sum7; 1251 y[8*i+7] = sum8; 1252 } 1253 1254 ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 1255 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1256 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1257 PetscFunctionReturn(0); 1258 } 1259 1260 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1261 { 1262 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1263 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1264 const PetscScalar *x,*v; 1265 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1266 PetscErrorCode ierr; 1267 const PetscInt m = b->AIJ->rmap->n,*idx; 1268 PetscInt n,i; 1269 1270 PetscFunctionBegin; 1271 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1272 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1273 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1274 1275 for (i=0; i<m; i++) { 1276 idx = a->j + a->i[i]; 1277 v = a->a + a->i[i]; 1278 n = a->i[i+1] - a->i[i]; 1279 alpha1 = x[8*i]; 1280 alpha2 = x[8*i+1]; 1281 alpha3 = x[8*i+2]; 1282 alpha4 = x[8*i+3]; 1283 alpha5 = x[8*i+4]; 1284 alpha6 = x[8*i+5]; 1285 alpha7 = x[8*i+6]; 1286 alpha8 = x[8*i+7]; 1287 while (n-->0) { 1288 y[8*(*idx)] += alpha1*(*v); 1289 y[8*(*idx)+1] += alpha2*(*v); 1290 y[8*(*idx)+2] += alpha3*(*v); 1291 y[8*(*idx)+3] += alpha4*(*v); 1292 y[8*(*idx)+4] += alpha5*(*v); 1293 y[8*(*idx)+5] += alpha6*(*v); 1294 y[8*(*idx)+6] += alpha7*(*v); 1295 y[8*(*idx)+7] += alpha8*(*v); 1296 idx++; v++; 1297 } 1298 } 1299 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1300 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1301 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1302 PetscFunctionReturn(0); 1303 } 1304 1305 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1306 { 1307 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1308 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1309 const PetscScalar *x,*v; 1310 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1311 PetscErrorCode ierr; 1312 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1313 PetscInt n,i,jrow,j; 1314 1315 PetscFunctionBegin; 1316 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1317 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1318 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1319 idx = a->j; 1320 v = a->a; 1321 ii = a->i; 1322 1323 for (i=0; i<m; i++) { 1324 jrow = ii[i]; 1325 n = ii[i+1] - jrow; 1326 sum1 = 0.0; 1327 sum2 = 0.0; 1328 sum3 = 0.0; 1329 sum4 = 0.0; 1330 sum5 = 0.0; 1331 sum6 = 0.0; 1332 sum7 = 0.0; 1333 sum8 = 0.0; 1334 for (j=0; j<n; j++) { 1335 sum1 += v[jrow]*x[8*idx[jrow]]; 1336 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1337 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1338 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1339 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1340 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1341 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1342 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1343 jrow++; 1344 } 1345 y[8*i] += sum1; 1346 y[8*i+1] += sum2; 1347 y[8*i+2] += sum3; 1348 y[8*i+3] += sum4; 1349 y[8*i+4] += sum5; 1350 y[8*i+5] += sum6; 1351 y[8*i+6] += sum7; 1352 y[8*i+7] += sum8; 1353 } 1354 1355 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1356 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1357 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1358 PetscFunctionReturn(0); 1359 } 1360 1361 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1362 { 1363 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1364 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1365 const PetscScalar *x,*v; 1366 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1367 PetscErrorCode ierr; 1368 const PetscInt m = b->AIJ->rmap->n,*idx; 1369 PetscInt n,i; 1370 1371 PetscFunctionBegin; 1372 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1373 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1374 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1375 for (i=0; i<m; i++) { 1376 idx = a->j + a->i[i]; 1377 v = a->a + a->i[i]; 1378 n = a->i[i+1] - a->i[i]; 1379 alpha1 = x[8*i]; 1380 alpha2 = x[8*i+1]; 1381 alpha3 = x[8*i+2]; 1382 alpha4 = x[8*i+3]; 1383 alpha5 = x[8*i+4]; 1384 alpha6 = x[8*i+5]; 1385 alpha7 = x[8*i+6]; 1386 alpha8 = x[8*i+7]; 1387 while (n-->0) { 1388 y[8*(*idx)] += alpha1*(*v); 1389 y[8*(*idx)+1] += alpha2*(*v); 1390 y[8*(*idx)+2] += alpha3*(*v); 1391 y[8*(*idx)+3] += alpha4*(*v); 1392 y[8*(*idx)+4] += alpha5*(*v); 1393 y[8*(*idx)+5] += alpha6*(*v); 1394 y[8*(*idx)+6] += alpha7*(*v); 1395 y[8*(*idx)+7] += alpha8*(*v); 1396 idx++; v++; 1397 } 1398 } 1399 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1400 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1401 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1402 PetscFunctionReturn(0); 1403 } 1404 1405 /* ------------------------------------------------------------------------------*/ 1406 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1407 { 1408 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1409 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1410 const PetscScalar *x,*v; 1411 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1412 PetscErrorCode ierr; 1413 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1414 PetscInt nonzerorow=0,n,i,jrow,j; 1415 1416 PetscFunctionBegin; 1417 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1418 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1419 idx = a->j; 1420 v = a->a; 1421 ii = a->i; 1422 1423 for (i=0; i<m; i++) { 1424 jrow = ii[i]; 1425 n = ii[i+1] - jrow; 1426 sum1 = 0.0; 1427 sum2 = 0.0; 1428 sum3 = 0.0; 1429 sum4 = 0.0; 1430 sum5 = 0.0; 1431 sum6 = 0.0; 1432 sum7 = 0.0; 1433 sum8 = 0.0; 1434 sum9 = 0.0; 1435 1436 nonzerorow += (n>0); 1437 for (j=0; j<n; j++) { 1438 sum1 += v[jrow]*x[9*idx[jrow]]; 1439 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1440 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1441 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1442 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1443 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1444 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1445 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1446 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1447 jrow++; 1448 } 1449 y[9*i] = sum1; 1450 y[9*i+1] = sum2; 1451 y[9*i+2] = sum3; 1452 y[9*i+3] = sum4; 1453 y[9*i+4] = sum5; 1454 y[9*i+5] = sum6; 1455 y[9*i+6] = sum7; 1456 y[9*i+7] = sum8; 1457 y[9*i+8] = sum9; 1458 } 1459 1460 ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 1461 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1462 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1463 PetscFunctionReturn(0); 1464 } 1465 1466 /* ------------------------------------------------------------------------------*/ 1467 1468 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1469 { 1470 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1471 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1472 const PetscScalar *x,*v; 1473 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1474 PetscErrorCode ierr; 1475 const PetscInt m = b->AIJ->rmap->n,*idx; 1476 PetscInt n,i; 1477 1478 PetscFunctionBegin; 1479 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1480 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1481 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1482 1483 for (i=0; i<m; i++) { 1484 idx = a->j + a->i[i]; 1485 v = a->a + a->i[i]; 1486 n = a->i[i+1] - a->i[i]; 1487 alpha1 = x[9*i]; 1488 alpha2 = x[9*i+1]; 1489 alpha3 = x[9*i+2]; 1490 alpha4 = x[9*i+3]; 1491 alpha5 = x[9*i+4]; 1492 alpha6 = x[9*i+5]; 1493 alpha7 = x[9*i+6]; 1494 alpha8 = x[9*i+7]; 1495 alpha9 = x[9*i+8]; 1496 while (n-->0) { 1497 y[9*(*idx)] += alpha1*(*v); 1498 y[9*(*idx)+1] += alpha2*(*v); 1499 y[9*(*idx)+2] += alpha3*(*v); 1500 y[9*(*idx)+3] += alpha4*(*v); 1501 y[9*(*idx)+4] += alpha5*(*v); 1502 y[9*(*idx)+5] += alpha6*(*v); 1503 y[9*(*idx)+6] += alpha7*(*v); 1504 y[9*(*idx)+7] += alpha8*(*v); 1505 y[9*(*idx)+8] += alpha9*(*v); 1506 idx++; v++; 1507 } 1508 } 1509 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1510 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1511 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1512 PetscFunctionReturn(0); 1513 } 1514 1515 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1516 { 1517 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1518 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1519 const PetscScalar *x,*v; 1520 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1521 PetscErrorCode ierr; 1522 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1523 PetscInt n,i,jrow,j; 1524 1525 PetscFunctionBegin; 1526 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1527 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1528 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1529 idx = a->j; 1530 v = a->a; 1531 ii = a->i; 1532 1533 for (i=0; i<m; i++) { 1534 jrow = ii[i]; 1535 n = ii[i+1] - jrow; 1536 sum1 = 0.0; 1537 sum2 = 0.0; 1538 sum3 = 0.0; 1539 sum4 = 0.0; 1540 sum5 = 0.0; 1541 sum6 = 0.0; 1542 sum7 = 0.0; 1543 sum8 = 0.0; 1544 sum9 = 0.0; 1545 for (j=0; j<n; j++) { 1546 sum1 += v[jrow]*x[9*idx[jrow]]; 1547 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1548 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1549 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1550 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1551 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1552 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1553 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1554 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1555 jrow++; 1556 } 1557 y[9*i] += sum1; 1558 y[9*i+1] += sum2; 1559 y[9*i+2] += sum3; 1560 y[9*i+3] += sum4; 1561 y[9*i+4] += sum5; 1562 y[9*i+5] += sum6; 1563 y[9*i+6] += sum7; 1564 y[9*i+7] += sum8; 1565 y[9*i+8] += sum9; 1566 } 1567 1568 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1569 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1570 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1571 PetscFunctionReturn(0); 1572 } 1573 1574 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1575 { 1576 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1577 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1578 const PetscScalar *x,*v; 1579 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1580 PetscErrorCode ierr; 1581 const PetscInt m = b->AIJ->rmap->n,*idx; 1582 PetscInt n,i; 1583 1584 PetscFunctionBegin; 1585 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1586 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1587 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1588 for (i=0; i<m; i++) { 1589 idx = a->j + a->i[i]; 1590 v = a->a + a->i[i]; 1591 n = a->i[i+1] - a->i[i]; 1592 alpha1 = x[9*i]; 1593 alpha2 = x[9*i+1]; 1594 alpha3 = x[9*i+2]; 1595 alpha4 = x[9*i+3]; 1596 alpha5 = x[9*i+4]; 1597 alpha6 = x[9*i+5]; 1598 alpha7 = x[9*i+6]; 1599 alpha8 = x[9*i+7]; 1600 alpha9 = x[9*i+8]; 1601 while (n-->0) { 1602 y[9*(*idx)] += alpha1*(*v); 1603 y[9*(*idx)+1] += alpha2*(*v); 1604 y[9*(*idx)+2] += alpha3*(*v); 1605 y[9*(*idx)+3] += alpha4*(*v); 1606 y[9*(*idx)+4] += alpha5*(*v); 1607 y[9*(*idx)+5] += alpha6*(*v); 1608 y[9*(*idx)+6] += alpha7*(*v); 1609 y[9*(*idx)+7] += alpha8*(*v); 1610 y[9*(*idx)+8] += alpha9*(*v); 1611 idx++; v++; 1612 } 1613 } 1614 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1615 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1616 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1617 PetscFunctionReturn(0); 1618 } 1619 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1620 { 1621 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1622 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1623 const PetscScalar *x,*v; 1624 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1625 PetscErrorCode ierr; 1626 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1627 PetscInt nonzerorow=0,n,i,jrow,j; 1628 1629 PetscFunctionBegin; 1630 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1631 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1632 idx = a->j; 1633 v = a->a; 1634 ii = a->i; 1635 1636 for (i=0; i<m; i++) { 1637 jrow = ii[i]; 1638 n = ii[i+1] - jrow; 1639 sum1 = 0.0; 1640 sum2 = 0.0; 1641 sum3 = 0.0; 1642 sum4 = 0.0; 1643 sum5 = 0.0; 1644 sum6 = 0.0; 1645 sum7 = 0.0; 1646 sum8 = 0.0; 1647 sum9 = 0.0; 1648 sum10 = 0.0; 1649 1650 nonzerorow += (n>0); 1651 for (j=0; j<n; j++) { 1652 sum1 += v[jrow]*x[10*idx[jrow]]; 1653 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1654 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1655 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1656 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1657 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1658 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1659 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1660 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1661 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1662 jrow++; 1663 } 1664 y[10*i] = sum1; 1665 y[10*i+1] = sum2; 1666 y[10*i+2] = sum3; 1667 y[10*i+3] = sum4; 1668 y[10*i+4] = sum5; 1669 y[10*i+5] = sum6; 1670 y[10*i+6] = sum7; 1671 y[10*i+7] = sum8; 1672 y[10*i+8] = sum9; 1673 y[10*i+9] = sum10; 1674 } 1675 1676 ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 1677 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1678 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1679 PetscFunctionReturn(0); 1680 } 1681 1682 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1683 { 1684 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1685 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1686 const PetscScalar *x,*v; 1687 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1688 PetscErrorCode ierr; 1689 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1690 PetscInt n,i,jrow,j; 1691 1692 PetscFunctionBegin; 1693 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1694 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1695 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1696 idx = a->j; 1697 v = a->a; 1698 ii = a->i; 1699 1700 for (i=0; i<m; i++) { 1701 jrow = ii[i]; 1702 n = ii[i+1] - jrow; 1703 sum1 = 0.0; 1704 sum2 = 0.0; 1705 sum3 = 0.0; 1706 sum4 = 0.0; 1707 sum5 = 0.0; 1708 sum6 = 0.0; 1709 sum7 = 0.0; 1710 sum8 = 0.0; 1711 sum9 = 0.0; 1712 sum10 = 0.0; 1713 for (j=0; j<n; j++) { 1714 sum1 += v[jrow]*x[10*idx[jrow]]; 1715 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1716 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1717 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1718 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1719 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1720 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1721 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1722 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1723 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1724 jrow++; 1725 } 1726 y[10*i] += sum1; 1727 y[10*i+1] += sum2; 1728 y[10*i+2] += sum3; 1729 y[10*i+3] += sum4; 1730 y[10*i+4] += sum5; 1731 y[10*i+5] += sum6; 1732 y[10*i+6] += sum7; 1733 y[10*i+7] += sum8; 1734 y[10*i+8] += sum9; 1735 y[10*i+9] += sum10; 1736 } 1737 1738 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1739 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1740 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1741 PetscFunctionReturn(0); 1742 } 1743 1744 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1745 { 1746 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1747 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1748 const PetscScalar *x,*v; 1749 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1750 PetscErrorCode ierr; 1751 const PetscInt m = b->AIJ->rmap->n,*idx; 1752 PetscInt n,i; 1753 1754 PetscFunctionBegin; 1755 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1756 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1757 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1758 1759 for (i=0; i<m; i++) { 1760 idx = a->j + a->i[i]; 1761 v = a->a + a->i[i]; 1762 n = a->i[i+1] - a->i[i]; 1763 alpha1 = x[10*i]; 1764 alpha2 = x[10*i+1]; 1765 alpha3 = x[10*i+2]; 1766 alpha4 = x[10*i+3]; 1767 alpha5 = x[10*i+4]; 1768 alpha6 = x[10*i+5]; 1769 alpha7 = x[10*i+6]; 1770 alpha8 = x[10*i+7]; 1771 alpha9 = x[10*i+8]; 1772 alpha10 = x[10*i+9]; 1773 while (n-->0) { 1774 y[10*(*idx)] += alpha1*(*v); 1775 y[10*(*idx)+1] += alpha2*(*v); 1776 y[10*(*idx)+2] += alpha3*(*v); 1777 y[10*(*idx)+3] += alpha4*(*v); 1778 y[10*(*idx)+4] += alpha5*(*v); 1779 y[10*(*idx)+5] += alpha6*(*v); 1780 y[10*(*idx)+6] += alpha7*(*v); 1781 y[10*(*idx)+7] += alpha8*(*v); 1782 y[10*(*idx)+8] += alpha9*(*v); 1783 y[10*(*idx)+9] += alpha10*(*v); 1784 idx++; v++; 1785 } 1786 } 1787 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1788 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1789 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1790 PetscFunctionReturn(0); 1791 } 1792 1793 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1794 { 1795 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1796 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1797 const PetscScalar *x,*v; 1798 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1799 PetscErrorCode ierr; 1800 const PetscInt m = b->AIJ->rmap->n,*idx; 1801 PetscInt n,i; 1802 1803 PetscFunctionBegin; 1804 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1805 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1806 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1807 for (i=0; i<m; i++) { 1808 idx = a->j + a->i[i]; 1809 v = a->a + a->i[i]; 1810 n = a->i[i+1] - a->i[i]; 1811 alpha1 = x[10*i]; 1812 alpha2 = x[10*i+1]; 1813 alpha3 = x[10*i+2]; 1814 alpha4 = x[10*i+3]; 1815 alpha5 = x[10*i+4]; 1816 alpha6 = x[10*i+5]; 1817 alpha7 = x[10*i+6]; 1818 alpha8 = x[10*i+7]; 1819 alpha9 = x[10*i+8]; 1820 alpha10 = x[10*i+9]; 1821 while (n-->0) { 1822 y[10*(*idx)] += alpha1*(*v); 1823 y[10*(*idx)+1] += alpha2*(*v); 1824 y[10*(*idx)+2] += alpha3*(*v); 1825 y[10*(*idx)+3] += alpha4*(*v); 1826 y[10*(*idx)+4] += alpha5*(*v); 1827 y[10*(*idx)+5] += alpha6*(*v); 1828 y[10*(*idx)+6] += alpha7*(*v); 1829 y[10*(*idx)+7] += alpha8*(*v); 1830 y[10*(*idx)+8] += alpha9*(*v); 1831 y[10*(*idx)+9] += alpha10*(*v); 1832 idx++; v++; 1833 } 1834 } 1835 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1836 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1837 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1838 PetscFunctionReturn(0); 1839 } 1840 1841 /*--------------------------------------------------------------------------------------------*/ 1842 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1843 { 1844 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1845 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1846 const PetscScalar *x,*v; 1847 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1848 PetscErrorCode ierr; 1849 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1850 PetscInt nonzerorow=0,n,i,jrow,j; 1851 1852 PetscFunctionBegin; 1853 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1854 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1855 idx = a->j; 1856 v = a->a; 1857 ii = a->i; 1858 1859 for (i=0; i<m; i++) { 1860 jrow = ii[i]; 1861 n = ii[i+1] - jrow; 1862 sum1 = 0.0; 1863 sum2 = 0.0; 1864 sum3 = 0.0; 1865 sum4 = 0.0; 1866 sum5 = 0.0; 1867 sum6 = 0.0; 1868 sum7 = 0.0; 1869 sum8 = 0.0; 1870 sum9 = 0.0; 1871 sum10 = 0.0; 1872 sum11 = 0.0; 1873 1874 nonzerorow += (n>0); 1875 for (j=0; j<n; j++) { 1876 sum1 += v[jrow]*x[11*idx[jrow]]; 1877 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1878 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1879 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1880 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1881 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1882 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1883 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1884 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1885 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1886 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1887 jrow++; 1888 } 1889 y[11*i] = sum1; 1890 y[11*i+1] = sum2; 1891 y[11*i+2] = sum3; 1892 y[11*i+3] = sum4; 1893 y[11*i+4] = sum5; 1894 y[11*i+5] = sum6; 1895 y[11*i+6] = sum7; 1896 y[11*i+7] = sum8; 1897 y[11*i+8] = sum9; 1898 y[11*i+9] = sum10; 1899 y[11*i+10] = sum11; 1900 } 1901 1902 ierr = PetscLogFlops(22.0*a->nz - 11*nonzerorow);CHKERRQ(ierr); 1903 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1904 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1905 PetscFunctionReturn(0); 1906 } 1907 1908 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1909 { 1910 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1911 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1912 const PetscScalar *x,*v; 1913 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1914 PetscErrorCode ierr; 1915 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1916 PetscInt n,i,jrow,j; 1917 1918 PetscFunctionBegin; 1919 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1920 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1921 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1922 idx = a->j; 1923 v = a->a; 1924 ii = a->i; 1925 1926 for (i=0; i<m; i++) { 1927 jrow = ii[i]; 1928 n = ii[i+1] - jrow; 1929 sum1 = 0.0; 1930 sum2 = 0.0; 1931 sum3 = 0.0; 1932 sum4 = 0.0; 1933 sum5 = 0.0; 1934 sum6 = 0.0; 1935 sum7 = 0.0; 1936 sum8 = 0.0; 1937 sum9 = 0.0; 1938 sum10 = 0.0; 1939 sum11 = 0.0; 1940 for (j=0; j<n; j++) { 1941 sum1 += v[jrow]*x[11*idx[jrow]]; 1942 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1943 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1944 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1945 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1946 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1947 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1948 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1949 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1950 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1951 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1952 jrow++; 1953 } 1954 y[11*i] += sum1; 1955 y[11*i+1] += sum2; 1956 y[11*i+2] += sum3; 1957 y[11*i+3] += sum4; 1958 y[11*i+4] += sum5; 1959 y[11*i+5] += sum6; 1960 y[11*i+6] += sum7; 1961 y[11*i+7] += sum8; 1962 y[11*i+8] += sum9; 1963 y[11*i+9] += sum10; 1964 y[11*i+10] += sum11; 1965 } 1966 1967 ierr = PetscLogFlops(22.0*a->nz);CHKERRQ(ierr); 1968 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1969 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1970 PetscFunctionReturn(0); 1971 } 1972 1973 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1974 { 1975 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1976 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1977 const PetscScalar *x,*v; 1978 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 1979 PetscErrorCode ierr; 1980 const PetscInt m = b->AIJ->rmap->n,*idx; 1981 PetscInt n,i; 1982 1983 PetscFunctionBegin; 1984 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1985 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1986 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1987 1988 for (i=0; i<m; i++) { 1989 idx = a->j + a->i[i]; 1990 v = a->a + a->i[i]; 1991 n = a->i[i+1] - a->i[i]; 1992 alpha1 = x[11*i]; 1993 alpha2 = x[11*i+1]; 1994 alpha3 = x[11*i+2]; 1995 alpha4 = x[11*i+3]; 1996 alpha5 = x[11*i+4]; 1997 alpha6 = x[11*i+5]; 1998 alpha7 = x[11*i+6]; 1999 alpha8 = x[11*i+7]; 2000 alpha9 = x[11*i+8]; 2001 alpha10 = x[11*i+9]; 2002 alpha11 = x[11*i+10]; 2003 while (n-->0) { 2004 y[11*(*idx)] += alpha1*(*v); 2005 y[11*(*idx)+1] += alpha2*(*v); 2006 y[11*(*idx)+2] += alpha3*(*v); 2007 y[11*(*idx)+3] += alpha4*(*v); 2008 y[11*(*idx)+4] += alpha5*(*v); 2009 y[11*(*idx)+5] += alpha6*(*v); 2010 y[11*(*idx)+6] += alpha7*(*v); 2011 y[11*(*idx)+7] += alpha8*(*v); 2012 y[11*(*idx)+8] += alpha9*(*v); 2013 y[11*(*idx)+9] += alpha10*(*v); 2014 y[11*(*idx)+10] += alpha11*(*v); 2015 idx++; v++; 2016 } 2017 } 2018 ierr = PetscLogFlops(22.0*a->nz);CHKERRQ(ierr); 2019 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2020 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2021 PetscFunctionReturn(0); 2022 } 2023 2024 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2025 { 2026 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2027 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2028 const PetscScalar *x,*v; 2029 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2030 PetscErrorCode ierr; 2031 const PetscInt m = b->AIJ->rmap->n,*idx; 2032 PetscInt n,i; 2033 2034 PetscFunctionBegin; 2035 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2036 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2037 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2038 for (i=0; i<m; i++) { 2039 idx = a->j + a->i[i]; 2040 v = a->a + a->i[i]; 2041 n = a->i[i+1] - a->i[i]; 2042 alpha1 = x[11*i]; 2043 alpha2 = x[11*i+1]; 2044 alpha3 = x[11*i+2]; 2045 alpha4 = x[11*i+3]; 2046 alpha5 = x[11*i+4]; 2047 alpha6 = x[11*i+5]; 2048 alpha7 = x[11*i+6]; 2049 alpha8 = x[11*i+7]; 2050 alpha9 = x[11*i+8]; 2051 alpha10 = x[11*i+9]; 2052 alpha11 = x[11*i+10]; 2053 while (n-->0) { 2054 y[11*(*idx)] += alpha1*(*v); 2055 y[11*(*idx)+1] += alpha2*(*v); 2056 y[11*(*idx)+2] += alpha3*(*v); 2057 y[11*(*idx)+3] += alpha4*(*v); 2058 y[11*(*idx)+4] += alpha5*(*v); 2059 y[11*(*idx)+5] += alpha6*(*v); 2060 y[11*(*idx)+6] += alpha7*(*v); 2061 y[11*(*idx)+7] += alpha8*(*v); 2062 y[11*(*idx)+8] += alpha9*(*v); 2063 y[11*(*idx)+9] += alpha10*(*v); 2064 y[11*(*idx)+10] += alpha11*(*v); 2065 idx++; v++; 2066 } 2067 } 2068 ierr = PetscLogFlops(22.0*a->nz);CHKERRQ(ierr); 2069 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2070 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2071 PetscFunctionReturn(0); 2072 } 2073 2074 /*--------------------------------------------------------------------------------------------*/ 2075 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2076 { 2077 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2078 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2079 const PetscScalar *x,*v; 2080 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2081 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2082 PetscErrorCode ierr; 2083 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2084 PetscInt nonzerorow=0,n,i,jrow,j; 2085 2086 PetscFunctionBegin; 2087 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2088 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2089 idx = a->j; 2090 v = a->a; 2091 ii = a->i; 2092 2093 for (i=0; i<m; i++) { 2094 jrow = ii[i]; 2095 n = ii[i+1] - jrow; 2096 sum1 = 0.0; 2097 sum2 = 0.0; 2098 sum3 = 0.0; 2099 sum4 = 0.0; 2100 sum5 = 0.0; 2101 sum6 = 0.0; 2102 sum7 = 0.0; 2103 sum8 = 0.0; 2104 sum9 = 0.0; 2105 sum10 = 0.0; 2106 sum11 = 0.0; 2107 sum12 = 0.0; 2108 sum13 = 0.0; 2109 sum14 = 0.0; 2110 sum15 = 0.0; 2111 sum16 = 0.0; 2112 2113 nonzerorow += (n>0); 2114 for (j=0; j<n; j++) { 2115 sum1 += v[jrow]*x[16*idx[jrow]]; 2116 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2117 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2118 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2119 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2120 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2121 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2122 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2123 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2124 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2125 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2126 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2127 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2128 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2129 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2130 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2131 jrow++; 2132 } 2133 y[16*i] = sum1; 2134 y[16*i+1] = sum2; 2135 y[16*i+2] = sum3; 2136 y[16*i+3] = sum4; 2137 y[16*i+4] = sum5; 2138 y[16*i+5] = sum6; 2139 y[16*i+6] = sum7; 2140 y[16*i+7] = sum8; 2141 y[16*i+8] = sum9; 2142 y[16*i+9] = sum10; 2143 y[16*i+10] = sum11; 2144 y[16*i+11] = sum12; 2145 y[16*i+12] = sum13; 2146 y[16*i+13] = sum14; 2147 y[16*i+14] = sum15; 2148 y[16*i+15] = sum16; 2149 } 2150 2151 ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 2152 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2153 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2154 PetscFunctionReturn(0); 2155 } 2156 2157 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2158 { 2159 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2160 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2161 const PetscScalar *x,*v; 2162 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2163 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2164 PetscErrorCode ierr; 2165 const PetscInt m = b->AIJ->rmap->n,*idx; 2166 PetscInt n,i; 2167 2168 PetscFunctionBegin; 2169 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2170 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2171 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2172 2173 for (i=0; i<m; i++) { 2174 idx = a->j + a->i[i]; 2175 v = a->a + a->i[i]; 2176 n = a->i[i+1] - a->i[i]; 2177 alpha1 = x[16*i]; 2178 alpha2 = x[16*i+1]; 2179 alpha3 = x[16*i+2]; 2180 alpha4 = x[16*i+3]; 2181 alpha5 = x[16*i+4]; 2182 alpha6 = x[16*i+5]; 2183 alpha7 = x[16*i+6]; 2184 alpha8 = x[16*i+7]; 2185 alpha9 = x[16*i+8]; 2186 alpha10 = x[16*i+9]; 2187 alpha11 = x[16*i+10]; 2188 alpha12 = x[16*i+11]; 2189 alpha13 = x[16*i+12]; 2190 alpha14 = x[16*i+13]; 2191 alpha15 = x[16*i+14]; 2192 alpha16 = x[16*i+15]; 2193 while (n-->0) { 2194 y[16*(*idx)] += alpha1*(*v); 2195 y[16*(*idx)+1] += alpha2*(*v); 2196 y[16*(*idx)+2] += alpha3*(*v); 2197 y[16*(*idx)+3] += alpha4*(*v); 2198 y[16*(*idx)+4] += alpha5*(*v); 2199 y[16*(*idx)+5] += alpha6*(*v); 2200 y[16*(*idx)+6] += alpha7*(*v); 2201 y[16*(*idx)+7] += alpha8*(*v); 2202 y[16*(*idx)+8] += alpha9*(*v); 2203 y[16*(*idx)+9] += alpha10*(*v); 2204 y[16*(*idx)+10] += alpha11*(*v); 2205 y[16*(*idx)+11] += alpha12*(*v); 2206 y[16*(*idx)+12] += alpha13*(*v); 2207 y[16*(*idx)+13] += alpha14*(*v); 2208 y[16*(*idx)+14] += alpha15*(*v); 2209 y[16*(*idx)+15] += alpha16*(*v); 2210 idx++; v++; 2211 } 2212 } 2213 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2214 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2215 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2216 PetscFunctionReturn(0); 2217 } 2218 2219 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2220 { 2221 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2222 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2223 const PetscScalar *x,*v; 2224 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2225 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2226 PetscErrorCode ierr; 2227 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2228 PetscInt n,i,jrow,j; 2229 2230 PetscFunctionBegin; 2231 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2232 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2233 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2234 idx = a->j; 2235 v = a->a; 2236 ii = a->i; 2237 2238 for (i=0; i<m; i++) { 2239 jrow = ii[i]; 2240 n = ii[i+1] - jrow; 2241 sum1 = 0.0; 2242 sum2 = 0.0; 2243 sum3 = 0.0; 2244 sum4 = 0.0; 2245 sum5 = 0.0; 2246 sum6 = 0.0; 2247 sum7 = 0.0; 2248 sum8 = 0.0; 2249 sum9 = 0.0; 2250 sum10 = 0.0; 2251 sum11 = 0.0; 2252 sum12 = 0.0; 2253 sum13 = 0.0; 2254 sum14 = 0.0; 2255 sum15 = 0.0; 2256 sum16 = 0.0; 2257 for (j=0; j<n; j++) { 2258 sum1 += v[jrow]*x[16*idx[jrow]]; 2259 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2260 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2261 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2262 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2263 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2264 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2265 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2266 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2267 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2268 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2269 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2270 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2271 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2272 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2273 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2274 jrow++; 2275 } 2276 y[16*i] += sum1; 2277 y[16*i+1] += sum2; 2278 y[16*i+2] += sum3; 2279 y[16*i+3] += sum4; 2280 y[16*i+4] += sum5; 2281 y[16*i+5] += sum6; 2282 y[16*i+6] += sum7; 2283 y[16*i+7] += sum8; 2284 y[16*i+8] += sum9; 2285 y[16*i+9] += sum10; 2286 y[16*i+10] += sum11; 2287 y[16*i+11] += sum12; 2288 y[16*i+12] += sum13; 2289 y[16*i+13] += sum14; 2290 y[16*i+14] += sum15; 2291 y[16*i+15] += sum16; 2292 } 2293 2294 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2295 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2296 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2297 PetscFunctionReturn(0); 2298 } 2299 2300 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2301 { 2302 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2303 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2304 const PetscScalar *x,*v; 2305 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2306 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2307 PetscErrorCode ierr; 2308 const PetscInt m = b->AIJ->rmap->n,*idx; 2309 PetscInt n,i; 2310 2311 PetscFunctionBegin; 2312 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2313 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2314 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2315 for (i=0; i<m; i++) { 2316 idx = a->j + a->i[i]; 2317 v = a->a + a->i[i]; 2318 n = a->i[i+1] - a->i[i]; 2319 alpha1 = x[16*i]; 2320 alpha2 = x[16*i+1]; 2321 alpha3 = x[16*i+2]; 2322 alpha4 = x[16*i+3]; 2323 alpha5 = x[16*i+4]; 2324 alpha6 = x[16*i+5]; 2325 alpha7 = x[16*i+6]; 2326 alpha8 = x[16*i+7]; 2327 alpha9 = x[16*i+8]; 2328 alpha10 = x[16*i+9]; 2329 alpha11 = x[16*i+10]; 2330 alpha12 = x[16*i+11]; 2331 alpha13 = x[16*i+12]; 2332 alpha14 = x[16*i+13]; 2333 alpha15 = x[16*i+14]; 2334 alpha16 = x[16*i+15]; 2335 while (n-->0) { 2336 y[16*(*idx)] += alpha1*(*v); 2337 y[16*(*idx)+1] += alpha2*(*v); 2338 y[16*(*idx)+2] += alpha3*(*v); 2339 y[16*(*idx)+3] += alpha4*(*v); 2340 y[16*(*idx)+4] += alpha5*(*v); 2341 y[16*(*idx)+5] += alpha6*(*v); 2342 y[16*(*idx)+6] += alpha7*(*v); 2343 y[16*(*idx)+7] += alpha8*(*v); 2344 y[16*(*idx)+8] += alpha9*(*v); 2345 y[16*(*idx)+9] += alpha10*(*v); 2346 y[16*(*idx)+10] += alpha11*(*v); 2347 y[16*(*idx)+11] += alpha12*(*v); 2348 y[16*(*idx)+12] += alpha13*(*v); 2349 y[16*(*idx)+13] += alpha14*(*v); 2350 y[16*(*idx)+14] += alpha15*(*v); 2351 y[16*(*idx)+15] += alpha16*(*v); 2352 idx++; v++; 2353 } 2354 } 2355 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2356 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2357 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2358 PetscFunctionReturn(0); 2359 } 2360 2361 /*--------------------------------------------------------------------------------------------*/ 2362 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2363 { 2364 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2365 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2366 const PetscScalar *x,*v; 2367 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2368 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2369 PetscErrorCode ierr; 2370 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2371 PetscInt nonzerorow=0,n,i,jrow,j; 2372 2373 PetscFunctionBegin; 2374 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2375 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2376 idx = a->j; 2377 v = a->a; 2378 ii = a->i; 2379 2380 for (i=0; i<m; i++) { 2381 jrow = ii[i]; 2382 n = ii[i+1] - jrow; 2383 sum1 = 0.0; 2384 sum2 = 0.0; 2385 sum3 = 0.0; 2386 sum4 = 0.0; 2387 sum5 = 0.0; 2388 sum6 = 0.0; 2389 sum7 = 0.0; 2390 sum8 = 0.0; 2391 sum9 = 0.0; 2392 sum10 = 0.0; 2393 sum11 = 0.0; 2394 sum12 = 0.0; 2395 sum13 = 0.0; 2396 sum14 = 0.0; 2397 sum15 = 0.0; 2398 sum16 = 0.0; 2399 sum17 = 0.0; 2400 sum18 = 0.0; 2401 2402 nonzerorow += (n>0); 2403 for (j=0; j<n; j++) { 2404 sum1 += v[jrow]*x[18*idx[jrow]]; 2405 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2406 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2407 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2408 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2409 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2410 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2411 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2412 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2413 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2414 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2415 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2416 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2417 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2418 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2419 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2420 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2421 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2422 jrow++; 2423 } 2424 y[18*i] = sum1; 2425 y[18*i+1] = sum2; 2426 y[18*i+2] = sum3; 2427 y[18*i+3] = sum4; 2428 y[18*i+4] = sum5; 2429 y[18*i+5] = sum6; 2430 y[18*i+6] = sum7; 2431 y[18*i+7] = sum8; 2432 y[18*i+8] = sum9; 2433 y[18*i+9] = sum10; 2434 y[18*i+10] = sum11; 2435 y[18*i+11] = sum12; 2436 y[18*i+12] = sum13; 2437 y[18*i+13] = sum14; 2438 y[18*i+14] = sum15; 2439 y[18*i+15] = sum16; 2440 y[18*i+16] = sum17; 2441 y[18*i+17] = sum18; 2442 } 2443 2444 ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 2445 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2446 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2447 PetscFunctionReturn(0); 2448 } 2449 2450 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2451 { 2452 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2453 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2454 const PetscScalar *x,*v; 2455 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2456 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2457 PetscErrorCode ierr; 2458 const PetscInt m = b->AIJ->rmap->n,*idx; 2459 PetscInt n,i; 2460 2461 PetscFunctionBegin; 2462 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2463 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2464 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2465 2466 for (i=0; i<m; i++) { 2467 idx = a->j + a->i[i]; 2468 v = a->a + a->i[i]; 2469 n = a->i[i+1] - a->i[i]; 2470 alpha1 = x[18*i]; 2471 alpha2 = x[18*i+1]; 2472 alpha3 = x[18*i+2]; 2473 alpha4 = x[18*i+3]; 2474 alpha5 = x[18*i+4]; 2475 alpha6 = x[18*i+5]; 2476 alpha7 = x[18*i+6]; 2477 alpha8 = x[18*i+7]; 2478 alpha9 = x[18*i+8]; 2479 alpha10 = x[18*i+9]; 2480 alpha11 = x[18*i+10]; 2481 alpha12 = x[18*i+11]; 2482 alpha13 = x[18*i+12]; 2483 alpha14 = x[18*i+13]; 2484 alpha15 = x[18*i+14]; 2485 alpha16 = x[18*i+15]; 2486 alpha17 = x[18*i+16]; 2487 alpha18 = x[18*i+17]; 2488 while (n-->0) { 2489 y[18*(*idx)] += alpha1*(*v); 2490 y[18*(*idx)+1] += alpha2*(*v); 2491 y[18*(*idx)+2] += alpha3*(*v); 2492 y[18*(*idx)+3] += alpha4*(*v); 2493 y[18*(*idx)+4] += alpha5*(*v); 2494 y[18*(*idx)+5] += alpha6*(*v); 2495 y[18*(*idx)+6] += alpha7*(*v); 2496 y[18*(*idx)+7] += alpha8*(*v); 2497 y[18*(*idx)+8] += alpha9*(*v); 2498 y[18*(*idx)+9] += alpha10*(*v); 2499 y[18*(*idx)+10] += alpha11*(*v); 2500 y[18*(*idx)+11] += alpha12*(*v); 2501 y[18*(*idx)+12] += alpha13*(*v); 2502 y[18*(*idx)+13] += alpha14*(*v); 2503 y[18*(*idx)+14] += alpha15*(*v); 2504 y[18*(*idx)+15] += alpha16*(*v); 2505 y[18*(*idx)+16] += alpha17*(*v); 2506 y[18*(*idx)+17] += alpha18*(*v); 2507 idx++; v++; 2508 } 2509 } 2510 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2511 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2512 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2513 PetscFunctionReturn(0); 2514 } 2515 2516 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2517 { 2518 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2519 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2520 const PetscScalar *x,*v; 2521 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2522 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2523 PetscErrorCode ierr; 2524 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2525 PetscInt n,i,jrow,j; 2526 2527 PetscFunctionBegin; 2528 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2529 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2530 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2531 idx = a->j; 2532 v = a->a; 2533 ii = a->i; 2534 2535 for (i=0; i<m; i++) { 2536 jrow = ii[i]; 2537 n = ii[i+1] - jrow; 2538 sum1 = 0.0; 2539 sum2 = 0.0; 2540 sum3 = 0.0; 2541 sum4 = 0.0; 2542 sum5 = 0.0; 2543 sum6 = 0.0; 2544 sum7 = 0.0; 2545 sum8 = 0.0; 2546 sum9 = 0.0; 2547 sum10 = 0.0; 2548 sum11 = 0.0; 2549 sum12 = 0.0; 2550 sum13 = 0.0; 2551 sum14 = 0.0; 2552 sum15 = 0.0; 2553 sum16 = 0.0; 2554 sum17 = 0.0; 2555 sum18 = 0.0; 2556 for (j=0; j<n; j++) { 2557 sum1 += v[jrow]*x[18*idx[jrow]]; 2558 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2559 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2560 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2561 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2562 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2563 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2564 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2565 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2566 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2567 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2568 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2569 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2570 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2571 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2572 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2573 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2574 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2575 jrow++; 2576 } 2577 y[18*i] += sum1; 2578 y[18*i+1] += sum2; 2579 y[18*i+2] += sum3; 2580 y[18*i+3] += sum4; 2581 y[18*i+4] += sum5; 2582 y[18*i+5] += sum6; 2583 y[18*i+6] += sum7; 2584 y[18*i+7] += sum8; 2585 y[18*i+8] += sum9; 2586 y[18*i+9] += sum10; 2587 y[18*i+10] += sum11; 2588 y[18*i+11] += sum12; 2589 y[18*i+12] += sum13; 2590 y[18*i+13] += sum14; 2591 y[18*i+14] += sum15; 2592 y[18*i+15] += sum16; 2593 y[18*i+16] += sum17; 2594 y[18*i+17] += sum18; 2595 } 2596 2597 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2598 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2599 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2600 PetscFunctionReturn(0); 2601 } 2602 2603 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2604 { 2605 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2606 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2607 const PetscScalar *x,*v; 2608 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2609 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2610 PetscErrorCode ierr; 2611 const PetscInt m = b->AIJ->rmap->n,*idx; 2612 PetscInt n,i; 2613 2614 PetscFunctionBegin; 2615 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2616 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2617 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2618 for (i=0; i<m; i++) { 2619 idx = a->j + a->i[i]; 2620 v = a->a + a->i[i]; 2621 n = a->i[i+1] - a->i[i]; 2622 alpha1 = x[18*i]; 2623 alpha2 = x[18*i+1]; 2624 alpha3 = x[18*i+2]; 2625 alpha4 = x[18*i+3]; 2626 alpha5 = x[18*i+4]; 2627 alpha6 = x[18*i+5]; 2628 alpha7 = x[18*i+6]; 2629 alpha8 = x[18*i+7]; 2630 alpha9 = x[18*i+8]; 2631 alpha10 = x[18*i+9]; 2632 alpha11 = x[18*i+10]; 2633 alpha12 = x[18*i+11]; 2634 alpha13 = x[18*i+12]; 2635 alpha14 = x[18*i+13]; 2636 alpha15 = x[18*i+14]; 2637 alpha16 = x[18*i+15]; 2638 alpha17 = x[18*i+16]; 2639 alpha18 = x[18*i+17]; 2640 while (n-->0) { 2641 y[18*(*idx)] += alpha1*(*v); 2642 y[18*(*idx)+1] += alpha2*(*v); 2643 y[18*(*idx)+2] += alpha3*(*v); 2644 y[18*(*idx)+3] += alpha4*(*v); 2645 y[18*(*idx)+4] += alpha5*(*v); 2646 y[18*(*idx)+5] += alpha6*(*v); 2647 y[18*(*idx)+6] += alpha7*(*v); 2648 y[18*(*idx)+7] += alpha8*(*v); 2649 y[18*(*idx)+8] += alpha9*(*v); 2650 y[18*(*idx)+9] += alpha10*(*v); 2651 y[18*(*idx)+10] += alpha11*(*v); 2652 y[18*(*idx)+11] += alpha12*(*v); 2653 y[18*(*idx)+12] += alpha13*(*v); 2654 y[18*(*idx)+13] += alpha14*(*v); 2655 y[18*(*idx)+14] += alpha15*(*v); 2656 y[18*(*idx)+15] += alpha16*(*v); 2657 y[18*(*idx)+16] += alpha17*(*v); 2658 y[18*(*idx)+17] += alpha18*(*v); 2659 idx++; v++; 2660 } 2661 } 2662 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2663 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2664 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2665 PetscFunctionReturn(0); 2666 } 2667 2668 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2669 { 2670 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2671 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2672 const PetscScalar *x,*v; 2673 PetscScalar *y,*sums; 2674 PetscErrorCode ierr; 2675 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2676 PetscInt n,i,jrow,j,dof = b->dof,k; 2677 2678 PetscFunctionBegin; 2679 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2680 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2681 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2682 idx = a->j; 2683 v = a->a; 2684 ii = a->i; 2685 2686 for (i=0; i<m; i++) { 2687 jrow = ii[i]; 2688 n = ii[i+1] - jrow; 2689 sums = y + dof*i; 2690 for (j=0; j<n; j++) { 2691 for (k=0; k<dof; k++) { 2692 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2693 } 2694 jrow++; 2695 } 2696 } 2697 2698 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2699 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2700 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2701 PetscFunctionReturn(0); 2702 } 2703 2704 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2705 { 2706 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2707 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2708 const PetscScalar *x,*v; 2709 PetscScalar *y,*sums; 2710 PetscErrorCode ierr; 2711 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2712 PetscInt n,i,jrow,j,dof = b->dof,k; 2713 2714 PetscFunctionBegin; 2715 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2716 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2717 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2718 idx = a->j; 2719 v = a->a; 2720 ii = a->i; 2721 2722 for (i=0; i<m; i++) { 2723 jrow = ii[i]; 2724 n = ii[i+1] - jrow; 2725 sums = y + dof*i; 2726 for (j=0; j<n; j++) { 2727 for (k=0; k<dof; k++) { 2728 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2729 } 2730 jrow++; 2731 } 2732 } 2733 2734 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2735 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2736 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2737 PetscFunctionReturn(0); 2738 } 2739 2740 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2741 { 2742 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2743 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2744 const PetscScalar *x,*v,*alpha; 2745 PetscScalar *y; 2746 PetscErrorCode ierr; 2747 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2748 PetscInt n,i,k; 2749 2750 PetscFunctionBegin; 2751 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2752 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2753 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2754 for (i=0; i<m; i++) { 2755 idx = a->j + a->i[i]; 2756 v = a->a + a->i[i]; 2757 n = a->i[i+1] - a->i[i]; 2758 alpha = x + dof*i; 2759 while (n-->0) { 2760 for (k=0; k<dof; k++) { 2761 y[dof*(*idx)+k] += alpha[k]*(*v); 2762 } 2763 idx++; v++; 2764 } 2765 } 2766 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2767 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2768 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2769 PetscFunctionReturn(0); 2770 } 2771 2772 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2773 { 2774 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2775 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2776 const PetscScalar *x,*v,*alpha; 2777 PetscScalar *y; 2778 PetscErrorCode ierr; 2779 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2780 PetscInt n,i,k; 2781 2782 PetscFunctionBegin; 2783 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2784 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2785 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2786 for (i=0; i<m; i++) { 2787 idx = a->j + a->i[i]; 2788 v = a->a + a->i[i]; 2789 n = a->i[i+1] - a->i[i]; 2790 alpha = x + dof*i; 2791 while (n-->0) { 2792 for (k=0; k<dof; k++) { 2793 y[dof*(*idx)+k] += alpha[k]*(*v); 2794 } 2795 idx++; v++; 2796 } 2797 } 2798 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2799 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2800 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2801 PetscFunctionReturn(0); 2802 } 2803 2804 /*===================================================================================*/ 2805 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2806 { 2807 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2808 PetscErrorCode ierr; 2809 2810 PetscFunctionBegin; 2811 /* start the scatter */ 2812 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2813 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2814 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2815 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2816 PetscFunctionReturn(0); 2817 } 2818 2819 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2820 { 2821 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2822 PetscErrorCode ierr; 2823 2824 PetscFunctionBegin; 2825 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2826 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2827 ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2828 ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2829 PetscFunctionReturn(0); 2830 } 2831 2832 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2833 { 2834 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2835 PetscErrorCode ierr; 2836 2837 PetscFunctionBegin; 2838 /* start the scatter */ 2839 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2840 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2841 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2842 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2843 PetscFunctionReturn(0); 2844 } 2845 2846 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2847 { 2848 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2849 PetscErrorCode ierr; 2850 2851 PetscFunctionBegin; 2852 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2853 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2854 ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2855 ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2856 PetscFunctionReturn(0); 2857 } 2858 2859 /* ----------------------------------------------------------------*/ 2860 PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) 2861 { 2862 Mat_Product *product = C->product; 2863 2864 PetscFunctionBegin; 2865 if (product->type == MATPRODUCT_PtAP) { 2866 C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; 2867 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]); 2868 PetscFunctionReturn(0); 2869 } 2870 2871 PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) 2872 { 2873 PetscErrorCode ierr; 2874 Mat_Product *product = C->product; 2875 PetscBool flg = PETSC_FALSE; 2876 Mat A=product->A,P=product->B; 2877 PetscInt alg=1; /* set default algorithm */ 2878 #if !defined(PETSC_HAVE_HYPRE) 2879 const char *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"}; 2880 PetscInt nalg=4; 2881 #else 2882 const char *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"}; 2883 PetscInt nalg=5; 2884 #endif 2885 2886 PetscFunctionBegin; 2887 if (product->type != MATPRODUCT_PtAP) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices",MatProductTypes[product->type]); 2888 2889 /* PtAP */ 2890 /* Check matrix local sizes */ 2891 if (A->rmap->rstart != P->rmap->rstart || A->rmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%D, %D) != Prow (%D,%D)",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend); 2892 if (A->cmap->rstart != P->rmap->rstart || A->cmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%D, %D) != Prow (%D,%D)",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend); 2893 2894 /* Set the default algorithm */ 2895 ierr = PetscStrcmp(C->product->alg,"default",&flg);CHKERRQ(ierr); 2896 if (flg) { 2897 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2898 } 2899 2900 /* Get runtime option */ 2901 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 2902 ierr = PetscOptionsEList("-matproduct_ptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2903 if (flg) { 2904 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2905 } 2906 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2907 2908 ierr = PetscStrcmp(C->product->alg,"allatonce",&flg);CHKERRQ(ierr); 2909 if (flg) { 2910 C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 2911 PetscFunctionReturn(0); 2912 } 2913 2914 ierr = PetscStrcmp(C->product->alg,"allatonce_merged",&flg);CHKERRQ(ierr); 2915 if (flg) { 2916 C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 2917 PetscFunctionReturn(0); 2918 } 2919 2920 /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ 2921 ierr = PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n");CHKERRQ(ierr); 2922 ierr = MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P);CHKERRQ(ierr); 2923 ierr = MatProductSetFromOptions(C);CHKERRQ(ierr); 2924 PetscFunctionReturn(0); 2925 } 2926 2927 /* ----------------------------------------------------------------*/ 2928 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C) 2929 { 2930 PetscErrorCode ierr; 2931 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2932 Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data; 2933 Mat P =pp->AIJ; 2934 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2935 PetscInt *pti,*ptj,*ptJ; 2936 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2937 const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 2938 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 2939 MatScalar *ca; 2940 const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 2941 2942 PetscFunctionBegin; 2943 /* Get ij structure of P^T */ 2944 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2945 2946 cn = pn*ppdof; 2947 /* Allocate ci array, arrays for fill computation and */ 2948 /* free space for accumulating nonzero column info */ 2949 ierr = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr); 2950 ci[0] = 0; 2951 2952 /* Work arrays for rows of P^T*A */ 2953 ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr); 2954 ierr = PetscArrayzero(ptadenserow,an);CHKERRQ(ierr); 2955 ierr = PetscArrayzero(denserow,cn);CHKERRQ(ierr); 2956 2957 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 2958 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 2959 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2960 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr); 2961 current_space = free_space; 2962 2963 /* Determine symbolic info for each row of C: */ 2964 for (i=0; i<pn; i++) { 2965 ptnzi = pti[i+1] - pti[i]; 2966 ptJ = ptj + pti[i]; 2967 for (dof=0; dof<ppdof; dof++) { 2968 ptanzi = 0; 2969 /* Determine symbolic row of PtA: */ 2970 for (j=0; j<ptnzi; j++) { 2971 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 2972 arow = ptJ[j]*ppdof + dof; 2973 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 2974 anzj = ai[arow+1] - ai[arow]; 2975 ajj = aj + ai[arow]; 2976 for (k=0; k<anzj; k++) { 2977 if (!ptadenserow[ajj[k]]) { 2978 ptadenserow[ajj[k]] = -1; 2979 ptasparserow[ptanzi++] = ajj[k]; 2980 } 2981 } 2982 } 2983 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 2984 ptaj = ptasparserow; 2985 cnzi = 0; 2986 for (j=0; j<ptanzi; j++) { 2987 /* Get offset within block of P */ 2988 pshift = *ptaj%ppdof; 2989 /* Get block row of P */ 2990 prow = (*ptaj++)/ppdof; /* integer division */ 2991 /* P has same number of nonzeros per row as the compressed form */ 2992 pnzj = pi[prow+1] - pi[prow]; 2993 pjj = pj + pi[prow]; 2994 for (k=0;k<pnzj;k++) { 2995 /* Locations in C are shifted by the offset within the block */ 2996 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 2997 if (!denserow[pjj[k]*ppdof+pshift]) { 2998 denserow[pjj[k]*ppdof+pshift] = -1; 2999 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 3000 } 3001 } 3002 } 3003 3004 /* sort sparserow */ 3005 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 3006 3007 /* If free space is not available, make more free space */ 3008 /* Double the amount of total space in the list */ 3009 if (current_space->local_remaining<cnzi) { 3010 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 3011 } 3012 3013 /* Copy data into free space, and zero out denserows */ 3014 ierr = PetscArraycpy(current_space->array,sparserow,cnzi);CHKERRQ(ierr); 3015 3016 current_space->array += cnzi; 3017 current_space->local_used += cnzi; 3018 current_space->local_remaining -= cnzi; 3019 3020 for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 3021 for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0; 3022 3023 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 3024 /* For now, we will recompute what is needed. */ 3025 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 3026 } 3027 } 3028 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 3029 /* Allocate space for cj, initialize cj, and */ 3030 /* destroy list of free space and other temporary array(s) */ 3031 ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr); 3032 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 3033 ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr); 3034 3035 /* Allocate space for ca */ 3036 ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr); 3037 3038 /* put together the new matrix */ 3039 ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,NULL,C);CHKERRQ(ierr); 3040 ierr = MatSetBlockSize(C,pp->dof);CHKERRQ(ierr); 3041 3042 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3043 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3044 c = (Mat_SeqAIJ*)(C->data); 3045 c->free_a = PETSC_TRUE; 3046 c->free_ij = PETSC_TRUE; 3047 c->nonew = 0; 3048 3049 C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 3050 C->ops->productnumeric = MatProductNumeric_PtAP; 3051 3052 /* Clean up. */ 3053 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 3054 PetscFunctionReturn(0); 3055 } 3056 3057 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 3058 { 3059 /* This routine requires testing -- first draft only */ 3060 PetscErrorCode ierr; 3061 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 3062 Mat P =pp->AIJ; 3063 Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data; 3064 Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data; 3065 Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data; 3066 const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj; 3067 const PetscInt *ci=c->i,*cj=c->j,*cjj; 3068 const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3069 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3070 const MatScalar *aa=a->a,*pa=p->a,*pA,*paj; 3071 MatScalar *ca=c->a,*caj,*apa; 3072 3073 PetscFunctionBegin; 3074 /* Allocate temporary array for storage of one row of A*P */ 3075 ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr); 3076 3077 /* Clear old values in C */ 3078 ierr = PetscArrayzero(ca,ci[cm]);CHKERRQ(ierr); 3079 3080 for (i=0; i<am; i++) { 3081 /* Form sparse row of A*P */ 3082 anzi = ai[i+1] - ai[i]; 3083 apnzj = 0; 3084 for (j=0; j<anzi; j++) { 3085 /* Get offset within block of P */ 3086 pshift = *aj%ppdof; 3087 /* Get block row of P */ 3088 prow = *aj++/ppdof; /* integer division */ 3089 pnzj = pi[prow+1] - pi[prow]; 3090 pjj = pj + pi[prow]; 3091 paj = pa + pi[prow]; 3092 for (k=0; k<pnzj; k++) { 3093 poffset = pjj[k]*ppdof+pshift; 3094 if (!apjdense[poffset]) { 3095 apjdense[poffset] = -1; 3096 apj[apnzj++] = poffset; 3097 } 3098 apa[poffset] += (*aa)*paj[k]; 3099 } 3100 ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 3101 aa++; 3102 } 3103 3104 /* Sort the j index array for quick sparse axpy. */ 3105 /* Note: a array does not need sorting as it is in dense storage locations. */ 3106 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 3107 3108 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 3109 prow = i/ppdof; /* integer division */ 3110 pshift = i%ppdof; 3111 poffset = pi[prow]; 3112 pnzi = pi[prow+1] - poffset; 3113 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 3114 pJ = pj+poffset; 3115 pA = pa+poffset; 3116 for (j=0; j<pnzi; j++) { 3117 crow = (*pJ)*ppdof+pshift; 3118 cjj = cj + ci[crow]; 3119 caj = ca + ci[crow]; 3120 pJ++; 3121 /* Perform sparse axpy operation. Note cjj includes apj. */ 3122 for (k=0,nextap=0; nextap<apnzj; k++) { 3123 if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]]; 3124 } 3125 ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 3126 pA++; 3127 } 3128 3129 /* Zero the current row info for A*P */ 3130 for (j=0; j<apnzj; j++) { 3131 apa[apj[j]] = 0.; 3132 apjdense[apj[j]] = 0; 3133 } 3134 } 3135 3136 /* Assemble the final matrix and clean up */ 3137 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3138 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3139 ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr); 3140 PetscFunctionReturn(0); 3141 } 3142 3143 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) 3144 { 3145 PetscErrorCode ierr; 3146 Mat_Product *product = C->product; 3147 Mat A=product->A,P=product->B; 3148 3149 PetscFunctionBegin; 3150 ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C);CHKERRQ(ierr); 3151 PetscFunctionReturn(0); 3152 } 3153 3154 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 3155 { 3156 PetscFunctionBegin; 3157 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet"); 3158 } 3159 3160 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 3161 { 3162 PetscFunctionBegin; 3163 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 3164 } 3165 3166 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat); 3167 3168 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C) 3169 { 3170 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3171 PetscErrorCode ierr; 3172 3173 PetscFunctionBegin; 3174 3175 ierr = MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C);CHKERRQ(ierr); 3176 PetscFunctionReturn(0); 3177 } 3178 3179 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat); 3180 3181 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C) 3182 { 3183 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3184 PetscErrorCode ierr; 3185 3186 PetscFunctionBegin; 3187 ierr = MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C);CHKERRQ(ierr); 3188 C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; 3189 PetscFunctionReturn(0); 3190 } 3191 3192 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat); 3193 3194 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C) 3195 { 3196 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3197 PetscErrorCode ierr; 3198 3199 PetscFunctionBegin; 3200 3201 ierr = MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C);CHKERRQ(ierr); 3202 PetscFunctionReturn(0); 3203 } 3204 3205 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat); 3206 3207 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C) 3208 { 3209 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data; 3210 PetscErrorCode ierr; 3211 3212 PetscFunctionBegin; 3213 3214 ierr = MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C);CHKERRQ(ierr); 3215 C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; 3216 PetscFunctionReturn(0); 3217 } 3218 3219 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) 3220 { 3221 PetscErrorCode ierr; 3222 Mat_Product *product = C->product; 3223 Mat A=product->A,P=product->B; 3224 PetscBool flg; 3225 3226 PetscFunctionBegin; 3227 ierr = PetscStrcmp(product->alg,"allatonce",&flg);CHKERRQ(ierr); 3228 if (flg) { 3229 ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C);CHKERRQ(ierr); 3230 C->ops->productnumeric = MatProductNumeric_PtAP; 3231 PetscFunctionReturn(0); 3232 } 3233 3234 ierr = PetscStrcmp(product->alg,"allatonce_merged",&flg);CHKERRQ(ierr); 3235 if (flg) { 3236 ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C);CHKERRQ(ierr); 3237 C->ops->productnumeric = MatProductNumeric_PtAP; 3238 PetscFunctionReturn(0); 3239 } 3240 3241 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported"); 3242 } 3243 3244 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3245 { 3246 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3247 Mat a = b->AIJ,B; 3248 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 3249 PetscErrorCode ierr; 3250 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3251 PetscInt *cols; 3252 PetscScalar *vals; 3253 3254 PetscFunctionBegin; 3255 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 3256 ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr); 3257 for (i=0; i<m; i++) { 3258 nmax = PetscMax(nmax,aij->ilen[i]); 3259 for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i]; 3260 } 3261 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3262 ierr = MatSetSizes(B,dof*m,dof*n,dof*m,dof*n);CHKERRQ(ierr); 3263 ierr = MatSetType(B,newtype);CHKERRQ(ierr); 3264 ierr = MatSeqAIJSetPreallocation(B,0,ilen);CHKERRQ(ierr); 3265 ierr = PetscFree(ilen);CHKERRQ(ierr); 3266 ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr); 3267 ii = 0; 3268 for (i=0; i<m; i++) { 3269 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3270 for (j=0; j<dof; j++) { 3271 for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j; 3272 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3273 ii++; 3274 } 3275 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3276 } 3277 ierr = PetscFree(icols);CHKERRQ(ierr); 3278 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3279 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3280 3281 if (reuse == MAT_INPLACE_MATRIX) { 3282 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 3283 } else { 3284 *newmat = B; 3285 } 3286 PetscFunctionReturn(0); 3287 } 3288 3289 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3290 3291 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3292 { 3293 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3294 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 3295 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 3296 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 3297 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 3298 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 3299 PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0; 3300 PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL; 3301 PetscInt rstart,cstart,*garray,ii,k; 3302 PetscErrorCode ierr; 3303 PetscScalar *vals,*ovals; 3304 3305 PetscFunctionBegin; 3306 ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr); 3307 for (i=0; i<A->rmap->n/dof; i++) { 3308 nmax = PetscMax(nmax,AIJ->ilen[i]); 3309 onmax = PetscMax(onmax,OAIJ->ilen[i]); 3310 for (j=0; j<dof; j++) { 3311 dnz[dof*i+j] = AIJ->ilen[i]; 3312 onz[dof*i+j] = OAIJ->ilen[i]; 3313 } 3314 } 3315 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3316 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3317 ierr = MatSetType(B,newtype);CHKERRQ(ierr); 3318 ierr = MatMPIAIJSetPreallocation(B,0,dnz,0,onz);CHKERRQ(ierr); 3319 ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr); 3320 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 3321 3322 ierr = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr); 3323 rstart = dof*maij->A->rmap->rstart; 3324 cstart = dof*maij->A->cmap->rstart; 3325 garray = mpiaij->garray; 3326 3327 ii = rstart; 3328 for (i=0; i<A->rmap->n/dof; i++) { 3329 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3330 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3331 for (j=0; j<dof; j++) { 3332 for (k=0; k<ncols; k++) { 3333 icols[k] = cstart + dof*cols[k]+j; 3334 } 3335 for (k=0; k<oncols; k++) { 3336 oicols[k] = dof*garray[ocols[k]]+j; 3337 } 3338 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3339 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 3340 ii++; 3341 } 3342 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3343 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3344 } 3345 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 3346 3347 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3348 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3349 3350 if (reuse == MAT_INPLACE_MATRIX) { 3351 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 3352 ((PetscObject)A)->refct = 1; 3353 3354 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 3355 3356 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3357 } else { 3358 *newmat = B; 3359 } 3360 PetscFunctionReturn(0); 3361 } 3362 3363 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 3364 { 3365 PetscErrorCode ierr; 3366 Mat A; 3367 3368 PetscFunctionBegin; 3369 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 3370 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 3371 ierr = MatDestroy(&A);CHKERRQ(ierr); 3372 PetscFunctionReturn(0); 3373 } 3374 3375 PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[]) 3376 { 3377 PetscErrorCode ierr; 3378 Mat A; 3379 3380 PetscFunctionBegin; 3381 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 3382 ierr = MatCreateSubMatrices(A,n,irow,icol,scall,submat);CHKERRQ(ierr); 3383 ierr = MatDestroy(&A);CHKERRQ(ierr); 3384 PetscFunctionReturn(0); 3385 } 3386 3387 /* ---------------------------------------------------------------------------------- */ 3388 /*@ 3389 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 3390 operations for multicomponent problems. It interpolates each component the same 3391 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 3392 and MATMPIAIJ for distributed matrices. 3393 3394 Collective 3395 3396 Input Parameters: 3397 + A - the AIJ matrix describing the action on blocks 3398 - dof - the block size (number of components per node) 3399 3400 Output Parameter: 3401 . maij - the new MAIJ matrix 3402 3403 Operations provided: 3404 + MatMult 3405 . MatMultTranspose 3406 . MatMultAdd 3407 . MatMultTransposeAdd 3408 - MatView 3409 3410 Level: advanced 3411 3412 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ 3413 @*/ 3414 PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 3415 { 3416 PetscErrorCode ierr; 3417 PetscMPIInt size; 3418 PetscInt n; 3419 Mat B; 3420 #if defined(PETSC_HAVE_CUDA) 3421 /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 3422 PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; 3423 #endif 3424 3425 PetscFunctionBegin; 3426 dof = PetscAbs(dof); 3427 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3428 3429 if (dof == 1) *maij = A; 3430 else { 3431 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3432 /* propagate vec type */ 3433 ierr = MatSetVecType(B,A->defaultvectype);CHKERRQ(ierr); 3434 ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3435 ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr); 3436 ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr); 3437 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3438 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3439 3440 B->assembled = PETSC_TRUE; 3441 3442 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 3443 if (size == 1) { 3444 Mat_SeqMAIJ *b; 3445 3446 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 3447 3448 B->ops->setup = NULL; 3449 B->ops->destroy = MatDestroy_SeqMAIJ; 3450 B->ops->view = MatView_SeqMAIJ; 3451 3452 b = (Mat_SeqMAIJ*)B->data; 3453 b->dof = dof; 3454 b->AIJ = A; 3455 3456 if (dof == 2) { 3457 B->ops->mult = MatMult_SeqMAIJ_2; 3458 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3459 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3460 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3461 } else if (dof == 3) { 3462 B->ops->mult = MatMult_SeqMAIJ_3; 3463 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3464 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3465 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3466 } else if (dof == 4) { 3467 B->ops->mult = MatMult_SeqMAIJ_4; 3468 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3469 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3470 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3471 } else if (dof == 5) { 3472 B->ops->mult = MatMult_SeqMAIJ_5; 3473 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3474 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3475 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 3476 } else if (dof == 6) { 3477 B->ops->mult = MatMult_SeqMAIJ_6; 3478 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 3479 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 3480 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3481 } else if (dof == 7) { 3482 B->ops->mult = MatMult_SeqMAIJ_7; 3483 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3484 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3485 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 3486 } else if (dof == 8) { 3487 B->ops->mult = MatMult_SeqMAIJ_8; 3488 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 3489 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 3490 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 3491 } else if (dof == 9) { 3492 B->ops->mult = MatMult_SeqMAIJ_9; 3493 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 3494 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 3495 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3496 } else if (dof == 10) { 3497 B->ops->mult = MatMult_SeqMAIJ_10; 3498 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3499 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3500 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3501 } else if (dof == 11) { 3502 B->ops->mult = MatMult_SeqMAIJ_11; 3503 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3504 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3505 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 3506 } else if (dof == 16) { 3507 B->ops->mult = MatMult_SeqMAIJ_16; 3508 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 3509 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 3510 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3511 } else if (dof == 18) { 3512 B->ops->mult = MatMult_SeqMAIJ_18; 3513 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3514 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3515 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3516 } else { 3517 B->ops->mult = MatMult_SeqMAIJ_N; 3518 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 3519 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 3520 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 3521 } 3522 #if defined(PETSC_HAVE_CUDA) 3523 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3524 #endif 3525 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3526 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 3527 } else { 3528 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data; 3529 Mat_MPIMAIJ *b; 3530 IS from,to; 3531 Vec gvec; 3532 3533 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 3534 3535 B->ops->setup = NULL; 3536 B->ops->destroy = MatDestroy_MPIMAIJ; 3537 B->ops->view = MatView_MPIMAIJ; 3538 3539 b = (Mat_MPIMAIJ*)B->data; 3540 b->dof = dof; 3541 b->A = A; 3542 3543 ierr = MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ);CHKERRQ(ierr); 3544 ierr = MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ);CHKERRQ(ierr); 3545 3546 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3547 ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr); 3548 ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr); 3549 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3550 ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr); 3551 3552 /* create two temporary Index sets for build scatter gather */ 3553 ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 3554 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3555 3556 /* create temporary global vector to generate scatter context */ 3557 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 3558 3559 /* generate the scatter context */ 3560 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3561 3562 ierr = ISDestroy(&from);CHKERRQ(ierr); 3563 ierr = ISDestroy(&to);CHKERRQ(ierr); 3564 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 3565 3566 B->ops->mult = MatMult_MPIMAIJ_dof; 3567 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3568 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3569 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3570 3571 #if defined(PETSC_HAVE_CUDA) 3572 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3573 #endif 3574 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3575 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ);CHKERRQ(ierr); 3576 } 3577 B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 3578 B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 3579 ierr = MatSetUp(B);CHKERRQ(ierr); 3580 #if defined(PETSC_HAVE_CUDA) 3581 /* temporary until we have CUDA implementation of MAIJ */ 3582 { 3583 PetscBool flg; 3584 if (convert) { 3585 ierr = PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"");CHKERRQ(ierr); 3586 if (flg) { 3587 ierr = MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 3588 } 3589 } 3590 } 3591 #endif 3592 *maij = B; 3593 } 3594 PetscFunctionReturn(0); 3595 } 3596