1 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20 #include <../src/mat/utils/freespace.h> 21 22 /*@ 23 MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 24 25 Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 26 27 Input Parameter: 28 . A - the MAIJ matrix 29 30 Output Parameter: 31 . B - the AIJ matrix 32 33 Level: advanced 34 35 Notes: 36 The reference count on the AIJ matrix is not increased so you should not destroy it. 37 38 .seealso: MatCreateMAIJ() 39 @*/ 40 PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 41 { 42 PetscErrorCode ierr; 43 PetscBool ismpimaij,isseqmaij; 44 45 PetscFunctionBegin; 46 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 47 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 48 if (ismpimaij) { 49 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 50 51 *B = b->A; 52 } else if (isseqmaij) { 53 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 54 55 *B = b->AIJ; 56 } else { 57 *B = A; 58 } 59 PetscFunctionReturn(0); 60 } 61 62 /*@ 63 MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 64 65 Logically Collective 66 67 Input Parameter: 68 + A - the MAIJ matrix 69 - dof - the block size for the new matrix 70 71 Output Parameter: 72 . B - the new MAIJ matrix 73 74 Level: advanced 75 76 .seealso: MatCreateMAIJ() 77 @*/ 78 PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 79 { 80 PetscErrorCode ierr; 81 Mat Aij = NULL; 82 83 PetscFunctionBegin; 84 PetscValidLogicalCollectiveInt(A,dof,2); 85 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 86 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 87 PetscFunctionReturn(0); 88 } 89 90 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 91 { 92 PetscErrorCode ierr; 93 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 94 95 PetscFunctionBegin; 96 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 97 ierr = PetscFree(A->data);CHKERRQ(ierr); 98 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr); 99 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);CHKERRQ(ierr); 100 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijperm_seqmaij_C",NULL);CHKERRQ(ierr); 101 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijmkl_seqmaij_C",NULL);CHKERRQ(ierr); 102 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqmaij_C",NULL);CHKERRQ(ierr); 103 PetscFunctionReturn(0); 104 } 105 106 PetscErrorCode MatSetUp_MAIJ(Mat A) 107 { 108 PetscFunctionBegin; 109 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices"); 110 PetscFunctionReturn(0); 111 } 112 113 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 114 { 115 PetscErrorCode ierr; 116 Mat B; 117 118 PetscFunctionBegin; 119 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 120 ierr = MatView(B,viewer);CHKERRQ(ierr); 121 ierr = MatDestroy(&B);CHKERRQ(ierr); 122 PetscFunctionReturn(0); 123 } 124 125 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 126 { 127 PetscErrorCode ierr; 128 Mat B; 129 130 PetscFunctionBegin; 131 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 132 ierr = MatView(B,viewer);CHKERRQ(ierr); 133 ierr = MatDestroy(&B);CHKERRQ(ierr); 134 PetscFunctionReturn(0); 135 } 136 137 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 138 { 139 PetscErrorCode ierr; 140 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 141 142 PetscFunctionBegin; 143 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 144 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 145 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 146 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 147 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 148 ierr = PetscFree(A->data);CHKERRQ(ierr); 149 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr); 150 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr); 151 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_mpimaij_C",NULL);CHKERRQ(ierr); 152 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 153 PetscFunctionReturn(0); 154 } 155 156 /*MC 157 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 158 multicomponent problems, interpolating or restricting each component the same way independently. 159 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 160 161 Operations provided: 162 . MatMult 163 . MatMultTranspose 164 . MatMultAdd 165 . MatMultTransposeAdd 166 167 Level: advanced 168 169 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ() 170 M*/ 171 172 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 173 { 174 PetscErrorCode ierr; 175 Mat_MPIMAIJ *b; 176 PetscMPIInt size; 177 178 PetscFunctionBegin; 179 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 180 A->data = (void*)b; 181 182 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 183 184 A->ops->setup = MatSetUp_MAIJ; 185 186 b->AIJ = 0; 187 b->dof = 0; 188 b->OAIJ = 0; 189 b->ctx = 0; 190 b->w = 0; 191 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 192 if (size == 1) { 193 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 194 } else { 195 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 196 } 197 A->preallocated = PETSC_TRUE; 198 A->assembled = PETSC_TRUE; 199 PetscFunctionReturn(0); 200 } 201 202 /* --------------------------------------------------------------------------------------*/ 203 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 204 { 205 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 206 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 207 const PetscScalar *x,*v; 208 PetscScalar *y, sum1, sum2; 209 PetscErrorCode ierr; 210 PetscInt nonzerorow=0,n,i,jrow,j; 211 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 212 213 PetscFunctionBegin; 214 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 215 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 216 idx = a->j; 217 v = a->a; 218 ii = a->i; 219 220 for (i=0; i<m; i++) { 221 jrow = ii[i]; 222 n = ii[i+1] - jrow; 223 sum1 = 0.0; 224 sum2 = 0.0; 225 226 nonzerorow += (n>0); 227 for (j=0; j<n; j++) { 228 sum1 += v[jrow]*x[2*idx[jrow]]; 229 sum2 += v[jrow]*x[2*idx[jrow]+1]; 230 jrow++; 231 } 232 y[2*i] = sum1; 233 y[2*i+1] = sum2; 234 } 235 236 ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 237 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 238 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 239 PetscFunctionReturn(0); 240 } 241 242 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 243 { 244 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 245 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 246 const PetscScalar *x,*v; 247 PetscScalar *y,alpha1,alpha2; 248 PetscErrorCode ierr; 249 PetscInt n,i; 250 const PetscInt m = b->AIJ->rmap->n,*idx; 251 252 PetscFunctionBegin; 253 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 254 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 255 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 256 257 for (i=0; i<m; i++) { 258 idx = a->j + a->i[i]; 259 v = a->a + a->i[i]; 260 n = a->i[i+1] - a->i[i]; 261 alpha1 = x[2*i]; 262 alpha2 = x[2*i+1]; 263 while (n-->0) { 264 y[2*(*idx)] += alpha1*(*v); 265 y[2*(*idx)+1] += alpha2*(*v); 266 idx++; v++; 267 } 268 } 269 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 270 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 271 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 272 PetscFunctionReturn(0); 273 } 274 275 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 276 { 277 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 278 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 279 const PetscScalar *x,*v; 280 PetscScalar *y,sum1, sum2; 281 PetscErrorCode ierr; 282 PetscInt n,i,jrow,j; 283 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 284 285 PetscFunctionBegin; 286 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 287 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 288 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 289 idx = a->j; 290 v = a->a; 291 ii = a->i; 292 293 for (i=0; i<m; i++) { 294 jrow = ii[i]; 295 n = ii[i+1] - jrow; 296 sum1 = 0.0; 297 sum2 = 0.0; 298 for (j=0; j<n; j++) { 299 sum1 += v[jrow]*x[2*idx[jrow]]; 300 sum2 += v[jrow]*x[2*idx[jrow]+1]; 301 jrow++; 302 } 303 y[2*i] += sum1; 304 y[2*i+1] += sum2; 305 } 306 307 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 308 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 309 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 310 PetscFunctionReturn(0); 311 } 312 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 313 { 314 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 315 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 316 const PetscScalar *x,*v; 317 PetscScalar *y,alpha1,alpha2; 318 PetscErrorCode ierr; 319 PetscInt n,i; 320 const PetscInt m = b->AIJ->rmap->n,*idx; 321 322 PetscFunctionBegin; 323 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 324 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 325 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 326 327 for (i=0; i<m; i++) { 328 idx = a->j + a->i[i]; 329 v = a->a + a->i[i]; 330 n = a->i[i+1] - a->i[i]; 331 alpha1 = x[2*i]; 332 alpha2 = x[2*i+1]; 333 while (n-->0) { 334 y[2*(*idx)] += alpha1*(*v); 335 y[2*(*idx)+1] += alpha2*(*v); 336 idx++; v++; 337 } 338 } 339 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 340 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 341 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 342 PetscFunctionReturn(0); 343 } 344 /* --------------------------------------------------------------------------------------*/ 345 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 346 { 347 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 348 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 349 const PetscScalar *x,*v; 350 PetscScalar *y,sum1, sum2, sum3; 351 PetscErrorCode ierr; 352 PetscInt nonzerorow=0,n,i,jrow,j; 353 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 354 355 PetscFunctionBegin; 356 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 357 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 358 idx = a->j; 359 v = a->a; 360 ii = a->i; 361 362 for (i=0; i<m; i++) { 363 jrow = ii[i]; 364 n = ii[i+1] - jrow; 365 sum1 = 0.0; 366 sum2 = 0.0; 367 sum3 = 0.0; 368 369 nonzerorow += (n>0); 370 for (j=0; j<n; j++) { 371 sum1 += v[jrow]*x[3*idx[jrow]]; 372 sum2 += v[jrow]*x[3*idx[jrow]+1]; 373 sum3 += v[jrow]*x[3*idx[jrow]+2]; 374 jrow++; 375 } 376 y[3*i] = sum1; 377 y[3*i+1] = sum2; 378 y[3*i+2] = sum3; 379 } 380 381 ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 382 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 383 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 384 PetscFunctionReturn(0); 385 } 386 387 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 388 { 389 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 390 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 391 const PetscScalar *x,*v; 392 PetscScalar *y,alpha1,alpha2,alpha3; 393 PetscErrorCode ierr; 394 PetscInt n,i; 395 const PetscInt m = b->AIJ->rmap->n,*idx; 396 397 PetscFunctionBegin; 398 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 399 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 400 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 401 402 for (i=0; i<m; i++) { 403 idx = a->j + a->i[i]; 404 v = a->a + a->i[i]; 405 n = a->i[i+1] - a->i[i]; 406 alpha1 = x[3*i]; 407 alpha2 = x[3*i+1]; 408 alpha3 = x[3*i+2]; 409 while (n-->0) { 410 y[3*(*idx)] += alpha1*(*v); 411 y[3*(*idx)+1] += alpha2*(*v); 412 y[3*(*idx)+2] += alpha3*(*v); 413 idx++; v++; 414 } 415 } 416 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 417 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 418 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 419 PetscFunctionReturn(0); 420 } 421 422 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 423 { 424 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 425 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 426 const PetscScalar *x,*v; 427 PetscScalar *y,sum1, sum2, sum3; 428 PetscErrorCode ierr; 429 PetscInt n,i,jrow,j; 430 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 431 432 PetscFunctionBegin; 433 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 434 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 435 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 436 idx = a->j; 437 v = a->a; 438 ii = a->i; 439 440 for (i=0; i<m; i++) { 441 jrow = ii[i]; 442 n = ii[i+1] - jrow; 443 sum1 = 0.0; 444 sum2 = 0.0; 445 sum3 = 0.0; 446 for (j=0; j<n; j++) { 447 sum1 += v[jrow]*x[3*idx[jrow]]; 448 sum2 += v[jrow]*x[3*idx[jrow]+1]; 449 sum3 += v[jrow]*x[3*idx[jrow]+2]; 450 jrow++; 451 } 452 y[3*i] += sum1; 453 y[3*i+1] += sum2; 454 y[3*i+2] += sum3; 455 } 456 457 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 458 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 459 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 460 PetscFunctionReturn(0); 461 } 462 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 463 { 464 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 465 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 466 const PetscScalar *x,*v; 467 PetscScalar *y,alpha1,alpha2,alpha3; 468 PetscErrorCode ierr; 469 PetscInt n,i; 470 const PetscInt m = b->AIJ->rmap->n,*idx; 471 472 PetscFunctionBegin; 473 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 474 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 475 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 476 for (i=0; i<m; i++) { 477 idx = a->j + a->i[i]; 478 v = a->a + a->i[i]; 479 n = a->i[i+1] - a->i[i]; 480 alpha1 = x[3*i]; 481 alpha2 = x[3*i+1]; 482 alpha3 = x[3*i+2]; 483 while (n-->0) { 484 y[3*(*idx)] += alpha1*(*v); 485 y[3*(*idx)+1] += alpha2*(*v); 486 y[3*(*idx)+2] += alpha3*(*v); 487 idx++; v++; 488 } 489 } 490 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 491 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 492 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 493 PetscFunctionReturn(0); 494 } 495 496 /* ------------------------------------------------------------------------------*/ 497 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 498 { 499 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 500 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 501 const PetscScalar *x,*v; 502 PetscScalar *y,sum1, sum2, sum3, sum4; 503 PetscErrorCode ierr; 504 PetscInt nonzerorow=0,n,i,jrow,j; 505 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 506 507 PetscFunctionBegin; 508 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 509 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 510 idx = a->j; 511 v = a->a; 512 ii = a->i; 513 514 for (i=0; i<m; i++) { 515 jrow = ii[i]; 516 n = ii[i+1] - jrow; 517 sum1 = 0.0; 518 sum2 = 0.0; 519 sum3 = 0.0; 520 sum4 = 0.0; 521 nonzerorow += (n>0); 522 for (j=0; j<n; j++) { 523 sum1 += v[jrow]*x[4*idx[jrow]]; 524 sum2 += v[jrow]*x[4*idx[jrow]+1]; 525 sum3 += v[jrow]*x[4*idx[jrow]+2]; 526 sum4 += v[jrow]*x[4*idx[jrow]+3]; 527 jrow++; 528 } 529 y[4*i] = sum1; 530 y[4*i+1] = sum2; 531 y[4*i+2] = sum3; 532 y[4*i+3] = sum4; 533 } 534 535 ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 536 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 537 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 538 PetscFunctionReturn(0); 539 } 540 541 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 542 { 543 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 544 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 545 const PetscScalar *x,*v; 546 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 547 PetscErrorCode ierr; 548 PetscInt n,i; 549 const PetscInt m = b->AIJ->rmap->n,*idx; 550 551 PetscFunctionBegin; 552 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 553 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 554 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 555 for (i=0; i<m; i++) { 556 idx = a->j + a->i[i]; 557 v = a->a + a->i[i]; 558 n = a->i[i+1] - a->i[i]; 559 alpha1 = x[4*i]; 560 alpha2 = x[4*i+1]; 561 alpha3 = x[4*i+2]; 562 alpha4 = x[4*i+3]; 563 while (n-->0) { 564 y[4*(*idx)] += alpha1*(*v); 565 y[4*(*idx)+1] += alpha2*(*v); 566 y[4*(*idx)+2] += alpha3*(*v); 567 y[4*(*idx)+3] += alpha4*(*v); 568 idx++; v++; 569 } 570 } 571 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 572 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 573 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 574 PetscFunctionReturn(0); 575 } 576 577 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 578 { 579 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 580 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 581 const PetscScalar *x,*v; 582 PetscScalar *y,sum1, sum2, sum3, sum4; 583 PetscErrorCode ierr; 584 PetscInt n,i,jrow,j; 585 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 586 587 PetscFunctionBegin; 588 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 589 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 590 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 591 idx = a->j; 592 v = a->a; 593 ii = a->i; 594 595 for (i=0; i<m; i++) { 596 jrow = ii[i]; 597 n = ii[i+1] - jrow; 598 sum1 = 0.0; 599 sum2 = 0.0; 600 sum3 = 0.0; 601 sum4 = 0.0; 602 for (j=0; j<n; j++) { 603 sum1 += v[jrow]*x[4*idx[jrow]]; 604 sum2 += v[jrow]*x[4*idx[jrow]+1]; 605 sum3 += v[jrow]*x[4*idx[jrow]+2]; 606 sum4 += v[jrow]*x[4*idx[jrow]+3]; 607 jrow++; 608 } 609 y[4*i] += sum1; 610 y[4*i+1] += sum2; 611 y[4*i+2] += sum3; 612 y[4*i+3] += sum4; 613 } 614 615 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 616 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 617 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 618 PetscFunctionReturn(0); 619 } 620 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 621 { 622 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 623 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 624 const PetscScalar *x,*v; 625 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 626 PetscErrorCode ierr; 627 PetscInt n,i; 628 const PetscInt m = b->AIJ->rmap->n,*idx; 629 630 PetscFunctionBegin; 631 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 632 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 633 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 634 635 for (i=0; i<m; i++) { 636 idx = a->j + a->i[i]; 637 v = a->a + a->i[i]; 638 n = a->i[i+1] - a->i[i]; 639 alpha1 = x[4*i]; 640 alpha2 = x[4*i+1]; 641 alpha3 = x[4*i+2]; 642 alpha4 = x[4*i+3]; 643 while (n-->0) { 644 y[4*(*idx)] += alpha1*(*v); 645 y[4*(*idx)+1] += alpha2*(*v); 646 y[4*(*idx)+2] += alpha3*(*v); 647 y[4*(*idx)+3] += alpha4*(*v); 648 idx++; v++; 649 } 650 } 651 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 652 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 653 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 654 PetscFunctionReturn(0); 655 } 656 /* ------------------------------------------------------------------------------*/ 657 658 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 659 { 660 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 661 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 662 const PetscScalar *x,*v; 663 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 664 PetscErrorCode ierr; 665 PetscInt nonzerorow=0,n,i,jrow,j; 666 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 667 668 PetscFunctionBegin; 669 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 670 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 671 idx = a->j; 672 v = a->a; 673 ii = a->i; 674 675 for (i=0; i<m; i++) { 676 jrow = ii[i]; 677 n = ii[i+1] - jrow; 678 sum1 = 0.0; 679 sum2 = 0.0; 680 sum3 = 0.0; 681 sum4 = 0.0; 682 sum5 = 0.0; 683 684 nonzerorow += (n>0); 685 for (j=0; j<n; j++) { 686 sum1 += v[jrow]*x[5*idx[jrow]]; 687 sum2 += v[jrow]*x[5*idx[jrow]+1]; 688 sum3 += v[jrow]*x[5*idx[jrow]+2]; 689 sum4 += v[jrow]*x[5*idx[jrow]+3]; 690 sum5 += v[jrow]*x[5*idx[jrow]+4]; 691 jrow++; 692 } 693 y[5*i] = sum1; 694 y[5*i+1] = sum2; 695 y[5*i+2] = sum3; 696 y[5*i+3] = sum4; 697 y[5*i+4] = sum5; 698 } 699 700 ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 701 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 702 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 703 PetscFunctionReturn(0); 704 } 705 706 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 707 { 708 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 709 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 710 const PetscScalar *x,*v; 711 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 712 PetscErrorCode ierr; 713 PetscInt n,i; 714 const PetscInt m = b->AIJ->rmap->n,*idx; 715 716 PetscFunctionBegin; 717 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 718 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 719 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 720 721 for (i=0; i<m; i++) { 722 idx = a->j + a->i[i]; 723 v = a->a + a->i[i]; 724 n = a->i[i+1] - a->i[i]; 725 alpha1 = x[5*i]; 726 alpha2 = x[5*i+1]; 727 alpha3 = x[5*i+2]; 728 alpha4 = x[5*i+3]; 729 alpha5 = x[5*i+4]; 730 while (n-->0) { 731 y[5*(*idx)] += alpha1*(*v); 732 y[5*(*idx)+1] += alpha2*(*v); 733 y[5*(*idx)+2] += alpha3*(*v); 734 y[5*(*idx)+3] += alpha4*(*v); 735 y[5*(*idx)+4] += alpha5*(*v); 736 idx++; v++; 737 } 738 } 739 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 740 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 741 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 742 PetscFunctionReturn(0); 743 } 744 745 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 746 { 747 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 748 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 749 const PetscScalar *x,*v; 750 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 751 PetscErrorCode ierr; 752 PetscInt n,i,jrow,j; 753 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 754 755 PetscFunctionBegin; 756 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 757 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 758 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 759 idx = a->j; 760 v = a->a; 761 ii = a->i; 762 763 for (i=0; i<m; i++) { 764 jrow = ii[i]; 765 n = ii[i+1] - jrow; 766 sum1 = 0.0; 767 sum2 = 0.0; 768 sum3 = 0.0; 769 sum4 = 0.0; 770 sum5 = 0.0; 771 for (j=0; j<n; j++) { 772 sum1 += v[jrow]*x[5*idx[jrow]]; 773 sum2 += v[jrow]*x[5*idx[jrow]+1]; 774 sum3 += v[jrow]*x[5*idx[jrow]+2]; 775 sum4 += v[jrow]*x[5*idx[jrow]+3]; 776 sum5 += v[jrow]*x[5*idx[jrow]+4]; 777 jrow++; 778 } 779 y[5*i] += sum1; 780 y[5*i+1] += sum2; 781 y[5*i+2] += sum3; 782 y[5*i+3] += sum4; 783 y[5*i+4] += sum5; 784 } 785 786 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 787 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 788 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 789 PetscFunctionReturn(0); 790 } 791 792 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 793 { 794 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 795 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 796 const PetscScalar *x,*v; 797 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 798 PetscErrorCode ierr; 799 PetscInt n,i; 800 const PetscInt m = b->AIJ->rmap->n,*idx; 801 802 PetscFunctionBegin; 803 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 804 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 805 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 806 807 for (i=0; i<m; i++) { 808 idx = a->j + a->i[i]; 809 v = a->a + a->i[i]; 810 n = a->i[i+1] - a->i[i]; 811 alpha1 = x[5*i]; 812 alpha2 = x[5*i+1]; 813 alpha3 = x[5*i+2]; 814 alpha4 = x[5*i+3]; 815 alpha5 = x[5*i+4]; 816 while (n-->0) { 817 y[5*(*idx)] += alpha1*(*v); 818 y[5*(*idx)+1] += alpha2*(*v); 819 y[5*(*idx)+2] += alpha3*(*v); 820 y[5*(*idx)+3] += alpha4*(*v); 821 y[5*(*idx)+4] += alpha5*(*v); 822 idx++; v++; 823 } 824 } 825 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 826 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 827 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 828 PetscFunctionReturn(0); 829 } 830 831 /* ------------------------------------------------------------------------------*/ 832 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 833 { 834 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 835 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 836 const PetscScalar *x,*v; 837 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 838 PetscErrorCode ierr; 839 PetscInt nonzerorow=0,n,i,jrow,j; 840 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 841 842 PetscFunctionBegin; 843 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 844 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 845 idx = a->j; 846 v = a->a; 847 ii = a->i; 848 849 for (i=0; i<m; i++) { 850 jrow = ii[i]; 851 n = ii[i+1] - jrow; 852 sum1 = 0.0; 853 sum2 = 0.0; 854 sum3 = 0.0; 855 sum4 = 0.0; 856 sum5 = 0.0; 857 sum6 = 0.0; 858 859 nonzerorow += (n>0); 860 for (j=0; j<n; j++) { 861 sum1 += v[jrow]*x[6*idx[jrow]]; 862 sum2 += v[jrow]*x[6*idx[jrow]+1]; 863 sum3 += v[jrow]*x[6*idx[jrow]+2]; 864 sum4 += v[jrow]*x[6*idx[jrow]+3]; 865 sum5 += v[jrow]*x[6*idx[jrow]+4]; 866 sum6 += v[jrow]*x[6*idx[jrow]+5]; 867 jrow++; 868 } 869 y[6*i] = sum1; 870 y[6*i+1] = sum2; 871 y[6*i+2] = sum3; 872 y[6*i+3] = sum4; 873 y[6*i+4] = sum5; 874 y[6*i+5] = sum6; 875 } 876 877 ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 878 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 879 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 880 PetscFunctionReturn(0); 881 } 882 883 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 884 { 885 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 886 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 887 const PetscScalar *x,*v; 888 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 889 PetscErrorCode ierr; 890 PetscInt n,i; 891 const PetscInt m = b->AIJ->rmap->n,*idx; 892 893 PetscFunctionBegin; 894 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 895 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 896 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 897 898 for (i=0; i<m; i++) { 899 idx = a->j + a->i[i]; 900 v = a->a + a->i[i]; 901 n = a->i[i+1] - a->i[i]; 902 alpha1 = x[6*i]; 903 alpha2 = x[6*i+1]; 904 alpha3 = x[6*i+2]; 905 alpha4 = x[6*i+3]; 906 alpha5 = x[6*i+4]; 907 alpha6 = x[6*i+5]; 908 while (n-->0) { 909 y[6*(*idx)] += alpha1*(*v); 910 y[6*(*idx)+1] += alpha2*(*v); 911 y[6*(*idx)+2] += alpha3*(*v); 912 y[6*(*idx)+3] += alpha4*(*v); 913 y[6*(*idx)+4] += alpha5*(*v); 914 y[6*(*idx)+5] += alpha6*(*v); 915 idx++; v++; 916 } 917 } 918 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 919 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 920 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 921 PetscFunctionReturn(0); 922 } 923 924 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 925 { 926 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 927 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 928 const PetscScalar *x,*v; 929 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 930 PetscErrorCode ierr; 931 PetscInt n,i,jrow,j; 932 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 933 934 PetscFunctionBegin; 935 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 936 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 937 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 938 idx = a->j; 939 v = a->a; 940 ii = a->i; 941 942 for (i=0; i<m; i++) { 943 jrow = ii[i]; 944 n = ii[i+1] - jrow; 945 sum1 = 0.0; 946 sum2 = 0.0; 947 sum3 = 0.0; 948 sum4 = 0.0; 949 sum5 = 0.0; 950 sum6 = 0.0; 951 for (j=0; j<n; j++) { 952 sum1 += v[jrow]*x[6*idx[jrow]]; 953 sum2 += v[jrow]*x[6*idx[jrow]+1]; 954 sum3 += v[jrow]*x[6*idx[jrow]+2]; 955 sum4 += v[jrow]*x[6*idx[jrow]+3]; 956 sum5 += v[jrow]*x[6*idx[jrow]+4]; 957 sum6 += v[jrow]*x[6*idx[jrow]+5]; 958 jrow++; 959 } 960 y[6*i] += sum1; 961 y[6*i+1] += sum2; 962 y[6*i+2] += sum3; 963 y[6*i+3] += sum4; 964 y[6*i+4] += sum5; 965 y[6*i+5] += sum6; 966 } 967 968 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 969 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 970 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 971 PetscFunctionReturn(0); 972 } 973 974 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 975 { 976 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 977 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 978 const PetscScalar *x,*v; 979 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 980 PetscErrorCode ierr; 981 PetscInt n,i; 982 const PetscInt m = b->AIJ->rmap->n,*idx; 983 984 PetscFunctionBegin; 985 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 986 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 987 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 988 989 for (i=0; i<m; i++) { 990 idx = a->j + a->i[i]; 991 v = a->a + a->i[i]; 992 n = a->i[i+1] - a->i[i]; 993 alpha1 = x[6*i]; 994 alpha2 = x[6*i+1]; 995 alpha3 = x[6*i+2]; 996 alpha4 = x[6*i+3]; 997 alpha5 = x[6*i+4]; 998 alpha6 = x[6*i+5]; 999 while (n-->0) { 1000 y[6*(*idx)] += alpha1*(*v); 1001 y[6*(*idx)+1] += alpha2*(*v); 1002 y[6*(*idx)+2] += alpha3*(*v); 1003 y[6*(*idx)+3] += alpha4*(*v); 1004 y[6*(*idx)+4] += alpha5*(*v); 1005 y[6*(*idx)+5] += alpha6*(*v); 1006 idx++; v++; 1007 } 1008 } 1009 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1010 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1011 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1012 PetscFunctionReturn(0); 1013 } 1014 1015 /* ------------------------------------------------------------------------------*/ 1016 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1017 { 1018 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1019 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1020 const PetscScalar *x,*v; 1021 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1022 PetscErrorCode ierr; 1023 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1024 PetscInt nonzerorow=0,n,i,jrow,j; 1025 1026 PetscFunctionBegin; 1027 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1028 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1029 idx = a->j; 1030 v = a->a; 1031 ii = a->i; 1032 1033 for (i=0; i<m; i++) { 1034 jrow = ii[i]; 1035 n = ii[i+1] - jrow; 1036 sum1 = 0.0; 1037 sum2 = 0.0; 1038 sum3 = 0.0; 1039 sum4 = 0.0; 1040 sum5 = 0.0; 1041 sum6 = 0.0; 1042 sum7 = 0.0; 1043 1044 nonzerorow += (n>0); 1045 for (j=0; j<n; j++) { 1046 sum1 += v[jrow]*x[7*idx[jrow]]; 1047 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1048 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1049 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1050 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1051 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1052 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1053 jrow++; 1054 } 1055 y[7*i] = sum1; 1056 y[7*i+1] = sum2; 1057 y[7*i+2] = sum3; 1058 y[7*i+3] = sum4; 1059 y[7*i+4] = sum5; 1060 y[7*i+5] = sum6; 1061 y[7*i+6] = sum7; 1062 } 1063 1064 ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 1065 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1066 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1067 PetscFunctionReturn(0); 1068 } 1069 1070 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1071 { 1072 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1073 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1074 const PetscScalar *x,*v; 1075 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1076 PetscErrorCode ierr; 1077 const PetscInt m = b->AIJ->rmap->n,*idx; 1078 PetscInt n,i; 1079 1080 PetscFunctionBegin; 1081 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1082 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1083 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1084 1085 for (i=0; i<m; i++) { 1086 idx = a->j + a->i[i]; 1087 v = a->a + a->i[i]; 1088 n = a->i[i+1] - a->i[i]; 1089 alpha1 = x[7*i]; 1090 alpha2 = x[7*i+1]; 1091 alpha3 = x[7*i+2]; 1092 alpha4 = x[7*i+3]; 1093 alpha5 = x[7*i+4]; 1094 alpha6 = x[7*i+5]; 1095 alpha7 = x[7*i+6]; 1096 while (n-->0) { 1097 y[7*(*idx)] += alpha1*(*v); 1098 y[7*(*idx)+1] += alpha2*(*v); 1099 y[7*(*idx)+2] += alpha3*(*v); 1100 y[7*(*idx)+3] += alpha4*(*v); 1101 y[7*(*idx)+4] += alpha5*(*v); 1102 y[7*(*idx)+5] += alpha6*(*v); 1103 y[7*(*idx)+6] += alpha7*(*v); 1104 idx++; v++; 1105 } 1106 } 1107 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1108 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1109 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1110 PetscFunctionReturn(0); 1111 } 1112 1113 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1114 { 1115 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1116 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1117 const PetscScalar *x,*v; 1118 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1119 PetscErrorCode ierr; 1120 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1121 PetscInt n,i,jrow,j; 1122 1123 PetscFunctionBegin; 1124 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1125 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1126 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1127 idx = a->j; 1128 v = a->a; 1129 ii = a->i; 1130 1131 for (i=0; i<m; i++) { 1132 jrow = ii[i]; 1133 n = ii[i+1] - jrow; 1134 sum1 = 0.0; 1135 sum2 = 0.0; 1136 sum3 = 0.0; 1137 sum4 = 0.0; 1138 sum5 = 0.0; 1139 sum6 = 0.0; 1140 sum7 = 0.0; 1141 for (j=0; j<n; j++) { 1142 sum1 += v[jrow]*x[7*idx[jrow]]; 1143 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1144 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1145 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1146 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1147 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1148 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1149 jrow++; 1150 } 1151 y[7*i] += sum1; 1152 y[7*i+1] += sum2; 1153 y[7*i+2] += sum3; 1154 y[7*i+3] += sum4; 1155 y[7*i+4] += sum5; 1156 y[7*i+5] += sum6; 1157 y[7*i+6] += sum7; 1158 } 1159 1160 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1161 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1162 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1163 PetscFunctionReturn(0); 1164 } 1165 1166 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1167 { 1168 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1169 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1170 const PetscScalar *x,*v; 1171 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1172 PetscErrorCode ierr; 1173 const PetscInt m = b->AIJ->rmap->n,*idx; 1174 PetscInt n,i; 1175 1176 PetscFunctionBegin; 1177 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1178 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1179 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1180 for (i=0; i<m; i++) { 1181 idx = a->j + a->i[i]; 1182 v = a->a + a->i[i]; 1183 n = a->i[i+1] - a->i[i]; 1184 alpha1 = x[7*i]; 1185 alpha2 = x[7*i+1]; 1186 alpha3 = x[7*i+2]; 1187 alpha4 = x[7*i+3]; 1188 alpha5 = x[7*i+4]; 1189 alpha6 = x[7*i+5]; 1190 alpha7 = x[7*i+6]; 1191 while (n-->0) { 1192 y[7*(*idx)] += alpha1*(*v); 1193 y[7*(*idx)+1] += alpha2*(*v); 1194 y[7*(*idx)+2] += alpha3*(*v); 1195 y[7*(*idx)+3] += alpha4*(*v); 1196 y[7*(*idx)+4] += alpha5*(*v); 1197 y[7*(*idx)+5] += alpha6*(*v); 1198 y[7*(*idx)+6] += alpha7*(*v); 1199 idx++; v++; 1200 } 1201 } 1202 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1203 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1204 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1205 PetscFunctionReturn(0); 1206 } 1207 1208 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1209 { 1210 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1211 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1212 const PetscScalar *x,*v; 1213 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1214 PetscErrorCode ierr; 1215 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1216 PetscInt nonzerorow=0,n,i,jrow,j; 1217 1218 PetscFunctionBegin; 1219 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1220 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1221 idx = a->j; 1222 v = a->a; 1223 ii = a->i; 1224 1225 for (i=0; i<m; i++) { 1226 jrow = ii[i]; 1227 n = ii[i+1] - jrow; 1228 sum1 = 0.0; 1229 sum2 = 0.0; 1230 sum3 = 0.0; 1231 sum4 = 0.0; 1232 sum5 = 0.0; 1233 sum6 = 0.0; 1234 sum7 = 0.0; 1235 sum8 = 0.0; 1236 1237 nonzerorow += (n>0); 1238 for (j=0; j<n; j++) { 1239 sum1 += v[jrow]*x[8*idx[jrow]]; 1240 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1241 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1242 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1243 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1244 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1245 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1246 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1247 jrow++; 1248 } 1249 y[8*i] = sum1; 1250 y[8*i+1] = sum2; 1251 y[8*i+2] = sum3; 1252 y[8*i+3] = sum4; 1253 y[8*i+4] = sum5; 1254 y[8*i+5] = sum6; 1255 y[8*i+6] = sum7; 1256 y[8*i+7] = sum8; 1257 } 1258 1259 ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 1260 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1261 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1262 PetscFunctionReturn(0); 1263 } 1264 1265 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1266 { 1267 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1268 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1269 const PetscScalar *x,*v; 1270 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1271 PetscErrorCode ierr; 1272 const PetscInt m = b->AIJ->rmap->n,*idx; 1273 PetscInt n,i; 1274 1275 PetscFunctionBegin; 1276 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1277 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1278 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1279 1280 for (i=0; i<m; i++) { 1281 idx = a->j + a->i[i]; 1282 v = a->a + a->i[i]; 1283 n = a->i[i+1] - a->i[i]; 1284 alpha1 = x[8*i]; 1285 alpha2 = x[8*i+1]; 1286 alpha3 = x[8*i+2]; 1287 alpha4 = x[8*i+3]; 1288 alpha5 = x[8*i+4]; 1289 alpha6 = x[8*i+5]; 1290 alpha7 = x[8*i+6]; 1291 alpha8 = x[8*i+7]; 1292 while (n-->0) { 1293 y[8*(*idx)] += alpha1*(*v); 1294 y[8*(*idx)+1] += alpha2*(*v); 1295 y[8*(*idx)+2] += alpha3*(*v); 1296 y[8*(*idx)+3] += alpha4*(*v); 1297 y[8*(*idx)+4] += alpha5*(*v); 1298 y[8*(*idx)+5] += alpha6*(*v); 1299 y[8*(*idx)+6] += alpha7*(*v); 1300 y[8*(*idx)+7] += alpha8*(*v); 1301 idx++; v++; 1302 } 1303 } 1304 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1305 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1306 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 1310 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1311 { 1312 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1313 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1314 const PetscScalar *x,*v; 1315 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1316 PetscErrorCode ierr; 1317 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1318 PetscInt n,i,jrow,j; 1319 1320 PetscFunctionBegin; 1321 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1322 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1323 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1324 idx = a->j; 1325 v = a->a; 1326 ii = a->i; 1327 1328 for (i=0; i<m; i++) { 1329 jrow = ii[i]; 1330 n = ii[i+1] - jrow; 1331 sum1 = 0.0; 1332 sum2 = 0.0; 1333 sum3 = 0.0; 1334 sum4 = 0.0; 1335 sum5 = 0.0; 1336 sum6 = 0.0; 1337 sum7 = 0.0; 1338 sum8 = 0.0; 1339 for (j=0; j<n; j++) { 1340 sum1 += v[jrow]*x[8*idx[jrow]]; 1341 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1342 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1343 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1344 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1345 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1346 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1347 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1348 jrow++; 1349 } 1350 y[8*i] += sum1; 1351 y[8*i+1] += sum2; 1352 y[8*i+2] += sum3; 1353 y[8*i+3] += sum4; 1354 y[8*i+4] += sum5; 1355 y[8*i+5] += sum6; 1356 y[8*i+6] += sum7; 1357 y[8*i+7] += sum8; 1358 } 1359 1360 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1361 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1362 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1363 PetscFunctionReturn(0); 1364 } 1365 1366 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1367 { 1368 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1369 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1370 const PetscScalar *x,*v; 1371 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1372 PetscErrorCode ierr; 1373 const PetscInt m = b->AIJ->rmap->n,*idx; 1374 PetscInt n,i; 1375 1376 PetscFunctionBegin; 1377 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1378 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1379 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1380 for (i=0; i<m; i++) { 1381 idx = a->j + a->i[i]; 1382 v = a->a + a->i[i]; 1383 n = a->i[i+1] - a->i[i]; 1384 alpha1 = x[8*i]; 1385 alpha2 = x[8*i+1]; 1386 alpha3 = x[8*i+2]; 1387 alpha4 = x[8*i+3]; 1388 alpha5 = x[8*i+4]; 1389 alpha6 = x[8*i+5]; 1390 alpha7 = x[8*i+6]; 1391 alpha8 = x[8*i+7]; 1392 while (n-->0) { 1393 y[8*(*idx)] += alpha1*(*v); 1394 y[8*(*idx)+1] += alpha2*(*v); 1395 y[8*(*idx)+2] += alpha3*(*v); 1396 y[8*(*idx)+3] += alpha4*(*v); 1397 y[8*(*idx)+4] += alpha5*(*v); 1398 y[8*(*idx)+5] += alpha6*(*v); 1399 y[8*(*idx)+6] += alpha7*(*v); 1400 y[8*(*idx)+7] += alpha8*(*v); 1401 idx++; v++; 1402 } 1403 } 1404 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1405 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1406 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1407 PetscFunctionReturn(0); 1408 } 1409 1410 /* ------------------------------------------------------------------------------*/ 1411 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1412 { 1413 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1414 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1415 const PetscScalar *x,*v; 1416 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1417 PetscErrorCode ierr; 1418 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1419 PetscInt nonzerorow=0,n,i,jrow,j; 1420 1421 PetscFunctionBegin; 1422 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1423 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1424 idx = a->j; 1425 v = a->a; 1426 ii = a->i; 1427 1428 for (i=0; i<m; i++) { 1429 jrow = ii[i]; 1430 n = ii[i+1] - jrow; 1431 sum1 = 0.0; 1432 sum2 = 0.0; 1433 sum3 = 0.0; 1434 sum4 = 0.0; 1435 sum5 = 0.0; 1436 sum6 = 0.0; 1437 sum7 = 0.0; 1438 sum8 = 0.0; 1439 sum9 = 0.0; 1440 1441 nonzerorow += (n>0); 1442 for (j=0; j<n; j++) { 1443 sum1 += v[jrow]*x[9*idx[jrow]]; 1444 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1445 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1446 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1447 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1448 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1449 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1450 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1451 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1452 jrow++; 1453 } 1454 y[9*i] = sum1; 1455 y[9*i+1] = sum2; 1456 y[9*i+2] = sum3; 1457 y[9*i+3] = sum4; 1458 y[9*i+4] = sum5; 1459 y[9*i+5] = sum6; 1460 y[9*i+6] = sum7; 1461 y[9*i+7] = sum8; 1462 y[9*i+8] = sum9; 1463 } 1464 1465 ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 1466 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1467 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1468 PetscFunctionReturn(0); 1469 } 1470 1471 /* ------------------------------------------------------------------------------*/ 1472 1473 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1474 { 1475 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1476 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1477 const PetscScalar *x,*v; 1478 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1479 PetscErrorCode ierr; 1480 const PetscInt m = b->AIJ->rmap->n,*idx; 1481 PetscInt n,i; 1482 1483 PetscFunctionBegin; 1484 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1485 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1486 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1487 1488 for (i=0; i<m; i++) { 1489 idx = a->j + a->i[i]; 1490 v = a->a + a->i[i]; 1491 n = a->i[i+1] - a->i[i]; 1492 alpha1 = x[9*i]; 1493 alpha2 = x[9*i+1]; 1494 alpha3 = x[9*i+2]; 1495 alpha4 = x[9*i+3]; 1496 alpha5 = x[9*i+4]; 1497 alpha6 = x[9*i+5]; 1498 alpha7 = x[9*i+6]; 1499 alpha8 = x[9*i+7]; 1500 alpha9 = x[9*i+8]; 1501 while (n-->0) { 1502 y[9*(*idx)] += alpha1*(*v); 1503 y[9*(*idx)+1] += alpha2*(*v); 1504 y[9*(*idx)+2] += alpha3*(*v); 1505 y[9*(*idx)+3] += alpha4*(*v); 1506 y[9*(*idx)+4] += alpha5*(*v); 1507 y[9*(*idx)+5] += alpha6*(*v); 1508 y[9*(*idx)+6] += alpha7*(*v); 1509 y[9*(*idx)+7] += alpha8*(*v); 1510 y[9*(*idx)+8] += alpha9*(*v); 1511 idx++; v++; 1512 } 1513 } 1514 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1515 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1516 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1517 PetscFunctionReturn(0); 1518 } 1519 1520 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1521 { 1522 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1523 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1524 const PetscScalar *x,*v; 1525 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1526 PetscErrorCode ierr; 1527 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1528 PetscInt n,i,jrow,j; 1529 1530 PetscFunctionBegin; 1531 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1532 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1533 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1534 idx = a->j; 1535 v = a->a; 1536 ii = a->i; 1537 1538 for (i=0; i<m; i++) { 1539 jrow = ii[i]; 1540 n = ii[i+1] - jrow; 1541 sum1 = 0.0; 1542 sum2 = 0.0; 1543 sum3 = 0.0; 1544 sum4 = 0.0; 1545 sum5 = 0.0; 1546 sum6 = 0.0; 1547 sum7 = 0.0; 1548 sum8 = 0.0; 1549 sum9 = 0.0; 1550 for (j=0; j<n; j++) { 1551 sum1 += v[jrow]*x[9*idx[jrow]]; 1552 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1553 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1554 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1555 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1556 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1557 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1558 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1559 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1560 jrow++; 1561 } 1562 y[9*i] += sum1; 1563 y[9*i+1] += sum2; 1564 y[9*i+2] += sum3; 1565 y[9*i+3] += sum4; 1566 y[9*i+4] += sum5; 1567 y[9*i+5] += sum6; 1568 y[9*i+6] += sum7; 1569 y[9*i+7] += sum8; 1570 y[9*i+8] += sum9; 1571 } 1572 1573 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1574 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1575 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1576 PetscFunctionReturn(0); 1577 } 1578 1579 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1580 { 1581 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1582 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1583 const PetscScalar *x,*v; 1584 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1585 PetscErrorCode ierr; 1586 const PetscInt m = b->AIJ->rmap->n,*idx; 1587 PetscInt n,i; 1588 1589 PetscFunctionBegin; 1590 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1591 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1592 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1593 for (i=0; i<m; i++) { 1594 idx = a->j + a->i[i]; 1595 v = a->a + a->i[i]; 1596 n = a->i[i+1] - a->i[i]; 1597 alpha1 = x[9*i]; 1598 alpha2 = x[9*i+1]; 1599 alpha3 = x[9*i+2]; 1600 alpha4 = x[9*i+3]; 1601 alpha5 = x[9*i+4]; 1602 alpha6 = x[9*i+5]; 1603 alpha7 = x[9*i+6]; 1604 alpha8 = x[9*i+7]; 1605 alpha9 = x[9*i+8]; 1606 while (n-->0) { 1607 y[9*(*idx)] += alpha1*(*v); 1608 y[9*(*idx)+1] += alpha2*(*v); 1609 y[9*(*idx)+2] += alpha3*(*v); 1610 y[9*(*idx)+3] += alpha4*(*v); 1611 y[9*(*idx)+4] += alpha5*(*v); 1612 y[9*(*idx)+5] += alpha6*(*v); 1613 y[9*(*idx)+6] += alpha7*(*v); 1614 y[9*(*idx)+7] += alpha8*(*v); 1615 y[9*(*idx)+8] += alpha9*(*v); 1616 idx++; v++; 1617 } 1618 } 1619 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1620 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1621 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1622 PetscFunctionReturn(0); 1623 } 1624 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1625 { 1626 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1627 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1628 const PetscScalar *x,*v; 1629 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1630 PetscErrorCode ierr; 1631 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1632 PetscInt nonzerorow=0,n,i,jrow,j; 1633 1634 PetscFunctionBegin; 1635 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1636 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1637 idx = a->j; 1638 v = a->a; 1639 ii = a->i; 1640 1641 for (i=0; i<m; i++) { 1642 jrow = ii[i]; 1643 n = ii[i+1] - jrow; 1644 sum1 = 0.0; 1645 sum2 = 0.0; 1646 sum3 = 0.0; 1647 sum4 = 0.0; 1648 sum5 = 0.0; 1649 sum6 = 0.0; 1650 sum7 = 0.0; 1651 sum8 = 0.0; 1652 sum9 = 0.0; 1653 sum10 = 0.0; 1654 1655 nonzerorow += (n>0); 1656 for (j=0; j<n; j++) { 1657 sum1 += v[jrow]*x[10*idx[jrow]]; 1658 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1659 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1660 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1661 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1662 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1663 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1664 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1665 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1666 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1667 jrow++; 1668 } 1669 y[10*i] = sum1; 1670 y[10*i+1] = sum2; 1671 y[10*i+2] = sum3; 1672 y[10*i+3] = sum4; 1673 y[10*i+4] = sum5; 1674 y[10*i+5] = sum6; 1675 y[10*i+6] = sum7; 1676 y[10*i+7] = sum8; 1677 y[10*i+8] = sum9; 1678 y[10*i+9] = sum10; 1679 } 1680 1681 ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 1682 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1683 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1684 PetscFunctionReturn(0); 1685 } 1686 1687 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1688 { 1689 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1690 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1691 const PetscScalar *x,*v; 1692 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1693 PetscErrorCode ierr; 1694 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1695 PetscInt n,i,jrow,j; 1696 1697 PetscFunctionBegin; 1698 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1699 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1700 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1701 idx = a->j; 1702 v = a->a; 1703 ii = a->i; 1704 1705 for (i=0; i<m; i++) { 1706 jrow = ii[i]; 1707 n = ii[i+1] - jrow; 1708 sum1 = 0.0; 1709 sum2 = 0.0; 1710 sum3 = 0.0; 1711 sum4 = 0.0; 1712 sum5 = 0.0; 1713 sum6 = 0.0; 1714 sum7 = 0.0; 1715 sum8 = 0.0; 1716 sum9 = 0.0; 1717 sum10 = 0.0; 1718 for (j=0; j<n; j++) { 1719 sum1 += v[jrow]*x[10*idx[jrow]]; 1720 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1721 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1722 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1723 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1724 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1725 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1726 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1727 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1728 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1729 jrow++; 1730 } 1731 y[10*i] += sum1; 1732 y[10*i+1] += sum2; 1733 y[10*i+2] += sum3; 1734 y[10*i+3] += sum4; 1735 y[10*i+4] += sum5; 1736 y[10*i+5] += sum6; 1737 y[10*i+6] += sum7; 1738 y[10*i+7] += sum8; 1739 y[10*i+8] += sum9; 1740 y[10*i+9] += sum10; 1741 } 1742 1743 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1744 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1745 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1746 PetscFunctionReturn(0); 1747 } 1748 1749 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1750 { 1751 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1752 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1753 const PetscScalar *x,*v; 1754 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1755 PetscErrorCode ierr; 1756 const PetscInt m = b->AIJ->rmap->n,*idx; 1757 PetscInt n,i; 1758 1759 PetscFunctionBegin; 1760 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1761 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1762 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1763 1764 for (i=0; i<m; i++) { 1765 idx = a->j + a->i[i]; 1766 v = a->a + a->i[i]; 1767 n = a->i[i+1] - a->i[i]; 1768 alpha1 = x[10*i]; 1769 alpha2 = x[10*i+1]; 1770 alpha3 = x[10*i+2]; 1771 alpha4 = x[10*i+3]; 1772 alpha5 = x[10*i+4]; 1773 alpha6 = x[10*i+5]; 1774 alpha7 = x[10*i+6]; 1775 alpha8 = x[10*i+7]; 1776 alpha9 = x[10*i+8]; 1777 alpha10 = x[10*i+9]; 1778 while (n-->0) { 1779 y[10*(*idx)] += alpha1*(*v); 1780 y[10*(*idx)+1] += alpha2*(*v); 1781 y[10*(*idx)+2] += alpha3*(*v); 1782 y[10*(*idx)+3] += alpha4*(*v); 1783 y[10*(*idx)+4] += alpha5*(*v); 1784 y[10*(*idx)+5] += alpha6*(*v); 1785 y[10*(*idx)+6] += alpha7*(*v); 1786 y[10*(*idx)+7] += alpha8*(*v); 1787 y[10*(*idx)+8] += alpha9*(*v); 1788 y[10*(*idx)+9] += alpha10*(*v); 1789 idx++; v++; 1790 } 1791 } 1792 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1793 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1794 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1795 PetscFunctionReturn(0); 1796 } 1797 1798 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1799 { 1800 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1801 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1802 const PetscScalar *x,*v; 1803 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1804 PetscErrorCode ierr; 1805 const PetscInt m = b->AIJ->rmap->n,*idx; 1806 PetscInt n,i; 1807 1808 PetscFunctionBegin; 1809 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1810 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1811 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1812 for (i=0; i<m; i++) { 1813 idx = a->j + a->i[i]; 1814 v = a->a + a->i[i]; 1815 n = a->i[i+1] - a->i[i]; 1816 alpha1 = x[10*i]; 1817 alpha2 = x[10*i+1]; 1818 alpha3 = x[10*i+2]; 1819 alpha4 = x[10*i+3]; 1820 alpha5 = x[10*i+4]; 1821 alpha6 = x[10*i+5]; 1822 alpha7 = x[10*i+6]; 1823 alpha8 = x[10*i+7]; 1824 alpha9 = x[10*i+8]; 1825 alpha10 = x[10*i+9]; 1826 while (n-->0) { 1827 y[10*(*idx)] += alpha1*(*v); 1828 y[10*(*idx)+1] += alpha2*(*v); 1829 y[10*(*idx)+2] += alpha3*(*v); 1830 y[10*(*idx)+3] += alpha4*(*v); 1831 y[10*(*idx)+4] += alpha5*(*v); 1832 y[10*(*idx)+5] += alpha6*(*v); 1833 y[10*(*idx)+6] += alpha7*(*v); 1834 y[10*(*idx)+7] += alpha8*(*v); 1835 y[10*(*idx)+8] += alpha9*(*v); 1836 y[10*(*idx)+9] += alpha10*(*v); 1837 idx++; v++; 1838 } 1839 } 1840 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1841 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1842 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1843 PetscFunctionReturn(0); 1844 } 1845 1846 1847 /*--------------------------------------------------------------------------------------------*/ 1848 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1849 { 1850 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1851 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1852 const PetscScalar *x,*v; 1853 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1854 PetscErrorCode ierr; 1855 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1856 PetscInt nonzerorow=0,n,i,jrow,j; 1857 1858 PetscFunctionBegin; 1859 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1860 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1861 idx = a->j; 1862 v = a->a; 1863 ii = a->i; 1864 1865 for (i=0; i<m; i++) { 1866 jrow = ii[i]; 1867 n = ii[i+1] - jrow; 1868 sum1 = 0.0; 1869 sum2 = 0.0; 1870 sum3 = 0.0; 1871 sum4 = 0.0; 1872 sum5 = 0.0; 1873 sum6 = 0.0; 1874 sum7 = 0.0; 1875 sum8 = 0.0; 1876 sum9 = 0.0; 1877 sum10 = 0.0; 1878 sum11 = 0.0; 1879 1880 nonzerorow += (n>0); 1881 for (j=0; j<n; j++) { 1882 sum1 += v[jrow]*x[11*idx[jrow]]; 1883 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1884 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1885 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1886 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1887 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1888 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1889 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1890 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1891 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1892 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1893 jrow++; 1894 } 1895 y[11*i] = sum1; 1896 y[11*i+1] = sum2; 1897 y[11*i+2] = sum3; 1898 y[11*i+3] = sum4; 1899 y[11*i+4] = sum5; 1900 y[11*i+5] = sum6; 1901 y[11*i+6] = sum7; 1902 y[11*i+7] = sum8; 1903 y[11*i+8] = sum9; 1904 y[11*i+9] = sum10; 1905 y[11*i+10] = sum11; 1906 } 1907 1908 ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr); 1909 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1910 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1911 PetscFunctionReturn(0); 1912 } 1913 1914 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1915 { 1916 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1917 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1918 const PetscScalar *x,*v; 1919 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1920 PetscErrorCode ierr; 1921 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1922 PetscInt n,i,jrow,j; 1923 1924 PetscFunctionBegin; 1925 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1926 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1927 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1928 idx = a->j; 1929 v = a->a; 1930 ii = a->i; 1931 1932 for (i=0; i<m; i++) { 1933 jrow = ii[i]; 1934 n = ii[i+1] - jrow; 1935 sum1 = 0.0; 1936 sum2 = 0.0; 1937 sum3 = 0.0; 1938 sum4 = 0.0; 1939 sum5 = 0.0; 1940 sum6 = 0.0; 1941 sum7 = 0.0; 1942 sum8 = 0.0; 1943 sum9 = 0.0; 1944 sum10 = 0.0; 1945 sum11 = 0.0; 1946 for (j=0; j<n; j++) { 1947 sum1 += v[jrow]*x[11*idx[jrow]]; 1948 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1949 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1950 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1951 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1952 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1953 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1954 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1955 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1956 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1957 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1958 jrow++; 1959 } 1960 y[11*i] += sum1; 1961 y[11*i+1] += sum2; 1962 y[11*i+2] += sum3; 1963 y[11*i+3] += sum4; 1964 y[11*i+4] += sum5; 1965 y[11*i+5] += sum6; 1966 y[11*i+6] += sum7; 1967 y[11*i+7] += sum8; 1968 y[11*i+8] += sum9; 1969 y[11*i+9] += sum10; 1970 y[11*i+10] += sum11; 1971 } 1972 1973 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 1974 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1975 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1976 PetscFunctionReturn(0); 1977 } 1978 1979 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1980 { 1981 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1982 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1983 const PetscScalar *x,*v; 1984 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 1985 PetscErrorCode ierr; 1986 const PetscInt m = b->AIJ->rmap->n,*idx; 1987 PetscInt n,i; 1988 1989 PetscFunctionBegin; 1990 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1991 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1992 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1993 1994 for (i=0; i<m; i++) { 1995 idx = a->j + a->i[i]; 1996 v = a->a + a->i[i]; 1997 n = a->i[i+1] - a->i[i]; 1998 alpha1 = x[11*i]; 1999 alpha2 = x[11*i+1]; 2000 alpha3 = x[11*i+2]; 2001 alpha4 = x[11*i+3]; 2002 alpha5 = x[11*i+4]; 2003 alpha6 = x[11*i+5]; 2004 alpha7 = x[11*i+6]; 2005 alpha8 = x[11*i+7]; 2006 alpha9 = x[11*i+8]; 2007 alpha10 = x[11*i+9]; 2008 alpha11 = x[11*i+10]; 2009 while (n-->0) { 2010 y[11*(*idx)] += alpha1*(*v); 2011 y[11*(*idx)+1] += alpha2*(*v); 2012 y[11*(*idx)+2] += alpha3*(*v); 2013 y[11*(*idx)+3] += alpha4*(*v); 2014 y[11*(*idx)+4] += alpha5*(*v); 2015 y[11*(*idx)+5] += alpha6*(*v); 2016 y[11*(*idx)+6] += alpha7*(*v); 2017 y[11*(*idx)+7] += alpha8*(*v); 2018 y[11*(*idx)+8] += alpha9*(*v); 2019 y[11*(*idx)+9] += alpha10*(*v); 2020 y[11*(*idx)+10] += alpha11*(*v); 2021 idx++; v++; 2022 } 2023 } 2024 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2025 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2026 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2027 PetscFunctionReturn(0); 2028 } 2029 2030 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2031 { 2032 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2033 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2034 const PetscScalar *x,*v; 2035 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2036 PetscErrorCode ierr; 2037 const PetscInt m = b->AIJ->rmap->n,*idx; 2038 PetscInt n,i; 2039 2040 PetscFunctionBegin; 2041 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2042 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2043 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2044 for (i=0; i<m; i++) { 2045 idx = a->j + a->i[i]; 2046 v = a->a + a->i[i]; 2047 n = a->i[i+1] - a->i[i]; 2048 alpha1 = x[11*i]; 2049 alpha2 = x[11*i+1]; 2050 alpha3 = x[11*i+2]; 2051 alpha4 = x[11*i+3]; 2052 alpha5 = x[11*i+4]; 2053 alpha6 = x[11*i+5]; 2054 alpha7 = x[11*i+6]; 2055 alpha8 = x[11*i+7]; 2056 alpha9 = x[11*i+8]; 2057 alpha10 = x[11*i+9]; 2058 alpha11 = x[11*i+10]; 2059 while (n-->0) { 2060 y[11*(*idx)] += alpha1*(*v); 2061 y[11*(*idx)+1] += alpha2*(*v); 2062 y[11*(*idx)+2] += alpha3*(*v); 2063 y[11*(*idx)+3] += alpha4*(*v); 2064 y[11*(*idx)+4] += alpha5*(*v); 2065 y[11*(*idx)+5] += alpha6*(*v); 2066 y[11*(*idx)+6] += alpha7*(*v); 2067 y[11*(*idx)+7] += alpha8*(*v); 2068 y[11*(*idx)+8] += alpha9*(*v); 2069 y[11*(*idx)+9] += alpha10*(*v); 2070 y[11*(*idx)+10] += alpha11*(*v); 2071 idx++; v++; 2072 } 2073 } 2074 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2075 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2076 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2077 PetscFunctionReturn(0); 2078 } 2079 2080 2081 /*--------------------------------------------------------------------------------------------*/ 2082 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2083 { 2084 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2085 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2086 const PetscScalar *x,*v; 2087 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2088 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2089 PetscErrorCode ierr; 2090 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2091 PetscInt nonzerorow=0,n,i,jrow,j; 2092 2093 PetscFunctionBegin; 2094 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2095 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2096 idx = a->j; 2097 v = a->a; 2098 ii = a->i; 2099 2100 for (i=0; i<m; i++) { 2101 jrow = ii[i]; 2102 n = ii[i+1] - jrow; 2103 sum1 = 0.0; 2104 sum2 = 0.0; 2105 sum3 = 0.0; 2106 sum4 = 0.0; 2107 sum5 = 0.0; 2108 sum6 = 0.0; 2109 sum7 = 0.0; 2110 sum8 = 0.0; 2111 sum9 = 0.0; 2112 sum10 = 0.0; 2113 sum11 = 0.0; 2114 sum12 = 0.0; 2115 sum13 = 0.0; 2116 sum14 = 0.0; 2117 sum15 = 0.0; 2118 sum16 = 0.0; 2119 2120 nonzerorow += (n>0); 2121 for (j=0; j<n; j++) { 2122 sum1 += v[jrow]*x[16*idx[jrow]]; 2123 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2124 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2125 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2126 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2127 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2128 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2129 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2130 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2131 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2132 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2133 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2134 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2135 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2136 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2137 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2138 jrow++; 2139 } 2140 y[16*i] = sum1; 2141 y[16*i+1] = sum2; 2142 y[16*i+2] = sum3; 2143 y[16*i+3] = sum4; 2144 y[16*i+4] = sum5; 2145 y[16*i+5] = sum6; 2146 y[16*i+6] = sum7; 2147 y[16*i+7] = sum8; 2148 y[16*i+8] = sum9; 2149 y[16*i+9] = sum10; 2150 y[16*i+10] = sum11; 2151 y[16*i+11] = sum12; 2152 y[16*i+12] = sum13; 2153 y[16*i+13] = sum14; 2154 y[16*i+14] = sum15; 2155 y[16*i+15] = sum16; 2156 } 2157 2158 ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 2159 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2160 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2161 PetscFunctionReturn(0); 2162 } 2163 2164 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2165 { 2166 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2167 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2168 const PetscScalar *x,*v; 2169 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2170 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2171 PetscErrorCode ierr; 2172 const PetscInt m = b->AIJ->rmap->n,*idx; 2173 PetscInt n,i; 2174 2175 PetscFunctionBegin; 2176 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2177 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2178 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2179 2180 for (i=0; i<m; i++) { 2181 idx = a->j + a->i[i]; 2182 v = a->a + a->i[i]; 2183 n = a->i[i+1] - a->i[i]; 2184 alpha1 = x[16*i]; 2185 alpha2 = x[16*i+1]; 2186 alpha3 = x[16*i+2]; 2187 alpha4 = x[16*i+3]; 2188 alpha5 = x[16*i+4]; 2189 alpha6 = x[16*i+5]; 2190 alpha7 = x[16*i+6]; 2191 alpha8 = x[16*i+7]; 2192 alpha9 = x[16*i+8]; 2193 alpha10 = x[16*i+9]; 2194 alpha11 = x[16*i+10]; 2195 alpha12 = x[16*i+11]; 2196 alpha13 = x[16*i+12]; 2197 alpha14 = x[16*i+13]; 2198 alpha15 = x[16*i+14]; 2199 alpha16 = x[16*i+15]; 2200 while (n-->0) { 2201 y[16*(*idx)] += alpha1*(*v); 2202 y[16*(*idx)+1] += alpha2*(*v); 2203 y[16*(*idx)+2] += alpha3*(*v); 2204 y[16*(*idx)+3] += alpha4*(*v); 2205 y[16*(*idx)+4] += alpha5*(*v); 2206 y[16*(*idx)+5] += alpha6*(*v); 2207 y[16*(*idx)+6] += alpha7*(*v); 2208 y[16*(*idx)+7] += alpha8*(*v); 2209 y[16*(*idx)+8] += alpha9*(*v); 2210 y[16*(*idx)+9] += alpha10*(*v); 2211 y[16*(*idx)+10] += alpha11*(*v); 2212 y[16*(*idx)+11] += alpha12*(*v); 2213 y[16*(*idx)+12] += alpha13*(*v); 2214 y[16*(*idx)+13] += alpha14*(*v); 2215 y[16*(*idx)+14] += alpha15*(*v); 2216 y[16*(*idx)+15] += alpha16*(*v); 2217 idx++; v++; 2218 } 2219 } 2220 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2221 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2222 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2223 PetscFunctionReturn(0); 2224 } 2225 2226 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2227 { 2228 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2229 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2230 const PetscScalar *x,*v; 2231 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2232 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2233 PetscErrorCode ierr; 2234 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2235 PetscInt n,i,jrow,j; 2236 2237 PetscFunctionBegin; 2238 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2239 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2240 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2241 idx = a->j; 2242 v = a->a; 2243 ii = a->i; 2244 2245 for (i=0; i<m; i++) { 2246 jrow = ii[i]; 2247 n = ii[i+1] - jrow; 2248 sum1 = 0.0; 2249 sum2 = 0.0; 2250 sum3 = 0.0; 2251 sum4 = 0.0; 2252 sum5 = 0.0; 2253 sum6 = 0.0; 2254 sum7 = 0.0; 2255 sum8 = 0.0; 2256 sum9 = 0.0; 2257 sum10 = 0.0; 2258 sum11 = 0.0; 2259 sum12 = 0.0; 2260 sum13 = 0.0; 2261 sum14 = 0.0; 2262 sum15 = 0.0; 2263 sum16 = 0.0; 2264 for (j=0; j<n; j++) { 2265 sum1 += v[jrow]*x[16*idx[jrow]]; 2266 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2267 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2268 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2269 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2270 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2271 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2272 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2273 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2274 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2275 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2276 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2277 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2278 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2279 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2280 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2281 jrow++; 2282 } 2283 y[16*i] += sum1; 2284 y[16*i+1] += sum2; 2285 y[16*i+2] += sum3; 2286 y[16*i+3] += sum4; 2287 y[16*i+4] += sum5; 2288 y[16*i+5] += sum6; 2289 y[16*i+6] += sum7; 2290 y[16*i+7] += sum8; 2291 y[16*i+8] += sum9; 2292 y[16*i+9] += sum10; 2293 y[16*i+10] += sum11; 2294 y[16*i+11] += sum12; 2295 y[16*i+12] += sum13; 2296 y[16*i+13] += sum14; 2297 y[16*i+14] += sum15; 2298 y[16*i+15] += sum16; 2299 } 2300 2301 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2302 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2303 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2304 PetscFunctionReturn(0); 2305 } 2306 2307 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2308 { 2309 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2310 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2311 const PetscScalar *x,*v; 2312 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2313 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2314 PetscErrorCode ierr; 2315 const PetscInt m = b->AIJ->rmap->n,*idx; 2316 PetscInt n,i; 2317 2318 PetscFunctionBegin; 2319 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2320 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2321 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2322 for (i=0; i<m; i++) { 2323 idx = a->j + a->i[i]; 2324 v = a->a + a->i[i]; 2325 n = a->i[i+1] - a->i[i]; 2326 alpha1 = x[16*i]; 2327 alpha2 = x[16*i+1]; 2328 alpha3 = x[16*i+2]; 2329 alpha4 = x[16*i+3]; 2330 alpha5 = x[16*i+4]; 2331 alpha6 = x[16*i+5]; 2332 alpha7 = x[16*i+6]; 2333 alpha8 = x[16*i+7]; 2334 alpha9 = x[16*i+8]; 2335 alpha10 = x[16*i+9]; 2336 alpha11 = x[16*i+10]; 2337 alpha12 = x[16*i+11]; 2338 alpha13 = x[16*i+12]; 2339 alpha14 = x[16*i+13]; 2340 alpha15 = x[16*i+14]; 2341 alpha16 = x[16*i+15]; 2342 while (n-->0) { 2343 y[16*(*idx)] += alpha1*(*v); 2344 y[16*(*idx)+1] += alpha2*(*v); 2345 y[16*(*idx)+2] += alpha3*(*v); 2346 y[16*(*idx)+3] += alpha4*(*v); 2347 y[16*(*idx)+4] += alpha5*(*v); 2348 y[16*(*idx)+5] += alpha6*(*v); 2349 y[16*(*idx)+6] += alpha7*(*v); 2350 y[16*(*idx)+7] += alpha8*(*v); 2351 y[16*(*idx)+8] += alpha9*(*v); 2352 y[16*(*idx)+9] += alpha10*(*v); 2353 y[16*(*idx)+10] += alpha11*(*v); 2354 y[16*(*idx)+11] += alpha12*(*v); 2355 y[16*(*idx)+12] += alpha13*(*v); 2356 y[16*(*idx)+13] += alpha14*(*v); 2357 y[16*(*idx)+14] += alpha15*(*v); 2358 y[16*(*idx)+15] += alpha16*(*v); 2359 idx++; v++; 2360 } 2361 } 2362 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2363 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2364 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2365 PetscFunctionReturn(0); 2366 } 2367 2368 /*--------------------------------------------------------------------------------------------*/ 2369 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2370 { 2371 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2372 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2373 const PetscScalar *x,*v; 2374 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2375 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2376 PetscErrorCode ierr; 2377 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2378 PetscInt nonzerorow=0,n,i,jrow,j; 2379 2380 PetscFunctionBegin; 2381 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2382 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2383 idx = a->j; 2384 v = a->a; 2385 ii = a->i; 2386 2387 for (i=0; i<m; i++) { 2388 jrow = ii[i]; 2389 n = ii[i+1] - jrow; 2390 sum1 = 0.0; 2391 sum2 = 0.0; 2392 sum3 = 0.0; 2393 sum4 = 0.0; 2394 sum5 = 0.0; 2395 sum6 = 0.0; 2396 sum7 = 0.0; 2397 sum8 = 0.0; 2398 sum9 = 0.0; 2399 sum10 = 0.0; 2400 sum11 = 0.0; 2401 sum12 = 0.0; 2402 sum13 = 0.0; 2403 sum14 = 0.0; 2404 sum15 = 0.0; 2405 sum16 = 0.0; 2406 sum17 = 0.0; 2407 sum18 = 0.0; 2408 2409 nonzerorow += (n>0); 2410 for (j=0; j<n; j++) { 2411 sum1 += v[jrow]*x[18*idx[jrow]]; 2412 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2413 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2414 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2415 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2416 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2417 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2418 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2419 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2420 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2421 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2422 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2423 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2424 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2425 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2426 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2427 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2428 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2429 jrow++; 2430 } 2431 y[18*i] = sum1; 2432 y[18*i+1] = sum2; 2433 y[18*i+2] = sum3; 2434 y[18*i+3] = sum4; 2435 y[18*i+4] = sum5; 2436 y[18*i+5] = sum6; 2437 y[18*i+6] = sum7; 2438 y[18*i+7] = sum8; 2439 y[18*i+8] = sum9; 2440 y[18*i+9] = sum10; 2441 y[18*i+10] = sum11; 2442 y[18*i+11] = sum12; 2443 y[18*i+12] = sum13; 2444 y[18*i+13] = sum14; 2445 y[18*i+14] = sum15; 2446 y[18*i+15] = sum16; 2447 y[18*i+16] = sum17; 2448 y[18*i+17] = sum18; 2449 } 2450 2451 ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 2452 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2453 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2454 PetscFunctionReturn(0); 2455 } 2456 2457 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2458 { 2459 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2460 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2461 const PetscScalar *x,*v; 2462 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2463 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2464 PetscErrorCode ierr; 2465 const PetscInt m = b->AIJ->rmap->n,*idx; 2466 PetscInt n,i; 2467 2468 PetscFunctionBegin; 2469 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2470 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2471 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2472 2473 for (i=0; i<m; i++) { 2474 idx = a->j + a->i[i]; 2475 v = a->a + a->i[i]; 2476 n = a->i[i+1] - a->i[i]; 2477 alpha1 = x[18*i]; 2478 alpha2 = x[18*i+1]; 2479 alpha3 = x[18*i+2]; 2480 alpha4 = x[18*i+3]; 2481 alpha5 = x[18*i+4]; 2482 alpha6 = x[18*i+5]; 2483 alpha7 = x[18*i+6]; 2484 alpha8 = x[18*i+7]; 2485 alpha9 = x[18*i+8]; 2486 alpha10 = x[18*i+9]; 2487 alpha11 = x[18*i+10]; 2488 alpha12 = x[18*i+11]; 2489 alpha13 = x[18*i+12]; 2490 alpha14 = x[18*i+13]; 2491 alpha15 = x[18*i+14]; 2492 alpha16 = x[18*i+15]; 2493 alpha17 = x[18*i+16]; 2494 alpha18 = x[18*i+17]; 2495 while (n-->0) { 2496 y[18*(*idx)] += alpha1*(*v); 2497 y[18*(*idx)+1] += alpha2*(*v); 2498 y[18*(*idx)+2] += alpha3*(*v); 2499 y[18*(*idx)+3] += alpha4*(*v); 2500 y[18*(*idx)+4] += alpha5*(*v); 2501 y[18*(*idx)+5] += alpha6*(*v); 2502 y[18*(*idx)+6] += alpha7*(*v); 2503 y[18*(*idx)+7] += alpha8*(*v); 2504 y[18*(*idx)+8] += alpha9*(*v); 2505 y[18*(*idx)+9] += alpha10*(*v); 2506 y[18*(*idx)+10] += alpha11*(*v); 2507 y[18*(*idx)+11] += alpha12*(*v); 2508 y[18*(*idx)+12] += alpha13*(*v); 2509 y[18*(*idx)+13] += alpha14*(*v); 2510 y[18*(*idx)+14] += alpha15*(*v); 2511 y[18*(*idx)+15] += alpha16*(*v); 2512 y[18*(*idx)+16] += alpha17*(*v); 2513 y[18*(*idx)+17] += alpha18*(*v); 2514 idx++; v++; 2515 } 2516 } 2517 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2518 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2519 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2520 PetscFunctionReturn(0); 2521 } 2522 2523 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2524 { 2525 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2526 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2527 const PetscScalar *x,*v; 2528 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2529 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2530 PetscErrorCode ierr; 2531 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2532 PetscInt n,i,jrow,j; 2533 2534 PetscFunctionBegin; 2535 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2536 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2537 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2538 idx = a->j; 2539 v = a->a; 2540 ii = a->i; 2541 2542 for (i=0; i<m; i++) { 2543 jrow = ii[i]; 2544 n = ii[i+1] - jrow; 2545 sum1 = 0.0; 2546 sum2 = 0.0; 2547 sum3 = 0.0; 2548 sum4 = 0.0; 2549 sum5 = 0.0; 2550 sum6 = 0.0; 2551 sum7 = 0.0; 2552 sum8 = 0.0; 2553 sum9 = 0.0; 2554 sum10 = 0.0; 2555 sum11 = 0.0; 2556 sum12 = 0.0; 2557 sum13 = 0.0; 2558 sum14 = 0.0; 2559 sum15 = 0.0; 2560 sum16 = 0.0; 2561 sum17 = 0.0; 2562 sum18 = 0.0; 2563 for (j=0; j<n; j++) { 2564 sum1 += v[jrow]*x[18*idx[jrow]]; 2565 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2566 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2567 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2568 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2569 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2570 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2571 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2572 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2573 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2574 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2575 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2576 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2577 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2578 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2579 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2580 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2581 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2582 jrow++; 2583 } 2584 y[18*i] += sum1; 2585 y[18*i+1] += sum2; 2586 y[18*i+2] += sum3; 2587 y[18*i+3] += sum4; 2588 y[18*i+4] += sum5; 2589 y[18*i+5] += sum6; 2590 y[18*i+6] += sum7; 2591 y[18*i+7] += sum8; 2592 y[18*i+8] += sum9; 2593 y[18*i+9] += sum10; 2594 y[18*i+10] += sum11; 2595 y[18*i+11] += sum12; 2596 y[18*i+12] += sum13; 2597 y[18*i+13] += sum14; 2598 y[18*i+14] += sum15; 2599 y[18*i+15] += sum16; 2600 y[18*i+16] += sum17; 2601 y[18*i+17] += sum18; 2602 } 2603 2604 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2605 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2606 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2607 PetscFunctionReturn(0); 2608 } 2609 2610 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2611 { 2612 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2613 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2614 const PetscScalar *x,*v; 2615 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2616 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2617 PetscErrorCode ierr; 2618 const PetscInt m = b->AIJ->rmap->n,*idx; 2619 PetscInt n,i; 2620 2621 PetscFunctionBegin; 2622 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2623 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2624 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2625 for (i=0; i<m; i++) { 2626 idx = a->j + a->i[i]; 2627 v = a->a + a->i[i]; 2628 n = a->i[i+1] - a->i[i]; 2629 alpha1 = x[18*i]; 2630 alpha2 = x[18*i+1]; 2631 alpha3 = x[18*i+2]; 2632 alpha4 = x[18*i+3]; 2633 alpha5 = x[18*i+4]; 2634 alpha6 = x[18*i+5]; 2635 alpha7 = x[18*i+6]; 2636 alpha8 = x[18*i+7]; 2637 alpha9 = x[18*i+8]; 2638 alpha10 = x[18*i+9]; 2639 alpha11 = x[18*i+10]; 2640 alpha12 = x[18*i+11]; 2641 alpha13 = x[18*i+12]; 2642 alpha14 = x[18*i+13]; 2643 alpha15 = x[18*i+14]; 2644 alpha16 = x[18*i+15]; 2645 alpha17 = x[18*i+16]; 2646 alpha18 = x[18*i+17]; 2647 while (n-->0) { 2648 y[18*(*idx)] += alpha1*(*v); 2649 y[18*(*idx)+1] += alpha2*(*v); 2650 y[18*(*idx)+2] += alpha3*(*v); 2651 y[18*(*idx)+3] += alpha4*(*v); 2652 y[18*(*idx)+4] += alpha5*(*v); 2653 y[18*(*idx)+5] += alpha6*(*v); 2654 y[18*(*idx)+6] += alpha7*(*v); 2655 y[18*(*idx)+7] += alpha8*(*v); 2656 y[18*(*idx)+8] += alpha9*(*v); 2657 y[18*(*idx)+9] += alpha10*(*v); 2658 y[18*(*idx)+10] += alpha11*(*v); 2659 y[18*(*idx)+11] += alpha12*(*v); 2660 y[18*(*idx)+12] += alpha13*(*v); 2661 y[18*(*idx)+13] += alpha14*(*v); 2662 y[18*(*idx)+14] += alpha15*(*v); 2663 y[18*(*idx)+15] += alpha16*(*v); 2664 y[18*(*idx)+16] += alpha17*(*v); 2665 y[18*(*idx)+17] += alpha18*(*v); 2666 idx++; v++; 2667 } 2668 } 2669 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2670 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2671 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2672 PetscFunctionReturn(0); 2673 } 2674 2675 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2676 { 2677 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2678 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2679 const PetscScalar *x,*v; 2680 PetscScalar *y,*sums; 2681 PetscErrorCode ierr; 2682 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2683 PetscInt n,i,jrow,j,dof = b->dof,k; 2684 2685 PetscFunctionBegin; 2686 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2687 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2688 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2689 idx = a->j; 2690 v = a->a; 2691 ii = a->i; 2692 2693 for (i=0; i<m; i++) { 2694 jrow = ii[i]; 2695 n = ii[i+1] - jrow; 2696 sums = y + dof*i; 2697 for (j=0; j<n; j++) { 2698 for (k=0; k<dof; k++) { 2699 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2700 } 2701 jrow++; 2702 } 2703 } 2704 2705 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2706 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2707 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2708 PetscFunctionReturn(0); 2709 } 2710 2711 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2712 { 2713 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2714 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2715 const PetscScalar *x,*v; 2716 PetscScalar *y,*sums; 2717 PetscErrorCode ierr; 2718 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2719 PetscInt n,i,jrow,j,dof = b->dof,k; 2720 2721 PetscFunctionBegin; 2722 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2723 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2724 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2725 idx = a->j; 2726 v = a->a; 2727 ii = a->i; 2728 2729 for (i=0; i<m; i++) { 2730 jrow = ii[i]; 2731 n = ii[i+1] - jrow; 2732 sums = y + dof*i; 2733 for (j=0; j<n; j++) { 2734 for (k=0; k<dof; k++) { 2735 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2736 } 2737 jrow++; 2738 } 2739 } 2740 2741 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2742 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2743 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2744 PetscFunctionReturn(0); 2745 } 2746 2747 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2748 { 2749 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2750 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2751 const PetscScalar *x,*v,*alpha; 2752 PetscScalar *y; 2753 PetscErrorCode ierr; 2754 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2755 PetscInt n,i,k; 2756 2757 PetscFunctionBegin; 2758 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2759 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2760 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2761 for (i=0; i<m; i++) { 2762 idx = a->j + a->i[i]; 2763 v = a->a + a->i[i]; 2764 n = a->i[i+1] - a->i[i]; 2765 alpha = x + dof*i; 2766 while (n-->0) { 2767 for (k=0; k<dof; k++) { 2768 y[dof*(*idx)+k] += alpha[k]*(*v); 2769 } 2770 idx++; v++; 2771 } 2772 } 2773 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2774 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2775 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2776 PetscFunctionReturn(0); 2777 } 2778 2779 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2780 { 2781 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2782 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2783 const PetscScalar *x,*v,*alpha; 2784 PetscScalar *y; 2785 PetscErrorCode ierr; 2786 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2787 PetscInt n,i,k; 2788 2789 PetscFunctionBegin; 2790 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2791 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2792 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2793 for (i=0; i<m; i++) { 2794 idx = a->j + a->i[i]; 2795 v = a->a + a->i[i]; 2796 n = a->i[i+1] - a->i[i]; 2797 alpha = x + dof*i; 2798 while (n-->0) { 2799 for (k=0; k<dof; k++) { 2800 y[dof*(*idx)+k] += alpha[k]*(*v); 2801 } 2802 idx++; v++; 2803 } 2804 } 2805 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2806 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2807 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2808 PetscFunctionReturn(0); 2809 } 2810 2811 /*===================================================================================*/ 2812 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2813 { 2814 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2815 PetscErrorCode ierr; 2816 2817 PetscFunctionBegin; 2818 /* start the scatter */ 2819 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2820 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2821 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2822 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2823 PetscFunctionReturn(0); 2824 } 2825 2826 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2827 { 2828 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2829 PetscErrorCode ierr; 2830 2831 PetscFunctionBegin; 2832 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2833 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2834 ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2835 ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2836 PetscFunctionReturn(0); 2837 } 2838 2839 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2840 { 2841 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2842 PetscErrorCode ierr; 2843 2844 PetscFunctionBegin; 2845 /* start the scatter */ 2846 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2847 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2848 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2849 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2850 PetscFunctionReturn(0); 2851 } 2852 2853 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2854 { 2855 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2856 PetscErrorCode ierr; 2857 2858 PetscFunctionBegin; 2859 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2860 ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2861 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2862 ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2863 PetscFunctionReturn(0); 2864 } 2865 2866 /* ----------------------------------------------------------------*/ 2867 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2868 { 2869 PetscErrorCode ierr; 2870 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2871 Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data; 2872 Mat P =pp->AIJ; 2873 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2874 PetscInt *pti,*ptj,*ptJ; 2875 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2876 const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 2877 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 2878 MatScalar *ca; 2879 const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 2880 2881 PetscFunctionBegin; 2882 /* Get ij structure of P^T */ 2883 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2884 2885 cn = pn*ppdof; 2886 /* Allocate ci array, arrays for fill computation and */ 2887 /* free space for accumulating nonzero column info */ 2888 ierr = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr); 2889 ci[0] = 0; 2890 2891 /* Work arrays for rows of P^T*A */ 2892 ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr); 2893 ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr); 2894 ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr); 2895 2896 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 2897 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 2898 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2899 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr); 2900 current_space = free_space; 2901 2902 /* Determine symbolic info for each row of C: */ 2903 for (i=0; i<pn; i++) { 2904 ptnzi = pti[i+1] - pti[i]; 2905 ptJ = ptj + pti[i]; 2906 for (dof=0; dof<ppdof; dof++) { 2907 ptanzi = 0; 2908 /* Determine symbolic row of PtA: */ 2909 for (j=0; j<ptnzi; j++) { 2910 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 2911 arow = ptJ[j]*ppdof + dof; 2912 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 2913 anzj = ai[arow+1] - ai[arow]; 2914 ajj = aj + ai[arow]; 2915 for (k=0; k<anzj; k++) { 2916 if (!ptadenserow[ajj[k]]) { 2917 ptadenserow[ajj[k]] = -1; 2918 ptasparserow[ptanzi++] = ajj[k]; 2919 } 2920 } 2921 } 2922 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 2923 ptaj = ptasparserow; 2924 cnzi = 0; 2925 for (j=0; j<ptanzi; j++) { 2926 /* Get offset within block of P */ 2927 pshift = *ptaj%ppdof; 2928 /* Get block row of P */ 2929 prow = (*ptaj++)/ppdof; /* integer division */ 2930 /* P has same number of nonzeros per row as the compressed form */ 2931 pnzj = pi[prow+1] - pi[prow]; 2932 pjj = pj + pi[prow]; 2933 for (k=0;k<pnzj;k++) { 2934 /* Locations in C are shifted by the offset within the block */ 2935 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 2936 if (!denserow[pjj[k]*ppdof+pshift]) { 2937 denserow[pjj[k]*ppdof+pshift] = -1; 2938 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 2939 } 2940 } 2941 } 2942 2943 /* sort sparserow */ 2944 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 2945 2946 /* If free space is not available, make more free space */ 2947 /* Double the amount of total space in the list */ 2948 if (current_space->local_remaining<cnzi) { 2949 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 2950 } 2951 2952 /* Copy data into free space, and zero out denserows */ 2953 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 2954 2955 current_space->array += cnzi; 2956 current_space->local_used += cnzi; 2957 current_space->local_remaining -= cnzi; 2958 2959 for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 2960 for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0; 2961 2962 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 2963 /* For now, we will recompute what is needed. */ 2964 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 2965 } 2966 } 2967 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 2968 /* Allocate space for cj, initialize cj, and */ 2969 /* destroy list of free space and other temporary array(s) */ 2970 ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr); 2971 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 2972 ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr); 2973 2974 /* Allocate space for ca */ 2975 ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr); 2976 2977 /* put together the new matrix */ 2978 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 2979 ierr = MatSetBlockSize(*C,pp->dof);CHKERRQ(ierr); 2980 2981 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 2982 /* Since these are PETSc arrays, change flags to free them as necessary. */ 2983 c = (Mat_SeqAIJ*)((*C)->data); 2984 c->free_a = PETSC_TRUE; 2985 c->free_ij = PETSC_TRUE; 2986 c->nonew = 0; 2987 2988 (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 2989 2990 /* Clean up. */ 2991 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2992 PetscFunctionReturn(0); 2993 } 2994 2995 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 2996 { 2997 /* This routine requires testing -- first draft only */ 2998 PetscErrorCode ierr; 2999 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 3000 Mat P =pp->AIJ; 3001 Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data; 3002 Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data; 3003 Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data; 3004 const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj; 3005 const PetscInt *ci=c->i,*cj=c->j,*cjj; 3006 const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3007 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3008 const MatScalar *aa=a->a,*pa=p->a,*pA,*paj; 3009 MatScalar *ca=c->a,*caj,*apa; 3010 3011 PetscFunctionBegin; 3012 /* Allocate temporary array for storage of one row of A*P */ 3013 ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr); 3014 3015 /* Clear old values in C */ 3016 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 3017 3018 for (i=0; i<am; i++) { 3019 /* Form sparse row of A*P */ 3020 anzi = ai[i+1] - ai[i]; 3021 apnzj = 0; 3022 for (j=0; j<anzi; j++) { 3023 /* Get offset within block of P */ 3024 pshift = *aj%ppdof; 3025 /* Get block row of P */ 3026 prow = *aj++/ppdof; /* integer division */ 3027 pnzj = pi[prow+1] - pi[prow]; 3028 pjj = pj + pi[prow]; 3029 paj = pa + pi[prow]; 3030 for (k=0; k<pnzj; k++) { 3031 poffset = pjj[k]*ppdof+pshift; 3032 if (!apjdense[poffset]) { 3033 apjdense[poffset] = -1; 3034 apj[apnzj++] = poffset; 3035 } 3036 apa[poffset] += (*aa)*paj[k]; 3037 } 3038 ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 3039 aa++; 3040 } 3041 3042 /* Sort the j index array for quick sparse axpy. */ 3043 /* Note: a array does not need sorting as it is in dense storage locations. */ 3044 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 3045 3046 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 3047 prow = i/ppdof; /* integer division */ 3048 pshift = i%ppdof; 3049 poffset = pi[prow]; 3050 pnzi = pi[prow+1] - poffset; 3051 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 3052 pJ = pj+poffset; 3053 pA = pa+poffset; 3054 for (j=0; j<pnzi; j++) { 3055 crow = (*pJ)*ppdof+pshift; 3056 cjj = cj + ci[crow]; 3057 caj = ca + ci[crow]; 3058 pJ++; 3059 /* Perform sparse axpy operation. Note cjj includes apj. */ 3060 for (k=0,nextap=0; nextap<apnzj; k++) { 3061 if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]]; 3062 } 3063 ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 3064 pA++; 3065 } 3066 3067 /* Zero the current row info for A*P */ 3068 for (j=0; j<apnzj; j++) { 3069 apa[apj[j]] = 0.; 3070 apjdense[apj[j]] = 0; 3071 } 3072 } 3073 3074 /* Assemble the final matrix and clean up */ 3075 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3076 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3077 ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr); 3078 PetscFunctionReturn(0); 3079 } 3080 3081 PETSC_INTERN PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3082 { 3083 PetscErrorCode ierr; 3084 3085 PetscFunctionBegin; 3086 if (scall == MAT_INITIAL_MATRIX) { 3087 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3088 ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr); 3089 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3090 } 3091 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3092 ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr); 3093 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3094 PetscFunctionReturn(0); 3095 } 3096 3097 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 3098 { 3099 PetscErrorCode ierr; 3100 3101 PetscFunctionBegin; 3102 /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 3103 ierr = MatConvert(PP,MATMPIAIJ,MAT_INPLACE_MATRIX,&PP);CHKERRQ(ierr); 3104 ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 3105 PetscFunctionReturn(0); 3106 } 3107 3108 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 3109 { 3110 PetscFunctionBegin; 3111 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 3112 PetscFunctionReturn(0); 3113 } 3114 3115 PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3116 { 3117 PetscErrorCode ierr; 3118 3119 PetscFunctionBegin; 3120 if (scall == MAT_INITIAL_MATRIX) { 3121 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3122 ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);CHKERRQ(ierr); 3123 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3124 } 3125 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3126 ierr = ((*C)->ops->ptapnumeric)(A,P,*C);CHKERRQ(ierr); 3127 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3128 PetscFunctionReturn(0); 3129 } 3130 3131 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3132 { 3133 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3134 Mat a = b->AIJ,B; 3135 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 3136 PetscErrorCode ierr; 3137 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3138 PetscInt *cols; 3139 PetscScalar *vals; 3140 3141 PetscFunctionBegin; 3142 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 3143 ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr); 3144 for (i=0; i<m; i++) { 3145 nmax = PetscMax(nmax,aij->ilen[i]); 3146 for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i]; 3147 } 3148 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 3149 ierr = PetscFree(ilen);CHKERRQ(ierr); 3150 ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr); 3151 ii = 0; 3152 for (i=0; i<m; i++) { 3153 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3154 for (j=0; j<dof; j++) { 3155 for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j; 3156 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3157 ii++; 3158 } 3159 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3160 } 3161 ierr = PetscFree(icols);CHKERRQ(ierr); 3162 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3163 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3164 3165 if (reuse == MAT_INPLACE_MATRIX) { 3166 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 3167 } else { 3168 *newmat = B; 3169 } 3170 PetscFunctionReturn(0); 3171 } 3172 3173 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3174 3175 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3176 { 3177 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3178 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 3179 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 3180 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 3181 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 3182 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 3183 PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0; 3184 PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL; 3185 PetscInt rstart,cstart,*garray,ii,k; 3186 PetscErrorCode ierr; 3187 PetscScalar *vals,*ovals; 3188 3189 PetscFunctionBegin; 3190 ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr); 3191 for (i=0; i<A->rmap->n/dof; i++) { 3192 nmax = PetscMax(nmax,AIJ->ilen[i]); 3193 onmax = PetscMax(onmax,OAIJ->ilen[i]); 3194 for (j=0; j<dof; j++) { 3195 dnz[dof*i+j] = AIJ->ilen[i]; 3196 onz[dof*i+j] = OAIJ->ilen[i]; 3197 } 3198 } 3199 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 3200 ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr); 3201 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 3202 3203 ierr = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr); 3204 rstart = dof*maij->A->rmap->rstart; 3205 cstart = dof*maij->A->cmap->rstart; 3206 garray = mpiaij->garray; 3207 3208 ii = rstart; 3209 for (i=0; i<A->rmap->n/dof; i++) { 3210 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3211 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3212 for (j=0; j<dof; j++) { 3213 for (k=0; k<ncols; k++) { 3214 icols[k] = cstart + dof*cols[k]+j; 3215 } 3216 for (k=0; k<oncols; k++) { 3217 oicols[k] = dof*garray[ocols[k]]+j; 3218 } 3219 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3220 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 3221 ii++; 3222 } 3223 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3224 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3225 } 3226 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 3227 3228 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3229 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3230 3231 if (reuse == MAT_INPLACE_MATRIX) { 3232 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 3233 ((PetscObject)A)->refct = 1; 3234 3235 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 3236 3237 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3238 } else { 3239 *newmat = B; 3240 } 3241 PetscFunctionReturn(0); 3242 } 3243 3244 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 3245 { 3246 PetscErrorCode ierr; 3247 Mat A; 3248 3249 PetscFunctionBegin; 3250 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 3251 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 3252 ierr = MatDestroy(&A);CHKERRQ(ierr); 3253 PetscFunctionReturn(0); 3254 } 3255 3256 PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[]) 3257 { 3258 PetscErrorCode ierr; 3259 Mat A; 3260 3261 PetscFunctionBegin; 3262 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 3263 ierr = MatCreateSubMatrices(A,n,irow,icol,scall,submat);CHKERRQ(ierr); 3264 ierr = MatDestroy(&A);CHKERRQ(ierr); 3265 PetscFunctionReturn(0); 3266 } 3267 3268 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 3269 3270 3271 /* ---------------------------------------------------------------------------------- */ 3272 /*@ 3273 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 3274 operations for multicomponent problems. It interpolates each component the same 3275 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 3276 and MATMPIAIJ for distributed matrices. 3277 3278 Collective 3279 3280 Input Parameters: 3281 + A - the AIJ matrix describing the action on blocks 3282 - dof - the block size (number of components per node) 3283 3284 Output Parameter: 3285 . maij - the new MAIJ matrix 3286 3287 Operations provided: 3288 + MatMult 3289 . MatMultTranspose 3290 . MatMultAdd 3291 . MatMultTransposeAdd 3292 - MatView 3293 3294 Level: advanced 3295 3296 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ 3297 @*/ 3298 PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 3299 { 3300 PetscErrorCode ierr; 3301 PetscMPIInt size; 3302 PetscInt n; 3303 Mat B; 3304 3305 PetscFunctionBegin; 3306 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3307 3308 if (dof == 1) *maij = A; 3309 else { 3310 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3311 ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3312 ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr); 3313 ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr); 3314 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3315 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3316 3317 B->assembled = PETSC_TRUE; 3318 3319 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3320 if (size == 1) { 3321 Mat_SeqMAIJ *b; 3322 3323 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 3324 3325 B->ops->setup = NULL; 3326 B->ops->destroy = MatDestroy_SeqMAIJ; 3327 B->ops->view = MatView_SeqMAIJ; 3328 b = (Mat_SeqMAIJ*)B->data; 3329 b->dof = dof; 3330 b->AIJ = A; 3331 3332 if (dof == 2) { 3333 B->ops->mult = MatMult_SeqMAIJ_2; 3334 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3335 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3336 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3337 } else if (dof == 3) { 3338 B->ops->mult = MatMult_SeqMAIJ_3; 3339 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3340 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3341 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3342 } else if (dof == 4) { 3343 B->ops->mult = MatMult_SeqMAIJ_4; 3344 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3345 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3346 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3347 } else if (dof == 5) { 3348 B->ops->mult = MatMult_SeqMAIJ_5; 3349 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3350 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3351 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 3352 } else if (dof == 6) { 3353 B->ops->mult = MatMult_SeqMAIJ_6; 3354 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 3355 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 3356 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3357 } else if (dof == 7) { 3358 B->ops->mult = MatMult_SeqMAIJ_7; 3359 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3360 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3361 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 3362 } else if (dof == 8) { 3363 B->ops->mult = MatMult_SeqMAIJ_8; 3364 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 3365 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 3366 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 3367 } else if (dof == 9) { 3368 B->ops->mult = MatMult_SeqMAIJ_9; 3369 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 3370 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 3371 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3372 } else if (dof == 10) { 3373 B->ops->mult = MatMult_SeqMAIJ_10; 3374 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3375 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3376 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3377 } else if (dof == 11) { 3378 B->ops->mult = MatMult_SeqMAIJ_11; 3379 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3380 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3381 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 3382 } else if (dof == 16) { 3383 B->ops->mult = MatMult_SeqMAIJ_16; 3384 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 3385 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 3386 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3387 } else if (dof == 18) { 3388 B->ops->mult = MatMult_SeqMAIJ_18; 3389 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3390 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3391 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3392 } else { 3393 B->ops->mult = MatMult_SeqMAIJ_N; 3394 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 3395 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 3396 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 3397 } 3398 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3399 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 3400 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijperm_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 3401 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijmkl_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 3402 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqmaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 3403 } else { 3404 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data; 3405 Mat_MPIMAIJ *b; 3406 IS from,to; 3407 Vec gvec; 3408 3409 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 3410 3411 B->ops->setup = NULL; 3412 B->ops->destroy = MatDestroy_MPIMAIJ; 3413 B->ops->view = MatView_MPIMAIJ; 3414 3415 b = (Mat_MPIMAIJ*)B->data; 3416 b->dof = dof; 3417 b->A = A; 3418 3419 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 3420 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 3421 3422 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3423 ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr); 3424 ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr); 3425 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3426 ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr); 3427 3428 /* create two temporary Index sets for build scatter gather */ 3429 ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 3430 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3431 3432 /* create temporary global vector to generate scatter context */ 3433 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 3434 3435 /* generate the scatter context */ 3436 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3437 3438 ierr = ISDestroy(&from);CHKERRQ(ierr); 3439 ierr = ISDestroy(&to);CHKERRQ(ierr); 3440 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 3441 3442 B->ops->mult = MatMult_MPIMAIJ_dof; 3443 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3444 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3445 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3446 3447 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3448 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);CHKERRQ(ierr); 3449 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpimaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 3450 } 3451 B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 3452 B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 3453 ierr = MatSetUp(B);CHKERRQ(ierr); 3454 *maij = B; 3455 ierr = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr); 3456 } 3457 PetscFunctionReturn(0); 3458 } 3459