1 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20 #include <../src/mat/utils/freespace.h> 21 22 /*@ 23 MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 24 25 Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 26 27 Input Parameter: 28 . A - the MAIJ matrix 29 30 Output Parameter: 31 . B - the AIJ matrix 32 33 Level: advanced 34 35 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 36 37 .seealso: MatCreateMAIJ() 38 @*/ 39 PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B) 40 { 41 PetscErrorCode ierr; 42 PetscBool ismpimaij,isseqmaij; 43 44 PetscFunctionBegin; 45 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 46 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 47 if (ismpimaij) { 48 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 49 50 *B = b->A; 51 } else if (isseqmaij) { 52 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 53 54 *B = b->AIJ; 55 } else { 56 *B = A; 57 } 58 PetscFunctionReturn(0); 59 } 60 61 /*@ 62 MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 63 64 Logically Collective 65 66 Input Parameter: 67 + A - the MAIJ matrix 68 - dof - the block size for the new matrix 69 70 Output Parameter: 71 . B - the new MAIJ matrix 72 73 Level: advanced 74 75 .seealso: MatCreateMAIJ() 76 @*/ 77 PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 78 { 79 PetscErrorCode ierr; 80 Mat Aij = NULL; 81 82 PetscFunctionBegin; 83 PetscValidLogicalCollectiveInt(A,dof,2); 84 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 85 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 86 PetscFunctionReturn(0); 87 } 88 89 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 90 { 91 PetscErrorCode ierr; 92 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 93 94 PetscFunctionBegin; 95 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 96 ierr = PetscFree(A->data);CHKERRQ(ierr); 97 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);CHKERRQ(ierr); 98 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);CHKERRQ(ierr); 99 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijperm_seqmaij_C",NULL);CHKERRQ(ierr); 100 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijmkl_seqmaij_C",NULL);CHKERRQ(ierr); 101 PetscFunctionReturn(0); 102 } 103 104 PetscErrorCode MatSetUp_MAIJ(Mat A) 105 { 106 PetscFunctionBegin; 107 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices"); 108 PetscFunctionReturn(0); 109 } 110 111 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 112 { 113 PetscErrorCode ierr; 114 Mat B; 115 116 PetscFunctionBegin; 117 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 118 ierr = MatView(B,viewer);CHKERRQ(ierr); 119 ierr = MatDestroy(&B);CHKERRQ(ierr); 120 PetscFunctionReturn(0); 121 } 122 123 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 124 { 125 PetscErrorCode ierr; 126 Mat B; 127 128 PetscFunctionBegin; 129 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 130 ierr = MatView(B,viewer);CHKERRQ(ierr); 131 ierr = MatDestroy(&B);CHKERRQ(ierr); 132 PetscFunctionReturn(0); 133 } 134 135 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 136 { 137 PetscErrorCode ierr; 138 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 139 140 PetscFunctionBegin; 141 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 142 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 143 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 144 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 145 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 146 ierr = PetscFree(A->data);CHKERRQ(ierr); 147 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);CHKERRQ(ierr); 148 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);CHKERRQ(ierr); 149 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 150 PetscFunctionReturn(0); 151 } 152 153 /*MC 154 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 155 multicomponent problems, interpolating or restricting each component the same way independently. 156 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 157 158 Operations provided: 159 . MatMult 160 . MatMultTranspose 161 . MatMultAdd 162 . MatMultTransposeAdd 163 164 Level: advanced 165 166 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ() 167 M*/ 168 169 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 170 { 171 PetscErrorCode ierr; 172 Mat_MPIMAIJ *b; 173 PetscMPIInt size; 174 175 PetscFunctionBegin; 176 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 177 A->data = (void*)b; 178 179 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 180 181 A->ops->setup = MatSetUp_MAIJ; 182 183 b->AIJ = 0; 184 b->dof = 0; 185 b->OAIJ = 0; 186 b->ctx = 0; 187 b->w = 0; 188 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 189 if (size == 1) { 190 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 191 } else { 192 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 193 } 194 A->preallocated = PETSC_TRUE; 195 A->assembled = PETSC_TRUE; 196 PetscFunctionReturn(0); 197 } 198 199 /* --------------------------------------------------------------------------------------*/ 200 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 201 { 202 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 203 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 204 const PetscScalar *x,*v; 205 PetscScalar *y, sum1, sum2; 206 PetscErrorCode ierr; 207 PetscInt nonzerorow=0,n,i,jrow,j; 208 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 209 210 PetscFunctionBegin; 211 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 212 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 213 idx = a->j; 214 v = a->a; 215 ii = a->i; 216 217 for (i=0; i<m; i++) { 218 jrow = ii[i]; 219 n = ii[i+1] - jrow; 220 sum1 = 0.0; 221 sum2 = 0.0; 222 223 nonzerorow += (n>0); 224 for (j=0; j<n; j++) { 225 sum1 += v[jrow]*x[2*idx[jrow]]; 226 sum2 += v[jrow]*x[2*idx[jrow]+1]; 227 jrow++; 228 } 229 y[2*i] = sum1; 230 y[2*i+1] = sum2; 231 } 232 233 ierr = PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr); 234 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 235 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 236 PetscFunctionReturn(0); 237 } 238 239 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 240 { 241 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 242 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 243 const PetscScalar *x,*v; 244 PetscScalar *y,alpha1,alpha2; 245 PetscErrorCode ierr; 246 PetscInt n,i; 247 const PetscInt m = b->AIJ->rmap->n,*idx; 248 249 PetscFunctionBegin; 250 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 251 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 252 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 253 254 for (i=0; i<m; i++) { 255 idx = a->j + a->i[i]; 256 v = a->a + a->i[i]; 257 n = a->i[i+1] - a->i[i]; 258 alpha1 = x[2*i]; 259 alpha2 = x[2*i+1]; 260 while (n-->0) { 261 y[2*(*idx)] += alpha1*(*v); 262 y[2*(*idx)+1] += alpha2*(*v); 263 idx++; v++; 264 } 265 } 266 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 267 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 268 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 269 PetscFunctionReturn(0); 270 } 271 272 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 273 { 274 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 275 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 276 const PetscScalar *x,*v; 277 PetscScalar *y,sum1, sum2; 278 PetscErrorCode ierr; 279 PetscInt n,i,jrow,j; 280 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 281 282 PetscFunctionBegin; 283 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 284 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 285 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 286 idx = a->j; 287 v = a->a; 288 ii = a->i; 289 290 for (i=0; i<m; i++) { 291 jrow = ii[i]; 292 n = ii[i+1] - jrow; 293 sum1 = 0.0; 294 sum2 = 0.0; 295 for (j=0; j<n; j++) { 296 sum1 += v[jrow]*x[2*idx[jrow]]; 297 sum2 += v[jrow]*x[2*idx[jrow]+1]; 298 jrow++; 299 } 300 y[2*i] += sum1; 301 y[2*i+1] += sum2; 302 } 303 304 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 305 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 306 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 307 PetscFunctionReturn(0); 308 } 309 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 310 { 311 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 312 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 313 const PetscScalar *x,*v; 314 PetscScalar *y,alpha1,alpha2; 315 PetscErrorCode ierr; 316 PetscInt n,i; 317 const PetscInt m = b->AIJ->rmap->n,*idx; 318 319 PetscFunctionBegin; 320 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 321 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 322 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 323 324 for (i=0; i<m; i++) { 325 idx = a->j + a->i[i]; 326 v = a->a + a->i[i]; 327 n = a->i[i+1] - a->i[i]; 328 alpha1 = x[2*i]; 329 alpha2 = x[2*i+1]; 330 while (n-->0) { 331 y[2*(*idx)] += alpha1*(*v); 332 y[2*(*idx)+1] += alpha2*(*v); 333 idx++; v++; 334 } 335 } 336 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr); 337 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 338 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 339 PetscFunctionReturn(0); 340 } 341 /* --------------------------------------------------------------------------------------*/ 342 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 343 { 344 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 345 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 346 const PetscScalar *x,*v; 347 PetscScalar *y,sum1, sum2, sum3; 348 PetscErrorCode ierr; 349 PetscInt nonzerorow=0,n,i,jrow,j; 350 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 351 352 PetscFunctionBegin; 353 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 354 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 355 idx = a->j; 356 v = a->a; 357 ii = a->i; 358 359 for (i=0; i<m; i++) { 360 jrow = ii[i]; 361 n = ii[i+1] - jrow; 362 sum1 = 0.0; 363 sum2 = 0.0; 364 sum3 = 0.0; 365 366 nonzerorow += (n>0); 367 for (j=0; j<n; j++) { 368 sum1 += v[jrow]*x[3*idx[jrow]]; 369 sum2 += v[jrow]*x[3*idx[jrow]+1]; 370 sum3 += v[jrow]*x[3*idx[jrow]+2]; 371 jrow++; 372 } 373 y[3*i] = sum1; 374 y[3*i+1] = sum2; 375 y[3*i+2] = sum3; 376 } 377 378 ierr = PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr); 379 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 380 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 381 PetscFunctionReturn(0); 382 } 383 384 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 385 { 386 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 387 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 388 const PetscScalar *x,*v; 389 PetscScalar *y,alpha1,alpha2,alpha3; 390 PetscErrorCode ierr; 391 PetscInt n,i; 392 const PetscInt m = b->AIJ->rmap->n,*idx; 393 394 PetscFunctionBegin; 395 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 396 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 397 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 398 399 for (i=0; i<m; i++) { 400 idx = a->j + a->i[i]; 401 v = a->a + a->i[i]; 402 n = a->i[i+1] - a->i[i]; 403 alpha1 = x[3*i]; 404 alpha2 = x[3*i+1]; 405 alpha3 = x[3*i+2]; 406 while (n-->0) { 407 y[3*(*idx)] += alpha1*(*v); 408 y[3*(*idx)+1] += alpha2*(*v); 409 y[3*(*idx)+2] += alpha3*(*v); 410 idx++; v++; 411 } 412 } 413 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 414 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 415 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 416 PetscFunctionReturn(0); 417 } 418 419 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 420 { 421 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 422 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 423 const PetscScalar *x,*v; 424 PetscScalar *y,sum1, sum2, sum3; 425 PetscErrorCode ierr; 426 PetscInt n,i,jrow,j; 427 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 428 429 PetscFunctionBegin; 430 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 431 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 432 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 433 idx = a->j; 434 v = a->a; 435 ii = a->i; 436 437 for (i=0; i<m; i++) { 438 jrow = ii[i]; 439 n = ii[i+1] - jrow; 440 sum1 = 0.0; 441 sum2 = 0.0; 442 sum3 = 0.0; 443 for (j=0; j<n; j++) { 444 sum1 += v[jrow]*x[3*idx[jrow]]; 445 sum2 += v[jrow]*x[3*idx[jrow]+1]; 446 sum3 += v[jrow]*x[3*idx[jrow]+2]; 447 jrow++; 448 } 449 y[3*i] += sum1; 450 y[3*i+1] += sum2; 451 y[3*i+2] += sum3; 452 } 453 454 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 455 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 456 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 457 PetscFunctionReturn(0); 458 } 459 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 460 { 461 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 462 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 463 const PetscScalar *x,*v; 464 PetscScalar *y,alpha1,alpha2,alpha3; 465 PetscErrorCode ierr; 466 PetscInt n,i; 467 const PetscInt m = b->AIJ->rmap->n,*idx; 468 469 PetscFunctionBegin; 470 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 471 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 472 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 473 for (i=0; i<m; i++) { 474 idx = a->j + a->i[i]; 475 v = a->a + a->i[i]; 476 n = a->i[i+1] - a->i[i]; 477 alpha1 = x[3*i]; 478 alpha2 = x[3*i+1]; 479 alpha3 = x[3*i+2]; 480 while (n-->0) { 481 y[3*(*idx)] += alpha1*(*v); 482 y[3*(*idx)+1] += alpha2*(*v); 483 y[3*(*idx)+2] += alpha3*(*v); 484 idx++; v++; 485 } 486 } 487 ierr = PetscLogFlops(6.0*a->nz);CHKERRQ(ierr); 488 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 489 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 490 PetscFunctionReturn(0); 491 } 492 493 /* ------------------------------------------------------------------------------*/ 494 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 495 { 496 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 497 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 498 const PetscScalar *x,*v; 499 PetscScalar *y,sum1, sum2, sum3, sum4; 500 PetscErrorCode ierr; 501 PetscInt nonzerorow=0,n,i,jrow,j; 502 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 503 504 PetscFunctionBegin; 505 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 506 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 507 idx = a->j; 508 v = a->a; 509 ii = a->i; 510 511 for (i=0; i<m; i++) { 512 jrow = ii[i]; 513 n = ii[i+1] - jrow; 514 sum1 = 0.0; 515 sum2 = 0.0; 516 sum3 = 0.0; 517 sum4 = 0.0; 518 nonzerorow += (n>0); 519 for (j=0; j<n; j++) { 520 sum1 += v[jrow]*x[4*idx[jrow]]; 521 sum2 += v[jrow]*x[4*idx[jrow]+1]; 522 sum3 += v[jrow]*x[4*idx[jrow]+2]; 523 sum4 += v[jrow]*x[4*idx[jrow]+3]; 524 jrow++; 525 } 526 y[4*i] = sum1; 527 y[4*i+1] = sum2; 528 y[4*i+2] = sum3; 529 y[4*i+3] = sum4; 530 } 531 532 ierr = PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr); 533 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 534 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 535 PetscFunctionReturn(0); 536 } 537 538 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 539 { 540 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 541 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 542 const PetscScalar *x,*v; 543 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 544 PetscErrorCode ierr; 545 PetscInt n,i; 546 const PetscInt m = b->AIJ->rmap->n,*idx; 547 548 PetscFunctionBegin; 549 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 550 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 551 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 552 for (i=0; i<m; i++) { 553 idx = a->j + a->i[i]; 554 v = a->a + a->i[i]; 555 n = a->i[i+1] - a->i[i]; 556 alpha1 = x[4*i]; 557 alpha2 = x[4*i+1]; 558 alpha3 = x[4*i+2]; 559 alpha4 = x[4*i+3]; 560 while (n-->0) { 561 y[4*(*idx)] += alpha1*(*v); 562 y[4*(*idx)+1] += alpha2*(*v); 563 y[4*(*idx)+2] += alpha3*(*v); 564 y[4*(*idx)+3] += alpha4*(*v); 565 idx++; v++; 566 } 567 } 568 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 569 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 570 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 571 PetscFunctionReturn(0); 572 } 573 574 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 575 { 576 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 577 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 578 const PetscScalar *x,*v; 579 PetscScalar *y,sum1, sum2, sum3, sum4; 580 PetscErrorCode ierr; 581 PetscInt n,i,jrow,j; 582 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 583 584 PetscFunctionBegin; 585 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 586 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 587 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 588 idx = a->j; 589 v = a->a; 590 ii = a->i; 591 592 for (i=0; i<m; i++) { 593 jrow = ii[i]; 594 n = ii[i+1] - jrow; 595 sum1 = 0.0; 596 sum2 = 0.0; 597 sum3 = 0.0; 598 sum4 = 0.0; 599 for (j=0; j<n; j++) { 600 sum1 += v[jrow]*x[4*idx[jrow]]; 601 sum2 += v[jrow]*x[4*idx[jrow]+1]; 602 sum3 += v[jrow]*x[4*idx[jrow]+2]; 603 sum4 += v[jrow]*x[4*idx[jrow]+3]; 604 jrow++; 605 } 606 y[4*i] += sum1; 607 y[4*i+1] += sum2; 608 y[4*i+2] += sum3; 609 y[4*i+3] += sum4; 610 } 611 612 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 613 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 614 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 615 PetscFunctionReturn(0); 616 } 617 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 618 { 619 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 620 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 621 const PetscScalar *x,*v; 622 PetscScalar *y,alpha1,alpha2,alpha3,alpha4; 623 PetscErrorCode ierr; 624 PetscInt n,i; 625 const PetscInt m = b->AIJ->rmap->n,*idx; 626 627 PetscFunctionBegin; 628 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 629 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 630 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 631 632 for (i=0; i<m; i++) { 633 idx = a->j + a->i[i]; 634 v = a->a + a->i[i]; 635 n = a->i[i+1] - a->i[i]; 636 alpha1 = x[4*i]; 637 alpha2 = x[4*i+1]; 638 alpha3 = x[4*i+2]; 639 alpha4 = x[4*i+3]; 640 while (n-->0) { 641 y[4*(*idx)] += alpha1*(*v); 642 y[4*(*idx)+1] += alpha2*(*v); 643 y[4*(*idx)+2] += alpha3*(*v); 644 y[4*(*idx)+3] += alpha4*(*v); 645 idx++; v++; 646 } 647 } 648 ierr = PetscLogFlops(8.0*a->nz);CHKERRQ(ierr); 649 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 650 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 651 PetscFunctionReturn(0); 652 } 653 /* ------------------------------------------------------------------------------*/ 654 655 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 656 { 657 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 658 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 659 const PetscScalar *x,*v; 660 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 661 PetscErrorCode ierr; 662 PetscInt nonzerorow=0,n,i,jrow,j; 663 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 664 665 PetscFunctionBegin; 666 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 667 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 668 idx = a->j; 669 v = a->a; 670 ii = a->i; 671 672 for (i=0; i<m; i++) { 673 jrow = ii[i]; 674 n = ii[i+1] - jrow; 675 sum1 = 0.0; 676 sum2 = 0.0; 677 sum3 = 0.0; 678 sum4 = 0.0; 679 sum5 = 0.0; 680 681 nonzerorow += (n>0); 682 for (j=0; j<n; j++) { 683 sum1 += v[jrow]*x[5*idx[jrow]]; 684 sum2 += v[jrow]*x[5*idx[jrow]+1]; 685 sum3 += v[jrow]*x[5*idx[jrow]+2]; 686 sum4 += v[jrow]*x[5*idx[jrow]+3]; 687 sum5 += v[jrow]*x[5*idx[jrow]+4]; 688 jrow++; 689 } 690 y[5*i] = sum1; 691 y[5*i+1] = sum2; 692 y[5*i+2] = sum3; 693 y[5*i+3] = sum4; 694 y[5*i+4] = sum5; 695 } 696 697 ierr = PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr); 698 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 699 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 704 { 705 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 706 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 707 const PetscScalar *x,*v; 708 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 709 PetscErrorCode ierr; 710 PetscInt n,i; 711 const PetscInt m = b->AIJ->rmap->n,*idx; 712 713 PetscFunctionBegin; 714 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 715 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 716 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 717 718 for (i=0; i<m; i++) { 719 idx = a->j + a->i[i]; 720 v = a->a + a->i[i]; 721 n = a->i[i+1] - a->i[i]; 722 alpha1 = x[5*i]; 723 alpha2 = x[5*i+1]; 724 alpha3 = x[5*i+2]; 725 alpha4 = x[5*i+3]; 726 alpha5 = x[5*i+4]; 727 while (n-->0) { 728 y[5*(*idx)] += alpha1*(*v); 729 y[5*(*idx)+1] += alpha2*(*v); 730 y[5*(*idx)+2] += alpha3*(*v); 731 y[5*(*idx)+3] += alpha4*(*v); 732 y[5*(*idx)+4] += alpha5*(*v); 733 idx++; v++; 734 } 735 } 736 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 737 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 738 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 739 PetscFunctionReturn(0); 740 } 741 742 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 743 { 744 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 745 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 746 const PetscScalar *x,*v; 747 PetscScalar *y,sum1, sum2, sum3, sum4, sum5; 748 PetscErrorCode ierr; 749 PetscInt n,i,jrow,j; 750 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 751 752 PetscFunctionBegin; 753 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 754 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 755 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 756 idx = a->j; 757 v = a->a; 758 ii = a->i; 759 760 for (i=0; i<m; i++) { 761 jrow = ii[i]; 762 n = ii[i+1] - jrow; 763 sum1 = 0.0; 764 sum2 = 0.0; 765 sum3 = 0.0; 766 sum4 = 0.0; 767 sum5 = 0.0; 768 for (j=0; j<n; j++) { 769 sum1 += v[jrow]*x[5*idx[jrow]]; 770 sum2 += v[jrow]*x[5*idx[jrow]+1]; 771 sum3 += v[jrow]*x[5*idx[jrow]+2]; 772 sum4 += v[jrow]*x[5*idx[jrow]+3]; 773 sum5 += v[jrow]*x[5*idx[jrow]+4]; 774 jrow++; 775 } 776 y[5*i] += sum1; 777 y[5*i+1] += sum2; 778 y[5*i+2] += sum3; 779 y[5*i+3] += sum4; 780 y[5*i+4] += sum5; 781 } 782 783 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 784 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 785 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 786 PetscFunctionReturn(0); 787 } 788 789 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 790 { 791 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 792 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 793 const PetscScalar *x,*v; 794 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5; 795 PetscErrorCode ierr; 796 PetscInt n,i; 797 const PetscInt m = b->AIJ->rmap->n,*idx; 798 799 PetscFunctionBegin; 800 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 801 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 802 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 803 804 for (i=0; i<m; i++) { 805 idx = a->j + a->i[i]; 806 v = a->a + a->i[i]; 807 n = a->i[i+1] - a->i[i]; 808 alpha1 = x[5*i]; 809 alpha2 = x[5*i+1]; 810 alpha3 = x[5*i+2]; 811 alpha4 = x[5*i+3]; 812 alpha5 = x[5*i+4]; 813 while (n-->0) { 814 y[5*(*idx)] += alpha1*(*v); 815 y[5*(*idx)+1] += alpha2*(*v); 816 y[5*(*idx)+2] += alpha3*(*v); 817 y[5*(*idx)+3] += alpha4*(*v); 818 y[5*(*idx)+4] += alpha5*(*v); 819 idx++; v++; 820 } 821 } 822 ierr = PetscLogFlops(10.0*a->nz);CHKERRQ(ierr); 823 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 824 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 825 PetscFunctionReturn(0); 826 } 827 828 /* ------------------------------------------------------------------------------*/ 829 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 830 { 831 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 832 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 833 const PetscScalar *x,*v; 834 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 835 PetscErrorCode ierr; 836 PetscInt nonzerorow=0,n,i,jrow,j; 837 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 838 839 PetscFunctionBegin; 840 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 841 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 842 idx = a->j; 843 v = a->a; 844 ii = a->i; 845 846 for (i=0; i<m; i++) { 847 jrow = ii[i]; 848 n = ii[i+1] - jrow; 849 sum1 = 0.0; 850 sum2 = 0.0; 851 sum3 = 0.0; 852 sum4 = 0.0; 853 sum5 = 0.0; 854 sum6 = 0.0; 855 856 nonzerorow += (n>0); 857 for (j=0; j<n; j++) { 858 sum1 += v[jrow]*x[6*idx[jrow]]; 859 sum2 += v[jrow]*x[6*idx[jrow]+1]; 860 sum3 += v[jrow]*x[6*idx[jrow]+2]; 861 sum4 += v[jrow]*x[6*idx[jrow]+3]; 862 sum5 += v[jrow]*x[6*idx[jrow]+4]; 863 sum6 += v[jrow]*x[6*idx[jrow]+5]; 864 jrow++; 865 } 866 y[6*i] = sum1; 867 y[6*i+1] = sum2; 868 y[6*i+2] = sum3; 869 y[6*i+3] = sum4; 870 y[6*i+4] = sum5; 871 y[6*i+5] = sum6; 872 } 873 874 ierr = PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr); 875 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 876 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 877 PetscFunctionReturn(0); 878 } 879 880 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 881 { 882 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 883 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 884 const PetscScalar *x,*v; 885 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 886 PetscErrorCode ierr; 887 PetscInt n,i; 888 const PetscInt m = b->AIJ->rmap->n,*idx; 889 890 PetscFunctionBegin; 891 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 892 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 893 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 894 895 for (i=0; i<m; i++) { 896 idx = a->j + a->i[i]; 897 v = a->a + a->i[i]; 898 n = a->i[i+1] - a->i[i]; 899 alpha1 = x[6*i]; 900 alpha2 = x[6*i+1]; 901 alpha3 = x[6*i+2]; 902 alpha4 = x[6*i+3]; 903 alpha5 = x[6*i+4]; 904 alpha6 = x[6*i+5]; 905 while (n-->0) { 906 y[6*(*idx)] += alpha1*(*v); 907 y[6*(*idx)+1] += alpha2*(*v); 908 y[6*(*idx)+2] += alpha3*(*v); 909 y[6*(*idx)+3] += alpha4*(*v); 910 y[6*(*idx)+4] += alpha5*(*v); 911 y[6*(*idx)+5] += alpha6*(*v); 912 idx++; v++; 913 } 914 } 915 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 916 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 917 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 918 PetscFunctionReturn(0); 919 } 920 921 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 922 { 923 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 924 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 925 const PetscScalar *x,*v; 926 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6; 927 PetscErrorCode ierr; 928 PetscInt n,i,jrow,j; 929 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 930 931 PetscFunctionBegin; 932 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 933 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 934 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 935 idx = a->j; 936 v = a->a; 937 ii = a->i; 938 939 for (i=0; i<m; i++) { 940 jrow = ii[i]; 941 n = ii[i+1] - jrow; 942 sum1 = 0.0; 943 sum2 = 0.0; 944 sum3 = 0.0; 945 sum4 = 0.0; 946 sum5 = 0.0; 947 sum6 = 0.0; 948 for (j=0; j<n; j++) { 949 sum1 += v[jrow]*x[6*idx[jrow]]; 950 sum2 += v[jrow]*x[6*idx[jrow]+1]; 951 sum3 += v[jrow]*x[6*idx[jrow]+2]; 952 sum4 += v[jrow]*x[6*idx[jrow]+3]; 953 sum5 += v[jrow]*x[6*idx[jrow]+4]; 954 sum6 += v[jrow]*x[6*idx[jrow]+5]; 955 jrow++; 956 } 957 y[6*i] += sum1; 958 y[6*i+1] += sum2; 959 y[6*i+2] += sum3; 960 y[6*i+3] += sum4; 961 y[6*i+4] += sum5; 962 y[6*i+5] += sum6; 963 } 964 965 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 966 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 967 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 968 PetscFunctionReturn(0); 969 } 970 971 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 972 { 973 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 974 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 975 const PetscScalar *x,*v; 976 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 977 PetscErrorCode ierr; 978 PetscInt n,i; 979 const PetscInt m = b->AIJ->rmap->n,*idx; 980 981 PetscFunctionBegin; 982 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 983 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 984 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 985 986 for (i=0; i<m; i++) { 987 idx = a->j + a->i[i]; 988 v = a->a + a->i[i]; 989 n = a->i[i+1] - a->i[i]; 990 alpha1 = x[6*i]; 991 alpha2 = x[6*i+1]; 992 alpha3 = x[6*i+2]; 993 alpha4 = x[6*i+3]; 994 alpha5 = x[6*i+4]; 995 alpha6 = x[6*i+5]; 996 while (n-->0) { 997 y[6*(*idx)] += alpha1*(*v); 998 y[6*(*idx)+1] += alpha2*(*v); 999 y[6*(*idx)+2] += alpha3*(*v); 1000 y[6*(*idx)+3] += alpha4*(*v); 1001 y[6*(*idx)+4] += alpha5*(*v); 1002 y[6*(*idx)+5] += alpha6*(*v); 1003 idx++; v++; 1004 } 1005 } 1006 ierr = PetscLogFlops(12.0*a->nz);CHKERRQ(ierr); 1007 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1008 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1009 PetscFunctionReturn(0); 1010 } 1011 1012 /* ------------------------------------------------------------------------------*/ 1013 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1014 { 1015 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1016 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1017 const PetscScalar *x,*v; 1018 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1019 PetscErrorCode ierr; 1020 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1021 PetscInt nonzerorow=0,n,i,jrow,j; 1022 1023 PetscFunctionBegin; 1024 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1025 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1026 idx = a->j; 1027 v = a->a; 1028 ii = a->i; 1029 1030 for (i=0; i<m; i++) { 1031 jrow = ii[i]; 1032 n = ii[i+1] - jrow; 1033 sum1 = 0.0; 1034 sum2 = 0.0; 1035 sum3 = 0.0; 1036 sum4 = 0.0; 1037 sum5 = 0.0; 1038 sum6 = 0.0; 1039 sum7 = 0.0; 1040 1041 nonzerorow += (n>0); 1042 for (j=0; j<n; j++) { 1043 sum1 += v[jrow]*x[7*idx[jrow]]; 1044 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1045 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1046 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1047 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1048 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1049 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1050 jrow++; 1051 } 1052 y[7*i] = sum1; 1053 y[7*i+1] = sum2; 1054 y[7*i+2] = sum3; 1055 y[7*i+3] = sum4; 1056 y[7*i+4] = sum5; 1057 y[7*i+5] = sum6; 1058 y[7*i+6] = sum7; 1059 } 1060 1061 ierr = PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr); 1062 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1063 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1068 { 1069 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1070 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1071 const PetscScalar *x,*v; 1072 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1073 PetscErrorCode ierr; 1074 const PetscInt m = b->AIJ->rmap->n,*idx; 1075 PetscInt n,i; 1076 1077 PetscFunctionBegin; 1078 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1079 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1080 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1081 1082 for (i=0; i<m; i++) { 1083 idx = a->j + a->i[i]; 1084 v = a->a + a->i[i]; 1085 n = a->i[i+1] - a->i[i]; 1086 alpha1 = x[7*i]; 1087 alpha2 = x[7*i+1]; 1088 alpha3 = x[7*i+2]; 1089 alpha4 = x[7*i+3]; 1090 alpha5 = x[7*i+4]; 1091 alpha6 = x[7*i+5]; 1092 alpha7 = x[7*i+6]; 1093 while (n-->0) { 1094 y[7*(*idx)] += alpha1*(*v); 1095 y[7*(*idx)+1] += alpha2*(*v); 1096 y[7*(*idx)+2] += alpha3*(*v); 1097 y[7*(*idx)+3] += alpha4*(*v); 1098 y[7*(*idx)+4] += alpha5*(*v); 1099 y[7*(*idx)+5] += alpha6*(*v); 1100 y[7*(*idx)+6] += alpha7*(*v); 1101 idx++; v++; 1102 } 1103 } 1104 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1105 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1106 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1107 PetscFunctionReturn(0); 1108 } 1109 1110 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1111 { 1112 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1113 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1114 const PetscScalar *x,*v; 1115 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1116 PetscErrorCode ierr; 1117 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1118 PetscInt n,i,jrow,j; 1119 1120 PetscFunctionBegin; 1121 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1122 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1123 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1124 idx = a->j; 1125 v = a->a; 1126 ii = a->i; 1127 1128 for (i=0; i<m; i++) { 1129 jrow = ii[i]; 1130 n = ii[i+1] - jrow; 1131 sum1 = 0.0; 1132 sum2 = 0.0; 1133 sum3 = 0.0; 1134 sum4 = 0.0; 1135 sum5 = 0.0; 1136 sum6 = 0.0; 1137 sum7 = 0.0; 1138 for (j=0; j<n; j++) { 1139 sum1 += v[jrow]*x[7*idx[jrow]]; 1140 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1141 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1142 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1143 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1144 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1145 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1146 jrow++; 1147 } 1148 y[7*i] += sum1; 1149 y[7*i+1] += sum2; 1150 y[7*i+2] += sum3; 1151 y[7*i+3] += sum4; 1152 y[7*i+4] += sum5; 1153 y[7*i+5] += sum6; 1154 y[7*i+6] += sum7; 1155 } 1156 1157 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1158 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1159 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1160 PetscFunctionReturn(0); 1161 } 1162 1163 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1164 { 1165 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1166 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1167 const PetscScalar *x,*v; 1168 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1169 PetscErrorCode ierr; 1170 const PetscInt m = b->AIJ->rmap->n,*idx; 1171 PetscInt n,i; 1172 1173 PetscFunctionBegin; 1174 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1175 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1176 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1177 for (i=0; i<m; i++) { 1178 idx = a->j + a->i[i]; 1179 v = a->a + a->i[i]; 1180 n = a->i[i+1] - a->i[i]; 1181 alpha1 = x[7*i]; 1182 alpha2 = x[7*i+1]; 1183 alpha3 = x[7*i+2]; 1184 alpha4 = x[7*i+3]; 1185 alpha5 = x[7*i+4]; 1186 alpha6 = x[7*i+5]; 1187 alpha7 = x[7*i+6]; 1188 while (n-->0) { 1189 y[7*(*idx)] += alpha1*(*v); 1190 y[7*(*idx)+1] += alpha2*(*v); 1191 y[7*(*idx)+2] += alpha3*(*v); 1192 y[7*(*idx)+3] += alpha4*(*v); 1193 y[7*(*idx)+4] += alpha5*(*v); 1194 y[7*(*idx)+5] += alpha6*(*v); 1195 y[7*(*idx)+6] += alpha7*(*v); 1196 idx++; v++; 1197 } 1198 } 1199 ierr = PetscLogFlops(14.0*a->nz);CHKERRQ(ierr); 1200 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1201 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1202 PetscFunctionReturn(0); 1203 } 1204 1205 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1206 { 1207 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1208 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1209 const PetscScalar *x,*v; 1210 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1211 PetscErrorCode ierr; 1212 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1213 PetscInt nonzerorow=0,n,i,jrow,j; 1214 1215 PetscFunctionBegin; 1216 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1217 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1218 idx = a->j; 1219 v = a->a; 1220 ii = a->i; 1221 1222 for (i=0; i<m; i++) { 1223 jrow = ii[i]; 1224 n = ii[i+1] - jrow; 1225 sum1 = 0.0; 1226 sum2 = 0.0; 1227 sum3 = 0.0; 1228 sum4 = 0.0; 1229 sum5 = 0.0; 1230 sum6 = 0.0; 1231 sum7 = 0.0; 1232 sum8 = 0.0; 1233 1234 nonzerorow += (n>0); 1235 for (j=0; j<n; j++) { 1236 sum1 += v[jrow]*x[8*idx[jrow]]; 1237 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1238 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1239 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1240 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1241 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1242 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1243 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1244 jrow++; 1245 } 1246 y[8*i] = sum1; 1247 y[8*i+1] = sum2; 1248 y[8*i+2] = sum3; 1249 y[8*i+3] = sum4; 1250 y[8*i+4] = sum5; 1251 y[8*i+5] = sum6; 1252 y[8*i+6] = sum7; 1253 y[8*i+7] = sum8; 1254 } 1255 1256 ierr = PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);CHKERRQ(ierr); 1257 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1258 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1263 { 1264 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1265 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1266 const PetscScalar *x,*v; 1267 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1268 PetscErrorCode ierr; 1269 const PetscInt m = b->AIJ->rmap->n,*idx; 1270 PetscInt n,i; 1271 1272 PetscFunctionBegin; 1273 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1274 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1275 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1276 1277 for (i=0; i<m; i++) { 1278 idx = a->j + a->i[i]; 1279 v = a->a + a->i[i]; 1280 n = a->i[i+1] - a->i[i]; 1281 alpha1 = x[8*i]; 1282 alpha2 = x[8*i+1]; 1283 alpha3 = x[8*i+2]; 1284 alpha4 = x[8*i+3]; 1285 alpha5 = x[8*i+4]; 1286 alpha6 = x[8*i+5]; 1287 alpha7 = x[8*i+6]; 1288 alpha8 = x[8*i+7]; 1289 while (n-->0) { 1290 y[8*(*idx)] += alpha1*(*v); 1291 y[8*(*idx)+1] += alpha2*(*v); 1292 y[8*(*idx)+2] += alpha3*(*v); 1293 y[8*(*idx)+3] += alpha4*(*v); 1294 y[8*(*idx)+4] += alpha5*(*v); 1295 y[8*(*idx)+5] += alpha6*(*v); 1296 y[8*(*idx)+6] += alpha7*(*v); 1297 y[8*(*idx)+7] += alpha8*(*v); 1298 idx++; v++; 1299 } 1300 } 1301 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1302 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1303 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1308 { 1309 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1310 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1311 const PetscScalar *x,*v; 1312 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1313 PetscErrorCode ierr; 1314 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1315 PetscInt n,i,jrow,j; 1316 1317 PetscFunctionBegin; 1318 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1319 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1320 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1321 idx = a->j; 1322 v = a->a; 1323 ii = a->i; 1324 1325 for (i=0; i<m; i++) { 1326 jrow = ii[i]; 1327 n = ii[i+1] - jrow; 1328 sum1 = 0.0; 1329 sum2 = 0.0; 1330 sum3 = 0.0; 1331 sum4 = 0.0; 1332 sum5 = 0.0; 1333 sum6 = 0.0; 1334 sum7 = 0.0; 1335 sum8 = 0.0; 1336 for (j=0; j<n; j++) { 1337 sum1 += v[jrow]*x[8*idx[jrow]]; 1338 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1339 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1340 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1341 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1342 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1343 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1344 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1345 jrow++; 1346 } 1347 y[8*i] += sum1; 1348 y[8*i+1] += sum2; 1349 y[8*i+2] += sum3; 1350 y[8*i+3] += sum4; 1351 y[8*i+4] += sum5; 1352 y[8*i+5] += sum6; 1353 y[8*i+6] += sum7; 1354 y[8*i+7] += sum8; 1355 } 1356 1357 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1358 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1359 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1360 PetscFunctionReturn(0); 1361 } 1362 1363 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1364 { 1365 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1366 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1367 const PetscScalar *x,*v; 1368 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1369 PetscErrorCode ierr; 1370 const PetscInt m = b->AIJ->rmap->n,*idx; 1371 PetscInt n,i; 1372 1373 PetscFunctionBegin; 1374 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1375 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1376 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1377 for (i=0; i<m; i++) { 1378 idx = a->j + a->i[i]; 1379 v = a->a + a->i[i]; 1380 n = a->i[i+1] - a->i[i]; 1381 alpha1 = x[8*i]; 1382 alpha2 = x[8*i+1]; 1383 alpha3 = x[8*i+2]; 1384 alpha4 = x[8*i+3]; 1385 alpha5 = x[8*i+4]; 1386 alpha6 = x[8*i+5]; 1387 alpha7 = x[8*i+6]; 1388 alpha8 = x[8*i+7]; 1389 while (n-->0) { 1390 y[8*(*idx)] += alpha1*(*v); 1391 y[8*(*idx)+1] += alpha2*(*v); 1392 y[8*(*idx)+2] += alpha3*(*v); 1393 y[8*(*idx)+3] += alpha4*(*v); 1394 y[8*(*idx)+4] += alpha5*(*v); 1395 y[8*(*idx)+5] += alpha6*(*v); 1396 y[8*(*idx)+6] += alpha7*(*v); 1397 y[8*(*idx)+7] += alpha8*(*v); 1398 idx++; v++; 1399 } 1400 } 1401 ierr = PetscLogFlops(16.0*a->nz);CHKERRQ(ierr); 1402 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1403 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1404 PetscFunctionReturn(0); 1405 } 1406 1407 /* ------------------------------------------------------------------------------*/ 1408 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1409 { 1410 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1411 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1412 const PetscScalar *x,*v; 1413 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1414 PetscErrorCode ierr; 1415 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1416 PetscInt nonzerorow=0,n,i,jrow,j; 1417 1418 PetscFunctionBegin; 1419 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1420 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1421 idx = a->j; 1422 v = a->a; 1423 ii = a->i; 1424 1425 for (i=0; i<m; i++) { 1426 jrow = ii[i]; 1427 n = ii[i+1] - jrow; 1428 sum1 = 0.0; 1429 sum2 = 0.0; 1430 sum3 = 0.0; 1431 sum4 = 0.0; 1432 sum5 = 0.0; 1433 sum6 = 0.0; 1434 sum7 = 0.0; 1435 sum8 = 0.0; 1436 sum9 = 0.0; 1437 1438 nonzerorow += (n>0); 1439 for (j=0; j<n; j++) { 1440 sum1 += v[jrow]*x[9*idx[jrow]]; 1441 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1442 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1443 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1444 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1445 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1446 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1447 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1448 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1449 jrow++; 1450 } 1451 y[9*i] = sum1; 1452 y[9*i+1] = sum2; 1453 y[9*i+2] = sum3; 1454 y[9*i+3] = sum4; 1455 y[9*i+4] = sum5; 1456 y[9*i+5] = sum6; 1457 y[9*i+6] = sum7; 1458 y[9*i+7] = sum8; 1459 y[9*i+8] = sum9; 1460 } 1461 1462 ierr = PetscLogFlops(18.0*a->nz - 9*nonzerorow);CHKERRQ(ierr); 1463 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1464 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1465 PetscFunctionReturn(0); 1466 } 1467 1468 /* ------------------------------------------------------------------------------*/ 1469 1470 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1471 { 1472 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1473 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1474 const PetscScalar *x,*v; 1475 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1476 PetscErrorCode ierr; 1477 const PetscInt m = b->AIJ->rmap->n,*idx; 1478 PetscInt n,i; 1479 1480 PetscFunctionBegin; 1481 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1482 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1483 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1484 1485 for (i=0; i<m; i++) { 1486 idx = a->j + a->i[i]; 1487 v = a->a + a->i[i]; 1488 n = a->i[i+1] - a->i[i]; 1489 alpha1 = x[9*i]; 1490 alpha2 = x[9*i+1]; 1491 alpha3 = x[9*i+2]; 1492 alpha4 = x[9*i+3]; 1493 alpha5 = x[9*i+4]; 1494 alpha6 = x[9*i+5]; 1495 alpha7 = x[9*i+6]; 1496 alpha8 = x[9*i+7]; 1497 alpha9 = x[9*i+8]; 1498 while (n-->0) { 1499 y[9*(*idx)] += alpha1*(*v); 1500 y[9*(*idx)+1] += alpha2*(*v); 1501 y[9*(*idx)+2] += alpha3*(*v); 1502 y[9*(*idx)+3] += alpha4*(*v); 1503 y[9*(*idx)+4] += alpha5*(*v); 1504 y[9*(*idx)+5] += alpha6*(*v); 1505 y[9*(*idx)+6] += alpha7*(*v); 1506 y[9*(*idx)+7] += alpha8*(*v); 1507 y[9*(*idx)+8] += alpha9*(*v); 1508 idx++; v++; 1509 } 1510 } 1511 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1512 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1513 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1514 PetscFunctionReturn(0); 1515 } 1516 1517 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1518 { 1519 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1520 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1521 const PetscScalar *x,*v; 1522 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1523 PetscErrorCode ierr; 1524 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1525 PetscInt n,i,jrow,j; 1526 1527 PetscFunctionBegin; 1528 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1529 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1530 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1531 idx = a->j; 1532 v = a->a; 1533 ii = a->i; 1534 1535 for (i=0; i<m; i++) { 1536 jrow = ii[i]; 1537 n = ii[i+1] - jrow; 1538 sum1 = 0.0; 1539 sum2 = 0.0; 1540 sum3 = 0.0; 1541 sum4 = 0.0; 1542 sum5 = 0.0; 1543 sum6 = 0.0; 1544 sum7 = 0.0; 1545 sum8 = 0.0; 1546 sum9 = 0.0; 1547 for (j=0; j<n; j++) { 1548 sum1 += v[jrow]*x[9*idx[jrow]]; 1549 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1550 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1551 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1552 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1553 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1554 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1555 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1556 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1557 jrow++; 1558 } 1559 y[9*i] += sum1; 1560 y[9*i+1] += sum2; 1561 y[9*i+2] += sum3; 1562 y[9*i+3] += sum4; 1563 y[9*i+4] += sum5; 1564 y[9*i+5] += sum6; 1565 y[9*i+6] += sum7; 1566 y[9*i+7] += sum8; 1567 y[9*i+8] += sum9; 1568 } 1569 1570 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1571 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1572 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1573 PetscFunctionReturn(0); 1574 } 1575 1576 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1577 { 1578 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1579 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1580 const PetscScalar *x,*v; 1581 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1582 PetscErrorCode ierr; 1583 const PetscInt m = b->AIJ->rmap->n,*idx; 1584 PetscInt n,i; 1585 1586 PetscFunctionBegin; 1587 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1588 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1589 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1590 for (i=0; i<m; i++) { 1591 idx = a->j + a->i[i]; 1592 v = a->a + a->i[i]; 1593 n = a->i[i+1] - a->i[i]; 1594 alpha1 = x[9*i]; 1595 alpha2 = x[9*i+1]; 1596 alpha3 = x[9*i+2]; 1597 alpha4 = x[9*i+3]; 1598 alpha5 = x[9*i+4]; 1599 alpha6 = x[9*i+5]; 1600 alpha7 = x[9*i+6]; 1601 alpha8 = x[9*i+7]; 1602 alpha9 = x[9*i+8]; 1603 while (n-->0) { 1604 y[9*(*idx)] += alpha1*(*v); 1605 y[9*(*idx)+1] += alpha2*(*v); 1606 y[9*(*idx)+2] += alpha3*(*v); 1607 y[9*(*idx)+3] += alpha4*(*v); 1608 y[9*(*idx)+4] += alpha5*(*v); 1609 y[9*(*idx)+5] += alpha6*(*v); 1610 y[9*(*idx)+6] += alpha7*(*v); 1611 y[9*(*idx)+7] += alpha8*(*v); 1612 y[9*(*idx)+8] += alpha9*(*v); 1613 idx++; v++; 1614 } 1615 } 1616 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr); 1617 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1618 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1619 PetscFunctionReturn(0); 1620 } 1621 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1622 { 1623 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1624 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1625 const PetscScalar *x,*v; 1626 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1627 PetscErrorCode ierr; 1628 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1629 PetscInt nonzerorow=0,n,i,jrow,j; 1630 1631 PetscFunctionBegin; 1632 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1633 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1634 idx = a->j; 1635 v = a->a; 1636 ii = a->i; 1637 1638 for (i=0; i<m; i++) { 1639 jrow = ii[i]; 1640 n = ii[i+1] - jrow; 1641 sum1 = 0.0; 1642 sum2 = 0.0; 1643 sum3 = 0.0; 1644 sum4 = 0.0; 1645 sum5 = 0.0; 1646 sum6 = 0.0; 1647 sum7 = 0.0; 1648 sum8 = 0.0; 1649 sum9 = 0.0; 1650 sum10 = 0.0; 1651 1652 nonzerorow += (n>0); 1653 for (j=0; j<n; j++) { 1654 sum1 += v[jrow]*x[10*idx[jrow]]; 1655 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1656 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1657 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1658 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1659 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1660 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1661 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1662 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1663 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1664 jrow++; 1665 } 1666 y[10*i] = sum1; 1667 y[10*i+1] = sum2; 1668 y[10*i+2] = sum3; 1669 y[10*i+3] = sum4; 1670 y[10*i+4] = sum5; 1671 y[10*i+5] = sum6; 1672 y[10*i+6] = sum7; 1673 y[10*i+7] = sum8; 1674 y[10*i+8] = sum9; 1675 y[10*i+9] = sum10; 1676 } 1677 1678 ierr = PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);CHKERRQ(ierr); 1679 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1680 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1681 PetscFunctionReturn(0); 1682 } 1683 1684 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1685 { 1686 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1687 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1688 const PetscScalar *x,*v; 1689 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1690 PetscErrorCode ierr; 1691 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1692 PetscInt n,i,jrow,j; 1693 1694 PetscFunctionBegin; 1695 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1696 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1697 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1698 idx = a->j; 1699 v = a->a; 1700 ii = a->i; 1701 1702 for (i=0; i<m; i++) { 1703 jrow = ii[i]; 1704 n = ii[i+1] - jrow; 1705 sum1 = 0.0; 1706 sum2 = 0.0; 1707 sum3 = 0.0; 1708 sum4 = 0.0; 1709 sum5 = 0.0; 1710 sum6 = 0.0; 1711 sum7 = 0.0; 1712 sum8 = 0.0; 1713 sum9 = 0.0; 1714 sum10 = 0.0; 1715 for (j=0; j<n; j++) { 1716 sum1 += v[jrow]*x[10*idx[jrow]]; 1717 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1718 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1719 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1720 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1721 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1722 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1723 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1724 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1725 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1726 jrow++; 1727 } 1728 y[10*i] += sum1; 1729 y[10*i+1] += sum2; 1730 y[10*i+2] += sum3; 1731 y[10*i+3] += sum4; 1732 y[10*i+4] += sum5; 1733 y[10*i+5] += sum6; 1734 y[10*i+6] += sum7; 1735 y[10*i+7] += sum8; 1736 y[10*i+8] += sum9; 1737 y[10*i+9] += sum10; 1738 } 1739 1740 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1741 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1742 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1743 PetscFunctionReturn(0); 1744 } 1745 1746 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1747 { 1748 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1749 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1750 const PetscScalar *x,*v; 1751 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1752 PetscErrorCode ierr; 1753 const PetscInt m = b->AIJ->rmap->n,*idx; 1754 PetscInt n,i; 1755 1756 PetscFunctionBegin; 1757 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1758 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1759 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1760 1761 for (i=0; i<m; i++) { 1762 idx = a->j + a->i[i]; 1763 v = a->a + a->i[i]; 1764 n = a->i[i+1] - a->i[i]; 1765 alpha1 = x[10*i]; 1766 alpha2 = x[10*i+1]; 1767 alpha3 = x[10*i+2]; 1768 alpha4 = x[10*i+3]; 1769 alpha5 = x[10*i+4]; 1770 alpha6 = x[10*i+5]; 1771 alpha7 = x[10*i+6]; 1772 alpha8 = x[10*i+7]; 1773 alpha9 = x[10*i+8]; 1774 alpha10 = x[10*i+9]; 1775 while (n-->0) { 1776 y[10*(*idx)] += alpha1*(*v); 1777 y[10*(*idx)+1] += alpha2*(*v); 1778 y[10*(*idx)+2] += alpha3*(*v); 1779 y[10*(*idx)+3] += alpha4*(*v); 1780 y[10*(*idx)+4] += alpha5*(*v); 1781 y[10*(*idx)+5] += alpha6*(*v); 1782 y[10*(*idx)+6] += alpha7*(*v); 1783 y[10*(*idx)+7] += alpha8*(*v); 1784 y[10*(*idx)+8] += alpha9*(*v); 1785 y[10*(*idx)+9] += alpha10*(*v); 1786 idx++; v++; 1787 } 1788 } 1789 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1790 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1791 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1792 PetscFunctionReturn(0); 1793 } 1794 1795 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1796 { 1797 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1798 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1799 const PetscScalar *x,*v; 1800 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1801 PetscErrorCode ierr; 1802 const PetscInt m = b->AIJ->rmap->n,*idx; 1803 PetscInt n,i; 1804 1805 PetscFunctionBegin; 1806 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1807 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1808 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1809 for (i=0; i<m; i++) { 1810 idx = a->j + a->i[i]; 1811 v = a->a + a->i[i]; 1812 n = a->i[i+1] - a->i[i]; 1813 alpha1 = x[10*i]; 1814 alpha2 = x[10*i+1]; 1815 alpha3 = x[10*i+2]; 1816 alpha4 = x[10*i+3]; 1817 alpha5 = x[10*i+4]; 1818 alpha6 = x[10*i+5]; 1819 alpha7 = x[10*i+6]; 1820 alpha8 = x[10*i+7]; 1821 alpha9 = x[10*i+8]; 1822 alpha10 = x[10*i+9]; 1823 while (n-->0) { 1824 y[10*(*idx)] += alpha1*(*v); 1825 y[10*(*idx)+1] += alpha2*(*v); 1826 y[10*(*idx)+2] += alpha3*(*v); 1827 y[10*(*idx)+3] += alpha4*(*v); 1828 y[10*(*idx)+4] += alpha5*(*v); 1829 y[10*(*idx)+5] += alpha6*(*v); 1830 y[10*(*idx)+6] += alpha7*(*v); 1831 y[10*(*idx)+7] += alpha8*(*v); 1832 y[10*(*idx)+8] += alpha9*(*v); 1833 y[10*(*idx)+9] += alpha10*(*v); 1834 idx++; v++; 1835 } 1836 } 1837 ierr = PetscLogFlops(20.0*a->nz);CHKERRQ(ierr); 1838 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1839 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1840 PetscFunctionReturn(0); 1841 } 1842 1843 1844 /*--------------------------------------------------------------------------------------------*/ 1845 PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1846 { 1847 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1848 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1849 const PetscScalar *x,*v; 1850 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1851 PetscErrorCode ierr; 1852 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1853 PetscInt nonzerorow=0,n,i,jrow,j; 1854 1855 PetscFunctionBegin; 1856 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1857 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1858 idx = a->j; 1859 v = a->a; 1860 ii = a->i; 1861 1862 for (i=0; i<m; i++) { 1863 jrow = ii[i]; 1864 n = ii[i+1] - jrow; 1865 sum1 = 0.0; 1866 sum2 = 0.0; 1867 sum3 = 0.0; 1868 sum4 = 0.0; 1869 sum5 = 0.0; 1870 sum6 = 0.0; 1871 sum7 = 0.0; 1872 sum8 = 0.0; 1873 sum9 = 0.0; 1874 sum10 = 0.0; 1875 sum11 = 0.0; 1876 1877 nonzerorow += (n>0); 1878 for (j=0; j<n; j++) { 1879 sum1 += v[jrow]*x[11*idx[jrow]]; 1880 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1881 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1882 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1883 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1884 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1885 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1886 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1887 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1888 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1889 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1890 jrow++; 1891 } 1892 y[11*i] = sum1; 1893 y[11*i+1] = sum2; 1894 y[11*i+2] = sum3; 1895 y[11*i+3] = sum4; 1896 y[11*i+4] = sum5; 1897 y[11*i+5] = sum6; 1898 y[11*i+6] = sum7; 1899 y[11*i+7] = sum8; 1900 y[11*i+8] = sum9; 1901 y[11*i+9] = sum10; 1902 y[11*i+10] = sum11; 1903 } 1904 1905 ierr = PetscLogFlops(22*a->nz - 11*nonzerorow);CHKERRQ(ierr); 1906 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1907 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1908 PetscFunctionReturn(0); 1909 } 1910 1911 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 1912 { 1913 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1914 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1915 const PetscScalar *x,*v; 1916 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1917 PetscErrorCode ierr; 1918 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 1919 PetscInt n,i,jrow,j; 1920 1921 PetscFunctionBegin; 1922 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1923 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1924 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1925 idx = a->j; 1926 v = a->a; 1927 ii = a->i; 1928 1929 for (i=0; i<m; i++) { 1930 jrow = ii[i]; 1931 n = ii[i+1] - jrow; 1932 sum1 = 0.0; 1933 sum2 = 0.0; 1934 sum3 = 0.0; 1935 sum4 = 0.0; 1936 sum5 = 0.0; 1937 sum6 = 0.0; 1938 sum7 = 0.0; 1939 sum8 = 0.0; 1940 sum9 = 0.0; 1941 sum10 = 0.0; 1942 sum11 = 0.0; 1943 for (j=0; j<n; j++) { 1944 sum1 += v[jrow]*x[11*idx[jrow]]; 1945 sum2 += v[jrow]*x[11*idx[jrow]+1]; 1946 sum3 += v[jrow]*x[11*idx[jrow]+2]; 1947 sum4 += v[jrow]*x[11*idx[jrow]+3]; 1948 sum5 += v[jrow]*x[11*idx[jrow]+4]; 1949 sum6 += v[jrow]*x[11*idx[jrow]+5]; 1950 sum7 += v[jrow]*x[11*idx[jrow]+6]; 1951 sum8 += v[jrow]*x[11*idx[jrow]+7]; 1952 sum9 += v[jrow]*x[11*idx[jrow]+8]; 1953 sum10 += v[jrow]*x[11*idx[jrow]+9]; 1954 sum11 += v[jrow]*x[11*idx[jrow]+10]; 1955 jrow++; 1956 } 1957 y[11*i] += sum1; 1958 y[11*i+1] += sum2; 1959 y[11*i+2] += sum3; 1960 y[11*i+3] += sum4; 1961 y[11*i+4] += sum5; 1962 y[11*i+5] += sum6; 1963 y[11*i+6] += sum7; 1964 y[11*i+7] += sum8; 1965 y[11*i+8] += sum9; 1966 y[11*i+9] += sum10; 1967 y[11*i+10] += sum11; 1968 } 1969 1970 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 1971 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 1972 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1973 PetscFunctionReturn(0); 1974 } 1975 1976 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy) 1977 { 1978 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1979 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1980 const PetscScalar *x,*v; 1981 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 1982 PetscErrorCode ierr; 1983 const PetscInt m = b->AIJ->rmap->n,*idx; 1984 PetscInt n,i; 1985 1986 PetscFunctionBegin; 1987 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 1988 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 1989 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1990 1991 for (i=0; i<m; i++) { 1992 idx = a->j + a->i[i]; 1993 v = a->a + a->i[i]; 1994 n = a->i[i+1] - a->i[i]; 1995 alpha1 = x[11*i]; 1996 alpha2 = x[11*i+1]; 1997 alpha3 = x[11*i+2]; 1998 alpha4 = x[11*i+3]; 1999 alpha5 = x[11*i+4]; 2000 alpha6 = x[11*i+5]; 2001 alpha7 = x[11*i+6]; 2002 alpha8 = x[11*i+7]; 2003 alpha9 = x[11*i+8]; 2004 alpha10 = x[11*i+9]; 2005 alpha11 = x[11*i+10]; 2006 while (n-->0) { 2007 y[11*(*idx)] += alpha1*(*v); 2008 y[11*(*idx)+1] += alpha2*(*v); 2009 y[11*(*idx)+2] += alpha3*(*v); 2010 y[11*(*idx)+3] += alpha4*(*v); 2011 y[11*(*idx)+4] += alpha5*(*v); 2012 y[11*(*idx)+5] += alpha6*(*v); 2013 y[11*(*idx)+6] += alpha7*(*v); 2014 y[11*(*idx)+7] += alpha8*(*v); 2015 y[11*(*idx)+8] += alpha9*(*v); 2016 y[11*(*idx)+9] += alpha10*(*v); 2017 y[11*(*idx)+10] += alpha11*(*v); 2018 idx++; v++; 2019 } 2020 } 2021 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2022 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2023 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2024 PetscFunctionReturn(0); 2025 } 2026 2027 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) 2028 { 2029 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2030 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2031 const PetscScalar *x,*v; 2032 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11; 2033 PetscErrorCode ierr; 2034 const PetscInt m = b->AIJ->rmap->n,*idx; 2035 PetscInt n,i; 2036 2037 PetscFunctionBegin; 2038 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2039 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2040 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2041 for (i=0; i<m; i++) { 2042 idx = a->j + a->i[i]; 2043 v = a->a + a->i[i]; 2044 n = a->i[i+1] - a->i[i]; 2045 alpha1 = x[11*i]; 2046 alpha2 = x[11*i+1]; 2047 alpha3 = x[11*i+2]; 2048 alpha4 = x[11*i+3]; 2049 alpha5 = x[11*i+4]; 2050 alpha6 = x[11*i+5]; 2051 alpha7 = x[11*i+6]; 2052 alpha8 = x[11*i+7]; 2053 alpha9 = x[11*i+8]; 2054 alpha10 = x[11*i+9]; 2055 alpha11 = x[11*i+10]; 2056 while (n-->0) { 2057 y[11*(*idx)] += alpha1*(*v); 2058 y[11*(*idx)+1] += alpha2*(*v); 2059 y[11*(*idx)+2] += alpha3*(*v); 2060 y[11*(*idx)+3] += alpha4*(*v); 2061 y[11*(*idx)+4] += alpha5*(*v); 2062 y[11*(*idx)+5] += alpha6*(*v); 2063 y[11*(*idx)+6] += alpha7*(*v); 2064 y[11*(*idx)+7] += alpha8*(*v); 2065 y[11*(*idx)+8] += alpha9*(*v); 2066 y[11*(*idx)+9] += alpha10*(*v); 2067 y[11*(*idx)+10] += alpha11*(*v); 2068 idx++; v++; 2069 } 2070 } 2071 ierr = PetscLogFlops(22*a->nz);CHKERRQ(ierr); 2072 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2073 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2074 PetscFunctionReturn(0); 2075 } 2076 2077 2078 /*--------------------------------------------------------------------------------------------*/ 2079 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2080 { 2081 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2082 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2083 const PetscScalar *x,*v; 2084 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2085 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2086 PetscErrorCode ierr; 2087 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2088 PetscInt nonzerorow=0,n,i,jrow,j; 2089 2090 PetscFunctionBegin; 2091 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2092 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2093 idx = a->j; 2094 v = a->a; 2095 ii = a->i; 2096 2097 for (i=0; i<m; i++) { 2098 jrow = ii[i]; 2099 n = ii[i+1] - jrow; 2100 sum1 = 0.0; 2101 sum2 = 0.0; 2102 sum3 = 0.0; 2103 sum4 = 0.0; 2104 sum5 = 0.0; 2105 sum6 = 0.0; 2106 sum7 = 0.0; 2107 sum8 = 0.0; 2108 sum9 = 0.0; 2109 sum10 = 0.0; 2110 sum11 = 0.0; 2111 sum12 = 0.0; 2112 sum13 = 0.0; 2113 sum14 = 0.0; 2114 sum15 = 0.0; 2115 sum16 = 0.0; 2116 2117 nonzerorow += (n>0); 2118 for (j=0; j<n; j++) { 2119 sum1 += v[jrow]*x[16*idx[jrow]]; 2120 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2121 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2122 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2123 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2124 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2125 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2126 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2127 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2128 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2129 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2130 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2131 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2132 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2133 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2134 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2135 jrow++; 2136 } 2137 y[16*i] = sum1; 2138 y[16*i+1] = sum2; 2139 y[16*i+2] = sum3; 2140 y[16*i+3] = sum4; 2141 y[16*i+4] = sum5; 2142 y[16*i+5] = sum6; 2143 y[16*i+6] = sum7; 2144 y[16*i+7] = sum8; 2145 y[16*i+8] = sum9; 2146 y[16*i+9] = sum10; 2147 y[16*i+10] = sum11; 2148 y[16*i+11] = sum12; 2149 y[16*i+12] = sum13; 2150 y[16*i+13] = sum14; 2151 y[16*i+14] = sum15; 2152 y[16*i+15] = sum16; 2153 } 2154 2155 ierr = PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);CHKERRQ(ierr); 2156 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2157 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2158 PetscFunctionReturn(0); 2159 } 2160 2161 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 2162 { 2163 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2164 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2165 const PetscScalar *x,*v; 2166 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2167 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2168 PetscErrorCode ierr; 2169 const PetscInt m = b->AIJ->rmap->n,*idx; 2170 PetscInt n,i; 2171 2172 PetscFunctionBegin; 2173 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2174 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2175 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2176 2177 for (i=0; i<m; i++) { 2178 idx = a->j + a->i[i]; 2179 v = a->a + a->i[i]; 2180 n = a->i[i+1] - a->i[i]; 2181 alpha1 = x[16*i]; 2182 alpha2 = x[16*i+1]; 2183 alpha3 = x[16*i+2]; 2184 alpha4 = x[16*i+3]; 2185 alpha5 = x[16*i+4]; 2186 alpha6 = x[16*i+5]; 2187 alpha7 = x[16*i+6]; 2188 alpha8 = x[16*i+7]; 2189 alpha9 = x[16*i+8]; 2190 alpha10 = x[16*i+9]; 2191 alpha11 = x[16*i+10]; 2192 alpha12 = x[16*i+11]; 2193 alpha13 = x[16*i+12]; 2194 alpha14 = x[16*i+13]; 2195 alpha15 = x[16*i+14]; 2196 alpha16 = x[16*i+15]; 2197 while (n-->0) { 2198 y[16*(*idx)] += alpha1*(*v); 2199 y[16*(*idx)+1] += alpha2*(*v); 2200 y[16*(*idx)+2] += alpha3*(*v); 2201 y[16*(*idx)+3] += alpha4*(*v); 2202 y[16*(*idx)+4] += alpha5*(*v); 2203 y[16*(*idx)+5] += alpha6*(*v); 2204 y[16*(*idx)+6] += alpha7*(*v); 2205 y[16*(*idx)+7] += alpha8*(*v); 2206 y[16*(*idx)+8] += alpha9*(*v); 2207 y[16*(*idx)+9] += alpha10*(*v); 2208 y[16*(*idx)+10] += alpha11*(*v); 2209 y[16*(*idx)+11] += alpha12*(*v); 2210 y[16*(*idx)+12] += alpha13*(*v); 2211 y[16*(*idx)+13] += alpha14*(*v); 2212 y[16*(*idx)+14] += alpha15*(*v); 2213 y[16*(*idx)+15] += alpha16*(*v); 2214 idx++; v++; 2215 } 2216 } 2217 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2218 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2219 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2220 PetscFunctionReturn(0); 2221 } 2222 2223 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2224 { 2225 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2226 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2227 const PetscScalar *x,*v; 2228 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2229 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2230 PetscErrorCode ierr; 2231 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2232 PetscInt n,i,jrow,j; 2233 2234 PetscFunctionBegin; 2235 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2236 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2237 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2238 idx = a->j; 2239 v = a->a; 2240 ii = a->i; 2241 2242 for (i=0; i<m; i++) { 2243 jrow = ii[i]; 2244 n = ii[i+1] - jrow; 2245 sum1 = 0.0; 2246 sum2 = 0.0; 2247 sum3 = 0.0; 2248 sum4 = 0.0; 2249 sum5 = 0.0; 2250 sum6 = 0.0; 2251 sum7 = 0.0; 2252 sum8 = 0.0; 2253 sum9 = 0.0; 2254 sum10 = 0.0; 2255 sum11 = 0.0; 2256 sum12 = 0.0; 2257 sum13 = 0.0; 2258 sum14 = 0.0; 2259 sum15 = 0.0; 2260 sum16 = 0.0; 2261 for (j=0; j<n; j++) { 2262 sum1 += v[jrow]*x[16*idx[jrow]]; 2263 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2264 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2265 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2266 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2267 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2268 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2269 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2270 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2271 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2272 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2273 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2274 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2275 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2276 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2277 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2278 jrow++; 2279 } 2280 y[16*i] += sum1; 2281 y[16*i+1] += sum2; 2282 y[16*i+2] += sum3; 2283 y[16*i+3] += sum4; 2284 y[16*i+4] += sum5; 2285 y[16*i+5] += sum6; 2286 y[16*i+6] += sum7; 2287 y[16*i+7] += sum8; 2288 y[16*i+8] += sum9; 2289 y[16*i+9] += sum10; 2290 y[16*i+10] += sum11; 2291 y[16*i+11] += sum12; 2292 y[16*i+12] += sum13; 2293 y[16*i+13] += sum14; 2294 y[16*i+14] += sum15; 2295 y[16*i+15] += sum16; 2296 } 2297 2298 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2299 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2300 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2301 PetscFunctionReturn(0); 2302 } 2303 2304 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2305 { 2306 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2307 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2308 const PetscScalar *x,*v; 2309 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2310 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2311 PetscErrorCode ierr; 2312 const PetscInt m = b->AIJ->rmap->n,*idx; 2313 PetscInt n,i; 2314 2315 PetscFunctionBegin; 2316 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2317 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2318 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2319 for (i=0; i<m; i++) { 2320 idx = a->j + a->i[i]; 2321 v = a->a + a->i[i]; 2322 n = a->i[i+1] - a->i[i]; 2323 alpha1 = x[16*i]; 2324 alpha2 = x[16*i+1]; 2325 alpha3 = x[16*i+2]; 2326 alpha4 = x[16*i+3]; 2327 alpha5 = x[16*i+4]; 2328 alpha6 = x[16*i+5]; 2329 alpha7 = x[16*i+6]; 2330 alpha8 = x[16*i+7]; 2331 alpha9 = x[16*i+8]; 2332 alpha10 = x[16*i+9]; 2333 alpha11 = x[16*i+10]; 2334 alpha12 = x[16*i+11]; 2335 alpha13 = x[16*i+12]; 2336 alpha14 = x[16*i+13]; 2337 alpha15 = x[16*i+14]; 2338 alpha16 = x[16*i+15]; 2339 while (n-->0) { 2340 y[16*(*idx)] += alpha1*(*v); 2341 y[16*(*idx)+1] += alpha2*(*v); 2342 y[16*(*idx)+2] += alpha3*(*v); 2343 y[16*(*idx)+3] += alpha4*(*v); 2344 y[16*(*idx)+4] += alpha5*(*v); 2345 y[16*(*idx)+5] += alpha6*(*v); 2346 y[16*(*idx)+6] += alpha7*(*v); 2347 y[16*(*idx)+7] += alpha8*(*v); 2348 y[16*(*idx)+8] += alpha9*(*v); 2349 y[16*(*idx)+9] += alpha10*(*v); 2350 y[16*(*idx)+10] += alpha11*(*v); 2351 y[16*(*idx)+11] += alpha12*(*v); 2352 y[16*(*idx)+12] += alpha13*(*v); 2353 y[16*(*idx)+13] += alpha14*(*v); 2354 y[16*(*idx)+14] += alpha15*(*v); 2355 y[16*(*idx)+15] += alpha16*(*v); 2356 idx++; v++; 2357 } 2358 } 2359 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr); 2360 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2361 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2362 PetscFunctionReturn(0); 2363 } 2364 2365 /*--------------------------------------------------------------------------------------------*/ 2366 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2367 { 2368 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2369 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2370 const PetscScalar *x,*v; 2371 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2372 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2373 PetscErrorCode ierr; 2374 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2375 PetscInt nonzerorow=0,n,i,jrow,j; 2376 2377 PetscFunctionBegin; 2378 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2379 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2380 idx = a->j; 2381 v = a->a; 2382 ii = a->i; 2383 2384 for (i=0; i<m; i++) { 2385 jrow = ii[i]; 2386 n = ii[i+1] - jrow; 2387 sum1 = 0.0; 2388 sum2 = 0.0; 2389 sum3 = 0.0; 2390 sum4 = 0.0; 2391 sum5 = 0.0; 2392 sum6 = 0.0; 2393 sum7 = 0.0; 2394 sum8 = 0.0; 2395 sum9 = 0.0; 2396 sum10 = 0.0; 2397 sum11 = 0.0; 2398 sum12 = 0.0; 2399 sum13 = 0.0; 2400 sum14 = 0.0; 2401 sum15 = 0.0; 2402 sum16 = 0.0; 2403 sum17 = 0.0; 2404 sum18 = 0.0; 2405 2406 nonzerorow += (n>0); 2407 for (j=0; j<n; j++) { 2408 sum1 += v[jrow]*x[18*idx[jrow]]; 2409 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2410 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2411 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2412 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2413 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2414 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2415 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2416 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2417 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2418 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2419 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2420 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2421 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2422 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2423 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2424 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2425 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2426 jrow++; 2427 } 2428 y[18*i] = sum1; 2429 y[18*i+1] = sum2; 2430 y[18*i+2] = sum3; 2431 y[18*i+3] = sum4; 2432 y[18*i+4] = sum5; 2433 y[18*i+5] = sum6; 2434 y[18*i+6] = sum7; 2435 y[18*i+7] = sum8; 2436 y[18*i+8] = sum9; 2437 y[18*i+9] = sum10; 2438 y[18*i+10] = sum11; 2439 y[18*i+11] = sum12; 2440 y[18*i+12] = sum13; 2441 y[18*i+13] = sum14; 2442 y[18*i+14] = sum15; 2443 y[18*i+15] = sum16; 2444 y[18*i+16] = sum17; 2445 y[18*i+17] = sum18; 2446 } 2447 2448 ierr = PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);CHKERRQ(ierr); 2449 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2450 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2451 PetscFunctionReturn(0); 2452 } 2453 2454 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2455 { 2456 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2457 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2458 const PetscScalar *x,*v; 2459 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2460 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2461 PetscErrorCode ierr; 2462 const PetscInt m = b->AIJ->rmap->n,*idx; 2463 PetscInt n,i; 2464 2465 PetscFunctionBegin; 2466 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2467 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2468 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2469 2470 for (i=0; i<m; i++) { 2471 idx = a->j + a->i[i]; 2472 v = a->a + a->i[i]; 2473 n = a->i[i+1] - a->i[i]; 2474 alpha1 = x[18*i]; 2475 alpha2 = x[18*i+1]; 2476 alpha3 = x[18*i+2]; 2477 alpha4 = x[18*i+3]; 2478 alpha5 = x[18*i+4]; 2479 alpha6 = x[18*i+5]; 2480 alpha7 = x[18*i+6]; 2481 alpha8 = x[18*i+7]; 2482 alpha9 = x[18*i+8]; 2483 alpha10 = x[18*i+9]; 2484 alpha11 = x[18*i+10]; 2485 alpha12 = x[18*i+11]; 2486 alpha13 = x[18*i+12]; 2487 alpha14 = x[18*i+13]; 2488 alpha15 = x[18*i+14]; 2489 alpha16 = x[18*i+15]; 2490 alpha17 = x[18*i+16]; 2491 alpha18 = x[18*i+17]; 2492 while (n-->0) { 2493 y[18*(*idx)] += alpha1*(*v); 2494 y[18*(*idx)+1] += alpha2*(*v); 2495 y[18*(*idx)+2] += alpha3*(*v); 2496 y[18*(*idx)+3] += alpha4*(*v); 2497 y[18*(*idx)+4] += alpha5*(*v); 2498 y[18*(*idx)+5] += alpha6*(*v); 2499 y[18*(*idx)+6] += alpha7*(*v); 2500 y[18*(*idx)+7] += alpha8*(*v); 2501 y[18*(*idx)+8] += alpha9*(*v); 2502 y[18*(*idx)+9] += alpha10*(*v); 2503 y[18*(*idx)+10] += alpha11*(*v); 2504 y[18*(*idx)+11] += alpha12*(*v); 2505 y[18*(*idx)+12] += alpha13*(*v); 2506 y[18*(*idx)+13] += alpha14*(*v); 2507 y[18*(*idx)+14] += alpha15*(*v); 2508 y[18*(*idx)+15] += alpha16*(*v); 2509 y[18*(*idx)+16] += alpha17*(*v); 2510 y[18*(*idx)+17] += alpha18*(*v); 2511 idx++; v++; 2512 } 2513 } 2514 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2515 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2516 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2517 PetscFunctionReturn(0); 2518 } 2519 2520 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2521 { 2522 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2523 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2524 const PetscScalar *x,*v; 2525 PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2526 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2527 PetscErrorCode ierr; 2528 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2529 PetscInt n,i,jrow,j; 2530 2531 PetscFunctionBegin; 2532 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2533 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2534 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2535 idx = a->j; 2536 v = a->a; 2537 ii = a->i; 2538 2539 for (i=0; i<m; i++) { 2540 jrow = ii[i]; 2541 n = ii[i+1] - jrow; 2542 sum1 = 0.0; 2543 sum2 = 0.0; 2544 sum3 = 0.0; 2545 sum4 = 0.0; 2546 sum5 = 0.0; 2547 sum6 = 0.0; 2548 sum7 = 0.0; 2549 sum8 = 0.0; 2550 sum9 = 0.0; 2551 sum10 = 0.0; 2552 sum11 = 0.0; 2553 sum12 = 0.0; 2554 sum13 = 0.0; 2555 sum14 = 0.0; 2556 sum15 = 0.0; 2557 sum16 = 0.0; 2558 sum17 = 0.0; 2559 sum18 = 0.0; 2560 for (j=0; j<n; j++) { 2561 sum1 += v[jrow]*x[18*idx[jrow]]; 2562 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2563 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2564 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2565 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2566 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2567 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2568 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2569 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2570 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2571 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2572 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2573 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2574 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2575 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2576 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2577 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2578 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2579 jrow++; 2580 } 2581 y[18*i] += sum1; 2582 y[18*i+1] += sum2; 2583 y[18*i+2] += sum3; 2584 y[18*i+3] += sum4; 2585 y[18*i+4] += sum5; 2586 y[18*i+5] += sum6; 2587 y[18*i+6] += sum7; 2588 y[18*i+7] += sum8; 2589 y[18*i+8] += sum9; 2590 y[18*i+9] += sum10; 2591 y[18*i+10] += sum11; 2592 y[18*i+11] += sum12; 2593 y[18*i+12] += sum13; 2594 y[18*i+13] += sum14; 2595 y[18*i+14] += sum15; 2596 y[18*i+15] += sum16; 2597 y[18*i+16] += sum17; 2598 y[18*i+17] += sum18; 2599 } 2600 2601 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2602 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2603 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2604 PetscFunctionReturn(0); 2605 } 2606 2607 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2608 { 2609 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2610 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2611 const PetscScalar *x,*v; 2612 PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2613 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2614 PetscErrorCode ierr; 2615 const PetscInt m = b->AIJ->rmap->n,*idx; 2616 PetscInt n,i; 2617 2618 PetscFunctionBegin; 2619 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2620 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2621 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2622 for (i=0; i<m; i++) { 2623 idx = a->j + a->i[i]; 2624 v = a->a + a->i[i]; 2625 n = a->i[i+1] - a->i[i]; 2626 alpha1 = x[18*i]; 2627 alpha2 = x[18*i+1]; 2628 alpha3 = x[18*i+2]; 2629 alpha4 = x[18*i+3]; 2630 alpha5 = x[18*i+4]; 2631 alpha6 = x[18*i+5]; 2632 alpha7 = x[18*i+6]; 2633 alpha8 = x[18*i+7]; 2634 alpha9 = x[18*i+8]; 2635 alpha10 = x[18*i+9]; 2636 alpha11 = x[18*i+10]; 2637 alpha12 = x[18*i+11]; 2638 alpha13 = x[18*i+12]; 2639 alpha14 = x[18*i+13]; 2640 alpha15 = x[18*i+14]; 2641 alpha16 = x[18*i+15]; 2642 alpha17 = x[18*i+16]; 2643 alpha18 = x[18*i+17]; 2644 while (n-->0) { 2645 y[18*(*idx)] += alpha1*(*v); 2646 y[18*(*idx)+1] += alpha2*(*v); 2647 y[18*(*idx)+2] += alpha3*(*v); 2648 y[18*(*idx)+3] += alpha4*(*v); 2649 y[18*(*idx)+4] += alpha5*(*v); 2650 y[18*(*idx)+5] += alpha6*(*v); 2651 y[18*(*idx)+6] += alpha7*(*v); 2652 y[18*(*idx)+7] += alpha8*(*v); 2653 y[18*(*idx)+8] += alpha9*(*v); 2654 y[18*(*idx)+9] += alpha10*(*v); 2655 y[18*(*idx)+10] += alpha11*(*v); 2656 y[18*(*idx)+11] += alpha12*(*v); 2657 y[18*(*idx)+12] += alpha13*(*v); 2658 y[18*(*idx)+13] += alpha14*(*v); 2659 y[18*(*idx)+14] += alpha15*(*v); 2660 y[18*(*idx)+15] += alpha16*(*v); 2661 y[18*(*idx)+16] += alpha17*(*v); 2662 y[18*(*idx)+17] += alpha18*(*v); 2663 idx++; v++; 2664 } 2665 } 2666 ierr = PetscLogFlops(36.0*a->nz);CHKERRQ(ierr); 2667 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2668 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2669 PetscFunctionReturn(0); 2670 } 2671 2672 PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2673 { 2674 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2675 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2676 const PetscScalar *x,*v; 2677 PetscScalar *y,*sums; 2678 PetscErrorCode ierr; 2679 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2680 PetscInt n,i,jrow,j,dof = b->dof,k; 2681 2682 PetscFunctionBegin; 2683 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2684 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2685 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2686 idx = a->j; 2687 v = a->a; 2688 ii = a->i; 2689 2690 for (i=0; i<m; i++) { 2691 jrow = ii[i]; 2692 n = ii[i+1] - jrow; 2693 sums = y + dof*i; 2694 for (j=0; j<n; j++) { 2695 for (k=0; k<dof; k++) { 2696 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2697 } 2698 jrow++; 2699 } 2700 } 2701 2702 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2703 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2704 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2705 PetscFunctionReturn(0); 2706 } 2707 2708 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2709 { 2710 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2711 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2712 const PetscScalar *x,*v; 2713 PetscScalar *y,*sums; 2714 PetscErrorCode ierr; 2715 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 2716 PetscInt n,i,jrow,j,dof = b->dof,k; 2717 2718 PetscFunctionBegin; 2719 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2720 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2721 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2722 idx = a->j; 2723 v = a->a; 2724 ii = a->i; 2725 2726 for (i=0; i<m; i++) { 2727 jrow = ii[i]; 2728 n = ii[i+1] - jrow; 2729 sums = y + dof*i; 2730 for (j=0; j<n; j++) { 2731 for (k=0; k<dof; k++) { 2732 sums[k] += v[jrow]*x[dof*idx[jrow]+k]; 2733 } 2734 jrow++; 2735 } 2736 } 2737 2738 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2739 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2740 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2741 PetscFunctionReturn(0); 2742 } 2743 2744 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy) 2745 { 2746 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2747 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2748 const PetscScalar *x,*v,*alpha; 2749 PetscScalar *y; 2750 PetscErrorCode ierr; 2751 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2752 PetscInt n,i,k; 2753 2754 PetscFunctionBegin; 2755 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2756 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 2757 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2758 for (i=0; i<m; i++) { 2759 idx = a->j + a->i[i]; 2760 v = a->a + a->i[i]; 2761 n = a->i[i+1] - a->i[i]; 2762 alpha = x + dof*i; 2763 while (n-->0) { 2764 for (k=0; k<dof; k++) { 2765 y[dof*(*idx)+k] += alpha[k]*(*v); 2766 } 2767 idx++; v++; 2768 } 2769 } 2770 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2771 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2772 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2773 PetscFunctionReturn(0); 2774 } 2775 2776 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 2777 { 2778 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2779 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2780 const PetscScalar *x,*v,*alpha; 2781 PetscScalar *y; 2782 PetscErrorCode ierr; 2783 const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof; 2784 PetscInt n,i,k; 2785 2786 PetscFunctionBegin; 2787 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2788 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 2789 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2790 for (i=0; i<m; i++) { 2791 idx = a->j + a->i[i]; 2792 v = a->a + a->i[i]; 2793 n = a->i[i+1] - a->i[i]; 2794 alpha = x + dof*i; 2795 while (n-->0) { 2796 for (k=0; k<dof; k++) { 2797 y[dof*(*idx)+k] += alpha[k]*(*v); 2798 } 2799 idx++; v++; 2800 } 2801 } 2802 ierr = PetscLogFlops(2.0*dof*a->nz);CHKERRQ(ierr); 2803 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 2804 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2805 PetscFunctionReturn(0); 2806 } 2807 2808 /*===================================================================================*/ 2809 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2810 { 2811 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2812 PetscErrorCode ierr; 2813 2814 PetscFunctionBegin; 2815 /* start the scatter */ 2816 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2817 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2818 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2819 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2820 PetscFunctionReturn(0); 2821 } 2822 2823 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2824 { 2825 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2826 PetscErrorCode ierr; 2827 2828 PetscFunctionBegin; 2829 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2830 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2831 ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2832 ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2833 PetscFunctionReturn(0); 2834 } 2835 2836 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2837 { 2838 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2839 PetscErrorCode ierr; 2840 2841 PetscFunctionBegin; 2842 /* start the scatter */ 2843 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2844 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2845 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2846 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2847 PetscFunctionReturn(0); 2848 } 2849 2850 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2851 { 2852 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2853 PetscErrorCode ierr; 2854 2855 PetscFunctionBegin; 2856 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2857 ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2858 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2859 ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2860 PetscFunctionReturn(0); 2861 } 2862 2863 /* ----------------------------------------------------------------*/ 2864 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2865 { 2866 PetscErrorCode ierr; 2867 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2868 Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data; 2869 Mat P =pp->AIJ; 2870 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2871 PetscInt *pti,*ptj,*ptJ; 2872 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2873 const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof; 2874 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn; 2875 MatScalar *ca; 2876 const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj; 2877 2878 PetscFunctionBegin; 2879 /* Get ij structure of P^T */ 2880 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2881 2882 cn = pn*ppdof; 2883 /* Allocate ci array, arrays for fill computation and */ 2884 /* free space for accumulating nonzero column info */ 2885 ierr = PetscMalloc1(cn+1,&ci);CHKERRQ(ierr); 2886 ci[0] = 0; 2887 2888 /* Work arrays for rows of P^T*A */ 2889 ierr = PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);CHKERRQ(ierr); 2890 ierr = PetscMemzero(ptadenserow,an*sizeof(PetscInt));CHKERRQ(ierr); 2891 ierr = PetscMemzero(denserow,cn*sizeof(PetscInt));CHKERRQ(ierr); 2892 2893 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 2894 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 2895 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2896 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);CHKERRQ(ierr); 2897 current_space = free_space; 2898 2899 /* Determine symbolic info for each row of C: */ 2900 for (i=0; i<pn; i++) { 2901 ptnzi = pti[i+1] - pti[i]; 2902 ptJ = ptj + pti[i]; 2903 for (dof=0; dof<ppdof; dof++) { 2904 ptanzi = 0; 2905 /* Determine symbolic row of PtA: */ 2906 for (j=0; j<ptnzi; j++) { 2907 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 2908 arow = ptJ[j]*ppdof + dof; 2909 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 2910 anzj = ai[arow+1] - ai[arow]; 2911 ajj = aj + ai[arow]; 2912 for (k=0; k<anzj; k++) { 2913 if (!ptadenserow[ajj[k]]) { 2914 ptadenserow[ajj[k]] = -1; 2915 ptasparserow[ptanzi++] = ajj[k]; 2916 } 2917 } 2918 } 2919 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 2920 ptaj = ptasparserow; 2921 cnzi = 0; 2922 for (j=0; j<ptanzi; j++) { 2923 /* Get offset within block of P */ 2924 pshift = *ptaj%ppdof; 2925 /* Get block row of P */ 2926 prow = (*ptaj++)/ppdof; /* integer division */ 2927 /* P has same number of nonzeros per row as the compressed form */ 2928 pnzj = pi[prow+1] - pi[prow]; 2929 pjj = pj + pi[prow]; 2930 for (k=0;k<pnzj;k++) { 2931 /* Locations in C are shifted by the offset within the block */ 2932 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 2933 if (!denserow[pjj[k]*ppdof+pshift]) { 2934 denserow[pjj[k]*ppdof+pshift] = -1; 2935 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 2936 } 2937 } 2938 } 2939 2940 /* sort sparserow */ 2941 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 2942 2943 /* If free space is not available, make more free space */ 2944 /* Double the amount of total space in the list */ 2945 if (current_space->local_remaining<cnzi) { 2946 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 2947 } 2948 2949 /* Copy data into free space, and zero out denserows */ 2950 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 2951 2952 current_space->array += cnzi; 2953 current_space->local_used += cnzi; 2954 current_space->local_remaining -= cnzi; 2955 2956 for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 2957 for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0; 2958 2959 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 2960 /* For now, we will recompute what is needed. */ 2961 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 2962 } 2963 } 2964 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 2965 /* Allocate space for cj, initialize cj, and */ 2966 /* destroy list of free space and other temporary array(s) */ 2967 ierr = PetscMalloc1(ci[cn]+1,&cj);CHKERRQ(ierr); 2968 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 2969 ierr = PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);CHKERRQ(ierr); 2970 2971 /* Allocate space for ca */ 2972 ierr = PetscCalloc1(ci[cn]+1,&ca);CHKERRQ(ierr); 2973 2974 /* put together the new matrix */ 2975 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 2976 ierr = MatSetBlockSize(*C,pp->dof);CHKERRQ(ierr); 2977 2978 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 2979 /* Since these are PETSc arrays, change flags to free them as necessary. */ 2980 c = (Mat_SeqAIJ*)((*C)->data); 2981 c->free_a = PETSC_TRUE; 2982 c->free_ij = PETSC_TRUE; 2983 c->nonew = 0; 2984 2985 (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 2986 2987 /* Clean up. */ 2988 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2989 PetscFunctionReturn(0); 2990 } 2991 2992 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 2993 { 2994 /* This routine requires testing -- first draft only */ 2995 PetscErrorCode ierr; 2996 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2997 Mat P =pp->AIJ; 2998 Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data; 2999 Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data; 3000 Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data; 3001 const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj; 3002 const PetscInt *ci=c->i,*cj=c->j,*cjj; 3003 const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof; 3004 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense; 3005 const MatScalar *aa=a->a,*pa=p->a,*pA,*paj; 3006 MatScalar *ca=c->a,*caj,*apa; 3007 3008 PetscFunctionBegin; 3009 /* Allocate temporary array for storage of one row of A*P */ 3010 ierr = PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);CHKERRQ(ierr); 3011 3012 /* Clear old values in C */ 3013 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 3014 3015 for (i=0; i<am; i++) { 3016 /* Form sparse row of A*P */ 3017 anzi = ai[i+1] - ai[i]; 3018 apnzj = 0; 3019 for (j=0; j<anzi; j++) { 3020 /* Get offset within block of P */ 3021 pshift = *aj%ppdof; 3022 /* Get block row of P */ 3023 prow = *aj++/ppdof; /* integer division */ 3024 pnzj = pi[prow+1] - pi[prow]; 3025 pjj = pj + pi[prow]; 3026 paj = pa + pi[prow]; 3027 for (k=0; k<pnzj; k++) { 3028 poffset = pjj[k]*ppdof+pshift; 3029 if (!apjdense[poffset]) { 3030 apjdense[poffset] = -1; 3031 apj[apnzj++] = poffset; 3032 } 3033 apa[poffset] += (*aa)*paj[k]; 3034 } 3035 ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr); 3036 aa++; 3037 } 3038 3039 /* Sort the j index array for quick sparse axpy. */ 3040 /* Note: a array does not need sorting as it is in dense storage locations. */ 3041 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 3042 3043 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 3044 prow = i/ppdof; /* integer division */ 3045 pshift = i%ppdof; 3046 poffset = pi[prow]; 3047 pnzi = pi[prow+1] - poffset; 3048 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 3049 pJ = pj+poffset; 3050 pA = pa+poffset; 3051 for (j=0; j<pnzi; j++) { 3052 crow = (*pJ)*ppdof+pshift; 3053 cjj = cj + ci[crow]; 3054 caj = ca + ci[crow]; 3055 pJ++; 3056 /* Perform sparse axpy operation. Note cjj includes apj. */ 3057 for (k=0,nextap=0; nextap<apnzj; k++) { 3058 if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]]; 3059 } 3060 ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr); 3061 pA++; 3062 } 3063 3064 /* Zero the current row info for A*P */ 3065 for (j=0; j<apnzj; j++) { 3066 apa[apj[j]] = 0.; 3067 apjdense[apj[j]] = 0; 3068 } 3069 } 3070 3071 /* Assemble the final matrix and clean up */ 3072 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3073 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3074 ierr = PetscFree3(apa,apj,apjdense);CHKERRQ(ierr); 3075 PetscFunctionReturn(0); 3076 } 3077 3078 PETSC_INTERN PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3079 { 3080 PetscErrorCode ierr; 3081 3082 PetscFunctionBegin; 3083 if (scall == MAT_INITIAL_MATRIX) { 3084 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3085 ierr = MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);CHKERRQ(ierr); 3086 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3087 } 3088 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3089 ierr = MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);CHKERRQ(ierr); 3090 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3091 PetscFunctionReturn(0); 3092 } 3093 3094 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 3095 { 3096 PetscErrorCode ierr; 3097 3098 PetscFunctionBegin; 3099 /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 3100 ierr = MatConvert(PP,MATMPIAIJ,MAT_INPLACE_MATRIX,&PP);CHKERRQ(ierr); 3101 ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 3102 PetscFunctionReturn(0); 3103 } 3104 3105 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 3106 { 3107 PetscFunctionBegin; 3108 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 3109 PetscFunctionReturn(0); 3110 } 3111 3112 PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3113 { 3114 PetscErrorCode ierr; 3115 3116 PetscFunctionBegin; 3117 if (scall == MAT_INITIAL_MATRIX) { 3118 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3119 ierr = MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);CHKERRQ(ierr); 3120 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 3121 } 3122 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3123 ierr = ((*C)->ops->ptapnumeric)(A,P,*C);CHKERRQ(ierr); 3124 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 3125 PetscFunctionReturn(0); 3126 } 3127 3128 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3129 { 3130 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 3131 Mat a = b->AIJ,B; 3132 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 3133 PetscErrorCode ierr; 3134 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 3135 PetscInt *cols; 3136 PetscScalar *vals; 3137 3138 PetscFunctionBegin; 3139 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 3140 ierr = PetscMalloc1(dof*m,&ilen);CHKERRQ(ierr); 3141 for (i=0; i<m; i++) { 3142 nmax = PetscMax(nmax,aij->ilen[i]); 3143 for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i]; 3144 } 3145 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 3146 ierr = PetscFree(ilen);CHKERRQ(ierr); 3147 ierr = PetscMalloc1(nmax,&icols);CHKERRQ(ierr); 3148 ii = 0; 3149 for (i=0; i<m; i++) { 3150 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3151 for (j=0; j<dof; j++) { 3152 for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j; 3153 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3154 ii++; 3155 } 3156 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3157 } 3158 ierr = PetscFree(icols);CHKERRQ(ierr); 3159 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3160 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3161 3162 if (reuse == MAT_INPLACE_MATRIX) { 3163 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 3164 } else { 3165 *newmat = B; 3166 } 3167 PetscFunctionReturn(0); 3168 } 3169 3170 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3171 3172 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 3173 { 3174 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 3175 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 3176 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 3177 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 3178 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 3179 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 3180 PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0; 3181 PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL; 3182 PetscInt rstart,cstart,*garray,ii,k; 3183 PetscErrorCode ierr; 3184 PetscScalar *vals,*ovals; 3185 3186 PetscFunctionBegin; 3187 ierr = PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);CHKERRQ(ierr); 3188 for (i=0; i<A->rmap->n/dof; i++) { 3189 nmax = PetscMax(nmax,AIJ->ilen[i]); 3190 onmax = PetscMax(onmax,OAIJ->ilen[i]); 3191 for (j=0; j<dof; j++) { 3192 dnz[dof*i+j] = AIJ->ilen[i]; 3193 onz[dof*i+j] = OAIJ->ilen[i]; 3194 } 3195 } 3196 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 3197 ierr = MatSetBlockSize(B,dof);CHKERRQ(ierr); 3198 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 3199 3200 ierr = PetscMalloc2(nmax,&icols,onmax,&oicols);CHKERRQ(ierr); 3201 rstart = dof*maij->A->rmap->rstart; 3202 cstart = dof*maij->A->cmap->rstart; 3203 garray = mpiaij->garray; 3204 3205 ii = rstart; 3206 for (i=0; i<A->rmap->n/dof; i++) { 3207 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3208 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3209 for (j=0; j<dof; j++) { 3210 for (k=0; k<ncols; k++) { 3211 icols[k] = cstart + dof*cols[k]+j; 3212 } 3213 for (k=0; k<oncols; k++) { 3214 oicols[k] = dof*garray[ocols[k]]+j; 3215 } 3216 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 3217 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 3218 ii++; 3219 } 3220 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 3221 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 3222 } 3223 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 3224 3225 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3226 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3227 3228 if (reuse == MAT_INPLACE_MATRIX) { 3229 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 3230 ((PetscObject)A)->refct = 1; 3231 3232 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 3233 3234 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3235 } else { 3236 *newmat = B; 3237 } 3238 PetscFunctionReturn(0); 3239 } 3240 3241 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 3242 { 3243 PetscErrorCode ierr; 3244 Mat A; 3245 3246 PetscFunctionBegin; 3247 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 3248 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 3249 ierr = MatDestroy(&A);CHKERRQ(ierr); 3250 PetscFunctionReturn(0); 3251 } 3252 3253 /* ---------------------------------------------------------------------------------- */ 3254 /*@ 3255 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 3256 operations for multicomponent problems. It interpolates each component the same 3257 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 3258 and MATMPIAIJ for distributed matrices. 3259 3260 Collective 3261 3262 Input Parameters: 3263 + A - the AIJ matrix describing the action on blocks 3264 - dof - the block size (number of components per node) 3265 3266 Output Parameter: 3267 . maij - the new MAIJ matrix 3268 3269 Operations provided: 3270 + MatMult 3271 . MatMultTranspose 3272 . MatMultAdd 3273 . MatMultTransposeAdd 3274 - MatView 3275 3276 Level: advanced 3277 3278 .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ 3279 @*/ 3280 PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 3281 { 3282 PetscErrorCode ierr; 3283 PetscMPIInt size; 3284 PetscInt n; 3285 Mat B; 3286 3287 PetscFunctionBegin; 3288 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3289 3290 if (dof == 1) *maij = A; 3291 else { 3292 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3293 ierr = MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);CHKERRQ(ierr); 3294 ierr = PetscLayoutSetBlockSize(B->rmap,dof);CHKERRQ(ierr); 3295 ierr = PetscLayoutSetBlockSize(B->cmap,dof);CHKERRQ(ierr); 3296 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3297 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3298 3299 B->assembled = PETSC_TRUE; 3300 3301 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3302 if (size == 1) { 3303 Mat_SeqMAIJ *b; 3304 3305 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 3306 3307 B->ops->setup = NULL; 3308 B->ops->destroy = MatDestroy_SeqMAIJ; 3309 B->ops->view = MatView_SeqMAIJ; 3310 b = (Mat_SeqMAIJ*)B->data; 3311 b->dof = dof; 3312 b->AIJ = A; 3313 3314 if (dof == 2) { 3315 B->ops->mult = MatMult_SeqMAIJ_2; 3316 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3317 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3318 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3319 } else if (dof == 3) { 3320 B->ops->mult = MatMult_SeqMAIJ_3; 3321 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3322 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3323 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3324 } else if (dof == 4) { 3325 B->ops->mult = MatMult_SeqMAIJ_4; 3326 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3327 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3328 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3329 } else if (dof == 5) { 3330 B->ops->mult = MatMult_SeqMAIJ_5; 3331 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3332 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3333 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 3334 } else if (dof == 6) { 3335 B->ops->mult = MatMult_SeqMAIJ_6; 3336 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 3337 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 3338 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3339 } else if (dof == 7) { 3340 B->ops->mult = MatMult_SeqMAIJ_7; 3341 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3342 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3343 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 3344 } else if (dof == 8) { 3345 B->ops->mult = MatMult_SeqMAIJ_8; 3346 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 3347 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 3348 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 3349 } else if (dof == 9) { 3350 B->ops->mult = MatMult_SeqMAIJ_9; 3351 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 3352 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 3353 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3354 } else if (dof == 10) { 3355 B->ops->mult = MatMult_SeqMAIJ_10; 3356 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3357 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3358 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3359 } else if (dof == 11) { 3360 B->ops->mult = MatMult_SeqMAIJ_11; 3361 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3362 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3363 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 3364 } else if (dof == 16) { 3365 B->ops->mult = MatMult_SeqMAIJ_16; 3366 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 3367 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 3368 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3369 } else if (dof == 18) { 3370 B->ops->mult = MatMult_SeqMAIJ_18; 3371 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3372 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3373 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3374 } else { 3375 B->ops->mult = MatMult_SeqMAIJ_N; 3376 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 3377 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 3378 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 3379 } 3380 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 3381 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 3382 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijperm_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 3383 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijmkl_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);CHKERRQ(ierr); 3384 } else { 3385 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data; 3386 Mat_MPIMAIJ *b; 3387 IS from,to; 3388 Vec gvec; 3389 3390 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 3391 3392 B->ops->setup = NULL; 3393 B->ops->destroy = MatDestroy_MPIMAIJ; 3394 B->ops->view = MatView_MPIMAIJ; 3395 3396 b = (Mat_MPIMAIJ*)B->data; 3397 b->dof = dof; 3398 b->A = A; 3399 3400 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 3401 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 3402 3403 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 3404 ierr = VecCreate(PETSC_COMM_SELF,&b->w);CHKERRQ(ierr); 3405 ierr = VecSetSizes(b->w,n*dof,n*dof);CHKERRQ(ierr); 3406 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 3407 ierr = VecSetType(b->w,VECSEQ);CHKERRQ(ierr); 3408 3409 /* create two temporary Index sets for build scatter gather */ 3410 ierr = ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 3411 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 3412 3413 /* create temporary global vector to generate scatter context */ 3414 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 3415 3416 /* generate the scatter context */ 3417 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3418 3419 ierr = ISDestroy(&from);CHKERRQ(ierr); 3420 ierr = ISDestroy(&to);CHKERRQ(ierr); 3421 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 3422 3423 B->ops->mult = MatMult_MPIMAIJ_dof; 3424 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3425 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3426 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3427 3428 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3429 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);CHKERRQ(ierr); 3430 } 3431 B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 3432 ierr = MatSetUp(B);CHKERRQ(ierr); 3433 *maij = B; 3434 ierr = MatViewFromOptions(B,NULL,"-mat_view");CHKERRQ(ierr); 3435 } 3436 PetscFunctionReturn(0); 3437 } 3438