1 2 /* 3 Defines the basic matrix operations for the MAIJ matrix storage format. 4 This format is used for restriction and interpolation operations for 5 multicomponent problems. It interpolates each component the same way 6 independently. 7 8 We provide: 9 MatMult() 10 MatMultTranspose() 11 MatMultTransposeAdd() 12 MatMultAdd() 13 and 14 MatCreateMAIJ(Mat,dof,Mat*) 15 16 This single directory handles both the sequential and parallel codes 17 */ 18 19 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 20 #include <../src/mat/utils/freespace.h> 21 22 /*@ 23 MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix 24 25 Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel 26 27 Input Parameter: 28 . A - the MAIJ matrix 29 30 Output Parameter: 31 . B - the AIJ matrix 32 33 Level: advanced 34 35 Notes: 36 The reference count on the AIJ matrix is not increased so you should not destroy it. 37 38 .seealso: `MatCreateMAIJ()` 39 @*/ 40 PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B) { 41 PetscBool ismpimaij, isseqmaij; 42 43 PetscFunctionBegin; 44 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij)); 45 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij)); 46 if (ismpimaij) { 47 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 48 49 *B = b->A; 50 } else if (isseqmaij) { 51 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 52 53 *B = b->AIJ; 54 } else { 55 *B = A; 56 } 57 PetscFunctionReturn(0); 58 } 59 60 /*@ 61 MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size 62 63 Logically Collective 64 65 Input Parameters: 66 + A - the MAIJ matrix 67 - dof - the block size for the new matrix 68 69 Output Parameter: 70 . B - the new MAIJ matrix 71 72 Level: advanced 73 74 .seealso: `MatCreateMAIJ()` 75 @*/ 76 PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B) { 77 Mat Aij = NULL; 78 79 PetscFunctionBegin; 80 PetscValidLogicalCollectiveInt(A, dof, 2); 81 PetscCall(MatMAIJGetAIJ(A, &Aij)); 82 PetscCall(MatCreateMAIJ(Aij, dof, B)); 83 PetscFunctionReturn(0); 84 } 85 86 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) { 87 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 88 89 PetscFunctionBegin; 90 PetscCall(MatDestroy(&b->AIJ)); 91 PetscCall(PetscFree(A->data)); 92 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL)); 93 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL)); 94 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL)); 95 PetscFunctionReturn(0); 96 } 97 98 PetscErrorCode MatSetUp_MAIJ(Mat A) { 99 PetscFunctionBegin; 100 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices"); 101 } 102 103 PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer) { 104 Mat B; 105 106 PetscFunctionBegin; 107 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 108 PetscCall(MatView(B, viewer)); 109 PetscCall(MatDestroy(&B)); 110 PetscFunctionReturn(0); 111 } 112 113 PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer) { 114 Mat B; 115 116 PetscFunctionBegin; 117 PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B)); 118 PetscCall(MatView(B, viewer)); 119 PetscCall(MatDestroy(&B)); 120 PetscFunctionReturn(0); 121 } 122 123 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) { 124 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 125 126 PetscFunctionBegin; 127 PetscCall(MatDestroy(&b->AIJ)); 128 PetscCall(MatDestroy(&b->OAIJ)); 129 PetscCall(MatDestroy(&b->A)); 130 PetscCall(VecScatterDestroy(&b->ctx)); 131 PetscCall(VecDestroy(&b->w)); 132 PetscCall(PetscFree(A->data)); 133 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL)); 134 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL)); 135 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL)); 136 PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 137 PetscFunctionReturn(0); 138 } 139 140 /*MC 141 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 142 multicomponent problems, interpolating or restricting each component the same way independently. 143 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 144 145 Operations provided: 146 .vb 147 MatMult 148 MatMultTranspose 149 MatMultAdd 150 MatMultTransposeAdd 151 .ve 152 153 Level: advanced 154 155 .seealso: `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()` 156 M*/ 157 158 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) { 159 Mat_MPIMAIJ *b; 160 PetscMPIInt size; 161 162 PetscFunctionBegin; 163 PetscCall(PetscNewLog(A, &b)); 164 A->data = (void *)b; 165 166 PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps))); 167 168 A->ops->setup = MatSetUp_MAIJ; 169 170 b->AIJ = NULL; 171 b->dof = 0; 172 b->OAIJ = NULL; 173 b->ctx = NULL; 174 b->w = NULL; 175 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 176 if (size == 1) { 177 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ)); 178 } else { 179 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ)); 180 } 181 A->preallocated = PETSC_TRUE; 182 A->assembled = PETSC_TRUE; 183 PetscFunctionReturn(0); 184 } 185 186 /* --------------------------------------------------------------------------------------*/ 187 PetscErrorCode MatMult_SeqMAIJ_2(Mat A, Vec xx, Vec yy) { 188 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 189 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 190 const PetscScalar *x, *v; 191 PetscScalar *y, sum1, sum2; 192 PetscInt nonzerorow = 0, n, i, jrow, j; 193 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 194 195 PetscFunctionBegin; 196 PetscCall(VecGetArrayRead(xx, &x)); 197 PetscCall(VecGetArray(yy, &y)); 198 idx = a->j; 199 v = a->a; 200 ii = a->i; 201 202 for (i = 0; i < m; i++) { 203 jrow = ii[i]; 204 n = ii[i + 1] - jrow; 205 sum1 = 0.0; 206 sum2 = 0.0; 207 208 nonzerorow += (n > 0); 209 for (j = 0; j < n; j++) { 210 sum1 += v[jrow] * x[2 * idx[jrow]]; 211 sum2 += v[jrow] * x[2 * idx[jrow] + 1]; 212 jrow++; 213 } 214 y[2 * i] = sum1; 215 y[2 * i + 1] = sum2; 216 } 217 218 PetscCall(PetscLogFlops(4.0 * a->nz - 2.0 * nonzerorow)); 219 PetscCall(VecRestoreArrayRead(xx, &x)); 220 PetscCall(VecRestoreArray(yy, &y)); 221 PetscFunctionReturn(0); 222 } 223 224 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A, Vec xx, Vec yy) { 225 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 226 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 227 const PetscScalar *x, *v; 228 PetscScalar *y, alpha1, alpha2; 229 PetscInt n, i; 230 const PetscInt m = b->AIJ->rmap->n, *idx; 231 232 PetscFunctionBegin; 233 PetscCall(VecSet(yy, 0.0)); 234 PetscCall(VecGetArrayRead(xx, &x)); 235 PetscCall(VecGetArray(yy, &y)); 236 237 for (i = 0; i < m; i++) { 238 idx = a->j + a->i[i]; 239 v = a->a + a->i[i]; 240 n = a->i[i + 1] - a->i[i]; 241 alpha1 = x[2 * i]; 242 alpha2 = x[2 * i + 1]; 243 while (n-- > 0) { 244 y[2 * (*idx)] += alpha1 * (*v); 245 y[2 * (*idx) + 1] += alpha2 * (*v); 246 idx++; 247 v++; 248 } 249 } 250 PetscCall(PetscLogFlops(4.0 * a->nz)); 251 PetscCall(VecRestoreArrayRead(xx, &x)); 252 PetscCall(VecRestoreArray(yy, &y)); 253 PetscFunctionReturn(0); 254 } 255 256 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) { 257 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 258 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 259 const PetscScalar *x, *v; 260 PetscScalar *y, sum1, sum2; 261 PetscInt n, i, jrow, j; 262 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 263 264 PetscFunctionBegin; 265 if (yy != zz) PetscCall(VecCopy(yy, zz)); 266 PetscCall(VecGetArrayRead(xx, &x)); 267 PetscCall(VecGetArray(zz, &y)); 268 idx = a->j; 269 v = a->a; 270 ii = a->i; 271 272 for (i = 0; i < m; i++) { 273 jrow = ii[i]; 274 n = ii[i + 1] - jrow; 275 sum1 = 0.0; 276 sum2 = 0.0; 277 for (j = 0; j < n; j++) { 278 sum1 += v[jrow] * x[2 * idx[jrow]]; 279 sum2 += v[jrow] * x[2 * idx[jrow] + 1]; 280 jrow++; 281 } 282 y[2 * i] += sum1; 283 y[2 * i + 1] += sum2; 284 } 285 286 PetscCall(PetscLogFlops(4.0 * a->nz)); 287 PetscCall(VecRestoreArrayRead(xx, &x)); 288 PetscCall(VecRestoreArray(zz, &y)); 289 PetscFunctionReturn(0); 290 } 291 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) { 292 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 293 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 294 const PetscScalar *x, *v; 295 PetscScalar *y, alpha1, alpha2; 296 PetscInt n, i; 297 const PetscInt m = b->AIJ->rmap->n, *idx; 298 299 PetscFunctionBegin; 300 if (yy != zz) PetscCall(VecCopy(yy, zz)); 301 PetscCall(VecGetArrayRead(xx, &x)); 302 PetscCall(VecGetArray(zz, &y)); 303 304 for (i = 0; i < m; i++) { 305 idx = a->j + a->i[i]; 306 v = a->a + a->i[i]; 307 n = a->i[i + 1] - a->i[i]; 308 alpha1 = x[2 * i]; 309 alpha2 = x[2 * i + 1]; 310 while (n-- > 0) { 311 y[2 * (*idx)] += alpha1 * (*v); 312 y[2 * (*idx) + 1] += alpha2 * (*v); 313 idx++; 314 v++; 315 } 316 } 317 PetscCall(PetscLogFlops(4.0 * a->nz)); 318 PetscCall(VecRestoreArrayRead(xx, &x)); 319 PetscCall(VecRestoreArray(zz, &y)); 320 PetscFunctionReturn(0); 321 } 322 /* --------------------------------------------------------------------------------------*/ 323 PetscErrorCode MatMult_SeqMAIJ_3(Mat A, Vec xx, Vec yy) { 324 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 325 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 326 const PetscScalar *x, *v; 327 PetscScalar *y, sum1, sum2, sum3; 328 PetscInt nonzerorow = 0, n, i, jrow, j; 329 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 330 331 PetscFunctionBegin; 332 PetscCall(VecGetArrayRead(xx, &x)); 333 PetscCall(VecGetArray(yy, &y)); 334 idx = a->j; 335 v = a->a; 336 ii = a->i; 337 338 for (i = 0; i < m; i++) { 339 jrow = ii[i]; 340 n = ii[i + 1] - jrow; 341 sum1 = 0.0; 342 sum2 = 0.0; 343 sum3 = 0.0; 344 345 nonzerorow += (n > 0); 346 for (j = 0; j < n; j++) { 347 sum1 += v[jrow] * x[3 * idx[jrow]]; 348 sum2 += v[jrow] * x[3 * idx[jrow] + 1]; 349 sum3 += v[jrow] * x[3 * idx[jrow] + 2]; 350 jrow++; 351 } 352 y[3 * i] = sum1; 353 y[3 * i + 1] = sum2; 354 y[3 * i + 2] = sum3; 355 } 356 357 PetscCall(PetscLogFlops(6.0 * a->nz - 3.0 * nonzerorow)); 358 PetscCall(VecRestoreArrayRead(xx, &x)); 359 PetscCall(VecRestoreArray(yy, &y)); 360 PetscFunctionReturn(0); 361 } 362 363 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A, Vec xx, Vec yy) { 364 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 365 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 366 const PetscScalar *x, *v; 367 PetscScalar *y, alpha1, alpha2, alpha3; 368 PetscInt n, i; 369 const PetscInt m = b->AIJ->rmap->n, *idx; 370 371 PetscFunctionBegin; 372 PetscCall(VecSet(yy, 0.0)); 373 PetscCall(VecGetArrayRead(xx, &x)); 374 PetscCall(VecGetArray(yy, &y)); 375 376 for (i = 0; i < m; i++) { 377 idx = a->j + a->i[i]; 378 v = a->a + a->i[i]; 379 n = a->i[i + 1] - a->i[i]; 380 alpha1 = x[3 * i]; 381 alpha2 = x[3 * i + 1]; 382 alpha3 = x[3 * i + 2]; 383 while (n-- > 0) { 384 y[3 * (*idx)] += alpha1 * (*v); 385 y[3 * (*idx) + 1] += alpha2 * (*v); 386 y[3 * (*idx) + 2] += alpha3 * (*v); 387 idx++; 388 v++; 389 } 390 } 391 PetscCall(PetscLogFlops(6.0 * a->nz)); 392 PetscCall(VecRestoreArrayRead(xx, &x)); 393 PetscCall(VecRestoreArray(yy, &y)); 394 PetscFunctionReturn(0); 395 } 396 397 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) { 398 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 399 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 400 const PetscScalar *x, *v; 401 PetscScalar *y, sum1, sum2, sum3; 402 PetscInt n, i, jrow, j; 403 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 404 405 PetscFunctionBegin; 406 if (yy != zz) PetscCall(VecCopy(yy, zz)); 407 PetscCall(VecGetArrayRead(xx, &x)); 408 PetscCall(VecGetArray(zz, &y)); 409 idx = a->j; 410 v = a->a; 411 ii = a->i; 412 413 for (i = 0; i < m; i++) { 414 jrow = ii[i]; 415 n = ii[i + 1] - jrow; 416 sum1 = 0.0; 417 sum2 = 0.0; 418 sum3 = 0.0; 419 for (j = 0; j < n; j++) { 420 sum1 += v[jrow] * x[3 * idx[jrow]]; 421 sum2 += v[jrow] * x[3 * idx[jrow] + 1]; 422 sum3 += v[jrow] * x[3 * idx[jrow] + 2]; 423 jrow++; 424 } 425 y[3 * i] += sum1; 426 y[3 * i + 1] += sum2; 427 y[3 * i + 2] += sum3; 428 } 429 430 PetscCall(PetscLogFlops(6.0 * a->nz)); 431 PetscCall(VecRestoreArrayRead(xx, &x)); 432 PetscCall(VecRestoreArray(zz, &y)); 433 PetscFunctionReturn(0); 434 } 435 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) { 436 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 437 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 438 const PetscScalar *x, *v; 439 PetscScalar *y, alpha1, alpha2, alpha3; 440 PetscInt n, i; 441 const PetscInt m = b->AIJ->rmap->n, *idx; 442 443 PetscFunctionBegin; 444 if (yy != zz) PetscCall(VecCopy(yy, zz)); 445 PetscCall(VecGetArrayRead(xx, &x)); 446 PetscCall(VecGetArray(zz, &y)); 447 for (i = 0; i < m; i++) { 448 idx = a->j + a->i[i]; 449 v = a->a + a->i[i]; 450 n = a->i[i + 1] - a->i[i]; 451 alpha1 = x[3 * i]; 452 alpha2 = x[3 * i + 1]; 453 alpha3 = x[3 * i + 2]; 454 while (n-- > 0) { 455 y[3 * (*idx)] += alpha1 * (*v); 456 y[3 * (*idx) + 1] += alpha2 * (*v); 457 y[3 * (*idx) + 2] += alpha3 * (*v); 458 idx++; 459 v++; 460 } 461 } 462 PetscCall(PetscLogFlops(6.0 * a->nz)); 463 PetscCall(VecRestoreArrayRead(xx, &x)); 464 PetscCall(VecRestoreArray(zz, &y)); 465 PetscFunctionReturn(0); 466 } 467 468 /* ------------------------------------------------------------------------------*/ 469 PetscErrorCode MatMult_SeqMAIJ_4(Mat A, Vec xx, Vec yy) { 470 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 471 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 472 const PetscScalar *x, *v; 473 PetscScalar *y, sum1, sum2, sum3, sum4; 474 PetscInt nonzerorow = 0, n, i, jrow, j; 475 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 476 477 PetscFunctionBegin; 478 PetscCall(VecGetArrayRead(xx, &x)); 479 PetscCall(VecGetArray(yy, &y)); 480 idx = a->j; 481 v = a->a; 482 ii = a->i; 483 484 for (i = 0; i < m; i++) { 485 jrow = ii[i]; 486 n = ii[i + 1] - jrow; 487 sum1 = 0.0; 488 sum2 = 0.0; 489 sum3 = 0.0; 490 sum4 = 0.0; 491 nonzerorow += (n > 0); 492 for (j = 0; j < n; j++) { 493 sum1 += v[jrow] * x[4 * idx[jrow]]; 494 sum2 += v[jrow] * x[4 * idx[jrow] + 1]; 495 sum3 += v[jrow] * x[4 * idx[jrow] + 2]; 496 sum4 += v[jrow] * x[4 * idx[jrow] + 3]; 497 jrow++; 498 } 499 y[4 * i] = sum1; 500 y[4 * i + 1] = sum2; 501 y[4 * i + 2] = sum3; 502 y[4 * i + 3] = sum4; 503 } 504 505 PetscCall(PetscLogFlops(8.0 * a->nz - 4.0 * nonzerorow)); 506 PetscCall(VecRestoreArrayRead(xx, &x)); 507 PetscCall(VecRestoreArray(yy, &y)); 508 PetscFunctionReturn(0); 509 } 510 511 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A, Vec xx, Vec yy) { 512 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 513 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 514 const PetscScalar *x, *v; 515 PetscScalar *y, alpha1, alpha2, alpha3, alpha4; 516 PetscInt n, i; 517 const PetscInt m = b->AIJ->rmap->n, *idx; 518 519 PetscFunctionBegin; 520 PetscCall(VecSet(yy, 0.0)); 521 PetscCall(VecGetArrayRead(xx, &x)); 522 PetscCall(VecGetArray(yy, &y)); 523 for (i = 0; i < m; i++) { 524 idx = a->j + a->i[i]; 525 v = a->a + a->i[i]; 526 n = a->i[i + 1] - a->i[i]; 527 alpha1 = x[4 * i]; 528 alpha2 = x[4 * i + 1]; 529 alpha3 = x[4 * i + 2]; 530 alpha4 = x[4 * i + 3]; 531 while (n-- > 0) { 532 y[4 * (*idx)] += alpha1 * (*v); 533 y[4 * (*idx) + 1] += alpha2 * (*v); 534 y[4 * (*idx) + 2] += alpha3 * (*v); 535 y[4 * (*idx) + 3] += alpha4 * (*v); 536 idx++; 537 v++; 538 } 539 } 540 PetscCall(PetscLogFlops(8.0 * a->nz)); 541 PetscCall(VecRestoreArrayRead(xx, &x)); 542 PetscCall(VecRestoreArray(yy, &y)); 543 PetscFunctionReturn(0); 544 } 545 546 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) { 547 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 548 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 549 const PetscScalar *x, *v; 550 PetscScalar *y, sum1, sum2, sum3, sum4; 551 PetscInt n, i, jrow, j; 552 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 553 554 PetscFunctionBegin; 555 if (yy != zz) PetscCall(VecCopy(yy, zz)); 556 PetscCall(VecGetArrayRead(xx, &x)); 557 PetscCall(VecGetArray(zz, &y)); 558 idx = a->j; 559 v = a->a; 560 ii = a->i; 561 562 for (i = 0; i < m; i++) { 563 jrow = ii[i]; 564 n = ii[i + 1] - jrow; 565 sum1 = 0.0; 566 sum2 = 0.0; 567 sum3 = 0.0; 568 sum4 = 0.0; 569 for (j = 0; j < n; j++) { 570 sum1 += v[jrow] * x[4 * idx[jrow]]; 571 sum2 += v[jrow] * x[4 * idx[jrow] + 1]; 572 sum3 += v[jrow] * x[4 * idx[jrow] + 2]; 573 sum4 += v[jrow] * x[4 * idx[jrow] + 3]; 574 jrow++; 575 } 576 y[4 * i] += sum1; 577 y[4 * i + 1] += sum2; 578 y[4 * i + 2] += sum3; 579 y[4 * i + 3] += sum4; 580 } 581 582 PetscCall(PetscLogFlops(8.0 * a->nz)); 583 PetscCall(VecRestoreArrayRead(xx, &x)); 584 PetscCall(VecRestoreArray(zz, &y)); 585 PetscFunctionReturn(0); 586 } 587 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) { 588 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 589 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 590 const PetscScalar *x, *v; 591 PetscScalar *y, alpha1, alpha2, alpha3, alpha4; 592 PetscInt n, i; 593 const PetscInt m = b->AIJ->rmap->n, *idx; 594 595 PetscFunctionBegin; 596 if (yy != zz) PetscCall(VecCopy(yy, zz)); 597 PetscCall(VecGetArrayRead(xx, &x)); 598 PetscCall(VecGetArray(zz, &y)); 599 600 for (i = 0; i < m; i++) { 601 idx = a->j + a->i[i]; 602 v = a->a + a->i[i]; 603 n = a->i[i + 1] - a->i[i]; 604 alpha1 = x[4 * i]; 605 alpha2 = x[4 * i + 1]; 606 alpha3 = x[4 * i + 2]; 607 alpha4 = x[4 * i + 3]; 608 while (n-- > 0) { 609 y[4 * (*idx)] += alpha1 * (*v); 610 y[4 * (*idx) + 1] += alpha2 * (*v); 611 y[4 * (*idx) + 2] += alpha3 * (*v); 612 y[4 * (*idx) + 3] += alpha4 * (*v); 613 idx++; 614 v++; 615 } 616 } 617 PetscCall(PetscLogFlops(8.0 * a->nz)); 618 PetscCall(VecRestoreArrayRead(xx, &x)); 619 PetscCall(VecRestoreArray(zz, &y)); 620 PetscFunctionReturn(0); 621 } 622 /* ------------------------------------------------------------------------------*/ 623 624 PetscErrorCode MatMult_SeqMAIJ_5(Mat A, Vec xx, Vec yy) { 625 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 626 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 627 const PetscScalar *x, *v; 628 PetscScalar *y, sum1, sum2, sum3, sum4, sum5; 629 PetscInt nonzerorow = 0, n, i, jrow, j; 630 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 631 632 PetscFunctionBegin; 633 PetscCall(VecGetArrayRead(xx, &x)); 634 PetscCall(VecGetArray(yy, &y)); 635 idx = a->j; 636 v = a->a; 637 ii = a->i; 638 639 for (i = 0; i < m; i++) { 640 jrow = ii[i]; 641 n = ii[i + 1] - jrow; 642 sum1 = 0.0; 643 sum2 = 0.0; 644 sum3 = 0.0; 645 sum4 = 0.0; 646 sum5 = 0.0; 647 648 nonzerorow += (n > 0); 649 for (j = 0; j < n; j++) { 650 sum1 += v[jrow] * x[5 * idx[jrow]]; 651 sum2 += v[jrow] * x[5 * idx[jrow] + 1]; 652 sum3 += v[jrow] * x[5 * idx[jrow] + 2]; 653 sum4 += v[jrow] * x[5 * idx[jrow] + 3]; 654 sum5 += v[jrow] * x[5 * idx[jrow] + 4]; 655 jrow++; 656 } 657 y[5 * i] = sum1; 658 y[5 * i + 1] = sum2; 659 y[5 * i + 2] = sum3; 660 y[5 * i + 3] = sum4; 661 y[5 * i + 4] = sum5; 662 } 663 664 PetscCall(PetscLogFlops(10.0 * a->nz - 5.0 * nonzerorow)); 665 PetscCall(VecRestoreArrayRead(xx, &x)); 666 PetscCall(VecRestoreArray(yy, &y)); 667 PetscFunctionReturn(0); 668 } 669 670 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A, Vec xx, Vec yy) { 671 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 672 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 673 const PetscScalar *x, *v; 674 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5; 675 PetscInt n, i; 676 const PetscInt m = b->AIJ->rmap->n, *idx; 677 678 PetscFunctionBegin; 679 PetscCall(VecSet(yy, 0.0)); 680 PetscCall(VecGetArrayRead(xx, &x)); 681 PetscCall(VecGetArray(yy, &y)); 682 683 for (i = 0; i < m; i++) { 684 idx = a->j + a->i[i]; 685 v = a->a + a->i[i]; 686 n = a->i[i + 1] - a->i[i]; 687 alpha1 = x[5 * i]; 688 alpha2 = x[5 * i + 1]; 689 alpha3 = x[5 * i + 2]; 690 alpha4 = x[5 * i + 3]; 691 alpha5 = x[5 * i + 4]; 692 while (n-- > 0) { 693 y[5 * (*idx)] += alpha1 * (*v); 694 y[5 * (*idx) + 1] += alpha2 * (*v); 695 y[5 * (*idx) + 2] += alpha3 * (*v); 696 y[5 * (*idx) + 3] += alpha4 * (*v); 697 y[5 * (*idx) + 4] += alpha5 * (*v); 698 idx++; 699 v++; 700 } 701 } 702 PetscCall(PetscLogFlops(10.0 * a->nz)); 703 PetscCall(VecRestoreArrayRead(xx, &x)); 704 PetscCall(VecRestoreArray(yy, &y)); 705 PetscFunctionReturn(0); 706 } 707 708 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) { 709 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 710 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 711 const PetscScalar *x, *v; 712 PetscScalar *y, sum1, sum2, sum3, sum4, sum5; 713 PetscInt n, i, jrow, j; 714 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 715 716 PetscFunctionBegin; 717 if (yy != zz) PetscCall(VecCopy(yy, zz)); 718 PetscCall(VecGetArrayRead(xx, &x)); 719 PetscCall(VecGetArray(zz, &y)); 720 idx = a->j; 721 v = a->a; 722 ii = a->i; 723 724 for (i = 0; i < m; i++) { 725 jrow = ii[i]; 726 n = ii[i + 1] - jrow; 727 sum1 = 0.0; 728 sum2 = 0.0; 729 sum3 = 0.0; 730 sum4 = 0.0; 731 sum5 = 0.0; 732 for (j = 0; j < n; j++) { 733 sum1 += v[jrow] * x[5 * idx[jrow]]; 734 sum2 += v[jrow] * x[5 * idx[jrow] + 1]; 735 sum3 += v[jrow] * x[5 * idx[jrow] + 2]; 736 sum4 += v[jrow] * x[5 * idx[jrow] + 3]; 737 sum5 += v[jrow] * x[5 * idx[jrow] + 4]; 738 jrow++; 739 } 740 y[5 * i] += sum1; 741 y[5 * i + 1] += sum2; 742 y[5 * i + 2] += sum3; 743 y[5 * i + 3] += sum4; 744 y[5 * i + 4] += sum5; 745 } 746 747 PetscCall(PetscLogFlops(10.0 * a->nz)); 748 PetscCall(VecRestoreArrayRead(xx, &x)); 749 PetscCall(VecRestoreArray(zz, &y)); 750 PetscFunctionReturn(0); 751 } 752 753 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) { 754 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 755 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 756 const PetscScalar *x, *v; 757 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5; 758 PetscInt n, i; 759 const PetscInt m = b->AIJ->rmap->n, *idx; 760 761 PetscFunctionBegin; 762 if (yy != zz) PetscCall(VecCopy(yy, zz)); 763 PetscCall(VecGetArrayRead(xx, &x)); 764 PetscCall(VecGetArray(zz, &y)); 765 766 for (i = 0; i < m; i++) { 767 idx = a->j + a->i[i]; 768 v = a->a + a->i[i]; 769 n = a->i[i + 1] - a->i[i]; 770 alpha1 = x[5 * i]; 771 alpha2 = x[5 * i + 1]; 772 alpha3 = x[5 * i + 2]; 773 alpha4 = x[5 * i + 3]; 774 alpha5 = x[5 * i + 4]; 775 while (n-- > 0) { 776 y[5 * (*idx)] += alpha1 * (*v); 777 y[5 * (*idx) + 1] += alpha2 * (*v); 778 y[5 * (*idx) + 2] += alpha3 * (*v); 779 y[5 * (*idx) + 3] += alpha4 * (*v); 780 y[5 * (*idx) + 4] += alpha5 * (*v); 781 idx++; 782 v++; 783 } 784 } 785 PetscCall(PetscLogFlops(10.0 * a->nz)); 786 PetscCall(VecRestoreArrayRead(xx, &x)); 787 PetscCall(VecRestoreArray(zz, &y)); 788 PetscFunctionReturn(0); 789 } 790 791 /* ------------------------------------------------------------------------------*/ 792 PetscErrorCode MatMult_SeqMAIJ_6(Mat A, Vec xx, Vec yy) { 793 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 794 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 795 const PetscScalar *x, *v; 796 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6; 797 PetscInt nonzerorow = 0, n, i, jrow, j; 798 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 799 800 PetscFunctionBegin; 801 PetscCall(VecGetArrayRead(xx, &x)); 802 PetscCall(VecGetArray(yy, &y)); 803 idx = a->j; 804 v = a->a; 805 ii = a->i; 806 807 for (i = 0; i < m; i++) { 808 jrow = ii[i]; 809 n = ii[i + 1] - jrow; 810 sum1 = 0.0; 811 sum2 = 0.0; 812 sum3 = 0.0; 813 sum4 = 0.0; 814 sum5 = 0.0; 815 sum6 = 0.0; 816 817 nonzerorow += (n > 0); 818 for (j = 0; j < n; j++) { 819 sum1 += v[jrow] * x[6 * idx[jrow]]; 820 sum2 += v[jrow] * x[6 * idx[jrow] + 1]; 821 sum3 += v[jrow] * x[6 * idx[jrow] + 2]; 822 sum4 += v[jrow] * x[6 * idx[jrow] + 3]; 823 sum5 += v[jrow] * x[6 * idx[jrow] + 4]; 824 sum6 += v[jrow] * x[6 * idx[jrow] + 5]; 825 jrow++; 826 } 827 y[6 * i] = sum1; 828 y[6 * i + 1] = sum2; 829 y[6 * i + 2] = sum3; 830 y[6 * i + 3] = sum4; 831 y[6 * i + 4] = sum5; 832 y[6 * i + 5] = sum6; 833 } 834 835 PetscCall(PetscLogFlops(12.0 * a->nz - 6.0 * nonzerorow)); 836 PetscCall(VecRestoreArrayRead(xx, &x)); 837 PetscCall(VecRestoreArray(yy, &y)); 838 PetscFunctionReturn(0); 839 } 840 841 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A, Vec xx, Vec yy) { 842 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 843 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 844 const PetscScalar *x, *v; 845 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6; 846 PetscInt n, i; 847 const PetscInt m = b->AIJ->rmap->n, *idx; 848 849 PetscFunctionBegin; 850 PetscCall(VecSet(yy, 0.0)); 851 PetscCall(VecGetArrayRead(xx, &x)); 852 PetscCall(VecGetArray(yy, &y)); 853 854 for (i = 0; i < m; i++) { 855 idx = a->j + a->i[i]; 856 v = a->a + a->i[i]; 857 n = a->i[i + 1] - a->i[i]; 858 alpha1 = x[6 * i]; 859 alpha2 = x[6 * i + 1]; 860 alpha3 = x[6 * i + 2]; 861 alpha4 = x[6 * i + 3]; 862 alpha5 = x[6 * i + 4]; 863 alpha6 = x[6 * i + 5]; 864 while (n-- > 0) { 865 y[6 * (*idx)] += alpha1 * (*v); 866 y[6 * (*idx) + 1] += alpha2 * (*v); 867 y[6 * (*idx) + 2] += alpha3 * (*v); 868 y[6 * (*idx) + 3] += alpha4 * (*v); 869 y[6 * (*idx) + 4] += alpha5 * (*v); 870 y[6 * (*idx) + 5] += alpha6 * (*v); 871 idx++; 872 v++; 873 } 874 } 875 PetscCall(PetscLogFlops(12.0 * a->nz)); 876 PetscCall(VecRestoreArrayRead(xx, &x)); 877 PetscCall(VecRestoreArray(yy, &y)); 878 PetscFunctionReturn(0); 879 } 880 881 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) { 882 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 883 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 884 const PetscScalar *x, *v; 885 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6; 886 PetscInt n, i, jrow, j; 887 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 888 889 PetscFunctionBegin; 890 if (yy != zz) PetscCall(VecCopy(yy, zz)); 891 PetscCall(VecGetArrayRead(xx, &x)); 892 PetscCall(VecGetArray(zz, &y)); 893 idx = a->j; 894 v = a->a; 895 ii = a->i; 896 897 for (i = 0; i < m; i++) { 898 jrow = ii[i]; 899 n = ii[i + 1] - jrow; 900 sum1 = 0.0; 901 sum2 = 0.0; 902 sum3 = 0.0; 903 sum4 = 0.0; 904 sum5 = 0.0; 905 sum6 = 0.0; 906 for (j = 0; j < n; j++) { 907 sum1 += v[jrow] * x[6 * idx[jrow]]; 908 sum2 += v[jrow] * x[6 * idx[jrow] + 1]; 909 sum3 += v[jrow] * x[6 * idx[jrow] + 2]; 910 sum4 += v[jrow] * x[6 * idx[jrow] + 3]; 911 sum5 += v[jrow] * x[6 * idx[jrow] + 4]; 912 sum6 += v[jrow] * x[6 * idx[jrow] + 5]; 913 jrow++; 914 } 915 y[6 * i] += sum1; 916 y[6 * i + 1] += sum2; 917 y[6 * i + 2] += sum3; 918 y[6 * i + 3] += sum4; 919 y[6 * i + 4] += sum5; 920 y[6 * i + 5] += sum6; 921 } 922 923 PetscCall(PetscLogFlops(12.0 * a->nz)); 924 PetscCall(VecRestoreArrayRead(xx, &x)); 925 PetscCall(VecRestoreArray(zz, &y)); 926 PetscFunctionReturn(0); 927 } 928 929 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) { 930 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 931 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 932 const PetscScalar *x, *v; 933 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6; 934 PetscInt n, i; 935 const PetscInt m = b->AIJ->rmap->n, *idx; 936 937 PetscFunctionBegin; 938 if (yy != zz) PetscCall(VecCopy(yy, zz)); 939 PetscCall(VecGetArrayRead(xx, &x)); 940 PetscCall(VecGetArray(zz, &y)); 941 942 for (i = 0; i < m; i++) { 943 idx = a->j + a->i[i]; 944 v = a->a + a->i[i]; 945 n = a->i[i + 1] - a->i[i]; 946 alpha1 = x[6 * i]; 947 alpha2 = x[6 * i + 1]; 948 alpha3 = x[6 * i + 2]; 949 alpha4 = x[6 * i + 3]; 950 alpha5 = x[6 * i + 4]; 951 alpha6 = x[6 * i + 5]; 952 while (n-- > 0) { 953 y[6 * (*idx)] += alpha1 * (*v); 954 y[6 * (*idx) + 1] += alpha2 * (*v); 955 y[6 * (*idx) + 2] += alpha3 * (*v); 956 y[6 * (*idx) + 3] += alpha4 * (*v); 957 y[6 * (*idx) + 4] += alpha5 * (*v); 958 y[6 * (*idx) + 5] += alpha6 * (*v); 959 idx++; 960 v++; 961 } 962 } 963 PetscCall(PetscLogFlops(12.0 * a->nz)); 964 PetscCall(VecRestoreArrayRead(xx, &x)); 965 PetscCall(VecRestoreArray(zz, &y)); 966 PetscFunctionReturn(0); 967 } 968 969 /* ------------------------------------------------------------------------------*/ 970 PetscErrorCode MatMult_SeqMAIJ_7(Mat A, Vec xx, Vec yy) { 971 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 972 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 973 const PetscScalar *x, *v; 974 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7; 975 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 976 PetscInt nonzerorow = 0, n, i, jrow, j; 977 978 PetscFunctionBegin; 979 PetscCall(VecGetArrayRead(xx, &x)); 980 PetscCall(VecGetArray(yy, &y)); 981 idx = a->j; 982 v = a->a; 983 ii = a->i; 984 985 for (i = 0; i < m; i++) { 986 jrow = ii[i]; 987 n = ii[i + 1] - jrow; 988 sum1 = 0.0; 989 sum2 = 0.0; 990 sum3 = 0.0; 991 sum4 = 0.0; 992 sum5 = 0.0; 993 sum6 = 0.0; 994 sum7 = 0.0; 995 996 nonzerorow += (n > 0); 997 for (j = 0; j < n; j++) { 998 sum1 += v[jrow] * x[7 * idx[jrow]]; 999 sum2 += v[jrow] * x[7 * idx[jrow] + 1]; 1000 sum3 += v[jrow] * x[7 * idx[jrow] + 2]; 1001 sum4 += v[jrow] * x[7 * idx[jrow] + 3]; 1002 sum5 += v[jrow] * x[7 * idx[jrow] + 4]; 1003 sum6 += v[jrow] * x[7 * idx[jrow] + 5]; 1004 sum7 += v[jrow] * x[7 * idx[jrow] + 6]; 1005 jrow++; 1006 } 1007 y[7 * i] = sum1; 1008 y[7 * i + 1] = sum2; 1009 y[7 * i + 2] = sum3; 1010 y[7 * i + 3] = sum4; 1011 y[7 * i + 4] = sum5; 1012 y[7 * i + 5] = sum6; 1013 y[7 * i + 6] = sum7; 1014 } 1015 1016 PetscCall(PetscLogFlops(14.0 * a->nz - 7.0 * nonzerorow)); 1017 PetscCall(VecRestoreArrayRead(xx, &x)); 1018 PetscCall(VecRestoreArray(yy, &y)); 1019 PetscFunctionReturn(0); 1020 } 1021 1022 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A, Vec xx, Vec yy) { 1023 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1024 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1025 const PetscScalar *x, *v; 1026 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7; 1027 const PetscInt m = b->AIJ->rmap->n, *idx; 1028 PetscInt n, i; 1029 1030 PetscFunctionBegin; 1031 PetscCall(VecSet(yy, 0.0)); 1032 PetscCall(VecGetArrayRead(xx, &x)); 1033 PetscCall(VecGetArray(yy, &y)); 1034 1035 for (i = 0; i < m; i++) { 1036 idx = a->j + a->i[i]; 1037 v = a->a + a->i[i]; 1038 n = a->i[i + 1] - a->i[i]; 1039 alpha1 = x[7 * i]; 1040 alpha2 = x[7 * i + 1]; 1041 alpha3 = x[7 * i + 2]; 1042 alpha4 = x[7 * i + 3]; 1043 alpha5 = x[7 * i + 4]; 1044 alpha6 = x[7 * i + 5]; 1045 alpha7 = x[7 * i + 6]; 1046 while (n-- > 0) { 1047 y[7 * (*idx)] += alpha1 * (*v); 1048 y[7 * (*idx) + 1] += alpha2 * (*v); 1049 y[7 * (*idx) + 2] += alpha3 * (*v); 1050 y[7 * (*idx) + 3] += alpha4 * (*v); 1051 y[7 * (*idx) + 4] += alpha5 * (*v); 1052 y[7 * (*idx) + 5] += alpha6 * (*v); 1053 y[7 * (*idx) + 6] += alpha7 * (*v); 1054 idx++; 1055 v++; 1056 } 1057 } 1058 PetscCall(PetscLogFlops(14.0 * a->nz)); 1059 PetscCall(VecRestoreArrayRead(xx, &x)); 1060 PetscCall(VecRestoreArray(yy, &y)); 1061 PetscFunctionReturn(0); 1062 } 1063 1064 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) { 1065 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1066 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1067 const PetscScalar *x, *v; 1068 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1069 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1070 PetscInt n, i, jrow, j; 1071 1072 PetscFunctionBegin; 1073 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1074 PetscCall(VecGetArrayRead(xx, &x)); 1075 PetscCall(VecGetArray(zz, &y)); 1076 idx = a->j; 1077 v = a->a; 1078 ii = a->i; 1079 1080 for (i = 0; i < m; i++) { 1081 jrow = ii[i]; 1082 n = ii[i + 1] - jrow; 1083 sum1 = 0.0; 1084 sum2 = 0.0; 1085 sum3 = 0.0; 1086 sum4 = 0.0; 1087 sum5 = 0.0; 1088 sum6 = 0.0; 1089 sum7 = 0.0; 1090 for (j = 0; j < n; j++) { 1091 sum1 += v[jrow] * x[7 * idx[jrow]]; 1092 sum2 += v[jrow] * x[7 * idx[jrow] + 1]; 1093 sum3 += v[jrow] * x[7 * idx[jrow] + 2]; 1094 sum4 += v[jrow] * x[7 * idx[jrow] + 3]; 1095 sum5 += v[jrow] * x[7 * idx[jrow] + 4]; 1096 sum6 += v[jrow] * x[7 * idx[jrow] + 5]; 1097 sum7 += v[jrow] * x[7 * idx[jrow] + 6]; 1098 jrow++; 1099 } 1100 y[7 * i] += sum1; 1101 y[7 * i + 1] += sum2; 1102 y[7 * i + 2] += sum3; 1103 y[7 * i + 3] += sum4; 1104 y[7 * i + 4] += sum5; 1105 y[7 * i + 5] += sum6; 1106 y[7 * i + 6] += sum7; 1107 } 1108 1109 PetscCall(PetscLogFlops(14.0 * a->nz)); 1110 PetscCall(VecRestoreArrayRead(xx, &x)); 1111 PetscCall(VecRestoreArray(zz, &y)); 1112 PetscFunctionReturn(0); 1113 } 1114 1115 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) { 1116 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1117 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1118 const PetscScalar *x, *v; 1119 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7; 1120 const PetscInt m = b->AIJ->rmap->n, *idx; 1121 PetscInt n, i; 1122 1123 PetscFunctionBegin; 1124 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1125 PetscCall(VecGetArrayRead(xx, &x)); 1126 PetscCall(VecGetArray(zz, &y)); 1127 for (i = 0; i < m; i++) { 1128 idx = a->j + a->i[i]; 1129 v = a->a + a->i[i]; 1130 n = a->i[i + 1] - a->i[i]; 1131 alpha1 = x[7 * i]; 1132 alpha2 = x[7 * i + 1]; 1133 alpha3 = x[7 * i + 2]; 1134 alpha4 = x[7 * i + 3]; 1135 alpha5 = x[7 * i + 4]; 1136 alpha6 = x[7 * i + 5]; 1137 alpha7 = x[7 * i + 6]; 1138 while (n-- > 0) { 1139 y[7 * (*idx)] += alpha1 * (*v); 1140 y[7 * (*idx) + 1] += alpha2 * (*v); 1141 y[7 * (*idx) + 2] += alpha3 * (*v); 1142 y[7 * (*idx) + 3] += alpha4 * (*v); 1143 y[7 * (*idx) + 4] += alpha5 * (*v); 1144 y[7 * (*idx) + 5] += alpha6 * (*v); 1145 y[7 * (*idx) + 6] += alpha7 * (*v); 1146 idx++; 1147 v++; 1148 } 1149 } 1150 PetscCall(PetscLogFlops(14.0 * a->nz)); 1151 PetscCall(VecRestoreArrayRead(xx, &x)); 1152 PetscCall(VecRestoreArray(zz, &y)); 1153 PetscFunctionReturn(0); 1154 } 1155 1156 PetscErrorCode MatMult_SeqMAIJ_8(Mat A, Vec xx, Vec yy) { 1157 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1158 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1159 const PetscScalar *x, *v; 1160 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1161 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1162 PetscInt nonzerorow = 0, n, i, jrow, j; 1163 1164 PetscFunctionBegin; 1165 PetscCall(VecGetArrayRead(xx, &x)); 1166 PetscCall(VecGetArray(yy, &y)); 1167 idx = a->j; 1168 v = a->a; 1169 ii = a->i; 1170 1171 for (i = 0; i < m; i++) { 1172 jrow = ii[i]; 1173 n = ii[i + 1] - jrow; 1174 sum1 = 0.0; 1175 sum2 = 0.0; 1176 sum3 = 0.0; 1177 sum4 = 0.0; 1178 sum5 = 0.0; 1179 sum6 = 0.0; 1180 sum7 = 0.0; 1181 sum8 = 0.0; 1182 1183 nonzerorow += (n > 0); 1184 for (j = 0; j < n; j++) { 1185 sum1 += v[jrow] * x[8 * idx[jrow]]; 1186 sum2 += v[jrow] * x[8 * idx[jrow] + 1]; 1187 sum3 += v[jrow] * x[8 * idx[jrow] + 2]; 1188 sum4 += v[jrow] * x[8 * idx[jrow] + 3]; 1189 sum5 += v[jrow] * x[8 * idx[jrow] + 4]; 1190 sum6 += v[jrow] * x[8 * idx[jrow] + 5]; 1191 sum7 += v[jrow] * x[8 * idx[jrow] + 6]; 1192 sum8 += v[jrow] * x[8 * idx[jrow] + 7]; 1193 jrow++; 1194 } 1195 y[8 * i] = sum1; 1196 y[8 * i + 1] = sum2; 1197 y[8 * i + 2] = sum3; 1198 y[8 * i + 3] = sum4; 1199 y[8 * i + 4] = sum5; 1200 y[8 * i + 5] = sum6; 1201 y[8 * i + 6] = sum7; 1202 y[8 * i + 7] = sum8; 1203 } 1204 1205 PetscCall(PetscLogFlops(16.0 * a->nz - 8.0 * nonzerorow)); 1206 PetscCall(VecRestoreArrayRead(xx, &x)); 1207 PetscCall(VecRestoreArray(yy, &y)); 1208 PetscFunctionReturn(0); 1209 } 1210 1211 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A, Vec xx, Vec yy) { 1212 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1213 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1214 const PetscScalar *x, *v; 1215 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 1216 const PetscInt m = b->AIJ->rmap->n, *idx; 1217 PetscInt n, i; 1218 1219 PetscFunctionBegin; 1220 PetscCall(VecSet(yy, 0.0)); 1221 PetscCall(VecGetArrayRead(xx, &x)); 1222 PetscCall(VecGetArray(yy, &y)); 1223 1224 for (i = 0; i < m; i++) { 1225 idx = a->j + a->i[i]; 1226 v = a->a + a->i[i]; 1227 n = a->i[i + 1] - a->i[i]; 1228 alpha1 = x[8 * i]; 1229 alpha2 = x[8 * i + 1]; 1230 alpha3 = x[8 * i + 2]; 1231 alpha4 = x[8 * i + 3]; 1232 alpha5 = x[8 * i + 4]; 1233 alpha6 = x[8 * i + 5]; 1234 alpha7 = x[8 * i + 6]; 1235 alpha8 = x[8 * i + 7]; 1236 while (n-- > 0) { 1237 y[8 * (*idx)] += alpha1 * (*v); 1238 y[8 * (*idx) + 1] += alpha2 * (*v); 1239 y[8 * (*idx) + 2] += alpha3 * (*v); 1240 y[8 * (*idx) + 3] += alpha4 * (*v); 1241 y[8 * (*idx) + 4] += alpha5 * (*v); 1242 y[8 * (*idx) + 5] += alpha6 * (*v); 1243 y[8 * (*idx) + 6] += alpha7 * (*v); 1244 y[8 * (*idx) + 7] += alpha8 * (*v); 1245 idx++; 1246 v++; 1247 } 1248 } 1249 PetscCall(PetscLogFlops(16.0 * a->nz)); 1250 PetscCall(VecRestoreArrayRead(xx, &x)); 1251 PetscCall(VecRestoreArray(yy, &y)); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) { 1256 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1257 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1258 const PetscScalar *x, *v; 1259 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1260 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1261 PetscInt n, i, jrow, j; 1262 1263 PetscFunctionBegin; 1264 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1265 PetscCall(VecGetArrayRead(xx, &x)); 1266 PetscCall(VecGetArray(zz, &y)); 1267 idx = a->j; 1268 v = a->a; 1269 ii = a->i; 1270 1271 for (i = 0; i < m; i++) { 1272 jrow = ii[i]; 1273 n = ii[i + 1] - jrow; 1274 sum1 = 0.0; 1275 sum2 = 0.0; 1276 sum3 = 0.0; 1277 sum4 = 0.0; 1278 sum5 = 0.0; 1279 sum6 = 0.0; 1280 sum7 = 0.0; 1281 sum8 = 0.0; 1282 for (j = 0; j < n; j++) { 1283 sum1 += v[jrow] * x[8 * idx[jrow]]; 1284 sum2 += v[jrow] * x[8 * idx[jrow] + 1]; 1285 sum3 += v[jrow] * x[8 * idx[jrow] + 2]; 1286 sum4 += v[jrow] * x[8 * idx[jrow] + 3]; 1287 sum5 += v[jrow] * x[8 * idx[jrow] + 4]; 1288 sum6 += v[jrow] * x[8 * idx[jrow] + 5]; 1289 sum7 += v[jrow] * x[8 * idx[jrow] + 6]; 1290 sum8 += v[jrow] * x[8 * idx[jrow] + 7]; 1291 jrow++; 1292 } 1293 y[8 * i] += sum1; 1294 y[8 * i + 1] += sum2; 1295 y[8 * i + 2] += sum3; 1296 y[8 * i + 3] += sum4; 1297 y[8 * i + 4] += sum5; 1298 y[8 * i + 5] += sum6; 1299 y[8 * i + 6] += sum7; 1300 y[8 * i + 7] += sum8; 1301 } 1302 1303 PetscCall(PetscLogFlops(16.0 * a->nz)); 1304 PetscCall(VecRestoreArrayRead(xx, &x)); 1305 PetscCall(VecRestoreArray(zz, &y)); 1306 PetscFunctionReturn(0); 1307 } 1308 1309 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) { 1310 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1311 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1312 const PetscScalar *x, *v; 1313 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 1314 const PetscInt m = b->AIJ->rmap->n, *idx; 1315 PetscInt n, i; 1316 1317 PetscFunctionBegin; 1318 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1319 PetscCall(VecGetArrayRead(xx, &x)); 1320 PetscCall(VecGetArray(zz, &y)); 1321 for (i = 0; i < m; i++) { 1322 idx = a->j + a->i[i]; 1323 v = a->a + a->i[i]; 1324 n = a->i[i + 1] - a->i[i]; 1325 alpha1 = x[8 * i]; 1326 alpha2 = x[8 * i + 1]; 1327 alpha3 = x[8 * i + 2]; 1328 alpha4 = x[8 * i + 3]; 1329 alpha5 = x[8 * i + 4]; 1330 alpha6 = x[8 * i + 5]; 1331 alpha7 = x[8 * i + 6]; 1332 alpha8 = x[8 * i + 7]; 1333 while (n-- > 0) { 1334 y[8 * (*idx)] += alpha1 * (*v); 1335 y[8 * (*idx) + 1] += alpha2 * (*v); 1336 y[8 * (*idx) + 2] += alpha3 * (*v); 1337 y[8 * (*idx) + 3] += alpha4 * (*v); 1338 y[8 * (*idx) + 4] += alpha5 * (*v); 1339 y[8 * (*idx) + 5] += alpha6 * (*v); 1340 y[8 * (*idx) + 6] += alpha7 * (*v); 1341 y[8 * (*idx) + 7] += alpha8 * (*v); 1342 idx++; 1343 v++; 1344 } 1345 } 1346 PetscCall(PetscLogFlops(16.0 * a->nz)); 1347 PetscCall(VecRestoreArrayRead(xx, &x)); 1348 PetscCall(VecRestoreArray(zz, &y)); 1349 PetscFunctionReturn(0); 1350 } 1351 1352 /* ------------------------------------------------------------------------------*/ 1353 PetscErrorCode MatMult_SeqMAIJ_9(Mat A, Vec xx, Vec yy) { 1354 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1355 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1356 const PetscScalar *x, *v; 1357 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1358 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1359 PetscInt nonzerorow = 0, n, i, jrow, j; 1360 1361 PetscFunctionBegin; 1362 PetscCall(VecGetArrayRead(xx, &x)); 1363 PetscCall(VecGetArray(yy, &y)); 1364 idx = a->j; 1365 v = a->a; 1366 ii = a->i; 1367 1368 for (i = 0; i < m; i++) { 1369 jrow = ii[i]; 1370 n = ii[i + 1] - jrow; 1371 sum1 = 0.0; 1372 sum2 = 0.0; 1373 sum3 = 0.0; 1374 sum4 = 0.0; 1375 sum5 = 0.0; 1376 sum6 = 0.0; 1377 sum7 = 0.0; 1378 sum8 = 0.0; 1379 sum9 = 0.0; 1380 1381 nonzerorow += (n > 0); 1382 for (j = 0; j < n; j++) { 1383 sum1 += v[jrow] * x[9 * idx[jrow]]; 1384 sum2 += v[jrow] * x[9 * idx[jrow] + 1]; 1385 sum3 += v[jrow] * x[9 * idx[jrow] + 2]; 1386 sum4 += v[jrow] * x[9 * idx[jrow] + 3]; 1387 sum5 += v[jrow] * x[9 * idx[jrow] + 4]; 1388 sum6 += v[jrow] * x[9 * idx[jrow] + 5]; 1389 sum7 += v[jrow] * x[9 * idx[jrow] + 6]; 1390 sum8 += v[jrow] * x[9 * idx[jrow] + 7]; 1391 sum9 += v[jrow] * x[9 * idx[jrow] + 8]; 1392 jrow++; 1393 } 1394 y[9 * i] = sum1; 1395 y[9 * i + 1] = sum2; 1396 y[9 * i + 2] = sum3; 1397 y[9 * i + 3] = sum4; 1398 y[9 * i + 4] = sum5; 1399 y[9 * i + 5] = sum6; 1400 y[9 * i + 6] = sum7; 1401 y[9 * i + 7] = sum8; 1402 y[9 * i + 8] = sum9; 1403 } 1404 1405 PetscCall(PetscLogFlops(18.0 * a->nz - 9 * nonzerorow)); 1406 PetscCall(VecRestoreArrayRead(xx, &x)); 1407 PetscCall(VecRestoreArray(yy, &y)); 1408 PetscFunctionReturn(0); 1409 } 1410 1411 /* ------------------------------------------------------------------------------*/ 1412 1413 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A, Vec xx, Vec yy) { 1414 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1415 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1416 const PetscScalar *x, *v; 1417 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9; 1418 const PetscInt m = b->AIJ->rmap->n, *idx; 1419 PetscInt n, i; 1420 1421 PetscFunctionBegin; 1422 PetscCall(VecSet(yy, 0.0)); 1423 PetscCall(VecGetArrayRead(xx, &x)); 1424 PetscCall(VecGetArray(yy, &y)); 1425 1426 for (i = 0; i < m; i++) { 1427 idx = a->j + a->i[i]; 1428 v = a->a + a->i[i]; 1429 n = a->i[i + 1] - a->i[i]; 1430 alpha1 = x[9 * i]; 1431 alpha2 = x[9 * i + 1]; 1432 alpha3 = x[9 * i + 2]; 1433 alpha4 = x[9 * i + 3]; 1434 alpha5 = x[9 * i + 4]; 1435 alpha6 = x[9 * i + 5]; 1436 alpha7 = x[9 * i + 6]; 1437 alpha8 = x[9 * i + 7]; 1438 alpha9 = x[9 * i + 8]; 1439 while (n-- > 0) { 1440 y[9 * (*idx)] += alpha1 * (*v); 1441 y[9 * (*idx) + 1] += alpha2 * (*v); 1442 y[9 * (*idx) + 2] += alpha3 * (*v); 1443 y[9 * (*idx) + 3] += alpha4 * (*v); 1444 y[9 * (*idx) + 4] += alpha5 * (*v); 1445 y[9 * (*idx) + 5] += alpha6 * (*v); 1446 y[9 * (*idx) + 6] += alpha7 * (*v); 1447 y[9 * (*idx) + 7] += alpha8 * (*v); 1448 y[9 * (*idx) + 8] += alpha9 * (*v); 1449 idx++; 1450 v++; 1451 } 1452 } 1453 PetscCall(PetscLogFlops(18.0 * a->nz)); 1454 PetscCall(VecRestoreArrayRead(xx, &x)); 1455 PetscCall(VecRestoreArray(yy, &y)); 1456 PetscFunctionReturn(0); 1457 } 1458 1459 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) { 1460 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1461 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1462 const PetscScalar *x, *v; 1463 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1464 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1465 PetscInt n, i, jrow, j; 1466 1467 PetscFunctionBegin; 1468 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1469 PetscCall(VecGetArrayRead(xx, &x)); 1470 PetscCall(VecGetArray(zz, &y)); 1471 idx = a->j; 1472 v = a->a; 1473 ii = a->i; 1474 1475 for (i = 0; i < m; i++) { 1476 jrow = ii[i]; 1477 n = ii[i + 1] - jrow; 1478 sum1 = 0.0; 1479 sum2 = 0.0; 1480 sum3 = 0.0; 1481 sum4 = 0.0; 1482 sum5 = 0.0; 1483 sum6 = 0.0; 1484 sum7 = 0.0; 1485 sum8 = 0.0; 1486 sum9 = 0.0; 1487 for (j = 0; j < n; j++) { 1488 sum1 += v[jrow] * x[9 * idx[jrow]]; 1489 sum2 += v[jrow] * x[9 * idx[jrow] + 1]; 1490 sum3 += v[jrow] * x[9 * idx[jrow] + 2]; 1491 sum4 += v[jrow] * x[9 * idx[jrow] + 3]; 1492 sum5 += v[jrow] * x[9 * idx[jrow] + 4]; 1493 sum6 += v[jrow] * x[9 * idx[jrow] + 5]; 1494 sum7 += v[jrow] * x[9 * idx[jrow] + 6]; 1495 sum8 += v[jrow] * x[9 * idx[jrow] + 7]; 1496 sum9 += v[jrow] * x[9 * idx[jrow] + 8]; 1497 jrow++; 1498 } 1499 y[9 * i] += sum1; 1500 y[9 * i + 1] += sum2; 1501 y[9 * i + 2] += sum3; 1502 y[9 * i + 3] += sum4; 1503 y[9 * i + 4] += sum5; 1504 y[9 * i + 5] += sum6; 1505 y[9 * i + 6] += sum7; 1506 y[9 * i + 7] += sum8; 1507 y[9 * i + 8] += sum9; 1508 } 1509 1510 PetscCall(PetscLogFlops(18.0 * a->nz)); 1511 PetscCall(VecRestoreArrayRead(xx, &x)); 1512 PetscCall(VecRestoreArray(zz, &y)); 1513 PetscFunctionReturn(0); 1514 } 1515 1516 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) { 1517 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1518 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1519 const PetscScalar *x, *v; 1520 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9; 1521 const PetscInt m = b->AIJ->rmap->n, *idx; 1522 PetscInt n, i; 1523 1524 PetscFunctionBegin; 1525 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1526 PetscCall(VecGetArrayRead(xx, &x)); 1527 PetscCall(VecGetArray(zz, &y)); 1528 for (i = 0; i < m; i++) { 1529 idx = a->j + a->i[i]; 1530 v = a->a + a->i[i]; 1531 n = a->i[i + 1] - a->i[i]; 1532 alpha1 = x[9 * i]; 1533 alpha2 = x[9 * i + 1]; 1534 alpha3 = x[9 * i + 2]; 1535 alpha4 = x[9 * i + 3]; 1536 alpha5 = x[9 * i + 4]; 1537 alpha6 = x[9 * i + 5]; 1538 alpha7 = x[9 * i + 6]; 1539 alpha8 = x[9 * i + 7]; 1540 alpha9 = x[9 * i + 8]; 1541 while (n-- > 0) { 1542 y[9 * (*idx)] += alpha1 * (*v); 1543 y[9 * (*idx) + 1] += alpha2 * (*v); 1544 y[9 * (*idx) + 2] += alpha3 * (*v); 1545 y[9 * (*idx) + 3] += alpha4 * (*v); 1546 y[9 * (*idx) + 4] += alpha5 * (*v); 1547 y[9 * (*idx) + 5] += alpha6 * (*v); 1548 y[9 * (*idx) + 6] += alpha7 * (*v); 1549 y[9 * (*idx) + 7] += alpha8 * (*v); 1550 y[9 * (*idx) + 8] += alpha9 * (*v); 1551 idx++; 1552 v++; 1553 } 1554 } 1555 PetscCall(PetscLogFlops(18.0 * a->nz)); 1556 PetscCall(VecRestoreArrayRead(xx, &x)); 1557 PetscCall(VecRestoreArray(zz, &y)); 1558 PetscFunctionReturn(0); 1559 } 1560 PetscErrorCode MatMult_SeqMAIJ_10(Mat A, Vec xx, Vec yy) { 1561 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1562 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1563 const PetscScalar *x, *v; 1564 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1565 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1566 PetscInt nonzerorow = 0, n, i, jrow, j; 1567 1568 PetscFunctionBegin; 1569 PetscCall(VecGetArrayRead(xx, &x)); 1570 PetscCall(VecGetArray(yy, &y)); 1571 idx = a->j; 1572 v = a->a; 1573 ii = a->i; 1574 1575 for (i = 0; i < m; i++) { 1576 jrow = ii[i]; 1577 n = ii[i + 1] - jrow; 1578 sum1 = 0.0; 1579 sum2 = 0.0; 1580 sum3 = 0.0; 1581 sum4 = 0.0; 1582 sum5 = 0.0; 1583 sum6 = 0.0; 1584 sum7 = 0.0; 1585 sum8 = 0.0; 1586 sum9 = 0.0; 1587 sum10 = 0.0; 1588 1589 nonzerorow += (n > 0); 1590 for (j = 0; j < n; j++) { 1591 sum1 += v[jrow] * x[10 * idx[jrow]]; 1592 sum2 += v[jrow] * x[10 * idx[jrow] + 1]; 1593 sum3 += v[jrow] * x[10 * idx[jrow] + 2]; 1594 sum4 += v[jrow] * x[10 * idx[jrow] + 3]; 1595 sum5 += v[jrow] * x[10 * idx[jrow] + 4]; 1596 sum6 += v[jrow] * x[10 * idx[jrow] + 5]; 1597 sum7 += v[jrow] * x[10 * idx[jrow] + 6]; 1598 sum8 += v[jrow] * x[10 * idx[jrow] + 7]; 1599 sum9 += v[jrow] * x[10 * idx[jrow] + 8]; 1600 sum10 += v[jrow] * x[10 * idx[jrow] + 9]; 1601 jrow++; 1602 } 1603 y[10 * i] = sum1; 1604 y[10 * i + 1] = sum2; 1605 y[10 * i + 2] = sum3; 1606 y[10 * i + 3] = sum4; 1607 y[10 * i + 4] = sum5; 1608 y[10 * i + 5] = sum6; 1609 y[10 * i + 6] = sum7; 1610 y[10 * i + 7] = sum8; 1611 y[10 * i + 8] = sum9; 1612 y[10 * i + 9] = sum10; 1613 } 1614 1615 PetscCall(PetscLogFlops(20.0 * a->nz - 10.0 * nonzerorow)); 1616 PetscCall(VecRestoreArrayRead(xx, &x)); 1617 PetscCall(VecRestoreArray(yy, &y)); 1618 PetscFunctionReturn(0); 1619 } 1620 1621 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) { 1622 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1623 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1624 const PetscScalar *x, *v; 1625 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1626 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1627 PetscInt n, i, jrow, j; 1628 1629 PetscFunctionBegin; 1630 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1631 PetscCall(VecGetArrayRead(xx, &x)); 1632 PetscCall(VecGetArray(zz, &y)); 1633 idx = a->j; 1634 v = a->a; 1635 ii = a->i; 1636 1637 for (i = 0; i < m; i++) { 1638 jrow = ii[i]; 1639 n = ii[i + 1] - jrow; 1640 sum1 = 0.0; 1641 sum2 = 0.0; 1642 sum3 = 0.0; 1643 sum4 = 0.0; 1644 sum5 = 0.0; 1645 sum6 = 0.0; 1646 sum7 = 0.0; 1647 sum8 = 0.0; 1648 sum9 = 0.0; 1649 sum10 = 0.0; 1650 for (j = 0; j < n; j++) { 1651 sum1 += v[jrow] * x[10 * idx[jrow]]; 1652 sum2 += v[jrow] * x[10 * idx[jrow] + 1]; 1653 sum3 += v[jrow] * x[10 * idx[jrow] + 2]; 1654 sum4 += v[jrow] * x[10 * idx[jrow] + 3]; 1655 sum5 += v[jrow] * x[10 * idx[jrow] + 4]; 1656 sum6 += v[jrow] * x[10 * idx[jrow] + 5]; 1657 sum7 += v[jrow] * x[10 * idx[jrow] + 6]; 1658 sum8 += v[jrow] * x[10 * idx[jrow] + 7]; 1659 sum9 += v[jrow] * x[10 * idx[jrow] + 8]; 1660 sum10 += v[jrow] * x[10 * idx[jrow] + 9]; 1661 jrow++; 1662 } 1663 y[10 * i] += sum1; 1664 y[10 * i + 1] += sum2; 1665 y[10 * i + 2] += sum3; 1666 y[10 * i + 3] += sum4; 1667 y[10 * i + 4] += sum5; 1668 y[10 * i + 5] += sum6; 1669 y[10 * i + 6] += sum7; 1670 y[10 * i + 7] += sum8; 1671 y[10 * i + 8] += sum9; 1672 y[10 * i + 9] += sum10; 1673 } 1674 1675 PetscCall(PetscLogFlops(20.0 * a->nz)); 1676 PetscCall(VecRestoreArrayRead(xx, &x)); 1677 PetscCall(VecRestoreArray(yy, &y)); 1678 PetscFunctionReturn(0); 1679 } 1680 1681 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A, Vec xx, Vec yy) { 1682 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1683 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1684 const PetscScalar *x, *v; 1685 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10; 1686 const PetscInt m = b->AIJ->rmap->n, *idx; 1687 PetscInt n, i; 1688 1689 PetscFunctionBegin; 1690 PetscCall(VecSet(yy, 0.0)); 1691 PetscCall(VecGetArrayRead(xx, &x)); 1692 PetscCall(VecGetArray(yy, &y)); 1693 1694 for (i = 0; i < m; i++) { 1695 idx = a->j + a->i[i]; 1696 v = a->a + a->i[i]; 1697 n = a->i[i + 1] - a->i[i]; 1698 alpha1 = x[10 * i]; 1699 alpha2 = x[10 * i + 1]; 1700 alpha3 = x[10 * i + 2]; 1701 alpha4 = x[10 * i + 3]; 1702 alpha5 = x[10 * i + 4]; 1703 alpha6 = x[10 * i + 5]; 1704 alpha7 = x[10 * i + 6]; 1705 alpha8 = x[10 * i + 7]; 1706 alpha9 = x[10 * i + 8]; 1707 alpha10 = x[10 * i + 9]; 1708 while (n-- > 0) { 1709 y[10 * (*idx)] += alpha1 * (*v); 1710 y[10 * (*idx) + 1] += alpha2 * (*v); 1711 y[10 * (*idx) + 2] += alpha3 * (*v); 1712 y[10 * (*idx) + 3] += alpha4 * (*v); 1713 y[10 * (*idx) + 4] += alpha5 * (*v); 1714 y[10 * (*idx) + 5] += alpha6 * (*v); 1715 y[10 * (*idx) + 6] += alpha7 * (*v); 1716 y[10 * (*idx) + 7] += alpha8 * (*v); 1717 y[10 * (*idx) + 8] += alpha9 * (*v); 1718 y[10 * (*idx) + 9] += alpha10 * (*v); 1719 idx++; 1720 v++; 1721 } 1722 } 1723 PetscCall(PetscLogFlops(20.0 * a->nz)); 1724 PetscCall(VecRestoreArrayRead(xx, &x)); 1725 PetscCall(VecRestoreArray(yy, &y)); 1726 PetscFunctionReturn(0); 1727 } 1728 1729 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) { 1730 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1731 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1732 const PetscScalar *x, *v; 1733 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10; 1734 const PetscInt m = b->AIJ->rmap->n, *idx; 1735 PetscInt n, i; 1736 1737 PetscFunctionBegin; 1738 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1739 PetscCall(VecGetArrayRead(xx, &x)); 1740 PetscCall(VecGetArray(zz, &y)); 1741 for (i = 0; i < m; i++) { 1742 idx = a->j + a->i[i]; 1743 v = a->a + a->i[i]; 1744 n = a->i[i + 1] - a->i[i]; 1745 alpha1 = x[10 * i]; 1746 alpha2 = x[10 * i + 1]; 1747 alpha3 = x[10 * i + 2]; 1748 alpha4 = x[10 * i + 3]; 1749 alpha5 = x[10 * i + 4]; 1750 alpha6 = x[10 * i + 5]; 1751 alpha7 = x[10 * i + 6]; 1752 alpha8 = x[10 * i + 7]; 1753 alpha9 = x[10 * i + 8]; 1754 alpha10 = x[10 * i + 9]; 1755 while (n-- > 0) { 1756 y[10 * (*idx)] += alpha1 * (*v); 1757 y[10 * (*idx) + 1] += alpha2 * (*v); 1758 y[10 * (*idx) + 2] += alpha3 * (*v); 1759 y[10 * (*idx) + 3] += alpha4 * (*v); 1760 y[10 * (*idx) + 4] += alpha5 * (*v); 1761 y[10 * (*idx) + 5] += alpha6 * (*v); 1762 y[10 * (*idx) + 6] += alpha7 * (*v); 1763 y[10 * (*idx) + 7] += alpha8 * (*v); 1764 y[10 * (*idx) + 8] += alpha9 * (*v); 1765 y[10 * (*idx) + 9] += alpha10 * (*v); 1766 idx++; 1767 v++; 1768 } 1769 } 1770 PetscCall(PetscLogFlops(20.0 * a->nz)); 1771 PetscCall(VecRestoreArrayRead(xx, &x)); 1772 PetscCall(VecRestoreArray(zz, &y)); 1773 PetscFunctionReturn(0); 1774 } 1775 1776 /*--------------------------------------------------------------------------------------------*/ 1777 PetscErrorCode MatMult_SeqMAIJ_11(Mat A, Vec xx, Vec yy) { 1778 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1779 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1780 const PetscScalar *x, *v; 1781 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1782 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1783 PetscInt nonzerorow = 0, n, i, jrow, j; 1784 1785 PetscFunctionBegin; 1786 PetscCall(VecGetArrayRead(xx, &x)); 1787 PetscCall(VecGetArray(yy, &y)); 1788 idx = a->j; 1789 v = a->a; 1790 ii = a->i; 1791 1792 for (i = 0; i < m; i++) { 1793 jrow = ii[i]; 1794 n = ii[i + 1] - jrow; 1795 sum1 = 0.0; 1796 sum2 = 0.0; 1797 sum3 = 0.0; 1798 sum4 = 0.0; 1799 sum5 = 0.0; 1800 sum6 = 0.0; 1801 sum7 = 0.0; 1802 sum8 = 0.0; 1803 sum9 = 0.0; 1804 sum10 = 0.0; 1805 sum11 = 0.0; 1806 1807 nonzerorow += (n > 0); 1808 for (j = 0; j < n; j++) { 1809 sum1 += v[jrow] * x[11 * idx[jrow]]; 1810 sum2 += v[jrow] * x[11 * idx[jrow] + 1]; 1811 sum3 += v[jrow] * x[11 * idx[jrow] + 2]; 1812 sum4 += v[jrow] * x[11 * idx[jrow] + 3]; 1813 sum5 += v[jrow] * x[11 * idx[jrow] + 4]; 1814 sum6 += v[jrow] * x[11 * idx[jrow] + 5]; 1815 sum7 += v[jrow] * x[11 * idx[jrow] + 6]; 1816 sum8 += v[jrow] * x[11 * idx[jrow] + 7]; 1817 sum9 += v[jrow] * x[11 * idx[jrow] + 8]; 1818 sum10 += v[jrow] * x[11 * idx[jrow] + 9]; 1819 sum11 += v[jrow] * x[11 * idx[jrow] + 10]; 1820 jrow++; 1821 } 1822 y[11 * i] = sum1; 1823 y[11 * i + 1] = sum2; 1824 y[11 * i + 2] = sum3; 1825 y[11 * i + 3] = sum4; 1826 y[11 * i + 4] = sum5; 1827 y[11 * i + 5] = sum6; 1828 y[11 * i + 6] = sum7; 1829 y[11 * i + 7] = sum8; 1830 y[11 * i + 8] = sum9; 1831 y[11 * i + 9] = sum10; 1832 y[11 * i + 10] = sum11; 1833 } 1834 1835 PetscCall(PetscLogFlops(22.0 * a->nz - 11 * nonzerorow)); 1836 PetscCall(VecRestoreArrayRead(xx, &x)); 1837 PetscCall(VecRestoreArray(yy, &y)); 1838 PetscFunctionReturn(0); 1839 } 1840 1841 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) { 1842 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1843 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1844 const PetscScalar *x, *v; 1845 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1846 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1847 PetscInt n, i, jrow, j; 1848 1849 PetscFunctionBegin; 1850 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1851 PetscCall(VecGetArrayRead(xx, &x)); 1852 PetscCall(VecGetArray(zz, &y)); 1853 idx = a->j; 1854 v = a->a; 1855 ii = a->i; 1856 1857 for (i = 0; i < m; i++) { 1858 jrow = ii[i]; 1859 n = ii[i + 1] - jrow; 1860 sum1 = 0.0; 1861 sum2 = 0.0; 1862 sum3 = 0.0; 1863 sum4 = 0.0; 1864 sum5 = 0.0; 1865 sum6 = 0.0; 1866 sum7 = 0.0; 1867 sum8 = 0.0; 1868 sum9 = 0.0; 1869 sum10 = 0.0; 1870 sum11 = 0.0; 1871 for (j = 0; j < n; j++) { 1872 sum1 += v[jrow] * x[11 * idx[jrow]]; 1873 sum2 += v[jrow] * x[11 * idx[jrow] + 1]; 1874 sum3 += v[jrow] * x[11 * idx[jrow] + 2]; 1875 sum4 += v[jrow] * x[11 * idx[jrow] + 3]; 1876 sum5 += v[jrow] * x[11 * idx[jrow] + 4]; 1877 sum6 += v[jrow] * x[11 * idx[jrow] + 5]; 1878 sum7 += v[jrow] * x[11 * idx[jrow] + 6]; 1879 sum8 += v[jrow] * x[11 * idx[jrow] + 7]; 1880 sum9 += v[jrow] * x[11 * idx[jrow] + 8]; 1881 sum10 += v[jrow] * x[11 * idx[jrow] + 9]; 1882 sum11 += v[jrow] * x[11 * idx[jrow] + 10]; 1883 jrow++; 1884 } 1885 y[11 * i] += sum1; 1886 y[11 * i + 1] += sum2; 1887 y[11 * i + 2] += sum3; 1888 y[11 * i + 3] += sum4; 1889 y[11 * i + 4] += sum5; 1890 y[11 * i + 5] += sum6; 1891 y[11 * i + 6] += sum7; 1892 y[11 * i + 7] += sum8; 1893 y[11 * i + 8] += sum9; 1894 y[11 * i + 9] += sum10; 1895 y[11 * i + 10] += sum11; 1896 } 1897 1898 PetscCall(PetscLogFlops(22.0 * a->nz)); 1899 PetscCall(VecRestoreArrayRead(xx, &x)); 1900 PetscCall(VecRestoreArray(yy, &y)); 1901 PetscFunctionReturn(0); 1902 } 1903 1904 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A, Vec xx, Vec yy) { 1905 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1906 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1907 const PetscScalar *x, *v; 1908 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11; 1909 const PetscInt m = b->AIJ->rmap->n, *idx; 1910 PetscInt n, i; 1911 1912 PetscFunctionBegin; 1913 PetscCall(VecSet(yy, 0.0)); 1914 PetscCall(VecGetArrayRead(xx, &x)); 1915 PetscCall(VecGetArray(yy, &y)); 1916 1917 for (i = 0; i < m; i++) { 1918 idx = a->j + a->i[i]; 1919 v = a->a + a->i[i]; 1920 n = a->i[i + 1] - a->i[i]; 1921 alpha1 = x[11 * i]; 1922 alpha2 = x[11 * i + 1]; 1923 alpha3 = x[11 * i + 2]; 1924 alpha4 = x[11 * i + 3]; 1925 alpha5 = x[11 * i + 4]; 1926 alpha6 = x[11 * i + 5]; 1927 alpha7 = x[11 * i + 6]; 1928 alpha8 = x[11 * i + 7]; 1929 alpha9 = x[11 * i + 8]; 1930 alpha10 = x[11 * i + 9]; 1931 alpha11 = x[11 * i + 10]; 1932 while (n-- > 0) { 1933 y[11 * (*idx)] += alpha1 * (*v); 1934 y[11 * (*idx) + 1] += alpha2 * (*v); 1935 y[11 * (*idx) + 2] += alpha3 * (*v); 1936 y[11 * (*idx) + 3] += alpha4 * (*v); 1937 y[11 * (*idx) + 4] += alpha5 * (*v); 1938 y[11 * (*idx) + 5] += alpha6 * (*v); 1939 y[11 * (*idx) + 6] += alpha7 * (*v); 1940 y[11 * (*idx) + 7] += alpha8 * (*v); 1941 y[11 * (*idx) + 8] += alpha9 * (*v); 1942 y[11 * (*idx) + 9] += alpha10 * (*v); 1943 y[11 * (*idx) + 10] += alpha11 * (*v); 1944 idx++; 1945 v++; 1946 } 1947 } 1948 PetscCall(PetscLogFlops(22.0 * a->nz)); 1949 PetscCall(VecRestoreArrayRead(xx, &x)); 1950 PetscCall(VecRestoreArray(yy, &y)); 1951 PetscFunctionReturn(0); 1952 } 1953 1954 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) { 1955 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1956 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1957 const PetscScalar *x, *v; 1958 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11; 1959 const PetscInt m = b->AIJ->rmap->n, *idx; 1960 PetscInt n, i; 1961 1962 PetscFunctionBegin; 1963 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1964 PetscCall(VecGetArrayRead(xx, &x)); 1965 PetscCall(VecGetArray(zz, &y)); 1966 for (i = 0; i < m; i++) { 1967 idx = a->j + a->i[i]; 1968 v = a->a + a->i[i]; 1969 n = a->i[i + 1] - a->i[i]; 1970 alpha1 = x[11 * i]; 1971 alpha2 = x[11 * i + 1]; 1972 alpha3 = x[11 * i + 2]; 1973 alpha4 = x[11 * i + 3]; 1974 alpha5 = x[11 * i + 4]; 1975 alpha6 = x[11 * i + 5]; 1976 alpha7 = x[11 * i + 6]; 1977 alpha8 = x[11 * i + 7]; 1978 alpha9 = x[11 * i + 8]; 1979 alpha10 = x[11 * i + 9]; 1980 alpha11 = x[11 * i + 10]; 1981 while (n-- > 0) { 1982 y[11 * (*idx)] += alpha1 * (*v); 1983 y[11 * (*idx) + 1] += alpha2 * (*v); 1984 y[11 * (*idx) + 2] += alpha3 * (*v); 1985 y[11 * (*idx) + 3] += alpha4 * (*v); 1986 y[11 * (*idx) + 4] += alpha5 * (*v); 1987 y[11 * (*idx) + 5] += alpha6 * (*v); 1988 y[11 * (*idx) + 6] += alpha7 * (*v); 1989 y[11 * (*idx) + 7] += alpha8 * (*v); 1990 y[11 * (*idx) + 8] += alpha9 * (*v); 1991 y[11 * (*idx) + 9] += alpha10 * (*v); 1992 y[11 * (*idx) + 10] += alpha11 * (*v); 1993 idx++; 1994 v++; 1995 } 1996 } 1997 PetscCall(PetscLogFlops(22.0 * a->nz)); 1998 PetscCall(VecRestoreArrayRead(xx, &x)); 1999 PetscCall(VecRestoreArray(zz, &y)); 2000 PetscFunctionReturn(0); 2001 } 2002 2003 /*--------------------------------------------------------------------------------------------*/ 2004 PetscErrorCode MatMult_SeqMAIJ_16(Mat A, Vec xx, Vec yy) { 2005 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2006 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2007 const PetscScalar *x, *v; 2008 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2009 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2010 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2011 PetscInt nonzerorow = 0, n, i, jrow, j; 2012 2013 PetscFunctionBegin; 2014 PetscCall(VecGetArrayRead(xx, &x)); 2015 PetscCall(VecGetArray(yy, &y)); 2016 idx = a->j; 2017 v = a->a; 2018 ii = a->i; 2019 2020 for (i = 0; i < m; i++) { 2021 jrow = ii[i]; 2022 n = ii[i + 1] - jrow; 2023 sum1 = 0.0; 2024 sum2 = 0.0; 2025 sum3 = 0.0; 2026 sum4 = 0.0; 2027 sum5 = 0.0; 2028 sum6 = 0.0; 2029 sum7 = 0.0; 2030 sum8 = 0.0; 2031 sum9 = 0.0; 2032 sum10 = 0.0; 2033 sum11 = 0.0; 2034 sum12 = 0.0; 2035 sum13 = 0.0; 2036 sum14 = 0.0; 2037 sum15 = 0.0; 2038 sum16 = 0.0; 2039 2040 nonzerorow += (n > 0); 2041 for (j = 0; j < n; j++) { 2042 sum1 += v[jrow] * x[16 * idx[jrow]]; 2043 sum2 += v[jrow] * x[16 * idx[jrow] + 1]; 2044 sum3 += v[jrow] * x[16 * idx[jrow] + 2]; 2045 sum4 += v[jrow] * x[16 * idx[jrow] + 3]; 2046 sum5 += v[jrow] * x[16 * idx[jrow] + 4]; 2047 sum6 += v[jrow] * x[16 * idx[jrow] + 5]; 2048 sum7 += v[jrow] * x[16 * idx[jrow] + 6]; 2049 sum8 += v[jrow] * x[16 * idx[jrow] + 7]; 2050 sum9 += v[jrow] * x[16 * idx[jrow] + 8]; 2051 sum10 += v[jrow] * x[16 * idx[jrow] + 9]; 2052 sum11 += v[jrow] * x[16 * idx[jrow] + 10]; 2053 sum12 += v[jrow] * x[16 * idx[jrow] + 11]; 2054 sum13 += v[jrow] * x[16 * idx[jrow] + 12]; 2055 sum14 += v[jrow] * x[16 * idx[jrow] + 13]; 2056 sum15 += v[jrow] * x[16 * idx[jrow] + 14]; 2057 sum16 += v[jrow] * x[16 * idx[jrow] + 15]; 2058 jrow++; 2059 } 2060 y[16 * i] = sum1; 2061 y[16 * i + 1] = sum2; 2062 y[16 * i + 2] = sum3; 2063 y[16 * i + 3] = sum4; 2064 y[16 * i + 4] = sum5; 2065 y[16 * i + 5] = sum6; 2066 y[16 * i + 6] = sum7; 2067 y[16 * i + 7] = sum8; 2068 y[16 * i + 8] = sum9; 2069 y[16 * i + 9] = sum10; 2070 y[16 * i + 10] = sum11; 2071 y[16 * i + 11] = sum12; 2072 y[16 * i + 12] = sum13; 2073 y[16 * i + 13] = sum14; 2074 y[16 * i + 14] = sum15; 2075 y[16 * i + 15] = sum16; 2076 } 2077 2078 PetscCall(PetscLogFlops(32.0 * a->nz - 16.0 * nonzerorow)); 2079 PetscCall(VecRestoreArrayRead(xx, &x)); 2080 PetscCall(VecRestoreArray(yy, &y)); 2081 PetscFunctionReturn(0); 2082 } 2083 2084 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A, Vec xx, Vec yy) { 2085 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2086 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2087 const PetscScalar *x, *v; 2088 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 2089 PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16; 2090 const PetscInt m = b->AIJ->rmap->n, *idx; 2091 PetscInt n, i; 2092 2093 PetscFunctionBegin; 2094 PetscCall(VecSet(yy, 0.0)); 2095 PetscCall(VecGetArrayRead(xx, &x)); 2096 PetscCall(VecGetArray(yy, &y)); 2097 2098 for (i = 0; i < m; i++) { 2099 idx = a->j + a->i[i]; 2100 v = a->a + a->i[i]; 2101 n = a->i[i + 1] - a->i[i]; 2102 alpha1 = x[16 * i]; 2103 alpha2 = x[16 * i + 1]; 2104 alpha3 = x[16 * i + 2]; 2105 alpha4 = x[16 * i + 3]; 2106 alpha5 = x[16 * i + 4]; 2107 alpha6 = x[16 * i + 5]; 2108 alpha7 = x[16 * i + 6]; 2109 alpha8 = x[16 * i + 7]; 2110 alpha9 = x[16 * i + 8]; 2111 alpha10 = x[16 * i + 9]; 2112 alpha11 = x[16 * i + 10]; 2113 alpha12 = x[16 * i + 11]; 2114 alpha13 = x[16 * i + 12]; 2115 alpha14 = x[16 * i + 13]; 2116 alpha15 = x[16 * i + 14]; 2117 alpha16 = x[16 * i + 15]; 2118 while (n-- > 0) { 2119 y[16 * (*idx)] += alpha1 * (*v); 2120 y[16 * (*idx) + 1] += alpha2 * (*v); 2121 y[16 * (*idx) + 2] += alpha3 * (*v); 2122 y[16 * (*idx) + 3] += alpha4 * (*v); 2123 y[16 * (*idx) + 4] += alpha5 * (*v); 2124 y[16 * (*idx) + 5] += alpha6 * (*v); 2125 y[16 * (*idx) + 6] += alpha7 * (*v); 2126 y[16 * (*idx) + 7] += alpha8 * (*v); 2127 y[16 * (*idx) + 8] += alpha9 * (*v); 2128 y[16 * (*idx) + 9] += alpha10 * (*v); 2129 y[16 * (*idx) + 10] += alpha11 * (*v); 2130 y[16 * (*idx) + 11] += alpha12 * (*v); 2131 y[16 * (*idx) + 12] += alpha13 * (*v); 2132 y[16 * (*idx) + 13] += alpha14 * (*v); 2133 y[16 * (*idx) + 14] += alpha15 * (*v); 2134 y[16 * (*idx) + 15] += alpha16 * (*v); 2135 idx++; 2136 v++; 2137 } 2138 } 2139 PetscCall(PetscLogFlops(32.0 * a->nz)); 2140 PetscCall(VecRestoreArrayRead(xx, &x)); 2141 PetscCall(VecRestoreArray(yy, &y)); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) { 2146 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2147 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2148 const PetscScalar *x, *v; 2149 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2150 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2151 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2152 PetscInt n, i, jrow, j; 2153 2154 PetscFunctionBegin; 2155 if (yy != zz) PetscCall(VecCopy(yy, zz)); 2156 PetscCall(VecGetArrayRead(xx, &x)); 2157 PetscCall(VecGetArray(zz, &y)); 2158 idx = a->j; 2159 v = a->a; 2160 ii = a->i; 2161 2162 for (i = 0; i < m; i++) { 2163 jrow = ii[i]; 2164 n = ii[i + 1] - jrow; 2165 sum1 = 0.0; 2166 sum2 = 0.0; 2167 sum3 = 0.0; 2168 sum4 = 0.0; 2169 sum5 = 0.0; 2170 sum6 = 0.0; 2171 sum7 = 0.0; 2172 sum8 = 0.0; 2173 sum9 = 0.0; 2174 sum10 = 0.0; 2175 sum11 = 0.0; 2176 sum12 = 0.0; 2177 sum13 = 0.0; 2178 sum14 = 0.0; 2179 sum15 = 0.0; 2180 sum16 = 0.0; 2181 for (j = 0; j < n; j++) { 2182 sum1 += v[jrow] * x[16 * idx[jrow]]; 2183 sum2 += v[jrow] * x[16 * idx[jrow] + 1]; 2184 sum3 += v[jrow] * x[16 * idx[jrow] + 2]; 2185 sum4 += v[jrow] * x[16 * idx[jrow] + 3]; 2186 sum5 += v[jrow] * x[16 * idx[jrow] + 4]; 2187 sum6 += v[jrow] * x[16 * idx[jrow] + 5]; 2188 sum7 += v[jrow] * x[16 * idx[jrow] + 6]; 2189 sum8 += v[jrow] * x[16 * idx[jrow] + 7]; 2190 sum9 += v[jrow] * x[16 * idx[jrow] + 8]; 2191 sum10 += v[jrow] * x[16 * idx[jrow] + 9]; 2192 sum11 += v[jrow] * x[16 * idx[jrow] + 10]; 2193 sum12 += v[jrow] * x[16 * idx[jrow] + 11]; 2194 sum13 += v[jrow] * x[16 * idx[jrow] + 12]; 2195 sum14 += v[jrow] * x[16 * idx[jrow] + 13]; 2196 sum15 += v[jrow] * x[16 * idx[jrow] + 14]; 2197 sum16 += v[jrow] * x[16 * idx[jrow] + 15]; 2198 jrow++; 2199 } 2200 y[16 * i] += sum1; 2201 y[16 * i + 1] += sum2; 2202 y[16 * i + 2] += sum3; 2203 y[16 * i + 3] += sum4; 2204 y[16 * i + 4] += sum5; 2205 y[16 * i + 5] += sum6; 2206 y[16 * i + 6] += sum7; 2207 y[16 * i + 7] += sum8; 2208 y[16 * i + 8] += sum9; 2209 y[16 * i + 9] += sum10; 2210 y[16 * i + 10] += sum11; 2211 y[16 * i + 11] += sum12; 2212 y[16 * i + 12] += sum13; 2213 y[16 * i + 13] += sum14; 2214 y[16 * i + 14] += sum15; 2215 y[16 * i + 15] += sum16; 2216 } 2217 2218 PetscCall(PetscLogFlops(32.0 * a->nz)); 2219 PetscCall(VecRestoreArrayRead(xx, &x)); 2220 PetscCall(VecRestoreArray(zz, &y)); 2221 PetscFunctionReturn(0); 2222 } 2223 2224 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) { 2225 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2226 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2227 const PetscScalar *x, *v; 2228 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 2229 PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16; 2230 const PetscInt m = b->AIJ->rmap->n, *idx; 2231 PetscInt n, i; 2232 2233 PetscFunctionBegin; 2234 if (yy != zz) PetscCall(VecCopy(yy, zz)); 2235 PetscCall(VecGetArrayRead(xx, &x)); 2236 PetscCall(VecGetArray(zz, &y)); 2237 for (i = 0; i < m; i++) { 2238 idx = a->j + a->i[i]; 2239 v = a->a + a->i[i]; 2240 n = a->i[i + 1] - a->i[i]; 2241 alpha1 = x[16 * i]; 2242 alpha2 = x[16 * i + 1]; 2243 alpha3 = x[16 * i + 2]; 2244 alpha4 = x[16 * i + 3]; 2245 alpha5 = x[16 * i + 4]; 2246 alpha6 = x[16 * i + 5]; 2247 alpha7 = x[16 * i + 6]; 2248 alpha8 = x[16 * i + 7]; 2249 alpha9 = x[16 * i + 8]; 2250 alpha10 = x[16 * i + 9]; 2251 alpha11 = x[16 * i + 10]; 2252 alpha12 = x[16 * i + 11]; 2253 alpha13 = x[16 * i + 12]; 2254 alpha14 = x[16 * i + 13]; 2255 alpha15 = x[16 * i + 14]; 2256 alpha16 = x[16 * i + 15]; 2257 while (n-- > 0) { 2258 y[16 * (*idx)] += alpha1 * (*v); 2259 y[16 * (*idx) + 1] += alpha2 * (*v); 2260 y[16 * (*idx) + 2] += alpha3 * (*v); 2261 y[16 * (*idx) + 3] += alpha4 * (*v); 2262 y[16 * (*idx) + 4] += alpha5 * (*v); 2263 y[16 * (*idx) + 5] += alpha6 * (*v); 2264 y[16 * (*idx) + 6] += alpha7 * (*v); 2265 y[16 * (*idx) + 7] += alpha8 * (*v); 2266 y[16 * (*idx) + 8] += alpha9 * (*v); 2267 y[16 * (*idx) + 9] += alpha10 * (*v); 2268 y[16 * (*idx) + 10] += alpha11 * (*v); 2269 y[16 * (*idx) + 11] += alpha12 * (*v); 2270 y[16 * (*idx) + 12] += alpha13 * (*v); 2271 y[16 * (*idx) + 13] += alpha14 * (*v); 2272 y[16 * (*idx) + 14] += alpha15 * (*v); 2273 y[16 * (*idx) + 15] += alpha16 * (*v); 2274 idx++; 2275 v++; 2276 } 2277 } 2278 PetscCall(PetscLogFlops(32.0 * a->nz)); 2279 PetscCall(VecRestoreArrayRead(xx, &x)); 2280 PetscCall(VecRestoreArray(zz, &y)); 2281 PetscFunctionReturn(0); 2282 } 2283 2284 /*--------------------------------------------------------------------------------------------*/ 2285 PetscErrorCode MatMult_SeqMAIJ_18(Mat A, Vec xx, Vec yy) { 2286 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2287 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2288 const PetscScalar *x, *v; 2289 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2290 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2291 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2292 PetscInt nonzerorow = 0, n, i, jrow, j; 2293 2294 PetscFunctionBegin; 2295 PetscCall(VecGetArrayRead(xx, &x)); 2296 PetscCall(VecGetArray(yy, &y)); 2297 idx = a->j; 2298 v = a->a; 2299 ii = a->i; 2300 2301 for (i = 0; i < m; i++) { 2302 jrow = ii[i]; 2303 n = ii[i + 1] - jrow; 2304 sum1 = 0.0; 2305 sum2 = 0.0; 2306 sum3 = 0.0; 2307 sum4 = 0.0; 2308 sum5 = 0.0; 2309 sum6 = 0.0; 2310 sum7 = 0.0; 2311 sum8 = 0.0; 2312 sum9 = 0.0; 2313 sum10 = 0.0; 2314 sum11 = 0.0; 2315 sum12 = 0.0; 2316 sum13 = 0.0; 2317 sum14 = 0.0; 2318 sum15 = 0.0; 2319 sum16 = 0.0; 2320 sum17 = 0.0; 2321 sum18 = 0.0; 2322 2323 nonzerorow += (n > 0); 2324 for (j = 0; j < n; j++) { 2325 sum1 += v[jrow] * x[18 * idx[jrow]]; 2326 sum2 += v[jrow] * x[18 * idx[jrow] + 1]; 2327 sum3 += v[jrow] * x[18 * idx[jrow] + 2]; 2328 sum4 += v[jrow] * x[18 * idx[jrow] + 3]; 2329 sum5 += v[jrow] * x[18 * idx[jrow] + 4]; 2330 sum6 += v[jrow] * x[18 * idx[jrow] + 5]; 2331 sum7 += v[jrow] * x[18 * idx[jrow] + 6]; 2332 sum8 += v[jrow] * x[18 * idx[jrow] + 7]; 2333 sum9 += v[jrow] * x[18 * idx[jrow] + 8]; 2334 sum10 += v[jrow] * x[18 * idx[jrow] + 9]; 2335 sum11 += v[jrow] * x[18 * idx[jrow] + 10]; 2336 sum12 += v[jrow] * x[18 * idx[jrow] + 11]; 2337 sum13 += v[jrow] * x[18 * idx[jrow] + 12]; 2338 sum14 += v[jrow] * x[18 * idx[jrow] + 13]; 2339 sum15 += v[jrow] * x[18 * idx[jrow] + 14]; 2340 sum16 += v[jrow] * x[18 * idx[jrow] + 15]; 2341 sum17 += v[jrow] * x[18 * idx[jrow] + 16]; 2342 sum18 += v[jrow] * x[18 * idx[jrow] + 17]; 2343 jrow++; 2344 } 2345 y[18 * i] = sum1; 2346 y[18 * i + 1] = sum2; 2347 y[18 * i + 2] = sum3; 2348 y[18 * i + 3] = sum4; 2349 y[18 * i + 4] = sum5; 2350 y[18 * i + 5] = sum6; 2351 y[18 * i + 6] = sum7; 2352 y[18 * i + 7] = sum8; 2353 y[18 * i + 8] = sum9; 2354 y[18 * i + 9] = sum10; 2355 y[18 * i + 10] = sum11; 2356 y[18 * i + 11] = sum12; 2357 y[18 * i + 12] = sum13; 2358 y[18 * i + 13] = sum14; 2359 y[18 * i + 14] = sum15; 2360 y[18 * i + 15] = sum16; 2361 y[18 * i + 16] = sum17; 2362 y[18 * i + 17] = sum18; 2363 } 2364 2365 PetscCall(PetscLogFlops(36.0 * a->nz - 18.0 * nonzerorow)); 2366 PetscCall(VecRestoreArrayRead(xx, &x)); 2367 PetscCall(VecRestoreArray(yy, &y)); 2368 PetscFunctionReturn(0); 2369 } 2370 2371 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A, Vec xx, Vec yy) { 2372 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2373 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2374 const PetscScalar *x, *v; 2375 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 2376 PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18; 2377 const PetscInt m = b->AIJ->rmap->n, *idx; 2378 PetscInt n, i; 2379 2380 PetscFunctionBegin; 2381 PetscCall(VecSet(yy, 0.0)); 2382 PetscCall(VecGetArrayRead(xx, &x)); 2383 PetscCall(VecGetArray(yy, &y)); 2384 2385 for (i = 0; i < m; i++) { 2386 idx = a->j + a->i[i]; 2387 v = a->a + a->i[i]; 2388 n = a->i[i + 1] - a->i[i]; 2389 alpha1 = x[18 * i]; 2390 alpha2 = x[18 * i + 1]; 2391 alpha3 = x[18 * i + 2]; 2392 alpha4 = x[18 * i + 3]; 2393 alpha5 = x[18 * i + 4]; 2394 alpha6 = x[18 * i + 5]; 2395 alpha7 = x[18 * i + 6]; 2396 alpha8 = x[18 * i + 7]; 2397 alpha9 = x[18 * i + 8]; 2398 alpha10 = x[18 * i + 9]; 2399 alpha11 = x[18 * i + 10]; 2400 alpha12 = x[18 * i + 11]; 2401 alpha13 = x[18 * i + 12]; 2402 alpha14 = x[18 * i + 13]; 2403 alpha15 = x[18 * i + 14]; 2404 alpha16 = x[18 * i + 15]; 2405 alpha17 = x[18 * i + 16]; 2406 alpha18 = x[18 * i + 17]; 2407 while (n-- > 0) { 2408 y[18 * (*idx)] += alpha1 * (*v); 2409 y[18 * (*idx) + 1] += alpha2 * (*v); 2410 y[18 * (*idx) + 2] += alpha3 * (*v); 2411 y[18 * (*idx) + 3] += alpha4 * (*v); 2412 y[18 * (*idx) + 4] += alpha5 * (*v); 2413 y[18 * (*idx) + 5] += alpha6 * (*v); 2414 y[18 * (*idx) + 6] += alpha7 * (*v); 2415 y[18 * (*idx) + 7] += alpha8 * (*v); 2416 y[18 * (*idx) + 8] += alpha9 * (*v); 2417 y[18 * (*idx) + 9] += alpha10 * (*v); 2418 y[18 * (*idx) + 10] += alpha11 * (*v); 2419 y[18 * (*idx) + 11] += alpha12 * (*v); 2420 y[18 * (*idx) + 12] += alpha13 * (*v); 2421 y[18 * (*idx) + 13] += alpha14 * (*v); 2422 y[18 * (*idx) + 14] += alpha15 * (*v); 2423 y[18 * (*idx) + 15] += alpha16 * (*v); 2424 y[18 * (*idx) + 16] += alpha17 * (*v); 2425 y[18 * (*idx) + 17] += alpha18 * (*v); 2426 idx++; 2427 v++; 2428 } 2429 } 2430 PetscCall(PetscLogFlops(36.0 * a->nz)); 2431 PetscCall(VecRestoreArrayRead(xx, &x)); 2432 PetscCall(VecRestoreArray(yy, &y)); 2433 PetscFunctionReturn(0); 2434 } 2435 2436 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) { 2437 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2438 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2439 const PetscScalar *x, *v; 2440 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2441 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2442 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2443 PetscInt n, i, jrow, j; 2444 2445 PetscFunctionBegin; 2446 if (yy != zz) PetscCall(VecCopy(yy, zz)); 2447 PetscCall(VecGetArrayRead(xx, &x)); 2448 PetscCall(VecGetArray(zz, &y)); 2449 idx = a->j; 2450 v = a->a; 2451 ii = a->i; 2452 2453 for (i = 0; i < m; i++) { 2454 jrow = ii[i]; 2455 n = ii[i + 1] - jrow; 2456 sum1 = 0.0; 2457 sum2 = 0.0; 2458 sum3 = 0.0; 2459 sum4 = 0.0; 2460 sum5 = 0.0; 2461 sum6 = 0.0; 2462 sum7 = 0.0; 2463 sum8 = 0.0; 2464 sum9 = 0.0; 2465 sum10 = 0.0; 2466 sum11 = 0.0; 2467 sum12 = 0.0; 2468 sum13 = 0.0; 2469 sum14 = 0.0; 2470 sum15 = 0.0; 2471 sum16 = 0.0; 2472 sum17 = 0.0; 2473 sum18 = 0.0; 2474 for (j = 0; j < n; j++) { 2475 sum1 += v[jrow] * x[18 * idx[jrow]]; 2476 sum2 += v[jrow] * x[18 * idx[jrow] + 1]; 2477 sum3 += v[jrow] * x[18 * idx[jrow] + 2]; 2478 sum4 += v[jrow] * x[18 * idx[jrow] + 3]; 2479 sum5 += v[jrow] * x[18 * idx[jrow] + 4]; 2480 sum6 += v[jrow] * x[18 * idx[jrow] + 5]; 2481 sum7 += v[jrow] * x[18 * idx[jrow] + 6]; 2482 sum8 += v[jrow] * x[18 * idx[jrow] + 7]; 2483 sum9 += v[jrow] * x[18 * idx[jrow] + 8]; 2484 sum10 += v[jrow] * x[18 * idx[jrow] + 9]; 2485 sum11 += v[jrow] * x[18 * idx[jrow] + 10]; 2486 sum12 += v[jrow] * x[18 * idx[jrow] + 11]; 2487 sum13 += v[jrow] * x[18 * idx[jrow] + 12]; 2488 sum14 += v[jrow] * x[18 * idx[jrow] + 13]; 2489 sum15 += v[jrow] * x[18 * idx[jrow] + 14]; 2490 sum16 += v[jrow] * x[18 * idx[jrow] + 15]; 2491 sum17 += v[jrow] * x[18 * idx[jrow] + 16]; 2492 sum18 += v[jrow] * x[18 * idx[jrow] + 17]; 2493 jrow++; 2494 } 2495 y[18 * i] += sum1; 2496 y[18 * i + 1] += sum2; 2497 y[18 * i + 2] += sum3; 2498 y[18 * i + 3] += sum4; 2499 y[18 * i + 4] += sum5; 2500 y[18 * i + 5] += sum6; 2501 y[18 * i + 6] += sum7; 2502 y[18 * i + 7] += sum8; 2503 y[18 * i + 8] += sum9; 2504 y[18 * i + 9] += sum10; 2505 y[18 * i + 10] += sum11; 2506 y[18 * i + 11] += sum12; 2507 y[18 * i + 12] += sum13; 2508 y[18 * i + 13] += sum14; 2509 y[18 * i + 14] += sum15; 2510 y[18 * i + 15] += sum16; 2511 y[18 * i + 16] += sum17; 2512 y[18 * i + 17] += sum18; 2513 } 2514 2515 PetscCall(PetscLogFlops(36.0 * a->nz)); 2516 PetscCall(VecRestoreArrayRead(xx, &x)); 2517 PetscCall(VecRestoreArray(zz, &y)); 2518 PetscFunctionReturn(0); 2519 } 2520 2521 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) { 2522 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2523 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2524 const PetscScalar *x, *v; 2525 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 2526 PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18; 2527 const PetscInt m = b->AIJ->rmap->n, *idx; 2528 PetscInt n, i; 2529 2530 PetscFunctionBegin; 2531 if (yy != zz) PetscCall(VecCopy(yy, zz)); 2532 PetscCall(VecGetArrayRead(xx, &x)); 2533 PetscCall(VecGetArray(zz, &y)); 2534 for (i = 0; i < m; i++) { 2535 idx = a->j + a->i[i]; 2536 v = a->a + a->i[i]; 2537 n = a->i[i + 1] - a->i[i]; 2538 alpha1 = x[18 * i]; 2539 alpha2 = x[18 * i + 1]; 2540 alpha3 = x[18 * i + 2]; 2541 alpha4 = x[18 * i + 3]; 2542 alpha5 = x[18 * i + 4]; 2543 alpha6 = x[18 * i + 5]; 2544 alpha7 = x[18 * i + 6]; 2545 alpha8 = x[18 * i + 7]; 2546 alpha9 = x[18 * i + 8]; 2547 alpha10 = x[18 * i + 9]; 2548 alpha11 = x[18 * i + 10]; 2549 alpha12 = x[18 * i + 11]; 2550 alpha13 = x[18 * i + 12]; 2551 alpha14 = x[18 * i + 13]; 2552 alpha15 = x[18 * i + 14]; 2553 alpha16 = x[18 * i + 15]; 2554 alpha17 = x[18 * i + 16]; 2555 alpha18 = x[18 * i + 17]; 2556 while (n-- > 0) { 2557 y[18 * (*idx)] += alpha1 * (*v); 2558 y[18 * (*idx) + 1] += alpha2 * (*v); 2559 y[18 * (*idx) + 2] += alpha3 * (*v); 2560 y[18 * (*idx) + 3] += alpha4 * (*v); 2561 y[18 * (*idx) + 4] += alpha5 * (*v); 2562 y[18 * (*idx) + 5] += alpha6 * (*v); 2563 y[18 * (*idx) + 6] += alpha7 * (*v); 2564 y[18 * (*idx) + 7] += alpha8 * (*v); 2565 y[18 * (*idx) + 8] += alpha9 * (*v); 2566 y[18 * (*idx) + 9] += alpha10 * (*v); 2567 y[18 * (*idx) + 10] += alpha11 * (*v); 2568 y[18 * (*idx) + 11] += alpha12 * (*v); 2569 y[18 * (*idx) + 12] += alpha13 * (*v); 2570 y[18 * (*idx) + 13] += alpha14 * (*v); 2571 y[18 * (*idx) + 14] += alpha15 * (*v); 2572 y[18 * (*idx) + 15] += alpha16 * (*v); 2573 y[18 * (*idx) + 16] += alpha17 * (*v); 2574 y[18 * (*idx) + 17] += alpha18 * (*v); 2575 idx++; 2576 v++; 2577 } 2578 } 2579 PetscCall(PetscLogFlops(36.0 * a->nz)); 2580 PetscCall(VecRestoreArrayRead(xx, &x)); 2581 PetscCall(VecRestoreArray(zz, &y)); 2582 PetscFunctionReturn(0); 2583 } 2584 2585 PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy) { 2586 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2587 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2588 const PetscScalar *x, *v; 2589 PetscScalar *y, *sums; 2590 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2591 PetscInt n, i, jrow, j, dof = b->dof, k; 2592 2593 PetscFunctionBegin; 2594 PetscCall(VecGetArrayRead(xx, &x)); 2595 PetscCall(VecSet(yy, 0.0)); 2596 PetscCall(VecGetArray(yy, &y)); 2597 idx = a->j; 2598 v = a->a; 2599 ii = a->i; 2600 2601 for (i = 0; i < m; i++) { 2602 jrow = ii[i]; 2603 n = ii[i + 1] - jrow; 2604 sums = y + dof * i; 2605 for (j = 0; j < n; j++) { 2606 for (k = 0; k < dof; k++) { sums[k] += v[jrow] * x[dof * idx[jrow] + k]; } 2607 jrow++; 2608 } 2609 } 2610 2611 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 2612 PetscCall(VecRestoreArrayRead(xx, &x)); 2613 PetscCall(VecRestoreArray(yy, &y)); 2614 PetscFunctionReturn(0); 2615 } 2616 2617 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) { 2618 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2619 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2620 const PetscScalar *x, *v; 2621 PetscScalar *y, *sums; 2622 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2623 PetscInt n, i, jrow, j, dof = b->dof, k; 2624 2625 PetscFunctionBegin; 2626 if (yy != zz) PetscCall(VecCopy(yy, zz)); 2627 PetscCall(VecGetArrayRead(xx, &x)); 2628 PetscCall(VecGetArray(zz, &y)); 2629 idx = a->j; 2630 v = a->a; 2631 ii = a->i; 2632 2633 for (i = 0; i < m; i++) { 2634 jrow = ii[i]; 2635 n = ii[i + 1] - jrow; 2636 sums = y + dof * i; 2637 for (j = 0; j < n; j++) { 2638 for (k = 0; k < dof; k++) { sums[k] += v[jrow] * x[dof * idx[jrow] + k]; } 2639 jrow++; 2640 } 2641 } 2642 2643 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 2644 PetscCall(VecRestoreArrayRead(xx, &x)); 2645 PetscCall(VecRestoreArray(zz, &y)); 2646 PetscFunctionReturn(0); 2647 } 2648 2649 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy) { 2650 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2651 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2652 const PetscScalar *x, *v, *alpha; 2653 PetscScalar *y; 2654 const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 2655 PetscInt n, i, k; 2656 2657 PetscFunctionBegin; 2658 PetscCall(VecGetArrayRead(xx, &x)); 2659 PetscCall(VecSet(yy, 0.0)); 2660 PetscCall(VecGetArray(yy, &y)); 2661 for (i = 0; i < m; i++) { 2662 idx = a->j + a->i[i]; 2663 v = a->a + a->i[i]; 2664 n = a->i[i + 1] - a->i[i]; 2665 alpha = x + dof * i; 2666 while (n-- > 0) { 2667 for (k = 0; k < dof; k++) { y[dof * (*idx) + k] += alpha[k] * (*v); } 2668 idx++; 2669 v++; 2670 } 2671 } 2672 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 2673 PetscCall(VecRestoreArrayRead(xx, &x)); 2674 PetscCall(VecRestoreArray(yy, &y)); 2675 PetscFunctionReturn(0); 2676 } 2677 2678 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) { 2679 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2680 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2681 const PetscScalar *x, *v, *alpha; 2682 PetscScalar *y; 2683 const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 2684 PetscInt n, i, k; 2685 2686 PetscFunctionBegin; 2687 if (yy != zz) PetscCall(VecCopy(yy, zz)); 2688 PetscCall(VecGetArrayRead(xx, &x)); 2689 PetscCall(VecGetArray(zz, &y)); 2690 for (i = 0; i < m; i++) { 2691 idx = a->j + a->i[i]; 2692 v = a->a + a->i[i]; 2693 n = a->i[i + 1] - a->i[i]; 2694 alpha = x + dof * i; 2695 while (n-- > 0) { 2696 for (k = 0; k < dof; k++) { y[dof * (*idx) + k] += alpha[k] * (*v); } 2697 idx++; 2698 v++; 2699 } 2700 } 2701 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 2702 PetscCall(VecRestoreArrayRead(xx, &x)); 2703 PetscCall(VecRestoreArray(zz, &y)); 2704 PetscFunctionReturn(0); 2705 } 2706 2707 /*===================================================================================*/ 2708 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) { 2709 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 2710 2711 PetscFunctionBegin; 2712 /* start the scatter */ 2713 PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 2714 PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy)); 2715 PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 2716 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy)); 2717 PetscFunctionReturn(0); 2718 } 2719 2720 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) { 2721 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 2722 2723 PetscFunctionBegin; 2724 PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 2725 PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy)); 2726 PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 2727 PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 2728 PetscFunctionReturn(0); 2729 } 2730 2731 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) { 2732 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 2733 2734 PetscFunctionBegin; 2735 /* start the scatter */ 2736 PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 2737 PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz)); 2738 PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 2739 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 2740 PetscFunctionReturn(0); 2741 } 2742 2743 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) { 2744 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 2745 2746 PetscFunctionBegin; 2747 PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 2748 PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz)); 2749 PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 2750 PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 2751 PetscFunctionReturn(0); 2752 } 2753 2754 /* ----------------------------------------------------------------*/ 2755 PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) { 2756 Mat_Product *product = C->product; 2757 2758 PetscFunctionBegin; 2759 if (product->type == MATPRODUCT_PtAP) { 2760 C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; 2761 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]); 2762 PetscFunctionReturn(0); 2763 } 2764 2765 PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) { 2766 Mat_Product *product = C->product; 2767 PetscBool flg = PETSC_FALSE; 2768 Mat A = product->A, P = product->B; 2769 PetscInt alg = 1; /* set default algorithm */ 2770 #if !defined(PETSC_HAVE_HYPRE) 2771 const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"}; 2772 PetscInt nalg = 4; 2773 #else 2774 const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"}; 2775 PetscInt nalg = 5; 2776 #endif 2777 2778 PetscFunctionBegin; 2779 PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]); 2780 2781 /* PtAP */ 2782 /* Check matrix local sizes */ 2783 PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 2784 A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend); 2785 PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 2786 A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend); 2787 2788 /* Set the default algorithm */ 2789 PetscCall(PetscStrcmp(C->product->alg, "default", &flg)); 2790 if (flg) { PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); } 2791 2792 /* Get runtime option */ 2793 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 2794 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg)); 2795 if (flg) { PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); } 2796 PetscOptionsEnd(); 2797 2798 PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg)); 2799 if (flg) { 2800 C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 2801 PetscFunctionReturn(0); 2802 } 2803 2804 PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg)); 2805 if (flg) { 2806 C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 2807 PetscFunctionReturn(0); 2808 } 2809 2810 /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ 2811 PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n")); 2812 PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P)); 2813 PetscCall(MatProductSetFromOptions(C)); 2814 PetscFunctionReturn(0); 2815 } 2816 2817 /* ----------------------------------------------------------------*/ 2818 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C) { 2819 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2820 Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 2821 Mat P = pp->AIJ; 2822 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c; 2823 PetscInt *pti, *ptj, *ptJ; 2824 PetscInt *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj; 2825 const PetscInt an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof; 2826 PetscInt i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn; 2827 MatScalar *ca; 2828 const PetscInt *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj; 2829 2830 PetscFunctionBegin; 2831 /* Get ij structure of P^T */ 2832 PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 2833 2834 cn = pn * ppdof; 2835 /* Allocate ci array, arrays for fill computation and */ 2836 /* free space for accumulating nonzero column info */ 2837 PetscCall(PetscMalloc1(cn + 1, &ci)); 2838 ci[0] = 0; 2839 2840 /* Work arrays for rows of P^T*A */ 2841 PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow)); 2842 PetscCall(PetscArrayzero(ptadenserow, an)); 2843 PetscCall(PetscArrayzero(denserow, cn)); 2844 2845 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 2846 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 2847 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2848 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space)); 2849 current_space = free_space; 2850 2851 /* Determine symbolic info for each row of C: */ 2852 for (i = 0; i < pn; i++) { 2853 ptnzi = pti[i + 1] - pti[i]; 2854 ptJ = ptj + pti[i]; 2855 for (dof = 0; dof < ppdof; dof++) { 2856 ptanzi = 0; 2857 /* Determine symbolic row of PtA: */ 2858 for (j = 0; j < ptnzi; j++) { 2859 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 2860 arow = ptJ[j] * ppdof + dof; 2861 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 2862 anzj = ai[arow + 1] - ai[arow]; 2863 ajj = aj + ai[arow]; 2864 for (k = 0; k < anzj; k++) { 2865 if (!ptadenserow[ajj[k]]) { 2866 ptadenserow[ajj[k]] = -1; 2867 ptasparserow[ptanzi++] = ajj[k]; 2868 } 2869 } 2870 } 2871 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 2872 ptaj = ptasparserow; 2873 cnzi = 0; 2874 for (j = 0; j < ptanzi; j++) { 2875 /* Get offset within block of P */ 2876 pshift = *ptaj % ppdof; 2877 /* Get block row of P */ 2878 prow = (*ptaj++) / ppdof; /* integer division */ 2879 /* P has same number of nonzeros per row as the compressed form */ 2880 pnzj = pi[prow + 1] - pi[prow]; 2881 pjj = pj + pi[prow]; 2882 for (k = 0; k < pnzj; k++) { 2883 /* Locations in C are shifted by the offset within the block */ 2884 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 2885 if (!denserow[pjj[k] * ppdof + pshift]) { 2886 denserow[pjj[k] * ppdof + pshift] = -1; 2887 sparserow[cnzi++] = pjj[k] * ppdof + pshift; 2888 } 2889 } 2890 } 2891 2892 /* sort sparserow */ 2893 PetscCall(PetscSortInt(cnzi, sparserow)); 2894 2895 /* If free space is not available, make more free space */ 2896 /* Double the amount of total space in the list */ 2897 if (current_space->local_remaining < cnzi) { PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space)); } 2898 2899 /* Copy data into free space, and zero out denserows */ 2900 PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi)); 2901 2902 current_space->array += cnzi; 2903 current_space->local_used += cnzi; 2904 current_space->local_remaining -= cnzi; 2905 2906 for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 2907 for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0; 2908 2909 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 2910 /* For now, we will recompute what is needed. */ 2911 ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi; 2912 } 2913 } 2914 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 2915 /* Allocate space for cj, initialize cj, and */ 2916 /* destroy list of free space and other temporary array(s) */ 2917 PetscCall(PetscMalloc1(ci[cn] + 1, &cj)); 2918 PetscCall(PetscFreeSpaceContiguous(&free_space, cj)); 2919 PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow)); 2920 2921 /* Allocate space for ca */ 2922 PetscCall(PetscCalloc1(ci[cn] + 1, &ca)); 2923 2924 /* put together the new matrix */ 2925 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C)); 2926 PetscCall(MatSetBlockSize(C, pp->dof)); 2927 2928 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 2929 /* Since these are PETSc arrays, change flags to free them as necessary. */ 2930 c = (Mat_SeqAIJ *)(C->data); 2931 c->free_a = PETSC_TRUE; 2932 c->free_ij = PETSC_TRUE; 2933 c->nonew = 0; 2934 2935 C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 2936 C->ops->productnumeric = MatProductNumeric_PtAP; 2937 2938 /* Clean up. */ 2939 PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 2940 PetscFunctionReturn(0); 2941 } 2942 2943 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C) { 2944 /* This routine requires testing -- first draft only */ 2945 Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 2946 Mat P = pp->AIJ; 2947 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2948 Mat_SeqAIJ *p = (Mat_SeqAIJ *)P->data; 2949 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 2950 const PetscInt *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj; 2951 const PetscInt *ci = c->i, *cj = c->j, *cjj; 2952 const PetscInt am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof; 2953 PetscInt i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense; 2954 const MatScalar *aa = a->a, *pa = p->a, *pA, *paj; 2955 MatScalar *ca = c->a, *caj, *apa; 2956 2957 PetscFunctionBegin; 2958 /* Allocate temporary array for storage of one row of A*P */ 2959 PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense)); 2960 2961 /* Clear old values in C */ 2962 PetscCall(PetscArrayzero(ca, ci[cm])); 2963 2964 for (i = 0; i < am; i++) { 2965 /* Form sparse row of A*P */ 2966 anzi = ai[i + 1] - ai[i]; 2967 apnzj = 0; 2968 for (j = 0; j < anzi; j++) { 2969 /* Get offset within block of P */ 2970 pshift = *aj % ppdof; 2971 /* Get block row of P */ 2972 prow = *aj++ / ppdof; /* integer division */ 2973 pnzj = pi[prow + 1] - pi[prow]; 2974 pjj = pj + pi[prow]; 2975 paj = pa + pi[prow]; 2976 for (k = 0; k < pnzj; k++) { 2977 poffset = pjj[k] * ppdof + pshift; 2978 if (!apjdense[poffset]) { 2979 apjdense[poffset] = -1; 2980 apj[apnzj++] = poffset; 2981 } 2982 apa[poffset] += (*aa) * paj[k]; 2983 } 2984 PetscCall(PetscLogFlops(2.0 * pnzj)); 2985 aa++; 2986 } 2987 2988 /* Sort the j index array for quick sparse axpy. */ 2989 /* Note: a array does not need sorting as it is in dense storage locations. */ 2990 PetscCall(PetscSortInt(apnzj, apj)); 2991 2992 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 2993 prow = i / ppdof; /* integer division */ 2994 pshift = i % ppdof; 2995 poffset = pi[prow]; 2996 pnzi = pi[prow + 1] - poffset; 2997 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 2998 pJ = pj + poffset; 2999 pA = pa + poffset; 3000 for (j = 0; j < pnzi; j++) { 3001 crow = (*pJ) * ppdof + pshift; 3002 cjj = cj + ci[crow]; 3003 caj = ca + ci[crow]; 3004 pJ++; 3005 /* Perform sparse axpy operation. Note cjj includes apj. */ 3006 for (k = 0, nextap = 0; nextap < apnzj; k++) { 3007 if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]]; 3008 } 3009 PetscCall(PetscLogFlops(2.0 * apnzj)); 3010 pA++; 3011 } 3012 3013 /* Zero the current row info for A*P */ 3014 for (j = 0; j < apnzj; j++) { 3015 apa[apj[j]] = 0.; 3016 apjdense[apj[j]] = 0; 3017 } 3018 } 3019 3020 /* Assemble the final matrix and clean up */ 3021 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 3022 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 3023 PetscCall(PetscFree3(apa, apj, apjdense)); 3024 PetscFunctionReturn(0); 3025 } 3026 3027 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) { 3028 Mat_Product *product = C->product; 3029 Mat A = product->A, P = product->B; 3030 3031 PetscFunctionBegin; 3032 PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C)); 3033 PetscFunctionReturn(0); 3034 } 3035 3036 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A, Mat PP, PetscReal fill, Mat *C) { 3037 PetscFunctionBegin; 3038 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet"); 3039 } 3040 3041 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A, Mat PP, Mat C) { 3042 PetscFunctionBegin; 3043 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 3044 } 3045 3046 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat); 3047 3048 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C) { 3049 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3050 3051 PetscFunctionBegin; 3052 3053 PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C)); 3054 PetscFunctionReturn(0); 3055 } 3056 3057 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat); 3058 3059 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C) { 3060 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3061 3062 PetscFunctionBegin; 3063 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C)); 3064 C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; 3065 PetscFunctionReturn(0); 3066 } 3067 3068 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat); 3069 3070 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C) { 3071 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3072 3073 PetscFunctionBegin; 3074 3075 PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C)); 3076 PetscFunctionReturn(0); 3077 } 3078 3079 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat); 3080 3081 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C) { 3082 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3083 3084 PetscFunctionBegin; 3085 3086 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C)); 3087 C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; 3088 PetscFunctionReturn(0); 3089 } 3090 3091 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) { 3092 Mat_Product *product = C->product; 3093 Mat A = product->A, P = product->B; 3094 PetscBool flg; 3095 3096 PetscFunctionBegin; 3097 PetscCall(PetscStrcmp(product->alg, "allatonce", &flg)); 3098 if (flg) { 3099 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C)); 3100 C->ops->productnumeric = MatProductNumeric_PtAP; 3101 PetscFunctionReturn(0); 3102 } 3103 3104 PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg)); 3105 if (flg) { 3106 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C)); 3107 C->ops->productnumeric = MatProductNumeric_PtAP; 3108 PetscFunctionReturn(0); 3109 } 3110 3111 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported"); 3112 } 3113 3114 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 3115 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 3116 Mat a = b->AIJ, B; 3117 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)a->data; 3118 PetscInt m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof; 3119 PetscInt *cols; 3120 PetscScalar *vals; 3121 3122 PetscFunctionBegin; 3123 PetscCall(MatGetSize(a, &m, &n)); 3124 PetscCall(PetscMalloc1(dof * m, &ilen)); 3125 for (i = 0; i < m; i++) { 3126 nmax = PetscMax(nmax, aij->ilen[i]); 3127 for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i]; 3128 } 3129 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 3130 PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n)); 3131 PetscCall(MatSetType(B, newtype)); 3132 PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen)); 3133 PetscCall(PetscFree(ilen)); 3134 PetscCall(PetscMalloc1(nmax, &icols)); 3135 ii = 0; 3136 for (i = 0; i < m; i++) { 3137 PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 3138 for (j = 0; j < dof; j++) { 3139 for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j; 3140 PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 3141 ii++; 3142 } 3143 PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 3144 } 3145 PetscCall(PetscFree(icols)); 3146 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3147 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3148 3149 if (reuse == MAT_INPLACE_MATRIX) { 3150 PetscCall(MatHeaderReplace(A, &B)); 3151 } else { 3152 *newmat = B; 3153 } 3154 PetscFunctionReturn(0); 3155 } 3156 3157 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3158 3159 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 3160 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)A->data; 3161 Mat MatAIJ = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B; 3162 Mat MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ; 3163 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ *)MatAIJ->data; 3164 Mat_SeqAIJ *OAIJ = (Mat_SeqAIJ *)MatOAIJ->data; 3165 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)maij->A->data; 3166 PetscInt dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0; 3167 PetscInt *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL; 3168 PetscInt rstart, cstart, *garray, ii, k; 3169 PetscScalar *vals, *ovals; 3170 3171 PetscFunctionBegin; 3172 PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz)); 3173 for (i = 0; i < A->rmap->n / dof; i++) { 3174 nmax = PetscMax(nmax, AIJ->ilen[i]); 3175 onmax = PetscMax(onmax, OAIJ->ilen[i]); 3176 for (j = 0; j < dof; j++) { 3177 dnz[dof * i + j] = AIJ->ilen[i]; 3178 onz[dof * i + j] = OAIJ->ilen[i]; 3179 } 3180 } 3181 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3182 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3183 PetscCall(MatSetType(B, newtype)); 3184 PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz)); 3185 PetscCall(MatSetBlockSize(B, dof)); 3186 PetscCall(PetscFree2(dnz, onz)); 3187 3188 PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols)); 3189 rstart = dof * maij->A->rmap->rstart; 3190 cstart = dof * maij->A->cmap->rstart; 3191 garray = mpiaij->garray; 3192 3193 ii = rstart; 3194 for (i = 0; i < A->rmap->n / dof; i++) { 3195 PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 3196 PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 3197 for (j = 0; j < dof; j++) { 3198 for (k = 0; k < ncols; k++) { icols[k] = cstart + dof * cols[k] + j; } 3199 for (k = 0; k < oncols; k++) { oicols[k] = dof * garray[ocols[k]] + j; } 3200 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 3201 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES)); 3202 ii++; 3203 } 3204 PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 3205 PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 3206 } 3207 PetscCall(PetscFree2(icols, oicols)); 3208 3209 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3210 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3211 3212 if (reuse == MAT_INPLACE_MATRIX) { 3213 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 3214 ((PetscObject)A)->refct = 1; 3215 3216 PetscCall(MatHeaderReplace(A, &B)); 3217 3218 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3219 } else { 3220 *newmat = B; 3221 } 3222 PetscFunctionReturn(0); 3223 } 3224 3225 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) { 3226 Mat A; 3227 3228 PetscFunctionBegin; 3229 PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 3230 PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 3231 PetscCall(MatDestroy(&A)); 3232 PetscFunctionReturn(0); 3233 } 3234 3235 PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[]) { 3236 Mat A; 3237 3238 PetscFunctionBegin; 3239 PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 3240 PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat)); 3241 PetscCall(MatDestroy(&A)); 3242 PetscFunctionReturn(0); 3243 } 3244 3245 /* ---------------------------------------------------------------------------------- */ 3246 /*@ 3247 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 3248 operations for multicomponent problems. It interpolates each component the same 3249 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 3250 and MATMPIAIJ for distributed matrices. 3251 3252 Collective 3253 3254 Input Parameters: 3255 + A - the AIJ matrix describing the action on blocks 3256 - dof - the block size (number of components per node) 3257 3258 Output Parameter: 3259 . maij - the new MAIJ matrix 3260 3261 Operations provided: 3262 .vb 3263 MatMult 3264 MatMultTranspose 3265 MatMultAdd 3266 MatMultTransposeAdd 3267 MatView 3268 .ve 3269 3270 Level: advanced 3271 3272 .seealso: `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ` 3273 @*/ 3274 PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij) { 3275 PetscInt n; 3276 Mat B; 3277 PetscBool flg; 3278 #if defined(PETSC_HAVE_CUDA) 3279 /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 3280 PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; 3281 #endif 3282 3283 PetscFunctionBegin; 3284 dof = PetscAbs(dof); 3285 PetscCall(PetscObjectReference((PetscObject)A)); 3286 3287 if (dof == 1) *maij = A; 3288 else { 3289 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3290 /* propagate vec type */ 3291 PetscCall(MatSetVecType(B, A->defaultvectype)); 3292 PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N)); 3293 PetscCall(PetscLayoutSetBlockSize(B->rmap, dof)); 3294 PetscCall(PetscLayoutSetBlockSize(B->cmap, dof)); 3295 PetscCall(PetscLayoutSetUp(B->rmap)); 3296 PetscCall(PetscLayoutSetUp(B->cmap)); 3297 3298 B->assembled = PETSC_TRUE; 3299 3300 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 3301 if (flg) { 3302 Mat_SeqMAIJ *b; 3303 3304 PetscCall(MatSetType(B, MATSEQMAIJ)); 3305 3306 B->ops->setup = NULL; 3307 B->ops->destroy = MatDestroy_SeqMAIJ; 3308 B->ops->view = MatView_SeqMAIJ; 3309 3310 b = (Mat_SeqMAIJ *)B->data; 3311 b->dof = dof; 3312 b->AIJ = A; 3313 3314 if (dof == 2) { 3315 B->ops->mult = MatMult_SeqMAIJ_2; 3316 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3317 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3318 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3319 } else if (dof == 3) { 3320 B->ops->mult = MatMult_SeqMAIJ_3; 3321 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3322 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3323 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3324 } else if (dof == 4) { 3325 B->ops->mult = MatMult_SeqMAIJ_4; 3326 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3327 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3328 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3329 } else if (dof == 5) { 3330 B->ops->mult = MatMult_SeqMAIJ_5; 3331 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3332 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3333 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 3334 } else if (dof == 6) { 3335 B->ops->mult = MatMult_SeqMAIJ_6; 3336 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 3337 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 3338 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3339 } else if (dof == 7) { 3340 B->ops->mult = MatMult_SeqMAIJ_7; 3341 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3342 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3343 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 3344 } else if (dof == 8) { 3345 B->ops->mult = MatMult_SeqMAIJ_8; 3346 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 3347 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 3348 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 3349 } else if (dof == 9) { 3350 B->ops->mult = MatMult_SeqMAIJ_9; 3351 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 3352 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 3353 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3354 } else if (dof == 10) { 3355 B->ops->mult = MatMult_SeqMAIJ_10; 3356 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3357 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3358 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3359 } else if (dof == 11) { 3360 B->ops->mult = MatMult_SeqMAIJ_11; 3361 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3362 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3363 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 3364 } else if (dof == 16) { 3365 B->ops->mult = MatMult_SeqMAIJ_16; 3366 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 3367 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 3368 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3369 } else if (dof == 18) { 3370 B->ops->mult = MatMult_SeqMAIJ_18; 3371 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3372 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3373 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3374 } else { 3375 B->ops->mult = MatMult_SeqMAIJ_N; 3376 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 3377 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 3378 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 3379 } 3380 #if defined(PETSC_HAVE_CUDA) 3381 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ)); 3382 #endif 3383 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ)); 3384 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ)); 3385 } else { 3386 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 3387 Mat_MPIMAIJ *b; 3388 IS from, to; 3389 Vec gvec; 3390 3391 PetscCall(MatSetType(B, MATMPIMAIJ)); 3392 3393 B->ops->setup = NULL; 3394 B->ops->destroy = MatDestroy_MPIMAIJ; 3395 B->ops->view = MatView_MPIMAIJ; 3396 3397 b = (Mat_MPIMAIJ *)B->data; 3398 b->dof = dof; 3399 b->A = A; 3400 3401 PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ)); 3402 PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ)); 3403 3404 PetscCall(VecGetSize(mpiaij->lvec, &n)); 3405 PetscCall(VecCreate(PETSC_COMM_SELF, &b->w)); 3406 PetscCall(VecSetSizes(b->w, n * dof, n * dof)); 3407 PetscCall(VecSetBlockSize(b->w, dof)); 3408 PetscCall(VecSetType(b->w, VECSEQ)); 3409 3410 /* create two temporary Index sets for build scatter gather */ 3411 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from)); 3412 PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to)); 3413 3414 /* create temporary global vector to generate scatter context */ 3415 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec)); 3416 3417 /* generate the scatter context */ 3418 PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx)); 3419 3420 PetscCall(ISDestroy(&from)); 3421 PetscCall(ISDestroy(&to)); 3422 PetscCall(VecDestroy(&gvec)); 3423 3424 B->ops->mult = MatMult_MPIMAIJ_dof; 3425 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3426 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3427 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3428 3429 #if defined(PETSC_HAVE_CUDA) 3430 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ)); 3431 #endif 3432 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ)); 3433 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ)); 3434 } 3435 B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 3436 B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 3437 PetscCall(MatSetUp(B)); 3438 #if defined(PETSC_HAVE_CUDA) 3439 /* temporary until we have CUDA implementation of MAIJ */ 3440 { 3441 PetscBool flg; 3442 if (convert) { 3443 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, "")); 3444 if (flg) { PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B)); } 3445 } 3446 } 3447 #endif 3448 *maij = B; 3449 } 3450 PetscFunctionReturn(0); 3451 } 3452