1 2 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/utils/freespace.h> 4 5 /*@ 6 MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix 7 8 Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel 9 10 Input Parameter: 11 . A - the `MATMAIJ` matrix 12 13 Output Parameter: 14 . B - the `MATAIJ` matrix 15 16 Level: advanced 17 18 Note: 19 The reference count on the `MATAIJ` matrix is not increased so you should not destroy it. 20 21 .seealso: `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()` 22 @*/ 23 PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B) { 24 PetscBool ismpimaij, isseqmaij; 25 26 PetscFunctionBegin; 27 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij)); 28 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij)); 29 if (ismpimaij) { 30 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 31 32 *B = b->A; 33 } else if (isseqmaij) { 34 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 35 36 *B = b->AIJ; 37 } else { 38 *B = A; 39 } 40 PetscFunctionReturn(0); 41 } 42 43 /*@ 44 MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size 45 46 Logically Collective 47 48 Input Parameters: 49 + A - the `MATMAIJ` matrix 50 - dof - the block size for the new matrix 51 52 Output Parameter: 53 . B - the new `MATMAIJ` matrix 54 55 Level: advanced 56 57 .seealso: `MATMAIJ`, `MatCreateMAIJ()` 58 @*/ 59 PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B) { 60 Mat Aij = NULL; 61 62 PetscFunctionBegin; 63 PetscValidLogicalCollectiveInt(A, dof, 2); 64 PetscCall(MatMAIJGetAIJ(A, &Aij)); 65 PetscCall(MatCreateMAIJ(Aij, dof, B)); 66 PetscFunctionReturn(0); 67 } 68 69 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) { 70 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 71 72 PetscFunctionBegin; 73 PetscCall(MatDestroy(&b->AIJ)); 74 PetscCall(PetscFree(A->data)); 75 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL)); 76 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL)); 77 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL)); 78 PetscFunctionReturn(0); 79 } 80 81 PetscErrorCode MatSetUp_MAIJ(Mat A) { 82 PetscFunctionBegin; 83 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices"); 84 } 85 86 PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer) { 87 Mat B; 88 89 PetscFunctionBegin; 90 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 91 PetscCall(MatView(B, viewer)); 92 PetscCall(MatDestroy(&B)); 93 PetscFunctionReturn(0); 94 } 95 96 PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer) { 97 Mat B; 98 99 PetscFunctionBegin; 100 PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B)); 101 PetscCall(MatView(B, viewer)); 102 PetscCall(MatDestroy(&B)); 103 PetscFunctionReturn(0); 104 } 105 106 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) { 107 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 108 109 PetscFunctionBegin; 110 PetscCall(MatDestroy(&b->AIJ)); 111 PetscCall(MatDestroy(&b->OAIJ)); 112 PetscCall(MatDestroy(&b->A)); 113 PetscCall(VecScatterDestroy(&b->ctx)); 114 PetscCall(VecDestroy(&b->w)); 115 PetscCall(PetscFree(A->data)); 116 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL)); 117 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL)); 118 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL)); 119 PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 120 PetscFunctionReturn(0); 121 } 122 123 /*MC 124 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 125 multicomponent problems, interpolating or restricting each component the same way independently. 126 The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices. 127 128 Operations provided: 129 .vb 130 MatMult() 131 MatMultTranspose() 132 MatMultAdd() 133 MatMultTransposeAdd() 134 .ve 135 136 Level: advanced 137 138 .seealso: `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()` 139 M*/ 140 141 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) { 142 Mat_MPIMAIJ *b; 143 PetscMPIInt size; 144 145 PetscFunctionBegin; 146 PetscCall(PetscNewLog(A, &b)); 147 A->data = (void *)b; 148 149 PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps))); 150 151 A->ops->setup = MatSetUp_MAIJ; 152 153 b->AIJ = NULL; 154 b->dof = 0; 155 b->OAIJ = NULL; 156 b->ctx = NULL; 157 b->w = NULL; 158 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 159 if (size == 1) { 160 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ)); 161 } else { 162 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ)); 163 } 164 A->preallocated = PETSC_TRUE; 165 A->assembled = PETSC_TRUE; 166 PetscFunctionReturn(0); 167 } 168 169 /* --------------------------------------------------------------------------------------*/ 170 PetscErrorCode MatMult_SeqMAIJ_2(Mat A, Vec xx, Vec yy) { 171 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 172 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 173 const PetscScalar *x, *v; 174 PetscScalar *y, sum1, sum2; 175 PetscInt nonzerorow = 0, n, i, jrow, j; 176 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 177 178 PetscFunctionBegin; 179 PetscCall(VecGetArrayRead(xx, &x)); 180 PetscCall(VecGetArray(yy, &y)); 181 idx = a->j; 182 v = a->a; 183 ii = a->i; 184 185 for (i = 0; i < m; i++) { 186 jrow = ii[i]; 187 n = ii[i + 1] - jrow; 188 sum1 = 0.0; 189 sum2 = 0.0; 190 191 nonzerorow += (n > 0); 192 for (j = 0; j < n; j++) { 193 sum1 += v[jrow] * x[2 * idx[jrow]]; 194 sum2 += v[jrow] * x[2 * idx[jrow] + 1]; 195 jrow++; 196 } 197 y[2 * i] = sum1; 198 y[2 * i + 1] = sum2; 199 } 200 201 PetscCall(PetscLogFlops(4.0 * a->nz - 2.0 * nonzerorow)); 202 PetscCall(VecRestoreArrayRead(xx, &x)); 203 PetscCall(VecRestoreArray(yy, &y)); 204 PetscFunctionReturn(0); 205 } 206 207 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A, Vec xx, Vec yy) { 208 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 209 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 210 const PetscScalar *x, *v; 211 PetscScalar *y, alpha1, alpha2; 212 PetscInt n, i; 213 const PetscInt m = b->AIJ->rmap->n, *idx; 214 215 PetscFunctionBegin; 216 PetscCall(VecSet(yy, 0.0)); 217 PetscCall(VecGetArrayRead(xx, &x)); 218 PetscCall(VecGetArray(yy, &y)); 219 220 for (i = 0; i < m; i++) { 221 idx = a->j + a->i[i]; 222 v = a->a + a->i[i]; 223 n = a->i[i + 1] - a->i[i]; 224 alpha1 = x[2 * i]; 225 alpha2 = x[2 * i + 1]; 226 while (n-- > 0) { 227 y[2 * (*idx)] += alpha1 * (*v); 228 y[2 * (*idx) + 1] += alpha2 * (*v); 229 idx++; 230 v++; 231 } 232 } 233 PetscCall(PetscLogFlops(4.0 * a->nz)); 234 PetscCall(VecRestoreArrayRead(xx, &x)); 235 PetscCall(VecRestoreArray(yy, &y)); 236 PetscFunctionReturn(0); 237 } 238 239 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) { 240 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 241 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 242 const PetscScalar *x, *v; 243 PetscScalar *y, sum1, sum2; 244 PetscInt n, i, jrow, j; 245 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 246 247 PetscFunctionBegin; 248 if (yy != zz) PetscCall(VecCopy(yy, zz)); 249 PetscCall(VecGetArrayRead(xx, &x)); 250 PetscCall(VecGetArray(zz, &y)); 251 idx = a->j; 252 v = a->a; 253 ii = a->i; 254 255 for (i = 0; i < m; i++) { 256 jrow = ii[i]; 257 n = ii[i + 1] - jrow; 258 sum1 = 0.0; 259 sum2 = 0.0; 260 for (j = 0; j < n; j++) { 261 sum1 += v[jrow] * x[2 * idx[jrow]]; 262 sum2 += v[jrow] * x[2 * idx[jrow] + 1]; 263 jrow++; 264 } 265 y[2 * i] += sum1; 266 y[2 * i + 1] += sum2; 267 } 268 269 PetscCall(PetscLogFlops(4.0 * a->nz)); 270 PetscCall(VecRestoreArrayRead(xx, &x)); 271 PetscCall(VecRestoreArray(zz, &y)); 272 PetscFunctionReturn(0); 273 } 274 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) { 275 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 276 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 277 const PetscScalar *x, *v; 278 PetscScalar *y, alpha1, alpha2; 279 PetscInt n, i; 280 const PetscInt m = b->AIJ->rmap->n, *idx; 281 282 PetscFunctionBegin; 283 if (yy != zz) PetscCall(VecCopy(yy, zz)); 284 PetscCall(VecGetArrayRead(xx, &x)); 285 PetscCall(VecGetArray(zz, &y)); 286 287 for (i = 0; i < m; i++) { 288 idx = a->j + a->i[i]; 289 v = a->a + a->i[i]; 290 n = a->i[i + 1] - a->i[i]; 291 alpha1 = x[2 * i]; 292 alpha2 = x[2 * i + 1]; 293 while (n-- > 0) { 294 y[2 * (*idx)] += alpha1 * (*v); 295 y[2 * (*idx) + 1] += alpha2 * (*v); 296 idx++; 297 v++; 298 } 299 } 300 PetscCall(PetscLogFlops(4.0 * a->nz)); 301 PetscCall(VecRestoreArrayRead(xx, &x)); 302 PetscCall(VecRestoreArray(zz, &y)); 303 PetscFunctionReturn(0); 304 } 305 /* --------------------------------------------------------------------------------------*/ 306 PetscErrorCode MatMult_SeqMAIJ_3(Mat A, Vec xx, Vec yy) { 307 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 308 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 309 const PetscScalar *x, *v; 310 PetscScalar *y, sum1, sum2, sum3; 311 PetscInt nonzerorow = 0, n, i, jrow, j; 312 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 313 314 PetscFunctionBegin; 315 PetscCall(VecGetArrayRead(xx, &x)); 316 PetscCall(VecGetArray(yy, &y)); 317 idx = a->j; 318 v = a->a; 319 ii = a->i; 320 321 for (i = 0; i < m; i++) { 322 jrow = ii[i]; 323 n = ii[i + 1] - jrow; 324 sum1 = 0.0; 325 sum2 = 0.0; 326 sum3 = 0.0; 327 328 nonzerorow += (n > 0); 329 for (j = 0; j < n; j++) { 330 sum1 += v[jrow] * x[3 * idx[jrow]]; 331 sum2 += v[jrow] * x[3 * idx[jrow] + 1]; 332 sum3 += v[jrow] * x[3 * idx[jrow] + 2]; 333 jrow++; 334 } 335 y[3 * i] = sum1; 336 y[3 * i + 1] = sum2; 337 y[3 * i + 2] = sum3; 338 } 339 340 PetscCall(PetscLogFlops(6.0 * a->nz - 3.0 * nonzerorow)); 341 PetscCall(VecRestoreArrayRead(xx, &x)); 342 PetscCall(VecRestoreArray(yy, &y)); 343 PetscFunctionReturn(0); 344 } 345 346 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A, Vec xx, Vec yy) { 347 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 348 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 349 const PetscScalar *x, *v; 350 PetscScalar *y, alpha1, alpha2, alpha3; 351 PetscInt n, i; 352 const PetscInt m = b->AIJ->rmap->n, *idx; 353 354 PetscFunctionBegin; 355 PetscCall(VecSet(yy, 0.0)); 356 PetscCall(VecGetArrayRead(xx, &x)); 357 PetscCall(VecGetArray(yy, &y)); 358 359 for (i = 0; i < m; i++) { 360 idx = a->j + a->i[i]; 361 v = a->a + a->i[i]; 362 n = a->i[i + 1] - a->i[i]; 363 alpha1 = x[3 * i]; 364 alpha2 = x[3 * i + 1]; 365 alpha3 = x[3 * i + 2]; 366 while (n-- > 0) { 367 y[3 * (*idx)] += alpha1 * (*v); 368 y[3 * (*idx) + 1] += alpha2 * (*v); 369 y[3 * (*idx) + 2] += alpha3 * (*v); 370 idx++; 371 v++; 372 } 373 } 374 PetscCall(PetscLogFlops(6.0 * a->nz)); 375 PetscCall(VecRestoreArrayRead(xx, &x)); 376 PetscCall(VecRestoreArray(yy, &y)); 377 PetscFunctionReturn(0); 378 } 379 380 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) { 381 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 382 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 383 const PetscScalar *x, *v; 384 PetscScalar *y, sum1, sum2, sum3; 385 PetscInt n, i, jrow, j; 386 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 387 388 PetscFunctionBegin; 389 if (yy != zz) PetscCall(VecCopy(yy, zz)); 390 PetscCall(VecGetArrayRead(xx, &x)); 391 PetscCall(VecGetArray(zz, &y)); 392 idx = a->j; 393 v = a->a; 394 ii = a->i; 395 396 for (i = 0; i < m; i++) { 397 jrow = ii[i]; 398 n = ii[i + 1] - jrow; 399 sum1 = 0.0; 400 sum2 = 0.0; 401 sum3 = 0.0; 402 for (j = 0; j < n; j++) { 403 sum1 += v[jrow] * x[3 * idx[jrow]]; 404 sum2 += v[jrow] * x[3 * idx[jrow] + 1]; 405 sum3 += v[jrow] * x[3 * idx[jrow] + 2]; 406 jrow++; 407 } 408 y[3 * i] += sum1; 409 y[3 * i + 1] += sum2; 410 y[3 * i + 2] += sum3; 411 } 412 413 PetscCall(PetscLogFlops(6.0 * a->nz)); 414 PetscCall(VecRestoreArrayRead(xx, &x)); 415 PetscCall(VecRestoreArray(zz, &y)); 416 PetscFunctionReturn(0); 417 } 418 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) { 419 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 420 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 421 const PetscScalar *x, *v; 422 PetscScalar *y, alpha1, alpha2, alpha3; 423 PetscInt n, i; 424 const PetscInt m = b->AIJ->rmap->n, *idx; 425 426 PetscFunctionBegin; 427 if (yy != zz) PetscCall(VecCopy(yy, zz)); 428 PetscCall(VecGetArrayRead(xx, &x)); 429 PetscCall(VecGetArray(zz, &y)); 430 for (i = 0; i < m; i++) { 431 idx = a->j + a->i[i]; 432 v = a->a + a->i[i]; 433 n = a->i[i + 1] - a->i[i]; 434 alpha1 = x[3 * i]; 435 alpha2 = x[3 * i + 1]; 436 alpha3 = x[3 * i + 2]; 437 while (n-- > 0) { 438 y[3 * (*idx)] += alpha1 * (*v); 439 y[3 * (*idx) + 1] += alpha2 * (*v); 440 y[3 * (*idx) + 2] += alpha3 * (*v); 441 idx++; 442 v++; 443 } 444 } 445 PetscCall(PetscLogFlops(6.0 * a->nz)); 446 PetscCall(VecRestoreArrayRead(xx, &x)); 447 PetscCall(VecRestoreArray(zz, &y)); 448 PetscFunctionReturn(0); 449 } 450 451 /* ------------------------------------------------------------------------------*/ 452 PetscErrorCode MatMult_SeqMAIJ_4(Mat A, Vec xx, Vec yy) { 453 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 454 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 455 const PetscScalar *x, *v; 456 PetscScalar *y, sum1, sum2, sum3, sum4; 457 PetscInt nonzerorow = 0, n, i, jrow, j; 458 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 459 460 PetscFunctionBegin; 461 PetscCall(VecGetArrayRead(xx, &x)); 462 PetscCall(VecGetArray(yy, &y)); 463 idx = a->j; 464 v = a->a; 465 ii = a->i; 466 467 for (i = 0; i < m; i++) { 468 jrow = ii[i]; 469 n = ii[i + 1] - jrow; 470 sum1 = 0.0; 471 sum2 = 0.0; 472 sum3 = 0.0; 473 sum4 = 0.0; 474 nonzerorow += (n > 0); 475 for (j = 0; j < n; j++) { 476 sum1 += v[jrow] * x[4 * idx[jrow]]; 477 sum2 += v[jrow] * x[4 * idx[jrow] + 1]; 478 sum3 += v[jrow] * x[4 * idx[jrow] + 2]; 479 sum4 += v[jrow] * x[4 * idx[jrow] + 3]; 480 jrow++; 481 } 482 y[4 * i] = sum1; 483 y[4 * i + 1] = sum2; 484 y[4 * i + 2] = sum3; 485 y[4 * i + 3] = sum4; 486 } 487 488 PetscCall(PetscLogFlops(8.0 * a->nz - 4.0 * nonzerorow)); 489 PetscCall(VecRestoreArrayRead(xx, &x)); 490 PetscCall(VecRestoreArray(yy, &y)); 491 PetscFunctionReturn(0); 492 } 493 494 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A, Vec xx, Vec yy) { 495 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 496 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 497 const PetscScalar *x, *v; 498 PetscScalar *y, alpha1, alpha2, alpha3, alpha4; 499 PetscInt n, i; 500 const PetscInt m = b->AIJ->rmap->n, *idx; 501 502 PetscFunctionBegin; 503 PetscCall(VecSet(yy, 0.0)); 504 PetscCall(VecGetArrayRead(xx, &x)); 505 PetscCall(VecGetArray(yy, &y)); 506 for (i = 0; i < m; i++) { 507 idx = a->j + a->i[i]; 508 v = a->a + a->i[i]; 509 n = a->i[i + 1] - a->i[i]; 510 alpha1 = x[4 * i]; 511 alpha2 = x[4 * i + 1]; 512 alpha3 = x[4 * i + 2]; 513 alpha4 = x[4 * i + 3]; 514 while (n-- > 0) { 515 y[4 * (*idx)] += alpha1 * (*v); 516 y[4 * (*idx) + 1] += alpha2 * (*v); 517 y[4 * (*idx) + 2] += alpha3 * (*v); 518 y[4 * (*idx) + 3] += alpha4 * (*v); 519 idx++; 520 v++; 521 } 522 } 523 PetscCall(PetscLogFlops(8.0 * a->nz)); 524 PetscCall(VecRestoreArrayRead(xx, &x)); 525 PetscCall(VecRestoreArray(yy, &y)); 526 PetscFunctionReturn(0); 527 } 528 529 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) { 530 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 531 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 532 const PetscScalar *x, *v; 533 PetscScalar *y, sum1, sum2, sum3, sum4; 534 PetscInt n, i, jrow, j; 535 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 536 537 PetscFunctionBegin; 538 if (yy != zz) PetscCall(VecCopy(yy, zz)); 539 PetscCall(VecGetArrayRead(xx, &x)); 540 PetscCall(VecGetArray(zz, &y)); 541 idx = a->j; 542 v = a->a; 543 ii = a->i; 544 545 for (i = 0; i < m; i++) { 546 jrow = ii[i]; 547 n = ii[i + 1] - jrow; 548 sum1 = 0.0; 549 sum2 = 0.0; 550 sum3 = 0.0; 551 sum4 = 0.0; 552 for (j = 0; j < n; j++) { 553 sum1 += v[jrow] * x[4 * idx[jrow]]; 554 sum2 += v[jrow] * x[4 * idx[jrow] + 1]; 555 sum3 += v[jrow] * x[4 * idx[jrow] + 2]; 556 sum4 += v[jrow] * x[4 * idx[jrow] + 3]; 557 jrow++; 558 } 559 y[4 * i] += sum1; 560 y[4 * i + 1] += sum2; 561 y[4 * i + 2] += sum3; 562 y[4 * i + 3] += sum4; 563 } 564 565 PetscCall(PetscLogFlops(8.0 * a->nz)); 566 PetscCall(VecRestoreArrayRead(xx, &x)); 567 PetscCall(VecRestoreArray(zz, &y)); 568 PetscFunctionReturn(0); 569 } 570 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) { 571 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 572 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 573 const PetscScalar *x, *v; 574 PetscScalar *y, alpha1, alpha2, alpha3, alpha4; 575 PetscInt n, i; 576 const PetscInt m = b->AIJ->rmap->n, *idx; 577 578 PetscFunctionBegin; 579 if (yy != zz) PetscCall(VecCopy(yy, zz)); 580 PetscCall(VecGetArrayRead(xx, &x)); 581 PetscCall(VecGetArray(zz, &y)); 582 583 for (i = 0; i < m; i++) { 584 idx = a->j + a->i[i]; 585 v = a->a + a->i[i]; 586 n = a->i[i + 1] - a->i[i]; 587 alpha1 = x[4 * i]; 588 alpha2 = x[4 * i + 1]; 589 alpha3 = x[4 * i + 2]; 590 alpha4 = x[4 * i + 3]; 591 while (n-- > 0) { 592 y[4 * (*idx)] += alpha1 * (*v); 593 y[4 * (*idx) + 1] += alpha2 * (*v); 594 y[4 * (*idx) + 2] += alpha3 * (*v); 595 y[4 * (*idx) + 3] += alpha4 * (*v); 596 idx++; 597 v++; 598 } 599 } 600 PetscCall(PetscLogFlops(8.0 * a->nz)); 601 PetscCall(VecRestoreArrayRead(xx, &x)); 602 PetscCall(VecRestoreArray(zz, &y)); 603 PetscFunctionReturn(0); 604 } 605 /* ------------------------------------------------------------------------------*/ 606 607 PetscErrorCode MatMult_SeqMAIJ_5(Mat A, Vec xx, Vec yy) { 608 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 609 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 610 const PetscScalar *x, *v; 611 PetscScalar *y, sum1, sum2, sum3, sum4, sum5; 612 PetscInt nonzerorow = 0, n, i, jrow, j; 613 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 614 615 PetscFunctionBegin; 616 PetscCall(VecGetArrayRead(xx, &x)); 617 PetscCall(VecGetArray(yy, &y)); 618 idx = a->j; 619 v = a->a; 620 ii = a->i; 621 622 for (i = 0; i < m; i++) { 623 jrow = ii[i]; 624 n = ii[i + 1] - jrow; 625 sum1 = 0.0; 626 sum2 = 0.0; 627 sum3 = 0.0; 628 sum4 = 0.0; 629 sum5 = 0.0; 630 631 nonzerorow += (n > 0); 632 for (j = 0; j < n; j++) { 633 sum1 += v[jrow] * x[5 * idx[jrow]]; 634 sum2 += v[jrow] * x[5 * idx[jrow] + 1]; 635 sum3 += v[jrow] * x[5 * idx[jrow] + 2]; 636 sum4 += v[jrow] * x[5 * idx[jrow] + 3]; 637 sum5 += v[jrow] * x[5 * idx[jrow] + 4]; 638 jrow++; 639 } 640 y[5 * i] = sum1; 641 y[5 * i + 1] = sum2; 642 y[5 * i + 2] = sum3; 643 y[5 * i + 3] = sum4; 644 y[5 * i + 4] = sum5; 645 } 646 647 PetscCall(PetscLogFlops(10.0 * a->nz - 5.0 * nonzerorow)); 648 PetscCall(VecRestoreArrayRead(xx, &x)); 649 PetscCall(VecRestoreArray(yy, &y)); 650 PetscFunctionReturn(0); 651 } 652 653 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A, Vec xx, Vec yy) { 654 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 655 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 656 const PetscScalar *x, *v; 657 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5; 658 PetscInt n, i; 659 const PetscInt m = b->AIJ->rmap->n, *idx; 660 661 PetscFunctionBegin; 662 PetscCall(VecSet(yy, 0.0)); 663 PetscCall(VecGetArrayRead(xx, &x)); 664 PetscCall(VecGetArray(yy, &y)); 665 666 for (i = 0; i < m; i++) { 667 idx = a->j + a->i[i]; 668 v = a->a + a->i[i]; 669 n = a->i[i + 1] - a->i[i]; 670 alpha1 = x[5 * i]; 671 alpha2 = x[5 * i + 1]; 672 alpha3 = x[5 * i + 2]; 673 alpha4 = x[5 * i + 3]; 674 alpha5 = x[5 * i + 4]; 675 while (n-- > 0) { 676 y[5 * (*idx)] += alpha1 * (*v); 677 y[5 * (*idx) + 1] += alpha2 * (*v); 678 y[5 * (*idx) + 2] += alpha3 * (*v); 679 y[5 * (*idx) + 3] += alpha4 * (*v); 680 y[5 * (*idx) + 4] += alpha5 * (*v); 681 idx++; 682 v++; 683 } 684 } 685 PetscCall(PetscLogFlops(10.0 * a->nz)); 686 PetscCall(VecRestoreArrayRead(xx, &x)); 687 PetscCall(VecRestoreArray(yy, &y)); 688 PetscFunctionReturn(0); 689 } 690 691 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) { 692 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 693 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 694 const PetscScalar *x, *v; 695 PetscScalar *y, sum1, sum2, sum3, sum4, sum5; 696 PetscInt n, i, jrow, j; 697 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 698 699 PetscFunctionBegin; 700 if (yy != zz) PetscCall(VecCopy(yy, zz)); 701 PetscCall(VecGetArrayRead(xx, &x)); 702 PetscCall(VecGetArray(zz, &y)); 703 idx = a->j; 704 v = a->a; 705 ii = a->i; 706 707 for (i = 0; i < m; i++) { 708 jrow = ii[i]; 709 n = ii[i + 1] - jrow; 710 sum1 = 0.0; 711 sum2 = 0.0; 712 sum3 = 0.0; 713 sum4 = 0.0; 714 sum5 = 0.0; 715 for (j = 0; j < n; j++) { 716 sum1 += v[jrow] * x[5 * idx[jrow]]; 717 sum2 += v[jrow] * x[5 * idx[jrow] + 1]; 718 sum3 += v[jrow] * x[5 * idx[jrow] + 2]; 719 sum4 += v[jrow] * x[5 * idx[jrow] + 3]; 720 sum5 += v[jrow] * x[5 * idx[jrow] + 4]; 721 jrow++; 722 } 723 y[5 * i] += sum1; 724 y[5 * i + 1] += sum2; 725 y[5 * i + 2] += sum3; 726 y[5 * i + 3] += sum4; 727 y[5 * i + 4] += sum5; 728 } 729 730 PetscCall(PetscLogFlops(10.0 * a->nz)); 731 PetscCall(VecRestoreArrayRead(xx, &x)); 732 PetscCall(VecRestoreArray(zz, &y)); 733 PetscFunctionReturn(0); 734 } 735 736 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) { 737 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 738 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 739 const PetscScalar *x, *v; 740 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5; 741 PetscInt n, i; 742 const PetscInt m = b->AIJ->rmap->n, *idx; 743 744 PetscFunctionBegin; 745 if (yy != zz) PetscCall(VecCopy(yy, zz)); 746 PetscCall(VecGetArrayRead(xx, &x)); 747 PetscCall(VecGetArray(zz, &y)); 748 749 for (i = 0; i < m; i++) { 750 idx = a->j + a->i[i]; 751 v = a->a + a->i[i]; 752 n = a->i[i + 1] - a->i[i]; 753 alpha1 = x[5 * i]; 754 alpha2 = x[5 * i + 1]; 755 alpha3 = x[5 * i + 2]; 756 alpha4 = x[5 * i + 3]; 757 alpha5 = x[5 * i + 4]; 758 while (n-- > 0) { 759 y[5 * (*idx)] += alpha1 * (*v); 760 y[5 * (*idx) + 1] += alpha2 * (*v); 761 y[5 * (*idx) + 2] += alpha3 * (*v); 762 y[5 * (*idx) + 3] += alpha4 * (*v); 763 y[5 * (*idx) + 4] += alpha5 * (*v); 764 idx++; 765 v++; 766 } 767 } 768 PetscCall(PetscLogFlops(10.0 * a->nz)); 769 PetscCall(VecRestoreArrayRead(xx, &x)); 770 PetscCall(VecRestoreArray(zz, &y)); 771 PetscFunctionReturn(0); 772 } 773 774 /* ------------------------------------------------------------------------------*/ 775 PetscErrorCode MatMult_SeqMAIJ_6(Mat A, Vec xx, Vec yy) { 776 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 777 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 778 const PetscScalar *x, *v; 779 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6; 780 PetscInt nonzerorow = 0, n, i, jrow, j; 781 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 782 783 PetscFunctionBegin; 784 PetscCall(VecGetArrayRead(xx, &x)); 785 PetscCall(VecGetArray(yy, &y)); 786 idx = a->j; 787 v = a->a; 788 ii = a->i; 789 790 for (i = 0; i < m; i++) { 791 jrow = ii[i]; 792 n = ii[i + 1] - jrow; 793 sum1 = 0.0; 794 sum2 = 0.0; 795 sum3 = 0.0; 796 sum4 = 0.0; 797 sum5 = 0.0; 798 sum6 = 0.0; 799 800 nonzerorow += (n > 0); 801 for (j = 0; j < n; j++) { 802 sum1 += v[jrow] * x[6 * idx[jrow]]; 803 sum2 += v[jrow] * x[6 * idx[jrow] + 1]; 804 sum3 += v[jrow] * x[6 * idx[jrow] + 2]; 805 sum4 += v[jrow] * x[6 * idx[jrow] + 3]; 806 sum5 += v[jrow] * x[6 * idx[jrow] + 4]; 807 sum6 += v[jrow] * x[6 * idx[jrow] + 5]; 808 jrow++; 809 } 810 y[6 * i] = sum1; 811 y[6 * i + 1] = sum2; 812 y[6 * i + 2] = sum3; 813 y[6 * i + 3] = sum4; 814 y[6 * i + 4] = sum5; 815 y[6 * i + 5] = sum6; 816 } 817 818 PetscCall(PetscLogFlops(12.0 * a->nz - 6.0 * nonzerorow)); 819 PetscCall(VecRestoreArrayRead(xx, &x)); 820 PetscCall(VecRestoreArray(yy, &y)); 821 PetscFunctionReturn(0); 822 } 823 824 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A, Vec xx, Vec yy) { 825 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 826 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 827 const PetscScalar *x, *v; 828 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6; 829 PetscInt n, i; 830 const PetscInt m = b->AIJ->rmap->n, *idx; 831 832 PetscFunctionBegin; 833 PetscCall(VecSet(yy, 0.0)); 834 PetscCall(VecGetArrayRead(xx, &x)); 835 PetscCall(VecGetArray(yy, &y)); 836 837 for (i = 0; i < m; i++) { 838 idx = a->j + a->i[i]; 839 v = a->a + a->i[i]; 840 n = a->i[i + 1] - a->i[i]; 841 alpha1 = x[6 * i]; 842 alpha2 = x[6 * i + 1]; 843 alpha3 = x[6 * i + 2]; 844 alpha4 = x[6 * i + 3]; 845 alpha5 = x[6 * i + 4]; 846 alpha6 = x[6 * i + 5]; 847 while (n-- > 0) { 848 y[6 * (*idx)] += alpha1 * (*v); 849 y[6 * (*idx) + 1] += alpha2 * (*v); 850 y[6 * (*idx) + 2] += alpha3 * (*v); 851 y[6 * (*idx) + 3] += alpha4 * (*v); 852 y[6 * (*idx) + 4] += alpha5 * (*v); 853 y[6 * (*idx) + 5] += alpha6 * (*v); 854 idx++; 855 v++; 856 } 857 } 858 PetscCall(PetscLogFlops(12.0 * a->nz)); 859 PetscCall(VecRestoreArrayRead(xx, &x)); 860 PetscCall(VecRestoreArray(yy, &y)); 861 PetscFunctionReturn(0); 862 } 863 864 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) { 865 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 866 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 867 const PetscScalar *x, *v; 868 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6; 869 PetscInt n, i, jrow, j; 870 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 871 872 PetscFunctionBegin; 873 if (yy != zz) PetscCall(VecCopy(yy, zz)); 874 PetscCall(VecGetArrayRead(xx, &x)); 875 PetscCall(VecGetArray(zz, &y)); 876 idx = a->j; 877 v = a->a; 878 ii = a->i; 879 880 for (i = 0; i < m; i++) { 881 jrow = ii[i]; 882 n = ii[i + 1] - jrow; 883 sum1 = 0.0; 884 sum2 = 0.0; 885 sum3 = 0.0; 886 sum4 = 0.0; 887 sum5 = 0.0; 888 sum6 = 0.0; 889 for (j = 0; j < n; j++) { 890 sum1 += v[jrow] * x[6 * idx[jrow]]; 891 sum2 += v[jrow] * x[6 * idx[jrow] + 1]; 892 sum3 += v[jrow] * x[6 * idx[jrow] + 2]; 893 sum4 += v[jrow] * x[6 * idx[jrow] + 3]; 894 sum5 += v[jrow] * x[6 * idx[jrow] + 4]; 895 sum6 += v[jrow] * x[6 * idx[jrow] + 5]; 896 jrow++; 897 } 898 y[6 * i] += sum1; 899 y[6 * i + 1] += sum2; 900 y[6 * i + 2] += sum3; 901 y[6 * i + 3] += sum4; 902 y[6 * i + 4] += sum5; 903 y[6 * i + 5] += sum6; 904 } 905 906 PetscCall(PetscLogFlops(12.0 * a->nz)); 907 PetscCall(VecRestoreArrayRead(xx, &x)); 908 PetscCall(VecRestoreArray(zz, &y)); 909 PetscFunctionReturn(0); 910 } 911 912 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) { 913 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 914 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 915 const PetscScalar *x, *v; 916 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6; 917 PetscInt n, i; 918 const PetscInt m = b->AIJ->rmap->n, *idx; 919 920 PetscFunctionBegin; 921 if (yy != zz) PetscCall(VecCopy(yy, zz)); 922 PetscCall(VecGetArrayRead(xx, &x)); 923 PetscCall(VecGetArray(zz, &y)); 924 925 for (i = 0; i < m; i++) { 926 idx = a->j + a->i[i]; 927 v = a->a + a->i[i]; 928 n = a->i[i + 1] - a->i[i]; 929 alpha1 = x[6 * i]; 930 alpha2 = x[6 * i + 1]; 931 alpha3 = x[6 * i + 2]; 932 alpha4 = x[6 * i + 3]; 933 alpha5 = x[6 * i + 4]; 934 alpha6 = x[6 * i + 5]; 935 while (n-- > 0) { 936 y[6 * (*idx)] += alpha1 * (*v); 937 y[6 * (*idx) + 1] += alpha2 * (*v); 938 y[6 * (*idx) + 2] += alpha3 * (*v); 939 y[6 * (*idx) + 3] += alpha4 * (*v); 940 y[6 * (*idx) + 4] += alpha5 * (*v); 941 y[6 * (*idx) + 5] += alpha6 * (*v); 942 idx++; 943 v++; 944 } 945 } 946 PetscCall(PetscLogFlops(12.0 * a->nz)); 947 PetscCall(VecRestoreArrayRead(xx, &x)); 948 PetscCall(VecRestoreArray(zz, &y)); 949 PetscFunctionReturn(0); 950 } 951 952 /* ------------------------------------------------------------------------------*/ 953 PetscErrorCode MatMult_SeqMAIJ_7(Mat A, Vec xx, Vec yy) { 954 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 955 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 956 const PetscScalar *x, *v; 957 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7; 958 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 959 PetscInt nonzerorow = 0, n, i, jrow, j; 960 961 PetscFunctionBegin; 962 PetscCall(VecGetArrayRead(xx, &x)); 963 PetscCall(VecGetArray(yy, &y)); 964 idx = a->j; 965 v = a->a; 966 ii = a->i; 967 968 for (i = 0; i < m; i++) { 969 jrow = ii[i]; 970 n = ii[i + 1] - jrow; 971 sum1 = 0.0; 972 sum2 = 0.0; 973 sum3 = 0.0; 974 sum4 = 0.0; 975 sum5 = 0.0; 976 sum6 = 0.0; 977 sum7 = 0.0; 978 979 nonzerorow += (n > 0); 980 for (j = 0; j < n; j++) { 981 sum1 += v[jrow] * x[7 * idx[jrow]]; 982 sum2 += v[jrow] * x[7 * idx[jrow] + 1]; 983 sum3 += v[jrow] * x[7 * idx[jrow] + 2]; 984 sum4 += v[jrow] * x[7 * idx[jrow] + 3]; 985 sum5 += v[jrow] * x[7 * idx[jrow] + 4]; 986 sum6 += v[jrow] * x[7 * idx[jrow] + 5]; 987 sum7 += v[jrow] * x[7 * idx[jrow] + 6]; 988 jrow++; 989 } 990 y[7 * i] = sum1; 991 y[7 * i + 1] = sum2; 992 y[7 * i + 2] = sum3; 993 y[7 * i + 3] = sum4; 994 y[7 * i + 4] = sum5; 995 y[7 * i + 5] = sum6; 996 y[7 * i + 6] = sum7; 997 } 998 999 PetscCall(PetscLogFlops(14.0 * a->nz - 7.0 * nonzerorow)); 1000 PetscCall(VecRestoreArrayRead(xx, &x)); 1001 PetscCall(VecRestoreArray(yy, &y)); 1002 PetscFunctionReturn(0); 1003 } 1004 1005 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A, Vec xx, Vec yy) { 1006 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1007 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1008 const PetscScalar *x, *v; 1009 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7; 1010 const PetscInt m = b->AIJ->rmap->n, *idx; 1011 PetscInt n, i; 1012 1013 PetscFunctionBegin; 1014 PetscCall(VecSet(yy, 0.0)); 1015 PetscCall(VecGetArrayRead(xx, &x)); 1016 PetscCall(VecGetArray(yy, &y)); 1017 1018 for (i = 0; i < m; i++) { 1019 idx = a->j + a->i[i]; 1020 v = a->a + a->i[i]; 1021 n = a->i[i + 1] - a->i[i]; 1022 alpha1 = x[7 * i]; 1023 alpha2 = x[7 * i + 1]; 1024 alpha3 = x[7 * i + 2]; 1025 alpha4 = x[7 * i + 3]; 1026 alpha5 = x[7 * i + 4]; 1027 alpha6 = x[7 * i + 5]; 1028 alpha7 = x[7 * i + 6]; 1029 while (n-- > 0) { 1030 y[7 * (*idx)] += alpha1 * (*v); 1031 y[7 * (*idx) + 1] += alpha2 * (*v); 1032 y[7 * (*idx) + 2] += alpha3 * (*v); 1033 y[7 * (*idx) + 3] += alpha4 * (*v); 1034 y[7 * (*idx) + 4] += alpha5 * (*v); 1035 y[7 * (*idx) + 5] += alpha6 * (*v); 1036 y[7 * (*idx) + 6] += alpha7 * (*v); 1037 idx++; 1038 v++; 1039 } 1040 } 1041 PetscCall(PetscLogFlops(14.0 * a->nz)); 1042 PetscCall(VecRestoreArrayRead(xx, &x)); 1043 PetscCall(VecRestoreArray(yy, &y)); 1044 PetscFunctionReturn(0); 1045 } 1046 1047 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) { 1048 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1049 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1050 const PetscScalar *x, *v; 1051 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1052 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1053 PetscInt n, i, jrow, j; 1054 1055 PetscFunctionBegin; 1056 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1057 PetscCall(VecGetArrayRead(xx, &x)); 1058 PetscCall(VecGetArray(zz, &y)); 1059 idx = a->j; 1060 v = a->a; 1061 ii = a->i; 1062 1063 for (i = 0; i < m; i++) { 1064 jrow = ii[i]; 1065 n = ii[i + 1] - jrow; 1066 sum1 = 0.0; 1067 sum2 = 0.0; 1068 sum3 = 0.0; 1069 sum4 = 0.0; 1070 sum5 = 0.0; 1071 sum6 = 0.0; 1072 sum7 = 0.0; 1073 for (j = 0; j < n; j++) { 1074 sum1 += v[jrow] * x[7 * idx[jrow]]; 1075 sum2 += v[jrow] * x[7 * idx[jrow] + 1]; 1076 sum3 += v[jrow] * x[7 * idx[jrow] + 2]; 1077 sum4 += v[jrow] * x[7 * idx[jrow] + 3]; 1078 sum5 += v[jrow] * x[7 * idx[jrow] + 4]; 1079 sum6 += v[jrow] * x[7 * idx[jrow] + 5]; 1080 sum7 += v[jrow] * x[7 * idx[jrow] + 6]; 1081 jrow++; 1082 } 1083 y[7 * i] += sum1; 1084 y[7 * i + 1] += sum2; 1085 y[7 * i + 2] += sum3; 1086 y[7 * i + 3] += sum4; 1087 y[7 * i + 4] += sum5; 1088 y[7 * i + 5] += sum6; 1089 y[7 * i + 6] += sum7; 1090 } 1091 1092 PetscCall(PetscLogFlops(14.0 * a->nz)); 1093 PetscCall(VecRestoreArrayRead(xx, &x)); 1094 PetscCall(VecRestoreArray(zz, &y)); 1095 PetscFunctionReturn(0); 1096 } 1097 1098 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) { 1099 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1100 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1101 const PetscScalar *x, *v; 1102 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7; 1103 const PetscInt m = b->AIJ->rmap->n, *idx; 1104 PetscInt n, i; 1105 1106 PetscFunctionBegin; 1107 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1108 PetscCall(VecGetArrayRead(xx, &x)); 1109 PetscCall(VecGetArray(zz, &y)); 1110 for (i = 0; i < m; i++) { 1111 idx = a->j + a->i[i]; 1112 v = a->a + a->i[i]; 1113 n = a->i[i + 1] - a->i[i]; 1114 alpha1 = x[7 * i]; 1115 alpha2 = x[7 * i + 1]; 1116 alpha3 = x[7 * i + 2]; 1117 alpha4 = x[7 * i + 3]; 1118 alpha5 = x[7 * i + 4]; 1119 alpha6 = x[7 * i + 5]; 1120 alpha7 = x[7 * i + 6]; 1121 while (n-- > 0) { 1122 y[7 * (*idx)] += alpha1 * (*v); 1123 y[7 * (*idx) + 1] += alpha2 * (*v); 1124 y[7 * (*idx) + 2] += alpha3 * (*v); 1125 y[7 * (*idx) + 3] += alpha4 * (*v); 1126 y[7 * (*idx) + 4] += alpha5 * (*v); 1127 y[7 * (*idx) + 5] += alpha6 * (*v); 1128 y[7 * (*idx) + 6] += alpha7 * (*v); 1129 idx++; 1130 v++; 1131 } 1132 } 1133 PetscCall(PetscLogFlops(14.0 * a->nz)); 1134 PetscCall(VecRestoreArrayRead(xx, &x)); 1135 PetscCall(VecRestoreArray(zz, &y)); 1136 PetscFunctionReturn(0); 1137 } 1138 1139 PetscErrorCode MatMult_SeqMAIJ_8(Mat A, Vec xx, Vec yy) { 1140 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1141 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1142 const PetscScalar *x, *v; 1143 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1144 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1145 PetscInt nonzerorow = 0, n, i, jrow, j; 1146 1147 PetscFunctionBegin; 1148 PetscCall(VecGetArrayRead(xx, &x)); 1149 PetscCall(VecGetArray(yy, &y)); 1150 idx = a->j; 1151 v = a->a; 1152 ii = a->i; 1153 1154 for (i = 0; i < m; i++) { 1155 jrow = ii[i]; 1156 n = ii[i + 1] - jrow; 1157 sum1 = 0.0; 1158 sum2 = 0.0; 1159 sum3 = 0.0; 1160 sum4 = 0.0; 1161 sum5 = 0.0; 1162 sum6 = 0.0; 1163 sum7 = 0.0; 1164 sum8 = 0.0; 1165 1166 nonzerorow += (n > 0); 1167 for (j = 0; j < n; j++) { 1168 sum1 += v[jrow] * x[8 * idx[jrow]]; 1169 sum2 += v[jrow] * x[8 * idx[jrow] + 1]; 1170 sum3 += v[jrow] * x[8 * idx[jrow] + 2]; 1171 sum4 += v[jrow] * x[8 * idx[jrow] + 3]; 1172 sum5 += v[jrow] * x[8 * idx[jrow] + 4]; 1173 sum6 += v[jrow] * x[8 * idx[jrow] + 5]; 1174 sum7 += v[jrow] * x[8 * idx[jrow] + 6]; 1175 sum8 += v[jrow] * x[8 * idx[jrow] + 7]; 1176 jrow++; 1177 } 1178 y[8 * i] = sum1; 1179 y[8 * i + 1] = sum2; 1180 y[8 * i + 2] = sum3; 1181 y[8 * i + 3] = sum4; 1182 y[8 * i + 4] = sum5; 1183 y[8 * i + 5] = sum6; 1184 y[8 * i + 6] = sum7; 1185 y[8 * i + 7] = sum8; 1186 } 1187 1188 PetscCall(PetscLogFlops(16.0 * a->nz - 8.0 * nonzerorow)); 1189 PetscCall(VecRestoreArrayRead(xx, &x)); 1190 PetscCall(VecRestoreArray(yy, &y)); 1191 PetscFunctionReturn(0); 1192 } 1193 1194 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A, Vec xx, Vec yy) { 1195 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1196 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1197 const PetscScalar *x, *v; 1198 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 1199 const PetscInt m = b->AIJ->rmap->n, *idx; 1200 PetscInt n, i; 1201 1202 PetscFunctionBegin; 1203 PetscCall(VecSet(yy, 0.0)); 1204 PetscCall(VecGetArrayRead(xx, &x)); 1205 PetscCall(VecGetArray(yy, &y)); 1206 1207 for (i = 0; i < m; i++) { 1208 idx = a->j + a->i[i]; 1209 v = a->a + a->i[i]; 1210 n = a->i[i + 1] - a->i[i]; 1211 alpha1 = x[8 * i]; 1212 alpha2 = x[8 * i + 1]; 1213 alpha3 = x[8 * i + 2]; 1214 alpha4 = x[8 * i + 3]; 1215 alpha5 = x[8 * i + 4]; 1216 alpha6 = x[8 * i + 5]; 1217 alpha7 = x[8 * i + 6]; 1218 alpha8 = x[8 * i + 7]; 1219 while (n-- > 0) { 1220 y[8 * (*idx)] += alpha1 * (*v); 1221 y[8 * (*idx) + 1] += alpha2 * (*v); 1222 y[8 * (*idx) + 2] += alpha3 * (*v); 1223 y[8 * (*idx) + 3] += alpha4 * (*v); 1224 y[8 * (*idx) + 4] += alpha5 * (*v); 1225 y[8 * (*idx) + 5] += alpha6 * (*v); 1226 y[8 * (*idx) + 6] += alpha7 * (*v); 1227 y[8 * (*idx) + 7] += alpha8 * (*v); 1228 idx++; 1229 v++; 1230 } 1231 } 1232 PetscCall(PetscLogFlops(16.0 * a->nz)); 1233 PetscCall(VecRestoreArrayRead(xx, &x)); 1234 PetscCall(VecRestoreArray(yy, &y)); 1235 PetscFunctionReturn(0); 1236 } 1237 1238 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) { 1239 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1240 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1241 const PetscScalar *x, *v; 1242 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1243 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1244 PetscInt n, i, jrow, j; 1245 1246 PetscFunctionBegin; 1247 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1248 PetscCall(VecGetArrayRead(xx, &x)); 1249 PetscCall(VecGetArray(zz, &y)); 1250 idx = a->j; 1251 v = a->a; 1252 ii = a->i; 1253 1254 for (i = 0; i < m; i++) { 1255 jrow = ii[i]; 1256 n = ii[i + 1] - jrow; 1257 sum1 = 0.0; 1258 sum2 = 0.0; 1259 sum3 = 0.0; 1260 sum4 = 0.0; 1261 sum5 = 0.0; 1262 sum6 = 0.0; 1263 sum7 = 0.0; 1264 sum8 = 0.0; 1265 for (j = 0; j < n; j++) { 1266 sum1 += v[jrow] * x[8 * idx[jrow]]; 1267 sum2 += v[jrow] * x[8 * idx[jrow] + 1]; 1268 sum3 += v[jrow] * x[8 * idx[jrow] + 2]; 1269 sum4 += v[jrow] * x[8 * idx[jrow] + 3]; 1270 sum5 += v[jrow] * x[8 * idx[jrow] + 4]; 1271 sum6 += v[jrow] * x[8 * idx[jrow] + 5]; 1272 sum7 += v[jrow] * x[8 * idx[jrow] + 6]; 1273 sum8 += v[jrow] * x[8 * idx[jrow] + 7]; 1274 jrow++; 1275 } 1276 y[8 * i] += sum1; 1277 y[8 * i + 1] += sum2; 1278 y[8 * i + 2] += sum3; 1279 y[8 * i + 3] += sum4; 1280 y[8 * i + 4] += sum5; 1281 y[8 * i + 5] += sum6; 1282 y[8 * i + 6] += sum7; 1283 y[8 * i + 7] += sum8; 1284 } 1285 1286 PetscCall(PetscLogFlops(16.0 * a->nz)); 1287 PetscCall(VecRestoreArrayRead(xx, &x)); 1288 PetscCall(VecRestoreArray(zz, &y)); 1289 PetscFunctionReturn(0); 1290 } 1291 1292 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) { 1293 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1294 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1295 const PetscScalar *x, *v; 1296 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 1297 const PetscInt m = b->AIJ->rmap->n, *idx; 1298 PetscInt n, i; 1299 1300 PetscFunctionBegin; 1301 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1302 PetscCall(VecGetArrayRead(xx, &x)); 1303 PetscCall(VecGetArray(zz, &y)); 1304 for (i = 0; i < m; i++) { 1305 idx = a->j + a->i[i]; 1306 v = a->a + a->i[i]; 1307 n = a->i[i + 1] - a->i[i]; 1308 alpha1 = x[8 * i]; 1309 alpha2 = x[8 * i + 1]; 1310 alpha3 = x[8 * i + 2]; 1311 alpha4 = x[8 * i + 3]; 1312 alpha5 = x[8 * i + 4]; 1313 alpha6 = x[8 * i + 5]; 1314 alpha7 = x[8 * i + 6]; 1315 alpha8 = x[8 * i + 7]; 1316 while (n-- > 0) { 1317 y[8 * (*idx)] += alpha1 * (*v); 1318 y[8 * (*idx) + 1] += alpha2 * (*v); 1319 y[8 * (*idx) + 2] += alpha3 * (*v); 1320 y[8 * (*idx) + 3] += alpha4 * (*v); 1321 y[8 * (*idx) + 4] += alpha5 * (*v); 1322 y[8 * (*idx) + 5] += alpha6 * (*v); 1323 y[8 * (*idx) + 6] += alpha7 * (*v); 1324 y[8 * (*idx) + 7] += alpha8 * (*v); 1325 idx++; 1326 v++; 1327 } 1328 } 1329 PetscCall(PetscLogFlops(16.0 * a->nz)); 1330 PetscCall(VecRestoreArrayRead(xx, &x)); 1331 PetscCall(VecRestoreArray(zz, &y)); 1332 PetscFunctionReturn(0); 1333 } 1334 1335 /* ------------------------------------------------------------------------------*/ 1336 PetscErrorCode MatMult_SeqMAIJ_9(Mat A, Vec xx, Vec yy) { 1337 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1338 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1339 const PetscScalar *x, *v; 1340 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1341 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1342 PetscInt nonzerorow = 0, n, i, jrow, j; 1343 1344 PetscFunctionBegin; 1345 PetscCall(VecGetArrayRead(xx, &x)); 1346 PetscCall(VecGetArray(yy, &y)); 1347 idx = a->j; 1348 v = a->a; 1349 ii = a->i; 1350 1351 for (i = 0; i < m; i++) { 1352 jrow = ii[i]; 1353 n = ii[i + 1] - jrow; 1354 sum1 = 0.0; 1355 sum2 = 0.0; 1356 sum3 = 0.0; 1357 sum4 = 0.0; 1358 sum5 = 0.0; 1359 sum6 = 0.0; 1360 sum7 = 0.0; 1361 sum8 = 0.0; 1362 sum9 = 0.0; 1363 1364 nonzerorow += (n > 0); 1365 for (j = 0; j < n; j++) { 1366 sum1 += v[jrow] * x[9 * idx[jrow]]; 1367 sum2 += v[jrow] * x[9 * idx[jrow] + 1]; 1368 sum3 += v[jrow] * x[9 * idx[jrow] + 2]; 1369 sum4 += v[jrow] * x[9 * idx[jrow] + 3]; 1370 sum5 += v[jrow] * x[9 * idx[jrow] + 4]; 1371 sum6 += v[jrow] * x[9 * idx[jrow] + 5]; 1372 sum7 += v[jrow] * x[9 * idx[jrow] + 6]; 1373 sum8 += v[jrow] * x[9 * idx[jrow] + 7]; 1374 sum9 += v[jrow] * x[9 * idx[jrow] + 8]; 1375 jrow++; 1376 } 1377 y[9 * i] = sum1; 1378 y[9 * i + 1] = sum2; 1379 y[9 * i + 2] = sum3; 1380 y[9 * i + 3] = sum4; 1381 y[9 * i + 4] = sum5; 1382 y[9 * i + 5] = sum6; 1383 y[9 * i + 6] = sum7; 1384 y[9 * i + 7] = sum8; 1385 y[9 * i + 8] = sum9; 1386 } 1387 1388 PetscCall(PetscLogFlops(18.0 * a->nz - 9 * nonzerorow)); 1389 PetscCall(VecRestoreArrayRead(xx, &x)); 1390 PetscCall(VecRestoreArray(yy, &y)); 1391 PetscFunctionReturn(0); 1392 } 1393 1394 /* ------------------------------------------------------------------------------*/ 1395 1396 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A, Vec xx, Vec yy) { 1397 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1398 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1399 const PetscScalar *x, *v; 1400 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9; 1401 const PetscInt m = b->AIJ->rmap->n, *idx; 1402 PetscInt n, i; 1403 1404 PetscFunctionBegin; 1405 PetscCall(VecSet(yy, 0.0)); 1406 PetscCall(VecGetArrayRead(xx, &x)); 1407 PetscCall(VecGetArray(yy, &y)); 1408 1409 for (i = 0; i < m; i++) { 1410 idx = a->j + a->i[i]; 1411 v = a->a + a->i[i]; 1412 n = a->i[i + 1] - a->i[i]; 1413 alpha1 = x[9 * i]; 1414 alpha2 = x[9 * i + 1]; 1415 alpha3 = x[9 * i + 2]; 1416 alpha4 = x[9 * i + 3]; 1417 alpha5 = x[9 * i + 4]; 1418 alpha6 = x[9 * i + 5]; 1419 alpha7 = x[9 * i + 6]; 1420 alpha8 = x[9 * i + 7]; 1421 alpha9 = x[9 * i + 8]; 1422 while (n-- > 0) { 1423 y[9 * (*idx)] += alpha1 * (*v); 1424 y[9 * (*idx) + 1] += alpha2 * (*v); 1425 y[9 * (*idx) + 2] += alpha3 * (*v); 1426 y[9 * (*idx) + 3] += alpha4 * (*v); 1427 y[9 * (*idx) + 4] += alpha5 * (*v); 1428 y[9 * (*idx) + 5] += alpha6 * (*v); 1429 y[9 * (*idx) + 6] += alpha7 * (*v); 1430 y[9 * (*idx) + 7] += alpha8 * (*v); 1431 y[9 * (*idx) + 8] += alpha9 * (*v); 1432 idx++; 1433 v++; 1434 } 1435 } 1436 PetscCall(PetscLogFlops(18.0 * a->nz)); 1437 PetscCall(VecRestoreArrayRead(xx, &x)); 1438 PetscCall(VecRestoreArray(yy, &y)); 1439 PetscFunctionReturn(0); 1440 } 1441 1442 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) { 1443 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1444 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1445 const PetscScalar *x, *v; 1446 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1447 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1448 PetscInt n, i, jrow, j; 1449 1450 PetscFunctionBegin; 1451 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1452 PetscCall(VecGetArrayRead(xx, &x)); 1453 PetscCall(VecGetArray(zz, &y)); 1454 idx = a->j; 1455 v = a->a; 1456 ii = a->i; 1457 1458 for (i = 0; i < m; i++) { 1459 jrow = ii[i]; 1460 n = ii[i + 1] - jrow; 1461 sum1 = 0.0; 1462 sum2 = 0.0; 1463 sum3 = 0.0; 1464 sum4 = 0.0; 1465 sum5 = 0.0; 1466 sum6 = 0.0; 1467 sum7 = 0.0; 1468 sum8 = 0.0; 1469 sum9 = 0.0; 1470 for (j = 0; j < n; j++) { 1471 sum1 += v[jrow] * x[9 * idx[jrow]]; 1472 sum2 += v[jrow] * x[9 * idx[jrow] + 1]; 1473 sum3 += v[jrow] * x[9 * idx[jrow] + 2]; 1474 sum4 += v[jrow] * x[9 * idx[jrow] + 3]; 1475 sum5 += v[jrow] * x[9 * idx[jrow] + 4]; 1476 sum6 += v[jrow] * x[9 * idx[jrow] + 5]; 1477 sum7 += v[jrow] * x[9 * idx[jrow] + 6]; 1478 sum8 += v[jrow] * x[9 * idx[jrow] + 7]; 1479 sum9 += v[jrow] * x[9 * idx[jrow] + 8]; 1480 jrow++; 1481 } 1482 y[9 * i] += sum1; 1483 y[9 * i + 1] += sum2; 1484 y[9 * i + 2] += sum3; 1485 y[9 * i + 3] += sum4; 1486 y[9 * i + 4] += sum5; 1487 y[9 * i + 5] += sum6; 1488 y[9 * i + 6] += sum7; 1489 y[9 * i + 7] += sum8; 1490 y[9 * i + 8] += sum9; 1491 } 1492 1493 PetscCall(PetscLogFlops(18.0 * a->nz)); 1494 PetscCall(VecRestoreArrayRead(xx, &x)); 1495 PetscCall(VecRestoreArray(zz, &y)); 1496 PetscFunctionReturn(0); 1497 } 1498 1499 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) { 1500 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1501 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1502 const PetscScalar *x, *v; 1503 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9; 1504 const PetscInt m = b->AIJ->rmap->n, *idx; 1505 PetscInt n, i; 1506 1507 PetscFunctionBegin; 1508 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1509 PetscCall(VecGetArrayRead(xx, &x)); 1510 PetscCall(VecGetArray(zz, &y)); 1511 for (i = 0; i < m; i++) { 1512 idx = a->j + a->i[i]; 1513 v = a->a + a->i[i]; 1514 n = a->i[i + 1] - a->i[i]; 1515 alpha1 = x[9 * i]; 1516 alpha2 = x[9 * i + 1]; 1517 alpha3 = x[9 * i + 2]; 1518 alpha4 = x[9 * i + 3]; 1519 alpha5 = x[9 * i + 4]; 1520 alpha6 = x[9 * i + 5]; 1521 alpha7 = x[9 * i + 6]; 1522 alpha8 = x[9 * i + 7]; 1523 alpha9 = x[9 * i + 8]; 1524 while (n-- > 0) { 1525 y[9 * (*idx)] += alpha1 * (*v); 1526 y[9 * (*idx) + 1] += alpha2 * (*v); 1527 y[9 * (*idx) + 2] += alpha3 * (*v); 1528 y[9 * (*idx) + 3] += alpha4 * (*v); 1529 y[9 * (*idx) + 4] += alpha5 * (*v); 1530 y[9 * (*idx) + 5] += alpha6 * (*v); 1531 y[9 * (*idx) + 6] += alpha7 * (*v); 1532 y[9 * (*idx) + 7] += alpha8 * (*v); 1533 y[9 * (*idx) + 8] += alpha9 * (*v); 1534 idx++; 1535 v++; 1536 } 1537 } 1538 PetscCall(PetscLogFlops(18.0 * a->nz)); 1539 PetscCall(VecRestoreArrayRead(xx, &x)); 1540 PetscCall(VecRestoreArray(zz, &y)); 1541 PetscFunctionReturn(0); 1542 } 1543 PetscErrorCode MatMult_SeqMAIJ_10(Mat A, Vec xx, Vec yy) { 1544 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1545 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1546 const PetscScalar *x, *v; 1547 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1548 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1549 PetscInt nonzerorow = 0, n, i, jrow, j; 1550 1551 PetscFunctionBegin; 1552 PetscCall(VecGetArrayRead(xx, &x)); 1553 PetscCall(VecGetArray(yy, &y)); 1554 idx = a->j; 1555 v = a->a; 1556 ii = a->i; 1557 1558 for (i = 0; i < m; i++) { 1559 jrow = ii[i]; 1560 n = ii[i + 1] - jrow; 1561 sum1 = 0.0; 1562 sum2 = 0.0; 1563 sum3 = 0.0; 1564 sum4 = 0.0; 1565 sum5 = 0.0; 1566 sum6 = 0.0; 1567 sum7 = 0.0; 1568 sum8 = 0.0; 1569 sum9 = 0.0; 1570 sum10 = 0.0; 1571 1572 nonzerorow += (n > 0); 1573 for (j = 0; j < n; j++) { 1574 sum1 += v[jrow] * x[10 * idx[jrow]]; 1575 sum2 += v[jrow] * x[10 * idx[jrow] + 1]; 1576 sum3 += v[jrow] * x[10 * idx[jrow] + 2]; 1577 sum4 += v[jrow] * x[10 * idx[jrow] + 3]; 1578 sum5 += v[jrow] * x[10 * idx[jrow] + 4]; 1579 sum6 += v[jrow] * x[10 * idx[jrow] + 5]; 1580 sum7 += v[jrow] * x[10 * idx[jrow] + 6]; 1581 sum8 += v[jrow] * x[10 * idx[jrow] + 7]; 1582 sum9 += v[jrow] * x[10 * idx[jrow] + 8]; 1583 sum10 += v[jrow] * x[10 * idx[jrow] + 9]; 1584 jrow++; 1585 } 1586 y[10 * i] = sum1; 1587 y[10 * i + 1] = sum2; 1588 y[10 * i + 2] = sum3; 1589 y[10 * i + 3] = sum4; 1590 y[10 * i + 4] = sum5; 1591 y[10 * i + 5] = sum6; 1592 y[10 * i + 6] = sum7; 1593 y[10 * i + 7] = sum8; 1594 y[10 * i + 8] = sum9; 1595 y[10 * i + 9] = sum10; 1596 } 1597 1598 PetscCall(PetscLogFlops(20.0 * a->nz - 10.0 * nonzerorow)); 1599 PetscCall(VecRestoreArrayRead(xx, &x)); 1600 PetscCall(VecRestoreArray(yy, &y)); 1601 PetscFunctionReturn(0); 1602 } 1603 1604 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) { 1605 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1606 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1607 const PetscScalar *x, *v; 1608 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1609 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1610 PetscInt n, i, jrow, j; 1611 1612 PetscFunctionBegin; 1613 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1614 PetscCall(VecGetArrayRead(xx, &x)); 1615 PetscCall(VecGetArray(zz, &y)); 1616 idx = a->j; 1617 v = a->a; 1618 ii = a->i; 1619 1620 for (i = 0; i < m; i++) { 1621 jrow = ii[i]; 1622 n = ii[i + 1] - jrow; 1623 sum1 = 0.0; 1624 sum2 = 0.0; 1625 sum3 = 0.0; 1626 sum4 = 0.0; 1627 sum5 = 0.0; 1628 sum6 = 0.0; 1629 sum7 = 0.0; 1630 sum8 = 0.0; 1631 sum9 = 0.0; 1632 sum10 = 0.0; 1633 for (j = 0; j < n; j++) { 1634 sum1 += v[jrow] * x[10 * idx[jrow]]; 1635 sum2 += v[jrow] * x[10 * idx[jrow] + 1]; 1636 sum3 += v[jrow] * x[10 * idx[jrow] + 2]; 1637 sum4 += v[jrow] * x[10 * idx[jrow] + 3]; 1638 sum5 += v[jrow] * x[10 * idx[jrow] + 4]; 1639 sum6 += v[jrow] * x[10 * idx[jrow] + 5]; 1640 sum7 += v[jrow] * x[10 * idx[jrow] + 6]; 1641 sum8 += v[jrow] * x[10 * idx[jrow] + 7]; 1642 sum9 += v[jrow] * x[10 * idx[jrow] + 8]; 1643 sum10 += v[jrow] * x[10 * idx[jrow] + 9]; 1644 jrow++; 1645 } 1646 y[10 * i] += sum1; 1647 y[10 * i + 1] += sum2; 1648 y[10 * i + 2] += sum3; 1649 y[10 * i + 3] += sum4; 1650 y[10 * i + 4] += sum5; 1651 y[10 * i + 5] += sum6; 1652 y[10 * i + 6] += sum7; 1653 y[10 * i + 7] += sum8; 1654 y[10 * i + 8] += sum9; 1655 y[10 * i + 9] += sum10; 1656 } 1657 1658 PetscCall(PetscLogFlops(20.0 * a->nz)); 1659 PetscCall(VecRestoreArrayRead(xx, &x)); 1660 PetscCall(VecRestoreArray(yy, &y)); 1661 PetscFunctionReturn(0); 1662 } 1663 1664 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A, Vec xx, Vec yy) { 1665 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1666 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1667 const PetscScalar *x, *v; 1668 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10; 1669 const PetscInt m = b->AIJ->rmap->n, *idx; 1670 PetscInt n, i; 1671 1672 PetscFunctionBegin; 1673 PetscCall(VecSet(yy, 0.0)); 1674 PetscCall(VecGetArrayRead(xx, &x)); 1675 PetscCall(VecGetArray(yy, &y)); 1676 1677 for (i = 0; i < m; i++) { 1678 idx = a->j + a->i[i]; 1679 v = a->a + a->i[i]; 1680 n = a->i[i + 1] - a->i[i]; 1681 alpha1 = x[10 * i]; 1682 alpha2 = x[10 * i + 1]; 1683 alpha3 = x[10 * i + 2]; 1684 alpha4 = x[10 * i + 3]; 1685 alpha5 = x[10 * i + 4]; 1686 alpha6 = x[10 * i + 5]; 1687 alpha7 = x[10 * i + 6]; 1688 alpha8 = x[10 * i + 7]; 1689 alpha9 = x[10 * i + 8]; 1690 alpha10 = x[10 * i + 9]; 1691 while (n-- > 0) { 1692 y[10 * (*idx)] += alpha1 * (*v); 1693 y[10 * (*idx) + 1] += alpha2 * (*v); 1694 y[10 * (*idx) + 2] += alpha3 * (*v); 1695 y[10 * (*idx) + 3] += alpha4 * (*v); 1696 y[10 * (*idx) + 4] += alpha5 * (*v); 1697 y[10 * (*idx) + 5] += alpha6 * (*v); 1698 y[10 * (*idx) + 6] += alpha7 * (*v); 1699 y[10 * (*idx) + 7] += alpha8 * (*v); 1700 y[10 * (*idx) + 8] += alpha9 * (*v); 1701 y[10 * (*idx) + 9] += alpha10 * (*v); 1702 idx++; 1703 v++; 1704 } 1705 } 1706 PetscCall(PetscLogFlops(20.0 * a->nz)); 1707 PetscCall(VecRestoreArrayRead(xx, &x)); 1708 PetscCall(VecRestoreArray(yy, &y)); 1709 PetscFunctionReturn(0); 1710 } 1711 1712 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) { 1713 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1714 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1715 const PetscScalar *x, *v; 1716 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10; 1717 const PetscInt m = b->AIJ->rmap->n, *idx; 1718 PetscInt n, i; 1719 1720 PetscFunctionBegin; 1721 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1722 PetscCall(VecGetArrayRead(xx, &x)); 1723 PetscCall(VecGetArray(zz, &y)); 1724 for (i = 0; i < m; i++) { 1725 idx = a->j + a->i[i]; 1726 v = a->a + a->i[i]; 1727 n = a->i[i + 1] - a->i[i]; 1728 alpha1 = x[10 * i]; 1729 alpha2 = x[10 * i + 1]; 1730 alpha3 = x[10 * i + 2]; 1731 alpha4 = x[10 * i + 3]; 1732 alpha5 = x[10 * i + 4]; 1733 alpha6 = x[10 * i + 5]; 1734 alpha7 = x[10 * i + 6]; 1735 alpha8 = x[10 * i + 7]; 1736 alpha9 = x[10 * i + 8]; 1737 alpha10 = x[10 * i + 9]; 1738 while (n-- > 0) { 1739 y[10 * (*idx)] += alpha1 * (*v); 1740 y[10 * (*idx) + 1] += alpha2 * (*v); 1741 y[10 * (*idx) + 2] += alpha3 * (*v); 1742 y[10 * (*idx) + 3] += alpha4 * (*v); 1743 y[10 * (*idx) + 4] += alpha5 * (*v); 1744 y[10 * (*idx) + 5] += alpha6 * (*v); 1745 y[10 * (*idx) + 6] += alpha7 * (*v); 1746 y[10 * (*idx) + 7] += alpha8 * (*v); 1747 y[10 * (*idx) + 8] += alpha9 * (*v); 1748 y[10 * (*idx) + 9] += alpha10 * (*v); 1749 idx++; 1750 v++; 1751 } 1752 } 1753 PetscCall(PetscLogFlops(20.0 * a->nz)); 1754 PetscCall(VecRestoreArrayRead(xx, &x)); 1755 PetscCall(VecRestoreArray(zz, &y)); 1756 PetscFunctionReturn(0); 1757 } 1758 1759 /*--------------------------------------------------------------------------------------------*/ 1760 PetscErrorCode MatMult_SeqMAIJ_11(Mat A, Vec xx, Vec yy) { 1761 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1762 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1763 const PetscScalar *x, *v; 1764 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1765 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1766 PetscInt nonzerorow = 0, n, i, jrow, j; 1767 1768 PetscFunctionBegin; 1769 PetscCall(VecGetArrayRead(xx, &x)); 1770 PetscCall(VecGetArray(yy, &y)); 1771 idx = a->j; 1772 v = a->a; 1773 ii = a->i; 1774 1775 for (i = 0; i < m; i++) { 1776 jrow = ii[i]; 1777 n = ii[i + 1] - jrow; 1778 sum1 = 0.0; 1779 sum2 = 0.0; 1780 sum3 = 0.0; 1781 sum4 = 0.0; 1782 sum5 = 0.0; 1783 sum6 = 0.0; 1784 sum7 = 0.0; 1785 sum8 = 0.0; 1786 sum9 = 0.0; 1787 sum10 = 0.0; 1788 sum11 = 0.0; 1789 1790 nonzerorow += (n > 0); 1791 for (j = 0; j < n; j++) { 1792 sum1 += v[jrow] * x[11 * idx[jrow]]; 1793 sum2 += v[jrow] * x[11 * idx[jrow] + 1]; 1794 sum3 += v[jrow] * x[11 * idx[jrow] + 2]; 1795 sum4 += v[jrow] * x[11 * idx[jrow] + 3]; 1796 sum5 += v[jrow] * x[11 * idx[jrow] + 4]; 1797 sum6 += v[jrow] * x[11 * idx[jrow] + 5]; 1798 sum7 += v[jrow] * x[11 * idx[jrow] + 6]; 1799 sum8 += v[jrow] * x[11 * idx[jrow] + 7]; 1800 sum9 += v[jrow] * x[11 * idx[jrow] + 8]; 1801 sum10 += v[jrow] * x[11 * idx[jrow] + 9]; 1802 sum11 += v[jrow] * x[11 * idx[jrow] + 10]; 1803 jrow++; 1804 } 1805 y[11 * i] = sum1; 1806 y[11 * i + 1] = sum2; 1807 y[11 * i + 2] = sum3; 1808 y[11 * i + 3] = sum4; 1809 y[11 * i + 4] = sum5; 1810 y[11 * i + 5] = sum6; 1811 y[11 * i + 6] = sum7; 1812 y[11 * i + 7] = sum8; 1813 y[11 * i + 8] = sum9; 1814 y[11 * i + 9] = sum10; 1815 y[11 * i + 10] = sum11; 1816 } 1817 1818 PetscCall(PetscLogFlops(22.0 * a->nz - 11 * nonzerorow)); 1819 PetscCall(VecRestoreArrayRead(xx, &x)); 1820 PetscCall(VecRestoreArray(yy, &y)); 1821 PetscFunctionReturn(0); 1822 } 1823 1824 PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) { 1825 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1826 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1827 const PetscScalar *x, *v; 1828 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; 1829 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1830 PetscInt n, i, jrow, j; 1831 1832 PetscFunctionBegin; 1833 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1834 PetscCall(VecGetArrayRead(xx, &x)); 1835 PetscCall(VecGetArray(zz, &y)); 1836 idx = a->j; 1837 v = a->a; 1838 ii = a->i; 1839 1840 for (i = 0; i < m; i++) { 1841 jrow = ii[i]; 1842 n = ii[i + 1] - jrow; 1843 sum1 = 0.0; 1844 sum2 = 0.0; 1845 sum3 = 0.0; 1846 sum4 = 0.0; 1847 sum5 = 0.0; 1848 sum6 = 0.0; 1849 sum7 = 0.0; 1850 sum8 = 0.0; 1851 sum9 = 0.0; 1852 sum10 = 0.0; 1853 sum11 = 0.0; 1854 for (j = 0; j < n; j++) { 1855 sum1 += v[jrow] * x[11 * idx[jrow]]; 1856 sum2 += v[jrow] * x[11 * idx[jrow] + 1]; 1857 sum3 += v[jrow] * x[11 * idx[jrow] + 2]; 1858 sum4 += v[jrow] * x[11 * idx[jrow] + 3]; 1859 sum5 += v[jrow] * x[11 * idx[jrow] + 4]; 1860 sum6 += v[jrow] * x[11 * idx[jrow] + 5]; 1861 sum7 += v[jrow] * x[11 * idx[jrow] + 6]; 1862 sum8 += v[jrow] * x[11 * idx[jrow] + 7]; 1863 sum9 += v[jrow] * x[11 * idx[jrow] + 8]; 1864 sum10 += v[jrow] * x[11 * idx[jrow] + 9]; 1865 sum11 += v[jrow] * x[11 * idx[jrow] + 10]; 1866 jrow++; 1867 } 1868 y[11 * i] += sum1; 1869 y[11 * i + 1] += sum2; 1870 y[11 * i + 2] += sum3; 1871 y[11 * i + 3] += sum4; 1872 y[11 * i + 4] += sum5; 1873 y[11 * i + 5] += sum6; 1874 y[11 * i + 6] += sum7; 1875 y[11 * i + 7] += sum8; 1876 y[11 * i + 8] += sum9; 1877 y[11 * i + 9] += sum10; 1878 y[11 * i + 10] += sum11; 1879 } 1880 1881 PetscCall(PetscLogFlops(22.0 * a->nz)); 1882 PetscCall(VecRestoreArrayRead(xx, &x)); 1883 PetscCall(VecRestoreArray(yy, &y)); 1884 PetscFunctionReturn(0); 1885 } 1886 1887 PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A, Vec xx, Vec yy) { 1888 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1889 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1890 const PetscScalar *x, *v; 1891 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11; 1892 const PetscInt m = b->AIJ->rmap->n, *idx; 1893 PetscInt n, i; 1894 1895 PetscFunctionBegin; 1896 PetscCall(VecSet(yy, 0.0)); 1897 PetscCall(VecGetArrayRead(xx, &x)); 1898 PetscCall(VecGetArray(yy, &y)); 1899 1900 for (i = 0; i < m; i++) { 1901 idx = a->j + a->i[i]; 1902 v = a->a + a->i[i]; 1903 n = a->i[i + 1] - a->i[i]; 1904 alpha1 = x[11 * i]; 1905 alpha2 = x[11 * i + 1]; 1906 alpha3 = x[11 * i + 2]; 1907 alpha4 = x[11 * i + 3]; 1908 alpha5 = x[11 * i + 4]; 1909 alpha6 = x[11 * i + 5]; 1910 alpha7 = x[11 * i + 6]; 1911 alpha8 = x[11 * i + 7]; 1912 alpha9 = x[11 * i + 8]; 1913 alpha10 = x[11 * i + 9]; 1914 alpha11 = x[11 * i + 10]; 1915 while (n-- > 0) { 1916 y[11 * (*idx)] += alpha1 * (*v); 1917 y[11 * (*idx) + 1] += alpha2 * (*v); 1918 y[11 * (*idx) + 2] += alpha3 * (*v); 1919 y[11 * (*idx) + 3] += alpha4 * (*v); 1920 y[11 * (*idx) + 4] += alpha5 * (*v); 1921 y[11 * (*idx) + 5] += alpha6 * (*v); 1922 y[11 * (*idx) + 6] += alpha7 * (*v); 1923 y[11 * (*idx) + 7] += alpha8 * (*v); 1924 y[11 * (*idx) + 8] += alpha9 * (*v); 1925 y[11 * (*idx) + 9] += alpha10 * (*v); 1926 y[11 * (*idx) + 10] += alpha11 * (*v); 1927 idx++; 1928 v++; 1929 } 1930 } 1931 PetscCall(PetscLogFlops(22.0 * a->nz)); 1932 PetscCall(VecRestoreArrayRead(xx, &x)); 1933 PetscCall(VecRestoreArray(yy, &y)); 1934 PetscFunctionReturn(0); 1935 } 1936 1937 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) { 1938 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1939 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1940 const PetscScalar *x, *v; 1941 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11; 1942 const PetscInt m = b->AIJ->rmap->n, *idx; 1943 PetscInt n, i; 1944 1945 PetscFunctionBegin; 1946 if (yy != zz) PetscCall(VecCopy(yy, zz)); 1947 PetscCall(VecGetArrayRead(xx, &x)); 1948 PetscCall(VecGetArray(zz, &y)); 1949 for (i = 0; i < m; i++) { 1950 idx = a->j + a->i[i]; 1951 v = a->a + a->i[i]; 1952 n = a->i[i + 1] - a->i[i]; 1953 alpha1 = x[11 * i]; 1954 alpha2 = x[11 * i + 1]; 1955 alpha3 = x[11 * i + 2]; 1956 alpha4 = x[11 * i + 3]; 1957 alpha5 = x[11 * i + 4]; 1958 alpha6 = x[11 * i + 5]; 1959 alpha7 = x[11 * i + 6]; 1960 alpha8 = x[11 * i + 7]; 1961 alpha9 = x[11 * i + 8]; 1962 alpha10 = x[11 * i + 9]; 1963 alpha11 = x[11 * i + 10]; 1964 while (n-- > 0) { 1965 y[11 * (*idx)] += alpha1 * (*v); 1966 y[11 * (*idx) + 1] += alpha2 * (*v); 1967 y[11 * (*idx) + 2] += alpha3 * (*v); 1968 y[11 * (*idx) + 3] += alpha4 * (*v); 1969 y[11 * (*idx) + 4] += alpha5 * (*v); 1970 y[11 * (*idx) + 5] += alpha6 * (*v); 1971 y[11 * (*idx) + 6] += alpha7 * (*v); 1972 y[11 * (*idx) + 7] += alpha8 * (*v); 1973 y[11 * (*idx) + 8] += alpha9 * (*v); 1974 y[11 * (*idx) + 9] += alpha10 * (*v); 1975 y[11 * (*idx) + 10] += alpha11 * (*v); 1976 idx++; 1977 v++; 1978 } 1979 } 1980 PetscCall(PetscLogFlops(22.0 * a->nz)); 1981 PetscCall(VecRestoreArrayRead(xx, &x)); 1982 PetscCall(VecRestoreArray(zz, &y)); 1983 PetscFunctionReturn(0); 1984 } 1985 1986 /*--------------------------------------------------------------------------------------------*/ 1987 PetscErrorCode MatMult_SeqMAIJ_16(Mat A, Vec xx, Vec yy) { 1988 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 1989 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 1990 const PetscScalar *x, *v; 1991 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1992 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1993 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 1994 PetscInt nonzerorow = 0, n, i, jrow, j; 1995 1996 PetscFunctionBegin; 1997 PetscCall(VecGetArrayRead(xx, &x)); 1998 PetscCall(VecGetArray(yy, &y)); 1999 idx = a->j; 2000 v = a->a; 2001 ii = a->i; 2002 2003 for (i = 0; i < m; i++) { 2004 jrow = ii[i]; 2005 n = ii[i + 1] - jrow; 2006 sum1 = 0.0; 2007 sum2 = 0.0; 2008 sum3 = 0.0; 2009 sum4 = 0.0; 2010 sum5 = 0.0; 2011 sum6 = 0.0; 2012 sum7 = 0.0; 2013 sum8 = 0.0; 2014 sum9 = 0.0; 2015 sum10 = 0.0; 2016 sum11 = 0.0; 2017 sum12 = 0.0; 2018 sum13 = 0.0; 2019 sum14 = 0.0; 2020 sum15 = 0.0; 2021 sum16 = 0.0; 2022 2023 nonzerorow += (n > 0); 2024 for (j = 0; j < n; j++) { 2025 sum1 += v[jrow] * x[16 * idx[jrow]]; 2026 sum2 += v[jrow] * x[16 * idx[jrow] + 1]; 2027 sum3 += v[jrow] * x[16 * idx[jrow] + 2]; 2028 sum4 += v[jrow] * x[16 * idx[jrow] + 3]; 2029 sum5 += v[jrow] * x[16 * idx[jrow] + 4]; 2030 sum6 += v[jrow] * x[16 * idx[jrow] + 5]; 2031 sum7 += v[jrow] * x[16 * idx[jrow] + 6]; 2032 sum8 += v[jrow] * x[16 * idx[jrow] + 7]; 2033 sum9 += v[jrow] * x[16 * idx[jrow] + 8]; 2034 sum10 += v[jrow] * x[16 * idx[jrow] + 9]; 2035 sum11 += v[jrow] * x[16 * idx[jrow] + 10]; 2036 sum12 += v[jrow] * x[16 * idx[jrow] + 11]; 2037 sum13 += v[jrow] * x[16 * idx[jrow] + 12]; 2038 sum14 += v[jrow] * x[16 * idx[jrow] + 13]; 2039 sum15 += v[jrow] * x[16 * idx[jrow] + 14]; 2040 sum16 += v[jrow] * x[16 * idx[jrow] + 15]; 2041 jrow++; 2042 } 2043 y[16 * i] = sum1; 2044 y[16 * i + 1] = sum2; 2045 y[16 * i + 2] = sum3; 2046 y[16 * i + 3] = sum4; 2047 y[16 * i + 4] = sum5; 2048 y[16 * i + 5] = sum6; 2049 y[16 * i + 6] = sum7; 2050 y[16 * i + 7] = sum8; 2051 y[16 * i + 8] = sum9; 2052 y[16 * i + 9] = sum10; 2053 y[16 * i + 10] = sum11; 2054 y[16 * i + 11] = sum12; 2055 y[16 * i + 12] = sum13; 2056 y[16 * i + 13] = sum14; 2057 y[16 * i + 14] = sum15; 2058 y[16 * i + 15] = sum16; 2059 } 2060 2061 PetscCall(PetscLogFlops(32.0 * a->nz - 16.0 * nonzerorow)); 2062 PetscCall(VecRestoreArrayRead(xx, &x)); 2063 PetscCall(VecRestoreArray(yy, &y)); 2064 PetscFunctionReturn(0); 2065 } 2066 2067 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A, Vec xx, Vec yy) { 2068 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2069 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2070 const PetscScalar *x, *v; 2071 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 2072 PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16; 2073 const PetscInt m = b->AIJ->rmap->n, *idx; 2074 PetscInt n, i; 2075 2076 PetscFunctionBegin; 2077 PetscCall(VecSet(yy, 0.0)); 2078 PetscCall(VecGetArrayRead(xx, &x)); 2079 PetscCall(VecGetArray(yy, &y)); 2080 2081 for (i = 0; i < m; i++) { 2082 idx = a->j + a->i[i]; 2083 v = a->a + a->i[i]; 2084 n = a->i[i + 1] - a->i[i]; 2085 alpha1 = x[16 * i]; 2086 alpha2 = x[16 * i + 1]; 2087 alpha3 = x[16 * i + 2]; 2088 alpha4 = x[16 * i + 3]; 2089 alpha5 = x[16 * i + 4]; 2090 alpha6 = x[16 * i + 5]; 2091 alpha7 = x[16 * i + 6]; 2092 alpha8 = x[16 * i + 7]; 2093 alpha9 = x[16 * i + 8]; 2094 alpha10 = x[16 * i + 9]; 2095 alpha11 = x[16 * i + 10]; 2096 alpha12 = x[16 * i + 11]; 2097 alpha13 = x[16 * i + 12]; 2098 alpha14 = x[16 * i + 13]; 2099 alpha15 = x[16 * i + 14]; 2100 alpha16 = x[16 * i + 15]; 2101 while (n-- > 0) { 2102 y[16 * (*idx)] += alpha1 * (*v); 2103 y[16 * (*idx) + 1] += alpha2 * (*v); 2104 y[16 * (*idx) + 2] += alpha3 * (*v); 2105 y[16 * (*idx) + 3] += alpha4 * (*v); 2106 y[16 * (*idx) + 4] += alpha5 * (*v); 2107 y[16 * (*idx) + 5] += alpha6 * (*v); 2108 y[16 * (*idx) + 6] += alpha7 * (*v); 2109 y[16 * (*idx) + 7] += alpha8 * (*v); 2110 y[16 * (*idx) + 8] += alpha9 * (*v); 2111 y[16 * (*idx) + 9] += alpha10 * (*v); 2112 y[16 * (*idx) + 10] += alpha11 * (*v); 2113 y[16 * (*idx) + 11] += alpha12 * (*v); 2114 y[16 * (*idx) + 12] += alpha13 * (*v); 2115 y[16 * (*idx) + 13] += alpha14 * (*v); 2116 y[16 * (*idx) + 14] += alpha15 * (*v); 2117 y[16 * (*idx) + 15] += alpha16 * (*v); 2118 idx++; 2119 v++; 2120 } 2121 } 2122 PetscCall(PetscLogFlops(32.0 * a->nz)); 2123 PetscCall(VecRestoreArrayRead(xx, &x)); 2124 PetscCall(VecRestoreArray(yy, &y)); 2125 PetscFunctionReturn(0); 2126 } 2127 2128 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) { 2129 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2130 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2131 const PetscScalar *x, *v; 2132 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2133 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 2134 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2135 PetscInt n, i, jrow, j; 2136 2137 PetscFunctionBegin; 2138 if (yy != zz) PetscCall(VecCopy(yy, zz)); 2139 PetscCall(VecGetArrayRead(xx, &x)); 2140 PetscCall(VecGetArray(zz, &y)); 2141 idx = a->j; 2142 v = a->a; 2143 ii = a->i; 2144 2145 for (i = 0; i < m; i++) { 2146 jrow = ii[i]; 2147 n = ii[i + 1] - jrow; 2148 sum1 = 0.0; 2149 sum2 = 0.0; 2150 sum3 = 0.0; 2151 sum4 = 0.0; 2152 sum5 = 0.0; 2153 sum6 = 0.0; 2154 sum7 = 0.0; 2155 sum8 = 0.0; 2156 sum9 = 0.0; 2157 sum10 = 0.0; 2158 sum11 = 0.0; 2159 sum12 = 0.0; 2160 sum13 = 0.0; 2161 sum14 = 0.0; 2162 sum15 = 0.0; 2163 sum16 = 0.0; 2164 for (j = 0; j < n; j++) { 2165 sum1 += v[jrow] * x[16 * idx[jrow]]; 2166 sum2 += v[jrow] * x[16 * idx[jrow] + 1]; 2167 sum3 += v[jrow] * x[16 * idx[jrow] + 2]; 2168 sum4 += v[jrow] * x[16 * idx[jrow] + 3]; 2169 sum5 += v[jrow] * x[16 * idx[jrow] + 4]; 2170 sum6 += v[jrow] * x[16 * idx[jrow] + 5]; 2171 sum7 += v[jrow] * x[16 * idx[jrow] + 6]; 2172 sum8 += v[jrow] * x[16 * idx[jrow] + 7]; 2173 sum9 += v[jrow] * x[16 * idx[jrow] + 8]; 2174 sum10 += v[jrow] * x[16 * idx[jrow] + 9]; 2175 sum11 += v[jrow] * x[16 * idx[jrow] + 10]; 2176 sum12 += v[jrow] * x[16 * idx[jrow] + 11]; 2177 sum13 += v[jrow] * x[16 * idx[jrow] + 12]; 2178 sum14 += v[jrow] * x[16 * idx[jrow] + 13]; 2179 sum15 += v[jrow] * x[16 * idx[jrow] + 14]; 2180 sum16 += v[jrow] * x[16 * idx[jrow] + 15]; 2181 jrow++; 2182 } 2183 y[16 * i] += sum1; 2184 y[16 * i + 1] += sum2; 2185 y[16 * i + 2] += sum3; 2186 y[16 * i + 3] += sum4; 2187 y[16 * i + 4] += sum5; 2188 y[16 * i + 5] += sum6; 2189 y[16 * i + 6] += sum7; 2190 y[16 * i + 7] += sum8; 2191 y[16 * i + 8] += sum9; 2192 y[16 * i + 9] += sum10; 2193 y[16 * i + 10] += sum11; 2194 y[16 * i + 11] += sum12; 2195 y[16 * i + 12] += sum13; 2196 y[16 * i + 13] += sum14; 2197 y[16 * i + 14] += sum15; 2198 y[16 * i + 15] += sum16; 2199 } 2200 2201 PetscCall(PetscLogFlops(32.0 * a->nz)); 2202 PetscCall(VecRestoreArrayRead(xx, &x)); 2203 PetscCall(VecRestoreArray(zz, &y)); 2204 PetscFunctionReturn(0); 2205 } 2206 2207 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) { 2208 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2209 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2210 const PetscScalar *x, *v; 2211 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 2212 PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16; 2213 const PetscInt m = b->AIJ->rmap->n, *idx; 2214 PetscInt n, i; 2215 2216 PetscFunctionBegin; 2217 if (yy != zz) PetscCall(VecCopy(yy, zz)); 2218 PetscCall(VecGetArrayRead(xx, &x)); 2219 PetscCall(VecGetArray(zz, &y)); 2220 for (i = 0; i < m; i++) { 2221 idx = a->j + a->i[i]; 2222 v = a->a + a->i[i]; 2223 n = a->i[i + 1] - a->i[i]; 2224 alpha1 = x[16 * i]; 2225 alpha2 = x[16 * i + 1]; 2226 alpha3 = x[16 * i + 2]; 2227 alpha4 = x[16 * i + 3]; 2228 alpha5 = x[16 * i + 4]; 2229 alpha6 = x[16 * i + 5]; 2230 alpha7 = x[16 * i + 6]; 2231 alpha8 = x[16 * i + 7]; 2232 alpha9 = x[16 * i + 8]; 2233 alpha10 = x[16 * i + 9]; 2234 alpha11 = x[16 * i + 10]; 2235 alpha12 = x[16 * i + 11]; 2236 alpha13 = x[16 * i + 12]; 2237 alpha14 = x[16 * i + 13]; 2238 alpha15 = x[16 * i + 14]; 2239 alpha16 = x[16 * i + 15]; 2240 while (n-- > 0) { 2241 y[16 * (*idx)] += alpha1 * (*v); 2242 y[16 * (*idx) + 1] += alpha2 * (*v); 2243 y[16 * (*idx) + 2] += alpha3 * (*v); 2244 y[16 * (*idx) + 3] += alpha4 * (*v); 2245 y[16 * (*idx) + 4] += alpha5 * (*v); 2246 y[16 * (*idx) + 5] += alpha6 * (*v); 2247 y[16 * (*idx) + 6] += alpha7 * (*v); 2248 y[16 * (*idx) + 7] += alpha8 * (*v); 2249 y[16 * (*idx) + 8] += alpha9 * (*v); 2250 y[16 * (*idx) + 9] += alpha10 * (*v); 2251 y[16 * (*idx) + 10] += alpha11 * (*v); 2252 y[16 * (*idx) + 11] += alpha12 * (*v); 2253 y[16 * (*idx) + 12] += alpha13 * (*v); 2254 y[16 * (*idx) + 13] += alpha14 * (*v); 2255 y[16 * (*idx) + 14] += alpha15 * (*v); 2256 y[16 * (*idx) + 15] += alpha16 * (*v); 2257 idx++; 2258 v++; 2259 } 2260 } 2261 PetscCall(PetscLogFlops(32.0 * a->nz)); 2262 PetscCall(VecRestoreArrayRead(xx, &x)); 2263 PetscCall(VecRestoreArray(zz, &y)); 2264 PetscFunctionReturn(0); 2265 } 2266 2267 /*--------------------------------------------------------------------------------------------*/ 2268 PetscErrorCode MatMult_SeqMAIJ_18(Mat A, Vec xx, Vec yy) { 2269 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2270 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2271 const PetscScalar *x, *v; 2272 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2273 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2274 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2275 PetscInt nonzerorow = 0, n, i, jrow, j; 2276 2277 PetscFunctionBegin; 2278 PetscCall(VecGetArrayRead(xx, &x)); 2279 PetscCall(VecGetArray(yy, &y)); 2280 idx = a->j; 2281 v = a->a; 2282 ii = a->i; 2283 2284 for (i = 0; i < m; i++) { 2285 jrow = ii[i]; 2286 n = ii[i + 1] - jrow; 2287 sum1 = 0.0; 2288 sum2 = 0.0; 2289 sum3 = 0.0; 2290 sum4 = 0.0; 2291 sum5 = 0.0; 2292 sum6 = 0.0; 2293 sum7 = 0.0; 2294 sum8 = 0.0; 2295 sum9 = 0.0; 2296 sum10 = 0.0; 2297 sum11 = 0.0; 2298 sum12 = 0.0; 2299 sum13 = 0.0; 2300 sum14 = 0.0; 2301 sum15 = 0.0; 2302 sum16 = 0.0; 2303 sum17 = 0.0; 2304 sum18 = 0.0; 2305 2306 nonzerorow += (n > 0); 2307 for (j = 0; j < n; j++) { 2308 sum1 += v[jrow] * x[18 * idx[jrow]]; 2309 sum2 += v[jrow] * x[18 * idx[jrow] + 1]; 2310 sum3 += v[jrow] * x[18 * idx[jrow] + 2]; 2311 sum4 += v[jrow] * x[18 * idx[jrow] + 3]; 2312 sum5 += v[jrow] * x[18 * idx[jrow] + 4]; 2313 sum6 += v[jrow] * x[18 * idx[jrow] + 5]; 2314 sum7 += v[jrow] * x[18 * idx[jrow] + 6]; 2315 sum8 += v[jrow] * x[18 * idx[jrow] + 7]; 2316 sum9 += v[jrow] * x[18 * idx[jrow] + 8]; 2317 sum10 += v[jrow] * x[18 * idx[jrow] + 9]; 2318 sum11 += v[jrow] * x[18 * idx[jrow] + 10]; 2319 sum12 += v[jrow] * x[18 * idx[jrow] + 11]; 2320 sum13 += v[jrow] * x[18 * idx[jrow] + 12]; 2321 sum14 += v[jrow] * x[18 * idx[jrow] + 13]; 2322 sum15 += v[jrow] * x[18 * idx[jrow] + 14]; 2323 sum16 += v[jrow] * x[18 * idx[jrow] + 15]; 2324 sum17 += v[jrow] * x[18 * idx[jrow] + 16]; 2325 sum18 += v[jrow] * x[18 * idx[jrow] + 17]; 2326 jrow++; 2327 } 2328 y[18 * i] = sum1; 2329 y[18 * i + 1] = sum2; 2330 y[18 * i + 2] = sum3; 2331 y[18 * i + 3] = sum4; 2332 y[18 * i + 4] = sum5; 2333 y[18 * i + 5] = sum6; 2334 y[18 * i + 6] = sum7; 2335 y[18 * i + 7] = sum8; 2336 y[18 * i + 8] = sum9; 2337 y[18 * i + 9] = sum10; 2338 y[18 * i + 10] = sum11; 2339 y[18 * i + 11] = sum12; 2340 y[18 * i + 12] = sum13; 2341 y[18 * i + 13] = sum14; 2342 y[18 * i + 14] = sum15; 2343 y[18 * i + 15] = sum16; 2344 y[18 * i + 16] = sum17; 2345 y[18 * i + 17] = sum18; 2346 } 2347 2348 PetscCall(PetscLogFlops(36.0 * a->nz - 18.0 * nonzerorow)); 2349 PetscCall(VecRestoreArrayRead(xx, &x)); 2350 PetscCall(VecRestoreArray(yy, &y)); 2351 PetscFunctionReturn(0); 2352 } 2353 2354 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A, Vec xx, Vec yy) { 2355 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2356 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2357 const PetscScalar *x, *v; 2358 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 2359 PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18; 2360 const PetscInt m = b->AIJ->rmap->n, *idx; 2361 PetscInt n, i; 2362 2363 PetscFunctionBegin; 2364 PetscCall(VecSet(yy, 0.0)); 2365 PetscCall(VecGetArrayRead(xx, &x)); 2366 PetscCall(VecGetArray(yy, &y)); 2367 2368 for (i = 0; i < m; i++) { 2369 idx = a->j + a->i[i]; 2370 v = a->a + a->i[i]; 2371 n = a->i[i + 1] - a->i[i]; 2372 alpha1 = x[18 * i]; 2373 alpha2 = x[18 * i + 1]; 2374 alpha3 = x[18 * i + 2]; 2375 alpha4 = x[18 * i + 3]; 2376 alpha5 = x[18 * i + 4]; 2377 alpha6 = x[18 * i + 5]; 2378 alpha7 = x[18 * i + 6]; 2379 alpha8 = x[18 * i + 7]; 2380 alpha9 = x[18 * i + 8]; 2381 alpha10 = x[18 * i + 9]; 2382 alpha11 = x[18 * i + 10]; 2383 alpha12 = x[18 * i + 11]; 2384 alpha13 = x[18 * i + 12]; 2385 alpha14 = x[18 * i + 13]; 2386 alpha15 = x[18 * i + 14]; 2387 alpha16 = x[18 * i + 15]; 2388 alpha17 = x[18 * i + 16]; 2389 alpha18 = x[18 * i + 17]; 2390 while (n-- > 0) { 2391 y[18 * (*idx)] += alpha1 * (*v); 2392 y[18 * (*idx) + 1] += alpha2 * (*v); 2393 y[18 * (*idx) + 2] += alpha3 * (*v); 2394 y[18 * (*idx) + 3] += alpha4 * (*v); 2395 y[18 * (*idx) + 4] += alpha5 * (*v); 2396 y[18 * (*idx) + 5] += alpha6 * (*v); 2397 y[18 * (*idx) + 6] += alpha7 * (*v); 2398 y[18 * (*idx) + 7] += alpha8 * (*v); 2399 y[18 * (*idx) + 8] += alpha9 * (*v); 2400 y[18 * (*idx) + 9] += alpha10 * (*v); 2401 y[18 * (*idx) + 10] += alpha11 * (*v); 2402 y[18 * (*idx) + 11] += alpha12 * (*v); 2403 y[18 * (*idx) + 12] += alpha13 * (*v); 2404 y[18 * (*idx) + 13] += alpha14 * (*v); 2405 y[18 * (*idx) + 14] += alpha15 * (*v); 2406 y[18 * (*idx) + 15] += alpha16 * (*v); 2407 y[18 * (*idx) + 16] += alpha17 * (*v); 2408 y[18 * (*idx) + 17] += alpha18 * (*v); 2409 idx++; 2410 v++; 2411 } 2412 } 2413 PetscCall(PetscLogFlops(36.0 * a->nz)); 2414 PetscCall(VecRestoreArrayRead(xx, &x)); 2415 PetscCall(VecRestoreArray(yy, &y)); 2416 PetscFunctionReturn(0); 2417 } 2418 2419 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) { 2420 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2421 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2422 const PetscScalar *x, *v; 2423 PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2424 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2425 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2426 PetscInt n, i, jrow, j; 2427 2428 PetscFunctionBegin; 2429 if (yy != zz) PetscCall(VecCopy(yy, zz)); 2430 PetscCall(VecGetArrayRead(xx, &x)); 2431 PetscCall(VecGetArray(zz, &y)); 2432 idx = a->j; 2433 v = a->a; 2434 ii = a->i; 2435 2436 for (i = 0; i < m; i++) { 2437 jrow = ii[i]; 2438 n = ii[i + 1] - jrow; 2439 sum1 = 0.0; 2440 sum2 = 0.0; 2441 sum3 = 0.0; 2442 sum4 = 0.0; 2443 sum5 = 0.0; 2444 sum6 = 0.0; 2445 sum7 = 0.0; 2446 sum8 = 0.0; 2447 sum9 = 0.0; 2448 sum10 = 0.0; 2449 sum11 = 0.0; 2450 sum12 = 0.0; 2451 sum13 = 0.0; 2452 sum14 = 0.0; 2453 sum15 = 0.0; 2454 sum16 = 0.0; 2455 sum17 = 0.0; 2456 sum18 = 0.0; 2457 for (j = 0; j < n; j++) { 2458 sum1 += v[jrow] * x[18 * idx[jrow]]; 2459 sum2 += v[jrow] * x[18 * idx[jrow] + 1]; 2460 sum3 += v[jrow] * x[18 * idx[jrow] + 2]; 2461 sum4 += v[jrow] * x[18 * idx[jrow] + 3]; 2462 sum5 += v[jrow] * x[18 * idx[jrow] + 4]; 2463 sum6 += v[jrow] * x[18 * idx[jrow] + 5]; 2464 sum7 += v[jrow] * x[18 * idx[jrow] + 6]; 2465 sum8 += v[jrow] * x[18 * idx[jrow] + 7]; 2466 sum9 += v[jrow] * x[18 * idx[jrow] + 8]; 2467 sum10 += v[jrow] * x[18 * idx[jrow] + 9]; 2468 sum11 += v[jrow] * x[18 * idx[jrow] + 10]; 2469 sum12 += v[jrow] * x[18 * idx[jrow] + 11]; 2470 sum13 += v[jrow] * x[18 * idx[jrow] + 12]; 2471 sum14 += v[jrow] * x[18 * idx[jrow] + 13]; 2472 sum15 += v[jrow] * x[18 * idx[jrow] + 14]; 2473 sum16 += v[jrow] * x[18 * idx[jrow] + 15]; 2474 sum17 += v[jrow] * x[18 * idx[jrow] + 16]; 2475 sum18 += v[jrow] * x[18 * idx[jrow] + 17]; 2476 jrow++; 2477 } 2478 y[18 * i] += sum1; 2479 y[18 * i + 1] += sum2; 2480 y[18 * i + 2] += sum3; 2481 y[18 * i + 3] += sum4; 2482 y[18 * i + 4] += sum5; 2483 y[18 * i + 5] += sum6; 2484 y[18 * i + 6] += sum7; 2485 y[18 * i + 7] += sum8; 2486 y[18 * i + 8] += sum9; 2487 y[18 * i + 9] += sum10; 2488 y[18 * i + 10] += sum11; 2489 y[18 * i + 11] += sum12; 2490 y[18 * i + 12] += sum13; 2491 y[18 * i + 13] += sum14; 2492 y[18 * i + 14] += sum15; 2493 y[18 * i + 15] += sum16; 2494 y[18 * i + 16] += sum17; 2495 y[18 * i + 17] += sum18; 2496 } 2497 2498 PetscCall(PetscLogFlops(36.0 * a->nz)); 2499 PetscCall(VecRestoreArrayRead(xx, &x)); 2500 PetscCall(VecRestoreArray(zz, &y)); 2501 PetscFunctionReturn(0); 2502 } 2503 2504 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) { 2505 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2506 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2507 const PetscScalar *x, *v; 2508 PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; 2509 PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18; 2510 const PetscInt m = b->AIJ->rmap->n, *idx; 2511 PetscInt n, i; 2512 2513 PetscFunctionBegin; 2514 if (yy != zz) PetscCall(VecCopy(yy, zz)); 2515 PetscCall(VecGetArrayRead(xx, &x)); 2516 PetscCall(VecGetArray(zz, &y)); 2517 for (i = 0; i < m; i++) { 2518 idx = a->j + a->i[i]; 2519 v = a->a + a->i[i]; 2520 n = a->i[i + 1] - a->i[i]; 2521 alpha1 = x[18 * i]; 2522 alpha2 = x[18 * i + 1]; 2523 alpha3 = x[18 * i + 2]; 2524 alpha4 = x[18 * i + 3]; 2525 alpha5 = x[18 * i + 4]; 2526 alpha6 = x[18 * i + 5]; 2527 alpha7 = x[18 * i + 6]; 2528 alpha8 = x[18 * i + 7]; 2529 alpha9 = x[18 * i + 8]; 2530 alpha10 = x[18 * i + 9]; 2531 alpha11 = x[18 * i + 10]; 2532 alpha12 = x[18 * i + 11]; 2533 alpha13 = x[18 * i + 12]; 2534 alpha14 = x[18 * i + 13]; 2535 alpha15 = x[18 * i + 14]; 2536 alpha16 = x[18 * i + 15]; 2537 alpha17 = x[18 * i + 16]; 2538 alpha18 = x[18 * i + 17]; 2539 while (n-- > 0) { 2540 y[18 * (*idx)] += alpha1 * (*v); 2541 y[18 * (*idx) + 1] += alpha2 * (*v); 2542 y[18 * (*idx) + 2] += alpha3 * (*v); 2543 y[18 * (*idx) + 3] += alpha4 * (*v); 2544 y[18 * (*idx) + 4] += alpha5 * (*v); 2545 y[18 * (*idx) + 5] += alpha6 * (*v); 2546 y[18 * (*idx) + 6] += alpha7 * (*v); 2547 y[18 * (*idx) + 7] += alpha8 * (*v); 2548 y[18 * (*idx) + 8] += alpha9 * (*v); 2549 y[18 * (*idx) + 9] += alpha10 * (*v); 2550 y[18 * (*idx) + 10] += alpha11 * (*v); 2551 y[18 * (*idx) + 11] += alpha12 * (*v); 2552 y[18 * (*idx) + 12] += alpha13 * (*v); 2553 y[18 * (*idx) + 13] += alpha14 * (*v); 2554 y[18 * (*idx) + 14] += alpha15 * (*v); 2555 y[18 * (*idx) + 15] += alpha16 * (*v); 2556 y[18 * (*idx) + 16] += alpha17 * (*v); 2557 y[18 * (*idx) + 17] += alpha18 * (*v); 2558 idx++; 2559 v++; 2560 } 2561 } 2562 PetscCall(PetscLogFlops(36.0 * a->nz)); 2563 PetscCall(VecRestoreArrayRead(xx, &x)); 2564 PetscCall(VecRestoreArray(zz, &y)); 2565 PetscFunctionReturn(0); 2566 } 2567 2568 PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy) { 2569 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2570 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2571 const PetscScalar *x, *v; 2572 PetscScalar *y, *sums; 2573 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2574 PetscInt n, i, jrow, j, dof = b->dof, k; 2575 2576 PetscFunctionBegin; 2577 PetscCall(VecGetArrayRead(xx, &x)); 2578 PetscCall(VecSet(yy, 0.0)); 2579 PetscCall(VecGetArray(yy, &y)); 2580 idx = a->j; 2581 v = a->a; 2582 ii = a->i; 2583 2584 for (i = 0; i < m; i++) { 2585 jrow = ii[i]; 2586 n = ii[i + 1] - jrow; 2587 sums = y + dof * i; 2588 for (j = 0; j < n; j++) { 2589 for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 2590 jrow++; 2591 } 2592 } 2593 2594 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 2595 PetscCall(VecRestoreArrayRead(xx, &x)); 2596 PetscCall(VecRestoreArray(yy, &y)); 2597 PetscFunctionReturn(0); 2598 } 2599 2600 PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) { 2601 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2602 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2603 const PetscScalar *x, *v; 2604 PetscScalar *y, *sums; 2605 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 2606 PetscInt n, i, jrow, j, dof = b->dof, k; 2607 2608 PetscFunctionBegin; 2609 if (yy != zz) PetscCall(VecCopy(yy, zz)); 2610 PetscCall(VecGetArrayRead(xx, &x)); 2611 PetscCall(VecGetArray(zz, &y)); 2612 idx = a->j; 2613 v = a->a; 2614 ii = a->i; 2615 2616 for (i = 0; i < m; i++) { 2617 jrow = ii[i]; 2618 n = ii[i + 1] - jrow; 2619 sums = y + dof * i; 2620 for (j = 0; j < n; j++) { 2621 for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 2622 jrow++; 2623 } 2624 } 2625 2626 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 2627 PetscCall(VecRestoreArrayRead(xx, &x)); 2628 PetscCall(VecRestoreArray(zz, &y)); 2629 PetscFunctionReturn(0); 2630 } 2631 2632 PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy) { 2633 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2634 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2635 const PetscScalar *x, *v, *alpha; 2636 PetscScalar *y; 2637 const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 2638 PetscInt n, i, k; 2639 2640 PetscFunctionBegin; 2641 PetscCall(VecGetArrayRead(xx, &x)); 2642 PetscCall(VecSet(yy, 0.0)); 2643 PetscCall(VecGetArray(yy, &y)); 2644 for (i = 0; i < m; i++) { 2645 idx = a->j + a->i[i]; 2646 v = a->a + a->i[i]; 2647 n = a->i[i + 1] - a->i[i]; 2648 alpha = x + dof * i; 2649 while (n-- > 0) { 2650 for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 2651 idx++; 2652 v++; 2653 } 2654 } 2655 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 2656 PetscCall(VecRestoreArrayRead(xx, &x)); 2657 PetscCall(VecRestoreArray(yy, &y)); 2658 PetscFunctionReturn(0); 2659 } 2660 2661 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) { 2662 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 2663 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 2664 const PetscScalar *x, *v, *alpha; 2665 PetscScalar *y; 2666 const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 2667 PetscInt n, i, k; 2668 2669 PetscFunctionBegin; 2670 if (yy != zz) PetscCall(VecCopy(yy, zz)); 2671 PetscCall(VecGetArrayRead(xx, &x)); 2672 PetscCall(VecGetArray(zz, &y)); 2673 for (i = 0; i < m; i++) { 2674 idx = a->j + a->i[i]; 2675 v = a->a + a->i[i]; 2676 n = a->i[i + 1] - a->i[i]; 2677 alpha = x + dof * i; 2678 while (n-- > 0) { 2679 for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 2680 idx++; 2681 v++; 2682 } 2683 } 2684 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 2685 PetscCall(VecRestoreArrayRead(xx, &x)); 2686 PetscCall(VecRestoreArray(zz, &y)); 2687 PetscFunctionReturn(0); 2688 } 2689 2690 /*===================================================================================*/ 2691 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) { 2692 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 2693 2694 PetscFunctionBegin; 2695 /* start the scatter */ 2696 PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 2697 PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy)); 2698 PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 2699 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy)); 2700 PetscFunctionReturn(0); 2701 } 2702 2703 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) { 2704 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 2705 2706 PetscFunctionBegin; 2707 PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 2708 PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy)); 2709 PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 2710 PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 2711 PetscFunctionReturn(0); 2712 } 2713 2714 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) { 2715 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 2716 2717 PetscFunctionBegin; 2718 /* start the scatter */ 2719 PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 2720 PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz)); 2721 PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 2722 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 2723 PetscFunctionReturn(0); 2724 } 2725 2726 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) { 2727 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 2728 2729 PetscFunctionBegin; 2730 PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 2731 PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz)); 2732 PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 2733 PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 2734 PetscFunctionReturn(0); 2735 } 2736 2737 /* ----------------------------------------------------------------*/ 2738 PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) { 2739 Mat_Product *product = C->product; 2740 2741 PetscFunctionBegin; 2742 if (product->type == MATPRODUCT_PtAP) { 2743 C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; 2744 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]); 2745 PetscFunctionReturn(0); 2746 } 2747 2748 PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) { 2749 Mat_Product *product = C->product; 2750 PetscBool flg = PETSC_FALSE; 2751 Mat A = product->A, P = product->B; 2752 PetscInt alg = 1; /* set default algorithm */ 2753 #if !defined(PETSC_HAVE_HYPRE) 2754 const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"}; 2755 PetscInt nalg = 4; 2756 #else 2757 const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"}; 2758 PetscInt nalg = 5; 2759 #endif 2760 2761 PetscFunctionBegin; 2762 PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]); 2763 2764 /* PtAP */ 2765 /* Check matrix local sizes */ 2766 PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 2767 A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend); 2768 PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 2769 A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend); 2770 2771 /* Set the default algorithm */ 2772 PetscCall(PetscStrcmp(C->product->alg, "default", &flg)); 2773 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2774 2775 /* Get runtime option */ 2776 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 2777 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg)); 2778 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 2779 PetscOptionsEnd(); 2780 2781 PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg)); 2782 if (flg) { 2783 C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 2784 PetscFunctionReturn(0); 2785 } 2786 2787 PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg)); 2788 if (flg) { 2789 C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 2790 PetscFunctionReturn(0); 2791 } 2792 2793 /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ 2794 PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n")); 2795 PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P)); 2796 PetscCall(MatProductSetFromOptions(C)); 2797 PetscFunctionReturn(0); 2798 } 2799 2800 /* ----------------------------------------------------------------*/ 2801 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C) { 2802 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2803 Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 2804 Mat P = pp->AIJ; 2805 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c; 2806 PetscInt *pti, *ptj, *ptJ; 2807 PetscInt *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj; 2808 const PetscInt an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof; 2809 PetscInt i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn; 2810 MatScalar *ca; 2811 const PetscInt *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj; 2812 2813 PetscFunctionBegin; 2814 /* Get ij structure of P^T */ 2815 PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 2816 2817 cn = pn * ppdof; 2818 /* Allocate ci array, arrays for fill computation and */ 2819 /* free space for accumulating nonzero column info */ 2820 PetscCall(PetscMalloc1(cn + 1, &ci)); 2821 ci[0] = 0; 2822 2823 /* Work arrays for rows of P^T*A */ 2824 PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow)); 2825 PetscCall(PetscArrayzero(ptadenserow, an)); 2826 PetscCall(PetscArrayzero(denserow, cn)); 2827 2828 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 2829 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 2830 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2831 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space)); 2832 current_space = free_space; 2833 2834 /* Determine symbolic info for each row of C: */ 2835 for (i = 0; i < pn; i++) { 2836 ptnzi = pti[i + 1] - pti[i]; 2837 ptJ = ptj + pti[i]; 2838 for (dof = 0; dof < ppdof; dof++) { 2839 ptanzi = 0; 2840 /* Determine symbolic row of PtA: */ 2841 for (j = 0; j < ptnzi; j++) { 2842 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 2843 arow = ptJ[j] * ppdof + dof; 2844 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 2845 anzj = ai[arow + 1] - ai[arow]; 2846 ajj = aj + ai[arow]; 2847 for (k = 0; k < anzj; k++) { 2848 if (!ptadenserow[ajj[k]]) { 2849 ptadenserow[ajj[k]] = -1; 2850 ptasparserow[ptanzi++] = ajj[k]; 2851 } 2852 } 2853 } 2854 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 2855 ptaj = ptasparserow; 2856 cnzi = 0; 2857 for (j = 0; j < ptanzi; j++) { 2858 /* Get offset within block of P */ 2859 pshift = *ptaj % ppdof; 2860 /* Get block row of P */ 2861 prow = (*ptaj++) / ppdof; /* integer division */ 2862 /* P has same number of nonzeros per row as the compressed form */ 2863 pnzj = pi[prow + 1] - pi[prow]; 2864 pjj = pj + pi[prow]; 2865 for (k = 0; k < pnzj; k++) { 2866 /* Locations in C are shifted by the offset within the block */ 2867 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 2868 if (!denserow[pjj[k] * ppdof + pshift]) { 2869 denserow[pjj[k] * ppdof + pshift] = -1; 2870 sparserow[cnzi++] = pjj[k] * ppdof + pshift; 2871 } 2872 } 2873 } 2874 2875 /* sort sparserow */ 2876 PetscCall(PetscSortInt(cnzi, sparserow)); 2877 2878 /* If free space is not available, make more free space */ 2879 /* Double the amount of total space in the list */ 2880 if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space)); 2881 2882 /* Copy data into free space, and zero out denserows */ 2883 PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi)); 2884 2885 current_space->array += cnzi; 2886 current_space->local_used += cnzi; 2887 current_space->local_remaining -= cnzi; 2888 2889 for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 2890 for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0; 2891 2892 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 2893 /* For now, we will recompute what is needed. */ 2894 ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi; 2895 } 2896 } 2897 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 2898 /* Allocate space for cj, initialize cj, and */ 2899 /* destroy list of free space and other temporary array(s) */ 2900 PetscCall(PetscMalloc1(ci[cn] + 1, &cj)); 2901 PetscCall(PetscFreeSpaceContiguous(&free_space, cj)); 2902 PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow)); 2903 2904 /* Allocate space for ca */ 2905 PetscCall(PetscCalloc1(ci[cn] + 1, &ca)); 2906 2907 /* put together the new matrix */ 2908 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C)); 2909 PetscCall(MatSetBlockSize(C, pp->dof)); 2910 2911 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 2912 /* Since these are PETSc arrays, change flags to free them as necessary. */ 2913 c = (Mat_SeqAIJ *)(C->data); 2914 c->free_a = PETSC_TRUE; 2915 c->free_ij = PETSC_TRUE; 2916 c->nonew = 0; 2917 2918 C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 2919 C->ops->productnumeric = MatProductNumeric_PtAP; 2920 2921 /* Clean up. */ 2922 PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 2923 PetscFunctionReturn(0); 2924 } 2925 2926 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C) { 2927 /* This routine requires testing -- first draft only */ 2928 Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 2929 Mat P = pp->AIJ; 2930 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2931 Mat_SeqAIJ *p = (Mat_SeqAIJ *)P->data; 2932 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 2933 const PetscInt *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj; 2934 const PetscInt *ci = c->i, *cj = c->j, *cjj; 2935 const PetscInt am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof; 2936 PetscInt i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense; 2937 const MatScalar *aa = a->a, *pa = p->a, *pA, *paj; 2938 MatScalar *ca = c->a, *caj, *apa; 2939 2940 PetscFunctionBegin; 2941 /* Allocate temporary array for storage of one row of A*P */ 2942 PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense)); 2943 2944 /* Clear old values in C */ 2945 PetscCall(PetscArrayzero(ca, ci[cm])); 2946 2947 for (i = 0; i < am; i++) { 2948 /* Form sparse row of A*P */ 2949 anzi = ai[i + 1] - ai[i]; 2950 apnzj = 0; 2951 for (j = 0; j < anzi; j++) { 2952 /* Get offset within block of P */ 2953 pshift = *aj % ppdof; 2954 /* Get block row of P */ 2955 prow = *aj++ / ppdof; /* integer division */ 2956 pnzj = pi[prow + 1] - pi[prow]; 2957 pjj = pj + pi[prow]; 2958 paj = pa + pi[prow]; 2959 for (k = 0; k < pnzj; k++) { 2960 poffset = pjj[k] * ppdof + pshift; 2961 if (!apjdense[poffset]) { 2962 apjdense[poffset] = -1; 2963 apj[apnzj++] = poffset; 2964 } 2965 apa[poffset] += (*aa) * paj[k]; 2966 } 2967 PetscCall(PetscLogFlops(2.0 * pnzj)); 2968 aa++; 2969 } 2970 2971 /* Sort the j index array for quick sparse axpy. */ 2972 /* Note: a array does not need sorting as it is in dense storage locations. */ 2973 PetscCall(PetscSortInt(apnzj, apj)); 2974 2975 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 2976 prow = i / ppdof; /* integer division */ 2977 pshift = i % ppdof; 2978 poffset = pi[prow]; 2979 pnzi = pi[prow + 1] - poffset; 2980 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 2981 pJ = pj + poffset; 2982 pA = pa + poffset; 2983 for (j = 0; j < pnzi; j++) { 2984 crow = (*pJ) * ppdof + pshift; 2985 cjj = cj + ci[crow]; 2986 caj = ca + ci[crow]; 2987 pJ++; 2988 /* Perform sparse axpy operation. Note cjj includes apj. */ 2989 for (k = 0, nextap = 0; nextap < apnzj; k++) { 2990 if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]]; 2991 } 2992 PetscCall(PetscLogFlops(2.0 * apnzj)); 2993 pA++; 2994 } 2995 2996 /* Zero the current row info for A*P */ 2997 for (j = 0; j < apnzj; j++) { 2998 apa[apj[j]] = 0.; 2999 apjdense[apj[j]] = 0; 3000 } 3001 } 3002 3003 /* Assemble the final matrix and clean up */ 3004 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 3005 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 3006 PetscCall(PetscFree3(apa, apj, apjdense)); 3007 PetscFunctionReturn(0); 3008 } 3009 3010 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) { 3011 Mat_Product *product = C->product; 3012 Mat A = product->A, P = product->B; 3013 3014 PetscFunctionBegin; 3015 PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C)); 3016 PetscFunctionReturn(0); 3017 } 3018 3019 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A, Mat PP, PetscReal fill, Mat *C) { 3020 PetscFunctionBegin; 3021 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet"); 3022 } 3023 3024 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A, Mat PP, Mat C) { 3025 PetscFunctionBegin; 3026 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 3027 } 3028 3029 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat); 3030 3031 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C) { 3032 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3033 3034 PetscFunctionBegin; 3035 3036 PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C)); 3037 PetscFunctionReturn(0); 3038 } 3039 3040 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat); 3041 3042 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C) { 3043 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3044 3045 PetscFunctionBegin; 3046 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C)); 3047 C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; 3048 PetscFunctionReturn(0); 3049 } 3050 3051 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat); 3052 3053 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C) { 3054 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3055 3056 PetscFunctionBegin; 3057 3058 PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C)); 3059 PetscFunctionReturn(0); 3060 } 3061 3062 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat); 3063 3064 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C) { 3065 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 3066 3067 PetscFunctionBegin; 3068 3069 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C)); 3070 C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; 3071 PetscFunctionReturn(0); 3072 } 3073 3074 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) { 3075 Mat_Product *product = C->product; 3076 Mat A = product->A, P = product->B; 3077 PetscBool flg; 3078 3079 PetscFunctionBegin; 3080 PetscCall(PetscStrcmp(product->alg, "allatonce", &flg)); 3081 if (flg) { 3082 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C)); 3083 C->ops->productnumeric = MatProductNumeric_PtAP; 3084 PetscFunctionReturn(0); 3085 } 3086 3087 PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg)); 3088 if (flg) { 3089 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C)); 3090 C->ops->productnumeric = MatProductNumeric_PtAP; 3091 PetscFunctionReturn(0); 3092 } 3093 3094 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported"); 3095 } 3096 3097 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 3098 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 3099 Mat a = b->AIJ, B; 3100 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)a->data; 3101 PetscInt m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof; 3102 PetscInt *cols; 3103 PetscScalar *vals; 3104 3105 PetscFunctionBegin; 3106 PetscCall(MatGetSize(a, &m, &n)); 3107 PetscCall(PetscMalloc1(dof * m, &ilen)); 3108 for (i = 0; i < m; i++) { 3109 nmax = PetscMax(nmax, aij->ilen[i]); 3110 for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i]; 3111 } 3112 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 3113 PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n)); 3114 PetscCall(MatSetType(B, newtype)); 3115 PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen)); 3116 PetscCall(PetscFree(ilen)); 3117 PetscCall(PetscMalloc1(nmax, &icols)); 3118 ii = 0; 3119 for (i = 0; i < m; i++) { 3120 PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 3121 for (j = 0; j < dof; j++) { 3122 for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j; 3123 PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 3124 ii++; 3125 } 3126 PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 3127 } 3128 PetscCall(PetscFree(icols)); 3129 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3130 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3131 3132 if (reuse == MAT_INPLACE_MATRIX) { 3133 PetscCall(MatHeaderReplace(A, &B)); 3134 } else { 3135 *newmat = B; 3136 } 3137 PetscFunctionReturn(0); 3138 } 3139 3140 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3141 3142 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 3143 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)A->data; 3144 Mat MatAIJ = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B; 3145 Mat MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ; 3146 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ *)MatAIJ->data; 3147 Mat_SeqAIJ *OAIJ = (Mat_SeqAIJ *)MatOAIJ->data; 3148 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)maij->A->data; 3149 PetscInt dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0; 3150 PetscInt *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL; 3151 PetscInt rstart, cstart, *garray, ii, k; 3152 PetscScalar *vals, *ovals; 3153 3154 PetscFunctionBegin; 3155 PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz)); 3156 for (i = 0; i < A->rmap->n / dof; i++) { 3157 nmax = PetscMax(nmax, AIJ->ilen[i]); 3158 onmax = PetscMax(onmax, OAIJ->ilen[i]); 3159 for (j = 0; j < dof; j++) { 3160 dnz[dof * i + j] = AIJ->ilen[i]; 3161 onz[dof * i + j] = OAIJ->ilen[i]; 3162 } 3163 } 3164 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3165 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 3166 PetscCall(MatSetType(B, newtype)); 3167 PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz)); 3168 PetscCall(MatSetBlockSize(B, dof)); 3169 PetscCall(PetscFree2(dnz, onz)); 3170 3171 PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols)); 3172 rstart = dof * maij->A->rmap->rstart; 3173 cstart = dof * maij->A->cmap->rstart; 3174 garray = mpiaij->garray; 3175 3176 ii = rstart; 3177 for (i = 0; i < A->rmap->n / dof; i++) { 3178 PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 3179 PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 3180 for (j = 0; j < dof; j++) { 3181 for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j; 3182 for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j; 3183 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 3184 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES)); 3185 ii++; 3186 } 3187 PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 3188 PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 3189 } 3190 PetscCall(PetscFree2(icols, oicols)); 3191 3192 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3193 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3194 3195 if (reuse == MAT_INPLACE_MATRIX) { 3196 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 3197 ((PetscObject)A)->refct = 1; 3198 3199 PetscCall(MatHeaderReplace(A, &B)); 3200 3201 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 3202 } else { 3203 *newmat = B; 3204 } 3205 PetscFunctionReturn(0); 3206 } 3207 3208 PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) { 3209 Mat A; 3210 3211 PetscFunctionBegin; 3212 PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 3213 PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 3214 PetscCall(MatDestroy(&A)); 3215 PetscFunctionReturn(0); 3216 } 3217 3218 PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[]) { 3219 Mat A; 3220 3221 PetscFunctionBegin; 3222 PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 3223 PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat)); 3224 PetscCall(MatDestroy(&A)); 3225 PetscFunctionReturn(0); 3226 } 3227 3228 /* ---------------------------------------------------------------------------------- */ 3229 /*@ 3230 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 3231 operations for multicomponent problems. It interpolates each component the same 3232 way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices, 3233 and `MATMPIAIJ` for distributed matrices. 3234 3235 Collective 3236 3237 Input Parameters: 3238 + A - the `MATAIJ` matrix describing the action on blocks 3239 - dof - the block size (number of components per node) 3240 3241 Output Parameter: 3242 . maij - the new `MATMAIJ` matrix 3243 3244 Operations provided: 3245 .vb 3246 MatMult() 3247 MatMultTranspose() 3248 MatMultAdd() 3249 MatMultTransposeAdd() 3250 MatView() 3251 .ve 3252 3253 Level: advanced 3254 3255 .seealso: `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ` 3256 @*/ 3257 PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij) { 3258 PetscInt n; 3259 Mat B; 3260 PetscBool flg; 3261 #if defined(PETSC_HAVE_CUDA) 3262 /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 3263 PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; 3264 #endif 3265 3266 PetscFunctionBegin; 3267 dof = PetscAbs(dof); 3268 PetscCall(PetscObjectReference((PetscObject)A)); 3269 3270 if (dof == 1) *maij = A; 3271 else { 3272 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 3273 /* propagate vec type */ 3274 PetscCall(MatSetVecType(B, A->defaultvectype)); 3275 PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N)); 3276 PetscCall(PetscLayoutSetBlockSize(B->rmap, dof)); 3277 PetscCall(PetscLayoutSetBlockSize(B->cmap, dof)); 3278 PetscCall(PetscLayoutSetUp(B->rmap)); 3279 PetscCall(PetscLayoutSetUp(B->cmap)); 3280 3281 B->assembled = PETSC_TRUE; 3282 3283 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 3284 if (flg) { 3285 Mat_SeqMAIJ *b; 3286 3287 PetscCall(MatSetType(B, MATSEQMAIJ)); 3288 3289 B->ops->setup = NULL; 3290 B->ops->destroy = MatDestroy_SeqMAIJ; 3291 B->ops->view = MatView_SeqMAIJ; 3292 3293 b = (Mat_SeqMAIJ *)B->data; 3294 b->dof = dof; 3295 b->AIJ = A; 3296 3297 if (dof == 2) { 3298 B->ops->mult = MatMult_SeqMAIJ_2; 3299 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 3300 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 3301 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 3302 } else if (dof == 3) { 3303 B->ops->mult = MatMult_SeqMAIJ_3; 3304 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 3305 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 3306 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 3307 } else if (dof == 4) { 3308 B->ops->mult = MatMult_SeqMAIJ_4; 3309 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 3310 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 3311 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 3312 } else if (dof == 5) { 3313 B->ops->mult = MatMult_SeqMAIJ_5; 3314 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 3315 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 3316 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 3317 } else if (dof == 6) { 3318 B->ops->mult = MatMult_SeqMAIJ_6; 3319 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 3320 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 3321 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 3322 } else if (dof == 7) { 3323 B->ops->mult = MatMult_SeqMAIJ_7; 3324 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 3325 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 3326 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 3327 } else if (dof == 8) { 3328 B->ops->mult = MatMult_SeqMAIJ_8; 3329 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 3330 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 3331 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 3332 } else if (dof == 9) { 3333 B->ops->mult = MatMult_SeqMAIJ_9; 3334 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 3335 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 3336 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 3337 } else if (dof == 10) { 3338 B->ops->mult = MatMult_SeqMAIJ_10; 3339 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 3340 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 3341 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 3342 } else if (dof == 11) { 3343 B->ops->mult = MatMult_SeqMAIJ_11; 3344 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 3345 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 3346 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 3347 } else if (dof == 16) { 3348 B->ops->mult = MatMult_SeqMAIJ_16; 3349 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 3350 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 3351 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 3352 } else if (dof == 18) { 3353 B->ops->mult = MatMult_SeqMAIJ_18; 3354 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 3355 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 3356 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 3357 } else { 3358 B->ops->mult = MatMult_SeqMAIJ_N; 3359 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 3360 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 3361 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 3362 } 3363 #if defined(PETSC_HAVE_CUDA) 3364 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ)); 3365 #endif 3366 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ)); 3367 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ)); 3368 } else { 3369 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 3370 Mat_MPIMAIJ *b; 3371 IS from, to; 3372 Vec gvec; 3373 3374 PetscCall(MatSetType(B, MATMPIMAIJ)); 3375 3376 B->ops->setup = NULL; 3377 B->ops->destroy = MatDestroy_MPIMAIJ; 3378 B->ops->view = MatView_MPIMAIJ; 3379 3380 b = (Mat_MPIMAIJ *)B->data; 3381 b->dof = dof; 3382 b->A = A; 3383 3384 PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ)); 3385 PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ)); 3386 3387 PetscCall(VecGetSize(mpiaij->lvec, &n)); 3388 PetscCall(VecCreate(PETSC_COMM_SELF, &b->w)); 3389 PetscCall(VecSetSizes(b->w, n * dof, n * dof)); 3390 PetscCall(VecSetBlockSize(b->w, dof)); 3391 PetscCall(VecSetType(b->w, VECSEQ)); 3392 3393 /* create two temporary Index sets for build scatter gather */ 3394 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from)); 3395 PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to)); 3396 3397 /* create temporary global vector to generate scatter context */ 3398 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec)); 3399 3400 /* generate the scatter context */ 3401 PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx)); 3402 3403 PetscCall(ISDestroy(&from)); 3404 PetscCall(ISDestroy(&to)); 3405 PetscCall(VecDestroy(&gvec)); 3406 3407 B->ops->mult = MatMult_MPIMAIJ_dof; 3408 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3409 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3410 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3411 3412 #if defined(PETSC_HAVE_CUDA) 3413 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ)); 3414 #endif 3415 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ)); 3416 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ)); 3417 } 3418 B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 3419 B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 3420 PetscCall(MatSetUp(B)); 3421 #if defined(PETSC_HAVE_CUDA) 3422 /* temporary until we have CUDA implementation of MAIJ */ 3423 { 3424 PetscBool flg; 3425 if (convert) { 3426 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, "")); 3427 if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B)); 3428 } 3429 } 3430 #endif 3431 *maij = B; 3432 } 3433 PetscFunctionReturn(0); 3434 } 3435