#include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ #include <../src/mat/utils/freespace.h> /*@ MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel Input Parameter: . A - the `MATMAIJ` matrix Output Parameter: . B - the `MATAIJ` matrix Level: advanced Note: The reference count on the `MATAIJ` matrix is not increased so you should not destroy it. .seealso: `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()` @*/ PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B) { PetscBool ismpimaij, isseqmaij; PetscFunctionBegin; PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij)); PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij)); if (ismpimaij) { Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; *B = b->A; } else if (isseqmaij) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; *B = b->AIJ; } else { *B = A; } PetscFunctionReturn(0); } /*@ MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size Logically Collective Input Parameters: + A - the `MATMAIJ` matrix - dof - the block size for the new matrix Output Parameter: . B - the new `MATMAIJ` matrix Level: advanced .seealso: `MATMAIJ`, `MatCreateMAIJ()` @*/ PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B) { Mat Aij = NULL; PetscFunctionBegin; PetscValidLogicalCollectiveInt(A, dof, 2); PetscCall(MatMAIJGetAIJ(A, &Aij)); PetscCall(MatCreateMAIJ(Aij, dof, B)); PetscFunctionReturn(0); } PetscErrorCode MatDestroy_SeqMAIJ(Mat A) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; PetscFunctionBegin; PetscCall(MatDestroy(&b->AIJ)); PetscCall(PetscFree(A->data)); PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL)); PetscFunctionReturn(0); } PetscErrorCode MatSetUp_MAIJ(Mat A) { PetscFunctionBegin; SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices"); } PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer) { Mat B; PetscFunctionBegin; PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); PetscCall(MatView(B, viewer)); PetscCall(MatDestroy(&B)); PetscFunctionReturn(0); } PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer) { Mat B; PetscFunctionBegin; PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B)); PetscCall(MatView(B, viewer)); PetscCall(MatDestroy(&B)); PetscFunctionReturn(0); } PetscErrorCode MatDestroy_MPIMAIJ(Mat A) { Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; PetscFunctionBegin; PetscCall(MatDestroy(&b->AIJ)); PetscCall(MatDestroy(&b->OAIJ)); PetscCall(MatDestroy(&b->A)); PetscCall(VecScatterDestroy(&b->ctx)); PetscCall(VecDestroy(&b->w)); PetscCall(PetscFree(A->data)); PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL)); PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL)); PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); PetscFunctionReturn(0); } /*MC MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for multicomponent problems, interpolating or restricting each component the same way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices. Operations provided: .vb MatMult() MatMultTranspose() MatMultAdd() MatMultTransposeAdd() .ve Level: advanced .seealso: `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()` M*/ PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) { Mat_MPIMAIJ *b; PetscMPIInt size; PetscFunctionBegin; PetscCall(PetscNewLog(A, &b)); A->data = (void *)b; PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps))); A->ops->setup = MatSetUp_MAIJ; b->AIJ = NULL; b->dof = 0; b->OAIJ = NULL; b->ctx = NULL; b->w = NULL; PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); if (size == 1) { PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ)); } else { PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ)); } A->preallocated = PETSC_TRUE; A->assembled = PETSC_TRUE; PetscFunctionReturn(0); } /* --------------------------------------------------------------------------------------*/ PetscErrorCode MatMult_SeqMAIJ_2(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, sum1, sum2; PetscInt nonzerorow = 0, n, i, jrow, j; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscFunctionBegin; PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(yy, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sum1 = 0.0; sum2 = 0.0; nonzerorow += (n > 0); for (j = 0; j < n; j++) { sum1 += v[jrow] * x[2 * idx[jrow]]; sum2 += v[jrow] * x[2 * idx[jrow] + 1]; jrow++; } y[2 * i] = sum1; y[2 * i + 1] = sum2; } PetscCall(PetscLogFlops(4.0 * a->nz - 2.0 * nonzerorow)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, alpha1, alpha2; PetscInt n, i; const PetscInt m = b->AIJ->rmap->n, *idx; PetscFunctionBegin; PetscCall(VecSet(yy, 0.0)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(yy, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha1 = x[2 * i]; alpha2 = x[2 * i + 1]; while (n-- > 0) { y[2 * (*idx)] += alpha1 * (*v); y[2 * (*idx) + 1] += alpha2 * (*v); idx++; v++; } } PetscCall(PetscLogFlops(4.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, sum1, sum2; PetscInt n, i, jrow, j; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sum1 = 0.0; sum2 = 0.0; for (j = 0; j < n; j++) { sum1 += v[jrow] * x[2 * idx[jrow]]; sum2 += v[jrow] * x[2 * idx[jrow] + 1]; jrow++; } y[2 * i] += sum1; y[2 * i + 1] += sum2; } PetscCall(PetscLogFlops(4.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(zz, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, alpha1, alpha2; PetscInt n, i; const PetscInt m = b->AIJ->rmap->n, *idx; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha1 = x[2 * i]; alpha2 = x[2 * i + 1]; while (n-- > 0) { y[2 * (*idx)] += alpha1 * (*v); y[2 * (*idx) + 1] += alpha2 * (*v); idx++; v++; } } PetscCall(PetscLogFlops(4.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(zz, &y)); PetscFunctionReturn(0); } /* --------------------------------------------------------------------------------------*/ PetscErrorCode MatMult_SeqMAIJ_3(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, sum1, sum2, sum3; PetscInt nonzerorow = 0, n, i, jrow, j; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscFunctionBegin; PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(yy, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; nonzerorow += (n > 0); for (j = 0; j < n; j++) { sum1 += v[jrow] * x[3 * idx[jrow]]; sum2 += v[jrow] * x[3 * idx[jrow] + 1]; sum3 += v[jrow] * x[3 * idx[jrow] + 2]; jrow++; } y[3 * i] = sum1; y[3 * i + 1] = sum2; y[3 * i + 2] = sum3; } PetscCall(PetscLogFlops(6.0 * a->nz - 3.0 * nonzerorow)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, alpha1, alpha2, alpha3; PetscInt n, i; const PetscInt m = b->AIJ->rmap->n, *idx; PetscFunctionBegin; PetscCall(VecSet(yy, 0.0)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(yy, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha1 = x[3 * i]; alpha2 = x[3 * i + 1]; alpha3 = x[3 * i + 2]; while (n-- > 0) { y[3 * (*idx)] += alpha1 * (*v); y[3 * (*idx) + 1] += alpha2 * (*v); y[3 * (*idx) + 2] += alpha3 * (*v); idx++; v++; } } PetscCall(PetscLogFlops(6.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, sum1, sum2, sum3; PetscInt n, i, jrow, j; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; for (j = 0; j < n; j++) { sum1 += v[jrow] * x[3 * idx[jrow]]; sum2 += v[jrow] * x[3 * idx[jrow] + 1]; sum3 += v[jrow] * x[3 * idx[jrow] + 2]; jrow++; } y[3 * i] += sum1; y[3 * i + 1] += sum2; y[3 * i + 2] += sum3; } PetscCall(PetscLogFlops(6.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(zz, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, alpha1, alpha2, alpha3; PetscInt n, i; const PetscInt m = b->AIJ->rmap->n, *idx; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha1 = x[3 * i]; alpha2 = x[3 * i + 1]; alpha3 = x[3 * i + 2]; while (n-- > 0) { y[3 * (*idx)] += alpha1 * (*v); y[3 * (*idx) + 1] += alpha2 * (*v); y[3 * (*idx) + 2] += alpha3 * (*v); idx++; v++; } } PetscCall(PetscLogFlops(6.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(zz, &y)); PetscFunctionReturn(0); } /* ------------------------------------------------------------------------------*/ PetscErrorCode MatMult_SeqMAIJ_4(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, sum1, sum2, sum3, sum4; PetscInt nonzerorow = 0, n, i, jrow, j; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscFunctionBegin; PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(yy, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; nonzerorow += (n > 0); for (j = 0; j < n; j++) { sum1 += v[jrow] * x[4 * idx[jrow]]; sum2 += v[jrow] * x[4 * idx[jrow] + 1]; sum3 += v[jrow] * x[4 * idx[jrow] + 2]; sum4 += v[jrow] * x[4 * idx[jrow] + 3]; jrow++; } y[4 * i] = sum1; y[4 * i + 1] = sum2; y[4 * i + 2] = sum3; y[4 * i + 3] = sum4; } PetscCall(PetscLogFlops(8.0 * a->nz - 4.0 * nonzerorow)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, alpha1, alpha2, alpha3, alpha4; PetscInt n, i; const PetscInt m = b->AIJ->rmap->n, *idx; PetscFunctionBegin; PetscCall(VecSet(yy, 0.0)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(yy, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha1 = x[4 * i]; alpha2 = x[4 * i + 1]; alpha3 = x[4 * i + 2]; alpha4 = x[4 * i + 3]; while (n-- > 0) { y[4 * (*idx)] += alpha1 * (*v); y[4 * (*idx) + 1] += alpha2 * (*v); y[4 * (*idx) + 2] += alpha3 * (*v); y[4 * (*idx) + 3] += alpha4 * (*v); idx++; v++; } } PetscCall(PetscLogFlops(8.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, sum1, sum2, sum3, sum4; PetscInt n, i, jrow, j; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; for (j = 0; j < n; j++) { sum1 += v[jrow] * x[4 * idx[jrow]]; sum2 += v[jrow] * x[4 * idx[jrow] + 1]; sum3 += v[jrow] * x[4 * idx[jrow] + 2]; sum4 += v[jrow] * x[4 * idx[jrow] + 3]; jrow++; } y[4 * i] += sum1; y[4 * i + 1] += sum2; y[4 * i + 2] += sum3; y[4 * i + 3] += sum4; } PetscCall(PetscLogFlops(8.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(zz, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, alpha1, alpha2, alpha3, alpha4; PetscInt n, i; const PetscInt m = b->AIJ->rmap->n, *idx; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha1 = x[4 * i]; alpha2 = x[4 * i + 1]; alpha3 = x[4 * i + 2]; alpha4 = x[4 * i + 3]; while (n-- > 0) { y[4 * (*idx)] += alpha1 * (*v); y[4 * (*idx) + 1] += alpha2 * (*v); y[4 * (*idx) + 2] += alpha3 * (*v); y[4 * (*idx) + 3] += alpha4 * (*v); idx++; v++; } } PetscCall(PetscLogFlops(8.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(zz, &y)); PetscFunctionReturn(0); } /* ------------------------------------------------------------------------------*/ PetscErrorCode MatMult_SeqMAIJ_5(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, sum1, sum2, sum3, sum4, sum5; PetscInt nonzerorow = 0, n, i, jrow, j; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscFunctionBegin; PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(yy, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; nonzerorow += (n > 0); for (j = 0; j < n; j++) { sum1 += v[jrow] * x[5 * idx[jrow]]; sum2 += v[jrow] * x[5 * idx[jrow] + 1]; sum3 += v[jrow] * x[5 * idx[jrow] + 2]; sum4 += v[jrow] * x[5 * idx[jrow] + 3]; sum5 += v[jrow] * x[5 * idx[jrow] + 4]; jrow++; } y[5 * i] = sum1; y[5 * i + 1] = sum2; y[5 * i + 2] = sum3; y[5 * i + 3] = sum4; y[5 * i + 4] = sum5; } PetscCall(PetscLogFlops(10.0 * a->nz - 5.0 * nonzerorow)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5; PetscInt n, i; const PetscInt m = b->AIJ->rmap->n, *idx; PetscFunctionBegin; PetscCall(VecSet(yy, 0.0)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(yy, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha1 = x[5 * i]; alpha2 = x[5 * i + 1]; alpha3 = x[5 * i + 2]; alpha4 = x[5 * i + 3]; alpha5 = x[5 * i + 4]; while (n-- > 0) { y[5 * (*idx)] += alpha1 * (*v); y[5 * (*idx) + 1] += alpha2 * (*v); y[5 * (*idx) + 2] += alpha3 * (*v); y[5 * (*idx) + 3] += alpha4 * (*v); y[5 * (*idx) + 4] += alpha5 * (*v); idx++; v++; } } PetscCall(PetscLogFlops(10.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, sum1, sum2, sum3, sum4, sum5; PetscInt n, i, jrow, j; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; for (j = 0; j < n; j++) { sum1 += v[jrow] * x[5 * idx[jrow]]; sum2 += v[jrow] * x[5 * idx[jrow] + 1]; sum3 += v[jrow] * x[5 * idx[jrow] + 2]; sum4 += v[jrow] * x[5 * idx[jrow] + 3]; sum5 += v[jrow] * x[5 * idx[jrow] + 4]; jrow++; } y[5 * i] += sum1; y[5 * i + 1] += sum2; y[5 * i + 2] += sum3; y[5 * i + 3] += sum4; y[5 * i + 4] += sum5; } PetscCall(PetscLogFlops(10.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(zz, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5; PetscInt n, i; const PetscInt m = b->AIJ->rmap->n, *idx; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha1 = x[5 * i]; alpha2 = x[5 * i + 1]; alpha3 = x[5 * i + 2]; alpha4 = x[5 * i + 3]; alpha5 = x[5 * i + 4]; while (n-- > 0) { y[5 * (*idx)] += alpha1 * (*v); y[5 * (*idx) + 1] += alpha2 * (*v); y[5 * (*idx) + 2] += alpha3 * (*v); y[5 * (*idx) + 3] += alpha4 * (*v); y[5 * (*idx) + 4] += alpha5 * (*v); idx++; v++; } } PetscCall(PetscLogFlops(10.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(zz, &y)); PetscFunctionReturn(0); } /* ------------------------------------------------------------------------------*/ PetscErrorCode MatMult_SeqMAIJ_6(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6; PetscInt nonzerorow = 0, n, i, jrow, j; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscFunctionBegin; PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(yy, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; nonzerorow += (n > 0); for (j = 0; j < n; j++) { sum1 += v[jrow] * x[6 * idx[jrow]]; sum2 += v[jrow] * x[6 * idx[jrow] + 1]; sum3 += v[jrow] * x[6 * idx[jrow] + 2]; sum4 += v[jrow] * x[6 * idx[jrow] + 3]; sum5 += v[jrow] * x[6 * idx[jrow] + 4]; sum6 += v[jrow] * x[6 * idx[jrow] + 5]; jrow++; } y[6 * i] = sum1; y[6 * i + 1] = sum2; y[6 * i + 2] = sum3; y[6 * i + 3] = sum4; y[6 * i + 4] = sum5; y[6 * i + 5] = sum6; } PetscCall(PetscLogFlops(12.0 * a->nz - 6.0 * nonzerorow)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6; PetscInt n, i; const PetscInt m = b->AIJ->rmap->n, *idx; PetscFunctionBegin; PetscCall(VecSet(yy, 0.0)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(yy, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha1 = x[6 * i]; alpha2 = x[6 * i + 1]; alpha3 = x[6 * i + 2]; alpha4 = x[6 * i + 3]; alpha5 = x[6 * i + 4]; alpha6 = x[6 * i + 5]; while (n-- > 0) { y[6 * (*idx)] += alpha1 * (*v); y[6 * (*idx) + 1] += alpha2 * (*v); y[6 * (*idx) + 2] += alpha3 * (*v); y[6 * (*idx) + 3] += alpha4 * (*v); y[6 * (*idx) + 4] += alpha5 * (*v); y[6 * (*idx) + 5] += alpha6 * (*v); idx++; v++; } } PetscCall(PetscLogFlops(12.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6; PetscInt n, i, jrow, j; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; for (j = 0; j < n; j++) { sum1 += v[jrow] * x[6 * idx[jrow]]; sum2 += v[jrow] * x[6 * idx[jrow] + 1]; sum3 += v[jrow] * x[6 * idx[jrow] + 2]; sum4 += v[jrow] * x[6 * idx[jrow] + 3]; sum5 += v[jrow] * x[6 * idx[jrow] + 4]; sum6 += v[jrow] * x[6 * idx[jrow] + 5]; jrow++; } y[6 * i] += sum1; y[6 * i + 1] += sum2; y[6 * i + 2] += sum3; y[6 * i + 3] += sum4; y[6 * i + 4] += sum5; y[6 * i + 5] += sum6; } PetscCall(PetscLogFlops(12.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(zz, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6; PetscInt n, i; const PetscInt m = b->AIJ->rmap->n, *idx; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha1 = x[6 * i]; alpha2 = x[6 * i + 1]; alpha3 = x[6 * i + 2]; alpha4 = x[6 * i + 3]; alpha5 = x[6 * i + 4]; alpha6 = x[6 * i + 5]; while (n-- > 0) { y[6 * (*idx)] += alpha1 * (*v); y[6 * (*idx) + 1] += alpha2 * (*v); y[6 * (*idx) + 2] += alpha3 * (*v); y[6 * (*idx) + 3] += alpha4 * (*v); y[6 * (*idx) + 4] += alpha5 * (*v); y[6 * (*idx) + 5] += alpha6 * (*v); idx++; v++; } } PetscCall(PetscLogFlops(12.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(zz, &y)); PetscFunctionReturn(0); } /* ------------------------------------------------------------------------------*/ PetscErrorCode MatMult_SeqMAIJ_7(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscInt nonzerorow = 0, n, i, jrow, j; PetscFunctionBegin; PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(yy, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; nonzerorow += (n > 0); for (j = 0; j < n; j++) { sum1 += v[jrow] * x[7 * idx[jrow]]; sum2 += v[jrow] * x[7 * idx[jrow] + 1]; sum3 += v[jrow] * x[7 * idx[jrow] + 2]; sum4 += v[jrow] * x[7 * idx[jrow] + 3]; sum5 += v[jrow] * x[7 * idx[jrow] + 4]; sum6 += v[jrow] * x[7 * idx[jrow] + 5]; sum7 += v[jrow] * x[7 * idx[jrow] + 6]; jrow++; } y[7 * i] = sum1; y[7 * i + 1] = sum2; y[7 * i + 2] = sum3; y[7 * i + 3] = sum4; y[7 * i + 4] = sum5; y[7 * i + 5] = sum6; y[7 * i + 6] = sum7; } PetscCall(PetscLogFlops(14.0 * a->nz - 7.0 * nonzerorow)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7; const PetscInt m = b->AIJ->rmap->n, *idx; PetscInt n, i; PetscFunctionBegin; PetscCall(VecSet(yy, 0.0)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(yy, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha1 = x[7 * i]; alpha2 = x[7 * i + 1]; alpha3 = x[7 * i + 2]; alpha4 = x[7 * i + 3]; alpha5 = x[7 * i + 4]; alpha6 = x[7 * i + 5]; alpha7 = x[7 * i + 6]; while (n-- > 0) { y[7 * (*idx)] += alpha1 * (*v); y[7 * (*idx) + 1] += alpha2 * (*v); y[7 * (*idx) + 2] += alpha3 * (*v); y[7 * (*idx) + 3] += alpha4 * (*v); y[7 * (*idx) + 4] += alpha5 * (*v); y[7 * (*idx) + 5] += alpha6 * (*v); y[7 * (*idx) + 6] += alpha7 * (*v); idx++; v++; } } PetscCall(PetscLogFlops(14.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscInt n, i, jrow, j; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; for (j = 0; j < n; j++) { sum1 += v[jrow] * x[7 * idx[jrow]]; sum2 += v[jrow] * x[7 * idx[jrow] + 1]; sum3 += v[jrow] * x[7 * idx[jrow] + 2]; sum4 += v[jrow] * x[7 * idx[jrow] + 3]; sum5 += v[jrow] * x[7 * idx[jrow] + 4]; sum6 += v[jrow] * x[7 * idx[jrow] + 5]; sum7 += v[jrow] * x[7 * idx[jrow] + 6]; jrow++; } y[7 * i] += sum1; y[7 * i + 1] += sum2; y[7 * i + 2] += sum3; y[7 * i + 3] += sum4; y[7 * i + 4] += sum5; y[7 * i + 5] += sum6; y[7 * i + 6] += sum7; } PetscCall(PetscLogFlops(14.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(zz, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7; const PetscInt m = b->AIJ->rmap->n, *idx; PetscInt n, i; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha1 = x[7 * i]; alpha2 = x[7 * i + 1]; alpha3 = x[7 * i + 2]; alpha4 = x[7 * i + 3]; alpha5 = x[7 * i + 4]; alpha6 = x[7 * i + 5]; alpha7 = x[7 * i + 6]; while (n-- > 0) { y[7 * (*idx)] += alpha1 * (*v); y[7 * (*idx) + 1] += alpha2 * (*v); y[7 * (*idx) + 2] += alpha3 * (*v); y[7 * (*idx) + 3] += alpha4 * (*v); y[7 * (*idx) + 4] += alpha5 * (*v); y[7 * (*idx) + 5] += alpha6 * (*v); y[7 * (*idx) + 6] += alpha7 * (*v); idx++; v++; } } PetscCall(PetscLogFlops(14.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(zz, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMult_SeqMAIJ_8(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscInt nonzerorow = 0, n, i, jrow, j; PetscFunctionBegin; PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(yy, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; sum8 = 0.0; nonzerorow += (n > 0); for (j = 0; j < n; j++) { sum1 += v[jrow] * x[8 * idx[jrow]]; sum2 += v[jrow] * x[8 * idx[jrow] + 1]; sum3 += v[jrow] * x[8 * idx[jrow] + 2]; sum4 += v[jrow] * x[8 * idx[jrow] + 3]; sum5 += v[jrow] * x[8 * idx[jrow] + 4]; sum6 += v[jrow] * x[8 * idx[jrow] + 5]; sum7 += v[jrow] * x[8 * idx[jrow] + 6]; sum8 += v[jrow] * x[8 * idx[jrow] + 7]; jrow++; } y[8 * i] = sum1; y[8 * i + 1] = sum2; y[8 * i + 2] = sum3; y[8 * i + 3] = sum4; y[8 * i + 4] = sum5; y[8 * i + 5] = sum6; y[8 * i + 6] = sum7; y[8 * i + 7] = sum8; } PetscCall(PetscLogFlops(16.0 * a->nz - 8.0 * nonzerorow)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; const PetscInt m = b->AIJ->rmap->n, *idx; PetscInt n, i; PetscFunctionBegin; PetscCall(VecSet(yy, 0.0)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(yy, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha1 = x[8 * i]; alpha2 = x[8 * i + 1]; alpha3 = x[8 * i + 2]; alpha4 = x[8 * i + 3]; alpha5 = x[8 * i + 4]; alpha6 = x[8 * i + 5]; alpha7 = x[8 * i + 6]; alpha8 = x[8 * i + 7]; while (n-- > 0) { y[8 * (*idx)] += alpha1 * (*v); y[8 * (*idx) + 1] += alpha2 * (*v); y[8 * (*idx) + 2] += alpha3 * (*v); y[8 * (*idx) + 3] += alpha4 * (*v); y[8 * (*idx) + 4] += alpha5 * (*v); y[8 * (*idx) + 5] += alpha6 * (*v); y[8 * (*idx) + 6] += alpha7 * (*v); y[8 * (*idx) + 7] += alpha8 * (*v); idx++; v++; } } PetscCall(PetscLogFlops(16.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscInt n, i, jrow, j; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; sum8 = 0.0; for (j = 0; j < n; j++) { sum1 += v[jrow] * x[8 * idx[jrow]]; sum2 += v[jrow] * x[8 * idx[jrow] + 1]; sum3 += v[jrow] * x[8 * idx[jrow] + 2]; sum4 += v[jrow] * x[8 * idx[jrow] + 3]; sum5 += v[jrow] * x[8 * idx[jrow] + 4]; sum6 += v[jrow] * x[8 * idx[jrow] + 5]; sum7 += v[jrow] * x[8 * idx[jrow] + 6]; sum8 += v[jrow] * x[8 * idx[jrow] + 7]; jrow++; } y[8 * i] += sum1; y[8 * i + 1] += sum2; y[8 * i + 2] += sum3; y[8 * i + 3] += sum4; y[8 * i + 4] += sum5; y[8 * i + 5] += sum6; y[8 * i + 6] += sum7; y[8 * i + 7] += sum8; } PetscCall(PetscLogFlops(16.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(zz, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; const PetscInt m = b->AIJ->rmap->n, *idx; PetscInt n, i; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha1 = x[8 * i]; alpha2 = x[8 * i + 1]; alpha3 = x[8 * i + 2]; alpha4 = x[8 * i + 3]; alpha5 = x[8 * i + 4]; alpha6 = x[8 * i + 5]; alpha7 = x[8 * i + 6]; alpha8 = x[8 * i + 7]; while (n-- > 0) { y[8 * (*idx)] += alpha1 * (*v); y[8 * (*idx) + 1] += alpha2 * (*v); y[8 * (*idx) + 2] += alpha3 * (*v); y[8 * (*idx) + 3] += alpha4 * (*v); y[8 * (*idx) + 4] += alpha5 * (*v); y[8 * (*idx) + 5] += alpha6 * (*v); y[8 * (*idx) + 6] += alpha7 * (*v); y[8 * (*idx) + 7] += alpha8 * (*v); idx++; v++; } } PetscCall(PetscLogFlops(16.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(zz, &y)); PetscFunctionReturn(0); } /* ------------------------------------------------------------------------------*/ PetscErrorCode MatMult_SeqMAIJ_9(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscInt nonzerorow = 0, n, i, jrow, j; PetscFunctionBegin; PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(yy, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; sum8 = 0.0; sum9 = 0.0; nonzerorow += (n > 0); for (j = 0; j < n; j++) { sum1 += v[jrow] * x[9 * idx[jrow]]; sum2 += v[jrow] * x[9 * idx[jrow] + 1]; sum3 += v[jrow] * x[9 * idx[jrow] + 2]; sum4 += v[jrow] * x[9 * idx[jrow] + 3]; sum5 += v[jrow] * x[9 * idx[jrow] + 4]; sum6 += v[jrow] * x[9 * idx[jrow] + 5]; sum7 += v[jrow] * x[9 * idx[jrow] + 6]; sum8 += v[jrow] * x[9 * idx[jrow] + 7]; sum9 += v[jrow] * x[9 * idx[jrow] + 8]; jrow++; } y[9 * i] = sum1; y[9 * i + 1] = sum2; y[9 * i + 2] = sum3; y[9 * i + 3] = sum4; y[9 * i + 4] = sum5; y[9 * i + 5] = sum6; y[9 * i + 6] = sum7; y[9 * i + 7] = sum8; y[9 * i + 8] = sum9; } PetscCall(PetscLogFlops(18.0 * a->nz - 9 * nonzerorow)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } /* ------------------------------------------------------------------------------*/ PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9; const PetscInt m = b->AIJ->rmap->n, *idx; PetscInt n, i; PetscFunctionBegin; PetscCall(VecSet(yy, 0.0)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(yy, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha1 = x[9 * i]; alpha2 = x[9 * i + 1]; alpha3 = x[9 * i + 2]; alpha4 = x[9 * i + 3]; alpha5 = x[9 * i + 4]; alpha6 = x[9 * i + 5]; alpha7 = x[9 * i + 6]; alpha8 = x[9 * i + 7]; alpha9 = x[9 * i + 8]; while (n-- > 0) { y[9 * (*idx)] += alpha1 * (*v); y[9 * (*idx) + 1] += alpha2 * (*v); y[9 * (*idx) + 2] += alpha3 * (*v); y[9 * (*idx) + 3] += alpha4 * (*v); y[9 * (*idx) + 4] += alpha5 * (*v); y[9 * (*idx) + 5] += alpha6 * (*v); y[9 * (*idx) + 6] += alpha7 * (*v); y[9 * (*idx) + 7] += alpha8 * (*v); y[9 * (*idx) + 8] += alpha9 * (*v); idx++; v++; } } PetscCall(PetscLogFlops(18.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscInt n, i, jrow, j; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; sum8 = 0.0; sum9 = 0.0; for (j = 0; j < n; j++) { sum1 += v[jrow] * x[9 * idx[jrow]]; sum2 += v[jrow] * x[9 * idx[jrow] + 1]; sum3 += v[jrow] * x[9 * idx[jrow] + 2]; sum4 += v[jrow] * x[9 * idx[jrow] + 3]; sum5 += v[jrow] * x[9 * idx[jrow] + 4]; sum6 += v[jrow] * x[9 * idx[jrow] + 5]; sum7 += v[jrow] * x[9 * idx[jrow] + 6]; sum8 += v[jrow] * x[9 * idx[jrow] + 7]; sum9 += v[jrow] * x[9 * idx[jrow] + 8]; jrow++; } y[9 * i] += sum1; y[9 * i + 1] += sum2; y[9 * i + 2] += sum3; y[9 * i + 3] += sum4; y[9 * i + 4] += sum5; y[9 * i + 5] += sum6; y[9 * i + 6] += sum7; y[9 * i + 7] += sum8; y[9 * i + 8] += sum9; } PetscCall(PetscLogFlops(18.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(zz, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9; const PetscInt m = b->AIJ->rmap->n, *idx; PetscInt n, i; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha1 = x[9 * i]; alpha2 = x[9 * i + 1]; alpha3 = x[9 * i + 2]; alpha4 = x[9 * i + 3]; alpha5 = x[9 * i + 4]; alpha6 = x[9 * i + 5]; alpha7 = x[9 * i + 6]; alpha8 = x[9 * i + 7]; alpha9 = x[9 * i + 8]; while (n-- > 0) { y[9 * (*idx)] += alpha1 * (*v); y[9 * (*idx) + 1] += alpha2 * (*v); y[9 * (*idx) + 2] += alpha3 * (*v); y[9 * (*idx) + 3] += alpha4 * (*v); y[9 * (*idx) + 4] += alpha5 * (*v); y[9 * (*idx) + 5] += alpha6 * (*v); y[9 * (*idx) + 6] += alpha7 * (*v); y[9 * (*idx) + 7] += alpha8 * (*v); y[9 * (*idx) + 8] += alpha9 * (*v); idx++; v++; } } PetscCall(PetscLogFlops(18.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(zz, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMult_SeqMAIJ_10(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscInt nonzerorow = 0, n, i, jrow, j; PetscFunctionBegin; PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(yy, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; sum8 = 0.0; sum9 = 0.0; sum10 = 0.0; nonzerorow += (n > 0); for (j = 0; j < n; j++) { sum1 += v[jrow] * x[10 * idx[jrow]]; sum2 += v[jrow] * x[10 * idx[jrow] + 1]; sum3 += v[jrow] * x[10 * idx[jrow] + 2]; sum4 += v[jrow] * x[10 * idx[jrow] + 3]; sum5 += v[jrow] * x[10 * idx[jrow] + 4]; sum6 += v[jrow] * x[10 * idx[jrow] + 5]; sum7 += v[jrow] * x[10 * idx[jrow] + 6]; sum8 += v[jrow] * x[10 * idx[jrow] + 7]; sum9 += v[jrow] * x[10 * idx[jrow] + 8]; sum10 += v[jrow] * x[10 * idx[jrow] + 9]; jrow++; } y[10 * i] = sum1; y[10 * i + 1] = sum2; y[10 * i + 2] = sum3; y[10 * i + 3] = sum4; y[10 * i + 4] = sum5; y[10 * i + 5] = sum6; y[10 * i + 6] = sum7; y[10 * i + 7] = sum8; y[10 * i + 8] = sum9; y[10 * i + 9] = sum10; } PetscCall(PetscLogFlops(20.0 * a->nz - 10.0 * nonzerorow)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscInt n, i, jrow, j; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; sum8 = 0.0; sum9 = 0.0; sum10 = 0.0; for (j = 0; j < n; j++) { sum1 += v[jrow] * x[10 * idx[jrow]]; sum2 += v[jrow] * x[10 * idx[jrow] + 1]; sum3 += v[jrow] * x[10 * idx[jrow] + 2]; sum4 += v[jrow] * x[10 * idx[jrow] + 3]; sum5 += v[jrow] * x[10 * idx[jrow] + 4]; sum6 += v[jrow] * x[10 * idx[jrow] + 5]; sum7 += v[jrow] * x[10 * idx[jrow] + 6]; sum8 += v[jrow] * x[10 * idx[jrow] + 7]; sum9 += v[jrow] * x[10 * idx[jrow] + 8]; sum10 += v[jrow] * x[10 * idx[jrow] + 9]; jrow++; } y[10 * i] += sum1; y[10 * i + 1] += sum2; y[10 * i + 2] += sum3; y[10 * i + 3] += sum4; y[10 * i + 4] += sum5; y[10 * i + 5] += sum6; y[10 * i + 6] += sum7; y[10 * i + 7] += sum8; y[10 * i + 8] += sum9; y[10 * i + 9] += sum10; } PetscCall(PetscLogFlops(20.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10; const PetscInt m = b->AIJ->rmap->n, *idx; PetscInt n, i; PetscFunctionBegin; PetscCall(VecSet(yy, 0.0)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(yy, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha1 = x[10 * i]; alpha2 = x[10 * i + 1]; alpha3 = x[10 * i + 2]; alpha4 = x[10 * i + 3]; alpha5 = x[10 * i + 4]; alpha6 = x[10 * i + 5]; alpha7 = x[10 * i + 6]; alpha8 = x[10 * i + 7]; alpha9 = x[10 * i + 8]; alpha10 = x[10 * i + 9]; while (n-- > 0) { y[10 * (*idx)] += alpha1 * (*v); y[10 * (*idx) + 1] += alpha2 * (*v); y[10 * (*idx) + 2] += alpha3 * (*v); y[10 * (*idx) + 3] += alpha4 * (*v); y[10 * (*idx) + 4] += alpha5 * (*v); y[10 * (*idx) + 5] += alpha6 * (*v); y[10 * (*idx) + 6] += alpha7 * (*v); y[10 * (*idx) + 7] += alpha8 * (*v); y[10 * (*idx) + 8] += alpha9 * (*v); y[10 * (*idx) + 9] += alpha10 * (*v); idx++; v++; } } PetscCall(PetscLogFlops(20.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10; const PetscInt m = b->AIJ->rmap->n, *idx; PetscInt n, i; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha1 = x[10 * i]; alpha2 = x[10 * i + 1]; alpha3 = x[10 * i + 2]; alpha4 = x[10 * i + 3]; alpha5 = x[10 * i + 4]; alpha6 = x[10 * i + 5]; alpha7 = x[10 * i + 6]; alpha8 = x[10 * i + 7]; alpha9 = x[10 * i + 8]; alpha10 = x[10 * i + 9]; while (n-- > 0) { y[10 * (*idx)] += alpha1 * (*v); y[10 * (*idx) + 1] += alpha2 * (*v); y[10 * (*idx) + 2] += alpha3 * (*v); y[10 * (*idx) + 3] += alpha4 * (*v); y[10 * (*idx) + 4] += alpha5 * (*v); y[10 * (*idx) + 5] += alpha6 * (*v); y[10 * (*idx) + 6] += alpha7 * (*v); y[10 * (*idx) + 7] += alpha8 * (*v); y[10 * (*idx) + 8] += alpha9 * (*v); y[10 * (*idx) + 9] += alpha10 * (*v); idx++; v++; } } PetscCall(PetscLogFlops(20.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(zz, &y)); PetscFunctionReturn(0); } /*--------------------------------------------------------------------------------------------*/ PetscErrorCode MatMult_SeqMAIJ_11(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscInt nonzerorow = 0, n, i, jrow, j; PetscFunctionBegin; PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(yy, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; sum8 = 0.0; sum9 = 0.0; sum10 = 0.0; sum11 = 0.0; nonzerorow += (n > 0); for (j = 0; j < n; j++) { sum1 += v[jrow] * x[11 * idx[jrow]]; sum2 += v[jrow] * x[11 * idx[jrow] + 1]; sum3 += v[jrow] * x[11 * idx[jrow] + 2]; sum4 += v[jrow] * x[11 * idx[jrow] + 3]; sum5 += v[jrow] * x[11 * idx[jrow] + 4]; sum6 += v[jrow] * x[11 * idx[jrow] + 5]; sum7 += v[jrow] * x[11 * idx[jrow] + 6]; sum8 += v[jrow] * x[11 * idx[jrow] + 7]; sum9 += v[jrow] * x[11 * idx[jrow] + 8]; sum10 += v[jrow] * x[11 * idx[jrow] + 9]; sum11 += v[jrow] * x[11 * idx[jrow] + 10]; jrow++; } y[11 * i] = sum1; y[11 * i + 1] = sum2; y[11 * i + 2] = sum3; y[11 * i + 3] = sum4; y[11 * i + 4] = sum5; y[11 * i + 5] = sum6; y[11 * i + 6] = sum7; y[11 * i + 7] = sum8; y[11 * i + 8] = sum9; y[11 * i + 9] = sum10; y[11 * i + 10] = sum11; } PetscCall(PetscLogFlops(22.0 * a->nz - 11 * nonzerorow)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscInt n, i, jrow, j; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; sum8 = 0.0; sum9 = 0.0; sum10 = 0.0; sum11 = 0.0; for (j = 0; j < n; j++) { sum1 += v[jrow] * x[11 * idx[jrow]]; sum2 += v[jrow] * x[11 * idx[jrow] + 1]; sum3 += v[jrow] * x[11 * idx[jrow] + 2]; sum4 += v[jrow] * x[11 * idx[jrow] + 3]; sum5 += v[jrow] * x[11 * idx[jrow] + 4]; sum6 += v[jrow] * x[11 * idx[jrow] + 5]; sum7 += v[jrow] * x[11 * idx[jrow] + 6]; sum8 += v[jrow] * x[11 * idx[jrow] + 7]; sum9 += v[jrow] * x[11 * idx[jrow] + 8]; sum10 += v[jrow] * x[11 * idx[jrow] + 9]; sum11 += v[jrow] * x[11 * idx[jrow] + 10]; jrow++; } y[11 * i] += sum1; y[11 * i + 1] += sum2; y[11 * i + 2] += sum3; y[11 * i + 3] += sum4; y[11 * i + 4] += sum5; y[11 * i + 5] += sum6; y[11 * i + 6] += sum7; y[11 * i + 7] += sum8; y[11 * i + 8] += sum9; y[11 * i + 9] += sum10; y[11 * i + 10] += sum11; } PetscCall(PetscLogFlops(22.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11; const PetscInt m = b->AIJ->rmap->n, *idx; PetscInt n, i; PetscFunctionBegin; PetscCall(VecSet(yy, 0.0)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(yy, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha1 = x[11 * i]; alpha2 = x[11 * i + 1]; alpha3 = x[11 * i + 2]; alpha4 = x[11 * i + 3]; alpha5 = x[11 * i + 4]; alpha6 = x[11 * i + 5]; alpha7 = x[11 * i + 6]; alpha8 = x[11 * i + 7]; alpha9 = x[11 * i + 8]; alpha10 = x[11 * i + 9]; alpha11 = x[11 * i + 10]; while (n-- > 0) { y[11 * (*idx)] += alpha1 * (*v); y[11 * (*idx) + 1] += alpha2 * (*v); y[11 * (*idx) + 2] += alpha3 * (*v); y[11 * (*idx) + 3] += alpha4 * (*v); y[11 * (*idx) + 4] += alpha5 * (*v); y[11 * (*idx) + 5] += alpha6 * (*v); y[11 * (*idx) + 6] += alpha7 * (*v); y[11 * (*idx) + 7] += alpha8 * (*v); y[11 * (*idx) + 8] += alpha9 * (*v); y[11 * (*idx) + 9] += alpha10 * (*v); y[11 * (*idx) + 10] += alpha11 * (*v); idx++; v++; } } PetscCall(PetscLogFlops(22.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8, alpha9, alpha10, alpha11; const PetscInt m = b->AIJ->rmap->n, *idx; PetscInt n, i; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha1 = x[11 * i]; alpha2 = x[11 * i + 1]; alpha3 = x[11 * i + 2]; alpha4 = x[11 * i + 3]; alpha5 = x[11 * i + 4]; alpha6 = x[11 * i + 5]; alpha7 = x[11 * i + 6]; alpha8 = x[11 * i + 7]; alpha9 = x[11 * i + 8]; alpha10 = x[11 * i + 9]; alpha11 = x[11 * i + 10]; while (n-- > 0) { y[11 * (*idx)] += alpha1 * (*v); y[11 * (*idx) + 1] += alpha2 * (*v); y[11 * (*idx) + 2] += alpha3 * (*v); y[11 * (*idx) + 3] += alpha4 * (*v); y[11 * (*idx) + 4] += alpha5 * (*v); y[11 * (*idx) + 5] += alpha6 * (*v); y[11 * (*idx) + 6] += alpha7 * (*v); y[11 * (*idx) + 7] += alpha8 * (*v); y[11 * (*idx) + 8] += alpha9 * (*v); y[11 * (*idx) + 9] += alpha10 * (*v); y[11 * (*idx) + 10] += alpha11 * (*v); idx++; v++; } } PetscCall(PetscLogFlops(22.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(zz, &y)); PetscFunctionReturn(0); } /*--------------------------------------------------------------------------------------------*/ PetscErrorCode MatMult_SeqMAIJ_16(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscInt nonzerorow = 0, n, i, jrow, j; PetscFunctionBegin; PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(yy, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; sum8 = 0.0; sum9 = 0.0; sum10 = 0.0; sum11 = 0.0; sum12 = 0.0; sum13 = 0.0; sum14 = 0.0; sum15 = 0.0; sum16 = 0.0; nonzerorow += (n > 0); for (j = 0; j < n; j++) { sum1 += v[jrow] * x[16 * idx[jrow]]; sum2 += v[jrow] * x[16 * idx[jrow] + 1]; sum3 += v[jrow] * x[16 * idx[jrow] + 2]; sum4 += v[jrow] * x[16 * idx[jrow] + 3]; sum5 += v[jrow] * x[16 * idx[jrow] + 4]; sum6 += v[jrow] * x[16 * idx[jrow] + 5]; sum7 += v[jrow] * x[16 * idx[jrow] + 6]; sum8 += v[jrow] * x[16 * idx[jrow] + 7]; sum9 += v[jrow] * x[16 * idx[jrow] + 8]; sum10 += v[jrow] * x[16 * idx[jrow] + 9]; sum11 += v[jrow] * x[16 * idx[jrow] + 10]; sum12 += v[jrow] * x[16 * idx[jrow] + 11]; sum13 += v[jrow] * x[16 * idx[jrow] + 12]; sum14 += v[jrow] * x[16 * idx[jrow] + 13]; sum15 += v[jrow] * x[16 * idx[jrow] + 14]; sum16 += v[jrow] * x[16 * idx[jrow] + 15]; jrow++; } y[16 * i] = sum1; y[16 * i + 1] = sum2; y[16 * i + 2] = sum3; y[16 * i + 3] = sum4; y[16 * i + 4] = sum5; y[16 * i + 5] = sum6; y[16 * i + 6] = sum7; y[16 * i + 7] = sum8; y[16 * i + 8] = sum9; y[16 * i + 9] = sum10; y[16 * i + 10] = sum11; y[16 * i + 11] = sum12; y[16 * i + 12] = sum13; y[16 * i + 13] = sum14; y[16 * i + 14] = sum15; y[16 * i + 15] = sum16; } PetscCall(PetscLogFlops(32.0 * a->nz - 16.0 * nonzerorow)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16; const PetscInt m = b->AIJ->rmap->n, *idx; PetscInt n, i; PetscFunctionBegin; PetscCall(VecSet(yy, 0.0)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(yy, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha1 = x[16 * i]; alpha2 = x[16 * i + 1]; alpha3 = x[16 * i + 2]; alpha4 = x[16 * i + 3]; alpha5 = x[16 * i + 4]; alpha6 = x[16 * i + 5]; alpha7 = x[16 * i + 6]; alpha8 = x[16 * i + 7]; alpha9 = x[16 * i + 8]; alpha10 = x[16 * i + 9]; alpha11 = x[16 * i + 10]; alpha12 = x[16 * i + 11]; alpha13 = x[16 * i + 12]; alpha14 = x[16 * i + 13]; alpha15 = x[16 * i + 14]; alpha16 = x[16 * i + 15]; while (n-- > 0) { y[16 * (*idx)] += alpha1 * (*v); y[16 * (*idx) + 1] += alpha2 * (*v); y[16 * (*idx) + 2] += alpha3 * (*v); y[16 * (*idx) + 3] += alpha4 * (*v); y[16 * (*idx) + 4] += alpha5 * (*v); y[16 * (*idx) + 5] += alpha6 * (*v); y[16 * (*idx) + 6] += alpha7 * (*v); y[16 * (*idx) + 7] += alpha8 * (*v); y[16 * (*idx) + 8] += alpha9 * (*v); y[16 * (*idx) + 9] += alpha10 * (*v); y[16 * (*idx) + 10] += alpha11 * (*v); y[16 * (*idx) + 11] += alpha12 * (*v); y[16 * (*idx) + 12] += alpha13 * (*v); y[16 * (*idx) + 13] += alpha14 * (*v); y[16 * (*idx) + 14] += alpha15 * (*v); y[16 * (*idx) + 15] += alpha16 * (*v); idx++; v++; } } PetscCall(PetscLogFlops(32.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscInt n, i, jrow, j; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; sum8 = 0.0; sum9 = 0.0; sum10 = 0.0; sum11 = 0.0; sum12 = 0.0; sum13 = 0.0; sum14 = 0.0; sum15 = 0.0; sum16 = 0.0; for (j = 0; j < n; j++) { sum1 += v[jrow] * x[16 * idx[jrow]]; sum2 += v[jrow] * x[16 * idx[jrow] + 1]; sum3 += v[jrow] * x[16 * idx[jrow] + 2]; sum4 += v[jrow] * x[16 * idx[jrow] + 3]; sum5 += v[jrow] * x[16 * idx[jrow] + 4]; sum6 += v[jrow] * x[16 * idx[jrow] + 5]; sum7 += v[jrow] * x[16 * idx[jrow] + 6]; sum8 += v[jrow] * x[16 * idx[jrow] + 7]; sum9 += v[jrow] * x[16 * idx[jrow] + 8]; sum10 += v[jrow] * x[16 * idx[jrow] + 9]; sum11 += v[jrow] * x[16 * idx[jrow] + 10]; sum12 += v[jrow] * x[16 * idx[jrow] + 11]; sum13 += v[jrow] * x[16 * idx[jrow] + 12]; sum14 += v[jrow] * x[16 * idx[jrow] + 13]; sum15 += v[jrow] * x[16 * idx[jrow] + 14]; sum16 += v[jrow] * x[16 * idx[jrow] + 15]; jrow++; } y[16 * i] += sum1; y[16 * i + 1] += sum2; y[16 * i + 2] += sum3; y[16 * i + 3] += sum4; y[16 * i + 4] += sum5; y[16 * i + 5] += sum6; y[16 * i + 6] += sum7; y[16 * i + 7] += sum8; y[16 * i + 8] += sum9; y[16 * i + 9] += sum10; y[16 * i + 10] += sum11; y[16 * i + 11] += sum12; y[16 * i + 12] += sum13; y[16 * i + 13] += sum14; y[16 * i + 14] += sum15; y[16 * i + 15] += sum16; } PetscCall(PetscLogFlops(32.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(zz, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16; const PetscInt m = b->AIJ->rmap->n, *idx; PetscInt n, i; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha1 = x[16 * i]; alpha2 = x[16 * i + 1]; alpha3 = x[16 * i + 2]; alpha4 = x[16 * i + 3]; alpha5 = x[16 * i + 4]; alpha6 = x[16 * i + 5]; alpha7 = x[16 * i + 6]; alpha8 = x[16 * i + 7]; alpha9 = x[16 * i + 8]; alpha10 = x[16 * i + 9]; alpha11 = x[16 * i + 10]; alpha12 = x[16 * i + 11]; alpha13 = x[16 * i + 12]; alpha14 = x[16 * i + 13]; alpha15 = x[16 * i + 14]; alpha16 = x[16 * i + 15]; while (n-- > 0) { y[16 * (*idx)] += alpha1 * (*v); y[16 * (*idx) + 1] += alpha2 * (*v); y[16 * (*idx) + 2] += alpha3 * (*v); y[16 * (*idx) + 3] += alpha4 * (*v); y[16 * (*idx) + 4] += alpha5 * (*v); y[16 * (*idx) + 5] += alpha6 * (*v); y[16 * (*idx) + 6] += alpha7 * (*v); y[16 * (*idx) + 7] += alpha8 * (*v); y[16 * (*idx) + 8] += alpha9 * (*v); y[16 * (*idx) + 9] += alpha10 * (*v); y[16 * (*idx) + 10] += alpha11 * (*v); y[16 * (*idx) + 11] += alpha12 * (*v); y[16 * (*idx) + 12] += alpha13 * (*v); y[16 * (*idx) + 13] += alpha14 * (*v); y[16 * (*idx) + 14] += alpha15 * (*v); y[16 * (*idx) + 15] += alpha16 * (*v); idx++; v++; } } PetscCall(PetscLogFlops(32.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(zz, &y)); PetscFunctionReturn(0); } /*--------------------------------------------------------------------------------------------*/ PetscErrorCode MatMult_SeqMAIJ_18(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscInt nonzerorow = 0, n, i, jrow, j; PetscFunctionBegin; PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(yy, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; sum8 = 0.0; sum9 = 0.0; sum10 = 0.0; sum11 = 0.0; sum12 = 0.0; sum13 = 0.0; sum14 = 0.0; sum15 = 0.0; sum16 = 0.0; sum17 = 0.0; sum18 = 0.0; nonzerorow += (n > 0); for (j = 0; j < n; j++) { sum1 += v[jrow] * x[18 * idx[jrow]]; sum2 += v[jrow] * x[18 * idx[jrow] + 1]; sum3 += v[jrow] * x[18 * idx[jrow] + 2]; sum4 += v[jrow] * x[18 * idx[jrow] + 3]; sum5 += v[jrow] * x[18 * idx[jrow] + 4]; sum6 += v[jrow] * x[18 * idx[jrow] + 5]; sum7 += v[jrow] * x[18 * idx[jrow] + 6]; sum8 += v[jrow] * x[18 * idx[jrow] + 7]; sum9 += v[jrow] * x[18 * idx[jrow] + 8]; sum10 += v[jrow] * x[18 * idx[jrow] + 9]; sum11 += v[jrow] * x[18 * idx[jrow] + 10]; sum12 += v[jrow] * x[18 * idx[jrow] + 11]; sum13 += v[jrow] * x[18 * idx[jrow] + 12]; sum14 += v[jrow] * x[18 * idx[jrow] + 13]; sum15 += v[jrow] * x[18 * idx[jrow] + 14]; sum16 += v[jrow] * x[18 * idx[jrow] + 15]; sum17 += v[jrow] * x[18 * idx[jrow] + 16]; sum18 += v[jrow] * x[18 * idx[jrow] + 17]; jrow++; } y[18 * i] = sum1; y[18 * i + 1] = sum2; y[18 * i + 2] = sum3; y[18 * i + 3] = sum4; y[18 * i + 4] = sum5; y[18 * i + 5] = sum6; y[18 * i + 6] = sum7; y[18 * i + 7] = sum8; y[18 * i + 8] = sum9; y[18 * i + 9] = sum10; y[18 * i + 10] = sum11; y[18 * i + 11] = sum12; y[18 * i + 12] = sum13; y[18 * i + 13] = sum14; y[18 * i + 14] = sum15; y[18 * i + 15] = sum16; y[18 * i + 16] = sum17; y[18 * i + 17] = sum18; } PetscCall(PetscLogFlops(36.0 * a->nz - 18.0 * nonzerorow)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18; const PetscInt m = b->AIJ->rmap->n, *idx; PetscInt n, i; PetscFunctionBegin; PetscCall(VecSet(yy, 0.0)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(yy, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha1 = x[18 * i]; alpha2 = x[18 * i + 1]; alpha3 = x[18 * i + 2]; alpha4 = x[18 * i + 3]; alpha5 = x[18 * i + 4]; alpha6 = x[18 * i + 5]; alpha7 = x[18 * i + 6]; alpha8 = x[18 * i + 7]; alpha9 = x[18 * i + 8]; alpha10 = x[18 * i + 9]; alpha11 = x[18 * i + 10]; alpha12 = x[18 * i + 11]; alpha13 = x[18 * i + 12]; alpha14 = x[18 * i + 13]; alpha15 = x[18 * i + 14]; alpha16 = x[18 * i + 15]; alpha17 = x[18 * i + 16]; alpha18 = x[18 * i + 17]; while (n-- > 0) { y[18 * (*idx)] += alpha1 * (*v); y[18 * (*idx) + 1] += alpha2 * (*v); y[18 * (*idx) + 2] += alpha3 * (*v); y[18 * (*idx) + 3] += alpha4 * (*v); y[18 * (*idx) + 4] += alpha5 * (*v); y[18 * (*idx) + 5] += alpha6 * (*v); y[18 * (*idx) + 6] += alpha7 * (*v); y[18 * (*idx) + 7] += alpha8 * (*v); y[18 * (*idx) + 8] += alpha9 * (*v); y[18 * (*idx) + 9] += alpha10 * (*v); y[18 * (*idx) + 10] += alpha11 * (*v); y[18 * (*idx) + 11] += alpha12 * (*v); y[18 * (*idx) + 12] += alpha13 * (*v); y[18 * (*idx) + 13] += alpha14 * (*v); y[18 * (*idx) + 14] += alpha15 * (*v); y[18 * (*idx) + 15] += alpha16 * (*v); y[18 * (*idx) + 16] += alpha17 * (*v); y[18 * (*idx) + 17] += alpha18 * (*v); idx++; v++; } } PetscCall(PetscLogFlops(36.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscInt n, i, jrow, j; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; sum8 = 0.0; sum9 = 0.0; sum10 = 0.0; sum11 = 0.0; sum12 = 0.0; sum13 = 0.0; sum14 = 0.0; sum15 = 0.0; sum16 = 0.0; sum17 = 0.0; sum18 = 0.0; for (j = 0; j < n; j++) { sum1 += v[jrow] * x[18 * idx[jrow]]; sum2 += v[jrow] * x[18 * idx[jrow] + 1]; sum3 += v[jrow] * x[18 * idx[jrow] + 2]; sum4 += v[jrow] * x[18 * idx[jrow] + 3]; sum5 += v[jrow] * x[18 * idx[jrow] + 4]; sum6 += v[jrow] * x[18 * idx[jrow] + 5]; sum7 += v[jrow] * x[18 * idx[jrow] + 6]; sum8 += v[jrow] * x[18 * idx[jrow] + 7]; sum9 += v[jrow] * x[18 * idx[jrow] + 8]; sum10 += v[jrow] * x[18 * idx[jrow] + 9]; sum11 += v[jrow] * x[18 * idx[jrow] + 10]; sum12 += v[jrow] * x[18 * idx[jrow] + 11]; sum13 += v[jrow] * x[18 * idx[jrow] + 12]; sum14 += v[jrow] * x[18 * idx[jrow] + 13]; sum15 += v[jrow] * x[18 * idx[jrow] + 14]; sum16 += v[jrow] * x[18 * idx[jrow] + 15]; sum17 += v[jrow] * x[18 * idx[jrow] + 16]; sum18 += v[jrow] * x[18 * idx[jrow] + 17]; jrow++; } y[18 * i] += sum1; y[18 * i + 1] += sum2; y[18 * i + 2] += sum3; y[18 * i + 3] += sum4; y[18 * i + 4] += sum5; y[18 * i + 5] += sum6; y[18 * i + 6] += sum7; y[18 * i + 7] += sum8; y[18 * i + 8] += sum9; y[18 * i + 9] += sum10; y[18 * i + 10] += sum11; y[18 * i + 11] += sum12; y[18 * i + 12] += sum13; y[18 * i + 13] += sum14; y[18 * i + 14] += sum15; y[18 * i + 15] += sum16; y[18 * i + 16] += sum17; y[18 * i + 17] += sum18; } PetscCall(PetscLogFlops(36.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(zz, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, alpha1, alpha2, alpha3, alpha4, alpha5, alpha6, alpha7, alpha8; PetscScalar alpha9, alpha10, alpha11, alpha12, alpha13, alpha14, alpha15, alpha16, alpha17, alpha18; const PetscInt m = b->AIJ->rmap->n, *idx; PetscInt n, i; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha1 = x[18 * i]; alpha2 = x[18 * i + 1]; alpha3 = x[18 * i + 2]; alpha4 = x[18 * i + 3]; alpha5 = x[18 * i + 4]; alpha6 = x[18 * i + 5]; alpha7 = x[18 * i + 6]; alpha8 = x[18 * i + 7]; alpha9 = x[18 * i + 8]; alpha10 = x[18 * i + 9]; alpha11 = x[18 * i + 10]; alpha12 = x[18 * i + 11]; alpha13 = x[18 * i + 12]; alpha14 = x[18 * i + 13]; alpha15 = x[18 * i + 14]; alpha16 = x[18 * i + 15]; alpha17 = x[18 * i + 16]; alpha18 = x[18 * i + 17]; while (n-- > 0) { y[18 * (*idx)] += alpha1 * (*v); y[18 * (*idx) + 1] += alpha2 * (*v); y[18 * (*idx) + 2] += alpha3 * (*v); y[18 * (*idx) + 3] += alpha4 * (*v); y[18 * (*idx) + 4] += alpha5 * (*v); y[18 * (*idx) + 5] += alpha6 * (*v); y[18 * (*idx) + 6] += alpha7 * (*v); y[18 * (*idx) + 7] += alpha8 * (*v); y[18 * (*idx) + 8] += alpha9 * (*v); y[18 * (*idx) + 9] += alpha10 * (*v); y[18 * (*idx) + 10] += alpha11 * (*v); y[18 * (*idx) + 11] += alpha12 * (*v); y[18 * (*idx) + 12] += alpha13 * (*v); y[18 * (*idx) + 13] += alpha14 * (*v); y[18 * (*idx) + 14] += alpha15 * (*v); y[18 * (*idx) + 15] += alpha16 * (*v); y[18 * (*idx) + 16] += alpha17 * (*v); y[18 * (*idx) + 17] += alpha18 * (*v); idx++; v++; } } PetscCall(PetscLogFlops(36.0 * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(zz, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, *sums; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscInt n, i, jrow, j, dof = b->dof, k; PetscFunctionBegin; PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecSet(yy, 0.0)); PetscCall(VecGetArray(yy, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sums = y + dof * i; for (j = 0; j < n; j++) { for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; jrow++; } } PetscCall(PetscLogFlops(2.0 * dof * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v; PetscScalar *y, *sums; const PetscInt m = b->AIJ->rmap->n, *idx, *ii; PetscInt n, i, jrow, j, dof = b->dof, k; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); idx = a->j; v = a->a; ii = a->i; for (i = 0; i < m; i++) { jrow = ii[i]; n = ii[i + 1] - jrow; sums = y + dof * i; for (j = 0; j < n; j++) { for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; jrow++; } } PetscCall(PetscLogFlops(2.0 * dof * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(zz, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v, *alpha; PetscScalar *y; const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; PetscInt n, i, k; PetscFunctionBegin; PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecSet(yy, 0.0)); PetscCall(VecGetArray(yy, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha = x + dof * i; while (n-- > 0) { for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); idx++; v++; } } PetscCall(PetscLogFlops(2.0 * dof * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(yy, &y)); PetscFunctionReturn(0); } PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; const PetscScalar *x, *v, *alpha; PetscScalar *y; const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; PetscInt n, i, k; PetscFunctionBegin; if (yy != zz) PetscCall(VecCopy(yy, zz)); PetscCall(VecGetArrayRead(xx, &x)); PetscCall(VecGetArray(zz, &y)); for (i = 0; i < m; i++) { idx = a->j + a->i[i]; v = a->a + a->i[i]; n = a->i[i + 1] - a->i[i]; alpha = x + dof * i; while (n-- > 0) { for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); idx++; v++; } } PetscCall(PetscLogFlops(2.0 * dof * a->nz)); PetscCall(VecRestoreArrayRead(xx, &x)); PetscCall(VecRestoreArray(zz, &y)); PetscFunctionReturn(0); } /*===================================================================================*/ PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) { Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; PetscFunctionBegin; /* start the scatter */ PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy)); PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy)); PetscFunctionReturn(0); } PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) { Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; PetscFunctionBegin; PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy)); PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); PetscFunctionReturn(0); } PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) { Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; PetscFunctionBegin; /* start the scatter */ PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz)); PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); PetscFunctionReturn(0); } PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) { Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; PetscFunctionBegin; PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz)); PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); PetscFunctionReturn(0); } /* ----------------------------------------------------------------*/ PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) { Mat_Product *product = C->product; PetscFunctionBegin; if (product->type == MATPRODUCT_PtAP) { C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]); PetscFunctionReturn(0); } PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) { Mat_Product *product = C->product; PetscBool flg = PETSC_FALSE; Mat A = product->A, P = product->B; PetscInt alg = 1; /* set default algorithm */ #if !defined(PETSC_HAVE_HYPRE) const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"}; PetscInt nalg = 4; #else const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"}; PetscInt nalg = 5; #endif PetscFunctionBegin; PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]); /* PtAP */ /* Check matrix local sizes */ PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend); PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend); /* Set the default algorithm */ PetscCall(PetscStrcmp(C->product->alg, "default", &flg)); if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); /* Get runtime option */ PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg)); if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); PetscOptionsEnd(); PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg)); if (flg) { C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; PetscFunctionReturn(0); } PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg)); if (flg) { C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; PetscFunctionReturn(0); } /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n")); PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P)); PetscCall(MatProductSetFromOptions(C)); PetscFunctionReturn(0); } /* ----------------------------------------------------------------*/ PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C) { PetscFreeSpaceList free_space = NULL, current_space = NULL; Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; Mat P = pp->AIJ; Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c; PetscInt *pti, *ptj, *ptJ; PetscInt *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj; const PetscInt an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof; PetscInt i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn; MatScalar *ca; const PetscInt *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj; PetscFunctionBegin; /* Get ij structure of P^T */ PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); cn = pn * ppdof; /* Allocate ci array, arrays for fill computation and */ /* free space for accumulating nonzero column info */ PetscCall(PetscMalloc1(cn + 1, &ci)); ci[0] = 0; /* Work arrays for rows of P^T*A */ PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow)); PetscCall(PetscArrayzero(ptadenserow, an)); PetscCall(PetscArrayzero(denserow, cn)); /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ /* This should be reasonable if sparsity of PtAP is similar to that of A. */ /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space)); current_space = free_space; /* Determine symbolic info for each row of C: */ for (i = 0; i < pn; i++) { ptnzi = pti[i + 1] - pti[i]; ptJ = ptj + pti[i]; for (dof = 0; dof < ppdof; dof++) { ptanzi = 0; /* Determine symbolic row of PtA: */ for (j = 0; j < ptnzi; j++) { /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ arow = ptJ[j] * ppdof + dof; /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ anzj = ai[arow + 1] - ai[arow]; ajj = aj + ai[arow]; for (k = 0; k < anzj; k++) { if (!ptadenserow[ajj[k]]) { ptadenserow[ajj[k]] = -1; ptasparserow[ptanzi++] = ajj[k]; } } } /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ ptaj = ptasparserow; cnzi = 0; for (j = 0; j < ptanzi; j++) { /* Get offset within block of P */ pshift = *ptaj % ppdof; /* Get block row of P */ prow = (*ptaj++) / ppdof; /* integer division */ /* P has same number of nonzeros per row as the compressed form */ pnzj = pi[prow + 1] - pi[prow]; pjj = pj + pi[prow]; for (k = 0; k < pnzj; k++) { /* Locations in C are shifted by the offset within the block */ /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ if (!denserow[pjj[k] * ppdof + pshift]) { denserow[pjj[k] * ppdof + pshift] = -1; sparserow[cnzi++] = pjj[k] * ppdof + pshift; } } } /* sort sparserow */ PetscCall(PetscSortInt(cnzi, sparserow)); /* If free space is not available, make more free space */ /* Double the amount of total space in the list */ if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space)); /* Copy data into free space, and zero out denserows */ PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi)); current_space->array += cnzi; current_space->local_used += cnzi; current_space->local_remaining -= cnzi; for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0; /* Aside: Perhaps we should save the pta info for the numerical factorization. */ /* For now, we will recompute what is needed. */ ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi; } } /* nnz is now stored in ci[ptm], column indices are in the list of free space */ /* Allocate space for cj, initialize cj, and */ /* destroy list of free space and other temporary array(s) */ PetscCall(PetscMalloc1(ci[cn] + 1, &cj)); PetscCall(PetscFreeSpaceContiguous(&free_space, cj)); PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow)); /* Allocate space for ca */ PetscCall(PetscCalloc1(ci[cn] + 1, &ca)); /* put together the new matrix */ PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C)); PetscCall(MatSetBlockSize(C, pp->dof)); /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ /* Since these are PETSc arrays, change flags to free them as necessary. */ c = (Mat_SeqAIJ *)(C->data); c->free_a = PETSC_TRUE; c->free_ij = PETSC_TRUE; c->nonew = 0; C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; C->ops->productnumeric = MatProductNumeric_PtAP; /* Clean up. */ PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); PetscFunctionReturn(0); } PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C) { /* This routine requires testing -- first draft only */ Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; Mat P = pp->AIJ; Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; Mat_SeqAIJ *p = (Mat_SeqAIJ *)P->data; Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; const PetscInt *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj; const PetscInt *ci = c->i, *cj = c->j, *cjj; const PetscInt am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof; PetscInt i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense; const MatScalar *aa = a->a, *pa = p->a, *pA, *paj; MatScalar *ca = c->a, *caj, *apa; PetscFunctionBegin; /* Allocate temporary array for storage of one row of A*P */ PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense)); /* Clear old values in C */ PetscCall(PetscArrayzero(ca, ci[cm])); for (i = 0; i < am; i++) { /* Form sparse row of A*P */ anzi = ai[i + 1] - ai[i]; apnzj = 0; for (j = 0; j < anzi; j++) { /* Get offset within block of P */ pshift = *aj % ppdof; /* Get block row of P */ prow = *aj++ / ppdof; /* integer division */ pnzj = pi[prow + 1] - pi[prow]; pjj = pj + pi[prow]; paj = pa + pi[prow]; for (k = 0; k < pnzj; k++) { poffset = pjj[k] * ppdof + pshift; if (!apjdense[poffset]) { apjdense[poffset] = -1; apj[apnzj++] = poffset; } apa[poffset] += (*aa) * paj[k]; } PetscCall(PetscLogFlops(2.0 * pnzj)); aa++; } /* Sort the j index array for quick sparse axpy. */ /* Note: a array does not need sorting as it is in dense storage locations. */ PetscCall(PetscSortInt(apnzj, apj)); /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ prow = i / ppdof; /* integer division */ pshift = i % ppdof; poffset = pi[prow]; pnzi = pi[prow + 1] - poffset; /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ pJ = pj + poffset; pA = pa + poffset; for (j = 0; j < pnzi; j++) { crow = (*pJ) * ppdof + pshift; cjj = cj + ci[crow]; caj = ca + ci[crow]; pJ++; /* Perform sparse axpy operation. Note cjj includes apj. */ for (k = 0, nextap = 0; nextap < apnzj; k++) { if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]]; } PetscCall(PetscLogFlops(2.0 * apnzj)); pA++; } /* Zero the current row info for A*P */ for (j = 0; j < apnzj; j++) { apa[apj[j]] = 0.; apjdense[apj[j]] = 0; } } /* Assemble the final matrix and clean up */ PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); PetscCall(PetscFree3(apa, apj, apjdense)); PetscFunctionReturn(0); } PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) { Mat_Product *product = C->product; Mat A = product->A, P = product->B; PetscFunctionBegin; PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C)); PetscFunctionReturn(0); } PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A, Mat PP, PetscReal fill, Mat *C) { PetscFunctionBegin; SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet"); } PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A, Mat PP, Mat C) { PetscFunctionBegin; SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); } PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat); PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C) { Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; PetscFunctionBegin; PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C)); PetscFunctionReturn(0); } PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat); PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C) { Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; PetscFunctionBegin; PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C)); C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; PetscFunctionReturn(0); } PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat); PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C) { Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; PetscFunctionBegin; PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C)); PetscFunctionReturn(0); } PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat); PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C) { Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; PetscFunctionBegin; PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C)); C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; PetscFunctionReturn(0); } PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) { Mat_Product *product = C->product; Mat A = product->A, P = product->B; PetscBool flg; PetscFunctionBegin; PetscCall(PetscStrcmp(product->alg, "allatonce", &flg)); if (flg) { PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C)); C->ops->productnumeric = MatProductNumeric_PtAP; PetscFunctionReturn(0); } PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg)); if (flg) { PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C)); C->ops->productnumeric = MatProductNumeric_PtAP; PetscFunctionReturn(0); } SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported"); } PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; Mat a = b->AIJ, B; Mat_SeqAIJ *aij = (Mat_SeqAIJ *)a->data; PetscInt m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof; PetscInt *cols; PetscScalar *vals; PetscFunctionBegin; PetscCall(MatGetSize(a, &m, &n)); PetscCall(PetscMalloc1(dof * m, &ilen)); for (i = 0; i < m; i++) { nmax = PetscMax(nmax, aij->ilen[i]); for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i]; } PetscCall(MatCreate(PETSC_COMM_SELF, &B)); PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n)); PetscCall(MatSetType(B, newtype)); PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen)); PetscCall(PetscFree(ilen)); PetscCall(PetscMalloc1(nmax, &icols)); ii = 0; for (i = 0; i < m; i++) { PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals)); for (j = 0; j < dof; j++) { for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j; PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); ii++; } PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals)); } PetscCall(PetscFree(icols)); PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); if (reuse == MAT_INPLACE_MATRIX) { PetscCall(MatHeaderReplace(A, &B)); } else { *newmat = B; } PetscFunctionReturn(0); } #include <../src/mat/impls/aij/mpi/mpiaij.h> PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)A->data; Mat MatAIJ = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B; Mat MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ; Mat_SeqAIJ *AIJ = (Mat_SeqAIJ *)MatAIJ->data; Mat_SeqAIJ *OAIJ = (Mat_SeqAIJ *)MatOAIJ->data; Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)maij->A->data; PetscInt dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0; PetscInt *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL; PetscInt rstart, cstart, *garray, ii, k; PetscScalar *vals, *ovals; PetscFunctionBegin; PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz)); for (i = 0; i < A->rmap->n / dof; i++) { nmax = PetscMax(nmax, AIJ->ilen[i]); onmax = PetscMax(onmax, OAIJ->ilen[i]); for (j = 0; j < dof; j++) { dnz[dof * i + j] = AIJ->ilen[i]; onz[dof * i + j] = OAIJ->ilen[i]; } } PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); PetscCall(MatSetType(B, newtype)); PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz)); PetscCall(MatSetBlockSize(B, dof)); PetscCall(PetscFree2(dnz, onz)); PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols)); rstart = dof * maij->A->rmap->rstart; cstart = dof * maij->A->cmap->rstart; garray = mpiaij->garray; ii = rstart; for (i = 0; i < A->rmap->n / dof; i++) { PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); for (j = 0; j < dof; j++) { for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j; for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j; PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES)); ii++; } PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); } PetscCall(PetscFree2(icols, oicols)); PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); if (reuse == MAT_INPLACE_MATRIX) { PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ ((PetscObject)A)->refct = 1; PetscCall(MatHeaderReplace(A, &B)); ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ } else { *newmat = B; } PetscFunctionReturn(0); } PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) { Mat A; PetscFunctionBegin; PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); PetscCall(MatDestroy(&A)); PetscFunctionReturn(0); } PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[]) { Mat A; PetscFunctionBegin; PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat)); PetscCall(MatDestroy(&A)); PetscFunctionReturn(0); } /* ---------------------------------------------------------------------------------- */ /*@ MatCreateMAIJ - Creates a matrix type providing restriction and interpolation operations for multicomponent problems. It interpolates each component the same way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices. Collective Input Parameters: + A - the `MATAIJ` matrix describing the action on blocks - dof - the block size (number of components per node) Output Parameter: . maij - the new `MATMAIJ` matrix Operations provided: .vb MatMult() MatMultTranspose() MatMultAdd() MatMultTransposeAdd() MatView() .ve Level: advanced .seealso: `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MATMAIJ` @*/ PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij) { PetscInt n; Mat B; PetscBool flg; #if defined(PETSC_HAVE_CUDA) /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; #endif PetscFunctionBegin; dof = PetscAbs(dof); PetscCall(PetscObjectReference((PetscObject)A)); if (dof == 1) *maij = A; else { PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); /* propagate vec type */ PetscCall(MatSetVecType(B, A->defaultvectype)); PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N)); PetscCall(PetscLayoutSetBlockSize(B->rmap, dof)); PetscCall(PetscLayoutSetBlockSize(B->cmap, dof)); PetscCall(PetscLayoutSetUp(B->rmap)); PetscCall(PetscLayoutSetUp(B->cmap)); B->assembled = PETSC_TRUE; PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); if (flg) { Mat_SeqMAIJ *b; PetscCall(MatSetType(B, MATSEQMAIJ)); B->ops->setup = NULL; B->ops->destroy = MatDestroy_SeqMAIJ; B->ops->view = MatView_SeqMAIJ; b = (Mat_SeqMAIJ *)B->data; b->dof = dof; b->AIJ = A; if (dof == 2) { B->ops->mult = MatMult_SeqMAIJ_2; B->ops->multadd = MatMultAdd_SeqMAIJ_2; B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; } else if (dof == 3) { B->ops->mult = MatMult_SeqMAIJ_3; B->ops->multadd = MatMultAdd_SeqMAIJ_3; B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; } else if (dof == 4) { B->ops->mult = MatMult_SeqMAIJ_4; B->ops->multadd = MatMultAdd_SeqMAIJ_4; B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; } else if (dof == 5) { B->ops->mult = MatMult_SeqMAIJ_5; B->ops->multadd = MatMultAdd_SeqMAIJ_5; B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; } else if (dof == 6) { B->ops->mult = MatMult_SeqMAIJ_6; B->ops->multadd = MatMultAdd_SeqMAIJ_6; B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; } else if (dof == 7) { B->ops->mult = MatMult_SeqMAIJ_7; B->ops->multadd = MatMultAdd_SeqMAIJ_7; B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; } else if (dof == 8) { B->ops->mult = MatMult_SeqMAIJ_8; B->ops->multadd = MatMultAdd_SeqMAIJ_8; B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; } else if (dof == 9) { B->ops->mult = MatMult_SeqMAIJ_9; B->ops->multadd = MatMultAdd_SeqMAIJ_9; B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; } else if (dof == 10) { B->ops->mult = MatMult_SeqMAIJ_10; B->ops->multadd = MatMultAdd_SeqMAIJ_10; B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; } else if (dof == 11) { B->ops->mult = MatMult_SeqMAIJ_11; B->ops->multadd = MatMultAdd_SeqMAIJ_11; B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; } else if (dof == 16) { B->ops->mult = MatMult_SeqMAIJ_16; B->ops->multadd = MatMultAdd_SeqMAIJ_16; B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; } else if (dof == 18) { B->ops->mult = MatMult_SeqMAIJ_18; B->ops->multadd = MatMultAdd_SeqMAIJ_18; B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; } else { B->ops->mult = MatMult_SeqMAIJ_N; B->ops->multadd = MatMultAdd_SeqMAIJ_N; B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; } #if defined(PETSC_HAVE_CUDA) PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ)); #endif PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ)); } else { Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; Mat_MPIMAIJ *b; IS from, to; Vec gvec; PetscCall(MatSetType(B, MATMPIMAIJ)); B->ops->setup = NULL; B->ops->destroy = MatDestroy_MPIMAIJ; B->ops->view = MatView_MPIMAIJ; b = (Mat_MPIMAIJ *)B->data; b->dof = dof; b->A = A; PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ)); PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ)); PetscCall(VecGetSize(mpiaij->lvec, &n)); PetscCall(VecCreate(PETSC_COMM_SELF, &b->w)); PetscCall(VecSetSizes(b->w, n * dof, n * dof)); PetscCall(VecSetBlockSize(b->w, dof)); PetscCall(VecSetType(b->w, VECSEQ)); /* create two temporary Index sets for build scatter gather */ PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from)); PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to)); /* create temporary global vector to generate scatter context */ PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec)); /* generate the scatter context */ PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx)); PetscCall(ISDestroy(&from)); PetscCall(ISDestroy(&to)); PetscCall(VecDestroy(&gvec)); B->ops->mult = MatMult_MPIMAIJ_dof; B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; B->ops->multadd = MatMultAdd_MPIMAIJ_dof; B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; #if defined(PETSC_HAVE_CUDA) PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ)); #endif PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ)); PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ)); } B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; PetscCall(MatSetUp(B)); #if defined(PETSC_HAVE_CUDA) /* temporary until we have CUDA implementation of MAIJ */ { PetscBool flg; if (convert) { PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, "")); if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B)); } } #endif *maij = B; } PetscFunctionReturn(0); }