xref: /petsc/src/mat/impls/aij/seq/crl/crl.c (revision d0e6bf2ad94dcc89b258ce16c7987200a4714786) !
181824310SBarry Smith /*
281824310SBarry Smith   Defines a matrix-vector product for the MATSEQAIJCRL matrix class.
381824310SBarry Smith   This class is derived from the MATSEQAIJ class and retains the
481824310SBarry Smith   compressed row storage (aka Yale sparse matrix format) but augments
581824310SBarry Smith   it with a column oriented storage that is more efficient for
681824310SBarry Smith   matrix vector products on Vector machines.
781824310SBarry Smith 
881824310SBarry Smith   CRL stands for constant row length (that is the same number of columns
981824310SBarry Smith   is kept (padded with zeros) for each row of the sparse matrix.
1081824310SBarry Smith */
11c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/crl.h>
1281824310SBarry Smith 
MatDestroy_SeqAIJCRL(Mat A)1366976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_SeqAIJCRL(Mat A)
14d71ae5a4SJacob Faibussowitsch {
155a11e1b2SBarry Smith   Mat_AIJCRL *aijcrl = (Mat_AIJCRL *)A->spptr;
1681824310SBarry Smith 
17362febeeSStefano Zampini   PetscFunctionBegin;
185a11e1b2SBarry Smith   /* Free everything in the Mat_AIJCRL data structure. */
191baa6e33SBarry Smith   if (aijcrl) PetscCall(PetscFree2(aijcrl->acols, aijcrl->icols));
209566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->spptr));
219566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
229566063dSJacob Faibussowitsch   PetscCall(MatDestroy_SeqAIJ(A));
233ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2481824310SBarry Smith }
2581824310SBarry Smith 
MatDuplicate_AIJCRL(Mat A,MatDuplicateOption op,Mat * M)26d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDuplicate_AIJCRL(Mat A, MatDuplicateOption op, Mat *M)
27d71ae5a4SJacob Faibussowitsch {
285a11e1b2SBarry Smith   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot duplicate AIJCRL matrices yet");
2981824310SBarry Smith }
3081824310SBarry Smith 
MatSeqAIJCRL_create_aijcrl(Mat A)3166976f2fSJacob Faibussowitsch static PetscErrorCode MatSeqAIJCRL_create_aijcrl(Mat A)
32d71ae5a4SJacob Faibussowitsch {
33*57508eceSPierre Jolivet   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
345a11e1b2SBarry Smith   Mat_AIJCRL  *aijcrl = (Mat_AIJCRL *)A->spptr;
35d0f46423SBarry Smith   PetscInt     m      = A->rmap->n; /* Number of rows in the matrix. */
3681824310SBarry Smith   PetscInt    *aj     = a->j;       /* From the CSR representation; points to the beginning  of each row. */
3781824310SBarry Smith   PetscInt     i, j, rmax = a->rmax, *icols, *ilen = a->ilen;
38dd6ea824SBarry Smith   MatScalar   *aa = a->a;
39dd6ea824SBarry Smith   PetscScalar *acols;
4081824310SBarry Smith 
4181824310SBarry Smith   PetscFunctionBegin;
425a11e1b2SBarry Smith   aijcrl->nz   = a->nz;
435a11e1b2SBarry Smith   aijcrl->m    = A->rmap->n;
445a11e1b2SBarry Smith   aijcrl->rmax = rmax;
452205254eSKarl Rupp 
469566063dSJacob Faibussowitsch   PetscCall(PetscFree2(aijcrl->acols, aijcrl->icols));
479566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(rmax * m, &aijcrl->acols, rmax * m, &aijcrl->icols));
485a11e1b2SBarry Smith   acols = aijcrl->acols;
495a11e1b2SBarry Smith   icols = aijcrl->icols;
5081824310SBarry Smith   for (i = 0; i < m; i++) {
5181824310SBarry Smith     for (j = 0; j < ilen[i]; j++) {
5281824310SBarry Smith       acols[j * m + i] = *aa++;
5381824310SBarry Smith       icols[j * m + i] = *aj++;
5481824310SBarry Smith     }
5581824310SBarry Smith     for (; j < rmax; j++) { /* empty column entries */
5681824310SBarry Smith       acols[j * m + i] = 0.0;
5781824310SBarry Smith       icols[j * m + i] = (j) ? icols[(j - 1) * m + i] : 0; /* handle case where row is EMPTY */
5881824310SBarry Smith     }
5981824310SBarry Smith   }
608564c97fSStefano Zampini   PetscCall(PetscInfo(A, "Percentage of 0's introduced for vectorized multiply %g. Rmax= %" PetscInt_FMT "\n", 1.0 - ((double)a->nz) / PetscMax((double)rmax * m, 1), rmax));
613ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
6281824310SBarry Smith }
6381824310SBarry Smith 
MatAssemblyEnd_SeqAIJCRL(Mat A,MatAssemblyType mode)6466976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyEnd_SeqAIJCRL(Mat A, MatAssemblyType mode)
65d71ae5a4SJacob Faibussowitsch {
6681824310SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
6781824310SBarry Smith 
68c02b24c5SBarry Smith   PetscFunctionBegin;
6981824310SBarry Smith   a->inode.use = PETSC_FALSE;
702205254eSKarl Rupp 
719566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
723ba16761SJacob Faibussowitsch   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
7381824310SBarry Smith 
7481824310SBarry Smith   /* Now calculate the permutation and grouping information. */
759566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCRL_create_aijcrl(A));
763ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
7781824310SBarry Smith }
7881824310SBarry Smith 
79c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/ftn-kernels/fmultcrl.h>
805aeacdfdSBarry Smith 
814723c594SBarry Smith /*
825a11e1b2SBarry Smith     Shared by both sequential and parallel versions of CRL matrix: MATMPIAIJCRL and MATSEQAIJCRL
834723c594SBarry Smith     - the scatter is used only in the parallel version
844723c594SBarry Smith 
854723c594SBarry Smith */
MatMult_AIJCRL(Mat A,Vec xx,Vec yy)86d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_AIJCRL(Mat A, Vec xx, Vec yy)
87d71ae5a4SJacob Faibussowitsch {
885a11e1b2SBarry Smith   Mat_AIJCRL        *aijcrl = (Mat_AIJCRL *)A->spptr;
895a11e1b2SBarry Smith   PetscInt           m      = aijcrl->m; /* Number of rows in the matrix. */
905a11e1b2SBarry Smith   PetscInt           rmax = aijcrl->rmax, *icols = aijcrl->icols;
915a11e1b2SBarry Smith   PetscScalar       *acols = aijcrl->acols;
92d9ca1df4SBarry Smith   PetscScalar       *y;
93d9ca1df4SBarry Smith   const PetscScalar *x;
9481824310SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
9581824310SBarry Smith   PetscInt i, j, ii;
9681824310SBarry Smith #endif
9781824310SBarry Smith 
9881824310SBarry Smith #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
9981824310SBarry Smith   #pragma disjoint(*x, *y, *aa)
10081824310SBarry Smith #endif
10181824310SBarry Smith 
10281824310SBarry Smith   PetscFunctionBegin;
1035a11e1b2SBarry Smith   if (aijcrl->xscat) {
1049566063dSJacob Faibussowitsch     PetscCall(VecCopy(xx, aijcrl->xwork));
105c02b24c5SBarry Smith     /* get remote values needed for local part of multiply */
1069566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(aijcrl->xscat, xx, aijcrl->fwork, INSERT_VALUES, SCATTER_FORWARD));
1079566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(aijcrl->xscat, xx, aijcrl->fwork, INSERT_VALUES, SCATTER_FORWARD));
1085a11e1b2SBarry Smith     xx = aijcrl->xwork;
1092205254eSKarl Rupp   }
110c02b24c5SBarry Smith 
1119566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
1129566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
11381824310SBarry Smith 
11481824310SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
11581824310SBarry Smith   fortranmultcrl_(&m, &rmax, x, y, icols, acols);
11681824310SBarry Smith #else
11781824310SBarry Smith 
1184be65460SBarry Smith   /* first column */
1192205254eSKarl Rupp   for (j = 0; j < m; j++) y[j] = acols[j] * x[icols[j]];
1204be65460SBarry Smith 
1214be65460SBarry Smith   /* other columns */
122b409243cSBarry Smith   #if defined(PETSC_HAVE_CRAY_VECTOR)
12381824310SBarry Smith     #pragma _CRI preferstream
12481824310SBarry Smith   #endif
1254be65460SBarry Smith   for (i = 1; i < rmax; i++) {
12681824310SBarry Smith     ii = i * m;
127b409243cSBarry Smith   #if defined(PETSC_HAVE_CRAY_VECTOR)
12881824310SBarry Smith     #pragma _CRI prefervector
12981824310SBarry Smith   #endif
1302205254eSKarl Rupp     for (j = 0; j < m; j++) y[j] = y[j] + acols[ii + j] * x[icols[ii + j]];
13181824310SBarry Smith   }
13281824310SBarry Smith #endif
1339566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * aijcrl->nz - m));
1349566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
1359566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
1363ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
13781824310SBarry Smith }
13881824310SBarry Smith 
1395a11e1b2SBarry Smith /* MatConvert_SeqAIJ_SeqAIJCRL converts a SeqAIJ matrix into a
1405a11e1b2SBarry Smith  * SeqAIJCRL matrix.  This routine is called by the MatCreate_SeqAIJCRL()
14181824310SBarry Smith  * routine, but can also be used to convert an assembled SeqAIJ matrix
1425a11e1b2SBarry Smith  * into a SeqAIJCRL one. */
MatConvert_SeqAIJ_SeqAIJCRL(Mat A,MatType type,MatReuse reuse,Mat * newmat)143d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJCRL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
144d71ae5a4SJacob Faibussowitsch {
14581824310SBarry Smith   Mat         B = *newmat;
1465a11e1b2SBarry Smith   Mat_AIJCRL *aijcrl;
1474099cc6bSBarry Smith   PetscBool   sametype;
14881824310SBarry Smith 
14981824310SBarry Smith   PetscFunctionBegin;
15048a46eb9SPierre Jolivet   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
1519566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
1523ba16761SJacob Faibussowitsch   if (sametype) PetscFunctionReturn(PETSC_SUCCESS);
15381824310SBarry Smith 
1544dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&aijcrl));
1555a11e1b2SBarry Smith   B->spptr = (void *)aijcrl;
15681824310SBarry Smith 
15717667f90SBarry Smith   /* Set function pointers for methods that we inherit from AIJ but override. */
1585a11e1b2SBarry Smith   B->ops->duplicate   = MatDuplicate_AIJCRL;
1595a11e1b2SBarry Smith   B->ops->assemblyend = MatAssemblyEnd_SeqAIJCRL;
1605a11e1b2SBarry Smith   B->ops->destroy     = MatDestroy_SeqAIJCRL;
1615a11e1b2SBarry Smith   B->ops->mult        = MatMult_AIJCRL;
16281824310SBarry Smith 
16381824310SBarry Smith   /* If A has already been assembled, compute the permutation. */
1641baa6e33SBarry Smith   if (A->assembled) PetscCall(MatSeqAIJCRL_create_aijcrl(B));
1659566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJCRL));
16681824310SBarry Smith   *newmat = B;
1673ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16881824310SBarry Smith }
16981824310SBarry Smith 
17081824310SBarry Smith /*@C
17111a5261eSBarry Smith   MatCreateSeqAIJCRL - Creates a sparse matrix of type `MATSEQAIJCRL`.
17281824310SBarry Smith 
173d083f849SBarry Smith   Collective
17481824310SBarry Smith 
17581824310SBarry Smith   Input Parameters:
17611a5261eSBarry Smith + comm - MPI communicator, set to `PETSC_COMM_SELF`
17781824310SBarry Smith . m    - number of rows
17881824310SBarry Smith . n    - number of columns
17920f4b53cSBarry Smith . nz   - number of nonzeros per row (same for all rows), ignored if `nnz` is given
18081824310SBarry Smith - nnz  - array containing the number of nonzeros in the various rows
1812ef1f0ffSBarry Smith          (possibly different for each row) or `NULL`
18281824310SBarry Smith 
18381824310SBarry Smith   Output Parameter:
18481824310SBarry Smith . A - the matrix
18581824310SBarry Smith 
18681824310SBarry Smith   Level: intermediate
18781824310SBarry Smith 
1882920cce0SJacob Faibussowitsch   Notes:
1892920cce0SJacob Faibussowitsch   This type inherits from `MATSEQAIJ`, but stores some additional information that is used to
1902920cce0SJacob Faibussowitsch   allow better vectorization of the matrix-vector product. At the cost of increased storage,
1912920cce0SJacob Faibussowitsch   the `MATSEQAIJ` formatted matrix can be copied to a format in which pieces of the matrix are
1922920cce0SJacob Faibussowitsch   stored in ELLPACK format, allowing the vectorized matrix multiply routine to use stride-1
1932920cce0SJacob Faibussowitsch   memory accesses.
1942920cce0SJacob Faibussowitsch 
1951cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateMPIAIJPERM()`, `MatSetValues()`
19681824310SBarry Smith @*/
MatCreateSeqAIJCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat * A)197d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqAIJCRL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
198d71ae5a4SJacob Faibussowitsch {
19981824310SBarry Smith   PetscFunctionBegin;
2009566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
2019566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, m, n));
2029566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A, MATSEQAIJCRL));
2039566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
2043ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
20581824310SBarry Smith }
20681824310SBarry Smith 
MatCreate_SeqAIJCRL(Mat A)207d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJCRL(Mat A)
208d71ae5a4SJacob Faibussowitsch {
20981824310SBarry Smith   PetscFunctionBegin;
2109566063dSJacob Faibussowitsch   PetscCall(MatSetType(A, MATSEQAIJ));
2119566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqAIJ_SeqAIJCRL(A, MATSEQAIJCRL, MAT_INPLACE_MATRIX, &A));
2123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
21381824310SBarry Smith }
214