xref: /petsc/src/mat/impls/sell/seq/sell.h (revision bcd3bd92eda2d5998e2f14c4bbfb33bd936bdc3e)
1 
2 #ifndef __SELL_H
3 #define __SELL_H
4 
5 #include <petsc/private/matimpl.h>
6 #include <petsc/private/hashmapi.h>
7 
8 /*
9  For NVIDIA GPUs each slice should be padded to the boundary of 16 elements for best performance.
10  The optimal memory alignment in device memory is 128 bytes, 64 bytes, 32 bytes for double precision, single precision and half precision.
11 */
12 #if defined(PETSC_HAVE_DEVICE)
13   #define DEVICE_MEM_ALIGN 16
14 #endif
15 
16 /*
17  Struct header for SeqSELL matrix format
18 */
19 #define SEQSELLHEADER(datatype) \
20   PetscBool    roworiented;        /* if true, row-oriented input, default */ \
21   PetscInt     nonew;              /* 1 don't add new nonzeros, -1 generate error on new */ \
22   PetscInt     nounused;           /* -1 generate error on unused space */ \
23   PetscBool    singlemalloc;       /* if true a, i, and j have been obtained with one big malloc */ \
24   PetscInt     maxallocmat;        /* max allocated space for the matrix */ \
25   PetscInt     maxallocrow;        /* max allocated space for each row */ \
26   PetscInt     nz;                 /* actual nonzeros */ \
27   PetscInt     rlenmax;            /* max actual row length, rmax cannot exceed maxallocrow */ \
28   PetscInt    *rlen;               /* actual length of each row (padding zeros excluded) */ \
29   PetscBool    free_rlen;          /* free rlen array ? */ \
30   PetscInt     reallocs;           /* number of mallocs done during MatSetValues() \
31 as more values are set than were prealloced */ \
32   PetscBool    keepnonzeropattern; /* keeps matrix structure same in calls to MatZeroRows()*/ \
33   PetscBool    ignorezeroentries; \
34   PetscBool    free_colidx;     /* free the column indices colidx when the matrix is destroyed */ \
35   PetscBool    free_val;        /* free the numerical values when matrix is destroy */ \
36   PetscInt    *colidx;          /* column index */ \
37   PetscInt    *diag;            /* pointers to diagonal elements */ \
38   PetscInt     nonzerorowcnt;   /* how many rows have nonzero entries */ \
39   PetscBool    free_diag;       /* free diag ? */ \
40   datatype    *val;             /* elements including nonzeros and padding zeros */ \
41   PetscScalar *solve_work;      /* work space used in MatSolve */ \
42   IS           row, col, icol;  /* index sets, used for reorderings */ \
43   PetscBool    pivotinblocks;   /* pivot inside factorization of each diagonal block */ \
44   Mat          parent;          /* set if this matrix was formed with MatDuplicate(...,MAT_SHARE_NONZERO_PATTERN,....);
45 means that this shares some data structures with the parent including diag, ilen, imax, i, j */ \
46   PetscInt    *sliidx;          /* slice index */ \
47   PetscInt     totalslices;     /* total number of slices */ \
48   PetscInt     sliceheight;     /* slice height */ \
49   PetscReal    fillratio;       /* ratio of number of padded zeros over total number of elements  */ \
50   PetscReal    avgslicewidth;   /* average slice width */ \
51   PetscInt     maxslicewidth;   /* maximum slice width */ \
52   PetscReal    varslicesize;    /* variance of slice size */ \
53   PetscInt    *sliperm;         /* slice permutation array, CUDA only */ \
54   PetscInt     totalblocks;     /* total number of blocks, CUDA only */ \
55   PetscInt    *blockidx;        /* block index, CUDA only */ \
56   PetscInt    *block_row_map;   /* starting row of the current block, CUDA only */ \
57   PetscInt     chunksize;       /* chunk size, CUDA only */ \
58   PetscInt     totalchunks;     /* total number of chunks, CUDA only */ \
59   PetscInt    *chunk_slice_map; /* starting slice of the currect chunk, CUDA only */ \
60   PetscInt    *getrowcols;      /* workarray for MatGetRow_SeqSELL */ \
61   PetscScalar *getrowvals       /* workarray for MatGetRow_SeqSELL */
62 
63 typedef struct {
64   SEQSELLHEADER(MatScalar);
65   MatScalar   *saved_values;              /* location for stashing nonzero values of matrix */
66   PetscScalar *idiag, *mdiag, *ssor_work; /* inverse of diagonal entries, diagonal values and workspace for Eisenstat trick */
67   PetscBool    idiagvalid;                /* current idiag[] and mdiag[] are valid */
68   PetscScalar  fshift, omega;             /* last used omega and fshift */
69   ISColoring   coloring;                  /* set with MatADSetColoring() used by MatADSetValues() */
70 } Mat_SeqSELL;
71 
72 /*
73  Frees the arrays from the XSELLPACK matrix type
74  */
75 static inline PetscErrorCode MatSeqXSELLFreeSELL(Mat AA, MatScalar **val, PetscInt **colidx)
76 {
77   Mat_SeqSELL *A = (Mat_SeqSELL *)AA->data;
78   if (A->singlemalloc) {
79     PetscCall(PetscFree2(*val, *colidx));
80   } else {
81     if (A->free_val) PetscCall(PetscFree(*val));
82     if (A->free_colidx) PetscCall(PetscFree(*colidx));
83   }
84   return PETSC_SUCCESS;
85 }
86 
87 #define MatSeqXSELLReallocateSELL(Amat, AM, BS2, WIDTH, SIDX, SH, SID, ROW, COL, COLIDX, VAL, CP, VP, NONEW, datatype, MUL) \
88   do { \
89     if (WIDTH >= (SIDX[SID + 1] - SIDX[SID]) / SH) { \
90       Mat_SeqSELL *Ain = (Mat_SeqSELL *)Amat->data; \
91       /* there is no extra room in row, therefore enlarge 1 slice column */ \
92       PetscInt  new_size = Ain->maxallocmat + SH * MUL, *new_colidx; \
93       datatype *new_val; \
94 \
95       PetscCheck(NONEW != -2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "New nonzero at (%" PetscInt_FMT ",%" PetscInt_FMT ") caused a malloc\nUse MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE) to turn off this check", ROW, COL); \
96       /* malloc new storage space */ \
97       PetscCall(PetscMalloc2(BS2 *new_size, &new_val, BS2 *new_size, &new_colidx)); \
98 \
99       /* copy over old data into new slots by two steps: one step for data before the current slice and the other for the rest */ \
100       PetscCall(PetscArraycpy(new_val, VAL, SIDX[SID + 1])); \
101       PetscCall(PetscArraycpy(new_colidx, COLIDX, SIDX[SID + 1])); \
102       PetscCall(PetscArraycpy(new_val + SIDX[SID + 1] + SH * MUL, VAL + SIDX[SID + 1], SIDX[Ain->totalslices] - SIDX[SID + 1])); \
103       PetscCall(PetscArraycpy(new_colidx + SIDX[SID + 1] + SH * MUL, COLIDX + SIDX[SID + 1], SIDX[Ain->totalslices] - SIDX[SID + 1])); \
104       /* update slice_idx */ \
105       for (ii = SID + 1; ii <= Ain->totalslices; ii++) { SIDX[ii] += SH * MUL; } \
106       /* update pointers. Notice that they point to the FIRST postion of the row */ \
107       CP = new_colidx + SIDX[SID] + (ROW % SH); \
108       VP = new_val + SIDX[SID] + (ROW % SH); \
109       /* free up old matrix storage */ \
110       PetscCall(MatSeqXSELLFreeSELL(A, &Ain->val, &Ain->colidx)); \
111       Ain->val          = (MatScalar *)new_val; \
112       Ain->colidx       = new_colidx; \
113       Ain->singlemalloc = PETSC_TRUE; \
114       Ain->maxallocmat  = new_size; \
115       Ain->reallocs++; \
116       if (WIDTH >= Ain->maxallocrow) Ain->maxallocrow += MUL; \
117       if (WIDTH >= Ain->rlenmax) Ain->rlenmax++; \
118     } \
119   } while (0)
120 
121 #define MatSetValue_SeqSELL_Private(A, row, col, value, addv, orow, ocol, cp, vp, lastcol, low, high) \
122   do { \
123     Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; \
124     found          = PETSC_FALSE; \
125     if (col <= lastcol) low = 0; \
126     else high = a->rlen[row]; \
127     lastcol = col; \
128     while (high - low > 5) { \
129       t = (low + high) / 2; \
130       if (*(cp + a->sliceheight * t) > col) high = t; \
131       else low = t; \
132     } \
133     for (_i = low; _i < high; _i++) { \
134       if (*(cp + a->sliceheight * _i) > col) break; \
135       if (*(cp + a->sliceheight * _i) == col) { \
136         if (addv == ADD_VALUES) *(vp + a->sliceheight * _i) += value; \
137         else *(vp + a->sliceheight * _i) = value; \
138         found = PETSC_TRUE; \
139         break; \
140       } \
141     } \
142     if (!found) { \
143       PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
144       if (a->nonew != 1 && !(value == 0.0 && a->ignorezeroentries) && a->rlen[row] >= (a->sliidx[row / a->sliceheight + 1] - a->sliidx[row / a->sliceheight]) / a->sliceheight) { \
145         /* there is no extra room in row, therefore enlarge 1 slice column */ \
146         if (a->maxallocmat < a->sliidx[a->totalslices] + a->sliceheight) { \
147           /* allocates a larger array for the XSELL matrix types; only extend the current slice by one more column. */ \
148           PetscInt   new_size = a->maxallocmat + a->sliceheight, *new_colidx; \
149           MatScalar *new_val; \
150           PetscCheck(a->nonew != -2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "New nonzero at (%" PetscInt_FMT ",%" PetscInt_FMT ") caused a malloc\nUse MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE) to turn off this check", orow, ocol); \
151           /* malloc new storage space */ \
152           PetscCall(PetscMalloc2(new_size, &new_val, new_size, &new_colidx)); \
153           /* copy over old data into new slots by two steps: one step for data before the current slice and the other for the rest */ \
154           PetscCall(PetscArraycpy(new_val, a->val, a->sliidx[row / a->sliceheight + 1])); \
155           PetscCall(PetscArraycpy(new_colidx, a->colidx, a->sliidx[row / a->sliceheight + 1])); \
156           PetscCall(PetscArraycpy(new_val + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->val + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \
157           PetscCall(PetscArraycpy(new_colidx + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->colidx + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \
158           /* update pointers. Notice that they point to the FIRST postion of the row */ \
159           cp = new_colidx + a->sliidx[row / a->sliceheight] + (row % a->sliceheight); \
160           vp = new_val + a->sliidx[row / a->sliceheight] + (row % a->sliceheight); \
161           /* free up old matrix storage */ \
162           PetscCall(MatSeqXSELLFreeSELL(A, &a->val, &a->colidx)); \
163           a->val          = (MatScalar *)new_val; \
164           a->colidx       = new_colidx; \
165           a->singlemalloc = PETSC_TRUE; \
166           a->maxallocmat  = new_size; \
167           a->reallocs++; \
168         } else { \
169           /* no need to reallocate, just shift the following slices to create space for the added slice column */ \
170           PetscCall(PetscArraymove(a->val + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->val + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \
171           PetscCall(PetscArraymove(a->colidx + a->sliidx[row / a->sliceheight + 1] + a->sliceheight, a->colidx + a->sliidx[row / a->sliceheight + 1], a->sliidx[a->totalslices] - a->sliidx[row / a->sliceheight + 1])); \
172         } \
173         /* update slice_idx */ \
174         for (ii = row / a->sliceheight + 1; ii <= a->totalslices; ii++) a->sliidx[ii] += a->sliceheight; \
175         if (a->rlen[row] >= a->maxallocrow) a->maxallocrow++; \
176         if (a->rlen[row] >= a->rlenmax) a->rlenmax++; \
177       } \
178       /* shift up all the later entries in this row */ \
179       for (ii = a->rlen[row] - 1; ii >= _i; ii--) { \
180         *(cp + a->sliceheight * (ii + 1)) = *(cp + a->sliceheight * ii); \
181         *(vp + a->sliceheight * (ii + 1)) = *(vp + a->sliceheight * ii); \
182       } \
183       *(cp + a->sliceheight * _i) = col; \
184       *(vp + a->sliceheight * _i) = value; \
185       a->nz++; \
186       a->rlen[row]++; \
187       A->nonzerostate++; \
188       low = _i + 1; \
189       high++; \
190     } \
191   } while (0)
192 
193 PETSC_INTERN PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat, PetscInt, const PetscInt[]);
194 PETSC_INTERN PetscErrorCode MatMult_SeqSELL(Mat, Vec, Vec);
195 PETSC_INTERN PetscErrorCode MatMultAdd_SeqSELL(Mat, Vec, Vec, Vec);
196 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqSELL(Mat, Vec, Vec);
197 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat, Vec, Vec, Vec);
198 PETSC_INTERN PetscErrorCode MatMissingDiagonal_SeqSELL(Mat, PetscBool *, PetscInt *);
199 PETSC_INTERN PetscErrorCode MatMarkDiagonal_SeqSELL(Mat);
200 PETSC_INTERN PetscErrorCode MatInvertDiagonal_SeqSELL(Mat, PetscScalar, PetscScalar);
201 PETSC_INTERN PetscErrorCode MatZeroEntries_SeqSELL(Mat);
202 PETSC_INTERN PetscErrorCode MatDestroy_SeqSELL(Mat);
203 PETSC_INTERN PetscErrorCode MatSetOption_SeqSELL(Mat, MatOption, PetscBool);
204 PETSC_INTERN PetscErrorCode MatGetDiagonal_SeqSELL(Mat, Vec v);
205 PETSC_INTERN PetscErrorCode MatGetValues_SeqSELL(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], PetscScalar[]);
206 PETSC_INTERN PetscErrorCode MatView_SeqSELL(Mat, PetscViewer);
207 PETSC_INTERN PetscErrorCode MatAssemblyEnd_SeqSELL(Mat, MatAssemblyType);
208 PETSC_INTERN PetscErrorCode MatGetInfo_SeqSELL(Mat, MatInfoType, MatInfo *);
209 PETSC_INTERN PetscErrorCode MatSetValues_SeqSELL(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
210 PETSC_INTERN PetscErrorCode MatCopy_SeqSELL(Mat, Mat, MatStructure);
211 PETSC_INTERN PetscErrorCode MatSetUp_SeqSELL(Mat);
212 PETSC_INTERN PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat, PetscScalar *[]);
213 PETSC_INTERN PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat, PetscScalar *[]);
214 PETSC_INTERN PetscErrorCode MatShift_SeqSELL(Mat, PetscScalar);
215 PETSC_INTERN PetscErrorCode MatSOR_SeqSELL(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec);
216 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat);
217 PETSC_INTERN PetscErrorCode MatDuplicate_SeqSELL(Mat, MatDuplicateOption, Mat *);
218 PETSC_INTERN PetscErrorCode MatEqual_SeqSELL(Mat, Mat, PetscBool *);
219 PETSC_INTERN PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat);
220 PETSC_INTERN PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat, MatType, MatReuse, Mat *);
221 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat, MatType, MatReuse, Mat *);
222 PETSC_INTERN PetscErrorCode MatFDColoringCreate_SeqSELL(Mat, ISColoring, MatFDColoring);
223 PETSC_INTERN PetscErrorCode MatFDColoringSetUp_SeqSELL(Mat, ISColoring, MatFDColoring);
224 PETSC_INTERN PetscErrorCode MatGetColumnIJ_SeqSELL_Color(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscInt *[], PetscBool *);
225 PETSC_INTERN PetscErrorCode MatRestoreColumnIJ_SeqSELL_Color(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscInt *[], PetscBool *);
226 PETSC_INTERN PetscErrorCode MatConjugate_SeqSELL(Mat A);
227 PETSC_INTERN PetscErrorCode MatScale_SeqSELL(Mat, PetscScalar);
228 PETSC_INTERN PetscErrorCode MatDiagonalScale_SeqSELL(Mat, Vec, Vec);
229 #endif
230