xref: /petsc/include/petsc/private/matimpl.h (revision 2daea058f1fa0b4b5a893c784be52f4813ced5f1) !
1 #pragma once
2 
3 #include <petscmat.h>
4 #include <petscmatcoarsen.h>
5 #include <petsc/private/petscimpl.h>
6 
7 PETSC_EXTERN PetscBool      MatRegisterAllCalled;
8 PETSC_EXTERN PetscBool      MatSeqAIJRegisterAllCalled;
9 PETSC_EXTERN PetscBool      MatOrderingRegisterAllCalled;
10 PETSC_EXTERN PetscBool      MatColoringRegisterAllCalled;
11 PETSC_EXTERN PetscBool      MatPartitioningRegisterAllCalled;
12 PETSC_EXTERN PetscBool      MatCoarsenRegisterAllCalled;
13 PETSC_EXTERN PetscErrorCode MatRegisterAll(void);
14 PETSC_EXTERN PetscErrorCode MatOrderingRegisterAll(void);
15 PETSC_EXTERN PetscErrorCode MatColoringRegisterAll(void);
16 PETSC_EXTERN PetscErrorCode MatPartitioningRegisterAll(void);
17 PETSC_EXTERN PetscErrorCode MatCoarsenRegisterAll(void);
18 PETSC_EXTERN PetscErrorCode MatSeqAIJRegisterAll(void);
19 
20 /* Gets the root type of the input matrix's type (e.g., MATAIJ for MATSEQAIJ) */
21 PETSC_EXTERN PetscErrorCode MatGetRootType_Private(Mat, MatType *);
22 
23 /* Gets the MPI type corresponding to the input matrix's type (e.g., MATMPIAIJ for MATSEQAIJ) */
24 PETSC_INTERN PetscErrorCode MatGetMPIMatType_Private(Mat, MatType *);
25 
26 /*
27   This file defines the parts of the matrix data structure that are
28   shared by all matrix types.
29 */
30 
31 /*
32     If you add entries here also add them to the MATOP enum
33     in include/petscmat.h
34 */
35 typedef struct _MatOps *MatOps;
36 struct _MatOps {
37   /* 0*/
38   PetscErrorCode (*setvalues)(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
39   PetscErrorCode (*getrow)(Mat, PetscInt, PetscInt *, PetscInt *[], PetscScalar *[]);
40   PetscErrorCode (*restorerow)(Mat, PetscInt, PetscInt *, PetscInt *[], PetscScalar *[]);
41   PetscErrorCode (*mult)(Mat, Vec, Vec);
42   PetscErrorCode (*multadd)(Mat, Vec, Vec, Vec);
43   /* 5*/
44   PetscErrorCode (*multtranspose)(Mat, Vec, Vec);
45   PetscErrorCode (*multtransposeadd)(Mat, Vec, Vec, Vec);
46   PetscErrorCode (*solve)(Mat, Vec, Vec);
47   PetscErrorCode (*solveadd)(Mat, Vec, Vec, Vec);
48   PetscErrorCode (*solvetranspose)(Mat, Vec, Vec);
49   /*10*/
50   PetscErrorCode (*solvetransposeadd)(Mat, Vec, Vec, Vec);
51   PetscErrorCode (*lufactor)(Mat, IS, IS, const MatFactorInfo *);
52   PetscErrorCode (*choleskyfactor)(Mat, IS, const MatFactorInfo *);
53   PetscErrorCode (*sor)(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec);
54   PetscErrorCode (*transpose)(Mat, MatReuse, Mat *);
55   /*15*/
56   PetscErrorCode (*getinfo)(Mat, MatInfoType, MatInfo *);
57   PetscErrorCode (*equal)(Mat, Mat, PetscBool *);
58   PetscErrorCode (*getdiagonal)(Mat, Vec);
59   PetscErrorCode (*diagonalscale)(Mat, Vec, Vec);
60   PetscErrorCode (*norm)(Mat, NormType, PetscReal *);
61   /*20*/
62   PetscErrorCode (*assemblybegin)(Mat, MatAssemblyType);
63   PetscErrorCode (*assemblyend)(Mat, MatAssemblyType);
64   PetscErrorCode (*setoption)(Mat, MatOption, PetscBool);
65   PetscErrorCode (*zeroentries)(Mat);
66   /*24*/
67   PetscErrorCode (*zerorows)(Mat, PetscInt, const PetscInt[], PetscScalar, Vec, Vec);
68   PetscErrorCode (*lufactorsymbolic)(Mat, Mat, IS, IS, const MatFactorInfo *);
69   PetscErrorCode (*lufactornumeric)(Mat, Mat, const MatFactorInfo *);
70   PetscErrorCode (*choleskyfactorsymbolic)(Mat, Mat, IS, const MatFactorInfo *);
71   PetscErrorCode (*choleskyfactornumeric)(Mat, Mat, const MatFactorInfo *);
72   /*29*/
73   PetscErrorCode (*setup)(Mat);
74   PetscErrorCode (*ilufactorsymbolic)(Mat, Mat, IS, IS, const MatFactorInfo *);
75   PetscErrorCode (*iccfactorsymbolic)(Mat, Mat, IS, const MatFactorInfo *);
76   PetscErrorCode (*getdiagonalblock)(Mat, Mat *);
77   PetscErrorCode (*setinf)(Mat);
78   /*34*/
79   PetscErrorCode (*duplicate)(Mat, MatDuplicateOption, Mat *);
80   PetscErrorCode (*forwardsolve)(Mat, Vec, Vec);
81   PetscErrorCode (*backwardsolve)(Mat, Vec, Vec);
82   PetscErrorCode (*ilufactor)(Mat, IS, IS, const MatFactorInfo *);
83   PetscErrorCode (*iccfactor)(Mat, IS, const MatFactorInfo *);
84   /*39*/
85   PetscErrorCode (*axpy)(Mat, PetscScalar, Mat, MatStructure);
86   PetscErrorCode (*createsubmatrices)(Mat, PetscInt, const IS[], const IS[], MatReuse, Mat *[]);
87   PetscErrorCode (*increaseoverlap)(Mat, PetscInt, IS[], PetscInt);
88   PetscErrorCode (*getvalues)(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], PetscScalar[]);
89   PetscErrorCode (*copy)(Mat, Mat, MatStructure);
90   /*44*/
91   PetscErrorCode (*getrowmax)(Mat, Vec, PetscInt[]);
92   PetscErrorCode (*scale)(Mat, PetscScalar);
93   PetscErrorCode (*shift)(Mat, PetscScalar);
94   PetscErrorCode (*diagonalset)(Mat, Vec, InsertMode);
95   PetscErrorCode (*zerorowscolumns)(Mat, PetscInt, const PetscInt[], PetscScalar, Vec, Vec);
96   /*49*/
97   PetscErrorCode (*setrandom)(Mat, PetscRandom);
98   PetscErrorCode (*getrowij)(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscBool *);
99   PetscErrorCode (*restorerowij)(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscBool *);
100   PetscErrorCode (*getcolumnij)(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscBool *);
101   PetscErrorCode (*restorecolumnij)(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscBool *);
102   /*54*/
103   PetscErrorCode (*fdcoloringcreate)(Mat, ISColoring, MatFDColoring);
104   PetscErrorCode (*coloringpatch)(Mat, PetscInt, PetscInt, ISColoringValue[], ISColoring *);
105   PetscErrorCode (*setunfactored)(Mat);
106   PetscErrorCode (*permute)(Mat, IS, IS, Mat *);
107   PetscErrorCode (*setvaluesblocked)(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
108   /*59*/
109   PetscErrorCode (*createsubmatrix)(Mat, IS, IS, MatReuse, Mat *);
110   PetscErrorCode (*destroy)(Mat);
111   PetscErrorCode (*view)(Mat, PetscViewer);
112   PetscErrorCode (*convertfrom)(Mat, MatType, MatReuse, Mat *);
113   PetscErrorCode (*matmatmultsymbolic)(Mat, Mat, Mat, PetscReal, Mat);
114   /*64*/
115   PetscErrorCode (*matmatmultnumeric)(Mat, Mat, Mat, Mat);
116   PetscErrorCode (*setlocaltoglobalmapping)(Mat, ISLocalToGlobalMapping, ISLocalToGlobalMapping);
117   PetscErrorCode (*setvalueslocal)(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
118   PetscErrorCode (*zerorowslocal)(Mat, PetscInt, const PetscInt[], PetscScalar, Vec, Vec);
119   PetscErrorCode (*getrowmaxabs)(Mat, Vec, PetscInt[]);
120   /*69*/
121   PetscErrorCode (*getrowminabs)(Mat, Vec, PetscInt[]);
122   PetscErrorCode (*convert)(Mat, MatType, MatReuse, Mat *);
123   PetscErrorCode (*hasoperation)(Mat, MatOperation, PetscBool *);
124   PetscErrorCode (*fdcoloringapply)(Mat, MatFDColoring, Vec, void *);
125   PetscErrorCode (*setfromoptions)(Mat, PetscOptionItems);
126   /*74*/
127   PetscErrorCode (*findzerodiagonals)(Mat, IS *);
128   PetscErrorCode (*mults)(Mat, Vecs, Vecs);
129   PetscErrorCode (*solves)(Mat, Vecs, Vecs);
130   PetscErrorCode (*getinertia)(Mat, PetscInt *, PetscInt *, PetscInt *);
131   PetscErrorCode (*load)(Mat, PetscViewer);
132   /*79*/
133   PetscErrorCode (*issymmetric)(Mat, PetscReal, PetscBool *);
134   PetscErrorCode (*ishermitian)(Mat, PetscReal, PetscBool *);
135   PetscErrorCode (*isstructurallysymmetric)(Mat, PetscBool *);
136   PetscErrorCode (*setvaluesblockedlocal)(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
137   PetscErrorCode (*getvecs)(Mat, Vec *, Vec *);
138   /*84*/
139   PetscErrorCode (*matmultsymbolic)(Mat, Mat, PetscReal, Mat);
140   PetscErrorCode (*matmultnumeric)(Mat, Mat, Mat);
141   PetscErrorCode (*ptapnumeric)(Mat, Mat, Mat); /* double dispatch wrapper routine */
142   PetscErrorCode (*mattransposemultsymbolic)(Mat, Mat, PetscReal, Mat);
143   PetscErrorCode (*mattransposemultnumeric)(Mat, Mat, Mat);
144   /*89*/
145   PetscErrorCode (*bindtocpu)(Mat, PetscBool);
146   PetscErrorCode (*productsetfromoptions)(Mat);
147   PetscErrorCode (*productsymbolic)(Mat);
148   PetscErrorCode (*productnumeric)(Mat);
149   PetscErrorCode (*conjugate)(Mat); /* complex conjugate */
150   /*94*/
151   PetscErrorCode (*viewnative)(Mat, PetscViewer);
152   PetscErrorCode (*setvaluesrow)(Mat, PetscInt, const PetscScalar[]);
153   PetscErrorCode (*realpart)(Mat);
154   PetscErrorCode (*imaginarypart)(Mat);
155   PetscErrorCode (*getrowuppertriangular)(Mat);
156   /*99*/
157   PetscErrorCode (*restorerowuppertriangular)(Mat);
158   PetscErrorCode (*matsolve)(Mat, Mat, Mat);
159   PetscErrorCode (*matsolvetranspose)(Mat, Mat, Mat);
160   PetscErrorCode (*getrowmin)(Mat, Vec, PetscInt[]);
161   PetscErrorCode (*getcolumnvector)(Mat, Vec, PetscInt);
162   /*104*/
163   PetscErrorCode (*getseqnonzerostructure)(Mat, Mat *);
164   PetscErrorCode (*create)(Mat);
165   PetscErrorCode (*getghosts)(Mat, PetscInt *, const PetscInt *[]);
166   PetscErrorCode (*getlocalsubmatrix)(Mat, IS, IS, Mat *);
167   PetscErrorCode (*restorelocalsubmatrix)(Mat, IS, IS, Mat *);
168   /*109*/
169   PetscErrorCode (*multdiagonalblock)(Mat, Vec, Vec);
170   PetscErrorCode (*hermitiantranspose)(Mat, MatReuse, Mat *);
171   PetscErrorCode (*multhermitiantranspose)(Mat, Vec, Vec);
172   PetscErrorCode (*multhermitiantransposeadd)(Mat, Vec, Vec, Vec);
173   PetscErrorCode (*getmultiprocblock)(Mat, MPI_Comm, MatReuse, Mat *);
174   /*114*/
175   PetscErrorCode (*findnonzerorows)(Mat, IS *);
176   PetscErrorCode (*getcolumnreductions)(Mat, PetscInt, PetscReal *);
177   PetscErrorCode (*invertblockdiagonal)(Mat, const PetscScalar **);
178   PetscErrorCode (*invertvariableblockdiagonal)(Mat, PetscInt, const PetscInt *, PetscScalar *);
179   PetscErrorCode (*createsubmatricesmpi)(Mat, PetscInt, const IS[], const IS[], MatReuse, Mat **);
180   /*119*/
181   PetscErrorCode (*setvaluesbatch)(Mat, PetscInt, PetscInt, PetscInt *, const PetscScalar *);
182   PetscErrorCode (*transposematmultsymbolic)(Mat, Mat, PetscReal, Mat);
183   PetscErrorCode (*transposematmultnumeric)(Mat, Mat, Mat);
184   PetscErrorCode (*transposecoloringcreate)(Mat, ISColoring, MatTransposeColoring);
185   PetscErrorCode (*transcoloringapplysptoden)(MatTransposeColoring, Mat, Mat);
186   /*124*/
187   PetscErrorCode (*transcoloringapplydentosp)(MatTransposeColoring, Mat, Mat);
188   PetscErrorCode (*rartnumeric)(Mat, Mat, Mat); /* double dispatch wrapper routine */
189   PetscErrorCode (*setblocksizes)(Mat, PetscInt, PetscInt);
190   PetscErrorCode (*residual)(Mat, Vec, Vec, Vec);
191   PetscErrorCode (*fdcoloringsetup)(Mat, ISColoring, MatFDColoring);
192   /*129*/
193   PetscErrorCode (*findoffblockdiagonalentries)(Mat, IS *);
194   PetscErrorCode (*creatempimatconcatenateseqmat)(MPI_Comm, Mat, PetscInt, MatReuse, Mat *);
195   PetscErrorCode (*destroysubmatrices)(PetscInt, Mat *[]);
196   PetscErrorCode (*mattransposesolve)(Mat, Mat, Mat);
197   PetscErrorCode (*getvalueslocal)(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], PetscScalar[]);
198   /*134*/
199   PetscErrorCode (*creategraph)(Mat, PetscBool, PetscBool, PetscReal, PetscInt, PetscInt[], Mat *);
200   PetscErrorCode (*transposesymbolic)(Mat, Mat *);
201   PetscErrorCode (*eliminatezeros)(Mat, PetscBool);
202   PetscErrorCode (*getrowsumabs)(Mat, Vec);
203   PetscErrorCode (*getfactor)(Mat, MatSolverType, MatFactorType, Mat *);
204   /*139*/
205   PetscErrorCode (*getblockdiagonal)(Mat, Mat *);  // NOTE: the caller of get{block, vblock}diagonal owns the returned matrix;
206   PetscErrorCode (*getvblockdiagonal)(Mat, Mat *); // they must destroy it after use
207   PetscErrorCode (*copyhashtoxaij)(Mat, Mat);
208   PetscErrorCode (*getcurrentmemtype)(Mat, PetscMemType *);
209   PetscErrorCode (*zerorowscolumnslocal)(Mat, PetscInt, const PetscInt[], PetscScalar, Vec, Vec);
210 };
211 /*
212     If you add MatOps entries above also add them to the MATOP enum
213     in include/petscmat.h
214 */
215 
216 #include <petscsys.h>
217 
218 typedef struct _p_MatRootName *MatRootName;
219 struct _p_MatRootName {
220   char       *rname, *sname, *mname;
221   MatRootName next;
222 };
223 
224 PETSC_EXTERN MatRootName MatRootNameList;
225 
226 /*
227    Utility private matrix routines used outside Mat
228 */
229 PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode MatFindNonzeroRowsOrCols_Basic(Mat, PetscBool, PetscReal, IS *);
230 PETSC_EXTERN PetscErrorCode                MatShellGetScalingShifts(Mat, PetscScalar *, PetscScalar *, Vec *, Vec *, Vec *, Mat *, IS *, IS *);
231 
232 #define MAT_SHELL_NOT_ALLOWED (void *)-1
233 
234 /*
235    Utility private matrix routines
236 */
237 PETSC_INTERN PetscErrorCode MatConvert_Basic(Mat, MatType, MatReuse, Mat *);
238 PETSC_INTERN PetscErrorCode MatConvert_Shell(Mat, MatType, MatReuse, Mat *);
239 PETSC_INTERN PetscErrorCode MatConvertFrom_Shell(Mat, MatType, MatReuse, Mat *);
240 PETSC_INTERN PetscErrorCode MatShellSetContext_Immutable(Mat, void *);
241 PETSC_INTERN PetscErrorCode MatShellSetContextDestroy_Immutable(Mat, PetscCtxDestroyFn *);
242 PETSC_INTERN PetscErrorCode MatShellSetManageScalingShifts_Immutable(Mat);
243 PETSC_INTERN PetscErrorCode MatCopy_Basic(Mat, Mat, MatStructure);
244 PETSC_INTERN PetscErrorCode MatDiagonalSet_Default(Mat, Vec, InsertMode);
245 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
246 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
247 #endif
248 PETSC_INTERN PetscErrorCode MatSetPreallocationCOO_Basic(Mat, PetscCount, PetscInt[], PetscInt[]);
249 PETSC_INTERN PetscErrorCode MatSetValuesCOO_Basic(Mat, const PetscScalar[], InsertMode);
250 
251 /* Scattering of dense matrices with strided PetscSF */
252 PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode MatDenseScatter_Private(PetscSF, Mat, Mat, InsertMode, ScatterMode);
253 
254 /* This can be moved to the public header after implementing some missing MatProducts */
255 PETSC_INTERN PetscErrorCode MatCreateFromISLocalToGlobalMapping(ISLocalToGlobalMapping, Mat, PetscBool, PetscBool, MatType, Mat *);
256 
257 /* these callbacks rely on the old matrix function pointers for
258    matmat operations. They are unsafe, and should be removed.
259    However, the amount of work needed to clean up all the
260    implementations is not negligible */
261 PETSC_INTERN PetscErrorCode MatProductSymbolic_AB(Mat);
262 PETSC_INTERN PetscErrorCode MatProductNumeric_AB(Mat);
263 PETSC_INTERN PetscErrorCode MatProductSymbolic_AtB(Mat);
264 PETSC_INTERN PetscErrorCode MatProductNumeric_AtB(Mat);
265 PETSC_INTERN PetscErrorCode MatProductSymbolic_ABt(Mat);
266 PETSC_INTERN PetscErrorCode MatProductNumeric_ABt(Mat);
267 PETSC_INTERN PetscErrorCode MatProductNumeric_PtAP(Mat);
268 PETSC_INTERN PetscErrorCode MatProductNumeric_RARt(Mat);
269 PETSC_INTERN PetscErrorCode MatProductSymbolic_ABC(Mat);
270 PETSC_INTERN PetscErrorCode MatProductNumeric_ABC(Mat);
271 
272 PETSC_INTERN PetscErrorCode MatProductCreate_Private(Mat, Mat, Mat, Mat);
273 /* this callback handles all the different triple products and
274    does not rely on the function pointers; used by cuSPARSE/hipSPARSE and KOKKOS-KERNELS */
275 PETSC_INTERN PetscErrorCode MatProductSymbolic_ABC_Basic(Mat);
276 
277 /* CreateGraph is common to AIJ seq and mpi */
278 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat, PetscBool, PetscBool, PetscReal, PetscInt, PetscInt[], Mat *);
279 
280 #if defined(PETSC_CLANG_STATIC_ANALYZER)
281 template <typename Tm>
282 extern void MatCheckPreallocated(Tm, int);
283 template <typename Tm>
284 extern void MatCheckProduct(Tm, int);
285 #else /* PETSC_CLANG_STATIC_ANALYZER */
286   #define MatCheckPreallocated(A, arg) \
287     do { \
288       if (!(A)->preallocated) PetscCall(MatSetUp(A)); \
289     } while (0)
290 
291   #if defined(PETSC_USE_DEBUG)
292     #define MatCheckProduct(A, arg) \
293       do { \
294         PetscCheck((A)->product, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Argument %d \"%s\" is not a matrix obtained from MatProductCreate()", (arg), #A); \
295       } while (0)
296   #else
297     #define MatCheckProduct(A, arg) \
298       do { \
299       } while (0)
300   #endif
301 #endif /* PETSC_CLANG_STATIC_ANALYZER */
302 
303 /*
304   The stash is used to temporarily store inserted matrix values that
305   belong to another processor. During the assembly phase the stashed
306   values are moved to the correct processor and
307 */
308 
309 typedef struct _MatStashSpace *PetscMatStashSpace;
310 
311 struct _MatStashSpace {
312   PetscMatStashSpace next;
313   PetscScalar       *space_head, *val;
314   PetscInt          *idx, *idy;
315   PetscInt           total_space_size;
316   PetscInt           local_used;
317   PetscInt           local_remaining;
318 };
319 
320 PETSC_EXTERN PetscErrorCode PetscMatStashSpaceGet(PetscInt, PetscInt, PetscMatStashSpace *);
321 PETSC_EXTERN PetscErrorCode PetscMatStashSpaceContiguous(PetscInt, PetscMatStashSpace *, PetscScalar *, PetscInt *, PetscInt *);
322 PETSC_EXTERN PetscErrorCode PetscMatStashSpaceDestroy(PetscMatStashSpace *);
323 
324 typedef struct {
325   PetscInt count;
326 } MatStashHeader;
327 
328 typedef struct {
329   void    *buffer; /* Of type blocktype, dynamically constructed  */
330   PetscInt count;
331   char     pending;
332 } MatStashFrame;
333 
334 typedef struct _MatStash MatStash;
335 struct _MatStash {
336   PetscInt           nmax;              /* maximum stash size */
337   PetscInt           umax;              /* user specified max-size */
338   PetscInt           oldnmax;           /* the nmax value used previously */
339   PetscInt           n;                 /* stash size */
340   PetscInt           bs;                /* block size of the stash */
341   PetscInt           reallocs;          /* preserve the no of mallocs invoked */
342   PetscMatStashSpace space_head, space; /* linked list to hold stashed global row/column numbers and matrix values */
343 
344   PetscErrorCode (*ScatterBegin)(Mat, MatStash *, PetscInt *);
345   PetscErrorCode (*ScatterGetMesg)(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
346   PetscErrorCode (*ScatterEnd)(MatStash *);
347   PetscErrorCode (*ScatterDestroy)(MatStash *);
348 
349   /* The following variables are used for communication */
350   MPI_Comm      comm;
351   PetscMPIInt   size, rank;
352   PetscMPIInt   tag1, tag2;
353   MPI_Request  *send_waits;     /* array of send requests */
354   MPI_Request  *recv_waits;     /* array of receive requests */
355   MPI_Status   *send_status;    /* array of send status */
356   PetscMPIInt   nsends, nrecvs; /* numbers of sends and receives */
357   PetscScalar  *svalues;        /* sending data */
358   PetscInt     *sindices;
359   PetscScalar **rvalues;    /* receiving data (values) */
360   PetscInt    **rindices;   /* receiving data (indices) */
361   PetscMPIInt   nprocessed; /* number of messages already processed */
362   PetscMPIInt  *flg_v;      /* indicates what messages have arrived so far and from whom */
363   PetscBool     reproduce;
364   PetscMPIInt   reproduce_count;
365 
366   /* The following variables are used for BTS communication */
367   PetscBool       first_assembly_done; /* Is the first time matrix assembly done? */
368   PetscBool       use_status;          /* Use MPI_Status to determine number of items in each message */
369   PetscMPIInt     nsendranks;
370   PetscMPIInt     nrecvranks;
371   PetscMPIInt    *sendranks;
372   PetscMPIInt    *recvranks;
373   MatStashHeader *sendhdr, *recvhdr;
374   MatStashFrame  *sendframes; /* pointers to the main messages */
375   MatStashFrame  *recvframes;
376   MatStashFrame  *recvframe_active;
377   PetscInt        recvframe_i;     /* index of block within active frame */
378   PetscInt        recvframe_count; /* Count actually sent for current frame */
379   PetscMPIInt     recvcount;       /* Number of receives processed so far */
380   PetscMPIInt    *some_indices;    /* From last call to MPI_Waitsome */
381   MPI_Status     *some_statuses;   /* Statuses from last call to MPI_Waitsome */
382   PetscMPIInt     some_count;      /* Number of requests completed in last call to MPI_Waitsome */
383   PetscMPIInt     some_i;          /* Index of request currently being processed */
384   MPI_Request    *sendreqs;
385   MPI_Request    *recvreqs;
386   PetscSegBuffer  segsendblocks;
387   PetscSegBuffer  segrecvframe;
388   PetscSegBuffer  segrecvblocks;
389   MPI_Datatype    blocktype;
390   size_t          blocktype_size;
391   InsertMode     *insertmode; /* Pointer to check mat->insertmode and set upon message arrival in case no local values have been set. */
392 };
393 
394 #if !defined(PETSC_HAVE_MPIUNI)
395 PETSC_INTERN PetscErrorCode MatStashScatterDestroy_BTS(MatStash *);
396 #endif
397 PETSC_INTERN PetscErrorCode MatStashCreate_Private(MPI_Comm, PetscInt, MatStash *);
398 PETSC_INTERN PetscErrorCode MatStashDestroy_Private(MatStash *);
399 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Private(MatStash *);
400 PETSC_INTERN PetscErrorCode MatStashSetInitialSize_Private(MatStash *, PetscInt);
401 PETSC_INTERN PetscErrorCode MatStashGetInfo_Private(MatStash *, PetscInt *, PetscInt *);
402 PETSC_INTERN PetscErrorCode MatStashValuesRow_Private(MatStash *, PetscInt, PetscInt, const PetscInt[], const PetscScalar[], PetscBool);
403 PETSC_INTERN PetscErrorCode MatStashValuesCol_Private(MatStash *, PetscInt, PetscInt, const PetscInt[], const PetscScalar[], PetscInt, PetscBool);
404 PETSC_INTERN PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *, PetscInt, PetscInt, const PetscInt[], const PetscScalar[], PetscInt, PetscInt, PetscInt);
405 PETSC_INTERN PetscErrorCode MatStashValuesColBlocked_Private(MatStash *, PetscInt, PetscInt, const PetscInt[], const PetscScalar[], PetscInt, PetscInt, PetscInt);
406 PETSC_INTERN PetscErrorCode MatStashScatterBegin_Private(Mat, MatStash *, PetscInt *);
407 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Private(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
408 PETSC_INTERN PetscErrorCode MatGetInfo_External(Mat, MatInfoType, MatInfo *);
409 
410 typedef struct {
411   PetscInt  dim;
412   PetscInt  dims[4];
413   PetscInt  starts[4];
414   PetscBool noc; /* this is a single component problem, hence user will not set MatStencil.c */
415 } MatStencilInfo;
416 
417 /* Info about using compressed row format */
418 typedef struct {
419   PetscBool use;    /* indicates compressed rows have been checked and will be used */
420   PetscInt  nrows;  /* number of non-zero rows */
421   PetscInt *i;      /* compressed row pointer  */
422   PetscInt *rindex; /* compressed row index               */
423 } Mat_CompressedRow;
424 PETSC_EXTERN PetscErrorCode MatCheckCompressedRow(Mat, PetscInt, Mat_CompressedRow *, PetscInt *, PetscInt, PetscReal);
425 
426 typedef struct { /* used by MatCreateRedundantMatrix() for reusing matredundant */
427   PetscInt     nzlocal, nsends, nrecvs;
428   PetscMPIInt *send_rank, *recv_rank;
429   PetscInt    *sbuf_nz, *rbuf_nz, *sbuf_j, **rbuf_j;
430   PetscScalar *sbuf_a, **rbuf_a;
431   MPI_Comm     subcomm; /* when user does not provide a subcomm */
432   IS           isrow, iscol;
433   Mat         *matseq;
434 } Mat_Redundant;
435 
436 typedef struct { /* used by MatProduct() */
437   MatProductType type;
438   char          *alg;
439   Mat            A, B, C, Dwork;
440   PetscBool      symbolic_used_the_fact_A_is_symmetric; /* Symbolic phase took advantage of the fact that A is symmetric, and optimized e.g. AtB as AB. Then, .. */
441   PetscBool      symbolic_used_the_fact_B_is_symmetric; /* .. in the numeric phase, if a new A is not symmetric (but has the same sparsity as the old A therefore .. */
442   PetscBool      symbolic_used_the_fact_C_is_symmetric; /* MatMatMult(A,B,MAT_REUSE_MATRIX,..&C) is still legitimate), we need to redo symbolic! */
443   PetscObjectParameterDeclare(PetscReal, fill);
444   PetscBool api_user; /* used to distinguish command line options and to indicate the matrix values are ready to be consumed at symbolic phase if needed */
445   PetscBool setfromoptionscalled;
446 
447   /* Some products may display the information on the algorithm used */
448   PetscErrorCode (*view)(Mat, PetscViewer);
449 
450   /* many products have intermediate data structures, each specific to Mat types and product type */
451   PetscBool          clear;   /* whether or not to clear the data structures after MatProductNumeric has been called */
452   void              *data;    /* where to stash those structures */
453   PetscCtxDestroyFn *destroy; /* freeing data */
454 } Mat_Product;
455 
456 struct _p_Mat {
457   PETSCHEADER(struct _MatOps);
458   PetscLayout      rmap, cmap;
459   void            *data;                                    /* implementation-specific data */
460   MatFactorType    factortype;                              /* MAT_FACTOR_LU, ILU, CHOLESKY or ICC */
461   PetscBool        trivialsymbolic;                         /* indicates the symbolic factorization doesn't actually do a symbolic factorization, it is delayed to the numeric factorization */
462   PetscBool        canuseordering;                          /* factorization can use ordering provide to routine (most PETSc implementations) */
463   MatOrderingType  preferredordering[MAT_FACTOR_NUM_TYPES]; /* what is the preferred (or default) ordering for the matrix solver type */
464   PetscBool        assembled;                               /* is the matrix assembled? */
465   PetscBool        was_assembled;                           /* new values inserted into assembled mat */
466   PetscInt         num_ass;                                 /* number of times matrix has been assembled */
467   PetscObjectState nonzerostate;                            /* each time new nonzeros locations are introduced into the matrix this is updated */
468   PetscObjectState ass_nonzerostate;                        /* nonzero state at last assembly */
469   MatInfo          info;                                    /* matrix information */
470   InsertMode       insertmode;                              /* have values been inserted in matrix or added? */
471   MatStash         stash, bstash;                           /* used for assembling off-proc mat emements */
472   MatNullSpace     nullsp;                                  /* null space (operator is singular) */
473   MatNullSpace     transnullsp;                             /* null space of transpose of operator */
474   MatNullSpace     nearnullsp;                              /* near null space to be used by multigrid methods */
475   PetscInt         congruentlayouts;                        /* are the rows and columns layouts congruent? */
476   PetscBool        preallocated;
477   MatStencilInfo   stencil; /* information for structured grid */
478   PetscBool3       symmetric, hermitian, structurally_symmetric, spd;
479   PetscBool        symmetry_eternal, structural_symmetry_eternal, spd_eternal;
480   PetscBool        nooffprocentries, nooffproczerorows;
481   PetscBool        assembly_subset; /* set by MAT_SUBSET_OFF_PROC_ENTRIES */
482   PetscBool        submat_singleis; /* for efficient PCSetUp_ASM() */
483   PetscBool        structure_only;
484   PetscBool        sortedfull;      /* full, sorted rows are inserted */
485   PetscBool        force_diagonals; /* set by MAT_FORCE_DIAGONAL_ENTRIES */
486 #if defined(PETSC_HAVE_DEVICE)
487   PetscOffloadMask offloadmask; /* a mask which indicates where the valid matrix data is (GPU, CPU or both) */
488   PetscBool        boundtocpu;
489   PetscBool        bindingpropagates;
490 #endif
491   char                *defaultrandtype;
492   void                *spptr; /* pointer for special library like SuperLU */
493   char                *solvertype;
494   PetscBool            checksymmetryonassembly, checknullspaceonassembly;
495   PetscReal            checksymmetrytol;
496   Mat                  schur;                            /* Schur complement matrix */
497   MatFactorSchurStatus schur_status;                     /* status of the Schur complement matrix */
498   Mat_Redundant       *redundant;                        /* used by MatCreateRedundantMatrix() */
499   PetscBool            erroriffailure;                   /* Generate an error if detected (for example a zero pivot) instead of returning */
500   MatFactorError       factorerrortype;                  /* type of error in factorization */
501   PetscReal            factorerror_zeropivot_value;      /* If numerical zero pivot was detected this is the computed value */
502   PetscInt             factorerror_zeropivot_row;        /* Row where zero pivot was detected */
503   PetscInt             nblocks, *bsizes;                 /* support for MatSetVariableBlockSizes() */
504   PetscInt             p_cstart, p_rank, p_cend, n_rank; /* Information from parallel MatComputeVariableBlockEnvelope() */
505   PetscBool            p_parallel;
506   char                *defaultvectype;
507   Mat_Product         *product;
508   PetscBool            form_explicit_transpose; /* hint to generate an explicit mat tranpsose for operations like MatMultTranspose() */
509   PetscBool            transupdated;            /* whether or not the explicitly generated transpose is up-to-date */
510   char                *factorprefix;            /* the prefix to use with factored matrix that is created */
511   PetscBool            hash_active;             /* indicates MatSetValues() is being handled by hashing */
512 };
513 
514 PETSC_INTERN PetscErrorCode MatAXPY_Basic(Mat, PetscScalar, Mat, MatStructure);
515 PETSC_INTERN PetscErrorCode MatAXPY_BasicWithPreallocation(Mat, Mat, PetscScalar, Mat, MatStructure);
516 PETSC_INTERN PetscErrorCode MatAXPY_Basic_Preallocate(Mat, Mat, Mat *);
517 PETSC_INTERN PetscErrorCode MatAXPY_Dense_Nest(Mat, PetscScalar, Mat);
518 
519 PETSC_INTERN PetscErrorCode MatSetUp_Default(Mat);
520 
521 /*
522     Utility for MatZeroRows
523 */
524 PETSC_INTERN PetscErrorCode MatZeroRowsMapLocal_Private(Mat, PetscInt, const PetscInt *, PetscInt *, PetscInt **);
525 
526 /*
527     Utility for MatView/MatLoad
528 */
529 PETSC_INTERN PetscErrorCode MatView_Binary_BlockSizes(Mat, PetscViewer);
530 PETSC_INTERN PetscErrorCode MatLoad_Binary_BlockSizes(Mat, PetscViewer);
531 
532 /*
533     Object for partitioning graphs
534 */
535 
536 typedef struct _MatPartitioningOps *MatPartitioningOps;
537 struct _MatPartitioningOps {
538   PetscErrorCode (*apply)(MatPartitioning, IS *);
539   PetscErrorCode (*applynd)(MatPartitioning, IS *);
540   PetscErrorCode (*setfromoptions)(MatPartitioning, PetscOptionItems);
541   PetscErrorCode (*destroy)(MatPartitioning);
542   PetscErrorCode (*view)(MatPartitioning, PetscViewer);
543   PetscErrorCode (*improve)(MatPartitioning, IS *);
544 };
545 
546 struct _p_MatPartitioning {
547   PETSCHEADER(struct _MatPartitioningOps);
548   Mat        adj;
549   PetscInt  *vertex_weights;
550   PetscReal *part_weights;
551   PetscInt   n;    /* number of partitions */
552   PetscInt   ncon; /* number of vertex weights per vertex */
553   void      *data;
554   PetscBool  use_edge_weights; /* A flag indicates whether or not to use edge weights */
555 };
556 
557 /* needed for parallel nested dissection by ParMetis and PTSCOTCH */
558 PETSC_INTERN PetscErrorCode MatPartitioningSizesToSep_Private(PetscInt, PetscInt[], PetscInt[], PetscInt[]);
559 
560 /*
561     Object for coarsen graphs
562 */
563 typedef struct _MatCoarsenOps *MatCoarsenOps;
564 struct _MatCoarsenOps {
565   PetscErrorCode (*apply)(MatCoarsen);
566   PetscErrorCode (*setfromoptions)(MatCoarsen, PetscOptionItems);
567   PetscErrorCode (*destroy)(MatCoarsen);
568   PetscErrorCode (*view)(MatCoarsen, PetscViewer);
569 };
570 
571 #define MAT_COARSEN_STRENGTH_INDEX_SIZE 3
572 struct _p_MatCoarsen {
573   PETSCHEADER(struct _MatCoarsenOps);
574   Mat   graph;
575   void *subctx;
576   /* */
577   PetscBool         strict_aggs;
578   IS                perm;
579   PetscCoarsenData *agg_lists;
580   PetscInt          max_it;    /* number of iterations in HEM */
581   PetscReal         threshold; /* HEM can filter interim graphs */
582   PetscInt          strength_index_size;
583   PetscInt          strength_index[MAT_COARSEN_STRENGTH_INDEX_SIZE];
584 };
585 
586 PETSC_EXTERN PetscErrorCode MatCoarsenMISKSetDistance(MatCoarsen, PetscInt);
587 PETSC_EXTERN PetscErrorCode MatCoarsenMISKGetDistance(MatCoarsen, PetscInt *);
588 
589 /*
590     Used in aijdevice.h
591 */
592 typedef struct {
593   PetscInt    *i;
594   PetscInt    *j;
595   PetscScalar *a;
596   PetscInt     n;
597   PetscInt     ignorezeroentries;
598 } PetscCSRDataStructure;
599 
600 /*
601     MatFDColoring is used to compute Jacobian matrices efficiently
602   via coloring. The data structure is explained below in an example.
603 
604    Color =   0    1     0    2   |   2      3       0
605    ---------------------------------------------------
606             00   01              |          05
607             10   11              |   14     15               Processor  0
608                        22    23  |          25
609                        32    33  |
610    ===================================================
611                                  |   44     45     46
612             50                   |          55               Processor 1
613                                  |   64            66
614    ---------------------------------------------------
615 
616     ncolors = 4;
617 
618     ncolumns      = {2,1,1,0}
619     columns       = {{0,2},{1},{3},{}}
620     nrows         = {4,2,3,3}
621     rows          = {{0,1,2,3},{0,1},{1,2,3},{0,1,2}}
622     vwscale       = {dx(0),dx(1),dx(2),dx(3)}               MPI Vec
623     vscale        = {dx(0),dx(1),dx(2),dx(3),dx(4),dx(5)}   Seq Vec
624 
625     ncolumns      = {1,0,1,1}
626     columns       = {{6},{},{4},{5}}
627     nrows         = {3,0,2,2}
628     rows          = {{0,1,2},{},{1,2},{1,2}}
629     vwscale       = {dx(4),dx(5),dx(6)}              MPI Vec
630     vscale        = {dx(0),dx(4),dx(5),dx(6)}        Seq Vec
631 
632     See the routine MatFDColoringApply() for how this data is used
633     to compute the Jacobian.
634 
635 */
636 typedef struct {
637   PetscInt     row;
638   PetscInt     col;
639   PetscScalar *valaddr; /* address of value */
640 } MatEntry;
641 
642 typedef struct {
643   PetscInt     row;
644   PetscScalar *valaddr; /* address of value */
645 } MatEntry2;
646 
647 struct _p_MatFDColoring {
648   PETSCHEADER(int);
649   PetscInt                M, N, m;          /* total rows, columns; local rows */
650   PetscInt                rstart;           /* first row owned by local processor */
651   PetscInt                ncolors;          /* number of colors */
652   PetscInt               *ncolumns;         /* number of local columns for a color */
653   PetscInt              **columns;          /* lists the local columns of each color (using global column numbering) */
654   IS                     *isa;              /* these are the IS that contain the column values given in columns */
655   PetscInt               *nrows;            /* number of local rows for each color */
656   MatEntry               *matentry;         /* holds (row, column, address of value) for Jacobian matrix entry */
657   MatEntry2              *matentry2;        /* holds (row, address of value) for Jacobian matrix entry */
658   PetscScalar            *dy;               /* store a block of F(x+dx)-F(x) when J is in BAIJ format */
659   PetscReal               error_rel;        /* square root of relative error in computing function */
660   PetscReal               umin;             /* minimum allowable u'dx value */
661   Vec                     w1, w2, w3;       /* work vectors used in computing Jacobian */
662   PetscBool               fset;             /* indicates that the initial function value F(X) is set */
663   MatFDColoringFn        *f;                /* function that defines Jacobian */
664   void                   *fctx;             /* optional user-defined context for use by the function f */
665   Vec                     vscale;           /* holds FD scaling, i.e. 1/dx for each perturbed column */
666   PetscInt                currentcolor;     /* color for which function evaluation is being done now */
667   const char             *htype;            /* "wp" or "ds" */
668   ISColoringType          ctype;            /* IS_COLORING_GLOBAL or IS_COLORING_LOCAL */
669   PetscInt                brows, bcols;     /* number of block rows or columns for speedup inserting the dense matrix into sparse Jacobian */
670   PetscBool               setupcalled;      /* true if setup has been called */
671   PetscBool               viewed;           /* true if the -mat_fd_coloring_view has been triggered already */
672   PetscFortranCallbackFn *ftn_func_pointer; /* serve the same purpose as *fortran_func_pointers in PETSc objects */
673   void                   *ftn_func_cntx;
674   PetscObjectId           matid; /* matrix this object was created with, must always be the same */
675 };
676 
677 typedef struct _MatColoringOps *MatColoringOps;
678 struct _MatColoringOps {
679   PetscErrorCode (*destroy)(MatColoring);
680   PetscErrorCode (*setfromoptions)(MatColoring, PetscOptionItems);
681   PetscErrorCode (*view)(MatColoring, PetscViewer);
682   PetscErrorCode (*apply)(MatColoring, ISColoring *);
683   PetscErrorCode (*weights)(MatColoring, PetscReal **, PetscInt **);
684 };
685 
686 struct _p_MatColoring {
687   PETSCHEADER(struct _MatColoringOps);
688   Mat                   mat;
689   PetscInt              dist;         /* distance of the coloring */
690   PetscInt              maxcolors;    /* the maximum number of colors returned, maxcolors=1 for MIS */
691   void                 *data;         /* inner context */
692   PetscBool             valid;        /* check to see if what is produced is a valid coloring */
693   MatColoringWeightType weight_type;  /* type of weight computation to be performed */
694   PetscReal            *user_weights; /* custom weights and permutation */
695   PetscInt             *user_lperm;
696   PetscBool             valid_iscoloring; /* check to see if matcoloring is produced a valid iscoloring */
697 };
698 
699 struct _p_MatTransposeColoring {
700   PETSCHEADER(int);
701   PetscInt       M, N, m;      /* total rows, columns; local rows */
702   PetscInt       rstart;       /* first row owned by local processor */
703   PetscInt       ncolors;      /* number of colors */
704   PetscInt      *ncolumns;     /* number of local columns for a color */
705   PetscInt      *nrows;        /* number of local rows for each color */
706   PetscInt       currentcolor; /* color for which function evaluation is being done now */
707   ISColoringType ctype;        /* IS_COLORING_GLOBAL or IS_COLORING_LOCAL */
708 
709   PetscInt *colorforrow, *colorforcol; /* pointer to rows and columns */
710   PetscInt *rows;                      /* lists the local rows for each color (using the local row numbering) */
711   PetscInt *den2sp;                    /* maps (row,color) in the dense matrix to index of sparse matrix array a->a */
712   PetscInt *columns;                   /* lists the local columns of each color (using global column numbering) */
713   PetscInt  brows;                     /* number of rows for efficient implementation of MatTransColoringApplyDenToSp() */
714   PetscInt *lstart;                    /* array used for loop over row blocks of Csparse */
715 };
716 
717 /*
718    Null space context for preconditioner/operators
719 */
720 struct _p_MatNullSpace {
721   PETSCHEADER(int);
722   PetscBool             has_cnst;
723   PetscInt              n;
724   Vec                  *vecs;
725   PetscScalar          *alpha;  /* for projections */
726   MatNullSpaceRemoveFn *remove; /* for user provided removal function */
727   void                 *rmctx;  /* context for remove() function */
728 };
729 
730 /*
731    Internal data structure for MATMPIDENSE
732 */
733 typedef struct {
734   Mat A; /* local submatrix */
735 
736   /* The following variables are used for matrix assembly */
737   PetscBool    donotstash;        /* Flag indicating if values should be stashed */
738   MPI_Request *send_waits;        /* array of send requests */
739   MPI_Request *recv_waits;        /* array of receive requests */
740   PetscInt     nsends, nrecvs;    /* numbers of sends and receives */
741   PetscScalar *svalues, *rvalues; /* sending and receiving data */
742   PetscInt     rmax;              /* maximum message length */
743 
744   /* The following variables are used for matrix-vector products */
745   Vec       lvec;        /* local vector */
746   PetscSF   Mvctx;       /* for mat-mult communications */
747   PetscBool roworiented; /* if true, row-oriented input (default) */
748 
749   /* Support for MatDenseGetColumnVec and MatDenseGetSubMatrix */
750   Mat                cmat;     /* matrix representation of a given subset of columns */
751   Vec                cvec;     /* vector representation of a given column */
752   const PetscScalar *ptrinuse; /* holds array to be restored (just a placeholder) */
753   PetscInt           vecinuse; /* if cvec is in use (col = vecinuse-1) */
754   PetscInt           matinuse; /* if cmat is in use (cbegin = matinuse-1) */
755   /* if this is from MatDenseGetSubMatrix, which columns and rows does it correspond to? */
756   PetscInt sub_rbegin;
757   PetscInt sub_rend;
758   PetscInt sub_cbegin;
759   PetscInt sub_cend;
760 } Mat_MPIDense;
761 
762 /*
763    Checking zero pivot for LU, ILU preconditioners.
764 */
765 typedef struct {
766   PetscInt    nshift, nshift_max;
767   PetscReal   shift_amount, shift_lo, shift_hi, shift_top, shift_fraction;
768   PetscBool   newshift;
769   PetscReal   rs; /* active row sum of abs(off-diagonals) */
770   PetscScalar pv; /* pivot of the active row */
771 } FactorShiftCtx;
772 
773 PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode MatTransposeCheckNonzeroState_Private(Mat, Mat);
774 
775 /*
776  Used by MatTranspose() and potentially other functions to track the matrix used in the generation of another matrix
777 */
778 typedef struct {
779   PetscObjectId    id;
780   PetscObjectState state;
781   PetscObjectState nonzerostate;
782 } MatParentState;
783 
784 PETSC_EXTERN PetscErrorCode MatFactorDumpMatrix(Mat);
785 PETSC_INTERN PetscErrorCode MatSetBlockSizes_Default(Mat, PetscInt, PetscInt);
786 
787 PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode MatShift_Basic(Mat, PetscScalar);
788 
MatPivotCheck_nz(PETSC_UNUSED Mat mat,const MatFactorInfo * info,FactorShiftCtx * sctx,PETSC_UNUSED PetscInt row)789 static inline PetscErrorCode MatPivotCheck_nz(PETSC_UNUSED Mat mat, const MatFactorInfo *info, FactorShiftCtx *sctx, PETSC_UNUSED PetscInt row)
790 {
791   PetscReal _rs   = sctx->rs;
792   PetscReal _zero = info->zeropivot * _rs;
793 
794   PetscFunctionBegin;
795   if (PetscAbsScalar(sctx->pv) <= _zero && !PetscIsNanScalar(sctx->pv)) {
796     /* force |diag| > zeropivot*rs */
797     if (!sctx->nshift) sctx->shift_amount = info->shiftamount;
798     else sctx->shift_amount *= 2.0;
799     sctx->newshift = PETSC_TRUE;
800     (sctx->nshift)++;
801   } else {
802     sctx->newshift = PETSC_FALSE;
803   }
804   PetscFunctionReturn(PETSC_SUCCESS);
805 }
806 
MatPivotCheck_pd(PETSC_UNUSED Mat mat,const MatFactorInfo * info,FactorShiftCtx * sctx,PETSC_UNUSED PetscInt row)807 static inline PetscErrorCode MatPivotCheck_pd(PETSC_UNUSED Mat mat, const MatFactorInfo *info, FactorShiftCtx *sctx, PETSC_UNUSED PetscInt row)
808 {
809   PetscReal _rs   = sctx->rs;
810   PetscReal _zero = info->zeropivot * _rs;
811 
812   PetscFunctionBegin;
813   if (PetscRealPart(sctx->pv) <= _zero && !PetscIsNanScalar(sctx->pv)) {
814     /* force matfactor to be diagonally dominant */
815     if (sctx->nshift == sctx->nshift_max) {
816       sctx->shift_fraction = sctx->shift_hi;
817     } else {
818       sctx->shift_lo       = sctx->shift_fraction;
819       sctx->shift_fraction = (sctx->shift_hi + sctx->shift_lo) / (PetscReal)2.;
820     }
821     sctx->shift_amount = sctx->shift_fraction * sctx->shift_top;
822     sctx->nshift++;
823     sctx->newshift = PETSC_TRUE;
824   } else {
825     sctx->newshift = PETSC_FALSE;
826   }
827   PetscFunctionReturn(PETSC_SUCCESS);
828 }
829 
MatPivotCheck_inblocks(PETSC_UNUSED Mat mat,const MatFactorInfo * info,FactorShiftCtx * sctx,PETSC_UNUSED PetscInt row)830 static inline PetscErrorCode MatPivotCheck_inblocks(PETSC_UNUSED Mat mat, const MatFactorInfo *info, FactorShiftCtx *sctx, PETSC_UNUSED PetscInt row)
831 {
832   PetscReal _zero = info->zeropivot;
833 
834   PetscFunctionBegin;
835   if (PetscAbsScalar(sctx->pv) <= _zero && !PetscIsNanScalar(sctx->pv)) {
836     sctx->pv += info->shiftamount;
837     sctx->shift_amount = 0.0;
838     sctx->nshift++;
839   }
840   sctx->newshift = PETSC_FALSE;
841   PetscFunctionReturn(PETSC_SUCCESS);
842 }
843 
MatPivotCheck_none(Mat fact,Mat mat,const MatFactorInfo * info,FactorShiftCtx * sctx,PetscInt row)844 static inline PetscErrorCode MatPivotCheck_none(Mat fact, Mat mat, const MatFactorInfo *info, FactorShiftCtx *sctx, PetscInt row)
845 {
846   PetscReal _zero = info->zeropivot;
847 
848   PetscFunctionBegin;
849   sctx->newshift = PETSC_FALSE;
850   if (PetscAbsScalar(sctx->pv) <= _zero && !PetscIsNanScalar(sctx->pv)) {
851     PetscCheck(!mat->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot row %" PetscInt_FMT " value %g tolerance %g", row, (double)PetscAbsScalar(sctx->pv), (double)_zero);
852     PetscCall(PetscInfo(mat, "Detected zero pivot in factorization in row %" PetscInt_FMT " value %g tolerance %g\n", row, (double)PetscAbsScalar(sctx->pv), (double)_zero));
853     fact->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
854     fact->factorerror_zeropivot_value = PetscAbsScalar(sctx->pv);
855     fact->factorerror_zeropivot_row   = row;
856   }
857   PetscFunctionReturn(PETSC_SUCCESS);
858 }
859 
MatPivotCheck(Mat fact,Mat mat,const MatFactorInfo * info,FactorShiftCtx * sctx,PetscInt row)860 static inline PetscErrorCode MatPivotCheck(Mat fact, Mat mat, const MatFactorInfo *info, FactorShiftCtx *sctx, PetscInt row)
861 {
862   PetscFunctionBegin;
863   if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) PetscCall(MatPivotCheck_nz(mat, info, sctx, row));
864   else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) PetscCall(MatPivotCheck_pd(mat, info, sctx, row));
865   else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) PetscCall(MatPivotCheck_inblocks(mat, info, sctx, row));
866   else PetscCall(MatPivotCheck_none(fact, mat, info, sctx, row));
867   PetscFunctionReturn(PETSC_SUCCESS);
868 }
869 
870 #include <petscbt.h>
871 /*
872   Create and initialize a linked list
873   Input Parameters:
874     idx_start - starting index of the list
875     lnk_max   - max value of lnk indicating the end of the list
876     nlnk      - max length of the list
877   Output Parameters:
878     lnk       - list initialized
879     bt        - PetscBT (bitarray) with all bits set to false
880     lnk_empty - flg indicating the list is empty
881 */
882 #define PetscLLCreate(idx_start, lnk_max, nlnk, lnk, bt) ((PetscErrorCode)(PetscMalloc1(nlnk, &lnk) || PetscBTCreate(nlnk, &(bt)) || (lnk[idx_start] = lnk_max, PETSC_SUCCESS)))
883 
884 #define PetscLLCreate_new(idx_start, lnk_max, nlnk, lnk, bt, lnk_empty) ((PetscErrorCode)(PetscMalloc1(nlnk, &lnk) || PetscBTCreate(nlnk, &(bt)) || (lnk_empty = PETSC_TRUE, 0) || (lnk[idx_start] = lnk_max, PETSC_SUCCESS)))
885 
PetscLLInsertLocation_Private(PetscBool assume_sorted,PetscInt k,PetscInt idx_start,PetscInt entry,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnkdata,PetscInt * PETSC_RESTRICT lnk)886 static inline PetscErrorCode PetscLLInsertLocation_Private(PetscBool assume_sorted, PetscInt k, PetscInt idx_start, PetscInt entry, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnkdata, PetscInt *PETSC_RESTRICT lnk)
887 {
888   PetscInt location;
889 
890   PetscFunctionBegin;
891   /* start from the beginning if entry < previous entry */
892   if (!assume_sorted && k && entry < *lnkdata) *lnkdata = idx_start;
893   /* search for insertion location */
894   do {
895     location = *lnkdata;
896     *lnkdata = lnk[location];
897   } while (entry > *lnkdata);
898   /* insertion location is found, add entry into lnk */
899   lnk[location] = entry;
900   lnk[entry]    = *lnkdata;
901   ++(*nlnk);
902   *lnkdata = entry; /* next search starts from here if next_entry > entry */
903   PetscFunctionReturn(PETSC_SUCCESS);
904 }
905 
PetscLLAdd_Private(PetscInt nidx,const PetscInt * PETSC_RESTRICT indices,PetscInt idx_start,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnk,PetscBT bt,PetscBool assume_sorted)906 static inline PetscErrorCode PetscLLAdd_Private(PetscInt nidx, const PetscInt *PETSC_RESTRICT indices, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscBT bt, PetscBool assume_sorted)
907 {
908   PetscFunctionBegin;
909   *nlnk = 0;
910   for (PetscInt k = 0, lnkdata = idx_start; k < nidx; ++k) {
911     const PetscInt entry = indices[k];
912 
913     if (!PetscBTLookupSet(bt, entry)) PetscCall(PetscLLInsertLocation_Private(assume_sorted, k, idx_start, entry, nlnk, &lnkdata, lnk));
914   }
915   PetscFunctionReturn(PETSC_SUCCESS);
916 }
917 
918 /*
919   Add an index set into a sorted linked list
920   Input Parameters:
921     nidx      - number of input indices
922     indices   - integer array
923     idx_start - starting index of the list
924     lnk       - linked list(an integer array) that is created
925     bt        - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
926   output Parameters:
927     nlnk      - number of newly added indices
928     lnk       - the sorted(increasing order) linked list containing new and non-redundate entries from indices
929     bt        - updated PetscBT (bitarray)
930 */
PetscLLAdd(PetscInt nidx,const PetscInt * PETSC_RESTRICT indices,PetscInt idx_start,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnk,PetscBT bt)931 static inline PetscErrorCode PetscLLAdd(PetscInt nidx, const PetscInt *PETSC_RESTRICT indices, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscBT bt)
932 {
933   PetscFunctionBegin;
934   PetscCall(PetscLLAdd_Private(nidx, indices, idx_start, nlnk, lnk, bt, PETSC_FALSE));
935   PetscFunctionReturn(PETSC_SUCCESS);
936 }
937 
938 /*
939   Add a SORTED ascending index set into a sorted linked list - same as PetscLLAdd() bus skip 'if (_k && _entry < _lnkdata) _lnkdata  = idx_start;'
940   Input Parameters:
941     nidx      - number of input indices
942     indices   - sorted integer array
943     idx_start - starting index of the list
944     lnk       - linked list(an integer array) that is created
945     bt        - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
946   output Parameters:
947     nlnk      - number of newly added indices
948     lnk       - the sorted(increasing order) linked list containing new and non-redundate entries from indices
949     bt        - updated PetscBT (bitarray)
950 */
PetscLLAddSorted(PetscInt nidx,const PetscInt * PETSC_RESTRICT indices,PetscInt idx_start,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnk,PetscBT bt)951 static inline PetscErrorCode PetscLLAddSorted(PetscInt nidx, const PetscInt *PETSC_RESTRICT indices, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscBT bt)
952 {
953   PetscFunctionBegin;
954   PetscCall(PetscLLAdd_Private(nidx, indices, idx_start, nlnk, lnk, bt, PETSC_TRUE));
955   PetscFunctionReturn(PETSC_SUCCESS);
956 }
957 
958 /*
959   Add a permuted index set into a sorted linked list
960   Input Parameters:
961     nidx      - number of input indices
962     indices   - integer array
963     perm      - permutation of indices
964     idx_start - starting index of the list
965     lnk       - linked list(an integer array) that is created
966     bt        - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
967   output Parameters:
968     nlnk      - number of newly added indices
969     lnk       - the sorted(increasing order) linked list containing new and non-redundate entries from indices
970     bt        - updated PetscBT (bitarray)
971 */
PetscLLAddPerm(PetscInt nidx,const PetscInt * PETSC_RESTRICT indices,const PetscInt * PETSC_RESTRICT perm,PetscInt idx_start,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnk,PetscBT bt)972 static inline PetscErrorCode PetscLLAddPerm(PetscInt nidx, const PetscInt *PETSC_RESTRICT indices, const PetscInt *PETSC_RESTRICT perm, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscBT bt)
973 {
974   PetscFunctionBegin;
975   *nlnk = 0;
976   for (PetscInt k = 0, lnkdata = idx_start; k < nidx; ++k) {
977     const PetscInt entry = perm[indices[k]];
978 
979     if (!PetscBTLookupSet(bt, entry)) PetscCall(PetscLLInsertLocation_Private(PETSC_FALSE, k, idx_start, entry, nlnk, &lnkdata, lnk));
980   }
981   PetscFunctionReturn(PETSC_SUCCESS);
982 }
983 
984 #if 0
985 /* this appears to be unused? */
986 static inline PetscErrorCode PetscLLAddSorted_new(PetscInt nidx, PetscInt *indices, PetscInt idx_start, PetscBool *lnk_empty, PetscInt *nlnk, PetscInt *lnk, PetscBT bt)
987 {
988   PetscInt lnkdata = idx_start;
989 
990   PetscFunctionBegin;
991   if (*lnk_empty) {
992     for (PetscInt k = 0; k < nidx; ++k) {
993       const PetscInt entry = indices[k], location = lnkdata;
994 
995       PetscCall(PetscBTSet(bt,entry)); /* mark the new entry */
996       lnkdata       = lnk[location];
997       /* insertion location is found, add entry into lnk */
998       lnk[location] = entry;
999       lnk[entry]    = lnkdata;
1000       lnkdata       = entry; /* next search starts from here */
1001     }
1002     /* lnk[indices[nidx-1]] = lnk[idx_start];
1003        lnk[idx_start]       = indices[0];
1004        PetscCall(PetscBTSet(bt,indices[0]));
1005        for (_k=1; _k<nidx; _k++) {
1006        PetscCall(PetscBTSet(bt,indices[_k]));
1007        lnk[indices[_k-1]] = indices[_k];
1008        }
1009     */
1010     *nlnk      = nidx;
1011     *lnk_empty = PETSC_FALSE;
1012   } else {
1013     *nlnk = 0;
1014     for (PetscInt k = 0; k < nidx; ++k) {
1015       const PetscInt entry = indices[k];
1016 
1017       if (!PetscBTLookupSet(bt,entry)) PetscCall(PetscLLInsertLocation_Private(PETSC_TRUE,k,idx_start,entry,nlnk,&lnkdata,lnk));
1018     }
1019   }
1020   PetscFunctionReturn(PETSC_SUCCESS);
1021 }
1022 #endif
1023 
1024 /*
1025   Add a SORTED index set into a sorted linked list used for LUFactorSymbolic()
1026   Same as PetscLLAddSorted() with an additional operation:
1027        count the number of input indices that are no larger than 'diag'
1028   Input Parameters:
1029     indices   - sorted integer array
1030     idx_start - starting index of the list, index of pivot row
1031     lnk       - linked list(an integer array) that is created
1032     bt        - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1033     diag      - index of the active row in LUFactorSymbolic
1034     nzbd      - number of input indices with indices <= idx_start
1035     im        - im[idx_start] is initialized as num of nonzero entries in row=idx_start
1036   output Parameters:
1037     nlnk      - number of newly added indices
1038     lnk       - the sorted(increasing order) linked list containing new and non-redundate entries from indices
1039     bt        - updated PetscBT (bitarray)
1040     im        - im[idx_start]: unchanged if diag is not an entry
1041                              : num of entries with indices <= diag if diag is an entry
1042 */
PetscLLAddSortedLU(const PetscInt * PETSC_RESTRICT indices,PetscInt idx_start,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnk,PetscBT bt,PetscInt diag,PetscInt nzbd,PetscInt * PETSC_RESTRICT im)1043 static inline PetscErrorCode PetscLLAddSortedLU(const PetscInt *PETSC_RESTRICT indices, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscBT bt, PetscInt diag, PetscInt nzbd, PetscInt *PETSC_RESTRICT im)
1044 {
1045   const PetscInt nidx = im[idx_start] - nzbd; /* num of entries with idx_start < index <= diag */
1046 
1047   PetscFunctionBegin;
1048   *nlnk = 0;
1049   for (PetscInt k = 0, lnkdata = idx_start; k < nidx; ++k) {
1050     const PetscInt entry = indices[k];
1051 
1052     ++nzbd;
1053     if (entry == diag) im[idx_start] = nzbd;
1054     if (!PetscBTLookupSet(bt, entry)) PetscCall(PetscLLInsertLocation_Private(PETSC_TRUE, k, idx_start, entry, nlnk, &lnkdata, lnk));
1055   }
1056   PetscFunctionReturn(PETSC_SUCCESS);
1057 }
1058 
1059 /*
1060   Copy data on the list into an array, then initialize the list
1061   Input Parameters:
1062     idx_start - starting index of the list
1063     lnk_max   - max value of lnk indicating the end of the list
1064     nlnk      - number of data on the list to be copied
1065     lnk       - linked list
1066     bt        - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1067   output Parameters:
1068     indices   - array that contains the copied data
1069     lnk       - linked list that is cleaned and initialize
1070     bt        - PetscBT (bitarray) with all bits set to false
1071 */
PetscLLClean(PetscInt idx_start,PetscInt lnk_max,PetscInt nlnk,PetscInt * PETSC_RESTRICT lnk,PetscInt * PETSC_RESTRICT indices,PetscBT bt)1072 static inline PetscErrorCode PetscLLClean(PetscInt idx_start, PetscInt lnk_max, PetscInt nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT indices, PetscBT bt)
1073 {
1074   PetscFunctionBegin;
1075   for (PetscInt j = 0, idx = idx_start; j < nlnk; ++j) {
1076     idx        = lnk[idx];
1077     indices[j] = idx;
1078     PetscCall(PetscBTClear(bt, idx));
1079   }
1080   lnk[idx_start] = lnk_max;
1081   PetscFunctionReturn(PETSC_SUCCESS);
1082 }
1083 
1084 /*
1085   Free memories used by the list
1086 */
1087 #define PetscLLDestroy(lnk, bt) ((PetscErrorCode)(PetscFree(lnk) || PetscBTDestroy(&(bt))))
1088 
1089 /* Routines below are used for incomplete matrix factorization */
1090 /*
1091   Create and initialize a linked list and its levels
1092   Input Parameters:
1093     idx_start - starting index of the list
1094     lnk_max   - max value of lnk indicating the end of the list
1095     nlnk      - max length of the list
1096   Output Parameters:
1097     lnk       - list initialized
1098     lnk_lvl   - array of size nlnk for storing levels of lnk
1099     bt        - PetscBT (bitarray) with all bits set to false
1100 */
1101 #define PetscIncompleteLLCreate(idx_start, lnk_max, nlnk, lnk, lnk_lvl, bt) \
1102   ((PetscErrorCode)(PetscIntMultError(2, nlnk, NULL) || PetscMalloc1(2 * nlnk, &lnk) || PetscBTCreate(nlnk, &(bt)) || (lnk[idx_start] = lnk_max, lnk_lvl = lnk + nlnk, PETSC_SUCCESS)))
1103 
PetscIncompleteLLInsertLocation_Private(PetscBool assume_sorted,PetscInt k,PetscInt idx_start,PetscInt entry,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnkdata,PetscInt * PETSC_RESTRICT lnk,PetscInt * PETSC_RESTRICT lnklvl,PetscInt newval)1104 static inline PetscErrorCode PetscIncompleteLLInsertLocation_Private(PetscBool assume_sorted, PetscInt k, PetscInt idx_start, PetscInt entry, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnkdata, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscInt newval)
1105 {
1106   PetscFunctionBegin;
1107   PetscCall(PetscLLInsertLocation_Private(assume_sorted, k, idx_start, entry, nlnk, lnkdata, lnk));
1108   lnklvl[entry] = newval;
1109   PetscFunctionReturn(PETSC_SUCCESS);
1110 }
1111 
1112 /*
1113   Initialize a sorted linked list used for ILU and ICC
1114   Input Parameters:
1115     nidx      - number of input idx
1116     idx       - integer array used for storing column indices
1117     idx_start - starting index of the list
1118     perm      - indices of an IS
1119     lnk       - linked list(an integer array) that is created
1120     lnklvl    - levels of lnk
1121     bt        - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1122   output Parameters:
1123     nlnk     - number of newly added idx
1124     lnk      - the sorted(increasing order) linked list containing new and non-redundate entries from idx
1125     lnklvl   - levels of lnk
1126     bt       - updated PetscBT (bitarray)
1127 */
PetscIncompleteLLInit(PetscInt nidx,const PetscInt * PETSC_RESTRICT idx,PetscInt idx_start,const PetscInt * PETSC_RESTRICT perm,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnk,PetscInt * PETSC_RESTRICT lnklvl,PetscBT bt)1128 static inline PetscErrorCode PetscIncompleteLLInit(PetscInt nidx, const PetscInt *PETSC_RESTRICT idx, PetscInt idx_start, const PetscInt *PETSC_RESTRICT perm, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscBT bt)
1129 {
1130   PetscFunctionBegin;
1131   *nlnk = 0;
1132   for (PetscInt k = 0, lnkdata = idx_start; k < nidx; ++k) {
1133     const PetscInt entry = perm[idx[k]];
1134 
1135     if (!PetscBTLookupSet(bt, entry)) PetscCall(PetscIncompleteLLInsertLocation_Private(PETSC_FALSE, k, idx_start, entry, nlnk, &lnkdata, lnk, lnklvl, 0));
1136   }
1137   PetscFunctionReturn(PETSC_SUCCESS);
1138 }
1139 
PetscIncompleteLLAdd_Private(PetscInt nidx,const PetscInt * PETSC_RESTRICT idx,PetscReal level,const PetscInt * PETSC_RESTRICT idxlvl,PetscInt idx_start,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnk,PetscInt * PETSC_RESTRICT lnklvl,PetscBT bt,PetscInt prow_offset,PetscBool assume_sorted)1140 static inline PetscErrorCode PetscIncompleteLLAdd_Private(PetscInt nidx, const PetscInt *PETSC_RESTRICT idx, PetscReal level, const PetscInt *PETSC_RESTRICT idxlvl, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscBT bt, PetscInt prow_offset, PetscBool assume_sorted)
1141 {
1142   PetscFunctionBegin;
1143   *nlnk = 0;
1144   for (PetscInt k = 0, lnkdata = idx_start; k < nidx; ++k) {
1145     const PetscInt incrlev = idxlvl[k] + prow_offset + 1;
1146 
1147     if (incrlev <= level) {
1148       const PetscInt entry = idx[k];
1149 
1150       if (!PetscBTLookupSet(bt, entry)) PetscCall(PetscIncompleteLLInsertLocation_Private(assume_sorted, k, idx_start, entry, nlnk, &lnkdata, lnk, lnklvl, incrlev));
1151       else if (lnklvl[entry] > incrlev) lnklvl[entry] = incrlev; /* existing entry */
1152     }
1153   }
1154   PetscFunctionReturn(PETSC_SUCCESS);
1155 }
1156 
1157 /*
1158   Add a SORTED index set into a sorted linked list for ICC
1159   Input Parameters:
1160     nidx      - number of input indices
1161     idx       - sorted integer array used for storing column indices
1162     level     - level of fill, e.g., ICC(level)
1163     idxlvl    - level of idx
1164     idx_start - starting index of the list
1165     lnk       - linked list(an integer array) that is created
1166     lnklvl    - levels of lnk
1167     bt        - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1168     idxlvl_prow - idxlvl[prow], where prow is the row number of the idx
1169   output Parameters:
1170     nlnk   - number of newly added indices
1171     lnk    - the sorted(increasing order) linked list containing new and non-redundate entries from idx
1172     lnklvl - levels of lnk
1173     bt     - updated PetscBT (bitarray)
1174   Note: the level of U(i,j) is set as lvl(i,j) = min{ lvl(i,j), lvl(prow,i)+lvl(prow,j)+1)
1175         where idx = non-zero columns of U(prow,prow+1:n-1), prow<i
1176 */
PetscICCLLAddSorted(PetscInt nidx,const PetscInt * PETSC_RESTRICT idx,PetscReal level,const PetscInt * PETSC_RESTRICT idxlvl,PetscInt idx_start,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnk,PetscInt * PETSC_RESTRICT lnklvl,PetscBT bt,PetscInt idxlvl_prow)1177 static inline PetscErrorCode PetscICCLLAddSorted(PetscInt nidx, const PetscInt *PETSC_RESTRICT idx, PetscReal level, const PetscInt *PETSC_RESTRICT idxlvl, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscBT bt, PetscInt idxlvl_prow)
1178 {
1179   PetscFunctionBegin;
1180   PetscCall(PetscIncompleteLLAdd_Private(nidx, idx, level, idxlvl, idx_start, nlnk, lnk, lnklvl, bt, idxlvl_prow, PETSC_TRUE));
1181   PetscFunctionReturn(PETSC_SUCCESS);
1182 }
1183 
1184 /*
1185   Add a SORTED index set into a sorted linked list for ILU
1186   Input Parameters:
1187     nidx      - number of input indices
1188     idx       - sorted integer array used for storing column indices
1189     level     - level of fill, e.g., ICC(level)
1190     idxlvl    - level of idx
1191     idx_start - starting index of the list
1192     lnk       - linked list(an integer array) that is created
1193     lnklvl    - levels of lnk
1194     bt        - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1195     prow      - the row number of idx
1196   output Parameters:
1197     nlnk     - number of newly added idx
1198     lnk      - the sorted(increasing order) linked list containing new and non-redundate entries from idx
1199     lnklvl   - levels of lnk
1200     bt       - updated PetscBT (bitarray)
1201 
1202   Note: the level of factor(i,j) is set as lvl(i,j) = min{ lvl(i,j), lvl(i,prow)+lvl(prow,j)+1)
1203         where idx = non-zero columns of U(prow,prow+1:n-1), prow<i
1204 */
PetscILULLAddSorted(PetscInt nidx,const PetscInt * PETSC_RESTRICT idx,PetscInt level,const PetscInt * PETSC_RESTRICT idxlvl,PetscInt idx_start,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnk,PetscInt * PETSC_RESTRICT lnklvl,PetscBT bt,PetscInt prow)1205 static inline PetscErrorCode PetscILULLAddSorted(PetscInt nidx, const PetscInt *PETSC_RESTRICT idx, PetscInt level, const PetscInt *PETSC_RESTRICT idxlvl, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscBT bt, PetscInt prow)
1206 {
1207   PetscFunctionBegin;
1208   PetscCall(PetscIncompleteLLAdd_Private(nidx, idx, level, idxlvl, idx_start, nlnk, lnk, lnklvl, bt, lnklvl[prow], PETSC_TRUE));
1209   PetscFunctionReturn(PETSC_SUCCESS);
1210 }
1211 
1212 /*
1213   Add a index set into a sorted linked list
1214   Input Parameters:
1215     nidx      - number of input idx
1216     idx   - integer array used for storing column indices
1217     level     - level of fill, e.g., ICC(level)
1218     idxlvl - level of idx
1219     idx_start - starting index of the list
1220     lnk       - linked list(an integer array) that is created
1221     lnklvl   - levels of lnk
1222     bt        - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1223   output Parameters:
1224     nlnk      - number of newly added idx
1225     lnk       - the sorted(increasing order) linked list containing new and non-redundate entries from idx
1226     lnklvl   - levels of lnk
1227     bt        - updated PetscBT (bitarray)
1228 */
PetscIncompleteLLAdd(PetscInt nidx,const PetscInt * PETSC_RESTRICT idx,PetscReal level,const PetscInt * PETSC_RESTRICT idxlvl,PetscInt idx_start,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnk,PetscInt * PETSC_RESTRICT lnklvl,PetscBT bt)1229 static inline PetscErrorCode PetscIncompleteLLAdd(PetscInt nidx, const PetscInt *PETSC_RESTRICT idx, PetscReal level, const PetscInt *PETSC_RESTRICT idxlvl, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscBT bt)
1230 {
1231   PetscFunctionBegin;
1232   PetscCall(PetscIncompleteLLAdd_Private(nidx, idx, level, idxlvl, idx_start, nlnk, lnk, lnklvl, bt, 0, PETSC_FALSE));
1233   PetscFunctionReturn(PETSC_SUCCESS);
1234 }
1235 
1236 /*
1237   Add a SORTED index set into a sorted linked list
1238   Input Parameters:
1239     nidx      - number of input indices
1240     idx   - sorted integer array used for storing column indices
1241     level     - level of fill, e.g., ICC(level)
1242     idxlvl - level of idx
1243     idx_start - starting index of the list
1244     lnk       - linked list(an integer array) that is created
1245     lnklvl    - levels of lnk
1246     bt        - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1247   output Parameters:
1248     nlnk      - number of newly added idx
1249     lnk       - the sorted(increasing order) linked list containing new and non-redundate entries from idx
1250     lnklvl    - levels of lnk
1251     bt        - updated PetscBT (bitarray)
1252 */
PetscIncompleteLLAddSorted(PetscInt nidx,const PetscInt * PETSC_RESTRICT idx,PetscReal level,const PetscInt * PETSC_RESTRICT idxlvl,PetscInt idx_start,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnk,PetscInt * PETSC_RESTRICT lnklvl,PetscBT bt)1253 static inline PetscErrorCode PetscIncompleteLLAddSorted(PetscInt nidx, const PetscInt *PETSC_RESTRICT idx, PetscReal level, const PetscInt *PETSC_RESTRICT idxlvl, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscBT bt)
1254 {
1255   PetscFunctionBegin;
1256   PetscCall(PetscIncompleteLLAdd_Private(nidx, idx, level, idxlvl, idx_start, nlnk, lnk, lnklvl, bt, 0, PETSC_TRUE));
1257   PetscFunctionReturn(PETSC_SUCCESS);
1258 }
1259 
1260 /*
1261   Copy data on the list into an array, then initialize the list
1262   Input Parameters:
1263     idx_start - starting index of the list
1264     lnk_max   - max value of lnk indicating the end of the list
1265     nlnk      - number of data on the list to be copied
1266     lnk       - linked list
1267     lnklvl    - level of lnk
1268     bt        - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1269   output Parameters:
1270     indices - array that contains the copied data
1271     lnk     - linked list that is cleaned and initialize
1272     lnklvl  - level of lnk that is reinitialized
1273     bt      - PetscBT (bitarray) with all bits set to false
1274 */
PetscIncompleteLLClean(PetscInt idx_start,PetscInt lnk_max,PetscInt nlnk,PetscInt * PETSC_RESTRICT lnk,PetscInt * PETSC_RESTRICT lnklvl,PetscInt * PETSC_RESTRICT indices,PetscInt * PETSC_RESTRICT indiceslvl,PetscBT bt)1275 static inline PetscErrorCode PetscIncompleteLLClean(PetscInt idx_start, PetscInt lnk_max, PetscInt nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscInt *PETSC_RESTRICT indices, PetscInt *PETSC_RESTRICT indiceslvl, PetscBT bt)
1276 {
1277   PetscFunctionBegin;
1278   for (PetscInt j = 0, idx = idx_start; j < nlnk; ++j) {
1279     idx           = lnk[idx];
1280     indices[j]    = idx;
1281     indiceslvl[j] = lnklvl[idx];
1282     lnklvl[idx]   = -1;
1283     PetscCall(PetscBTClear(bt, idx));
1284   }
1285   lnk[idx_start] = lnk_max;
1286   PetscFunctionReturn(PETSC_SUCCESS);
1287 }
1288 
1289 /*
1290   Free memories used by the list
1291 */
1292 #define PetscIncompleteLLDestroy(lnk, bt) ((PetscErrorCode)(PetscFree(lnk) || PetscBTDestroy(&(bt))))
1293 
1294 #if !defined(PETSC_CLANG_STATIC_ANALYZER)
1295   #define MatCheckSameLocalSize(A, ar1, B, ar2) \
1296     do { \
1297       PetscCheckSameComm(A, ar1, B, ar2); \
1298       PetscCheck(((A)->rmap->n == (B)->rmap->n) && ((A)->cmap->n == (B)->cmap->n), PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Incompatible matrix local sizes: parameter # %d (%" PetscInt_FMT " x %" PetscInt_FMT ") != parameter # %d (%" PetscInt_FMT " x %" PetscInt_FMT ")", ar1, \
1299                  (A)->rmap->n, (A)->cmap->n, ar2, (B)->rmap->n, (B)->cmap->n); \
1300     } while (0)
1301   #define MatCheckSameSize(A, ar1, B, ar2) \
1302     do { \
1303       PetscCheck(((A)->rmap->N == (B)->rmap->N) && ((A)->cmap->N == (B)->cmap->N), PetscObjectComm((PetscObject)(A)), PETSC_ERR_ARG_INCOMP, "Incompatible matrix global sizes: parameter # %d (%" PetscInt_FMT " x %" PetscInt_FMT ") != parameter # %d (%" PetscInt_FMT " x %" PetscInt_FMT ")", ar1, \
1304                  (A)->rmap->N, (A)->cmap->N, ar2, (B)->rmap->N, (B)->cmap->N); \
1305       MatCheckSameLocalSize(A, ar1, B, ar2); \
1306     } while (0)
1307 #else
1308 template <typename Tm>
1309 extern void MatCheckSameLocalSize(Tm, int, Tm, int);
1310 template <typename Tm>
1311 extern void MatCheckSameSize(Tm, int, Tm, int);
1312 #endif
1313 
1314 #define VecCheckMatCompatible(M, x, ar1, b, ar2) \
1315   do { \
1316     PetscCheck((M)->cmap->N == (x)->map->N, PetscObjectComm((PetscObject)(M)), PETSC_ERR_ARG_SIZ, "Vector global length incompatible with matrix: parameter # %d global size %" PetscInt_FMT " != matrix column global size %" PetscInt_FMT, ar1, (x)->map->N, \
1317                (M)->cmap->N); \
1318     PetscCheck((M)->rmap->N == (b)->map->N, PetscObjectComm((PetscObject)(M)), PETSC_ERR_ARG_SIZ, "Vector global length incompatible with matrix: parameter # %d global size %" PetscInt_FMT " != matrix row global size %" PetscInt_FMT, ar2, (b)->map->N, \
1319                (M)->rmap->N); \
1320   } while (0)
1321 
1322 /*
1323   Create and initialize a condensed linked list -
1324     same as PetscLLCreate(), but uses a scalable array 'lnk' with size of max number of entries, not O(N).
1325     Barry suggested this approach (Dec. 6, 2011):
1326       I've thought of an alternative way of representing a linked list that is efficient but doesn't have the O(N) scaling issue
1327       (it may be faster than the O(N) even sequentially due to less crazy memory access).
1328 
1329       Instead of having some like  a  2  -> 4 -> 11 ->  22  list that uses slot 2  4 11 and 22 in a big array use a small array with two slots
1330       for each entry for example  [ 2 1 | 4 3 | 22 -1 | 11 2]   so the first number (of the pair) is the value while the second tells you where
1331       in the list the next entry is. Inserting a new link means just append another pair at the end. For example say we want to insert 13 into the
1332       list it would then become [2 1 | 4 3 | 22 -1 | 11 4 | 13 2 ] you just add a pair at the end and fix the point for the one that points to it.
1333       That is 11 use to point to the 2 slot, after the change 11 points to the 4th slot which has the value 13. Note that values are always next
1334       to each other so memory access is much better than using the big array.
1335 
1336   Example:
1337      nlnk_max=5, lnk_max=36:
1338      Initial list: [0, 0 | 36, 2 | 0, 0 | 0, 0 | 0, 0 | 0, 0 | 0, 0]
1339      here, head_node has index 2 with value lnk[2]=lnk_max=36,
1340            0-th entry is used to store the number of entries in the list,
1341      The initial lnk represents head -> tail(marked by 36) with number of entries = lnk[0]=0.
1342 
1343      Now adding a sorted set {2,4}, the list becomes
1344      [2, 0 | 36, 4 |2, 6 | 4, 2 | 0, 0 | 0, 0 | 0, 0 ]
1345      represents head -> 2 -> 4 -> tail with number of entries = lnk[0]=2.
1346 
1347      Then adding a sorted set {0,3,35}, the list
1348      [5, 0 | 36, 8 | 2, 10 | 4, 12 | 0, 4 | 3, 6 | 35, 2 ]
1349      represents head -> 0 -> 2 -> 3 -> 4 -> 35 -> tail with number of entries = lnk[0]=5.
1350 
1351   Input Parameters:
1352     nlnk_max  - max length of the list
1353     lnk_max   - max value of the entries
1354   Output Parameters:
1355     lnk       - list created and initialized
1356     bt        - PetscBT (bitarray) with all bits set to false. Note: bt has size lnk_max, not nln_max!
1357 */
PetscLLCondensedCreate(PetscInt nlnk_max,PetscInt lnk_max,PetscInt ** lnk,PetscBT * bt)1358 static inline PetscErrorCode PetscLLCondensedCreate(PetscInt nlnk_max, PetscInt lnk_max, PetscInt **lnk, PetscBT *bt)
1359 {
1360   PetscInt *llnk, lsize = 0;
1361 
1362   PetscFunctionBegin;
1363   PetscCall(PetscIntMultError(2, nlnk_max + 2, &lsize));
1364   PetscCall(PetscMalloc1(lsize, lnk));
1365   PetscCall(PetscBTCreate(lnk_max, bt));
1366   llnk    = *lnk;
1367   llnk[0] = 0;       /* number of entries on the list */
1368   llnk[2] = lnk_max; /* value in the head node */
1369   llnk[3] = 2;       /* next for the head node */
1370   PetscFunctionReturn(PETSC_SUCCESS);
1371 }
1372 
1373 /*
1374   Add a SORTED ascending index set into a sorted linked list. See PetscLLCondensedCreate() for detailed description.
1375   Input Parameters:
1376     nidx      - number of input indices
1377     indices   - sorted integer array
1378     lnk       - condensed linked list(an integer array) that is created
1379     bt        - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1380   output Parameters:
1381     lnk       - the sorted(increasing order) linked list containing previous and newly added non-redundate indices
1382     bt        - updated PetscBT (bitarray)
1383 */
PetscLLCondensedAddSorted(PetscInt nidx,const PetscInt indices[],PetscInt lnk[],PetscBT bt)1384 static inline PetscErrorCode PetscLLCondensedAddSorted(PetscInt nidx, const PetscInt indices[], PetscInt lnk[], PetscBT bt)
1385 {
1386   PetscInt location = 2;      /* head */
1387   PetscInt nlnk     = lnk[0]; /* num of entries on the input lnk */
1388 
1389   PetscFunctionBegin;
1390   for (PetscInt k = 0; k < nidx; k++) {
1391     const PetscInt entry = indices[k];
1392     if (!PetscBTLookupSet(bt, entry)) { /* new entry */
1393       PetscInt next, lnkdata;
1394 
1395       /* search for insertion location */
1396       do {
1397         next     = location + 1;  /* link from previous node to next node */
1398         location = lnk[next];     /* idx of next node */
1399         lnkdata  = lnk[location]; /* value of next node */
1400       } while (entry > lnkdata);
1401       /* insertion location is found, add entry into lnk */
1402       const PetscInt newnode = 2 * (nlnk + 2); /* index for this new node */
1403       lnk[next]              = newnode;        /* connect previous node to the new node */
1404       lnk[newnode]           = entry;          /* set value of the new node */
1405       lnk[newnode + 1]       = location;       /* connect new node to next node */
1406       location               = newnode;        /* next search starts from the new node */
1407       nlnk++;
1408     }
1409   }
1410   lnk[0] = nlnk; /* number of entries in the list */
1411   PetscFunctionReturn(PETSC_SUCCESS);
1412 }
1413 
PetscLLCondensedClean(PetscInt lnk_max,PETSC_UNUSED PetscInt nidx,PetscInt * indices,PetscInt lnk[],PetscBT bt)1414 static inline PetscErrorCode PetscLLCondensedClean(PetscInt lnk_max, PETSC_UNUSED PetscInt nidx, PetscInt *indices, PetscInt lnk[], PetscBT bt)
1415 {
1416   const PetscInt nlnk = lnk[0]; /* num of entries on the list */
1417   PetscInt       next = lnk[3]; /* head node */
1418 
1419   PetscFunctionBegin;
1420   for (PetscInt k = 0; k < nlnk; k++) {
1421     indices[k] = lnk[next];
1422     next       = lnk[next + 1];
1423     PetscCall(PetscBTClear(bt, indices[k]));
1424   }
1425   lnk[0] = 0;       /* num of entries on the list */
1426   lnk[2] = lnk_max; /* initialize head node */
1427   lnk[3] = 2;       /* head node */
1428   PetscFunctionReturn(PETSC_SUCCESS);
1429 }
1430 
PetscLLCondensedView(PetscInt * lnk)1431 static inline PetscErrorCode PetscLLCondensedView(PetscInt *lnk)
1432 {
1433   PetscFunctionBegin;
1434   PetscCall(PetscPrintf(PETSC_COMM_SELF, "LLCondensed of size %" PetscInt_FMT ", (val,  next)\n", lnk[0]));
1435   for (PetscInt k = 2; k < lnk[0] + 2; ++k) PetscCall(PetscPrintf(PETSC_COMM_SELF, " %" PetscInt_FMT ": (%" PetscInt_FMT ", %" PetscInt_FMT ")\n", 2 * k, lnk[2 * k], lnk[2 * k + 1]));
1436   PetscFunctionReturn(PETSC_SUCCESS);
1437 }
1438 
1439 /*
1440   Free memories used by the list
1441 */
PetscLLCondensedDestroy(PetscInt * lnk,PetscBT bt)1442 static inline PetscErrorCode PetscLLCondensedDestroy(PetscInt *lnk, PetscBT bt)
1443 {
1444   PetscFunctionBegin;
1445   PetscCall(PetscFree(lnk));
1446   PetscCall(PetscBTDestroy(&bt));
1447   PetscFunctionReturn(PETSC_SUCCESS);
1448 }
1449 
1450 /*
1451  Same as PetscLLCondensedCreate(), but does not use non-scalable O(lnk_max) bitarray
1452   Input Parameters:
1453     nlnk_max  - max length of the list
1454   Output Parameters:
1455     lnk       - list created and initialized
1456 */
PetscLLCondensedCreate_Scalable(PetscInt nlnk_max,PetscInt ** lnk)1457 static inline PetscErrorCode PetscLLCondensedCreate_Scalable(PetscInt nlnk_max, PetscInt **lnk)
1458 {
1459   PetscInt *llnk, lsize = 0;
1460 
1461   PetscFunctionBegin;
1462   PetscCall(PetscIntMultError(2, nlnk_max + 2, &lsize));
1463   PetscCall(PetscMalloc1(lsize, lnk));
1464   llnk    = *lnk;
1465   llnk[0] = 0;             /* number of entries on the list */
1466   llnk[2] = PETSC_INT_MAX; /* value in the head node */
1467   llnk[3] = 2;             /* next for the head node */
1468   PetscFunctionReturn(PETSC_SUCCESS);
1469 }
1470 
PetscLLCondensedExpand_Scalable(PetscInt nlnk_max,PetscInt ** lnk)1471 static inline PetscErrorCode PetscLLCondensedExpand_Scalable(PetscInt nlnk_max, PetscInt **lnk)
1472 {
1473   PetscInt lsize = 0;
1474 
1475   PetscFunctionBegin;
1476   PetscCall(PetscIntMultError(2, nlnk_max + 2, &lsize));
1477   PetscCall(PetscRealloc(lsize * sizeof(PetscInt), lnk));
1478   PetscFunctionReturn(PETSC_SUCCESS);
1479 }
1480 
PetscLLCondensedAddSorted_Scalable(PetscInt nidx,const PetscInt indices[],PetscInt lnk[])1481 static inline PetscErrorCode PetscLLCondensedAddSorted_Scalable(PetscInt nidx, const PetscInt indices[], PetscInt lnk[])
1482 {
1483   PetscInt location = 2;      /* head */
1484   PetscInt nlnk     = lnk[0]; /* num of entries on the input lnk */
1485 
1486   for (PetscInt k = 0; k < nidx; k++) {
1487     const PetscInt entry = indices[k];
1488     PetscInt       next, lnkdata;
1489 
1490     /* search for insertion location */
1491     do {
1492       next     = location + 1;  /* link from previous node to next node */
1493       location = lnk[next];     /* idx of next node */
1494       lnkdata  = lnk[location]; /* value of next node */
1495     } while (entry > lnkdata);
1496     if (entry < lnkdata) {
1497       /* insertion location is found, add entry into lnk */
1498       const PetscInt newnode = 2 * (nlnk + 2); /* index for this new node */
1499       lnk[next]              = newnode;        /* connect previous node to the new node */
1500       lnk[newnode]           = entry;          /* set value of the new node */
1501       lnk[newnode + 1]       = location;       /* connect new node to next node */
1502       location               = newnode;        /* next search starts from the new node */
1503       nlnk++;
1504     }
1505   }
1506   lnk[0] = nlnk; /* number of entries in the list */
1507   return PETSC_SUCCESS;
1508 }
1509 
PetscLLCondensedClean_Scalable(PETSC_UNUSED PetscInt nidx,PetscInt * indices,PetscInt * lnk)1510 static inline PetscErrorCode PetscLLCondensedClean_Scalable(PETSC_UNUSED PetscInt nidx, PetscInt *indices, PetscInt *lnk)
1511 {
1512   const PetscInt nlnk = lnk[0];
1513   PetscInt       next = lnk[3]; /* head node */
1514 
1515   for (PetscInt k = 0; k < nlnk; k++) {
1516     indices[k] = lnk[next];
1517     next       = lnk[next + 1];
1518   }
1519   lnk[0] = 0; /* num of entries on the list */
1520   lnk[3] = 2; /* head node */
1521   return PETSC_SUCCESS;
1522 }
1523 
PetscLLCondensedDestroy_Scalable(PetscInt * lnk)1524 static inline PetscErrorCode PetscLLCondensedDestroy_Scalable(PetscInt *lnk)
1525 {
1526   return PetscFree(lnk);
1527 }
1528 
1529 /*
1530       lnk[0]   number of links
1531       lnk[1]   number of entries
1532       lnk[3n]  value
1533       lnk[3n+1] len
1534       lnk[3n+2] link to next value
1535 
1536       The next three are always the first link
1537 
1538       lnk[3]    PETSC_INT_MIN+1
1539       lnk[4]    1
1540       lnk[5]    link to first real entry
1541 
1542       The next three are always the last link
1543 
1544       lnk[6]    PETSC_INT_MAX - 1
1545       lnk[7]    1
1546       lnk[8]    next valid link (this is the same as lnk[0] but without the decreases)
1547 */
1548 
PetscLLCondensedCreate_fast(PetscInt nlnk_max,PetscInt ** lnk)1549 static inline PetscErrorCode PetscLLCondensedCreate_fast(PetscInt nlnk_max, PetscInt **lnk)
1550 {
1551   PetscInt *llnk;
1552   PetscInt  lsize = 0;
1553 
1554   PetscFunctionBegin;
1555   PetscCall(PetscIntMultError(3, nlnk_max + 3, &lsize));
1556   PetscCall(PetscMalloc1(lsize, lnk));
1557   llnk    = *lnk;
1558   llnk[0] = 0;                 /* nlnk: number of entries on the list */
1559   llnk[1] = 0;                 /* number of integer entries represented in list */
1560   llnk[3] = PETSC_INT_MIN + 1; /* value in the first node */
1561   llnk[4] = 1;                 /* count for the first node */
1562   llnk[5] = 6;                 /* next for the first node */
1563   llnk[6] = PETSC_INT_MAX - 1; /* value in the last node */
1564   llnk[7] = 1;                 /* count for the last node */
1565   llnk[8] = 0;                 /* next valid node to be used */
1566   PetscFunctionReturn(PETSC_SUCCESS);
1567 }
1568 
PetscLLCondensedAddSorted_fast(PetscInt nidx,const PetscInt indices[],PetscInt lnk[])1569 static inline PetscErrorCode PetscLLCondensedAddSorted_fast(PetscInt nidx, const PetscInt indices[], PetscInt lnk[])
1570 {
1571   for (PetscInt k = 0, prev = 3 /* first value */; k < nidx; k++) {
1572     const PetscInt entry = indices[k];
1573     PetscInt       next  = lnk[prev + 2];
1574 
1575     /* search for insertion location */
1576     while (entry >= lnk[next]) {
1577       prev = next;
1578       next = lnk[next + 2];
1579     }
1580     /* entry is in range of previous list */
1581     if (entry < lnk[prev] + lnk[prev + 1]) continue;
1582     lnk[1]++;
1583     /* entry is right after previous list */
1584     if (entry == lnk[prev] + lnk[prev + 1]) {
1585       lnk[prev + 1]++;
1586       if (lnk[next] == entry + 1) { /* combine two contiguous strings */
1587         lnk[prev + 1] += lnk[next + 1];
1588         lnk[prev + 2] = lnk[next + 2];
1589         next          = lnk[next + 2];
1590         lnk[0]--;
1591       }
1592       continue;
1593     }
1594     /* entry is right before next list */
1595     if (entry == lnk[next] - 1) {
1596       lnk[next]--;
1597       lnk[next + 1]++;
1598       prev = next;
1599       next = lnk[prev + 2];
1600       continue;
1601     }
1602     /*  add entry into lnk */
1603     lnk[prev + 2] = 3 * ((lnk[8]++) + 3); /* connect previous node to the new node */
1604     prev          = lnk[prev + 2];
1605     lnk[prev]     = entry; /* set value of the new node */
1606     lnk[prev + 1] = 1;     /* number of values in contiguous string is one to start */
1607     lnk[prev + 2] = next;  /* connect new node to next node */
1608     lnk[0]++;
1609   }
1610   return PETSC_SUCCESS;
1611 }
1612 
PetscLLCondensedClean_fast(PETSC_UNUSED PetscInt nidx,PetscInt * indices,PetscInt * lnk)1613 static inline PetscErrorCode PetscLLCondensedClean_fast(PETSC_UNUSED PetscInt nidx, PetscInt *indices, PetscInt *lnk)
1614 {
1615   const PetscInt nlnk = lnk[0];
1616   PetscInt       next = lnk[5]; /* first node */
1617 
1618   for (PetscInt k = 0, cnt = 0; k < nlnk; k++) {
1619     for (PetscInt j = 0; j < lnk[next + 1]; j++) indices[cnt++] = lnk[next] + j;
1620     next = lnk[next + 2];
1621   }
1622   lnk[0] = 0;                 /* nlnk: number of links */
1623   lnk[1] = 0;                 /* number of integer entries represented in list */
1624   lnk[3] = PETSC_INT_MIN + 1; /* value in the first node */
1625   lnk[4] = 1;                 /* count for the first node */
1626   lnk[5] = 6;                 /* next for the first node */
1627   lnk[6] = PETSC_INT_MAX - 1; /* value in the last node */
1628   lnk[7] = 1;                 /* count for the last node */
1629   lnk[8] = 0;                 /* next valid location to make link */
1630   return PETSC_SUCCESS;
1631 }
1632 
PetscLLCondensedView_fast(const PetscInt * lnk)1633 static inline PetscErrorCode PetscLLCondensedView_fast(const PetscInt *lnk)
1634 {
1635   const PetscInt nlnk = lnk[0];
1636   PetscInt       next = lnk[5]; /* first node */
1637 
1638   for (PetscInt k = 0; k < nlnk; k++) {
1639 #if 0 /* Debugging code */
1640     printf("%d value %d len %d next %d\n", next, lnk[next], lnk[next + 1], lnk[next + 2]);
1641 #endif
1642     next = lnk[next + 2];
1643   }
1644   return PETSC_SUCCESS;
1645 }
1646 
PetscLLCondensedDestroy_fast(PetscInt * lnk)1647 static inline PetscErrorCode PetscLLCondensedDestroy_fast(PetscInt *lnk)
1648 {
1649   return PetscFree(lnk);
1650 }
1651 
1652 PETSC_EXTERN PetscErrorCode PetscCDCreate(PetscInt, PetscCoarsenData **);
1653 PETSC_EXTERN PetscErrorCode PetscCDDestroy(PetscCoarsenData *);
1654 PETSC_EXTERN PetscErrorCode PetscCDIntNdSetID(PetscCDIntNd *, PetscInt);
1655 PETSC_EXTERN PetscErrorCode PetscCDIntNdGetID(const PetscCDIntNd *, PetscInt *);
1656 PETSC_EXTERN PetscErrorCode PetscCDAppendID(PetscCoarsenData *, PetscInt, PetscInt);
1657 PETSC_EXTERN PetscErrorCode PetscCDMoveAppend(PetscCoarsenData *, PetscInt, PetscInt);
1658 PETSC_EXTERN PetscErrorCode PetscCDAppendNode(PetscCoarsenData *, PetscInt, PetscCDIntNd *);
1659 PETSC_EXTERN PetscErrorCode PetscCDRemoveNextNode(PetscCoarsenData *, PetscInt, PetscCDIntNd *);
1660 PETSC_EXTERN PetscErrorCode PetscCDCountAt(const PetscCoarsenData *, PetscInt, PetscInt *);
1661 PETSC_EXTERN PetscErrorCode PetscCDIsEmptyAt(const PetscCoarsenData *, PetscInt, PetscBool *);
1662 PETSC_EXTERN PetscErrorCode PetscCDSetChunkSize(PetscCoarsenData *, PetscInt);
1663 PETSC_EXTERN PetscErrorCode PetscCDPrint(const PetscCoarsenData *, PetscInt, MPI_Comm);
1664 PETSC_EXTERN PetscErrorCode PetscCDGetNonemptyIS(PetscCoarsenData *, IS *);
1665 PETSC_EXTERN PetscErrorCode PetscCDGetMat(PetscCoarsenData *, Mat *);
1666 PETSC_EXTERN PetscErrorCode PetscCDSetMat(PetscCoarsenData *, Mat);
1667 PETSC_EXTERN PetscErrorCode PetscCDClearMat(PetscCoarsenData *);
1668 PETSC_EXTERN PetscErrorCode PetscCDRemoveAllAt(PetscCoarsenData *, PetscInt);
1669 PETSC_EXTERN PetscErrorCode PetscCDCount(const PetscCoarsenData *, PetscInt *_sz);
1670 
1671 PETSC_EXTERN PetscErrorCode PetscCDGetHeadPos(const PetscCoarsenData *, PetscInt, PetscCDIntNd **);
1672 PETSC_EXTERN PetscErrorCode PetscCDGetNextPos(const PetscCoarsenData *, PetscInt, PetscCDIntNd **);
1673 PETSC_EXTERN PetscErrorCode PetscCDGetASMBlocks(const PetscCoarsenData *, const PetscInt, PetscInt *, IS **);
1674 
1675 PETSC_SINGLE_LIBRARY_VISIBILITY_INTERNAL PetscErrorCode MatFDColoringApply_AIJ(Mat, MatFDColoring, Vec, void *);
1676 
1677 PETSC_EXTERN PetscLogEvent MAT_Mult;
1678 PETSC_EXTERN PetscLogEvent MAT_MultAdd;
1679 PETSC_EXTERN PetscLogEvent MAT_MultTranspose;
1680 PETSC_EXTERN PetscLogEvent MAT_MultHermitianTranspose;
1681 PETSC_EXTERN PetscLogEvent MAT_MultTransposeAdd;
1682 PETSC_EXTERN PetscLogEvent MAT_MultHermitianTransposeAdd;
1683 PETSC_EXTERN PetscLogEvent MAT_Solve;
1684 PETSC_EXTERN PetscLogEvent MAT_Solves;
1685 PETSC_EXTERN PetscLogEvent MAT_SolveAdd;
1686 PETSC_EXTERN PetscLogEvent MAT_SolveTranspose;
1687 PETSC_EXTERN PetscLogEvent MAT_SolveTransposeAdd;
1688 PETSC_EXTERN PetscLogEvent MAT_SOR;
1689 PETSC_EXTERN PetscLogEvent MAT_ForwardSolve;
1690 PETSC_EXTERN PetscLogEvent MAT_BackwardSolve;
1691 PETSC_EXTERN PetscLogEvent MAT_LUFactor;
1692 PETSC_EXTERN PetscLogEvent MAT_LUFactorSymbolic;
1693 PETSC_EXTERN PetscLogEvent MAT_LUFactorNumeric;
1694 PETSC_EXTERN PetscLogEvent MAT_QRFactor;
1695 PETSC_EXTERN PetscLogEvent MAT_QRFactorSymbolic;
1696 PETSC_EXTERN PetscLogEvent MAT_QRFactorNumeric;
1697 PETSC_EXTERN PetscLogEvent MAT_CholeskyFactor;
1698 PETSC_EXTERN PetscLogEvent MAT_CholeskyFactorSymbolic;
1699 PETSC_EXTERN PetscLogEvent MAT_CholeskyFactorNumeric;
1700 PETSC_EXTERN PetscLogEvent MAT_ILUFactor;
1701 PETSC_EXTERN PetscLogEvent MAT_ILUFactorSymbolic;
1702 PETSC_EXTERN PetscLogEvent MAT_ICCFactorSymbolic;
1703 PETSC_EXTERN PetscLogEvent MAT_Copy;
1704 PETSC_EXTERN PetscLogEvent MAT_Convert;
1705 PETSC_EXTERN PetscLogEvent MAT_Scale;
1706 PETSC_EXTERN PetscLogEvent MAT_AssemblyBegin;
1707 PETSC_EXTERN PetscLogEvent MAT_AssemblyEnd;
1708 PETSC_EXTERN PetscLogEvent MAT_SetValues;
1709 PETSC_EXTERN PetscLogEvent MAT_GetValues;
1710 PETSC_EXTERN PetscLogEvent MAT_GetRow;
1711 PETSC_EXTERN PetscLogEvent MAT_GetRowIJ;
1712 PETSC_EXTERN PetscLogEvent MAT_CreateSubMats;
1713 PETSC_EXTERN PetscLogEvent MAT_GetOrdering;
1714 PETSC_EXTERN PetscLogEvent MAT_RedundantMat;
1715 PETSC_EXTERN PetscLogEvent MAT_IncreaseOverlap;
1716 PETSC_EXTERN PetscLogEvent MAT_Partitioning;
1717 PETSC_EXTERN PetscLogEvent MAT_PartitioningND;
1718 PETSC_EXTERN PetscLogEvent MAT_Coarsen;
1719 PETSC_EXTERN PetscLogEvent MAT_ZeroEntries;
1720 PETSC_EXTERN PetscLogEvent MAT_Load;
1721 PETSC_EXTERN PetscLogEvent MAT_View;
1722 PETSC_EXTERN PetscLogEvent MAT_AXPY;
1723 PETSC_EXTERN PetscLogEvent MAT_FDColoringCreate;
1724 PETSC_EXTERN PetscLogEvent MAT_TransposeColoringCreate;
1725 PETSC_EXTERN PetscLogEvent MAT_FDColoringSetUp;
1726 PETSC_EXTERN PetscLogEvent MAT_FDColoringApply;
1727 PETSC_EXTERN PetscLogEvent MAT_Transpose;
1728 PETSC_EXTERN PetscLogEvent MAT_FDColoringFunction;
1729 PETSC_EXTERN PetscLogEvent MAT_CreateSubMat;
1730 PETSC_EXTERN PetscLogEvent MAT_MatSolve;
1731 PETSC_EXTERN PetscLogEvent MAT_MatTrSolve;
1732 PETSC_EXTERN PetscLogEvent MAT_MatMultSymbolic;
1733 PETSC_EXTERN PetscLogEvent MAT_MatMultNumeric;
1734 PETSC_EXTERN PetscLogEvent MAT_Getlocalmatcondensed;
1735 PETSC_EXTERN PetscLogEvent MAT_GetBrowsOfAcols;
1736 PETSC_EXTERN PetscLogEvent MAT_GetBrowsOfAocols;
1737 PETSC_EXTERN PetscLogEvent MAT_PtAPSymbolic;
1738 PETSC_EXTERN PetscLogEvent MAT_PtAPNumeric;
1739 PETSC_EXTERN PetscLogEvent MAT_Seqstompinum;
1740 PETSC_EXTERN PetscLogEvent MAT_Seqstompisym;
1741 PETSC_EXTERN PetscLogEvent MAT_Seqstompi;
1742 PETSC_EXTERN PetscLogEvent MAT_Getlocalmat;
1743 PETSC_EXTERN PetscLogEvent MAT_RARtSymbolic;
1744 PETSC_EXTERN PetscLogEvent MAT_RARtNumeric;
1745 PETSC_EXTERN PetscLogEvent MAT_MatTransposeMultSymbolic;
1746 PETSC_EXTERN PetscLogEvent MAT_MatTransposeMultNumeric;
1747 PETSC_EXTERN PetscLogEvent MAT_TransposeMatMultSymbolic;
1748 PETSC_EXTERN PetscLogEvent MAT_TransposeMatMultNumeric;
1749 PETSC_EXTERN PetscLogEvent MAT_MatMatMultSymbolic;
1750 PETSC_EXTERN PetscLogEvent MAT_MatMatMultNumeric;
1751 PETSC_EXTERN PetscLogEvent MAT_Getsymtransreduced;
1752 PETSC_EXTERN PetscLogEvent MAT_GetSeqNonzeroStructure;
1753 PETSC_EXTERN PetscLogEvent MATMFFD_Mult;
1754 PETSC_EXTERN PetscLogEvent MAT_GetMultiProcBlock;
1755 PETSC_EXTERN PetscLogEvent MAT_CUSPARSECopyToGPU;
1756 PETSC_EXTERN PetscLogEvent MAT_CUSPARSECopyFromGPU;
1757 PETSC_EXTERN PetscLogEvent MAT_CUSPARSEGenerateTranspose;
1758 PETSC_EXTERN PetscLogEvent MAT_CUSPARSESolveAnalysis;
1759 PETSC_EXTERN PetscLogEvent MAT_HIPSPARSECopyToGPU;
1760 PETSC_EXTERN PetscLogEvent MAT_HIPSPARSECopyFromGPU;
1761 PETSC_EXTERN PetscLogEvent MAT_HIPSPARSEGenerateTranspose;
1762 PETSC_EXTERN PetscLogEvent MAT_HIPSPARSESolveAnalysis;
1763 PETSC_EXTERN PetscLogEvent MAT_SetValuesBatch;
1764 PETSC_EXTERN PetscLogEvent MAT_CreateGraph;
1765 PETSC_EXTERN PetscLogEvent MAT_ViennaCLCopyToGPU;
1766 PETSC_EXTERN PetscLogEvent MAT_DenseCopyToGPU;
1767 PETSC_EXTERN PetscLogEvent MAT_DenseCopyFromGPU;
1768 PETSC_EXTERN PetscLogEvent MAT_Merge;
1769 PETSC_EXTERN PetscLogEvent MAT_Residual;
1770 PETSC_EXTERN PetscLogEvent MAT_SetRandom;
1771 PETSC_EXTERN PetscLogEvent MAT_FactorFactS;
1772 PETSC_EXTERN PetscLogEvent MAT_FactorInvS;
1773 PETSC_EXTERN PetscLogEvent MAT_PreallCOO;
1774 PETSC_EXTERN PetscLogEvent MAT_SetVCOO;
1775 PETSC_EXTERN PetscLogEvent MATCOLORING_Apply;
1776 PETSC_EXTERN PetscLogEvent MATCOLORING_Comm;
1777 PETSC_EXTERN PetscLogEvent MATCOLORING_Local;
1778 PETSC_EXTERN PetscLogEvent MATCOLORING_ISCreate;
1779 PETSC_EXTERN PetscLogEvent MATCOLORING_SetUp;
1780 PETSC_EXTERN PetscLogEvent MATCOLORING_Weights;
1781 PETSC_EXTERN PetscLogEvent MAT_H2Opus_Build;
1782 PETSC_EXTERN PetscLogEvent MAT_H2Opus_Compress;
1783 PETSC_EXTERN PetscLogEvent MAT_H2Opus_Orthog;
1784 PETSC_EXTERN PetscLogEvent MAT_H2Opus_LR;
1785 PETSC_EXTERN PetscLogEvent MAT_CUDACopyToGPU;
1786 PETSC_EXTERN PetscLogEvent MAT_HIPCopyToGPU;
1787 
1788 #if defined(PETSC_CLANG_STATIC_ANALYZER)
1789   #define MatGetDiagonalMarkers(SeqXXX, bs)
1790 #else
1791   /*
1792    Adds diagonal pointers to sparse matrix nonzero structure and determines if all diagonal entries are present
1793 
1794    Rechecks the matrix data structure automatically if the nonzero structure of the matrix changed since the last call
1795 
1796    Potential optimization: since the a->j[j] are sorted this could use bisection to find the diagonal
1797 
1798    Developer Note:
1799    Uses the C preprocessor as a template mechanism to produce MatGetDiagonal_Seq[SB]AIJ() to avoid duplicate code
1800 */
1801   #define MatGetDiagonalMarkers(SeqXXX, bs) \
1802     PetscErrorCode MatGetDiagonalMarkers_##SeqXXX(Mat A, const PetscInt **diag, PetscBool *diagDense) \
1803     { \
1804       Mat_##SeqXXX *a = (Mat_##SeqXXX *)A->data; \
1805 \
1806       PetscFunctionBegin; \
1807       if (A->factortype != MAT_FACTOR_NONE) { \
1808         if (diagDense) *diagDense = PETSC_TRUE; \
1809         if (diag) *diag = a->diag; \
1810         PetscFunctionReturn(PETSC_SUCCESS); \
1811       } \
1812       PetscCheck(diag || diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "At least one of diag or diagDense must be requested"); \
1813       if (a->diagNonzeroState != A->nonzerostate || (diag && !a->diag)) { \
1814         const PetscInt m = A->rmap->n / bs; \
1815 \
1816         if (!diag && !a->diag) { \
1817           a->diagDense = PETSC_TRUE; \
1818           for (PetscInt i = 0; i < m; i++) { \
1819             PetscBool found = PETSC_FALSE; \
1820 \
1821             for (PetscInt j = a->i[i]; j < a->i[i + 1]; j++) { \
1822               if (a->j[j] == i) { \
1823                 found = PETSC_TRUE; \
1824                 break; \
1825               } \
1826             } \
1827             if (!found) { \
1828               a->diagDense        = PETSC_FALSE; \
1829               *diagDense          = a->diagDense; \
1830               a->diagNonzeroState = A->nonzerostate; \
1831               PetscFunctionReturn(PETSC_SUCCESS); \
1832             } \
1833           } \
1834         } else { \
1835           if (!a->diag) PetscCall(PetscMalloc1(m, &a->diag)); \
1836           a->diagDense = PETSC_TRUE; \
1837           for (PetscInt i = 0; i < m; i++) { \
1838             PetscBool found = PETSC_FALSE; \
1839 \
1840             a->diag[i] = a->i[i + 1]; \
1841             for (PetscInt j = a->i[i]; j < a->i[i + 1]; j++) { \
1842               if (a->j[j] == i) { \
1843                 a->diag[i] = j; \
1844                 found      = PETSC_TRUE; \
1845                 break; \
1846               } \
1847             } \
1848             if (!found) a->diagDense = PETSC_FALSE; \
1849           } \
1850         } \
1851         a->diagNonzeroState = A->nonzerostate; \
1852       } \
1853       if (diag) *diag = a->diag; \
1854       if (diagDense) *diagDense = a->diagDense; \
1855       PetscFunctionReturn(PETSC_SUCCESS); \
1856     }
1857 #endif
1858