1 #pragma once
2
3 #include <petscmat.h>
4 #include <petscmatcoarsen.h>
5 #include <petsc/private/petscimpl.h>
6
7 PETSC_EXTERN PetscBool MatRegisterAllCalled;
8 PETSC_EXTERN PetscBool MatSeqAIJRegisterAllCalled;
9 PETSC_EXTERN PetscBool MatOrderingRegisterAllCalled;
10 PETSC_EXTERN PetscBool MatColoringRegisterAllCalled;
11 PETSC_EXTERN PetscBool MatPartitioningRegisterAllCalled;
12 PETSC_EXTERN PetscBool MatCoarsenRegisterAllCalled;
13 PETSC_EXTERN PetscErrorCode MatRegisterAll(void);
14 PETSC_EXTERN PetscErrorCode MatOrderingRegisterAll(void);
15 PETSC_EXTERN PetscErrorCode MatColoringRegisterAll(void);
16 PETSC_EXTERN PetscErrorCode MatPartitioningRegisterAll(void);
17 PETSC_EXTERN PetscErrorCode MatCoarsenRegisterAll(void);
18 PETSC_EXTERN PetscErrorCode MatSeqAIJRegisterAll(void);
19
20 /* Gets the root type of the input matrix's type (e.g., MATAIJ for MATSEQAIJ) */
21 PETSC_EXTERN PetscErrorCode MatGetRootType_Private(Mat, MatType *);
22
23 /* Gets the MPI type corresponding to the input matrix's type (e.g., MATMPIAIJ for MATSEQAIJ) */
24 PETSC_INTERN PetscErrorCode MatGetMPIMatType_Private(Mat, MatType *);
25
26 /*
27 This file defines the parts of the matrix data structure that are
28 shared by all matrix types.
29 */
30
31 /*
32 If you add entries here also add them to the MATOP enum
33 in include/petscmat.h
34 */
35 typedef struct _MatOps *MatOps;
36 struct _MatOps {
37 /* 0*/
38 PetscErrorCode (*setvalues)(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
39 PetscErrorCode (*getrow)(Mat, PetscInt, PetscInt *, PetscInt *[], PetscScalar *[]);
40 PetscErrorCode (*restorerow)(Mat, PetscInt, PetscInt *, PetscInt *[], PetscScalar *[]);
41 PetscErrorCode (*mult)(Mat, Vec, Vec);
42 PetscErrorCode (*multadd)(Mat, Vec, Vec, Vec);
43 /* 5*/
44 PetscErrorCode (*multtranspose)(Mat, Vec, Vec);
45 PetscErrorCode (*multtransposeadd)(Mat, Vec, Vec, Vec);
46 PetscErrorCode (*solve)(Mat, Vec, Vec);
47 PetscErrorCode (*solveadd)(Mat, Vec, Vec, Vec);
48 PetscErrorCode (*solvetranspose)(Mat, Vec, Vec);
49 /*10*/
50 PetscErrorCode (*solvetransposeadd)(Mat, Vec, Vec, Vec);
51 PetscErrorCode (*lufactor)(Mat, IS, IS, const MatFactorInfo *);
52 PetscErrorCode (*choleskyfactor)(Mat, IS, const MatFactorInfo *);
53 PetscErrorCode (*sor)(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec);
54 PetscErrorCode (*transpose)(Mat, MatReuse, Mat *);
55 /*15*/
56 PetscErrorCode (*getinfo)(Mat, MatInfoType, MatInfo *);
57 PetscErrorCode (*equal)(Mat, Mat, PetscBool *);
58 PetscErrorCode (*getdiagonal)(Mat, Vec);
59 PetscErrorCode (*diagonalscale)(Mat, Vec, Vec);
60 PetscErrorCode (*norm)(Mat, NormType, PetscReal *);
61 /*20*/
62 PetscErrorCode (*assemblybegin)(Mat, MatAssemblyType);
63 PetscErrorCode (*assemblyend)(Mat, MatAssemblyType);
64 PetscErrorCode (*setoption)(Mat, MatOption, PetscBool);
65 PetscErrorCode (*zeroentries)(Mat);
66 /*24*/
67 PetscErrorCode (*zerorows)(Mat, PetscInt, const PetscInt[], PetscScalar, Vec, Vec);
68 PetscErrorCode (*lufactorsymbolic)(Mat, Mat, IS, IS, const MatFactorInfo *);
69 PetscErrorCode (*lufactornumeric)(Mat, Mat, const MatFactorInfo *);
70 PetscErrorCode (*choleskyfactorsymbolic)(Mat, Mat, IS, const MatFactorInfo *);
71 PetscErrorCode (*choleskyfactornumeric)(Mat, Mat, const MatFactorInfo *);
72 /*29*/
73 PetscErrorCode (*setup)(Mat);
74 PetscErrorCode (*ilufactorsymbolic)(Mat, Mat, IS, IS, const MatFactorInfo *);
75 PetscErrorCode (*iccfactorsymbolic)(Mat, Mat, IS, const MatFactorInfo *);
76 PetscErrorCode (*getdiagonalblock)(Mat, Mat *);
77 PetscErrorCode (*setinf)(Mat);
78 /*34*/
79 PetscErrorCode (*duplicate)(Mat, MatDuplicateOption, Mat *);
80 PetscErrorCode (*forwardsolve)(Mat, Vec, Vec);
81 PetscErrorCode (*backwardsolve)(Mat, Vec, Vec);
82 PetscErrorCode (*ilufactor)(Mat, IS, IS, const MatFactorInfo *);
83 PetscErrorCode (*iccfactor)(Mat, IS, const MatFactorInfo *);
84 /*39*/
85 PetscErrorCode (*axpy)(Mat, PetscScalar, Mat, MatStructure);
86 PetscErrorCode (*createsubmatrices)(Mat, PetscInt, const IS[], const IS[], MatReuse, Mat *[]);
87 PetscErrorCode (*increaseoverlap)(Mat, PetscInt, IS[], PetscInt);
88 PetscErrorCode (*getvalues)(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], PetscScalar[]);
89 PetscErrorCode (*copy)(Mat, Mat, MatStructure);
90 /*44*/
91 PetscErrorCode (*getrowmax)(Mat, Vec, PetscInt[]);
92 PetscErrorCode (*scale)(Mat, PetscScalar);
93 PetscErrorCode (*shift)(Mat, PetscScalar);
94 PetscErrorCode (*diagonalset)(Mat, Vec, InsertMode);
95 PetscErrorCode (*zerorowscolumns)(Mat, PetscInt, const PetscInt[], PetscScalar, Vec, Vec);
96 /*49*/
97 PetscErrorCode (*setrandom)(Mat, PetscRandom);
98 PetscErrorCode (*getrowij)(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscBool *);
99 PetscErrorCode (*restorerowij)(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscBool *);
100 PetscErrorCode (*getcolumnij)(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscBool *);
101 PetscErrorCode (*restorecolumnij)(Mat, PetscInt, PetscBool, PetscBool, PetscInt *, const PetscInt *[], const PetscInt *[], PetscBool *);
102 /*54*/
103 PetscErrorCode (*fdcoloringcreate)(Mat, ISColoring, MatFDColoring);
104 PetscErrorCode (*coloringpatch)(Mat, PetscInt, PetscInt, ISColoringValue[], ISColoring *);
105 PetscErrorCode (*setunfactored)(Mat);
106 PetscErrorCode (*permute)(Mat, IS, IS, Mat *);
107 PetscErrorCode (*setvaluesblocked)(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
108 /*59*/
109 PetscErrorCode (*createsubmatrix)(Mat, IS, IS, MatReuse, Mat *);
110 PetscErrorCode (*destroy)(Mat);
111 PetscErrorCode (*view)(Mat, PetscViewer);
112 PetscErrorCode (*convertfrom)(Mat, MatType, MatReuse, Mat *);
113 PetscErrorCode (*matmatmultsymbolic)(Mat, Mat, Mat, PetscReal, Mat);
114 /*64*/
115 PetscErrorCode (*matmatmultnumeric)(Mat, Mat, Mat, Mat);
116 PetscErrorCode (*setlocaltoglobalmapping)(Mat, ISLocalToGlobalMapping, ISLocalToGlobalMapping);
117 PetscErrorCode (*setvalueslocal)(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
118 PetscErrorCode (*zerorowslocal)(Mat, PetscInt, const PetscInt[], PetscScalar, Vec, Vec);
119 PetscErrorCode (*getrowmaxabs)(Mat, Vec, PetscInt[]);
120 /*69*/
121 PetscErrorCode (*getrowminabs)(Mat, Vec, PetscInt[]);
122 PetscErrorCode (*convert)(Mat, MatType, MatReuse, Mat *);
123 PetscErrorCode (*hasoperation)(Mat, MatOperation, PetscBool *);
124 PetscErrorCode (*fdcoloringapply)(Mat, MatFDColoring, Vec, void *);
125 PetscErrorCode (*setfromoptions)(Mat, PetscOptionItems);
126 /*74*/
127 PetscErrorCode (*findzerodiagonals)(Mat, IS *);
128 PetscErrorCode (*mults)(Mat, Vecs, Vecs);
129 PetscErrorCode (*solves)(Mat, Vecs, Vecs);
130 PetscErrorCode (*getinertia)(Mat, PetscInt *, PetscInt *, PetscInt *);
131 PetscErrorCode (*load)(Mat, PetscViewer);
132 /*79*/
133 PetscErrorCode (*issymmetric)(Mat, PetscReal, PetscBool *);
134 PetscErrorCode (*ishermitian)(Mat, PetscReal, PetscBool *);
135 PetscErrorCode (*isstructurallysymmetric)(Mat, PetscBool *);
136 PetscErrorCode (*setvaluesblockedlocal)(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
137 PetscErrorCode (*getvecs)(Mat, Vec *, Vec *);
138 /*84*/
139 PetscErrorCode (*matmultsymbolic)(Mat, Mat, PetscReal, Mat);
140 PetscErrorCode (*matmultnumeric)(Mat, Mat, Mat);
141 PetscErrorCode (*ptapnumeric)(Mat, Mat, Mat); /* double dispatch wrapper routine */
142 PetscErrorCode (*mattransposemultsymbolic)(Mat, Mat, PetscReal, Mat);
143 PetscErrorCode (*mattransposemultnumeric)(Mat, Mat, Mat);
144 /*89*/
145 PetscErrorCode (*bindtocpu)(Mat, PetscBool);
146 PetscErrorCode (*productsetfromoptions)(Mat);
147 PetscErrorCode (*productsymbolic)(Mat);
148 PetscErrorCode (*productnumeric)(Mat);
149 PetscErrorCode (*conjugate)(Mat); /* complex conjugate */
150 /*94*/
151 PetscErrorCode (*viewnative)(Mat, PetscViewer);
152 PetscErrorCode (*setvaluesrow)(Mat, PetscInt, const PetscScalar[]);
153 PetscErrorCode (*realpart)(Mat);
154 PetscErrorCode (*imaginarypart)(Mat);
155 PetscErrorCode (*getrowuppertriangular)(Mat);
156 /*99*/
157 PetscErrorCode (*restorerowuppertriangular)(Mat);
158 PetscErrorCode (*matsolve)(Mat, Mat, Mat);
159 PetscErrorCode (*matsolvetranspose)(Mat, Mat, Mat);
160 PetscErrorCode (*getrowmin)(Mat, Vec, PetscInt[]);
161 PetscErrorCode (*getcolumnvector)(Mat, Vec, PetscInt);
162 /*104*/
163 PetscErrorCode (*getseqnonzerostructure)(Mat, Mat *);
164 PetscErrorCode (*create)(Mat);
165 PetscErrorCode (*getghosts)(Mat, PetscInt *, const PetscInt *[]);
166 PetscErrorCode (*getlocalsubmatrix)(Mat, IS, IS, Mat *);
167 PetscErrorCode (*restorelocalsubmatrix)(Mat, IS, IS, Mat *);
168 /*109*/
169 PetscErrorCode (*multdiagonalblock)(Mat, Vec, Vec);
170 PetscErrorCode (*hermitiantranspose)(Mat, MatReuse, Mat *);
171 PetscErrorCode (*multhermitiantranspose)(Mat, Vec, Vec);
172 PetscErrorCode (*multhermitiantransposeadd)(Mat, Vec, Vec, Vec);
173 PetscErrorCode (*getmultiprocblock)(Mat, MPI_Comm, MatReuse, Mat *);
174 /*114*/
175 PetscErrorCode (*findnonzerorows)(Mat, IS *);
176 PetscErrorCode (*getcolumnreductions)(Mat, PetscInt, PetscReal *);
177 PetscErrorCode (*invertblockdiagonal)(Mat, const PetscScalar **);
178 PetscErrorCode (*invertvariableblockdiagonal)(Mat, PetscInt, const PetscInt *, PetscScalar *);
179 PetscErrorCode (*createsubmatricesmpi)(Mat, PetscInt, const IS[], const IS[], MatReuse, Mat **);
180 /*119*/
181 PetscErrorCode (*setvaluesbatch)(Mat, PetscInt, PetscInt, PetscInt *, const PetscScalar *);
182 PetscErrorCode (*transposematmultsymbolic)(Mat, Mat, PetscReal, Mat);
183 PetscErrorCode (*transposematmultnumeric)(Mat, Mat, Mat);
184 PetscErrorCode (*transposecoloringcreate)(Mat, ISColoring, MatTransposeColoring);
185 PetscErrorCode (*transcoloringapplysptoden)(MatTransposeColoring, Mat, Mat);
186 /*124*/
187 PetscErrorCode (*transcoloringapplydentosp)(MatTransposeColoring, Mat, Mat);
188 PetscErrorCode (*rartnumeric)(Mat, Mat, Mat); /* double dispatch wrapper routine */
189 PetscErrorCode (*setblocksizes)(Mat, PetscInt, PetscInt);
190 PetscErrorCode (*residual)(Mat, Vec, Vec, Vec);
191 PetscErrorCode (*fdcoloringsetup)(Mat, ISColoring, MatFDColoring);
192 /*129*/
193 PetscErrorCode (*findoffblockdiagonalentries)(Mat, IS *);
194 PetscErrorCode (*creatempimatconcatenateseqmat)(MPI_Comm, Mat, PetscInt, MatReuse, Mat *);
195 PetscErrorCode (*destroysubmatrices)(PetscInt, Mat *[]);
196 PetscErrorCode (*mattransposesolve)(Mat, Mat, Mat);
197 PetscErrorCode (*getvalueslocal)(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], PetscScalar[]);
198 /*134*/
199 PetscErrorCode (*creategraph)(Mat, PetscBool, PetscBool, PetscReal, PetscInt, PetscInt[], Mat *);
200 PetscErrorCode (*transposesymbolic)(Mat, Mat *);
201 PetscErrorCode (*eliminatezeros)(Mat, PetscBool);
202 PetscErrorCode (*getrowsumabs)(Mat, Vec);
203 PetscErrorCode (*getfactor)(Mat, MatSolverType, MatFactorType, Mat *);
204 /*139*/
205 PetscErrorCode (*getblockdiagonal)(Mat, Mat *); // NOTE: the caller of get{block, vblock}diagonal owns the returned matrix;
206 PetscErrorCode (*getvblockdiagonal)(Mat, Mat *); // they must destroy it after use
207 PetscErrorCode (*copyhashtoxaij)(Mat, Mat);
208 PetscErrorCode (*getcurrentmemtype)(Mat, PetscMemType *);
209 PetscErrorCode (*zerorowscolumnslocal)(Mat, PetscInt, const PetscInt[], PetscScalar, Vec, Vec);
210 };
211 /*
212 If you add MatOps entries above also add them to the MATOP enum
213 in include/petscmat.h
214 */
215
216 #include <petscsys.h>
217
218 typedef struct _p_MatRootName *MatRootName;
219 struct _p_MatRootName {
220 char *rname, *sname, *mname;
221 MatRootName next;
222 };
223
224 PETSC_EXTERN MatRootName MatRootNameList;
225
226 /*
227 Utility private matrix routines used outside Mat
228 */
229 PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode MatFindNonzeroRowsOrCols_Basic(Mat, PetscBool, PetscReal, IS *);
230 PETSC_EXTERN PetscErrorCode MatShellGetScalingShifts(Mat, PetscScalar *, PetscScalar *, Vec *, Vec *, Vec *, Mat *, IS *, IS *);
231
232 #define MAT_SHELL_NOT_ALLOWED (void *)-1
233
234 /*
235 Utility private matrix routines
236 */
237 PETSC_INTERN PetscErrorCode MatConvert_Basic(Mat, MatType, MatReuse, Mat *);
238 PETSC_INTERN PetscErrorCode MatConvert_Shell(Mat, MatType, MatReuse, Mat *);
239 PETSC_INTERN PetscErrorCode MatConvertFrom_Shell(Mat, MatType, MatReuse, Mat *);
240 PETSC_INTERN PetscErrorCode MatShellSetContext_Immutable(Mat, void *);
241 PETSC_INTERN PetscErrorCode MatShellSetContextDestroy_Immutable(Mat, PetscCtxDestroyFn *);
242 PETSC_INTERN PetscErrorCode MatShellSetManageScalingShifts_Immutable(Mat);
243 PETSC_INTERN PetscErrorCode MatCopy_Basic(Mat, Mat, MatStructure);
244 PETSC_INTERN PetscErrorCode MatDiagonalSet_Default(Mat, Vec, InsertMode);
245 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
246 PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
247 #endif
248 PETSC_INTERN PetscErrorCode MatSetPreallocationCOO_Basic(Mat, PetscCount, PetscInt[], PetscInt[]);
249 PETSC_INTERN PetscErrorCode MatSetValuesCOO_Basic(Mat, const PetscScalar[], InsertMode);
250
251 /* Scattering of dense matrices with strided PetscSF */
252 PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode MatDenseScatter_Private(PetscSF, Mat, Mat, InsertMode, ScatterMode);
253
254 /* This can be moved to the public header after implementing some missing MatProducts */
255 PETSC_INTERN PetscErrorCode MatCreateFromISLocalToGlobalMapping(ISLocalToGlobalMapping, Mat, PetscBool, PetscBool, MatType, Mat *);
256
257 /* these callbacks rely on the old matrix function pointers for
258 matmat operations. They are unsafe, and should be removed.
259 However, the amount of work needed to clean up all the
260 implementations is not negligible */
261 PETSC_INTERN PetscErrorCode MatProductSymbolic_AB(Mat);
262 PETSC_INTERN PetscErrorCode MatProductNumeric_AB(Mat);
263 PETSC_INTERN PetscErrorCode MatProductSymbolic_AtB(Mat);
264 PETSC_INTERN PetscErrorCode MatProductNumeric_AtB(Mat);
265 PETSC_INTERN PetscErrorCode MatProductSymbolic_ABt(Mat);
266 PETSC_INTERN PetscErrorCode MatProductNumeric_ABt(Mat);
267 PETSC_INTERN PetscErrorCode MatProductNumeric_PtAP(Mat);
268 PETSC_INTERN PetscErrorCode MatProductNumeric_RARt(Mat);
269 PETSC_INTERN PetscErrorCode MatProductSymbolic_ABC(Mat);
270 PETSC_INTERN PetscErrorCode MatProductNumeric_ABC(Mat);
271
272 PETSC_INTERN PetscErrorCode MatProductCreate_Private(Mat, Mat, Mat, Mat);
273 /* this callback handles all the different triple products and
274 does not rely on the function pointers; used by cuSPARSE/hipSPARSE and KOKKOS-KERNELS */
275 PETSC_INTERN PetscErrorCode MatProductSymbolic_ABC_Basic(Mat);
276
277 /* CreateGraph is common to AIJ seq and mpi */
278 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat, PetscBool, PetscBool, PetscReal, PetscInt, PetscInt[], Mat *);
279
280 #if defined(PETSC_CLANG_STATIC_ANALYZER)
281 template <typename Tm>
282 extern void MatCheckPreallocated(Tm, int);
283 template <typename Tm>
284 extern void MatCheckProduct(Tm, int);
285 #else /* PETSC_CLANG_STATIC_ANALYZER */
286 #define MatCheckPreallocated(A, arg) \
287 do { \
288 if (!(A)->preallocated) PetscCall(MatSetUp(A)); \
289 } while (0)
290
291 #if defined(PETSC_USE_DEBUG)
292 #define MatCheckProduct(A, arg) \
293 do { \
294 PetscCheck((A)->product, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Argument %d \"%s\" is not a matrix obtained from MatProductCreate()", (arg), #A); \
295 } while (0)
296 #else
297 #define MatCheckProduct(A, arg) \
298 do { \
299 } while (0)
300 #endif
301 #endif /* PETSC_CLANG_STATIC_ANALYZER */
302
303 /*
304 The stash is used to temporarily store inserted matrix values that
305 belong to another processor. During the assembly phase the stashed
306 values are moved to the correct processor and
307 */
308
309 typedef struct _MatStashSpace *PetscMatStashSpace;
310
311 struct _MatStashSpace {
312 PetscMatStashSpace next;
313 PetscScalar *space_head, *val;
314 PetscInt *idx, *idy;
315 PetscInt total_space_size;
316 PetscInt local_used;
317 PetscInt local_remaining;
318 };
319
320 PETSC_EXTERN PetscErrorCode PetscMatStashSpaceGet(PetscInt, PetscInt, PetscMatStashSpace *);
321 PETSC_EXTERN PetscErrorCode PetscMatStashSpaceContiguous(PetscInt, PetscMatStashSpace *, PetscScalar *, PetscInt *, PetscInt *);
322 PETSC_EXTERN PetscErrorCode PetscMatStashSpaceDestroy(PetscMatStashSpace *);
323
324 typedef struct {
325 PetscInt count;
326 } MatStashHeader;
327
328 typedef struct {
329 void *buffer; /* Of type blocktype, dynamically constructed */
330 PetscInt count;
331 char pending;
332 } MatStashFrame;
333
334 typedef struct _MatStash MatStash;
335 struct _MatStash {
336 PetscInt nmax; /* maximum stash size */
337 PetscInt umax; /* user specified max-size */
338 PetscInt oldnmax; /* the nmax value used previously */
339 PetscInt n; /* stash size */
340 PetscInt bs; /* block size of the stash */
341 PetscInt reallocs; /* preserve the no of mallocs invoked */
342 PetscMatStashSpace space_head, space; /* linked list to hold stashed global row/column numbers and matrix values */
343
344 PetscErrorCode (*ScatterBegin)(Mat, MatStash *, PetscInt *);
345 PetscErrorCode (*ScatterGetMesg)(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
346 PetscErrorCode (*ScatterEnd)(MatStash *);
347 PetscErrorCode (*ScatterDestroy)(MatStash *);
348
349 /* The following variables are used for communication */
350 MPI_Comm comm;
351 PetscMPIInt size, rank;
352 PetscMPIInt tag1, tag2;
353 MPI_Request *send_waits; /* array of send requests */
354 MPI_Request *recv_waits; /* array of receive requests */
355 MPI_Status *send_status; /* array of send status */
356 PetscMPIInt nsends, nrecvs; /* numbers of sends and receives */
357 PetscScalar *svalues; /* sending data */
358 PetscInt *sindices;
359 PetscScalar **rvalues; /* receiving data (values) */
360 PetscInt **rindices; /* receiving data (indices) */
361 PetscMPIInt nprocessed; /* number of messages already processed */
362 PetscMPIInt *flg_v; /* indicates what messages have arrived so far and from whom */
363 PetscBool reproduce;
364 PetscMPIInt reproduce_count;
365
366 /* The following variables are used for BTS communication */
367 PetscBool first_assembly_done; /* Is the first time matrix assembly done? */
368 PetscBool use_status; /* Use MPI_Status to determine number of items in each message */
369 PetscMPIInt nsendranks;
370 PetscMPIInt nrecvranks;
371 PetscMPIInt *sendranks;
372 PetscMPIInt *recvranks;
373 MatStashHeader *sendhdr, *recvhdr;
374 MatStashFrame *sendframes; /* pointers to the main messages */
375 MatStashFrame *recvframes;
376 MatStashFrame *recvframe_active;
377 PetscInt recvframe_i; /* index of block within active frame */
378 PetscInt recvframe_count; /* Count actually sent for current frame */
379 PetscMPIInt recvcount; /* Number of receives processed so far */
380 PetscMPIInt *some_indices; /* From last call to MPI_Waitsome */
381 MPI_Status *some_statuses; /* Statuses from last call to MPI_Waitsome */
382 PetscMPIInt some_count; /* Number of requests completed in last call to MPI_Waitsome */
383 PetscMPIInt some_i; /* Index of request currently being processed */
384 MPI_Request *sendreqs;
385 MPI_Request *recvreqs;
386 PetscSegBuffer segsendblocks;
387 PetscSegBuffer segrecvframe;
388 PetscSegBuffer segrecvblocks;
389 MPI_Datatype blocktype;
390 size_t blocktype_size;
391 InsertMode *insertmode; /* Pointer to check mat->insertmode and set upon message arrival in case no local values have been set. */
392 };
393
394 #if !defined(PETSC_HAVE_MPIUNI)
395 PETSC_INTERN PetscErrorCode MatStashScatterDestroy_BTS(MatStash *);
396 #endif
397 PETSC_INTERN PetscErrorCode MatStashCreate_Private(MPI_Comm, PetscInt, MatStash *);
398 PETSC_INTERN PetscErrorCode MatStashDestroy_Private(MatStash *);
399 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Private(MatStash *);
400 PETSC_INTERN PetscErrorCode MatStashSetInitialSize_Private(MatStash *, PetscInt);
401 PETSC_INTERN PetscErrorCode MatStashGetInfo_Private(MatStash *, PetscInt *, PetscInt *);
402 PETSC_INTERN PetscErrorCode MatStashValuesRow_Private(MatStash *, PetscInt, PetscInt, const PetscInt[], const PetscScalar[], PetscBool);
403 PETSC_INTERN PetscErrorCode MatStashValuesCol_Private(MatStash *, PetscInt, PetscInt, const PetscInt[], const PetscScalar[], PetscInt, PetscBool);
404 PETSC_INTERN PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *, PetscInt, PetscInt, const PetscInt[], const PetscScalar[], PetscInt, PetscInt, PetscInt);
405 PETSC_INTERN PetscErrorCode MatStashValuesColBlocked_Private(MatStash *, PetscInt, PetscInt, const PetscInt[], const PetscScalar[], PetscInt, PetscInt, PetscInt);
406 PETSC_INTERN PetscErrorCode MatStashScatterBegin_Private(Mat, MatStash *, PetscInt *);
407 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Private(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
408 PETSC_INTERN PetscErrorCode MatGetInfo_External(Mat, MatInfoType, MatInfo *);
409
410 typedef struct {
411 PetscInt dim;
412 PetscInt dims[4];
413 PetscInt starts[4];
414 PetscBool noc; /* this is a single component problem, hence user will not set MatStencil.c */
415 } MatStencilInfo;
416
417 /* Info about using compressed row format */
418 typedef struct {
419 PetscBool use; /* indicates compressed rows have been checked and will be used */
420 PetscInt nrows; /* number of non-zero rows */
421 PetscInt *i; /* compressed row pointer */
422 PetscInt *rindex; /* compressed row index */
423 } Mat_CompressedRow;
424 PETSC_EXTERN PetscErrorCode MatCheckCompressedRow(Mat, PetscInt, Mat_CompressedRow *, PetscInt *, PetscInt, PetscReal);
425
426 typedef struct { /* used by MatCreateRedundantMatrix() for reusing matredundant */
427 PetscInt nzlocal, nsends, nrecvs;
428 PetscMPIInt *send_rank, *recv_rank;
429 PetscInt *sbuf_nz, *rbuf_nz, *sbuf_j, **rbuf_j;
430 PetscScalar *sbuf_a, **rbuf_a;
431 MPI_Comm subcomm; /* when user does not provide a subcomm */
432 IS isrow, iscol;
433 Mat *matseq;
434 } Mat_Redundant;
435
436 typedef struct { /* used by MatProduct() */
437 MatProductType type;
438 char *alg;
439 Mat A, B, C, Dwork;
440 PetscBool symbolic_used_the_fact_A_is_symmetric; /* Symbolic phase took advantage of the fact that A is symmetric, and optimized e.g. AtB as AB. Then, .. */
441 PetscBool symbolic_used_the_fact_B_is_symmetric; /* .. in the numeric phase, if a new A is not symmetric (but has the same sparsity as the old A therefore .. */
442 PetscBool symbolic_used_the_fact_C_is_symmetric; /* MatMatMult(A,B,MAT_REUSE_MATRIX,..&C) is still legitimate), we need to redo symbolic! */
443 PetscObjectParameterDeclare(PetscReal, fill);
444 PetscBool api_user; /* used to distinguish command line options and to indicate the matrix values are ready to be consumed at symbolic phase if needed */
445 PetscBool setfromoptionscalled;
446
447 /* Some products may display the information on the algorithm used */
448 PetscErrorCode (*view)(Mat, PetscViewer);
449
450 /* many products have intermediate data structures, each specific to Mat types and product type */
451 PetscBool clear; /* whether or not to clear the data structures after MatProductNumeric has been called */
452 void *data; /* where to stash those structures */
453 PetscCtxDestroyFn *destroy; /* freeing data */
454 } Mat_Product;
455
456 struct _p_Mat {
457 PETSCHEADER(struct _MatOps);
458 PetscLayout rmap, cmap;
459 void *data; /* implementation-specific data */
460 MatFactorType factortype; /* MAT_FACTOR_LU, ILU, CHOLESKY or ICC */
461 PetscBool trivialsymbolic; /* indicates the symbolic factorization doesn't actually do a symbolic factorization, it is delayed to the numeric factorization */
462 PetscBool canuseordering; /* factorization can use ordering provide to routine (most PETSc implementations) */
463 MatOrderingType preferredordering[MAT_FACTOR_NUM_TYPES]; /* what is the preferred (or default) ordering for the matrix solver type */
464 PetscBool assembled; /* is the matrix assembled? */
465 PetscBool was_assembled; /* new values inserted into assembled mat */
466 PetscInt num_ass; /* number of times matrix has been assembled */
467 PetscObjectState nonzerostate; /* each time new nonzeros locations are introduced into the matrix this is updated */
468 PetscObjectState ass_nonzerostate; /* nonzero state at last assembly */
469 MatInfo info; /* matrix information */
470 InsertMode insertmode; /* have values been inserted in matrix or added? */
471 MatStash stash, bstash; /* used for assembling off-proc mat emements */
472 MatNullSpace nullsp; /* null space (operator is singular) */
473 MatNullSpace transnullsp; /* null space of transpose of operator */
474 MatNullSpace nearnullsp; /* near null space to be used by multigrid methods */
475 PetscInt congruentlayouts; /* are the rows and columns layouts congruent? */
476 PetscBool preallocated;
477 MatStencilInfo stencil; /* information for structured grid */
478 PetscBool3 symmetric, hermitian, structurally_symmetric, spd;
479 PetscBool symmetry_eternal, structural_symmetry_eternal, spd_eternal;
480 PetscBool nooffprocentries, nooffproczerorows;
481 PetscBool assembly_subset; /* set by MAT_SUBSET_OFF_PROC_ENTRIES */
482 PetscBool submat_singleis; /* for efficient PCSetUp_ASM() */
483 PetscBool structure_only;
484 PetscBool sortedfull; /* full, sorted rows are inserted */
485 PetscBool force_diagonals; /* set by MAT_FORCE_DIAGONAL_ENTRIES */
486 #if defined(PETSC_HAVE_DEVICE)
487 PetscOffloadMask offloadmask; /* a mask which indicates where the valid matrix data is (GPU, CPU or both) */
488 PetscBool boundtocpu;
489 PetscBool bindingpropagates;
490 #endif
491 char *defaultrandtype;
492 void *spptr; /* pointer for special library like SuperLU */
493 char *solvertype;
494 PetscBool checksymmetryonassembly, checknullspaceonassembly;
495 PetscReal checksymmetrytol;
496 Mat schur; /* Schur complement matrix */
497 MatFactorSchurStatus schur_status; /* status of the Schur complement matrix */
498 Mat_Redundant *redundant; /* used by MatCreateRedundantMatrix() */
499 PetscBool erroriffailure; /* Generate an error if detected (for example a zero pivot) instead of returning */
500 MatFactorError factorerrortype; /* type of error in factorization */
501 PetscReal factorerror_zeropivot_value; /* If numerical zero pivot was detected this is the computed value */
502 PetscInt factorerror_zeropivot_row; /* Row where zero pivot was detected */
503 PetscInt nblocks, *bsizes; /* support for MatSetVariableBlockSizes() */
504 PetscInt p_cstart, p_rank, p_cend, n_rank; /* Information from parallel MatComputeVariableBlockEnvelope() */
505 PetscBool p_parallel;
506 char *defaultvectype;
507 Mat_Product *product;
508 PetscBool form_explicit_transpose; /* hint to generate an explicit mat tranpsose for operations like MatMultTranspose() */
509 PetscBool transupdated; /* whether or not the explicitly generated transpose is up-to-date */
510 char *factorprefix; /* the prefix to use with factored matrix that is created */
511 PetscBool hash_active; /* indicates MatSetValues() is being handled by hashing */
512 };
513
514 PETSC_INTERN PetscErrorCode MatAXPY_Basic(Mat, PetscScalar, Mat, MatStructure);
515 PETSC_INTERN PetscErrorCode MatAXPY_BasicWithPreallocation(Mat, Mat, PetscScalar, Mat, MatStructure);
516 PETSC_INTERN PetscErrorCode MatAXPY_Basic_Preallocate(Mat, Mat, Mat *);
517 PETSC_INTERN PetscErrorCode MatAXPY_Dense_Nest(Mat, PetscScalar, Mat);
518
519 PETSC_INTERN PetscErrorCode MatSetUp_Default(Mat);
520
521 /*
522 Utility for MatZeroRows
523 */
524 PETSC_INTERN PetscErrorCode MatZeroRowsMapLocal_Private(Mat, PetscInt, const PetscInt *, PetscInt *, PetscInt **);
525
526 /*
527 Utility for MatView/MatLoad
528 */
529 PETSC_INTERN PetscErrorCode MatView_Binary_BlockSizes(Mat, PetscViewer);
530 PETSC_INTERN PetscErrorCode MatLoad_Binary_BlockSizes(Mat, PetscViewer);
531
532 /*
533 Object for partitioning graphs
534 */
535
536 typedef struct _MatPartitioningOps *MatPartitioningOps;
537 struct _MatPartitioningOps {
538 PetscErrorCode (*apply)(MatPartitioning, IS *);
539 PetscErrorCode (*applynd)(MatPartitioning, IS *);
540 PetscErrorCode (*setfromoptions)(MatPartitioning, PetscOptionItems);
541 PetscErrorCode (*destroy)(MatPartitioning);
542 PetscErrorCode (*view)(MatPartitioning, PetscViewer);
543 PetscErrorCode (*improve)(MatPartitioning, IS *);
544 };
545
546 struct _p_MatPartitioning {
547 PETSCHEADER(struct _MatPartitioningOps);
548 Mat adj;
549 PetscInt *vertex_weights;
550 PetscReal *part_weights;
551 PetscInt n; /* number of partitions */
552 PetscInt ncon; /* number of vertex weights per vertex */
553 void *data;
554 PetscBool use_edge_weights; /* A flag indicates whether or not to use edge weights */
555 };
556
557 /* needed for parallel nested dissection by ParMetis and PTSCOTCH */
558 PETSC_INTERN PetscErrorCode MatPartitioningSizesToSep_Private(PetscInt, PetscInt[], PetscInt[], PetscInt[]);
559
560 /*
561 Object for coarsen graphs
562 */
563 typedef struct _MatCoarsenOps *MatCoarsenOps;
564 struct _MatCoarsenOps {
565 PetscErrorCode (*apply)(MatCoarsen);
566 PetscErrorCode (*setfromoptions)(MatCoarsen, PetscOptionItems);
567 PetscErrorCode (*destroy)(MatCoarsen);
568 PetscErrorCode (*view)(MatCoarsen, PetscViewer);
569 };
570
571 #define MAT_COARSEN_STRENGTH_INDEX_SIZE 3
572 struct _p_MatCoarsen {
573 PETSCHEADER(struct _MatCoarsenOps);
574 Mat graph;
575 void *subctx;
576 /* */
577 PetscBool strict_aggs;
578 IS perm;
579 PetscCoarsenData *agg_lists;
580 PetscInt max_it; /* number of iterations in HEM */
581 PetscReal threshold; /* HEM can filter interim graphs */
582 PetscInt strength_index_size;
583 PetscInt strength_index[MAT_COARSEN_STRENGTH_INDEX_SIZE];
584 };
585
586 PETSC_EXTERN PetscErrorCode MatCoarsenMISKSetDistance(MatCoarsen, PetscInt);
587 PETSC_EXTERN PetscErrorCode MatCoarsenMISKGetDistance(MatCoarsen, PetscInt *);
588
589 /*
590 Used in aijdevice.h
591 */
592 typedef struct {
593 PetscInt *i;
594 PetscInt *j;
595 PetscScalar *a;
596 PetscInt n;
597 PetscInt ignorezeroentries;
598 } PetscCSRDataStructure;
599
600 /*
601 MatFDColoring is used to compute Jacobian matrices efficiently
602 via coloring. The data structure is explained below in an example.
603
604 Color = 0 1 0 2 | 2 3 0
605 ---------------------------------------------------
606 00 01 | 05
607 10 11 | 14 15 Processor 0
608 22 23 | 25
609 32 33 |
610 ===================================================
611 | 44 45 46
612 50 | 55 Processor 1
613 | 64 66
614 ---------------------------------------------------
615
616 ncolors = 4;
617
618 ncolumns = {2,1,1,0}
619 columns = {{0,2},{1},{3},{}}
620 nrows = {4,2,3,3}
621 rows = {{0,1,2,3},{0,1},{1,2,3},{0,1,2}}
622 vwscale = {dx(0),dx(1),dx(2),dx(3)} MPI Vec
623 vscale = {dx(0),dx(1),dx(2),dx(3),dx(4),dx(5)} Seq Vec
624
625 ncolumns = {1,0,1,1}
626 columns = {{6},{},{4},{5}}
627 nrows = {3,0,2,2}
628 rows = {{0,1,2},{},{1,2},{1,2}}
629 vwscale = {dx(4),dx(5),dx(6)} MPI Vec
630 vscale = {dx(0),dx(4),dx(5),dx(6)} Seq Vec
631
632 See the routine MatFDColoringApply() for how this data is used
633 to compute the Jacobian.
634
635 */
636 typedef struct {
637 PetscInt row;
638 PetscInt col;
639 PetscScalar *valaddr; /* address of value */
640 } MatEntry;
641
642 typedef struct {
643 PetscInt row;
644 PetscScalar *valaddr; /* address of value */
645 } MatEntry2;
646
647 struct _p_MatFDColoring {
648 PETSCHEADER(int);
649 PetscInt M, N, m; /* total rows, columns; local rows */
650 PetscInt rstart; /* first row owned by local processor */
651 PetscInt ncolors; /* number of colors */
652 PetscInt *ncolumns; /* number of local columns for a color */
653 PetscInt **columns; /* lists the local columns of each color (using global column numbering) */
654 IS *isa; /* these are the IS that contain the column values given in columns */
655 PetscInt *nrows; /* number of local rows for each color */
656 MatEntry *matentry; /* holds (row, column, address of value) for Jacobian matrix entry */
657 MatEntry2 *matentry2; /* holds (row, address of value) for Jacobian matrix entry */
658 PetscScalar *dy; /* store a block of F(x+dx)-F(x) when J is in BAIJ format */
659 PetscReal error_rel; /* square root of relative error in computing function */
660 PetscReal umin; /* minimum allowable u'dx value */
661 Vec w1, w2, w3; /* work vectors used in computing Jacobian */
662 PetscBool fset; /* indicates that the initial function value F(X) is set */
663 MatFDColoringFn *f; /* function that defines Jacobian */
664 void *fctx; /* optional user-defined context for use by the function f */
665 Vec vscale; /* holds FD scaling, i.e. 1/dx for each perturbed column */
666 PetscInt currentcolor; /* color for which function evaluation is being done now */
667 const char *htype; /* "wp" or "ds" */
668 ISColoringType ctype; /* IS_COLORING_GLOBAL or IS_COLORING_LOCAL */
669 PetscInt brows, bcols; /* number of block rows or columns for speedup inserting the dense matrix into sparse Jacobian */
670 PetscBool setupcalled; /* true if setup has been called */
671 PetscBool viewed; /* true if the -mat_fd_coloring_view has been triggered already */
672 PetscFortranCallbackFn *ftn_func_pointer; /* serve the same purpose as *fortran_func_pointers in PETSc objects */
673 void *ftn_func_cntx;
674 PetscObjectId matid; /* matrix this object was created with, must always be the same */
675 };
676
677 typedef struct _MatColoringOps *MatColoringOps;
678 struct _MatColoringOps {
679 PetscErrorCode (*destroy)(MatColoring);
680 PetscErrorCode (*setfromoptions)(MatColoring, PetscOptionItems);
681 PetscErrorCode (*view)(MatColoring, PetscViewer);
682 PetscErrorCode (*apply)(MatColoring, ISColoring *);
683 PetscErrorCode (*weights)(MatColoring, PetscReal **, PetscInt **);
684 };
685
686 struct _p_MatColoring {
687 PETSCHEADER(struct _MatColoringOps);
688 Mat mat;
689 PetscInt dist; /* distance of the coloring */
690 PetscInt maxcolors; /* the maximum number of colors returned, maxcolors=1 for MIS */
691 void *data; /* inner context */
692 PetscBool valid; /* check to see if what is produced is a valid coloring */
693 MatColoringWeightType weight_type; /* type of weight computation to be performed */
694 PetscReal *user_weights; /* custom weights and permutation */
695 PetscInt *user_lperm;
696 PetscBool valid_iscoloring; /* check to see if matcoloring is produced a valid iscoloring */
697 };
698
699 struct _p_MatTransposeColoring {
700 PETSCHEADER(int);
701 PetscInt M, N, m; /* total rows, columns; local rows */
702 PetscInt rstart; /* first row owned by local processor */
703 PetscInt ncolors; /* number of colors */
704 PetscInt *ncolumns; /* number of local columns for a color */
705 PetscInt *nrows; /* number of local rows for each color */
706 PetscInt currentcolor; /* color for which function evaluation is being done now */
707 ISColoringType ctype; /* IS_COLORING_GLOBAL or IS_COLORING_LOCAL */
708
709 PetscInt *colorforrow, *colorforcol; /* pointer to rows and columns */
710 PetscInt *rows; /* lists the local rows for each color (using the local row numbering) */
711 PetscInt *den2sp; /* maps (row,color) in the dense matrix to index of sparse matrix array a->a */
712 PetscInt *columns; /* lists the local columns of each color (using global column numbering) */
713 PetscInt brows; /* number of rows for efficient implementation of MatTransColoringApplyDenToSp() */
714 PetscInt *lstart; /* array used for loop over row blocks of Csparse */
715 };
716
717 /*
718 Null space context for preconditioner/operators
719 */
720 struct _p_MatNullSpace {
721 PETSCHEADER(int);
722 PetscBool has_cnst;
723 PetscInt n;
724 Vec *vecs;
725 PetscScalar *alpha; /* for projections */
726 MatNullSpaceRemoveFn *remove; /* for user provided removal function */
727 void *rmctx; /* context for remove() function */
728 };
729
730 /*
731 Internal data structure for MATMPIDENSE
732 */
733 typedef struct {
734 Mat A; /* local submatrix */
735
736 /* The following variables are used for matrix assembly */
737 PetscBool donotstash; /* Flag indicating if values should be stashed */
738 MPI_Request *send_waits; /* array of send requests */
739 MPI_Request *recv_waits; /* array of receive requests */
740 PetscInt nsends, nrecvs; /* numbers of sends and receives */
741 PetscScalar *svalues, *rvalues; /* sending and receiving data */
742 PetscInt rmax; /* maximum message length */
743
744 /* The following variables are used for matrix-vector products */
745 Vec lvec; /* local vector */
746 PetscSF Mvctx; /* for mat-mult communications */
747 PetscBool roworiented; /* if true, row-oriented input (default) */
748
749 /* Support for MatDenseGetColumnVec and MatDenseGetSubMatrix */
750 Mat cmat; /* matrix representation of a given subset of columns */
751 Vec cvec; /* vector representation of a given column */
752 const PetscScalar *ptrinuse; /* holds array to be restored (just a placeholder) */
753 PetscInt vecinuse; /* if cvec is in use (col = vecinuse-1) */
754 PetscInt matinuse; /* if cmat is in use (cbegin = matinuse-1) */
755 /* if this is from MatDenseGetSubMatrix, which columns and rows does it correspond to? */
756 PetscInt sub_rbegin;
757 PetscInt sub_rend;
758 PetscInt sub_cbegin;
759 PetscInt sub_cend;
760 } Mat_MPIDense;
761
762 /*
763 Checking zero pivot for LU, ILU preconditioners.
764 */
765 typedef struct {
766 PetscInt nshift, nshift_max;
767 PetscReal shift_amount, shift_lo, shift_hi, shift_top, shift_fraction;
768 PetscBool newshift;
769 PetscReal rs; /* active row sum of abs(off-diagonals) */
770 PetscScalar pv; /* pivot of the active row */
771 } FactorShiftCtx;
772
773 PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode MatTransposeCheckNonzeroState_Private(Mat, Mat);
774
775 /*
776 Used by MatTranspose() and potentially other functions to track the matrix used in the generation of another matrix
777 */
778 typedef struct {
779 PetscObjectId id;
780 PetscObjectState state;
781 PetscObjectState nonzerostate;
782 } MatParentState;
783
784 PETSC_EXTERN PetscErrorCode MatFactorDumpMatrix(Mat);
785 PETSC_INTERN PetscErrorCode MatSetBlockSizes_Default(Mat, PetscInt, PetscInt);
786
787 PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode MatShift_Basic(Mat, PetscScalar);
788
MatPivotCheck_nz(PETSC_UNUSED Mat mat,const MatFactorInfo * info,FactorShiftCtx * sctx,PETSC_UNUSED PetscInt row)789 static inline PetscErrorCode MatPivotCheck_nz(PETSC_UNUSED Mat mat, const MatFactorInfo *info, FactorShiftCtx *sctx, PETSC_UNUSED PetscInt row)
790 {
791 PetscReal _rs = sctx->rs;
792 PetscReal _zero = info->zeropivot * _rs;
793
794 PetscFunctionBegin;
795 if (PetscAbsScalar(sctx->pv) <= _zero && !PetscIsNanScalar(sctx->pv)) {
796 /* force |diag| > zeropivot*rs */
797 if (!sctx->nshift) sctx->shift_amount = info->shiftamount;
798 else sctx->shift_amount *= 2.0;
799 sctx->newshift = PETSC_TRUE;
800 (sctx->nshift)++;
801 } else {
802 sctx->newshift = PETSC_FALSE;
803 }
804 PetscFunctionReturn(PETSC_SUCCESS);
805 }
806
MatPivotCheck_pd(PETSC_UNUSED Mat mat,const MatFactorInfo * info,FactorShiftCtx * sctx,PETSC_UNUSED PetscInt row)807 static inline PetscErrorCode MatPivotCheck_pd(PETSC_UNUSED Mat mat, const MatFactorInfo *info, FactorShiftCtx *sctx, PETSC_UNUSED PetscInt row)
808 {
809 PetscReal _rs = sctx->rs;
810 PetscReal _zero = info->zeropivot * _rs;
811
812 PetscFunctionBegin;
813 if (PetscRealPart(sctx->pv) <= _zero && !PetscIsNanScalar(sctx->pv)) {
814 /* force matfactor to be diagonally dominant */
815 if (sctx->nshift == sctx->nshift_max) {
816 sctx->shift_fraction = sctx->shift_hi;
817 } else {
818 sctx->shift_lo = sctx->shift_fraction;
819 sctx->shift_fraction = (sctx->shift_hi + sctx->shift_lo) / (PetscReal)2.;
820 }
821 sctx->shift_amount = sctx->shift_fraction * sctx->shift_top;
822 sctx->nshift++;
823 sctx->newshift = PETSC_TRUE;
824 } else {
825 sctx->newshift = PETSC_FALSE;
826 }
827 PetscFunctionReturn(PETSC_SUCCESS);
828 }
829
MatPivotCheck_inblocks(PETSC_UNUSED Mat mat,const MatFactorInfo * info,FactorShiftCtx * sctx,PETSC_UNUSED PetscInt row)830 static inline PetscErrorCode MatPivotCheck_inblocks(PETSC_UNUSED Mat mat, const MatFactorInfo *info, FactorShiftCtx *sctx, PETSC_UNUSED PetscInt row)
831 {
832 PetscReal _zero = info->zeropivot;
833
834 PetscFunctionBegin;
835 if (PetscAbsScalar(sctx->pv) <= _zero && !PetscIsNanScalar(sctx->pv)) {
836 sctx->pv += info->shiftamount;
837 sctx->shift_amount = 0.0;
838 sctx->nshift++;
839 }
840 sctx->newshift = PETSC_FALSE;
841 PetscFunctionReturn(PETSC_SUCCESS);
842 }
843
MatPivotCheck_none(Mat fact,Mat mat,const MatFactorInfo * info,FactorShiftCtx * sctx,PetscInt row)844 static inline PetscErrorCode MatPivotCheck_none(Mat fact, Mat mat, const MatFactorInfo *info, FactorShiftCtx *sctx, PetscInt row)
845 {
846 PetscReal _zero = info->zeropivot;
847
848 PetscFunctionBegin;
849 sctx->newshift = PETSC_FALSE;
850 if (PetscAbsScalar(sctx->pv) <= _zero && !PetscIsNanScalar(sctx->pv)) {
851 PetscCheck(!mat->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot row %" PetscInt_FMT " value %g tolerance %g", row, (double)PetscAbsScalar(sctx->pv), (double)_zero);
852 PetscCall(PetscInfo(mat, "Detected zero pivot in factorization in row %" PetscInt_FMT " value %g tolerance %g\n", row, (double)PetscAbsScalar(sctx->pv), (double)_zero));
853 fact->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
854 fact->factorerror_zeropivot_value = PetscAbsScalar(sctx->pv);
855 fact->factorerror_zeropivot_row = row;
856 }
857 PetscFunctionReturn(PETSC_SUCCESS);
858 }
859
MatPivotCheck(Mat fact,Mat mat,const MatFactorInfo * info,FactorShiftCtx * sctx,PetscInt row)860 static inline PetscErrorCode MatPivotCheck(Mat fact, Mat mat, const MatFactorInfo *info, FactorShiftCtx *sctx, PetscInt row)
861 {
862 PetscFunctionBegin;
863 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) PetscCall(MatPivotCheck_nz(mat, info, sctx, row));
864 else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) PetscCall(MatPivotCheck_pd(mat, info, sctx, row));
865 else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) PetscCall(MatPivotCheck_inblocks(mat, info, sctx, row));
866 else PetscCall(MatPivotCheck_none(fact, mat, info, sctx, row));
867 PetscFunctionReturn(PETSC_SUCCESS);
868 }
869
870 #include <petscbt.h>
871 /*
872 Create and initialize a linked list
873 Input Parameters:
874 idx_start - starting index of the list
875 lnk_max - max value of lnk indicating the end of the list
876 nlnk - max length of the list
877 Output Parameters:
878 lnk - list initialized
879 bt - PetscBT (bitarray) with all bits set to false
880 lnk_empty - flg indicating the list is empty
881 */
882 #define PetscLLCreate(idx_start, lnk_max, nlnk, lnk, bt) ((PetscErrorCode)(PetscMalloc1(nlnk, &lnk) || PetscBTCreate(nlnk, &(bt)) || (lnk[idx_start] = lnk_max, PETSC_SUCCESS)))
883
884 #define PetscLLCreate_new(idx_start, lnk_max, nlnk, lnk, bt, lnk_empty) ((PetscErrorCode)(PetscMalloc1(nlnk, &lnk) || PetscBTCreate(nlnk, &(bt)) || (lnk_empty = PETSC_TRUE, 0) || (lnk[idx_start] = lnk_max, PETSC_SUCCESS)))
885
PetscLLInsertLocation_Private(PetscBool assume_sorted,PetscInt k,PetscInt idx_start,PetscInt entry,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnkdata,PetscInt * PETSC_RESTRICT lnk)886 static inline PetscErrorCode PetscLLInsertLocation_Private(PetscBool assume_sorted, PetscInt k, PetscInt idx_start, PetscInt entry, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnkdata, PetscInt *PETSC_RESTRICT lnk)
887 {
888 PetscInt location;
889
890 PetscFunctionBegin;
891 /* start from the beginning if entry < previous entry */
892 if (!assume_sorted && k && entry < *lnkdata) *lnkdata = idx_start;
893 /* search for insertion location */
894 do {
895 location = *lnkdata;
896 *lnkdata = lnk[location];
897 } while (entry > *lnkdata);
898 /* insertion location is found, add entry into lnk */
899 lnk[location] = entry;
900 lnk[entry] = *lnkdata;
901 ++(*nlnk);
902 *lnkdata = entry; /* next search starts from here if next_entry > entry */
903 PetscFunctionReturn(PETSC_SUCCESS);
904 }
905
PetscLLAdd_Private(PetscInt nidx,const PetscInt * PETSC_RESTRICT indices,PetscInt idx_start,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnk,PetscBT bt,PetscBool assume_sorted)906 static inline PetscErrorCode PetscLLAdd_Private(PetscInt nidx, const PetscInt *PETSC_RESTRICT indices, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscBT bt, PetscBool assume_sorted)
907 {
908 PetscFunctionBegin;
909 *nlnk = 0;
910 for (PetscInt k = 0, lnkdata = idx_start; k < nidx; ++k) {
911 const PetscInt entry = indices[k];
912
913 if (!PetscBTLookupSet(bt, entry)) PetscCall(PetscLLInsertLocation_Private(assume_sorted, k, idx_start, entry, nlnk, &lnkdata, lnk));
914 }
915 PetscFunctionReturn(PETSC_SUCCESS);
916 }
917
918 /*
919 Add an index set into a sorted linked list
920 Input Parameters:
921 nidx - number of input indices
922 indices - integer array
923 idx_start - starting index of the list
924 lnk - linked list(an integer array) that is created
925 bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
926 output Parameters:
927 nlnk - number of newly added indices
928 lnk - the sorted(increasing order) linked list containing new and non-redundate entries from indices
929 bt - updated PetscBT (bitarray)
930 */
PetscLLAdd(PetscInt nidx,const PetscInt * PETSC_RESTRICT indices,PetscInt idx_start,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnk,PetscBT bt)931 static inline PetscErrorCode PetscLLAdd(PetscInt nidx, const PetscInt *PETSC_RESTRICT indices, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscBT bt)
932 {
933 PetscFunctionBegin;
934 PetscCall(PetscLLAdd_Private(nidx, indices, idx_start, nlnk, lnk, bt, PETSC_FALSE));
935 PetscFunctionReturn(PETSC_SUCCESS);
936 }
937
938 /*
939 Add a SORTED ascending index set into a sorted linked list - same as PetscLLAdd() bus skip 'if (_k && _entry < _lnkdata) _lnkdata = idx_start;'
940 Input Parameters:
941 nidx - number of input indices
942 indices - sorted integer array
943 idx_start - starting index of the list
944 lnk - linked list(an integer array) that is created
945 bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
946 output Parameters:
947 nlnk - number of newly added indices
948 lnk - the sorted(increasing order) linked list containing new and non-redundate entries from indices
949 bt - updated PetscBT (bitarray)
950 */
PetscLLAddSorted(PetscInt nidx,const PetscInt * PETSC_RESTRICT indices,PetscInt idx_start,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnk,PetscBT bt)951 static inline PetscErrorCode PetscLLAddSorted(PetscInt nidx, const PetscInt *PETSC_RESTRICT indices, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscBT bt)
952 {
953 PetscFunctionBegin;
954 PetscCall(PetscLLAdd_Private(nidx, indices, idx_start, nlnk, lnk, bt, PETSC_TRUE));
955 PetscFunctionReturn(PETSC_SUCCESS);
956 }
957
958 /*
959 Add a permuted index set into a sorted linked list
960 Input Parameters:
961 nidx - number of input indices
962 indices - integer array
963 perm - permutation of indices
964 idx_start - starting index of the list
965 lnk - linked list(an integer array) that is created
966 bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
967 output Parameters:
968 nlnk - number of newly added indices
969 lnk - the sorted(increasing order) linked list containing new and non-redundate entries from indices
970 bt - updated PetscBT (bitarray)
971 */
PetscLLAddPerm(PetscInt nidx,const PetscInt * PETSC_RESTRICT indices,const PetscInt * PETSC_RESTRICT perm,PetscInt idx_start,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnk,PetscBT bt)972 static inline PetscErrorCode PetscLLAddPerm(PetscInt nidx, const PetscInt *PETSC_RESTRICT indices, const PetscInt *PETSC_RESTRICT perm, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscBT bt)
973 {
974 PetscFunctionBegin;
975 *nlnk = 0;
976 for (PetscInt k = 0, lnkdata = idx_start; k < nidx; ++k) {
977 const PetscInt entry = perm[indices[k]];
978
979 if (!PetscBTLookupSet(bt, entry)) PetscCall(PetscLLInsertLocation_Private(PETSC_FALSE, k, idx_start, entry, nlnk, &lnkdata, lnk));
980 }
981 PetscFunctionReturn(PETSC_SUCCESS);
982 }
983
984 #if 0
985 /* this appears to be unused? */
986 static inline PetscErrorCode PetscLLAddSorted_new(PetscInt nidx, PetscInt *indices, PetscInt idx_start, PetscBool *lnk_empty, PetscInt *nlnk, PetscInt *lnk, PetscBT bt)
987 {
988 PetscInt lnkdata = idx_start;
989
990 PetscFunctionBegin;
991 if (*lnk_empty) {
992 for (PetscInt k = 0; k < nidx; ++k) {
993 const PetscInt entry = indices[k], location = lnkdata;
994
995 PetscCall(PetscBTSet(bt,entry)); /* mark the new entry */
996 lnkdata = lnk[location];
997 /* insertion location is found, add entry into lnk */
998 lnk[location] = entry;
999 lnk[entry] = lnkdata;
1000 lnkdata = entry; /* next search starts from here */
1001 }
1002 /* lnk[indices[nidx-1]] = lnk[idx_start];
1003 lnk[idx_start] = indices[0];
1004 PetscCall(PetscBTSet(bt,indices[0]));
1005 for (_k=1; _k<nidx; _k++) {
1006 PetscCall(PetscBTSet(bt,indices[_k]));
1007 lnk[indices[_k-1]] = indices[_k];
1008 }
1009 */
1010 *nlnk = nidx;
1011 *lnk_empty = PETSC_FALSE;
1012 } else {
1013 *nlnk = 0;
1014 for (PetscInt k = 0; k < nidx; ++k) {
1015 const PetscInt entry = indices[k];
1016
1017 if (!PetscBTLookupSet(bt,entry)) PetscCall(PetscLLInsertLocation_Private(PETSC_TRUE,k,idx_start,entry,nlnk,&lnkdata,lnk));
1018 }
1019 }
1020 PetscFunctionReturn(PETSC_SUCCESS);
1021 }
1022 #endif
1023
1024 /*
1025 Add a SORTED index set into a sorted linked list used for LUFactorSymbolic()
1026 Same as PetscLLAddSorted() with an additional operation:
1027 count the number of input indices that are no larger than 'diag'
1028 Input Parameters:
1029 indices - sorted integer array
1030 idx_start - starting index of the list, index of pivot row
1031 lnk - linked list(an integer array) that is created
1032 bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1033 diag - index of the active row in LUFactorSymbolic
1034 nzbd - number of input indices with indices <= idx_start
1035 im - im[idx_start] is initialized as num of nonzero entries in row=idx_start
1036 output Parameters:
1037 nlnk - number of newly added indices
1038 lnk - the sorted(increasing order) linked list containing new and non-redundate entries from indices
1039 bt - updated PetscBT (bitarray)
1040 im - im[idx_start]: unchanged if diag is not an entry
1041 : num of entries with indices <= diag if diag is an entry
1042 */
PetscLLAddSortedLU(const PetscInt * PETSC_RESTRICT indices,PetscInt idx_start,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnk,PetscBT bt,PetscInt diag,PetscInt nzbd,PetscInt * PETSC_RESTRICT im)1043 static inline PetscErrorCode PetscLLAddSortedLU(const PetscInt *PETSC_RESTRICT indices, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscBT bt, PetscInt diag, PetscInt nzbd, PetscInt *PETSC_RESTRICT im)
1044 {
1045 const PetscInt nidx = im[idx_start] - nzbd; /* num of entries with idx_start < index <= diag */
1046
1047 PetscFunctionBegin;
1048 *nlnk = 0;
1049 for (PetscInt k = 0, lnkdata = idx_start; k < nidx; ++k) {
1050 const PetscInt entry = indices[k];
1051
1052 ++nzbd;
1053 if (entry == diag) im[idx_start] = nzbd;
1054 if (!PetscBTLookupSet(bt, entry)) PetscCall(PetscLLInsertLocation_Private(PETSC_TRUE, k, idx_start, entry, nlnk, &lnkdata, lnk));
1055 }
1056 PetscFunctionReturn(PETSC_SUCCESS);
1057 }
1058
1059 /*
1060 Copy data on the list into an array, then initialize the list
1061 Input Parameters:
1062 idx_start - starting index of the list
1063 lnk_max - max value of lnk indicating the end of the list
1064 nlnk - number of data on the list to be copied
1065 lnk - linked list
1066 bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1067 output Parameters:
1068 indices - array that contains the copied data
1069 lnk - linked list that is cleaned and initialize
1070 bt - PetscBT (bitarray) with all bits set to false
1071 */
PetscLLClean(PetscInt idx_start,PetscInt lnk_max,PetscInt nlnk,PetscInt * PETSC_RESTRICT lnk,PetscInt * PETSC_RESTRICT indices,PetscBT bt)1072 static inline PetscErrorCode PetscLLClean(PetscInt idx_start, PetscInt lnk_max, PetscInt nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT indices, PetscBT bt)
1073 {
1074 PetscFunctionBegin;
1075 for (PetscInt j = 0, idx = idx_start; j < nlnk; ++j) {
1076 idx = lnk[idx];
1077 indices[j] = idx;
1078 PetscCall(PetscBTClear(bt, idx));
1079 }
1080 lnk[idx_start] = lnk_max;
1081 PetscFunctionReturn(PETSC_SUCCESS);
1082 }
1083
1084 /*
1085 Free memories used by the list
1086 */
1087 #define PetscLLDestroy(lnk, bt) ((PetscErrorCode)(PetscFree(lnk) || PetscBTDestroy(&(bt))))
1088
1089 /* Routines below are used for incomplete matrix factorization */
1090 /*
1091 Create and initialize a linked list and its levels
1092 Input Parameters:
1093 idx_start - starting index of the list
1094 lnk_max - max value of lnk indicating the end of the list
1095 nlnk - max length of the list
1096 Output Parameters:
1097 lnk - list initialized
1098 lnk_lvl - array of size nlnk for storing levels of lnk
1099 bt - PetscBT (bitarray) with all bits set to false
1100 */
1101 #define PetscIncompleteLLCreate(idx_start, lnk_max, nlnk, lnk, lnk_lvl, bt) \
1102 ((PetscErrorCode)(PetscIntMultError(2, nlnk, NULL) || PetscMalloc1(2 * nlnk, &lnk) || PetscBTCreate(nlnk, &(bt)) || (lnk[idx_start] = lnk_max, lnk_lvl = lnk + nlnk, PETSC_SUCCESS)))
1103
PetscIncompleteLLInsertLocation_Private(PetscBool assume_sorted,PetscInt k,PetscInt idx_start,PetscInt entry,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnkdata,PetscInt * PETSC_RESTRICT lnk,PetscInt * PETSC_RESTRICT lnklvl,PetscInt newval)1104 static inline PetscErrorCode PetscIncompleteLLInsertLocation_Private(PetscBool assume_sorted, PetscInt k, PetscInt idx_start, PetscInt entry, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnkdata, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscInt newval)
1105 {
1106 PetscFunctionBegin;
1107 PetscCall(PetscLLInsertLocation_Private(assume_sorted, k, idx_start, entry, nlnk, lnkdata, lnk));
1108 lnklvl[entry] = newval;
1109 PetscFunctionReturn(PETSC_SUCCESS);
1110 }
1111
1112 /*
1113 Initialize a sorted linked list used for ILU and ICC
1114 Input Parameters:
1115 nidx - number of input idx
1116 idx - integer array used for storing column indices
1117 idx_start - starting index of the list
1118 perm - indices of an IS
1119 lnk - linked list(an integer array) that is created
1120 lnklvl - levels of lnk
1121 bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1122 output Parameters:
1123 nlnk - number of newly added idx
1124 lnk - the sorted(increasing order) linked list containing new and non-redundate entries from idx
1125 lnklvl - levels of lnk
1126 bt - updated PetscBT (bitarray)
1127 */
PetscIncompleteLLInit(PetscInt nidx,const PetscInt * PETSC_RESTRICT idx,PetscInt idx_start,const PetscInt * PETSC_RESTRICT perm,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnk,PetscInt * PETSC_RESTRICT lnklvl,PetscBT bt)1128 static inline PetscErrorCode PetscIncompleteLLInit(PetscInt nidx, const PetscInt *PETSC_RESTRICT idx, PetscInt idx_start, const PetscInt *PETSC_RESTRICT perm, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscBT bt)
1129 {
1130 PetscFunctionBegin;
1131 *nlnk = 0;
1132 for (PetscInt k = 0, lnkdata = idx_start; k < nidx; ++k) {
1133 const PetscInt entry = perm[idx[k]];
1134
1135 if (!PetscBTLookupSet(bt, entry)) PetscCall(PetscIncompleteLLInsertLocation_Private(PETSC_FALSE, k, idx_start, entry, nlnk, &lnkdata, lnk, lnklvl, 0));
1136 }
1137 PetscFunctionReturn(PETSC_SUCCESS);
1138 }
1139
PetscIncompleteLLAdd_Private(PetscInt nidx,const PetscInt * PETSC_RESTRICT idx,PetscReal level,const PetscInt * PETSC_RESTRICT idxlvl,PetscInt idx_start,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnk,PetscInt * PETSC_RESTRICT lnklvl,PetscBT bt,PetscInt prow_offset,PetscBool assume_sorted)1140 static inline PetscErrorCode PetscIncompleteLLAdd_Private(PetscInt nidx, const PetscInt *PETSC_RESTRICT idx, PetscReal level, const PetscInt *PETSC_RESTRICT idxlvl, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscBT bt, PetscInt prow_offset, PetscBool assume_sorted)
1141 {
1142 PetscFunctionBegin;
1143 *nlnk = 0;
1144 for (PetscInt k = 0, lnkdata = idx_start; k < nidx; ++k) {
1145 const PetscInt incrlev = idxlvl[k] + prow_offset + 1;
1146
1147 if (incrlev <= level) {
1148 const PetscInt entry = idx[k];
1149
1150 if (!PetscBTLookupSet(bt, entry)) PetscCall(PetscIncompleteLLInsertLocation_Private(assume_sorted, k, idx_start, entry, nlnk, &lnkdata, lnk, lnklvl, incrlev));
1151 else if (lnklvl[entry] > incrlev) lnklvl[entry] = incrlev; /* existing entry */
1152 }
1153 }
1154 PetscFunctionReturn(PETSC_SUCCESS);
1155 }
1156
1157 /*
1158 Add a SORTED index set into a sorted linked list for ICC
1159 Input Parameters:
1160 nidx - number of input indices
1161 idx - sorted integer array used for storing column indices
1162 level - level of fill, e.g., ICC(level)
1163 idxlvl - level of idx
1164 idx_start - starting index of the list
1165 lnk - linked list(an integer array) that is created
1166 lnklvl - levels of lnk
1167 bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1168 idxlvl_prow - idxlvl[prow], where prow is the row number of the idx
1169 output Parameters:
1170 nlnk - number of newly added indices
1171 lnk - the sorted(increasing order) linked list containing new and non-redundate entries from idx
1172 lnklvl - levels of lnk
1173 bt - updated PetscBT (bitarray)
1174 Note: the level of U(i,j) is set as lvl(i,j) = min{ lvl(i,j), lvl(prow,i)+lvl(prow,j)+1)
1175 where idx = non-zero columns of U(prow,prow+1:n-1), prow<i
1176 */
PetscICCLLAddSorted(PetscInt nidx,const PetscInt * PETSC_RESTRICT idx,PetscReal level,const PetscInt * PETSC_RESTRICT idxlvl,PetscInt idx_start,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnk,PetscInt * PETSC_RESTRICT lnklvl,PetscBT bt,PetscInt idxlvl_prow)1177 static inline PetscErrorCode PetscICCLLAddSorted(PetscInt nidx, const PetscInt *PETSC_RESTRICT idx, PetscReal level, const PetscInt *PETSC_RESTRICT idxlvl, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscBT bt, PetscInt idxlvl_prow)
1178 {
1179 PetscFunctionBegin;
1180 PetscCall(PetscIncompleteLLAdd_Private(nidx, idx, level, idxlvl, idx_start, nlnk, lnk, lnklvl, bt, idxlvl_prow, PETSC_TRUE));
1181 PetscFunctionReturn(PETSC_SUCCESS);
1182 }
1183
1184 /*
1185 Add a SORTED index set into a sorted linked list for ILU
1186 Input Parameters:
1187 nidx - number of input indices
1188 idx - sorted integer array used for storing column indices
1189 level - level of fill, e.g., ICC(level)
1190 idxlvl - level of idx
1191 idx_start - starting index of the list
1192 lnk - linked list(an integer array) that is created
1193 lnklvl - levels of lnk
1194 bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1195 prow - the row number of idx
1196 output Parameters:
1197 nlnk - number of newly added idx
1198 lnk - the sorted(increasing order) linked list containing new and non-redundate entries from idx
1199 lnklvl - levels of lnk
1200 bt - updated PetscBT (bitarray)
1201
1202 Note: the level of factor(i,j) is set as lvl(i,j) = min{ lvl(i,j), lvl(i,prow)+lvl(prow,j)+1)
1203 where idx = non-zero columns of U(prow,prow+1:n-1), prow<i
1204 */
PetscILULLAddSorted(PetscInt nidx,const PetscInt * PETSC_RESTRICT idx,PetscInt level,const PetscInt * PETSC_RESTRICT idxlvl,PetscInt idx_start,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnk,PetscInt * PETSC_RESTRICT lnklvl,PetscBT bt,PetscInt prow)1205 static inline PetscErrorCode PetscILULLAddSorted(PetscInt nidx, const PetscInt *PETSC_RESTRICT idx, PetscInt level, const PetscInt *PETSC_RESTRICT idxlvl, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscBT bt, PetscInt prow)
1206 {
1207 PetscFunctionBegin;
1208 PetscCall(PetscIncompleteLLAdd_Private(nidx, idx, level, idxlvl, idx_start, nlnk, lnk, lnklvl, bt, lnklvl[prow], PETSC_TRUE));
1209 PetscFunctionReturn(PETSC_SUCCESS);
1210 }
1211
1212 /*
1213 Add a index set into a sorted linked list
1214 Input Parameters:
1215 nidx - number of input idx
1216 idx - integer array used for storing column indices
1217 level - level of fill, e.g., ICC(level)
1218 idxlvl - level of idx
1219 idx_start - starting index of the list
1220 lnk - linked list(an integer array) that is created
1221 lnklvl - levels of lnk
1222 bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1223 output Parameters:
1224 nlnk - number of newly added idx
1225 lnk - the sorted(increasing order) linked list containing new and non-redundate entries from idx
1226 lnklvl - levels of lnk
1227 bt - updated PetscBT (bitarray)
1228 */
PetscIncompleteLLAdd(PetscInt nidx,const PetscInt * PETSC_RESTRICT idx,PetscReal level,const PetscInt * PETSC_RESTRICT idxlvl,PetscInt idx_start,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnk,PetscInt * PETSC_RESTRICT lnklvl,PetscBT bt)1229 static inline PetscErrorCode PetscIncompleteLLAdd(PetscInt nidx, const PetscInt *PETSC_RESTRICT idx, PetscReal level, const PetscInt *PETSC_RESTRICT idxlvl, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscBT bt)
1230 {
1231 PetscFunctionBegin;
1232 PetscCall(PetscIncompleteLLAdd_Private(nidx, idx, level, idxlvl, idx_start, nlnk, lnk, lnklvl, bt, 0, PETSC_FALSE));
1233 PetscFunctionReturn(PETSC_SUCCESS);
1234 }
1235
1236 /*
1237 Add a SORTED index set into a sorted linked list
1238 Input Parameters:
1239 nidx - number of input indices
1240 idx - sorted integer array used for storing column indices
1241 level - level of fill, e.g., ICC(level)
1242 idxlvl - level of idx
1243 idx_start - starting index of the list
1244 lnk - linked list(an integer array) that is created
1245 lnklvl - levels of lnk
1246 bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1247 output Parameters:
1248 nlnk - number of newly added idx
1249 lnk - the sorted(increasing order) linked list containing new and non-redundate entries from idx
1250 lnklvl - levels of lnk
1251 bt - updated PetscBT (bitarray)
1252 */
PetscIncompleteLLAddSorted(PetscInt nidx,const PetscInt * PETSC_RESTRICT idx,PetscReal level,const PetscInt * PETSC_RESTRICT idxlvl,PetscInt idx_start,PetscInt * PETSC_RESTRICT nlnk,PetscInt * PETSC_RESTRICT lnk,PetscInt * PETSC_RESTRICT lnklvl,PetscBT bt)1253 static inline PetscErrorCode PetscIncompleteLLAddSorted(PetscInt nidx, const PetscInt *PETSC_RESTRICT idx, PetscReal level, const PetscInt *PETSC_RESTRICT idxlvl, PetscInt idx_start, PetscInt *PETSC_RESTRICT nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscBT bt)
1254 {
1255 PetscFunctionBegin;
1256 PetscCall(PetscIncompleteLLAdd_Private(nidx, idx, level, idxlvl, idx_start, nlnk, lnk, lnklvl, bt, 0, PETSC_TRUE));
1257 PetscFunctionReturn(PETSC_SUCCESS);
1258 }
1259
1260 /*
1261 Copy data on the list into an array, then initialize the list
1262 Input Parameters:
1263 idx_start - starting index of the list
1264 lnk_max - max value of lnk indicating the end of the list
1265 nlnk - number of data on the list to be copied
1266 lnk - linked list
1267 lnklvl - level of lnk
1268 bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1269 output Parameters:
1270 indices - array that contains the copied data
1271 lnk - linked list that is cleaned and initialize
1272 lnklvl - level of lnk that is reinitialized
1273 bt - PetscBT (bitarray) with all bits set to false
1274 */
PetscIncompleteLLClean(PetscInt idx_start,PetscInt lnk_max,PetscInt nlnk,PetscInt * PETSC_RESTRICT lnk,PetscInt * PETSC_RESTRICT lnklvl,PetscInt * PETSC_RESTRICT indices,PetscInt * PETSC_RESTRICT indiceslvl,PetscBT bt)1275 static inline PetscErrorCode PetscIncompleteLLClean(PetscInt idx_start, PetscInt lnk_max, PetscInt nlnk, PetscInt *PETSC_RESTRICT lnk, PetscInt *PETSC_RESTRICT lnklvl, PetscInt *PETSC_RESTRICT indices, PetscInt *PETSC_RESTRICT indiceslvl, PetscBT bt)
1276 {
1277 PetscFunctionBegin;
1278 for (PetscInt j = 0, idx = idx_start; j < nlnk; ++j) {
1279 idx = lnk[idx];
1280 indices[j] = idx;
1281 indiceslvl[j] = lnklvl[idx];
1282 lnklvl[idx] = -1;
1283 PetscCall(PetscBTClear(bt, idx));
1284 }
1285 lnk[idx_start] = lnk_max;
1286 PetscFunctionReturn(PETSC_SUCCESS);
1287 }
1288
1289 /*
1290 Free memories used by the list
1291 */
1292 #define PetscIncompleteLLDestroy(lnk, bt) ((PetscErrorCode)(PetscFree(lnk) || PetscBTDestroy(&(bt))))
1293
1294 #if !defined(PETSC_CLANG_STATIC_ANALYZER)
1295 #define MatCheckSameLocalSize(A, ar1, B, ar2) \
1296 do { \
1297 PetscCheckSameComm(A, ar1, B, ar2); \
1298 PetscCheck(((A)->rmap->n == (B)->rmap->n) && ((A)->cmap->n == (B)->cmap->n), PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Incompatible matrix local sizes: parameter # %d (%" PetscInt_FMT " x %" PetscInt_FMT ") != parameter # %d (%" PetscInt_FMT " x %" PetscInt_FMT ")", ar1, \
1299 (A)->rmap->n, (A)->cmap->n, ar2, (B)->rmap->n, (B)->cmap->n); \
1300 } while (0)
1301 #define MatCheckSameSize(A, ar1, B, ar2) \
1302 do { \
1303 PetscCheck(((A)->rmap->N == (B)->rmap->N) && ((A)->cmap->N == (B)->cmap->N), PetscObjectComm((PetscObject)(A)), PETSC_ERR_ARG_INCOMP, "Incompatible matrix global sizes: parameter # %d (%" PetscInt_FMT " x %" PetscInt_FMT ") != parameter # %d (%" PetscInt_FMT " x %" PetscInt_FMT ")", ar1, \
1304 (A)->rmap->N, (A)->cmap->N, ar2, (B)->rmap->N, (B)->cmap->N); \
1305 MatCheckSameLocalSize(A, ar1, B, ar2); \
1306 } while (0)
1307 #else
1308 template <typename Tm>
1309 extern void MatCheckSameLocalSize(Tm, int, Tm, int);
1310 template <typename Tm>
1311 extern void MatCheckSameSize(Tm, int, Tm, int);
1312 #endif
1313
1314 #define VecCheckMatCompatible(M, x, ar1, b, ar2) \
1315 do { \
1316 PetscCheck((M)->cmap->N == (x)->map->N, PetscObjectComm((PetscObject)(M)), PETSC_ERR_ARG_SIZ, "Vector global length incompatible with matrix: parameter # %d global size %" PetscInt_FMT " != matrix column global size %" PetscInt_FMT, ar1, (x)->map->N, \
1317 (M)->cmap->N); \
1318 PetscCheck((M)->rmap->N == (b)->map->N, PetscObjectComm((PetscObject)(M)), PETSC_ERR_ARG_SIZ, "Vector global length incompatible with matrix: parameter # %d global size %" PetscInt_FMT " != matrix row global size %" PetscInt_FMT, ar2, (b)->map->N, \
1319 (M)->rmap->N); \
1320 } while (0)
1321
1322 /*
1323 Create and initialize a condensed linked list -
1324 same as PetscLLCreate(), but uses a scalable array 'lnk' with size of max number of entries, not O(N).
1325 Barry suggested this approach (Dec. 6, 2011):
1326 I've thought of an alternative way of representing a linked list that is efficient but doesn't have the O(N) scaling issue
1327 (it may be faster than the O(N) even sequentially due to less crazy memory access).
1328
1329 Instead of having some like a 2 -> 4 -> 11 -> 22 list that uses slot 2 4 11 and 22 in a big array use a small array with two slots
1330 for each entry for example [ 2 1 | 4 3 | 22 -1 | 11 2] so the first number (of the pair) is the value while the second tells you where
1331 in the list the next entry is. Inserting a new link means just append another pair at the end. For example say we want to insert 13 into the
1332 list it would then become [2 1 | 4 3 | 22 -1 | 11 4 | 13 2 ] you just add a pair at the end and fix the point for the one that points to it.
1333 That is 11 use to point to the 2 slot, after the change 11 points to the 4th slot which has the value 13. Note that values are always next
1334 to each other so memory access is much better than using the big array.
1335
1336 Example:
1337 nlnk_max=5, lnk_max=36:
1338 Initial list: [0, 0 | 36, 2 | 0, 0 | 0, 0 | 0, 0 | 0, 0 | 0, 0]
1339 here, head_node has index 2 with value lnk[2]=lnk_max=36,
1340 0-th entry is used to store the number of entries in the list,
1341 The initial lnk represents head -> tail(marked by 36) with number of entries = lnk[0]=0.
1342
1343 Now adding a sorted set {2,4}, the list becomes
1344 [2, 0 | 36, 4 |2, 6 | 4, 2 | 0, 0 | 0, 0 | 0, 0 ]
1345 represents head -> 2 -> 4 -> tail with number of entries = lnk[0]=2.
1346
1347 Then adding a sorted set {0,3,35}, the list
1348 [5, 0 | 36, 8 | 2, 10 | 4, 12 | 0, 4 | 3, 6 | 35, 2 ]
1349 represents head -> 0 -> 2 -> 3 -> 4 -> 35 -> tail with number of entries = lnk[0]=5.
1350
1351 Input Parameters:
1352 nlnk_max - max length of the list
1353 lnk_max - max value of the entries
1354 Output Parameters:
1355 lnk - list created and initialized
1356 bt - PetscBT (bitarray) with all bits set to false. Note: bt has size lnk_max, not nln_max!
1357 */
PetscLLCondensedCreate(PetscInt nlnk_max,PetscInt lnk_max,PetscInt ** lnk,PetscBT * bt)1358 static inline PetscErrorCode PetscLLCondensedCreate(PetscInt nlnk_max, PetscInt lnk_max, PetscInt **lnk, PetscBT *bt)
1359 {
1360 PetscInt *llnk, lsize = 0;
1361
1362 PetscFunctionBegin;
1363 PetscCall(PetscIntMultError(2, nlnk_max + 2, &lsize));
1364 PetscCall(PetscMalloc1(lsize, lnk));
1365 PetscCall(PetscBTCreate(lnk_max, bt));
1366 llnk = *lnk;
1367 llnk[0] = 0; /* number of entries on the list */
1368 llnk[2] = lnk_max; /* value in the head node */
1369 llnk[3] = 2; /* next for the head node */
1370 PetscFunctionReturn(PETSC_SUCCESS);
1371 }
1372
1373 /*
1374 Add a SORTED ascending index set into a sorted linked list. See PetscLLCondensedCreate() for detailed description.
1375 Input Parameters:
1376 nidx - number of input indices
1377 indices - sorted integer array
1378 lnk - condensed linked list(an integer array) that is created
1379 bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1380 output Parameters:
1381 lnk - the sorted(increasing order) linked list containing previous and newly added non-redundate indices
1382 bt - updated PetscBT (bitarray)
1383 */
PetscLLCondensedAddSorted(PetscInt nidx,const PetscInt indices[],PetscInt lnk[],PetscBT bt)1384 static inline PetscErrorCode PetscLLCondensedAddSorted(PetscInt nidx, const PetscInt indices[], PetscInt lnk[], PetscBT bt)
1385 {
1386 PetscInt location = 2; /* head */
1387 PetscInt nlnk = lnk[0]; /* num of entries on the input lnk */
1388
1389 PetscFunctionBegin;
1390 for (PetscInt k = 0; k < nidx; k++) {
1391 const PetscInt entry = indices[k];
1392 if (!PetscBTLookupSet(bt, entry)) { /* new entry */
1393 PetscInt next, lnkdata;
1394
1395 /* search for insertion location */
1396 do {
1397 next = location + 1; /* link from previous node to next node */
1398 location = lnk[next]; /* idx of next node */
1399 lnkdata = lnk[location]; /* value of next node */
1400 } while (entry > lnkdata);
1401 /* insertion location is found, add entry into lnk */
1402 const PetscInt newnode = 2 * (nlnk + 2); /* index for this new node */
1403 lnk[next] = newnode; /* connect previous node to the new node */
1404 lnk[newnode] = entry; /* set value of the new node */
1405 lnk[newnode + 1] = location; /* connect new node to next node */
1406 location = newnode; /* next search starts from the new node */
1407 nlnk++;
1408 }
1409 }
1410 lnk[0] = nlnk; /* number of entries in the list */
1411 PetscFunctionReturn(PETSC_SUCCESS);
1412 }
1413
PetscLLCondensedClean(PetscInt lnk_max,PETSC_UNUSED PetscInt nidx,PetscInt * indices,PetscInt lnk[],PetscBT bt)1414 static inline PetscErrorCode PetscLLCondensedClean(PetscInt lnk_max, PETSC_UNUSED PetscInt nidx, PetscInt *indices, PetscInt lnk[], PetscBT bt)
1415 {
1416 const PetscInt nlnk = lnk[0]; /* num of entries on the list */
1417 PetscInt next = lnk[3]; /* head node */
1418
1419 PetscFunctionBegin;
1420 for (PetscInt k = 0; k < nlnk; k++) {
1421 indices[k] = lnk[next];
1422 next = lnk[next + 1];
1423 PetscCall(PetscBTClear(bt, indices[k]));
1424 }
1425 lnk[0] = 0; /* num of entries on the list */
1426 lnk[2] = lnk_max; /* initialize head node */
1427 lnk[3] = 2; /* head node */
1428 PetscFunctionReturn(PETSC_SUCCESS);
1429 }
1430
PetscLLCondensedView(PetscInt * lnk)1431 static inline PetscErrorCode PetscLLCondensedView(PetscInt *lnk)
1432 {
1433 PetscFunctionBegin;
1434 PetscCall(PetscPrintf(PETSC_COMM_SELF, "LLCondensed of size %" PetscInt_FMT ", (val, next)\n", lnk[0]));
1435 for (PetscInt k = 2; k < lnk[0] + 2; ++k) PetscCall(PetscPrintf(PETSC_COMM_SELF, " %" PetscInt_FMT ": (%" PetscInt_FMT ", %" PetscInt_FMT ")\n", 2 * k, lnk[2 * k], lnk[2 * k + 1]));
1436 PetscFunctionReturn(PETSC_SUCCESS);
1437 }
1438
1439 /*
1440 Free memories used by the list
1441 */
PetscLLCondensedDestroy(PetscInt * lnk,PetscBT bt)1442 static inline PetscErrorCode PetscLLCondensedDestroy(PetscInt *lnk, PetscBT bt)
1443 {
1444 PetscFunctionBegin;
1445 PetscCall(PetscFree(lnk));
1446 PetscCall(PetscBTDestroy(&bt));
1447 PetscFunctionReturn(PETSC_SUCCESS);
1448 }
1449
1450 /*
1451 Same as PetscLLCondensedCreate(), but does not use non-scalable O(lnk_max) bitarray
1452 Input Parameters:
1453 nlnk_max - max length of the list
1454 Output Parameters:
1455 lnk - list created and initialized
1456 */
PetscLLCondensedCreate_Scalable(PetscInt nlnk_max,PetscInt ** lnk)1457 static inline PetscErrorCode PetscLLCondensedCreate_Scalable(PetscInt nlnk_max, PetscInt **lnk)
1458 {
1459 PetscInt *llnk, lsize = 0;
1460
1461 PetscFunctionBegin;
1462 PetscCall(PetscIntMultError(2, nlnk_max + 2, &lsize));
1463 PetscCall(PetscMalloc1(lsize, lnk));
1464 llnk = *lnk;
1465 llnk[0] = 0; /* number of entries on the list */
1466 llnk[2] = PETSC_INT_MAX; /* value in the head node */
1467 llnk[3] = 2; /* next for the head node */
1468 PetscFunctionReturn(PETSC_SUCCESS);
1469 }
1470
PetscLLCondensedExpand_Scalable(PetscInt nlnk_max,PetscInt ** lnk)1471 static inline PetscErrorCode PetscLLCondensedExpand_Scalable(PetscInt nlnk_max, PetscInt **lnk)
1472 {
1473 PetscInt lsize = 0;
1474
1475 PetscFunctionBegin;
1476 PetscCall(PetscIntMultError(2, nlnk_max + 2, &lsize));
1477 PetscCall(PetscRealloc(lsize * sizeof(PetscInt), lnk));
1478 PetscFunctionReturn(PETSC_SUCCESS);
1479 }
1480
PetscLLCondensedAddSorted_Scalable(PetscInt nidx,const PetscInt indices[],PetscInt lnk[])1481 static inline PetscErrorCode PetscLLCondensedAddSorted_Scalable(PetscInt nidx, const PetscInt indices[], PetscInt lnk[])
1482 {
1483 PetscInt location = 2; /* head */
1484 PetscInt nlnk = lnk[0]; /* num of entries on the input lnk */
1485
1486 for (PetscInt k = 0; k < nidx; k++) {
1487 const PetscInt entry = indices[k];
1488 PetscInt next, lnkdata;
1489
1490 /* search for insertion location */
1491 do {
1492 next = location + 1; /* link from previous node to next node */
1493 location = lnk[next]; /* idx of next node */
1494 lnkdata = lnk[location]; /* value of next node */
1495 } while (entry > lnkdata);
1496 if (entry < lnkdata) {
1497 /* insertion location is found, add entry into lnk */
1498 const PetscInt newnode = 2 * (nlnk + 2); /* index for this new node */
1499 lnk[next] = newnode; /* connect previous node to the new node */
1500 lnk[newnode] = entry; /* set value of the new node */
1501 lnk[newnode + 1] = location; /* connect new node to next node */
1502 location = newnode; /* next search starts from the new node */
1503 nlnk++;
1504 }
1505 }
1506 lnk[0] = nlnk; /* number of entries in the list */
1507 return PETSC_SUCCESS;
1508 }
1509
PetscLLCondensedClean_Scalable(PETSC_UNUSED PetscInt nidx,PetscInt * indices,PetscInt * lnk)1510 static inline PetscErrorCode PetscLLCondensedClean_Scalable(PETSC_UNUSED PetscInt nidx, PetscInt *indices, PetscInt *lnk)
1511 {
1512 const PetscInt nlnk = lnk[0];
1513 PetscInt next = lnk[3]; /* head node */
1514
1515 for (PetscInt k = 0; k < nlnk; k++) {
1516 indices[k] = lnk[next];
1517 next = lnk[next + 1];
1518 }
1519 lnk[0] = 0; /* num of entries on the list */
1520 lnk[3] = 2; /* head node */
1521 return PETSC_SUCCESS;
1522 }
1523
PetscLLCondensedDestroy_Scalable(PetscInt * lnk)1524 static inline PetscErrorCode PetscLLCondensedDestroy_Scalable(PetscInt *lnk)
1525 {
1526 return PetscFree(lnk);
1527 }
1528
1529 /*
1530 lnk[0] number of links
1531 lnk[1] number of entries
1532 lnk[3n] value
1533 lnk[3n+1] len
1534 lnk[3n+2] link to next value
1535
1536 The next three are always the first link
1537
1538 lnk[3] PETSC_INT_MIN+1
1539 lnk[4] 1
1540 lnk[5] link to first real entry
1541
1542 The next three are always the last link
1543
1544 lnk[6] PETSC_INT_MAX - 1
1545 lnk[7] 1
1546 lnk[8] next valid link (this is the same as lnk[0] but without the decreases)
1547 */
1548
PetscLLCondensedCreate_fast(PetscInt nlnk_max,PetscInt ** lnk)1549 static inline PetscErrorCode PetscLLCondensedCreate_fast(PetscInt nlnk_max, PetscInt **lnk)
1550 {
1551 PetscInt *llnk;
1552 PetscInt lsize = 0;
1553
1554 PetscFunctionBegin;
1555 PetscCall(PetscIntMultError(3, nlnk_max + 3, &lsize));
1556 PetscCall(PetscMalloc1(lsize, lnk));
1557 llnk = *lnk;
1558 llnk[0] = 0; /* nlnk: number of entries on the list */
1559 llnk[1] = 0; /* number of integer entries represented in list */
1560 llnk[3] = PETSC_INT_MIN + 1; /* value in the first node */
1561 llnk[4] = 1; /* count for the first node */
1562 llnk[5] = 6; /* next for the first node */
1563 llnk[6] = PETSC_INT_MAX - 1; /* value in the last node */
1564 llnk[7] = 1; /* count for the last node */
1565 llnk[8] = 0; /* next valid node to be used */
1566 PetscFunctionReturn(PETSC_SUCCESS);
1567 }
1568
PetscLLCondensedAddSorted_fast(PetscInt nidx,const PetscInt indices[],PetscInt lnk[])1569 static inline PetscErrorCode PetscLLCondensedAddSorted_fast(PetscInt nidx, const PetscInt indices[], PetscInt lnk[])
1570 {
1571 for (PetscInt k = 0, prev = 3 /* first value */; k < nidx; k++) {
1572 const PetscInt entry = indices[k];
1573 PetscInt next = lnk[prev + 2];
1574
1575 /* search for insertion location */
1576 while (entry >= lnk[next]) {
1577 prev = next;
1578 next = lnk[next + 2];
1579 }
1580 /* entry is in range of previous list */
1581 if (entry < lnk[prev] + lnk[prev + 1]) continue;
1582 lnk[1]++;
1583 /* entry is right after previous list */
1584 if (entry == lnk[prev] + lnk[prev + 1]) {
1585 lnk[prev + 1]++;
1586 if (lnk[next] == entry + 1) { /* combine two contiguous strings */
1587 lnk[prev + 1] += lnk[next + 1];
1588 lnk[prev + 2] = lnk[next + 2];
1589 next = lnk[next + 2];
1590 lnk[0]--;
1591 }
1592 continue;
1593 }
1594 /* entry is right before next list */
1595 if (entry == lnk[next] - 1) {
1596 lnk[next]--;
1597 lnk[next + 1]++;
1598 prev = next;
1599 next = lnk[prev + 2];
1600 continue;
1601 }
1602 /* add entry into lnk */
1603 lnk[prev + 2] = 3 * ((lnk[8]++) + 3); /* connect previous node to the new node */
1604 prev = lnk[prev + 2];
1605 lnk[prev] = entry; /* set value of the new node */
1606 lnk[prev + 1] = 1; /* number of values in contiguous string is one to start */
1607 lnk[prev + 2] = next; /* connect new node to next node */
1608 lnk[0]++;
1609 }
1610 return PETSC_SUCCESS;
1611 }
1612
PetscLLCondensedClean_fast(PETSC_UNUSED PetscInt nidx,PetscInt * indices,PetscInt * lnk)1613 static inline PetscErrorCode PetscLLCondensedClean_fast(PETSC_UNUSED PetscInt nidx, PetscInt *indices, PetscInt *lnk)
1614 {
1615 const PetscInt nlnk = lnk[0];
1616 PetscInt next = lnk[5]; /* first node */
1617
1618 for (PetscInt k = 0, cnt = 0; k < nlnk; k++) {
1619 for (PetscInt j = 0; j < lnk[next + 1]; j++) indices[cnt++] = lnk[next] + j;
1620 next = lnk[next + 2];
1621 }
1622 lnk[0] = 0; /* nlnk: number of links */
1623 lnk[1] = 0; /* number of integer entries represented in list */
1624 lnk[3] = PETSC_INT_MIN + 1; /* value in the first node */
1625 lnk[4] = 1; /* count for the first node */
1626 lnk[5] = 6; /* next for the first node */
1627 lnk[6] = PETSC_INT_MAX - 1; /* value in the last node */
1628 lnk[7] = 1; /* count for the last node */
1629 lnk[8] = 0; /* next valid location to make link */
1630 return PETSC_SUCCESS;
1631 }
1632
PetscLLCondensedView_fast(const PetscInt * lnk)1633 static inline PetscErrorCode PetscLLCondensedView_fast(const PetscInt *lnk)
1634 {
1635 const PetscInt nlnk = lnk[0];
1636 PetscInt next = lnk[5]; /* first node */
1637
1638 for (PetscInt k = 0; k < nlnk; k++) {
1639 #if 0 /* Debugging code */
1640 printf("%d value %d len %d next %d\n", next, lnk[next], lnk[next + 1], lnk[next + 2]);
1641 #endif
1642 next = lnk[next + 2];
1643 }
1644 return PETSC_SUCCESS;
1645 }
1646
PetscLLCondensedDestroy_fast(PetscInt * lnk)1647 static inline PetscErrorCode PetscLLCondensedDestroy_fast(PetscInt *lnk)
1648 {
1649 return PetscFree(lnk);
1650 }
1651
1652 PETSC_EXTERN PetscErrorCode PetscCDCreate(PetscInt, PetscCoarsenData **);
1653 PETSC_EXTERN PetscErrorCode PetscCDDestroy(PetscCoarsenData *);
1654 PETSC_EXTERN PetscErrorCode PetscCDIntNdSetID(PetscCDIntNd *, PetscInt);
1655 PETSC_EXTERN PetscErrorCode PetscCDIntNdGetID(const PetscCDIntNd *, PetscInt *);
1656 PETSC_EXTERN PetscErrorCode PetscCDAppendID(PetscCoarsenData *, PetscInt, PetscInt);
1657 PETSC_EXTERN PetscErrorCode PetscCDMoveAppend(PetscCoarsenData *, PetscInt, PetscInt);
1658 PETSC_EXTERN PetscErrorCode PetscCDAppendNode(PetscCoarsenData *, PetscInt, PetscCDIntNd *);
1659 PETSC_EXTERN PetscErrorCode PetscCDRemoveNextNode(PetscCoarsenData *, PetscInt, PetscCDIntNd *);
1660 PETSC_EXTERN PetscErrorCode PetscCDCountAt(const PetscCoarsenData *, PetscInt, PetscInt *);
1661 PETSC_EXTERN PetscErrorCode PetscCDIsEmptyAt(const PetscCoarsenData *, PetscInt, PetscBool *);
1662 PETSC_EXTERN PetscErrorCode PetscCDSetChunkSize(PetscCoarsenData *, PetscInt);
1663 PETSC_EXTERN PetscErrorCode PetscCDPrint(const PetscCoarsenData *, PetscInt, MPI_Comm);
1664 PETSC_EXTERN PetscErrorCode PetscCDGetNonemptyIS(PetscCoarsenData *, IS *);
1665 PETSC_EXTERN PetscErrorCode PetscCDGetMat(PetscCoarsenData *, Mat *);
1666 PETSC_EXTERN PetscErrorCode PetscCDSetMat(PetscCoarsenData *, Mat);
1667 PETSC_EXTERN PetscErrorCode PetscCDClearMat(PetscCoarsenData *);
1668 PETSC_EXTERN PetscErrorCode PetscCDRemoveAllAt(PetscCoarsenData *, PetscInt);
1669 PETSC_EXTERN PetscErrorCode PetscCDCount(const PetscCoarsenData *, PetscInt *_sz);
1670
1671 PETSC_EXTERN PetscErrorCode PetscCDGetHeadPos(const PetscCoarsenData *, PetscInt, PetscCDIntNd **);
1672 PETSC_EXTERN PetscErrorCode PetscCDGetNextPos(const PetscCoarsenData *, PetscInt, PetscCDIntNd **);
1673 PETSC_EXTERN PetscErrorCode PetscCDGetASMBlocks(const PetscCoarsenData *, const PetscInt, PetscInt *, IS **);
1674
1675 PETSC_SINGLE_LIBRARY_VISIBILITY_INTERNAL PetscErrorCode MatFDColoringApply_AIJ(Mat, MatFDColoring, Vec, void *);
1676
1677 PETSC_EXTERN PetscLogEvent MAT_Mult;
1678 PETSC_EXTERN PetscLogEvent MAT_MultAdd;
1679 PETSC_EXTERN PetscLogEvent MAT_MultTranspose;
1680 PETSC_EXTERN PetscLogEvent MAT_MultHermitianTranspose;
1681 PETSC_EXTERN PetscLogEvent MAT_MultTransposeAdd;
1682 PETSC_EXTERN PetscLogEvent MAT_MultHermitianTransposeAdd;
1683 PETSC_EXTERN PetscLogEvent MAT_Solve;
1684 PETSC_EXTERN PetscLogEvent MAT_Solves;
1685 PETSC_EXTERN PetscLogEvent MAT_SolveAdd;
1686 PETSC_EXTERN PetscLogEvent MAT_SolveTranspose;
1687 PETSC_EXTERN PetscLogEvent MAT_SolveTransposeAdd;
1688 PETSC_EXTERN PetscLogEvent MAT_SOR;
1689 PETSC_EXTERN PetscLogEvent MAT_ForwardSolve;
1690 PETSC_EXTERN PetscLogEvent MAT_BackwardSolve;
1691 PETSC_EXTERN PetscLogEvent MAT_LUFactor;
1692 PETSC_EXTERN PetscLogEvent MAT_LUFactorSymbolic;
1693 PETSC_EXTERN PetscLogEvent MAT_LUFactorNumeric;
1694 PETSC_EXTERN PetscLogEvent MAT_QRFactor;
1695 PETSC_EXTERN PetscLogEvent MAT_QRFactorSymbolic;
1696 PETSC_EXTERN PetscLogEvent MAT_QRFactorNumeric;
1697 PETSC_EXTERN PetscLogEvent MAT_CholeskyFactor;
1698 PETSC_EXTERN PetscLogEvent MAT_CholeskyFactorSymbolic;
1699 PETSC_EXTERN PetscLogEvent MAT_CholeskyFactorNumeric;
1700 PETSC_EXTERN PetscLogEvent MAT_ILUFactor;
1701 PETSC_EXTERN PetscLogEvent MAT_ILUFactorSymbolic;
1702 PETSC_EXTERN PetscLogEvent MAT_ICCFactorSymbolic;
1703 PETSC_EXTERN PetscLogEvent MAT_Copy;
1704 PETSC_EXTERN PetscLogEvent MAT_Convert;
1705 PETSC_EXTERN PetscLogEvent MAT_Scale;
1706 PETSC_EXTERN PetscLogEvent MAT_AssemblyBegin;
1707 PETSC_EXTERN PetscLogEvent MAT_AssemblyEnd;
1708 PETSC_EXTERN PetscLogEvent MAT_SetValues;
1709 PETSC_EXTERN PetscLogEvent MAT_GetValues;
1710 PETSC_EXTERN PetscLogEvent MAT_GetRow;
1711 PETSC_EXTERN PetscLogEvent MAT_GetRowIJ;
1712 PETSC_EXTERN PetscLogEvent MAT_CreateSubMats;
1713 PETSC_EXTERN PetscLogEvent MAT_GetOrdering;
1714 PETSC_EXTERN PetscLogEvent MAT_RedundantMat;
1715 PETSC_EXTERN PetscLogEvent MAT_IncreaseOverlap;
1716 PETSC_EXTERN PetscLogEvent MAT_Partitioning;
1717 PETSC_EXTERN PetscLogEvent MAT_PartitioningND;
1718 PETSC_EXTERN PetscLogEvent MAT_Coarsen;
1719 PETSC_EXTERN PetscLogEvent MAT_ZeroEntries;
1720 PETSC_EXTERN PetscLogEvent MAT_Load;
1721 PETSC_EXTERN PetscLogEvent MAT_View;
1722 PETSC_EXTERN PetscLogEvent MAT_AXPY;
1723 PETSC_EXTERN PetscLogEvent MAT_FDColoringCreate;
1724 PETSC_EXTERN PetscLogEvent MAT_TransposeColoringCreate;
1725 PETSC_EXTERN PetscLogEvent MAT_FDColoringSetUp;
1726 PETSC_EXTERN PetscLogEvent MAT_FDColoringApply;
1727 PETSC_EXTERN PetscLogEvent MAT_Transpose;
1728 PETSC_EXTERN PetscLogEvent MAT_FDColoringFunction;
1729 PETSC_EXTERN PetscLogEvent MAT_CreateSubMat;
1730 PETSC_EXTERN PetscLogEvent MAT_MatSolve;
1731 PETSC_EXTERN PetscLogEvent MAT_MatTrSolve;
1732 PETSC_EXTERN PetscLogEvent MAT_MatMultSymbolic;
1733 PETSC_EXTERN PetscLogEvent MAT_MatMultNumeric;
1734 PETSC_EXTERN PetscLogEvent MAT_Getlocalmatcondensed;
1735 PETSC_EXTERN PetscLogEvent MAT_GetBrowsOfAcols;
1736 PETSC_EXTERN PetscLogEvent MAT_GetBrowsOfAocols;
1737 PETSC_EXTERN PetscLogEvent MAT_PtAPSymbolic;
1738 PETSC_EXTERN PetscLogEvent MAT_PtAPNumeric;
1739 PETSC_EXTERN PetscLogEvent MAT_Seqstompinum;
1740 PETSC_EXTERN PetscLogEvent MAT_Seqstompisym;
1741 PETSC_EXTERN PetscLogEvent MAT_Seqstompi;
1742 PETSC_EXTERN PetscLogEvent MAT_Getlocalmat;
1743 PETSC_EXTERN PetscLogEvent MAT_RARtSymbolic;
1744 PETSC_EXTERN PetscLogEvent MAT_RARtNumeric;
1745 PETSC_EXTERN PetscLogEvent MAT_MatTransposeMultSymbolic;
1746 PETSC_EXTERN PetscLogEvent MAT_MatTransposeMultNumeric;
1747 PETSC_EXTERN PetscLogEvent MAT_TransposeMatMultSymbolic;
1748 PETSC_EXTERN PetscLogEvent MAT_TransposeMatMultNumeric;
1749 PETSC_EXTERN PetscLogEvent MAT_MatMatMultSymbolic;
1750 PETSC_EXTERN PetscLogEvent MAT_MatMatMultNumeric;
1751 PETSC_EXTERN PetscLogEvent MAT_Getsymtransreduced;
1752 PETSC_EXTERN PetscLogEvent MAT_GetSeqNonzeroStructure;
1753 PETSC_EXTERN PetscLogEvent MATMFFD_Mult;
1754 PETSC_EXTERN PetscLogEvent MAT_GetMultiProcBlock;
1755 PETSC_EXTERN PetscLogEvent MAT_CUSPARSECopyToGPU;
1756 PETSC_EXTERN PetscLogEvent MAT_CUSPARSECopyFromGPU;
1757 PETSC_EXTERN PetscLogEvent MAT_CUSPARSEGenerateTranspose;
1758 PETSC_EXTERN PetscLogEvent MAT_CUSPARSESolveAnalysis;
1759 PETSC_EXTERN PetscLogEvent MAT_HIPSPARSECopyToGPU;
1760 PETSC_EXTERN PetscLogEvent MAT_HIPSPARSECopyFromGPU;
1761 PETSC_EXTERN PetscLogEvent MAT_HIPSPARSEGenerateTranspose;
1762 PETSC_EXTERN PetscLogEvent MAT_HIPSPARSESolveAnalysis;
1763 PETSC_EXTERN PetscLogEvent MAT_SetValuesBatch;
1764 PETSC_EXTERN PetscLogEvent MAT_CreateGraph;
1765 PETSC_EXTERN PetscLogEvent MAT_ViennaCLCopyToGPU;
1766 PETSC_EXTERN PetscLogEvent MAT_DenseCopyToGPU;
1767 PETSC_EXTERN PetscLogEvent MAT_DenseCopyFromGPU;
1768 PETSC_EXTERN PetscLogEvent MAT_Merge;
1769 PETSC_EXTERN PetscLogEvent MAT_Residual;
1770 PETSC_EXTERN PetscLogEvent MAT_SetRandom;
1771 PETSC_EXTERN PetscLogEvent MAT_FactorFactS;
1772 PETSC_EXTERN PetscLogEvent MAT_FactorInvS;
1773 PETSC_EXTERN PetscLogEvent MAT_PreallCOO;
1774 PETSC_EXTERN PetscLogEvent MAT_SetVCOO;
1775 PETSC_EXTERN PetscLogEvent MATCOLORING_Apply;
1776 PETSC_EXTERN PetscLogEvent MATCOLORING_Comm;
1777 PETSC_EXTERN PetscLogEvent MATCOLORING_Local;
1778 PETSC_EXTERN PetscLogEvent MATCOLORING_ISCreate;
1779 PETSC_EXTERN PetscLogEvent MATCOLORING_SetUp;
1780 PETSC_EXTERN PetscLogEvent MATCOLORING_Weights;
1781 PETSC_EXTERN PetscLogEvent MAT_H2Opus_Build;
1782 PETSC_EXTERN PetscLogEvent MAT_H2Opus_Compress;
1783 PETSC_EXTERN PetscLogEvent MAT_H2Opus_Orthog;
1784 PETSC_EXTERN PetscLogEvent MAT_H2Opus_LR;
1785 PETSC_EXTERN PetscLogEvent MAT_CUDACopyToGPU;
1786 PETSC_EXTERN PetscLogEvent MAT_HIPCopyToGPU;
1787
1788 #if defined(PETSC_CLANG_STATIC_ANALYZER)
1789 #define MatGetDiagonalMarkers(SeqXXX, bs)
1790 #else
1791 /*
1792 Adds diagonal pointers to sparse matrix nonzero structure and determines if all diagonal entries are present
1793
1794 Rechecks the matrix data structure automatically if the nonzero structure of the matrix changed since the last call
1795
1796 Potential optimization: since the a->j[j] are sorted this could use bisection to find the diagonal
1797
1798 Developer Note:
1799 Uses the C preprocessor as a template mechanism to produce MatGetDiagonal_Seq[SB]AIJ() to avoid duplicate code
1800 */
1801 #define MatGetDiagonalMarkers(SeqXXX, bs) \
1802 PetscErrorCode MatGetDiagonalMarkers_##SeqXXX(Mat A, const PetscInt **diag, PetscBool *diagDense) \
1803 { \
1804 Mat_##SeqXXX *a = (Mat_##SeqXXX *)A->data; \
1805 \
1806 PetscFunctionBegin; \
1807 if (A->factortype != MAT_FACTOR_NONE) { \
1808 if (diagDense) *diagDense = PETSC_TRUE; \
1809 if (diag) *diag = a->diag; \
1810 PetscFunctionReturn(PETSC_SUCCESS); \
1811 } \
1812 PetscCheck(diag || diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "At least one of diag or diagDense must be requested"); \
1813 if (a->diagNonzeroState != A->nonzerostate || (diag && !a->diag)) { \
1814 const PetscInt m = A->rmap->n / bs; \
1815 \
1816 if (!diag && !a->diag) { \
1817 a->diagDense = PETSC_TRUE; \
1818 for (PetscInt i = 0; i < m; i++) { \
1819 PetscBool found = PETSC_FALSE; \
1820 \
1821 for (PetscInt j = a->i[i]; j < a->i[i + 1]; j++) { \
1822 if (a->j[j] == i) { \
1823 found = PETSC_TRUE; \
1824 break; \
1825 } \
1826 } \
1827 if (!found) { \
1828 a->diagDense = PETSC_FALSE; \
1829 *diagDense = a->diagDense; \
1830 a->diagNonzeroState = A->nonzerostate; \
1831 PetscFunctionReturn(PETSC_SUCCESS); \
1832 } \
1833 } \
1834 } else { \
1835 if (!a->diag) PetscCall(PetscMalloc1(m, &a->diag)); \
1836 a->diagDense = PETSC_TRUE; \
1837 for (PetscInt i = 0; i < m; i++) { \
1838 PetscBool found = PETSC_FALSE; \
1839 \
1840 a->diag[i] = a->i[i + 1]; \
1841 for (PetscInt j = a->i[i]; j < a->i[i + 1]; j++) { \
1842 if (a->j[j] == i) { \
1843 a->diag[i] = j; \
1844 found = PETSC_TRUE; \
1845 break; \
1846 } \
1847 } \
1848 if (!found) a->diagDense = PETSC_FALSE; \
1849 } \
1850 } \
1851 a->diagNonzeroState = A->nonzerostate; \
1852 } \
1853 if (diag) *diag = a->diag; \
1854 if (diagDense) *diagDense = a->diagDense; \
1855 PetscFunctionReturn(PETSC_SUCCESS); \
1856 }
1857 #endif
1858