xref: /petsc/src/sys/memory/mal.c (revision 98d129c30f3ee9fdddc40fdbc5a989b7be64f888)
1 /*
2     Code that allows a user to dictate what malloc() PETSc uses.
3 */
4 #define PETSC_DESIRE_FEATURE_TEST_MACROS /* for posix_memalign() */
5 #include <petscsys.h>                    /*I   "petscsys.h"   I*/
6 #include <stdarg.h>
7 #if defined(PETSC_HAVE_MALLOC_H)
8   #include <malloc.h>
9 #endif
10 #if defined(PETSC_HAVE_MEMKIND)
11   #include <errno.h>
12   #include <memkind.h>
13 typedef enum {
14   PETSC_MK_DEFAULT       = 0,
15   PETSC_MK_HBW_PREFERRED = 1
16 } PetscMemkindType;
17 PetscMemkindType currentmktype  = PETSC_MK_HBW_PREFERRED;
18 PetscMemkindType previousmktype = PETSC_MK_HBW_PREFERRED;
19 #endif
20 /*
21         We want to make sure that all mallocs of double or complex numbers are complex aligned.
22     1) on systems with memalign() we call that routine to get an aligned memory location
23     2) on systems without memalign() we
24        - allocate one sizeof(PetscScalar) extra space
25        - we shift the pointer up slightly if needed to get PetscScalar aligned
26        - if shifted we store at ptr[-1] the amount of shift (plus a classid)
27 */
28 #define SHIFT_CLASSID 456123
29 
30 PETSC_EXTERN PetscErrorCode PetscMallocAlign(size_t mem, PetscBool clear, int line, const char func[], const char file[], void **result)
31 {
32   if (!mem) {
33     *result = NULL;
34     return PETSC_SUCCESS;
35   }
36 #if PetscDefined(HAVE_MEMKIND)
37   {
38     int err = memkind_posix_memalign(currentmktype ? MEMKIND_HBW_PREFERRED : MEMKIND_DEFAULT, result, PETSC_MEMALIGN, mem);
39     PetscCheck(err != EINVAL, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memkind: invalid 3rd or 4th argument of memkind_posix_memalign()");
40     if (err == ENOMEM) PetscInfo(NULL, "Memkind: fail to request HBW memory %.0f, falling back to normal memory\n", (PetscLogDouble)mem);
41     PetscCheck(*result, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memory requested %.0f", (PetscLogDouble)mem);
42     if (clear) PetscCall(PetscMemzero(*result, mem));
43   }
44 #else /* PetscDefined(HAVE_MEMKIND) */
45   #if PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)
46   if (clear) *result = calloc(1 + mem / sizeof(int), sizeof(int));
47   else *result = malloc(mem);
48 
49   PetscCheck(*result, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memory requested %.0f", (PetscLogDouble)mem);
50   if (PetscLogMemory) PetscCall(PetscMemzero(*result, mem));
51   #elif PetscDefined(HAVE_POSIX_MEMALIGN)
52   int ret = posix_memalign(result, PETSC_MEMALIGN, mem);
53   PetscCheck(ret == 0, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memory requested %.0f", (PetscLogDouble)mem);
54   if (clear || PetscLogMemory) PetscCall(PetscMemzero(*result, mem));
55   #else  /* PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) || PetscDefined(HAVE_POSIX_MEMALIGN) */
56   {
57     int *ptr, shift;
58     /*
59       malloc space for two extra chunks and shift ptr 1 + enough to get it PetscScalar aligned
60     */
61     if (clear) {
62       ptr = (int *)calloc(1 + (mem + 2 * PETSC_MEMALIGN) / sizeof(int), sizeof(int));
63     } else {
64       ptr = (int *)malloc(mem + 2 * PETSC_MEMALIGN);
65     }
66     PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memory requested %.0f", (PetscLogDouble)mem);
67     shift          = (int)(((PETSC_UINTPTR_T)ptr) % PETSC_MEMALIGN);
68     shift          = (2 * PETSC_MEMALIGN - shift) / sizeof(int);
69     ptr[shift - 1] = shift + SHIFT_CLASSID;
70     ptr += shift;
71     *result = (void *)ptr;
72     if (PetscLogMemory) PetscCall(PetscMemzero(*result, mem));
73   }
74   #endif /* PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) || PetscDefined(HAVE_POSIX_MEMALIGN) */
75 #endif   /* PetscDefined(HAVE_MEMKIND) */
76   return PETSC_SUCCESS;
77 }
78 
79 PETSC_EXTERN PetscErrorCode PetscFreeAlign(void *ptr, int line, const char func[], const char file[])
80 {
81   if (!ptr) return PETSC_SUCCESS;
82 #if PetscDefined(HAVE_MEMKIND)
83   memkind_free(0, ptr); /* specify the kind to 0 so that memkind will look up for the right type */
84 #else                   /* PetscDefined(HAVE_MEMKIND) */
85   #if (!(PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)) && !PetscDefined(HAVE_POSIX_MEMALIGN))
86   {
87     /*
88       Previous int tells us how many ints the pointer has been shifted from
89       the original address provided by the system malloc().
90     */
91     const int shift = *(((int *)ptr) - 1) - SHIFT_CLASSID;
92 
93     PetscCheck(shift <= PETSC_MEMALIGN - 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Likely memory corruption in heap");
94     PetscCheck(shift >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Likely memory corruption in heap");
95     ptr = (void *)(((int *)ptr) - shift);
96   }
97   #endif
98 
99   #if PetscDefined(HAVE_FREE_RETURN_INT)
100   int err = free(ptr);
101   PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_PLIB, "System free returned error %d", err);
102   #else
103   free(ptr);
104   #endif
105 #endif
106   return PETSC_SUCCESS;
107 }
108 
109 PETSC_EXTERN PetscErrorCode PetscReallocAlign(size_t mem, int line, const char func[], const char file[], void **result)
110 {
111   if (!mem) {
112     PetscCall(PetscFreeAlign(*result, line, func, file));
113     *result = NULL;
114     return PETSC_SUCCESS;
115   }
116 #if PetscDefined(HAVE_MEMKIND)
117   *result = memkind_realloc(currentmktype ? MEMKIND_HBW_PREFERRED : MEMKIND_DEFAULT, *result, mem);
118 #else
119   #if (!(PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)) && !PetscDefined(HAVE_POSIX_MEMALIGN))
120   {
121     /*
122       Previous int tells us how many ints the pointer has been shifted from
123       the original address provided by the system malloc().
124     */
125     int shift = *(((int *)*result) - 1) - SHIFT_CLASSID;
126     PetscCheck(shift <= PETSC_MEMALIGN - 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Likely memory corruption in heap");
127     PetscCheck(shift >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Likely memory corruption in heap");
128     *result = (void *)(((int *)*result) - shift);
129   }
130   #endif
131 
132   #if (PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)) || PetscDefined(HAVE_POSIX_MEMALIGN)
133   *result = realloc(*result, mem);
134   #else
135   {
136     /*
137       malloc space for two extra chunks and shift ptr 1 + enough to get it PetscScalar aligned
138     */
139     int *ptr = (int *)realloc(*result, mem + 2 * PETSC_MEMALIGN);
140     if (ptr) {
141       int shift      = (int)(((PETSC_UINTPTR_T)ptr) % PETSC_MEMALIGN);
142       shift          = (2 * PETSC_MEMALIGN - shift) / sizeof(int);
143       ptr[shift - 1] = shift + SHIFT_CLASSID;
144       ptr += shift;
145       *result = (void *)ptr;
146     } else {
147       *result = NULL;
148     }
149   }
150   #endif
151 #endif
152   PetscCheck(*result, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memory requested %.0f", (PetscLogDouble)mem);
153 #if PetscDefined(HAVE_POSIX_MEMALIGN)
154   /* There are no standard guarantees that realloc() maintains the alignment of memalign(), so I think we have to
155    * realloc and, if the alignment is wrong, malloc/copy/free. */
156   if (((size_t)*result) % PETSC_MEMALIGN) {
157     void *newResult;
158   #if PetscDefined(HAVE_MEMKIND)
159     {
160       int err = memkind_posix_memalign(currentmktype ? MEMKIND_HBW_PREFERRED : MEMKIND_DEFAULT, &newResult, PETSC_MEMALIGN, mem);
161       PetscCheck(err != EINVAL, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memkind: invalid 3rd or 4th argument of memkind_posix_memalign()");
162       if (err == ENOMEM) PetscInfo(NULL, "Memkind: fail to request HBW memory %.0f, falling back to normal memory\n", (PetscLogDouble)mem);
163     }
164     PetscCheck(newResult, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memory requested %.0f", (PetscLogDouble)mem);
165   #else
166     int ret = posix_memalign(&newResult, PETSC_MEMALIGN, mem);
167     PetscCheck(ret == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "posix_memalign() failed with error code %d, memory requested %.0f", ret, (PetscLogDouble)mem);
168   #endif
169     PetscCall(PetscMemcpy(newResult, *result, mem));
170   #if PetscDefined(HAVE_FREE_RETURN_INT)
171     {
172       int err = free(*result);
173       PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_PLIB, "System free returned error %d", err);
174     }
175   #else
176     #if defined(PETSC_HAVE_MEMKIND)
177     memkind_free(0, *result);
178     #else
179     free(*result);
180     #endif
181   #endif
182     *result = newResult;
183   }
184 #endif
185   return PETSC_SUCCESS;
186 }
187 
188 PetscErrorCode (*PetscTrMalloc)(size_t, PetscBool, int, const char[], const char[], void **) = PetscMallocAlign;
189 PetscErrorCode (*PetscTrFree)(void *, int, const char[], const char[])                       = PetscFreeAlign;
190 PetscErrorCode (*PetscTrRealloc)(size_t, int, const char[], const char[], void **)           = PetscReallocAlign;
191 
192 PETSC_INTERN PetscBool petscsetmallocvisited;
193 PetscBool              petscsetmallocvisited = PETSC_FALSE;
194 
195 /*@C
196   PetscMallocSet - Sets the underlying allocation routines used by `PetscMalloc()` and `PetscFree()`
197 
198   Not Collective
199 
200   Input Parameters:
201 + imalloc - the routine that provides the `malloc()` implementation (also provides `calloc()`, which is used depending on the second argument)
202 . ifree   - the routine that provides the `free()` implementation
203 - iralloc - the routine that provides the `realloc()` implementation
204 
205   Level: developer
206 
207   Note:
208   This routine MUST be called before `PetscInitialize()` and may be
209   called only once.
210 
211 .seealso: `PetscMallocClear()`, `PetscInitialize()`, `PetscMalloc()`, `PetscFree()`
212 @*/
213 PetscErrorCode PetscMallocSet(PetscErrorCode (*imalloc)(size_t, PetscBool, int, const char[], const char[], void **), PetscErrorCode (*ifree)(void *, int, const char[], const char[]), PetscErrorCode (*iralloc)(size_t, int, const char[], const char[], void **))
214 {
215   PetscFunctionBegin;
216   PetscCheck(!petscsetmallocvisited || !(imalloc != PetscTrMalloc || ifree != PetscTrFree), PETSC_COMM_SELF, PETSC_ERR_SUP, "cannot call multiple times");
217   PetscTrMalloc         = imalloc;
218   PetscTrFree           = ifree;
219   PetscTrRealloc        = iralloc;
220   petscsetmallocvisited = PETSC_TRUE;
221   PetscFunctionReturn(PETSC_SUCCESS);
222 }
223 
224 /*@C
225   PetscMallocClear - Resets the routines used by `PetscMalloc()` and `PetscFree()`
226 
227   Not Collective
228 
229   Level: developer
230 
231   Notes:
232   In general one should never run a PETSc program with different `malloc()` and
233   `free()` settings for different parts; this is because one NEVER wants to
234   `free()` an address that was malloced by a different memory management system
235 
236   Called in `PetscFinalize()` so that if `PetscInitialize()` is called again it starts with a fresh slate of allocation information
237 
238 .seealso: `PetscMallocSet()`, `PetscMalloc()`, `PetscFree()`
239 @*/
240 PetscErrorCode PetscMallocClear(void)
241 {
242   PetscFunctionBegin;
243   PetscTrMalloc         = PetscMallocAlign;
244   PetscTrFree           = PetscFreeAlign;
245   PetscTrRealloc        = PetscReallocAlign;
246   petscsetmallocvisited = PETSC_FALSE;
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 PetscErrorCode PetscMemoryTrace(const char label[])
251 {
252   PetscLogDouble        mem, mal;
253   static PetscLogDouble oldmem = 0, oldmal = 0;
254 
255   PetscFunctionBegin;
256   PetscCall(PetscMemoryGetCurrentUsage(&mem));
257   PetscCall(PetscMallocGetCurrentUsage(&mal));
258 
259   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%s High water  %8.3f MB increase %8.3f MB Current %8.3f MB increase %8.3f MB\n", label, mem * 1e-6, (mem - oldmem) * 1e-6, mal * 1e-6, (mal - oldmal) * 1e-6));
260   oldmem = mem;
261   oldmal = mal;
262   PetscFunctionReturn(PETSC_SUCCESS);
263 }
264 
265 static PetscErrorCode (*PetscTrMallocOld)(size_t, PetscBool, int, const char[], const char[], void **) = PetscMallocAlign;
266 static PetscErrorCode (*PetscTrReallocOld)(size_t, int, const char[], const char[], void **)           = PetscReallocAlign;
267 static PetscErrorCode (*PetscTrFreeOld)(void *, int, const char[], const char[])                       = PetscFreeAlign;
268 
269 /*@C
270   PetscMallocSetDRAM - Set `PetscMalloc()` to use DRAM.
271   If memkind is available, change the memkind type. Otherwise, switch the
272   current malloc and free routines to the `PetscMallocAlign()` and
273   `PetscFreeAlign()` (PETSc default).
274 
275   Not Collective
276 
277   Level: developer
278 
279   Note:
280   This provides a way to do the allocation on DRAM temporarily. One
281   can switch back to the previous choice by calling `PetscMallocReset()`.
282 
283 .seealso: `PetscMallocReset()`, `PetscMalloc()`, `PetscFree()`
284 @*/
285 PetscErrorCode PetscMallocSetDRAM(void)
286 {
287   PetscFunctionBegin;
288   if (PetscTrMalloc == PetscMallocAlign) {
289 #if defined(PETSC_HAVE_MEMKIND)
290     previousmktype = currentmktype;
291     currentmktype  = PETSC_MK_DEFAULT;
292 #endif
293   } else {
294     /* Save the previous choice */
295     PetscTrMallocOld  = PetscTrMalloc;
296     PetscTrReallocOld = PetscTrRealloc;
297     PetscTrFreeOld    = PetscTrFree;
298     PetscTrMalloc     = PetscMallocAlign;
299     PetscTrFree       = PetscFreeAlign;
300     PetscTrRealloc    = PetscReallocAlign;
301   }
302   PetscFunctionReturn(PETSC_SUCCESS);
303 }
304 
305 /*@C
306   PetscMallocResetDRAM - Reset the changes made by `PetscMallocSetDRAM()`
307 
308   Not Collective
309 
310   Level: developer
311 
312 .seealso: `PetscMallocSetDRAM()`
313 @*/
314 PetscErrorCode PetscMallocResetDRAM(void)
315 {
316   PetscFunctionBegin;
317   if (PetscTrMalloc == PetscMallocAlign) {
318 #if defined(PETSC_HAVE_MEMKIND)
319     currentmktype = previousmktype;
320 #endif
321   } else {
322     /* Reset to the previous choice */
323     PetscTrMalloc  = PetscTrMallocOld;
324     PetscTrRealloc = PetscTrReallocOld;
325     PetscTrFree    = PetscTrFreeOld;
326   }
327   PetscFunctionReturn(PETSC_SUCCESS);
328 }
329 
330 static PetscBool petscmalloccoalesce =
331 #if defined(PETSC_USE_MALLOC_COALESCED)
332   PETSC_TRUE;
333 #else
334   PETSC_FALSE;
335 #endif
336 
337 /*@C
338   PetscMallocSetCoalesce - Use coalesced `PetscMalloc()` when allocating groups of objects, that is when using `PetscMallocN()`
339 
340   Not Collective
341 
342   Input Parameter:
343 . coalesce - `PETSC_TRUE` to use coalesced malloc for multi-memory allocation.
344 
345   Options Database Key:
346 . -malloc_coalesce - turn coalesced `PetscMallocN()` on or off
347 
348   Level: developer
349 
350   Notes:
351   PETSc uses coalesced `PetscMallocN()` by default for optimized builds and not for debugging builds.
352 
353   This default can be changed via the command-line option `-malloc_coalesce` or by calling this function.
354 
355   This function can only be called immediately after `PetscInitialize()`
356 
357 .seealso: `PetscMallocA()`, `PetscMalloc()`, `PetscFree()`
358 @*/
359 PetscErrorCode PetscMallocSetCoalesce(PetscBool coalesce)
360 {
361   PetscFunctionBegin;
362   petscmalloccoalesce = coalesce;
363   PetscFunctionReturn(PETSC_SUCCESS);
364 }
365 
366 /*@C
367   PetscMallocA - Allocate and optionally clear one or more memory locations, possibly using coalesced malloc
368 
369   Not Collective
370 
371   Input Parameters:
372 + n        - number of objects to allocate (at least 1)
373 . clear    - use `calloc()` to allocate space initialized to zero
374 . lineno   - line number to attribute allocation (typically `__LINE__`)
375 . function - function to attribute allocation (typically `PETSC_FUNCTION_NAME`)
376 . filename - file name to attribute allocation (typically `__FILE__`)
377 - bytes0   - first of `n` object sizes
378 
379   Output Parameter:
380 . ptr0 - first of `n` pointers to allocate
381 
382   Level: developer
383 
384   Note:
385   This function is not normally called directly by users, but rather via the macros `PetscMalloc1()`, `PetscMalloc2()`, or `PetscCalloc1()`, etc.
386 
387 .seealso: `PetscMallocAlign()`, `PetscMallocSet()`, `PetscMalloc1()`, `PetscMalloc2()`, `PetscMalloc3()`, `PetscMalloc4()`, `PetscMalloc5()`, `PetscMalloc6()`, `PetscMalloc7()`,
388           `PetscCalloc1()`, `PetscCalloc2()`, `PetscCalloc3()`, `PetscCalloc4()`, `PetscCalloc5()`, `PetscCalloc6()`, `PetscCalloc7()`, `PetscFreeA()`
389 @*/
390 PetscErrorCode PetscMallocA(int n, PetscBool clear, int lineno, const char *function, const char *filename, size_t bytes0, void *ptr0, ...)
391 {
392   va_list Argp;
393   size_t  bytes[8], sumbytes;
394   void  **ptr[8];
395   int     i;
396 
397   PetscFunctionBegin;
398   PetscCheck(n <= 8, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Attempt to allocate %d objects but only 8 supported", n);
399   bytes[0] = bytes0;
400   ptr[0]   = (void **)ptr0;
401   sumbytes = (bytes0 + PETSC_MEMALIGN - 1) & ~(PETSC_MEMALIGN - 1);
402   va_start(Argp, ptr0);
403   for (i = 1; i < n; i++) {
404     bytes[i] = va_arg(Argp, size_t);
405     ptr[i]   = va_arg(Argp, void **);
406     sumbytes += (bytes[i] + PETSC_MEMALIGN - 1) & ~(PETSC_MEMALIGN - 1);
407   }
408   va_end(Argp);
409   if (petscmalloccoalesce) {
410     char *p;
411     PetscCall((*PetscTrMalloc)(sumbytes, clear, lineno, function, filename, (void **)&p));
412     if (p == NULL) {
413       for (i = 0; i < n; i++) *ptr[i] = NULL;
414     } else {
415       for (i = 0; i < n; i++) {
416         *ptr[i] = bytes[i] ? p : NULL;
417         p       = (char *)PetscAddrAlign(p + bytes[i]);
418       }
419     }
420   } else {
421     for (i = 0; i < n; i++) PetscCall((*PetscTrMalloc)(bytes[i], clear, lineno, function, filename, (void **)ptr[i]));
422   }
423   PetscFunctionReturn(PETSC_SUCCESS);
424 }
425 
426 /*@C
427   PetscFreeA - Free one or more memory locations, possibly allocated using coalesced `PetscMallocN()`
428 
429   Not Collective
430 
431   Input Parameters:
432 + n        - number of objects to free (at least 1)
433 . lineno   - line number to attribute deallocation (typically `__LINE__`)
434 . function - function to attribute deallocation (typically `PETSC_FUNCTION_NAME`)
435 . filename - file name to attribute deallocation (typically `__FILE__`)
436 - ptr0     - first of `n` pointers to free
437 
438   Level: developer
439 
440   Notes:
441   This function is not normally called directly by users, but rather via the macros `PetscFree()`, `PetscFree2()`, etc.
442 
443   The pointers are zeroed to prevent users from accidentally reusing space that has been freed.
444 
445   If the arguments were obtained via `PetscMallocA()`, `PetscMalloc2()`, `PetscMalloc3()`, etc., then the arguments must be passed in the same order to the corresponding `PetscFreeA()`, `PetscFree2()`, `PetscFree3()`, respectively.
446 
447 .seealso: `PetscMallocAlign()`, `PetscMallocSet()`, `PetscMallocA()`, `PetscFree1()`, `PetscFree2()`, `PetscFree3()`, `PetscFree4()`, `PetscFree5()`, `PetscFree6()`, `PetscFree7()`
448 @*/
449 PetscErrorCode PetscFreeA(int n, int lineno, const char *function, const char *filename, void *ptr0, ...)
450 {
451   va_list Argp;
452   void  **ptr[8];
453   int     i;
454 
455   PetscFunctionBegin;
456   PetscCheck((n >= 1) && (n <= 8), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Attempt to allocate %d objects but only up to 8 supported", n);
457   ptr[0] = (void **)ptr0;
458   va_start(Argp, ptr0);
459   for (i = 1; i < n; i++) ptr[i] = va_arg(Argp, void **);
460   va_end(Argp);
461   if (petscmalloccoalesce) {
462     for (i = 0; i < n; i++) { /* Find first nonempty allocation */
463       if (*ptr[i]) break;
464     }
465     while (--n > i) *ptr[n] = NULL;
466     PetscCall((*PetscTrFree)(*ptr[n], lineno, function, filename));
467     *ptr[n] = NULL;
468   } else {
469     while (--n >= 0) {
470       PetscCall((*PetscTrFree)(*ptr[n], lineno, function, filename));
471       *ptr[n] = NULL;
472     }
473   }
474   PetscFunctionReturn(PETSC_SUCCESS);
475 }
476