xref: /petsc/src/sys/memory/mal.c (revision 5a856986583887c326abe5dfd149e8184a29cd80)
1 /*
2     Code that allows a user to dictate what malloc() PETSc uses.
3 */
4 #include <petscsys.h>             /*I   "petscsys.h"   I*/
5 #include <stdarg.h>
6 #if defined(PETSC_HAVE_MALLOC_H)
7 #include <malloc.h>
8 #endif
9 #if defined(PETSC_HAVE_MEMKIND)
10 #include <errno.h>
11 #include <memkind.h>
12 typedef enum {PETSC_MK_DEFAULT=0,PETSC_MK_HBW_PREFERRED=1} PetscMemkindType;
13 PetscMemkindType currentmktype = PETSC_MK_HBW_PREFERRED;
14 PetscMemkindType previousmktype = PETSC_MK_HBW_PREFERRED;
15 #endif
16 /*
17         We want to make sure that all mallocs of double or complex numbers are complex aligned.
18     1) on systems with memalign() we call that routine to get an aligned memory location
19     2) on systems without memalign() we
20        - allocate one sizeof(PetscScalar) extra space
21        - we shift the pointer up slightly if needed to get PetscScalar aligned
22        - if shifted we store at ptr[-1] the amount of shift (plus a classid)
23 */
24 #define SHIFT_CLASSID 456123
25 
26 PETSC_EXTERN PetscErrorCode PetscMallocAlign(size_t mem,PetscBool clear,int line,const char func[],const char file[],void **result)
27 {
28   PetscErrorCode ierr;
29 
30   if (!mem) {*result = NULL; return 0;}
31 #if defined(PETSC_HAVE_MEMKIND)
32   {
33     if (!currentmktype) ierr = memkind_posix_memalign(MEMKIND_DEFAULT,result,PETSC_MEMALIGN,mem);
34     else ierr = memkind_posix_memalign(MEMKIND_HBW_PREFERRED,result,PETSC_MEMALIGN,mem);
35     if (ierr == EINVAL) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"Memkind: invalid 3rd or 4th argument of memkind_posix_memalign()");
36     if (ierr == ENOMEM) PetscInfo1(0,"Memkind: fail to request HBW memory %.0f, falling back to normal memory\n",(PetscLogDouble)mem);
37     if (clear) {ierr = PetscMemzero(*result,mem);CHKERRQ(ierr);}
38   }
39 #else
40 #  if defined(PETSC_HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)
41   if (clear) {
42     *result = calloc(1+mem/sizeof(int),sizeof(int));
43   } else {
44     *result = malloc(mem);
45   }
46   if (PetscLogMemory) {ierr = PetscMemzero(*result,mem);CHKERRQ(ierr);}
47 
48 #  elif defined(PETSC_HAVE_MEMALIGN)
49   *result = memalign(PETSC_MEMALIGN,mem);
50   if (clear || PetscLogMemory) {
51     ierr = PetscMemzero(*result,mem);CHKERRQ(ierr);
52   }
53 #  else
54   {
55     int *ptr;
56     /*
57       malloc space for two extra chunks and shift ptr 1 + enough to get it PetscScalar aligned
58     */
59     if (clear) {
60       ptr = (int*)calloc(1+(mem + 2*PETSC_MEMALIGN)/sizeof(int),sizeof(int));
61     } else {
62       ptr = (int*)malloc(mem + 2*PETSC_MEMALIGN);
63     }
64     if (ptr) {
65       int shift    = (int)(((PETSC_UINTPTR_T) ptr) % PETSC_MEMALIGN);
66       shift        = (2*PETSC_MEMALIGN - shift)/sizeof(int);
67       ptr[shift-1] = shift + SHIFT_CLASSID;
68       ptr         += shift;
69       *result      = (void*)ptr;
70       if (PetscLogMemory) {ierr = PetscMemzero(*result,mem);CHKERRQ(ierr);}
71     } else {
72       *result      = NULL;
73     }
74   }
75 #  endif
76 #endif
77 
78   if (!*result) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_MEM,PETSC_ERROR_INITIAL,"Memory requested %.0f",(PetscLogDouble)mem);
79   return 0;
80 }
81 
82 PETSC_EXTERN PetscErrorCode PetscFreeAlign(void *ptr,int line,const char func[],const char file[])
83 {
84   if (!ptr) return 0;
85 #if defined(PETSC_HAVE_MEMKIND)
86   memkind_free(0,ptr); /* specify the kind to 0 so that memkind will look up for the right type */
87 #else
88 #  if (!(defined(PETSC_HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)) && !defined(PETSC_HAVE_MEMALIGN))
89   {
90     /*
91       Previous int tells us how many ints the pointer has been shifted from
92       the original address provided by the system malloc().
93     */
94     int shift = *(((int*)ptr)-1) - SHIFT_CLASSID;
95     if (shift > PETSC_MEMALIGN-1) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_PLIB,PETSC_ERROR_INITIAL,"Likely memory corruption in heap");
96     if (shift < 0) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_PLIB,PETSC_ERROR_INITIAL,"Likely memory corruption in heap");
97     ptr = (void*)(((int*)ptr) - shift);
98   }
99 #  endif
100 
101 #  if defined(PETSC_HAVE_FREE_RETURN_INT)
102   int err = free(ptr);
103   if (err) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_PLIB,PETSC_ERROR_INITIAL,"System free returned error %d\n",err);
104 #  else
105   free(ptr);
106 #  endif
107 #endif
108   return 0;
109 }
110 
111 PETSC_EXTERN PetscErrorCode PetscReallocAlign(size_t mem, int line, const char func[], const char file[], void **result)
112 {
113   PetscErrorCode ierr;
114 
115   if (!mem) {
116     ierr = PetscFreeAlign(*result, line, func, file);
117     if (ierr) return ierr;
118     *result = NULL;
119     return 0;
120   }
121 #if defined(PETSC_HAVE_MEMKIND)
122   if (!currentmktype) *result = memkind_realloc(MEMKIND_DEFAULT,*result,mem);
123   else *result = memkind_realloc(MEMKIND_HBW_PREFERRED,*result,mem);
124 #else
125 #  if (!(defined(PETSC_HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)) && !defined(PETSC_HAVE_MEMALIGN))
126   {
127     /*
128       Previous int tells us how many ints the pointer has been shifted from
129       the original address provided by the system malloc().
130     */
131     int shift = *(((int*)*result)-1) - SHIFT_CLASSID;
132     if (shift > PETSC_MEMALIGN-1) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_PLIB,PETSC_ERROR_INITIAL,"Likely memory corruption in heap");
133     if (shift < 0) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_PLIB,PETSC_ERROR_INITIAL,"Likely memory corruption in heap");
134     *result = (void*)(((int*)*result) - shift);
135   }
136 #  endif
137 
138 #  if (defined(PETSC_HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)) || defined(PETSC_HAVE_MEMALIGN)
139   *result = realloc(*result, mem);
140 #  else
141   {
142     /*
143       malloc space for two extra chunks and shift ptr 1 + enough to get it PetscScalar aligned
144     */
145     int *ptr = (int *) realloc(*result, mem + 2*PETSC_MEMALIGN);
146     if (ptr) {
147       int shift    = (int)(((PETSC_UINTPTR_T) ptr) % PETSC_MEMALIGN);
148       shift        = (2*PETSC_MEMALIGN - shift)/sizeof(int);
149       ptr[shift-1] = shift + SHIFT_CLASSID;
150       ptr         += shift;
151       *result      = (void*)ptr;
152     } else {
153       *result      = NULL;
154     }
155   }
156 #  endif
157 #endif
158   if (!*result) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_MEM,PETSC_ERROR_INITIAL,"Memory requested %.0f",(PetscLogDouble)mem);
159 #if defined(PETSC_HAVE_MEMALIGN)
160   /* There are no standard guarantees that realloc() maintains the alignment of memalign(), so I think we have to
161    * realloc and, if the alignment is wrong, malloc/copy/free. */
162   if (((size_t) (*result)) % PETSC_MEMALIGN) {
163     void *newResult;
164 #  if defined(PETSC_HAVE_MEMKIND)
165     {
166       int ierr;
167       if (!currentmktype) ierr = memkind_posix_memalign(MEMKIND_DEFAULT,&newResult,PETSC_MEMALIGN,mem);
168       else ierr = memkind_posix_memalign(MEMKIND_HBW_PREFERRED,&newResult,PETSC_MEMALIGN,mem);
169       if (ierr == EINVAL) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"Memkind: invalid 3rd or 4th argument of memkind_posix_memalign()");
170       if (ierr == ENOMEM) PetscInfo1(0,"Memkind: fail to request HBW memory %.0f, falling back to normal memory\n",(PetscLogDouble)mem);
171     }
172 #  else
173     newResult = memalign(PETSC_MEMALIGN,mem);
174 #  endif
175     if (!newResult) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_MEM,PETSC_ERROR_INITIAL,"Memory requested %.0f",(PetscLogDouble)mem);
176     ierr = PetscMemcpy(newResult,*result,mem);
177     if (ierr) return ierr;
178 #  if defined(PETSC_HAVE_FREE_RETURN_INT)
179     {
180       int err = free(*result);
181       if (err) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_PLIB,PETSC_ERROR_INITIAL,"System free returned error %d\n",err);
182     }
183 #  else
184 #    if defined(PETSC_HAVE_MEMKIND)
185     memkind_free(0,*result);
186 #    else
187     free(*result);
188 #    endif
189 #  endif
190     *result = newResult;
191   }
192 #endif
193   return 0;
194 }
195 
196 PetscErrorCode (*PetscTrMalloc)(size_t,PetscBool,int,const char[],const char[],void**) = PetscMallocAlign;
197 PetscErrorCode (*PetscTrFree)(void*,int,const char[],const char[])           = PetscFreeAlign;
198 PetscErrorCode (*PetscTrRealloc)(size_t,int,const char[],const char[],void**) = PetscReallocAlign;
199 
200 PETSC_INTERN PetscBool petscsetmallocvisited;
201 PetscBool petscsetmallocvisited = PETSC_FALSE;
202 
203 /*@C
204    PetscMallocSet - Sets the routines used to do mallocs and frees.
205    This routine MUST be called before PetscInitialize() and may be
206    called only once.
207 
208    Not Collective
209 
210    Input Parameters:
211 +  malloc - the malloc routine
212 -  free - the free routine
213 
214    Level: developer
215 
216 
217 @*/
218 PetscErrorCode PetscMallocSet(PetscErrorCode (*imalloc)(size_t,PetscBool,int,const char[],const char[],void**),
219                               PetscErrorCode (*ifree)(void*,int,const char[],const char[]))
220 {
221   PetscFunctionBegin;
222   if (petscsetmallocvisited && (imalloc != PetscTrMalloc || ifree != PetscTrFree)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"cannot call multiple times");
223   PetscTrMalloc         = imalloc;
224   PetscTrFree           = ifree;
225   petscsetmallocvisited = PETSC_TRUE;
226   PetscFunctionReturn(0);
227 }
228 
229 /*@C
230    PetscMallocClear - Resets the routines used to do mallocs and frees to the
231         defaults.
232 
233    Not Collective
234 
235    Level: developer
236 
237    Notes:
238     In general one should never run a PETSc program with different malloc() and
239     free() settings for different parts; this is because one NEVER wants to
240     free() an address that was malloced by a different memory management system
241 
242 @*/
243 PetscErrorCode PetscMallocClear(void)
244 {
245   PetscFunctionBegin;
246   PetscTrMalloc         = PetscMallocAlign;
247   PetscTrFree           = PetscFreeAlign;
248   petscsetmallocvisited = PETSC_FALSE;
249   PetscFunctionReturn(0);
250 }
251 
252 PetscErrorCode PetscMemoryTrace(const char label[])
253 {
254   PetscErrorCode        ierr;
255   PetscLogDouble        mem,mal;
256   static PetscLogDouble oldmem = 0,oldmal = 0;
257 
258   PetscFunctionBegin;
259   ierr = PetscMemoryGetCurrentUsage(&mem);CHKERRQ(ierr);
260   ierr = PetscMallocGetCurrentUsage(&mal);CHKERRQ(ierr);
261 
262   ierr = PetscPrintf(PETSC_COMM_WORLD,"%s High water  %8.3f MB increase %8.3f MB Current %8.3f MB increase %8.3f MB\n",label,mem*1e-6,(mem - oldmem)*1e-6,mal*1e-6,(mal - oldmal)*1e-6);CHKERRQ(ierr);
263   oldmem = mem;
264   oldmal = mal;
265   PetscFunctionReturn(0);
266 }
267 
268 static PetscErrorCode (*PetscTrMallocOld)(size_t,PetscBool,int,const char[],const char[],void**) = PetscMallocAlign;
269 static PetscErrorCode (*PetscTrFreeOld)(void*,int,const char[],const char[])           = PetscFreeAlign;
270 
271 /*@C
272    PetscMallocSetDRAM - Set PetscMalloc to use DRAM.
273      If memkind is available, change the memkind type. Otherwise, switch the
274      current malloc and free routines to the PetscMallocAlign and
275      PetscFreeAlign (PETSc default).
276 
277    Not Collective
278 
279    Level: developer
280 
281    Notes:
282      This provides a way to do the allocation on DRAM temporarily. One
283      can switch back to the previous choice by calling PetscMallocReset().
284 
285 .seealso: PetscMallocReset()
286 @*/
287 PetscErrorCode PetscMallocSetDRAM(void)
288 {
289   PetscFunctionBegin;
290   if (PetscTrMalloc == PetscMallocAlign) {
291 #if defined(PETSC_HAVE_MEMKIND)
292     previousmktype = currentmktype;
293     currentmktype  = PETSC_MK_DEFAULT;
294 #endif
295   } else {
296     /* Save the previous choice */
297     PetscTrMallocOld = PetscTrMalloc;
298     PetscTrFreeOld   = PetscTrFree;
299     PetscTrMalloc    = PetscMallocAlign;
300     PetscTrFree      = PetscFreeAlign;
301   }
302   PetscFunctionReturn(0);
303 }
304 
305 /*@C
306    PetscMallocResetDRAM - Reset the changes made by PetscMallocSetDRAM
307 
308    Not Collective
309 
310    Level: developer
311 
312 .seealso: PetscMallocSetDRAM()
313 @*/
314 PetscErrorCode PetscMallocResetDRAM(void)
315 {
316   PetscFunctionBegin;
317   if (PetscTrMalloc == PetscMallocAlign) {
318 #if defined(PETSC_HAVE_MEMKIND)
319     currentmktype = previousmktype;
320 #endif
321   } else {
322     /* Reset to the previous choice */
323     PetscTrMalloc = PetscTrMallocOld;
324     PetscTrFree   = PetscTrFreeOld;
325   }
326   PetscFunctionReturn(0);
327 }
328 
329 static PetscBool petscmalloccoalesce =
330 #if defined(PETSC_USE_MALLOC_COALESCED)
331   PETSC_TRUE;
332 #else
333   PETSC_FALSE;
334 #endif
335 
336 /*@C
337    PetscMallocSetCoalesce - Use coalesced malloc when allocating groups of objects
338 
339    Not Collective
340 
341    Input Parameters:
342 .  coalesce - PETSC_TRUE to use coalesced malloc for multi-object allocation.
343 
344    Options Database Keys:
345 .  -malloc_coalesce - turn coalesced malloc on or off
346 
347    Note:
348    PETSc uses coalesced malloc by default for optimized builds and not for debugging builds.  This default can be changed via the command-line option -malloc_coalesce or by calling this function.
349 
350    Level: developer
351 
352 .seealso: PetscMallocA()
353 @*/
354 PetscErrorCode PetscMallocSetCoalesce(PetscBool coalesce)
355 {
356   PetscFunctionBegin;
357   petscmalloccoalesce = coalesce;
358   PetscFunctionReturn(0);
359 }
360 
361 /*@C
362    PetscMallocA - Allocate and optionally clear one or more objects, possibly using coalesced malloc
363 
364    Not Collective
365 
366    Input Parameters:
367 +  n - number of objects to allocate (at least 1)
368 .  clear - use calloc() to allocate space initialized to zero
369 .  lineno - line number to attribute allocation (typically __LINE__)
370 .  function - function to attribute allocation (typically PETSC_FUNCTION_NAME)
371 .  filename - file name to attribute allocation (typically __FILE__)
372 -  bytes0 - first of n object sizes
373 
374    Output Parameters:
375 .  ptr0 - first of n pointers to allocate
376 
377    Notes:
378    This function is not normally called directly by users, but rather via the macros PetscMalloc1(), PetscMalloc2(), or PetscCalloc1(), etc.
379 
380    Level: developer
381 
382 .seealso: PetscMallocAlign(), PetscMallocSet(), PetscMalloc1(), PetscMalloc2(), PetscMalloc3(), PetscMalloc4(), PetscMalloc5(), PetscMalloc6(), PetscMalloc7(), PetscCalloc1(), PetscCalloc2(), PetscCalloc3(), PetscCalloc4(), PetscCalloc5(), PetscCalloc6(), PetscCalloc7(), PetscFreeA()
383 @*/
384 PetscErrorCode PetscMallocA(int n,PetscBool clear,int lineno,const char *function,const char *filename,size_t bytes0,void *ptr0,...)
385 {
386   PetscErrorCode ierr;
387   va_list        Argp;
388   size_t         bytes[8],sumbytes;
389   void           **ptr[8];
390   int            i;
391 
392   PetscFunctionBegin;
393   if (n > 8) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Attempt to allocate %d objects but only 8 supported",n);
394   bytes[0] = bytes0;
395   ptr[0] = (void**)ptr0;
396   sumbytes = (bytes0 + PETSC_MEMALIGN-1) & ~(PETSC_MEMALIGN-1);
397   va_start(Argp,ptr0);
398   for (i=1; i<n; i++) {
399     bytes[i] = va_arg(Argp,size_t);
400     ptr[i] = va_arg(Argp,void**);
401     sumbytes += (bytes[i] + PETSC_MEMALIGN-1) & ~(PETSC_MEMALIGN-1);
402   }
403   va_end(Argp);
404   if (petscmalloccoalesce) {
405     char *p;
406     ierr = (*PetscTrMalloc)(sumbytes,clear,lineno,function,filename,(void**)&p);CHKERRQ(ierr);
407     for (i=0; i<n; i++) {
408       *ptr[i] = bytes[i] ? p : NULL;
409       p = (char*)PetscAddrAlign(p + bytes[i]);
410     }
411   } else {
412     for (i=0; i<n; i++) {
413       ierr = (*PetscTrMalloc)(bytes[i],clear,lineno,function,filename,(void**)ptr[i]);CHKERRQ(ierr);
414     }
415   }
416   PetscFunctionReturn(0);
417 }
418 
419 /*@C
420    PetscFreeA - Free one or more objects, possibly allocated using coalesced malloc
421 
422    Not Collective
423 
424    Input Parameters:
425 +  n - number of objects to free (at least 1)
426 .  lineno - line number to attribute deallocation (typically __LINE__)
427 .  function - function to attribute deallocation (typically PETSC_FUNCTION_NAME)
428 .  filename - file name to attribute deallocation (typically __FILE__)
429 -  ptr0 ... - first of n pointers to free
430 
431    Note:
432    This function is not normally called directly by users, but rather via the macros PetscFree(), PetscFree2(), etc.
433 
434    The pointers are zeroed to prevent users from accidently reusing space that has been freed.
435 
436    Level: developer
437 
438 .seealso: PetscMallocAlign(), PetscMallocSet(), PetscMallocA(), PetscFree1(), PetscFree2(), PetscFree3(), PetscFree4(), PetscFree5(), PetscFree6(), PetscFree7()
439 @*/
440 PetscErrorCode PetscFreeA(int n,int lineno,const char *function,const char *filename,void *ptr0,...)
441 {
442   PetscErrorCode ierr;
443   va_list        Argp;
444   void           **ptr[8];
445   int            i;
446 
447   PetscFunctionBegin;
448   if (n > 8) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Attempt to allocate %d objects but only up to 8 supported",n);
449   ptr[0] = (void**)ptr0;
450   va_start(Argp,ptr0);
451   for (i=1; i<n; i++) {
452     ptr[i] = va_arg(Argp,void**);
453   }
454   va_end(Argp);
455   if (petscmalloccoalesce) {
456     for (i=0; i<n; i++) {       /* Find first nonempty allocation */
457       if (*ptr[i]) break;
458     }
459     while (--n > i) {
460       *ptr[n] = NULL;
461     }
462     ierr = (*PetscTrFree)(*ptr[n],lineno,function,filename);CHKERRQ(ierr);
463     *ptr[n] = NULL;
464   } else {
465     while (--n >= 0) {
466       ierr = (*PetscTrFree)(*ptr[n],lineno,function,filename);CHKERRQ(ierr);
467       *ptr[n] = NULL;
468     }
469   }
470   PetscFunctionReturn(0);
471 }
472