/* Code that allows a user to dictate what malloc() PETSc uses. */ #define PETSC_DESIRE_FEATURE_TEST_MACROS /* for posix_memalign() */ #include /*I "petscsys.h" I*/ #include #if defined(PETSC_HAVE_MALLOC_H) #include #endif #if defined(PETSC_HAVE_MEMKIND) #include #include typedef enum { PETSC_MK_DEFAULT = 0, PETSC_MK_HBW_PREFERRED = 1 } PetscMemkindType; PetscMemkindType currentmktype = PETSC_MK_HBW_PREFERRED; PetscMemkindType previousmktype = PETSC_MK_HBW_PREFERRED; #endif /* We want to make sure that all mallocs of double or complex numbers are complex aligned. 1) on systems with memalign() we call that routine to get an aligned memory location 2) on systems without memalign() we - allocate one sizeof(PetscScalar) extra space - we shift the pointer up slightly if needed to get PetscScalar aligned - if shifted we store at ptr[-1] the amount of shift (plus a classid) */ #define SHIFT_CLASSID 456123 PETSC_EXTERN PetscErrorCode PetscMallocAlign(size_t mem, PetscBool clear, int line, const char func[], const char file[], void **result) { if (!mem) { *result = NULL; return PETSC_SUCCESS; } #if PetscDefined(HAVE_MEMKIND) { int err = memkind_posix_memalign(currentmktype ? MEMKIND_HBW_PREFERRED : MEMKIND_DEFAULT, result, PETSC_MEMALIGN, mem); PetscCheck(err != EINVAL, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memkind: invalid 3rd or 4th argument of memkind_posix_memalign()"); if (err == ENOMEM) PetscInfo(NULL, "Memkind: fail to request HBW memory %.0f, falling back to normal memory\n", (PetscLogDouble)mem); PetscCheck(*result, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memory requested %.0f", (PetscLogDouble)mem); if (clear) PetscCall(PetscMemzero(*result, mem)); } #else /* PetscDefined(HAVE_MEMKIND) */ #if PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8) if (clear) *result = calloc(1 + mem / sizeof(int), sizeof(int)); else *result = malloc(mem); PetscCheck(*result, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memory requested %.0f", (PetscLogDouble)mem); if (PetscLogMemory) PetscCall(PetscMemzero(*result, mem)); #elif PetscDefined(HAVE_POSIX_MEMALIGN) int ret = posix_memalign(result, PETSC_MEMALIGN, mem); PetscCheck(ret == 0, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memory requested %.0f", (PetscLogDouble)mem); if (clear || PetscLogMemory) PetscCall(PetscMemzero(*result, mem)); #else /* PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) || PetscDefined(HAVE_POSIX_MEMALIGN) */ { int *ptr, shift; /* malloc space for two extra chunks and shift ptr 1 + enough to get it PetscScalar aligned */ if (clear) { ptr = (int *)calloc(1 + (mem + 2 * PETSC_MEMALIGN) / sizeof(int), sizeof(int)); } else { ptr = (int *)malloc(mem + 2 * PETSC_MEMALIGN); } PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memory requested %.0f", (PetscLogDouble)mem); shift = (int)(((PETSC_UINTPTR_T)ptr) % PETSC_MEMALIGN); shift = (2 * PETSC_MEMALIGN - shift) / sizeof(int); ptr[shift - 1] = shift + SHIFT_CLASSID; ptr += shift; *result = (void *)ptr; if (PetscLogMemory) PetscCall(PetscMemzero(*result, mem)); } #endif /* PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) || PetscDefined(HAVE_POSIX_MEMALIGN) */ #endif /* PetscDefined(HAVE_MEMKIND) */ return PETSC_SUCCESS; } PETSC_EXTERN PetscErrorCode PetscFreeAlign(void *ptr, int line, const char func[], const char file[]) { if (!ptr) return PETSC_SUCCESS; #if PetscDefined(HAVE_MEMKIND) memkind_free(0, ptr); /* specify the kind to 0 so that memkind will look up for the right type */ #else /* PetscDefined(HAVE_MEMKIND) */ #if (!(PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)) && !PetscDefined(HAVE_POSIX_MEMALIGN)) { /* Previous int tells us how many ints the pointer has been shifted from the original address provided by the system malloc(). */ const int shift = *((int *)ptr - 1) - SHIFT_CLASSID; PetscCheck(shift <= PETSC_MEMALIGN - 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Likely memory corruption in heap"); PetscCheck(shift >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Likely memory corruption in heap"); ptr = (void *)((int *)ptr - shift); } #endif #if PetscDefined(HAVE_FREE_RETURN_INT) int err = free(ptr); PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_PLIB, "System free returned error %d", err); #else free(ptr); #endif #endif return PETSC_SUCCESS; } PETSC_EXTERN PetscErrorCode PetscReallocAlign(size_t mem, int line, const char func[], const char file[], void **result) { if (!mem) { PetscCall(PetscFreeAlign(*result, line, func, file)); *result = NULL; return PETSC_SUCCESS; } #if PetscDefined(HAVE_MEMKIND) *result = memkind_realloc(currentmktype ? MEMKIND_HBW_PREFERRED : MEMKIND_DEFAULT, *result, mem); #else #if (!(PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)) && !PetscDefined(HAVE_POSIX_MEMALIGN)) { /* Previous int tells us how many ints the pointer has been shifted from the original address provided by the system malloc(). */ int shift = *(((int *)*result) - 1) - SHIFT_CLASSID; PetscCheck(shift <= PETSC_MEMALIGN - 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Likely memory corruption in heap"); PetscCheck(shift >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Likely memory corruption in heap"); *result = (void *)(((int *)*result) - shift); } #endif #if (PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)) || PetscDefined(HAVE_POSIX_MEMALIGN) *result = realloc(*result, mem); #else { /* malloc space for two extra chunks and shift ptr 1 + enough to get it PetscScalar aligned */ int *ptr = (int *)realloc(*result, mem + 2 * PETSC_MEMALIGN); if (ptr) { int shift = (int)(((PETSC_UINTPTR_T)ptr) % PETSC_MEMALIGN); shift = (2 * PETSC_MEMALIGN - shift) / sizeof(int); ptr[shift - 1] = shift + SHIFT_CLASSID; ptr += shift; *result = (void *)ptr; } else { *result = NULL; } } #endif #endif PetscCheck(*result, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memory requested %.0f", (PetscLogDouble)mem); #if PetscDefined(HAVE_POSIX_MEMALIGN) /* There are no standard guarantees that realloc() maintains the alignment of memalign(), so I think we have to * realloc and, if the alignment is wrong, malloc/copy/free. */ if (((size_t)*result) % PETSC_MEMALIGN) { void *newResult; #if PetscDefined(HAVE_MEMKIND) { int err = memkind_posix_memalign(currentmktype ? MEMKIND_HBW_PREFERRED : MEMKIND_DEFAULT, &newResult, PETSC_MEMALIGN, mem); PetscCheck(err != EINVAL, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memkind: invalid 3rd or 4th argument of memkind_posix_memalign()"); if (err == ENOMEM) PetscInfo(NULL, "Memkind: fail to request HBW memory %.0f, falling back to normal memory\n", (PetscLogDouble)mem); } PetscCheck(newResult, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memory requested %.0f", (PetscLogDouble)mem); #else int ret = posix_memalign(&newResult, PETSC_MEMALIGN, mem); PetscCheck(ret == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "posix_memalign() failed with error code %d, memory requested %.0f", ret, (PetscLogDouble)mem); #endif PetscCall(PetscMemcpy(newResult, *result, mem)); #if PetscDefined(HAVE_FREE_RETURN_INT) { int err = free(*result); PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_PLIB, "System free returned error %d", err); } #else #if defined(PETSC_HAVE_MEMKIND) memkind_free(0, *result); #else free(*result); #endif #endif *result = newResult; } #endif return PETSC_SUCCESS; } PetscErrorCode (*PetscTrMalloc)(size_t, PetscBool, int, const char[], const char[], void **) = PetscMallocAlign; PetscErrorCode (*PetscTrFree)(void *, int, const char[], const char[]) = PetscFreeAlign; PetscErrorCode (*PetscTrRealloc)(size_t, int, const char[], const char[], void **) = PetscReallocAlign; PETSC_INTERN PetscBool petscsetmallocvisited; PetscBool petscsetmallocvisited = PETSC_FALSE; /*@C PetscMallocSet - Sets the underlying allocation routines used by `PetscMalloc()` and `PetscFree()` Not Collective, No Fortran Support Input Parameters: + imalloc - the routine that provides the `malloc()` implementation (also provides `calloc()`, which is used depending on the second argument) . ifree - the routine that provides the `free()` implementation - iralloc - the routine that provides the `realloc()` implementation Level: developer Note: This routine MUST be called before `PetscInitialize()` and may be called only once. .seealso: `PetscMallocClear()`, `PetscInitialize()`, `PetscMalloc()`, `PetscFree()` @*/ PetscErrorCode PetscMallocSet(PetscErrorCode (*imalloc)(size_t, PetscBool, int, const char[], const char[], void **), PetscErrorCode (*ifree)(void *, int, const char[], const char[]), PetscErrorCode (*iralloc)(size_t, int, const char[], const char[], void **)) { PetscFunctionBegin; PetscCheck(!petscsetmallocvisited || !(imalloc != PetscTrMalloc || ifree != PetscTrFree), PETSC_COMM_SELF, PETSC_ERR_SUP, "cannot call multiple times"); PetscTrMalloc = imalloc; PetscTrFree = ifree; PetscTrRealloc = iralloc; petscsetmallocvisited = PETSC_TRUE; PetscFunctionReturn(PETSC_SUCCESS); } /*@ PetscMallocClear - Resets the routines used by `PetscMalloc()` and `PetscFree()` Not Collective Level: developer Notes: In general one should never run a PETSc program with different `malloc()` and `free()` settings for different parts; this is because one NEVER wants to `free()` an address that was malloced by a different memory management system Called in `PetscFinalize()` so that if `PetscInitialize()` is called again it starts with a fresh slate of allocation information .seealso: `PetscMallocSet()`, `PetscMalloc()`, `PetscFree()` @*/ PetscErrorCode PetscMallocClear(void) { PetscFunctionBegin; PetscTrMalloc = PetscMallocAlign; PetscTrFree = PetscFreeAlign; PetscTrRealloc = PetscReallocAlign; petscsetmallocvisited = PETSC_FALSE; PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode PetscMemoryTrace(const char label[]) { PetscLogDouble mem, mal; static PetscLogDouble oldmem = 0, oldmal = 0; PetscFunctionBegin; PetscCall(PetscMemoryGetCurrentUsage(&mem)); PetscCall(PetscMallocGetCurrentUsage(&mal)); PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%s High water %8.3f MB increase %8.3f MB Current %8.3f MB increase %8.3f MB\n", label, mem * 1e-6, (mem - oldmem) * 1e-6, mal * 1e-6, (mal - oldmal) * 1e-6)); oldmem = mem; oldmal = mal; PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode (*PetscTrMallocOld)(size_t, PetscBool, int, const char[], const char[], void **) = PetscMallocAlign; static PetscErrorCode (*PetscTrReallocOld)(size_t, int, const char[], const char[], void **) = PetscReallocAlign; static PetscErrorCode (*PetscTrFreeOld)(void *, int, const char[], const char[]) = PetscFreeAlign; /*@ PetscMallocSetDRAM - Set `PetscMalloc()` to use DRAM. If memkind is available, change the memkind type. Otherwise, switch the current malloc and free routines to the `PetscMallocAlign()` and `PetscFreeAlign()` (PETSc default). Not Collective Level: developer Note: This provides a way to do the allocation on DRAM temporarily. One can switch back to the previous choice by calling `PetscMallocReset()`. .seealso: `PetscMallocReset()`, `PetscMalloc()`, `PetscFree()` @*/ PetscErrorCode PetscMallocSetDRAM(void) { PetscFunctionBegin; if (PetscTrMalloc == PetscMallocAlign) { #if defined(PETSC_HAVE_MEMKIND) previousmktype = currentmktype; currentmktype = PETSC_MK_DEFAULT; #endif } else { /* Save the previous choice */ PetscTrMallocOld = PetscTrMalloc; PetscTrReallocOld = PetscTrRealloc; PetscTrFreeOld = PetscTrFree; PetscTrMalloc = PetscMallocAlign; PetscTrFree = PetscFreeAlign; PetscTrRealloc = PetscReallocAlign; } PetscFunctionReturn(PETSC_SUCCESS); } /*@ PetscMallocResetDRAM - Reset the changes made by `PetscMallocSetDRAM()` Not Collective Level: developer .seealso: `PetscMallocSetDRAM()` @*/ PetscErrorCode PetscMallocResetDRAM(void) { PetscFunctionBegin; if (PetscTrMalloc == PetscMallocAlign) { #if defined(PETSC_HAVE_MEMKIND) currentmktype = previousmktype; #endif } else { /* Reset to the previous choice */ PetscTrMalloc = PetscTrMallocOld; PetscTrRealloc = PetscTrReallocOld; PetscTrFree = PetscTrFreeOld; } PetscFunctionReturn(PETSC_SUCCESS); } static PetscBool petscmalloccoalesce = PetscDefined(USE_MALLOC_COALESCED) ? PETSC_TRUE : PETSC_FALSE; /*@ PetscMallocSetCoalesce - Use coalesced `PetscMalloc()` when allocating groups of objects, that is when using `PetscMallocN()` Not Collective Input Parameter: . coalesce - `PETSC_TRUE` to use coalesced malloc for multi-memory allocation. Options Database Key: . -malloc_coalesce - turn coalesced `PetscMallocN()` on or off Level: developer Notes: PETSc uses coalesced `PetscMallocN()` by default for optimized builds and not for debugging builds. This default can be changed via the command-line option `-malloc_coalesce` or by calling this function. This function can only be called immediately after `PetscInitialize()` .seealso: `PetscMallocA()`, `PetscMalloc()`, `PetscFree()` @*/ PetscErrorCode PetscMallocSetCoalesce(PetscBool coalesce) { PetscFunctionBegin; petscmalloccoalesce = coalesce; PetscFunctionReturn(PETSC_SUCCESS); } /*@C PetscMallocA - Allocate and optionally clear one or more memory locations, possibly using coalesced malloc Not Collective, No Fortran Support Input Parameters: + n - number of objects to allocate (at least 1) . clear - use `calloc()` to allocate space initialized to zero . lineno - line number to attribute allocation (typically `__LINE__`) . function - function to attribute allocation (typically `PETSC_FUNCTION_NAME`) . filename - file name to attribute allocation (typically `__FILE__`) - bytes0 - first of `n` object sizes Output Parameter: . ptr0 - first of `n` pointers to allocate Level: developer Note: This function is not normally called directly by users, but rather via the macros `PetscMalloc1()`, `PetscMalloc2()`, or `PetscCalloc1()`, etc. .seealso: `PetscMallocAlign()`, `PetscMallocSet()`, `PetscMalloc1()`, `PetscMalloc2()`, `PetscMalloc3()`, `PetscMalloc4()`, `PetscMalloc5()`, `PetscMalloc6()`, `PetscMalloc7()`, `PetscCalloc1()`, `PetscCalloc2()`, `PetscCalloc3()`, `PetscCalloc4()`, `PetscCalloc5()`, `PetscCalloc6()`, `PetscCalloc7()`, `PetscFreeA()` @*/ PetscErrorCode PetscMallocA(int n, PetscBool clear, int lineno, const char *function, const char *filename, size_t bytes0, void *ptr0, ...) { va_list Argp; size_t bytes[8], sumbytes; void **ptr[8]; int i; PetscFunctionBegin; PetscCheck(n <= 8, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Attempt to allocate %d objects but only 8 supported", n); bytes[0] = bytes0; ptr[0] = (void **)ptr0; sumbytes = (bytes0 + PETSC_MEMALIGN - 1) & ~(PETSC_MEMALIGN - 1); va_start(Argp, ptr0); for (i = 1; i < n; i++) { bytes[i] = va_arg(Argp, size_t); ptr[i] = va_arg(Argp, void **); sumbytes += (bytes[i] + PETSC_MEMALIGN - 1) & ~(PETSC_MEMALIGN - 1); } va_end(Argp); if (petscmalloccoalesce) { char *p; PetscCall((*PetscTrMalloc)(sumbytes, clear, lineno, function, filename, (void **)&p)); if (p == NULL) { for (i = 0; i < n; i++) *ptr[i] = NULL; } else { for (i = 0; i < n; i++) { *ptr[i] = bytes[i] ? p : NULL; p = (char *)PetscAddrAlign(p + bytes[i]); } } } else { for (i = 0; i < n; i++) PetscCall((*PetscTrMalloc)(bytes[i], clear, lineno, function, filename, ptr[i])); } PetscFunctionReturn(PETSC_SUCCESS); } /*@C PetscFreeA - Free one or more memory locations, possibly allocated using coalesced `PetscMallocN()` Not Collective, No Fortran Support Input Parameters: + n - number of objects to free (at least 1) . lineno - line number to attribute deallocation (typically `__LINE__`) . function - function to attribute deallocation (typically `PETSC_FUNCTION_NAME`) . filename - file name to attribute deallocation (typically `__FILE__`) - ptr0 - first of `n` pointers to free Level: developer Notes: This function is not normally called directly by users, but rather via the macros `PetscFree()`, `PetscFree2()`, etc. The pointers are zeroed to prevent users from accidentally reusing space that has been freed. If the arguments were obtained via `PetscMallocA()`, `PetscMalloc2()`, `PetscMalloc3()`, etc., then the arguments must be passed in the same order to the corresponding `PetscFreeA()`, `PetscFree2()`, `PetscFree3()`, respectively. .seealso: `PetscMallocAlign()`, `PetscMallocSet()`, `PetscMallocA()`, `PetscFree()`, `PetscFree2()`, `PetscFree3()`, `PetscFree4()`, `PetscFree5()`, `PetscFree6()`, `PetscFree7()` @*/ PetscErrorCode PetscFreeA(int n, int lineno, const char *function, const char *filename, void *ptr0, ...) { va_list Argp; void **ptr[8]; int i; PetscFunctionBegin; PetscCheck((n >= 1) && (n <= 8), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Attempt to allocate %d objects but only up to 8 supported", n); ptr[0] = (void **)ptr0; va_start(Argp, ptr0); for (i = 1; i < n; i++) ptr[i] = va_arg(Argp, void **); va_end(Argp); if (petscmalloccoalesce) { for (i = 0; i < n; i++) { /* Find first nonempty allocation */ if (*ptr[i]) break; } while (--n > i) *ptr[n] = NULL; PetscCall((*PetscTrFree)(*ptr[n], lineno, function, filename)); *ptr[n] = NULL; } else { while (--n >= 0) { PetscCall((*PetscTrFree)(*ptr[n], lineno, function, filename)); *ptr[n] = NULL; } } PetscFunctionReturn(PETSC_SUCCESS); }