1e5c89e4eSSatish Balay /* 2e5c89e4eSSatish Balay Code that allows a user to dictate what malloc() PETSc uses. 3e5c89e4eSSatish Balay */ 4d7976ebaSJed Brown #define PETSC_DESIRE_FEATURE_TEST_MACROS /* for posix_memalign() */ 5c6db04a5SJed Brown #include <petscsys.h> /*I "petscsys.h" I*/ 6ba282f50SJed Brown #include <stdarg.h> 7e5c89e4eSSatish Balay #if defined(PETSC_HAVE_MALLOC_H) 8e5c89e4eSSatish Balay #include <malloc.h> 9e5c89e4eSSatish Balay #endif 10de1d6c17SHong Zhang #if defined(PETSC_HAVE_MEMKIND) 11ca8c994eSHong Zhang #include <errno.h> 12de1d6c17SHong Zhang #include <memkind.h> 139371c9d4SSatish Balay typedef enum { 149371c9d4SSatish Balay PETSC_MK_DEFAULT = 0, 159371c9d4SSatish Balay PETSC_MK_HBW_PREFERRED = 1 169371c9d4SSatish Balay } PetscMemkindType; 17e3acc61dSHong Zhang PetscMemkindType currentmktype = PETSC_MK_HBW_PREFERRED; 18e3acc61dSHong Zhang PetscMemkindType previousmktype = PETSC_MK_HBW_PREFERRED; 19de1d6c17SHong Zhang #endif 20e5c89e4eSSatish Balay /* 21e5c89e4eSSatish Balay We want to make sure that all mallocs of double or complex numbers are complex aligned. 22e5c89e4eSSatish Balay 1) on systems with memalign() we call that routine to get an aligned memory location 23e5c89e4eSSatish Balay 2) on systems without memalign() we 24e5c89e4eSSatish Balay - allocate one sizeof(PetscScalar) extra space 25e5c89e4eSSatish Balay - we shift the pointer up slightly if needed to get PetscScalar aligned 260700a824SBarry Smith - if shifted we store at ptr[-1] the amount of shift (plus a classid) 27e5c89e4eSSatish Balay */ 280700a824SBarry Smith #define SHIFT_CLASSID 456123 29e5c89e4eSSatish Balay 30d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode PetscMallocAlign(size_t mem, PetscBool clear, int line, const char func[], const char file[], void **result) 31d71ae5a4SJacob Faibussowitsch { 329371c9d4SSatish Balay if (!mem) { 339371c9d4SSatish Balay *result = NULL; 343ba16761SJacob Faibussowitsch return PETSC_SUCCESS; 359371c9d4SSatish Balay } 367f18b027SJacob Faibussowitsch #if PetscDefined(HAVE_MEMKIND) 37fc2a7144SHong Zhang { 383ba16761SJacob Faibussowitsch int err = memkind_posix_memalign(currentmktype ? MEMKIND_HBW_PREFERRED : MEMKIND_DEFAULT, result, PETSC_MEMALIGN, mem); 397f18b027SJacob Faibussowitsch PetscCheck(err != EINVAL, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memkind: invalid 3rd or 4th argument of memkind_posix_memalign()"); 407f18b027SJacob Faibussowitsch if (err == ENOMEM) PetscInfo(NULL, "Memkind: fail to request HBW memory %.0f, falling back to normal memory\n", (PetscLogDouble)mem); 413ba16761SJacob Faibussowitsch PetscCheck(*result, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memory requested %.0f", (PetscLogDouble)mem); 429566063dSJacob Faibussowitsch if (clear) PetscCall(PetscMemzero(*result, mem)); 43fc2a7144SHong Zhang } 447f18b027SJacob Faibussowitsch #else /* PetscDefined(HAVE_MEMKIND) */ 457f18b027SJacob Faibussowitsch #if PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8) 467f18b027SJacob Faibussowitsch if (clear) *result = calloc(1 + mem / sizeof(int), sizeof(int)); 477f18b027SJacob Faibussowitsch else *result = malloc(mem); 487f18b027SJacob Faibussowitsch 493ba16761SJacob Faibussowitsch PetscCheck(*result, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memory requested %.0f", (PetscLogDouble)mem); 509566063dSJacob Faibussowitsch if (PetscLogMemory) PetscCall(PetscMemzero(*result, mem)); 51d7976ebaSJed Brown #elif PetscDefined(HAVE_POSIX_MEMALIGN) 52d7976ebaSJed Brown int ret = posix_memalign(result, PETSC_MEMALIGN, mem); 533ba16761SJacob Faibussowitsch PetscCheck(ret == 0, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memory requested %.0f", (PetscLogDouble)mem); 547f18b027SJacob Faibussowitsch if (clear || PetscLogMemory) PetscCall(PetscMemzero(*result, mem)); 55d7976ebaSJed Brown #else /* PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) || PetscDefined(HAVE_POSIX_MEMALIGN) */ 56e5c89e4eSSatish Balay { 5766c772faSBarry Smith int *ptr, shift; 58e5c89e4eSSatish Balay /* 59e5c89e4eSSatish Balay malloc space for two extra chunks and shift ptr 1 + enough to get it PetscScalar aligned 60e5c89e4eSSatish Balay */ 61071fcb05SBarry Smith if (clear) { 62071fcb05SBarry Smith ptr = (int *)calloc(1 + (mem + 2 * PETSC_MEMALIGN) / sizeof(int), sizeof(int)); 63071fcb05SBarry Smith } else { 64071fcb05SBarry Smith ptr = (int *)malloc(mem + 2 * PETSC_MEMALIGN); 65071fcb05SBarry Smith } 663ba16761SJacob Faibussowitsch PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memory requested %.0f", (PetscLogDouble)mem); 6766c772faSBarry Smith shift = (int)(((PETSC_UINTPTR_T)ptr) % PETSC_MEMALIGN); 68e5c89e4eSSatish Balay shift = (2 * PETSC_MEMALIGN - shift) / sizeof(int); 690700a824SBarry Smith ptr[shift - 1] = shift + SHIFT_CLASSID; 70e5c89e4eSSatish Balay ptr += shift; 71e5c89e4eSSatish Balay *result = (void *)ptr; 729566063dSJacob Faibussowitsch if (PetscLogMemory) PetscCall(PetscMemzero(*result, mem)); 73e5c89e4eSSatish Balay } 74d7976ebaSJed Brown #endif /* PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) || PetscDefined(HAVE_POSIX_MEMALIGN) */ 757f18b027SJacob Faibussowitsch #endif /* PetscDefined(HAVE_MEMKIND) */ 763ba16761SJacob Faibussowitsch return PETSC_SUCCESS; 77e5c89e4eSSatish Balay } 78e5c89e4eSSatish Balay 79d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode PetscFreeAlign(void *ptr, int line, const char func[], const char file[]) 80d71ae5a4SJacob Faibussowitsch { 813ba16761SJacob Faibussowitsch if (!ptr) return PETSC_SUCCESS; 827f18b027SJacob Faibussowitsch #if PetscDefined(HAVE_MEMKIND) 83fc2a7144SHong Zhang memkind_free(0, ptr); /* specify the kind to 0 so that memkind will look up for the right type */ 847f18b027SJacob Faibussowitsch #else /* PetscDefined(HAVE_MEMKIND) */ 85d7976ebaSJed Brown #if (!(PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)) && !PetscDefined(HAVE_POSIX_MEMALIGN)) 86f0ba7cfcSLisandro Dalcin { 87e5c89e4eSSatish Balay /* 88e5c89e4eSSatish Balay Previous int tells us how many ints the pointer has been shifted from 89e5c89e4eSSatish Balay the original address provided by the system malloc(). 90e5c89e4eSSatish Balay */ 917f18b027SJacob Faibussowitsch const int shift = *(((int *)ptr) - 1) - SHIFT_CLASSID; 927f18b027SJacob Faibussowitsch 933ba16761SJacob Faibussowitsch PetscCheck(shift <= PETSC_MEMALIGN - 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Likely memory corruption in heap"); 943ba16761SJacob Faibussowitsch PetscCheck(shift >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Likely memory corruption in heap"); 95e5c89e4eSSatish Balay ptr = (void *)(((int *)ptr) - shift); 96e5c89e4eSSatish Balay } 97f0ba7cfcSLisandro Dalcin #endif 98e5c89e4eSSatish Balay 997f18b027SJacob Faibussowitsch #if PetscDefined(HAVE_FREE_RETURN_INT) 100e5c89e4eSSatish Balay int err = free(ptr); 1013ba16761SJacob Faibussowitsch PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_PLIB, "System free returned error %d\n", err); 102e5c89e4eSSatish Balay #else 103e5c89e4eSSatish Balay free(ptr); 104e5c89e4eSSatish Balay #endif 105fc2a7144SHong Zhang #endif 1063ba16761SJacob Faibussowitsch return PETSC_SUCCESS; 107e5c89e4eSSatish Balay } 108e5c89e4eSSatish Balay 109d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode PetscReallocAlign(size_t mem, int line, const char func[], const char file[], void **result) 110d71ae5a4SJacob Faibussowitsch { 111c22f1541SToby Isaac if (!mem) { 1127f18b027SJacob Faibussowitsch PetscCall(PetscFreeAlign(*result, line, func, file)); 113c22f1541SToby Isaac *result = NULL; 1143ba16761SJacob Faibussowitsch return PETSC_SUCCESS; 115c22f1541SToby Isaac } 1167f18b027SJacob Faibussowitsch #if PetscDefined(HAVE_MEMKIND) 1177f18b027SJacob Faibussowitsch *result = memkind_realloc(currentmktype ? MEMKIND_HBW_PREFERRED : MEMKIND_DEFAULT, *result, mem); 118fc2a7144SHong Zhang #else 119d7976ebaSJed Brown #if (!(PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)) && !PetscDefined(HAVE_POSIX_MEMALIGN)) 1203221ece2SMatthew G. Knepley { 1213221ece2SMatthew G. Knepley /* 1223221ece2SMatthew G. Knepley Previous int tells us how many ints the pointer has been shifted from 1233221ece2SMatthew G. Knepley the original address provided by the system malloc(). 1243221ece2SMatthew G. Knepley */ 1253221ece2SMatthew G. Knepley int shift = *(((int *)*result) - 1) - SHIFT_CLASSID; 1263ba16761SJacob Faibussowitsch PetscCheck(shift <= PETSC_MEMALIGN - 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Likely memory corruption in heap"); 1273ba16761SJacob Faibussowitsch PetscCheck(shift >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Likely memory corruption in heap"); 1283221ece2SMatthew G. Knepley *result = (void *)(((int *)*result) - shift); 1293221ece2SMatthew G. Knepley } 1303221ece2SMatthew G. Knepley #endif 1313221ece2SMatthew G. Knepley 132d7976ebaSJed Brown #if (PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)) || PetscDefined(HAVE_POSIX_MEMALIGN) 13341605b92SBarry Smith *result = realloc(*result, mem); 1343221ece2SMatthew G. Knepley #else 1353221ece2SMatthew G. Knepley { 1363221ece2SMatthew G. Knepley /* 1373221ece2SMatthew G. Knepley malloc space for two extra chunks and shift ptr 1 + enough to get it PetscScalar aligned 1383221ece2SMatthew G. Knepley */ 1393221ece2SMatthew G. Knepley int *ptr = (int *)realloc(*result, mem + 2 * PETSC_MEMALIGN); 1403221ece2SMatthew G. Knepley if (ptr) { 1413221ece2SMatthew G. Knepley int shift = (int)(((PETSC_UINTPTR_T)ptr) % PETSC_MEMALIGN); 1423221ece2SMatthew G. Knepley shift = (2 * PETSC_MEMALIGN - shift) / sizeof(int); 1433221ece2SMatthew G. Knepley ptr[shift - 1] = shift + SHIFT_CLASSID; 1443221ece2SMatthew G. Knepley ptr += shift; 1453221ece2SMatthew G. Knepley *result = (void *)ptr; 1463221ece2SMatthew G. Knepley } else { 1473221ece2SMatthew G. Knepley *result = NULL; 1483221ece2SMatthew G. Knepley } 1493221ece2SMatthew G. Knepley } 1503221ece2SMatthew G. Knepley #endif 151fc2a7144SHong Zhang #endif 1523ba16761SJacob Faibussowitsch PetscCheck(*result, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memory requested %.0f", (PetscLogDouble)mem); 153d7976ebaSJed Brown #if PetscDefined(HAVE_POSIX_MEMALIGN) 154c22f1541SToby Isaac /* There are no standard guarantees that realloc() maintains the alignment of memalign(), so I think we have to 155c22f1541SToby Isaac * realloc and, if the alignment is wrong, malloc/copy/free. */ 156*f4f49eeaSPierre Jolivet if (((size_t)*result) % PETSC_MEMALIGN) { 157c22f1541SToby Isaac void *newResult; 1587f18b027SJacob Faibussowitsch #if PetscDefined(HAVE_MEMKIND) 159fc2a7144SHong Zhang { 1603ba16761SJacob Faibussowitsch int err = memkind_posix_memalign(currentmktype ? MEMKIND_HBW_PREFERRED : MEMKIND_DEFAULT, &newResult, PETSC_MEMALIGN, mem); 1617f18b027SJacob Faibussowitsch PetscCheck(err != EINVAL, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memkind: invalid 3rd or 4th argument of memkind_posix_memalign()"); 1627f18b027SJacob Faibussowitsch if (err == ENOMEM) PetscInfo(NULL, "Memkind: fail to request HBW memory %.0f, falling back to normal memory\n", (PetscLogDouble)mem); 163fc2a7144SHong Zhang } 1643ba16761SJacob Faibussowitsch PetscCheck(newResult, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memory requested %.0f", (PetscLogDouble)mem); 165d7976ebaSJed Brown #else 1663ba16761SJacob Faibussowitsch int ret = posix_memalign(&newResult, PETSC_MEMALIGN, mem); 1673ba16761SJacob Faibussowitsch PetscCheck(ret == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "posix_memalign() failed with error code %d, memory requested %.0f", ret, (PetscLogDouble)mem); 168d7976ebaSJed Brown #endif 1697f18b027SJacob Faibussowitsch PetscCall(PetscMemcpy(newResult, *result, mem)); 1707f18b027SJacob Faibussowitsch #if PetscDefined(HAVE_FREE_RETURN_INT) 171c22f1541SToby Isaac { 172c22f1541SToby Isaac int err = free(*result); 1733ba16761SJacob Faibussowitsch PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_PLIB, "System free returned error %d", err); 174c22f1541SToby Isaac } 175c22f1541SToby Isaac #else 176de1d6c17SHong Zhang #if defined(PETSC_HAVE_MEMKIND) 177de1d6c17SHong Zhang memkind_free(0, *result); 178de1d6c17SHong Zhang #else 179c22f1541SToby Isaac free(*result); 180c22f1541SToby Isaac #endif 181de1d6c17SHong Zhang #endif 182c22f1541SToby Isaac *result = newResult; 183c22f1541SToby Isaac } 184c22f1541SToby Isaac #endif 1853ba16761SJacob Faibussowitsch return PETSC_SUCCESS; 1863221ece2SMatthew G. Knepley } 1873221ece2SMatthew G. Knepley 188071fcb05SBarry Smith PetscErrorCode (*PetscTrMalloc)(size_t, PetscBool, int, const char[], const char[], void **) = PetscMallocAlign; 189efca3c55SSatish Balay PetscErrorCode (*PetscTrFree)(void *, int, const char[], const char[]) = PetscFreeAlign; 1903221ece2SMatthew G. Knepley PetscErrorCode (*PetscTrRealloc)(size_t, int, const char[], const char[], void **) = PetscReallocAlign; 191e5c89e4eSSatish Balay 19295c0884eSLisandro Dalcin PETSC_INTERN PetscBool petscsetmallocvisited; 193ace3abfcSBarry Smith PetscBool petscsetmallocvisited = PETSC_FALSE; 194e5c89e4eSSatish Balay 195e5c89e4eSSatish Balay /*@C 1960ed210f4SBarry Smith PetscMallocSet - Sets the underlying allocation routines used by `PetscMalloc()` and `PetscFree()` 197e5c89e4eSSatish Balay 198e5c89e4eSSatish Balay Not Collective 199e5c89e4eSSatish Balay 200e5c89e4eSSatish Balay Input Parameters: 2010ed210f4SBarry Smith + imalloc - the routine that provides the `malloc()` implementation (also provides `calloc()`, which is used depending on the second argument) 2020ed210f4SBarry Smith . ifree - the routine that provides the `free()` implementation 2030ed210f4SBarry Smith - iralloc - the routine that provides the `realloc()` implementation 204e5c89e4eSSatish Balay 205e5c89e4eSSatish Balay Level: developer 206e5c89e4eSSatish Balay 2070ed210f4SBarry Smith Note: 2080ed210f4SBarry Smith This routine MUST be called before `PetscInitialize()` and may be 2090ed210f4SBarry Smith called only once. 2100ed210f4SBarry Smith 2110ed210f4SBarry Smith .seealso: `PetscMallocClear()`, `PetscInitialize()`, `PetscMalloc()`, `PetscFree()` 212e5c89e4eSSatish Balay @*/ 213d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscMallocSet(PetscErrorCode (*imalloc)(size_t, PetscBool, int, const char[], const char[], void **), PetscErrorCode (*ifree)(void *, int, const char[], const char[]), PetscErrorCode (*iralloc)(size_t, int, const char[], const char[], void **)) 214d71ae5a4SJacob Faibussowitsch { 215e5c89e4eSSatish Balay PetscFunctionBegin; 21608401ef6SPierre Jolivet PetscCheck(!petscsetmallocvisited || !(imalloc != PetscTrMalloc || ifree != PetscTrFree), PETSC_COMM_SELF, PETSC_ERR_SUP, "cannot call multiple times"); 217e5c89e4eSSatish Balay PetscTrMalloc = imalloc; 218e5c89e4eSSatish Balay PetscTrFree = ifree; 21992f119d6SBarry Smith PetscTrRealloc = iralloc; 220e5c89e4eSSatish Balay petscsetmallocvisited = PETSC_TRUE; 2213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 222e5c89e4eSSatish Balay } 223e5c89e4eSSatish Balay 224e5c89e4eSSatish Balay /*@C 2250ed210f4SBarry Smith PetscMallocClear - Resets the routines used by `PetscMalloc()` and `PetscFree()` 226e5c89e4eSSatish Balay 227e5c89e4eSSatish Balay Not Collective 228e5c89e4eSSatish Balay 229e5c89e4eSSatish Balay Level: developer 230e5c89e4eSSatish Balay 2310ed210f4SBarry Smith Notes: 2320ed210f4SBarry Smith In general one should never run a PETSc program with different `malloc()` and 2330ed210f4SBarry Smith `free()` settings for different parts; this is because one NEVER wants to 2340ed210f4SBarry Smith `free()` an address that was malloced by a different memory management system 235e5c89e4eSSatish Balay 236811af0c4SBarry Smith Called in `PetscFinalize()` so that if `PetscInitialize()` is called again it starts with a fresh slate of allocation information 23792f119d6SBarry Smith 2380ed210f4SBarry Smith .seealso: `PetscMallocSet()`, `PetscMalloc()`, `PetscFree()` 239e5c89e4eSSatish Balay @*/ 240d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscMallocClear(void) 241d71ae5a4SJacob Faibussowitsch { 242e5c89e4eSSatish Balay PetscFunctionBegin; 243e5c89e4eSSatish Balay PetscTrMalloc = PetscMallocAlign; 244e5c89e4eSSatish Balay PetscTrFree = PetscFreeAlign; 24592f119d6SBarry Smith PetscTrRealloc = PetscReallocAlign; 246e5c89e4eSSatish Balay petscsetmallocvisited = PETSC_FALSE; 2473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 248e5c89e4eSSatish Balay } 249b44d5720SBarry Smith 250d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscMemoryTrace(const char label[]) 251d71ae5a4SJacob Faibussowitsch { 252b44d5720SBarry Smith PetscLogDouble mem, mal; 253b44d5720SBarry Smith static PetscLogDouble oldmem = 0, oldmal = 0; 254b44d5720SBarry Smith 255b44d5720SBarry Smith PetscFunctionBegin; 2569566063dSJacob Faibussowitsch PetscCall(PetscMemoryGetCurrentUsage(&mem)); 2579566063dSJacob Faibussowitsch PetscCall(PetscMallocGetCurrentUsage(&mal)); 258b44d5720SBarry Smith 2599566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%s High water %8.3f MB increase %8.3f MB Current %8.3f MB increase %8.3f MB\n", label, mem * 1e-6, (mem - oldmem) * 1e-6, mal * 1e-6, (mal - oldmal) * 1e-6)); 260b44d5720SBarry Smith oldmem = mem; 261b44d5720SBarry Smith oldmal = mal; 2623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 263b44d5720SBarry Smith } 26413850c04SHong Zhang 265071fcb05SBarry Smith static PetscErrorCode (*PetscTrMallocOld)(size_t, PetscBool, int, const char[], const char[], void **) = PetscMallocAlign; 26692f119d6SBarry Smith static PetscErrorCode (*PetscTrReallocOld)(size_t, int, const char[], const char[], void **) = PetscReallocAlign; 26750a41461SHong Zhang static PetscErrorCode (*PetscTrFreeOld)(void *, int, const char[], const char[]) = PetscFreeAlign; 268de1d6c17SHong Zhang 269de1d6c17SHong Zhang /*@C 270811af0c4SBarry Smith PetscMallocSetDRAM - Set `PetscMalloc()` to use DRAM. 271de1d6c17SHong Zhang If memkind is available, change the memkind type. Otherwise, switch the 272811af0c4SBarry Smith current malloc and free routines to the `PetscMallocAlign()` and 273811af0c4SBarry Smith `PetscFreeAlign()` (PETSc default). 274de1d6c17SHong Zhang 275de1d6c17SHong Zhang Not Collective 276de1d6c17SHong Zhang 277de1d6c17SHong Zhang Level: developer 278de1d6c17SHong Zhang 279811af0c4SBarry Smith Note: 280de1d6c17SHong Zhang This provides a way to do the allocation on DRAM temporarily. One 281811af0c4SBarry Smith can switch back to the previous choice by calling `PetscMallocReset()`. 282de1d6c17SHong Zhang 2830ed210f4SBarry Smith .seealso: `PetscMallocReset()`, `PetscMalloc()`, `PetscFree()` 284de1d6c17SHong Zhang @*/ 285d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscMallocSetDRAM(void) 286d71ae5a4SJacob Faibussowitsch { 28713850c04SHong Zhang PetscFunctionBegin; 288de1d6c17SHong Zhang if (PetscTrMalloc == PetscMallocAlign) { 289de1d6c17SHong Zhang #if defined(PETSC_HAVE_MEMKIND) 290de1d6c17SHong Zhang previousmktype = currentmktype; 291de1d6c17SHong Zhang currentmktype = PETSC_MK_DEFAULT; 292de1d6c17SHong Zhang #endif 293de1d6c17SHong Zhang } else { 29413850c04SHong Zhang /* Save the previous choice */ 29513850c04SHong Zhang PetscTrMallocOld = PetscTrMalloc; 29692f119d6SBarry Smith PetscTrReallocOld = PetscTrRealloc; 29713850c04SHong Zhang PetscTrFreeOld = PetscTrFree; 29813850c04SHong Zhang PetscTrMalloc = PetscMallocAlign; 29913850c04SHong Zhang PetscTrFree = PetscFreeAlign; 30092f119d6SBarry Smith PetscTrRealloc = PetscReallocAlign; 301de1d6c17SHong Zhang } 3023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 30313850c04SHong Zhang } 30413850c04SHong Zhang 305de1d6c17SHong Zhang /*@C 306811af0c4SBarry Smith PetscMallocResetDRAM - Reset the changes made by `PetscMallocSetDRAM()` 307de1d6c17SHong Zhang 308de1d6c17SHong Zhang Not Collective 309de1d6c17SHong Zhang 310de1d6c17SHong Zhang Level: developer 311de1d6c17SHong Zhang 312db781477SPatrick Sanan .seealso: `PetscMallocSetDRAM()` 313de1d6c17SHong Zhang @*/ 314d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscMallocResetDRAM(void) 315d71ae5a4SJacob Faibussowitsch { 31613850c04SHong Zhang PetscFunctionBegin; 317de1d6c17SHong Zhang if (PetscTrMalloc == PetscMallocAlign) { 318de1d6c17SHong Zhang #if defined(PETSC_HAVE_MEMKIND) 319de1d6c17SHong Zhang currentmktype = previousmktype; 320de1d6c17SHong Zhang #endif 321de1d6c17SHong Zhang } else { 32213850c04SHong Zhang /* Reset to the previous choice */ 32313850c04SHong Zhang PetscTrMalloc = PetscTrMallocOld; 32492f119d6SBarry Smith PetscTrRealloc = PetscTrReallocOld; 32513850c04SHong Zhang PetscTrFree = PetscTrFreeOld; 326de1d6c17SHong Zhang } 3273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 32813850c04SHong Zhang } 329ba282f50SJed Brown 330ba282f50SJed Brown static PetscBool petscmalloccoalesce = 331ba282f50SJed Brown #if defined(PETSC_USE_MALLOC_COALESCED) 332ba282f50SJed Brown PETSC_TRUE; 333ba282f50SJed Brown #else 334ba282f50SJed Brown PETSC_FALSE; 335ba282f50SJed Brown #endif 336ba282f50SJed Brown 337ba282f50SJed Brown /*@C 3380ed210f4SBarry Smith PetscMallocSetCoalesce - Use coalesced `PetscMalloc()` when allocating groups of objects, that is when using `PetscMallocN()` 339ba282f50SJed Brown 340ba282f50SJed Brown Not Collective 341ba282f50SJed Brown 3422fe279fdSBarry Smith Input Parameter: 3430ed210f4SBarry Smith . coalesce - `PETSC_TRUE` to use coalesced malloc for multi-memory allocation. 344ba282f50SJed Brown 3450ed210f4SBarry Smith Options Database Key: 3462d853995SBarry Smith . -malloc_coalesce - turn coalesced `PetscMallocN()` on or off 347ba282f50SJed Brown 3480ed210f4SBarry Smith Level: developer 3490ed210f4SBarry Smith 350811af0c4SBarry Smith Notes: 3512d853995SBarry Smith PETSc uses coalesced `PetscMallocN()` by default for optimized builds and not for debugging builds. 352811af0c4SBarry Smith 3530ed210f4SBarry Smith This default can be changed via the command-line option `-malloc_coalesce` or by calling this function. 354811af0c4SBarry Smith 355811af0c4SBarry Smith This function can only be called immediately after `PetscInitialize()` 356ba282f50SJed Brown 3570ed210f4SBarry Smith .seealso: `PetscMallocA()`, `PetscMalloc()`, `PetscFree()` 358ba282f50SJed Brown @*/ 359d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscMallocSetCoalesce(PetscBool coalesce) 360d71ae5a4SJacob Faibussowitsch { 361ba282f50SJed Brown PetscFunctionBegin; 362ba282f50SJed Brown petscmalloccoalesce = coalesce; 3633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 364ba282f50SJed Brown } 365ba282f50SJed Brown 366ba282f50SJed Brown /*@C 3670ed210f4SBarry Smith PetscMallocA - Allocate and optionally clear one or more memory locations, possibly using coalesced malloc 368ba282f50SJed Brown 369ba282f50SJed Brown Not Collective 370ba282f50SJed Brown 371ba282f50SJed Brown Input Parameters: 372ba282f50SJed Brown + n - number of objects to allocate (at least 1) 3730ed210f4SBarry Smith . clear - use `calloc()` to allocate space initialized to zero 3740ed210f4SBarry Smith . lineno - line number to attribute allocation (typically `__LINE__`) 3750ed210f4SBarry Smith . function - function to attribute allocation (typically `PETSC_FUNCTION_NAME`) 3760ed210f4SBarry Smith . filename - file name to attribute allocation (typically `__FILE__`) 3770ed210f4SBarry Smith - bytes0 - first of `n` object sizes 378ba282f50SJed Brown 3792fe279fdSBarry Smith Output Parameter: 3800ed210f4SBarry Smith . ptr0 - first of `n` pointers to allocate 381ba282f50SJed Brown 382ba282f50SJed Brown Level: developer 383ba282f50SJed Brown 3840ed210f4SBarry Smith Note: 3850ed210f4SBarry Smith This function is not normally called directly by users, but rather via the macros `PetscMalloc1()`, `PetscMalloc2()`, or `PetscCalloc1()`, etc. 3860ed210f4SBarry Smith 3870ed210f4SBarry Smith .seealso: `PetscMallocAlign()`, `PetscMallocSet()`, `PetscMalloc1()`, `PetscMalloc2()`, `PetscMalloc3()`, `PetscMalloc4()`, `PetscMalloc5()`, `PetscMalloc6()`, `PetscMalloc7()`, 3880ed210f4SBarry Smith `PetscCalloc1()`, `PetscCalloc2()`, `PetscCalloc3()`, `PetscCalloc4()`, `PetscCalloc5()`, `PetscCalloc6()`, `PetscCalloc7()`, `PetscFreeA()` 389ba282f50SJed Brown @*/ 390d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscMallocA(int n, PetscBool clear, int lineno, const char *function, const char *filename, size_t bytes0, void *ptr0, ...) 391d71ae5a4SJacob Faibussowitsch { 392ba282f50SJed Brown va_list Argp; 393ba282f50SJed Brown size_t bytes[8], sumbytes; 394ba282f50SJed Brown void **ptr[8]; 395ba282f50SJed Brown int i; 396ba282f50SJed Brown 397ba282f50SJed Brown PetscFunctionBegin; 3987f18b027SJacob Faibussowitsch PetscCheck(n <= 8, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Attempt to allocate %d objects but only 8 supported", n); 399ba282f50SJed Brown bytes[0] = bytes0; 400ba282f50SJed Brown ptr[0] = (void **)ptr0; 401ba282f50SJed Brown sumbytes = (bytes0 + PETSC_MEMALIGN - 1) & ~(PETSC_MEMALIGN - 1); 402ba282f50SJed Brown va_start(Argp, ptr0); 403ba282f50SJed Brown for (i = 1; i < n; i++) { 404ba282f50SJed Brown bytes[i] = va_arg(Argp, size_t); 405ba282f50SJed Brown ptr[i] = va_arg(Argp, void **); 406ba282f50SJed Brown sumbytes += (bytes[i] + PETSC_MEMALIGN - 1) & ~(PETSC_MEMALIGN - 1); 407ba282f50SJed Brown } 408ba282f50SJed Brown va_end(Argp); 409ba282f50SJed Brown if (petscmalloccoalesce) { 410ba282f50SJed Brown char *p; 4119566063dSJacob Faibussowitsch PetscCall((*PetscTrMalloc)(sumbytes, clear, lineno, function, filename, (void **)&p)); 412640c8569SMatthew Woehlke if (p == NULL) { 413ad540459SPierre Jolivet for (i = 0; i < n; i++) *ptr[i] = NULL; 414640c8569SMatthew Woehlke } else { 415ba282f50SJed Brown for (i = 0; i < n; i++) { 416ba282f50SJed Brown *ptr[i] = bytes[i] ? p : NULL; 417ba282f50SJed Brown p = (char *)PetscAddrAlign(p + bytes[i]); 418ba282f50SJed Brown } 419640c8569SMatthew Woehlke } 420ba282f50SJed Brown } else { 42148a46eb9SPierre Jolivet for (i = 0; i < n; i++) PetscCall((*PetscTrMalloc)(bytes[i], clear, lineno, function, filename, (void **)ptr[i])); 422ba282f50SJed Brown } 4233ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 424ba282f50SJed Brown } 425ba282f50SJed Brown 426ba282f50SJed Brown /*@C 4272d853995SBarry Smith PetscFreeA - Free one or more memory locations, possibly allocated using coalesced `PetscMallocN()` 428ba282f50SJed Brown 429ba282f50SJed Brown Not Collective 430ba282f50SJed Brown 431ba282f50SJed Brown Input Parameters: 432ba282f50SJed Brown + n - number of objects to free (at least 1) 4330ed210f4SBarry Smith . lineno - line number to attribute deallocation (typically `__LINE__`) 4340ed210f4SBarry Smith . function - function to attribute deallocation (typically `PETSC_FUNCTION_NAME`) 4350ed210f4SBarry Smith . filename - file name to attribute deallocation (typically `__FILE__`) 4360ed210f4SBarry Smith - ptr0 - first of `n` pointers to free 4370ed210f4SBarry Smith 4380ed210f4SBarry Smith Level: developer 439ba282f50SJed Brown 440811af0c4SBarry Smith Notes: 441811af0c4SBarry Smith This function is not normally called directly by users, but rather via the macros `PetscFree()`, `PetscFree2()`, etc. 442ba282f50SJed Brown 443a5b23f4aSJose E. Roman The pointers are zeroed to prevent users from accidentally reusing space that has been freed. 44489407d75SBarry Smith 4457e17494fSJames Wright If the arguments were obtained via `PetscMallocA()`, `PetscMalloc2()`, `PetscMalloc3()`, etc., then the arguments must be passed in the same order to the corresponding `PetscFreeA()`, `PetscFree2()`, `PetscFree3()`, respectively. 4467e17494fSJames Wright 447db781477SPatrick Sanan .seealso: `PetscMallocAlign()`, `PetscMallocSet()`, `PetscMallocA()`, `PetscFree1()`, `PetscFree2()`, `PetscFree3()`, `PetscFree4()`, `PetscFree5()`, `PetscFree6()`, `PetscFree7()` 448ba282f50SJed Brown @*/ 449d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscFreeA(int n, int lineno, const char *function, const char *filename, void *ptr0, ...) 450d71ae5a4SJacob Faibussowitsch { 451ba282f50SJed Brown va_list Argp; 452ba282f50SJed Brown void **ptr[8]; 453ba282f50SJed Brown int i; 454ba282f50SJed Brown 455ba282f50SJed Brown PetscFunctionBegin; 456eae3dc7dSJacob Faibussowitsch PetscCheck((n >= 1) && (n <= 8), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Attempt to allocate %d objects but only up to 8 supported", n); 457ba282f50SJed Brown ptr[0] = (void **)ptr0; 458ba282f50SJed Brown va_start(Argp, ptr0); 459ad540459SPierre Jolivet for (i = 1; i < n; i++) ptr[i] = va_arg(Argp, void **); 460ba282f50SJed Brown va_end(Argp); 461ba282f50SJed Brown if (petscmalloccoalesce) { 462ba282f50SJed Brown for (i = 0; i < n; i++) { /* Find first nonempty allocation */ 463ba282f50SJed Brown if (*ptr[i]) break; 464ba282f50SJed Brown } 465ad540459SPierre Jolivet while (--n > i) *ptr[n] = NULL; 4669566063dSJacob Faibussowitsch PetscCall((*PetscTrFree)(*ptr[n], lineno, function, filename)); 467ba282f50SJed Brown *ptr[n] = NULL; 468ba282f50SJed Brown } else { 469ba282f50SJed Brown while (--n >= 0) { 4709566063dSJacob Faibussowitsch PetscCall((*PetscTrFree)(*ptr[n], lineno, function, filename)); 471ba282f50SJed Brown *ptr[n] = NULL; 472ba282f50SJed Brown } 473ba282f50SJed Brown } 4743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 475ba282f50SJed Brown } 476