1 /* 2 Code that allows a user to dictate what malloc() PETSc uses. 3 */ 4 #define PETSC_DESIRE_FEATURE_TEST_MACROS /* for posix_memalign() */ 5 #include <petscsys.h> /*I "petscsys.h" I*/ 6 #include <stdarg.h> 7 #if defined(PETSC_HAVE_MALLOC_H) 8 #include <malloc.h> 9 #endif 10 #if defined(PETSC_HAVE_MEMKIND) 11 #include <errno.h> 12 #include <memkind.h> 13 typedef enum { 14 PETSC_MK_DEFAULT = 0, 15 PETSC_MK_HBW_PREFERRED = 1 16 } PetscMemkindType; 17 PetscMemkindType currentmktype = PETSC_MK_HBW_PREFERRED; 18 PetscMemkindType previousmktype = PETSC_MK_HBW_PREFERRED; 19 #endif 20 /* 21 We want to make sure that all mallocs of double or complex numbers are complex aligned. 22 1) on systems with memalign() we call that routine to get an aligned memory location 23 2) on systems without memalign() we 24 - allocate one sizeof(PetscScalar) extra space 25 - we shift the pointer up slightly if needed to get PetscScalar aligned 26 - if shifted we store at ptr[-1] the amount of shift (plus a classid) 27 */ 28 #define SHIFT_CLASSID 456123 29 30 PETSC_EXTERN PetscErrorCode PetscMallocAlign(size_t mem, PetscBool clear, int line, const char func[], const char file[], void **result) 31 { 32 if (!mem) { 33 *result = NULL; 34 return PETSC_SUCCESS; 35 } 36 #if PetscDefined(HAVE_MEMKIND) 37 { 38 int err = memkind_posix_memalign(currentmktype ? MEMKIND_HBW_PREFERRED : MEMKIND_DEFAULT, result, PETSC_MEMALIGN, mem); 39 PetscCheck(err != EINVAL, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memkind: invalid 3rd or 4th argument of memkind_posix_memalign()"); 40 if (err == ENOMEM) PetscInfo(NULL, "Memkind: fail to request HBW memory %.0f, falling back to normal memory\n", (PetscLogDouble)mem); 41 PetscCheck(*result, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memory requested %.0f", (PetscLogDouble)mem); 42 if (clear) PetscCall(PetscMemzero(*result, mem)); 43 } 44 #else /* PetscDefined(HAVE_MEMKIND) */ 45 #if PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8) 46 if (clear) *result = calloc(1 + mem / sizeof(int), sizeof(int)); 47 else *result = malloc(mem); 48 49 PetscCheck(*result, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memory requested %.0f", (PetscLogDouble)mem); 50 if (PetscLogMemory) PetscCall(PetscMemzero(*result, mem)); 51 #elif PetscDefined(HAVE_POSIX_MEMALIGN) 52 int ret = posix_memalign(result, PETSC_MEMALIGN, mem); 53 PetscCheck(ret == 0, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memory requested %.0f", (PetscLogDouble)mem); 54 if (clear || PetscLogMemory) PetscCall(PetscMemzero(*result, mem)); 55 #else /* PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) || PetscDefined(HAVE_POSIX_MEMALIGN) */ 56 { 57 int *ptr, shift; 58 /* 59 malloc space for two extra chunks and shift ptr 1 + enough to get it PetscScalar aligned 60 */ 61 if (clear) { 62 ptr = (int *)calloc(1 + (mem + 2 * PETSC_MEMALIGN) / sizeof(int), sizeof(int)); 63 } else { 64 ptr = (int *)malloc(mem + 2 * PETSC_MEMALIGN); 65 } 66 PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memory requested %.0f", (PetscLogDouble)mem); 67 shift = (int)(((PETSC_UINTPTR_T)ptr) % PETSC_MEMALIGN); 68 shift = (2 * PETSC_MEMALIGN - shift) / sizeof(int); 69 ptr[shift - 1] = shift + SHIFT_CLASSID; 70 ptr += shift; 71 *result = (void *)ptr; 72 if (PetscLogMemory) PetscCall(PetscMemzero(*result, mem)); 73 } 74 #endif /* PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) || PetscDefined(HAVE_POSIX_MEMALIGN) */ 75 #endif /* PetscDefined(HAVE_MEMKIND) */ 76 return PETSC_SUCCESS; 77 } 78 79 PETSC_EXTERN PetscErrorCode PetscFreeAlign(void *ptr, int line, const char func[], const char file[]) 80 { 81 if (!ptr) return PETSC_SUCCESS; 82 #if PetscDefined(HAVE_MEMKIND) 83 memkind_free(0, ptr); /* specify the kind to 0 so that memkind will look up for the right type */ 84 #else /* PetscDefined(HAVE_MEMKIND) */ 85 #if (!(PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)) && !PetscDefined(HAVE_POSIX_MEMALIGN)) 86 { 87 /* 88 Previous int tells us how many ints the pointer has been shifted from 89 the original address provided by the system malloc(). 90 */ 91 const int shift = *(((int *)ptr) - 1) - SHIFT_CLASSID; 92 93 PetscCheck(shift <= PETSC_MEMALIGN - 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Likely memory corruption in heap"); 94 PetscCheck(shift >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Likely memory corruption in heap"); 95 ptr = (void *)(((int *)ptr) - shift); 96 } 97 #endif 98 99 #if PetscDefined(HAVE_FREE_RETURN_INT) 100 int err = free(ptr); 101 PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_PLIB, "System free returned error %d\n", err); 102 #else 103 free(ptr); 104 #endif 105 #endif 106 return PETSC_SUCCESS; 107 } 108 109 PETSC_EXTERN PetscErrorCode PetscReallocAlign(size_t mem, int line, const char func[], const char file[], void **result) 110 { 111 if (!mem) { 112 PetscCall(PetscFreeAlign(*result, line, func, file)); 113 *result = NULL; 114 return PETSC_SUCCESS; 115 } 116 #if PetscDefined(HAVE_MEMKIND) 117 *result = memkind_realloc(currentmktype ? MEMKIND_HBW_PREFERRED : MEMKIND_DEFAULT, *result, mem); 118 #else 119 #if (!(PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)) && !PetscDefined(HAVE_POSIX_MEMALIGN)) 120 { 121 /* 122 Previous int tells us how many ints the pointer has been shifted from 123 the original address provided by the system malloc(). 124 */ 125 int shift = *(((int *)*result) - 1) - SHIFT_CLASSID; 126 PetscCheck(shift <= PETSC_MEMALIGN - 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Likely memory corruption in heap"); 127 PetscCheck(shift >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Likely memory corruption in heap"); 128 *result = (void *)(((int *)*result) - shift); 129 } 130 #endif 131 132 #if (PetscDefined(HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)) || PetscDefined(HAVE_POSIX_MEMALIGN) 133 *result = realloc(*result, mem); 134 #else 135 { 136 /* 137 malloc space for two extra chunks and shift ptr 1 + enough to get it PetscScalar aligned 138 */ 139 int *ptr = (int *)realloc(*result, mem + 2 * PETSC_MEMALIGN); 140 if (ptr) { 141 int shift = (int)(((PETSC_UINTPTR_T)ptr) % PETSC_MEMALIGN); 142 shift = (2 * PETSC_MEMALIGN - shift) / sizeof(int); 143 ptr[shift - 1] = shift + SHIFT_CLASSID; 144 ptr += shift; 145 *result = (void *)ptr; 146 } else { 147 *result = NULL; 148 } 149 } 150 #endif 151 #endif 152 PetscCheck(*result, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memory requested %.0f", (PetscLogDouble)mem); 153 #if PetscDefined(HAVE_POSIX_MEMALIGN) 154 /* There are no standard guarantees that realloc() maintains the alignment of memalign(), so I think we have to 155 * realloc and, if the alignment is wrong, malloc/copy/free. */ 156 if (((size_t)(*result)) % PETSC_MEMALIGN) { 157 void *newResult; 158 #if PetscDefined(HAVE_MEMKIND) 159 { 160 int err = memkind_posix_memalign(currentmktype ? MEMKIND_HBW_PREFERRED : MEMKIND_DEFAULT, &newResult, PETSC_MEMALIGN, mem); 161 PetscCheck(err != EINVAL, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memkind: invalid 3rd or 4th argument of memkind_posix_memalign()"); 162 if (err == ENOMEM) PetscInfo(NULL, "Memkind: fail to request HBW memory %.0f, falling back to normal memory\n", (PetscLogDouble)mem); 163 } 164 PetscCheck(newResult, PETSC_COMM_SELF, PETSC_ERR_MEM, "Memory requested %.0f", (PetscLogDouble)mem); 165 #else 166 int ret = posix_memalign(&newResult, PETSC_MEMALIGN, mem); 167 PetscCheck(ret == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "posix_memalign() failed with error code %d, memory requested %.0f", ret, (PetscLogDouble)mem); 168 #endif 169 PetscCall(PetscMemcpy(newResult, *result, mem)); 170 #if PetscDefined(HAVE_FREE_RETURN_INT) 171 { 172 int err = free(*result); 173 PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_PLIB, "System free returned error %d", err); 174 } 175 #else 176 #if defined(PETSC_HAVE_MEMKIND) 177 memkind_free(0, *result); 178 #else 179 free(*result); 180 #endif 181 #endif 182 *result = newResult; 183 } 184 #endif 185 return PETSC_SUCCESS; 186 } 187 188 PetscErrorCode (*PetscTrMalloc)(size_t, PetscBool, int, const char[], const char[], void **) = PetscMallocAlign; 189 PetscErrorCode (*PetscTrFree)(void *, int, const char[], const char[]) = PetscFreeAlign; 190 PetscErrorCode (*PetscTrRealloc)(size_t, int, const char[], const char[], void **) = PetscReallocAlign; 191 192 PETSC_INTERN PetscBool petscsetmallocvisited; 193 PetscBool petscsetmallocvisited = PETSC_FALSE; 194 195 /*@C 196 PetscMallocSet - Sets the routines used to do mallocs and frees. 197 This routine MUST be called before `PetscInitialize()` and may be 198 called only once. 199 200 Not Collective 201 202 Input Parameters: 203 + imalloc - the routine that provides the malloc (also provides calloc(), which is used depends on the second argument) 204 . ifree - the routine that provides the free 205 - iralloc - the routine that provides the realloc 206 207 Level: developer 208 209 .seealso: `PetscMallocClear()` 210 @*/ 211 PetscErrorCode PetscMallocSet(PetscErrorCode (*imalloc)(size_t, PetscBool, int, const char[], const char[], void **), PetscErrorCode (*ifree)(void *, int, const char[], const char[]), PetscErrorCode (*iralloc)(size_t, int, const char[], const char[], void **)) 212 { 213 PetscFunctionBegin; 214 PetscCheck(!petscsetmallocvisited || !(imalloc != PetscTrMalloc || ifree != PetscTrFree), PETSC_COMM_SELF, PETSC_ERR_SUP, "cannot call multiple times"); 215 PetscTrMalloc = imalloc; 216 PetscTrFree = ifree; 217 PetscTrRealloc = iralloc; 218 petscsetmallocvisited = PETSC_TRUE; 219 PetscFunctionReturn(PETSC_SUCCESS); 220 } 221 222 /*@C 223 PetscMallocClear - Resets the routines used to do mallocs and frees to the defaults. 224 225 Not Collective 226 227 Level: developer 228 229 Note: 230 In general one should never run a PETSc program with different malloc() and 231 free() settings for different parts; this is because one NEVER wants to 232 free() an address that was malloced by a different memory management system 233 234 Called in `PetscFinalize()` so that if `PetscInitialize()` is called again it starts with a fresh slate of allocation information 235 236 .seealso: `PetscMallocSet` 237 @*/ 238 PetscErrorCode PetscMallocClear(void) 239 { 240 PetscFunctionBegin; 241 PetscTrMalloc = PetscMallocAlign; 242 PetscTrFree = PetscFreeAlign; 243 PetscTrRealloc = PetscReallocAlign; 244 petscsetmallocvisited = PETSC_FALSE; 245 PetscFunctionReturn(PETSC_SUCCESS); 246 } 247 248 PetscErrorCode PetscMemoryTrace(const char label[]) 249 { 250 PetscLogDouble mem, mal; 251 static PetscLogDouble oldmem = 0, oldmal = 0; 252 253 PetscFunctionBegin; 254 PetscCall(PetscMemoryGetCurrentUsage(&mem)); 255 PetscCall(PetscMallocGetCurrentUsage(&mal)); 256 257 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%s High water %8.3f MB increase %8.3f MB Current %8.3f MB increase %8.3f MB\n", label, mem * 1e-6, (mem - oldmem) * 1e-6, mal * 1e-6, (mal - oldmal) * 1e-6)); 258 oldmem = mem; 259 oldmal = mal; 260 PetscFunctionReturn(PETSC_SUCCESS); 261 } 262 263 static PetscErrorCode (*PetscTrMallocOld)(size_t, PetscBool, int, const char[], const char[], void **) = PetscMallocAlign; 264 static PetscErrorCode (*PetscTrReallocOld)(size_t, int, const char[], const char[], void **) = PetscReallocAlign; 265 static PetscErrorCode (*PetscTrFreeOld)(void *, int, const char[], const char[]) = PetscFreeAlign; 266 267 /*@C 268 PetscMallocSetDRAM - Set `PetscMalloc()` to use DRAM. 269 If memkind is available, change the memkind type. Otherwise, switch the 270 current malloc and free routines to the `PetscMallocAlign()` and 271 `PetscFreeAlign()` (PETSc default). 272 273 Not Collective 274 275 Level: developer 276 277 Note: 278 This provides a way to do the allocation on DRAM temporarily. One 279 can switch back to the previous choice by calling `PetscMallocReset()`. 280 281 .seealso: `PetscMallocReset()` 282 @*/ 283 PetscErrorCode PetscMallocSetDRAM(void) 284 { 285 PetscFunctionBegin; 286 if (PetscTrMalloc == PetscMallocAlign) { 287 #if defined(PETSC_HAVE_MEMKIND) 288 previousmktype = currentmktype; 289 currentmktype = PETSC_MK_DEFAULT; 290 #endif 291 } else { 292 /* Save the previous choice */ 293 PetscTrMallocOld = PetscTrMalloc; 294 PetscTrReallocOld = PetscTrRealloc; 295 PetscTrFreeOld = PetscTrFree; 296 PetscTrMalloc = PetscMallocAlign; 297 PetscTrFree = PetscFreeAlign; 298 PetscTrRealloc = PetscReallocAlign; 299 } 300 PetscFunctionReturn(PETSC_SUCCESS); 301 } 302 303 /*@C 304 PetscMallocResetDRAM - Reset the changes made by `PetscMallocSetDRAM()` 305 306 Not Collective 307 308 Level: developer 309 310 .seealso: `PetscMallocSetDRAM()` 311 @*/ 312 PetscErrorCode PetscMallocResetDRAM(void) 313 { 314 PetscFunctionBegin; 315 if (PetscTrMalloc == PetscMallocAlign) { 316 #if defined(PETSC_HAVE_MEMKIND) 317 currentmktype = previousmktype; 318 #endif 319 } else { 320 /* Reset to the previous choice */ 321 PetscTrMalloc = PetscTrMallocOld; 322 PetscTrRealloc = PetscTrReallocOld; 323 PetscTrFree = PetscTrFreeOld; 324 } 325 PetscFunctionReturn(PETSC_SUCCESS); 326 } 327 328 static PetscBool petscmalloccoalesce = 329 #if defined(PETSC_USE_MALLOC_COALESCED) 330 PETSC_TRUE; 331 #else 332 PETSC_FALSE; 333 #endif 334 335 /*@C 336 PetscMallocSetCoalesce - Use coalesced malloc when allocating groups of objects 337 338 Not Collective 339 340 Input Parameters: 341 . coalesce - `PETSC_TRUE` to use coalesced malloc for multi-object allocation. 342 343 Options Database Keys: 344 . -malloc_coalesce - turn coalesced malloc on or off 345 346 Notes: 347 PETSc uses coalesced malloc by default for optimized builds and not for debugging builds. 348 349 This default can be changed via the command-line option -malloc_coalesce or by calling this function. 350 351 This function can only be called immediately after `PetscInitialize()` 352 353 Level: developer 354 355 .seealso: `PetscMallocA()` 356 @*/ 357 PetscErrorCode PetscMallocSetCoalesce(PetscBool coalesce) 358 { 359 PetscFunctionBegin; 360 petscmalloccoalesce = coalesce; 361 PetscFunctionReturn(PETSC_SUCCESS); 362 } 363 364 /*@C 365 PetscMallocA - Allocate and optionally clear one or more objects, possibly using coalesced malloc 366 367 Not Collective 368 369 Input Parameters: 370 + n - number of objects to allocate (at least 1) 371 . clear - use calloc() to allocate space initialized to zero 372 . lineno - line number to attribute allocation (typically __LINE__) 373 . function - function to attribute allocation (typically PETSC_FUNCTION_NAME) 374 . filename - file name to attribute allocation (typically __FILE__) 375 - bytes0 - first of n object sizes 376 377 Output Parameters: 378 . ptr0 - first of n pointers to allocate 379 380 Notes 381 This function is not normally called directly by users, but rather via the macros `PetscMalloc1()`, `PetscMalloc2()`, or `PetscCalloc1()`, etc. 382 383 Level: developer 384 385 .seealso: `PetscMallocAlign()`, `PetscMallocSet()`, `PetscMalloc1()`, `PetscMalloc2()`, `PetscMalloc3()`, `PetscMalloc4()`, `PetscMalloc5()`, `PetscMalloc6()`, `PetscMalloc7()`, `PetscCalloc1()`, `PetscCalloc2()`, `PetscCalloc3()`, `PetscCalloc4()`, `PetscCalloc5()`, `PetscCalloc6()`, `PetscCalloc7()`, `PetscFreeA()` 386 @*/ 387 PetscErrorCode PetscMallocA(int n, PetscBool clear, int lineno, const char *function, const char *filename, size_t bytes0, void *ptr0, ...) 388 { 389 va_list Argp; 390 size_t bytes[8], sumbytes; 391 void **ptr[8]; 392 int i; 393 394 PetscFunctionBegin; 395 PetscCheck(n <= 8, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Attempt to allocate %d objects but only 8 supported", n); 396 bytes[0] = bytes0; 397 ptr[0] = (void **)ptr0; 398 sumbytes = (bytes0 + PETSC_MEMALIGN - 1) & ~(PETSC_MEMALIGN - 1); 399 va_start(Argp, ptr0); 400 for (i = 1; i < n; i++) { 401 bytes[i] = va_arg(Argp, size_t); 402 ptr[i] = va_arg(Argp, void **); 403 sumbytes += (bytes[i] + PETSC_MEMALIGN - 1) & ~(PETSC_MEMALIGN - 1); 404 } 405 va_end(Argp); 406 if (petscmalloccoalesce) { 407 char *p; 408 PetscCall((*PetscTrMalloc)(sumbytes, clear, lineno, function, filename, (void **)&p)); 409 if (p == NULL) { 410 for (i = 0; i < n; i++) *ptr[i] = NULL; 411 } else { 412 for (i = 0; i < n; i++) { 413 *ptr[i] = bytes[i] ? p : NULL; 414 p = (char *)PetscAddrAlign(p + bytes[i]); 415 } 416 } 417 } else { 418 for (i = 0; i < n; i++) PetscCall((*PetscTrMalloc)(bytes[i], clear, lineno, function, filename, (void **)ptr[i])); 419 } 420 PetscFunctionReturn(PETSC_SUCCESS); 421 } 422 423 /*@C 424 PetscFreeA - Free one or more objects, possibly allocated using coalesced malloc 425 426 Not Collective 427 428 Input Parameters: 429 + n - number of objects to free (at least 1) 430 . lineno - line number to attribute deallocation (typically __LINE__) 431 . function - function to attribute deallocation (typically PETSC_FUNCTION_NAME) 432 . filename - file name to attribute deallocation (typically __FILE__) 433 - ptr0 ... - first of n pointers to free 434 435 Notes: 436 This function is not normally called directly by users, but rather via the macros `PetscFree()`, `PetscFree2()`, etc. 437 438 The pointers are zeroed to prevent users from accidentally reusing space that has been freed. 439 440 Level: developer 441 442 .seealso: `PetscMallocAlign()`, `PetscMallocSet()`, `PetscMallocA()`, `PetscFree1()`, `PetscFree2()`, `PetscFree3()`, `PetscFree4()`, `PetscFree5()`, `PetscFree6()`, `PetscFree7()` 443 @*/ 444 PetscErrorCode PetscFreeA(int n, int lineno, const char *function, const char *filename, void *ptr0, ...) 445 { 446 va_list Argp; 447 void **ptr[8]; 448 int i; 449 450 PetscFunctionBegin; 451 PetscCheck(n <= 8, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Attempt to allocate %d objects but only up to 8 supported", n); 452 ptr[0] = (void **)ptr0; 453 va_start(Argp, ptr0); 454 for (i = 1; i < n; i++) ptr[i] = va_arg(Argp, void **); 455 va_end(Argp); 456 if (petscmalloccoalesce) { 457 for (i = 0; i < n; i++) { /* Find first nonempty allocation */ 458 if (*ptr[i]) break; 459 } 460 while (--n > i) *ptr[n] = NULL; 461 PetscCall((*PetscTrFree)(*ptr[n], lineno, function, filename)); 462 *ptr[n] = NULL; 463 } else { 464 while (--n >= 0) { 465 PetscCall((*PetscTrFree)(*ptr[n], lineno, function, filename)); 466 *ptr[n] = NULL; 467 } 468 } 469 PetscFunctionReturn(PETSC_SUCCESS); 470 } 471