1e5c89e4eSSatish Balay /* 2e5c89e4eSSatish Balay Code that allows a user to dictate what malloc() PETSc uses. 3e5c89e4eSSatish Balay */ 4c6db04a5SJed Brown #include <petscsys.h> /*I "petscsys.h" I*/ 5ba282f50SJed Brown #include <stdarg.h> 6e5c89e4eSSatish Balay #if defined(PETSC_HAVE_MALLOC_H) 7e5c89e4eSSatish Balay #include <malloc.h> 8e5c89e4eSSatish Balay #endif 9de1d6c17SHong Zhang #if defined(PETSC_HAVE_MEMKIND) 10ca8c994eSHong Zhang #include <errno.h> 11de1d6c17SHong Zhang #include <memkind.h> 12e3acc61dSHong Zhang typedef enum {PETSC_MK_DEFAULT=0,PETSC_MK_HBW_PREFERRED=1} PetscMemkindType; 13e3acc61dSHong Zhang PetscMemkindType currentmktype = PETSC_MK_HBW_PREFERRED; 14e3acc61dSHong Zhang PetscMemkindType previousmktype = PETSC_MK_HBW_PREFERRED; 15de1d6c17SHong Zhang #endif 16e5c89e4eSSatish Balay /* 17e5c89e4eSSatish Balay We want to make sure that all mallocs of double or complex numbers are complex aligned. 18e5c89e4eSSatish Balay 1) on systems with memalign() we call that routine to get an aligned memory location 19e5c89e4eSSatish Balay 2) on systems without memalign() we 20e5c89e4eSSatish Balay - allocate one sizeof(PetscScalar) extra space 21e5c89e4eSSatish Balay - we shift the pointer up slightly if needed to get PetscScalar aligned 220700a824SBarry Smith - if shifted we store at ptr[-1] the amount of shift (plus a classid) 23e5c89e4eSSatish Balay */ 240700a824SBarry Smith #define SHIFT_CLASSID 456123 25e5c89e4eSSatish Balay 26071fcb05SBarry Smith PETSC_EXTERN PetscErrorCode PetscMallocAlign(size_t mem,PetscBool clear,int line,const char func[],const char file[],void **result) 27e5c89e4eSSatish Balay { 28071fcb05SBarry Smith PetscErrorCode ierr; 292da392ccSBarry Smith #if defined(PETSC_HAVE_MEMKIND) 302da392ccSBarry Smith int err; 312da392ccSBarry Smith #endif 32071fcb05SBarry Smith 33f0ba7cfcSLisandro Dalcin if (!mem) {*result = NULL; return 0;} 34fc2a7144SHong Zhang #if defined(PETSC_HAVE_MEMKIND) 35fc2a7144SHong Zhang { 362da392ccSBarry Smith if (!currentmktype) err = memkind_posix_memalign(MEMKIND_DEFAULT,result,PETSC_MEMALIGN,mem); 372da392ccSBarry Smith else err = memkind_posix_memalign(MEMKIND_HBW_PREFERRED,result,PETSC_MEMALIGN,mem); 382da392ccSBarry Smith if (err == EINVAL) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"Memkind: invalid 3rd or 4th argument of memkind_posix_memalign()"); 392da392ccSBarry Smith if (err == ENOMEM) PetscInfo1(0,"Memkind: fail to request HBW memory %.0f, falling back to normal memory\n",(PetscLogDouble)mem); 40*66c772faSBarry Smith if (!*result) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_MEM,PETSC_ERROR_INITIAL,"Memory requested %.0f",(PetscLogDouble)mem); 41071fcb05SBarry Smith if (clear) {ierr = PetscMemzero(*result,mem);CHKERRQ(ierr);} 42fc2a7144SHong Zhang } 43fc2a7144SHong Zhang #else 44e5c89e4eSSatish Balay # if defined(PETSC_HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8) 45071fcb05SBarry Smith if (clear) { 46071fcb05SBarry Smith *result = calloc(1+mem/sizeof(int),sizeof(int)); 47071fcb05SBarry Smith } else { 48e5c89e4eSSatish Balay *result = malloc(mem); 49071fcb05SBarry Smith } 50*66c772faSBarry Smith if (!*result) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_MEM,PETSC_ERROR_INITIAL,"Memory requested %.0f",(PetscLogDouble)mem); 51071fcb05SBarry Smith if (PetscLogMemory) {ierr = PetscMemzero(*result,mem);CHKERRQ(ierr);} 52071fcb05SBarry Smith 53e5c89e4eSSatish Balay # elif defined(PETSC_HAVE_MEMALIGN) 54e5c89e4eSSatish Balay *result = memalign(PETSC_MEMALIGN,mem); 55*66c772faSBarry Smith if (!*result) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_MEM,PETSC_ERROR_INITIAL,"Memory requested %.0f",(PetscLogDouble)mem); 56071fcb05SBarry Smith if (clear || PetscLogMemory) { 57071fcb05SBarry Smith ierr = PetscMemzero(*result,mem);CHKERRQ(ierr); 58071fcb05SBarry Smith } 59e5c89e4eSSatish Balay # else 60e5c89e4eSSatish Balay { 61*66c772faSBarry Smith int *ptr,shift; 62e5c89e4eSSatish Balay /* 63e5c89e4eSSatish Balay malloc space for two extra chunks and shift ptr 1 + enough to get it PetscScalar aligned 64e5c89e4eSSatish Balay */ 65071fcb05SBarry Smith if (clear) { 66071fcb05SBarry Smith ptr = (int*)calloc(1+(mem + 2*PETSC_MEMALIGN)/sizeof(int),sizeof(int)); 67071fcb05SBarry Smith } else { 68071fcb05SBarry Smith ptr = (int*)malloc(mem + 2*PETSC_MEMALIGN); 69071fcb05SBarry Smith } 70*66c772faSBarry Smith if (!ptr) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_MEM,PETSC_ERROR_INITIAL,"Memory requested %.0f",(PetscLogDouble)mem); 71*66c772faSBarry Smith shift = (int)(((PETSC_UINTPTR_T) ptr) % PETSC_MEMALIGN); 72e5c89e4eSSatish Balay shift = (2*PETSC_MEMALIGN - shift)/sizeof(int); 730700a824SBarry Smith ptr[shift-1] = shift + SHIFT_CLASSID; 74e5c89e4eSSatish Balay ptr += shift; 75e5c89e4eSSatish Balay *result = (void*)ptr; 76071fcb05SBarry Smith if (PetscLogMemory) {ierr = PetscMemzero(*result,mem);CHKERRQ(ierr);} 77e5c89e4eSSatish Balay } 78e5c89e4eSSatish Balay # endif 79fc2a7144SHong Zhang #endif 80e5c89e4eSSatish Balay return 0; 81e5c89e4eSSatish Balay } 82e5c89e4eSSatish Balay 8395c0884eSLisandro Dalcin PETSC_EXTERN PetscErrorCode PetscFreeAlign(void *ptr,int line,const char func[],const char file[]) 84e5c89e4eSSatish Balay { 85f0ba7cfcSLisandro Dalcin if (!ptr) return 0; 86fc2a7144SHong Zhang #if defined(PETSC_HAVE_MEMKIND) 87fc2a7144SHong Zhang memkind_free(0,ptr); /* specify the kind to 0 so that memkind will look up for the right type */ 88fc2a7144SHong Zhang #else 89e5c89e4eSSatish Balay # if (!(defined(PETSC_HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)) && !defined(PETSC_HAVE_MEMALIGN)) 90f0ba7cfcSLisandro Dalcin { 91e5c89e4eSSatish Balay /* 92e5c89e4eSSatish Balay Previous int tells us how many ints the pointer has been shifted from 93e5c89e4eSSatish Balay the original address provided by the system malloc(). 94e5c89e4eSSatish Balay */ 95f0ba7cfcSLisandro Dalcin int shift = *(((int*)ptr)-1) - SHIFT_CLASSID; 96efca3c55SSatish Balay if (shift > PETSC_MEMALIGN-1) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_PLIB,PETSC_ERROR_INITIAL,"Likely memory corruption in heap"); 97efca3c55SSatish Balay if (shift < 0) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_PLIB,PETSC_ERROR_INITIAL,"Likely memory corruption in heap"); 98e5c89e4eSSatish Balay ptr = (void*)(((int*)ptr) - shift); 99e5c89e4eSSatish Balay } 100f0ba7cfcSLisandro Dalcin # endif 101e5c89e4eSSatish Balay 102e5c89e4eSSatish Balay # if defined(PETSC_HAVE_FREE_RETURN_INT) 103e5c89e4eSSatish Balay int err = free(ptr); 104efca3c55SSatish Balay if (err) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_PLIB,PETSC_ERROR_INITIAL,"System free returned error %d\n",err); 105e5c89e4eSSatish Balay # else 106e5c89e4eSSatish Balay free(ptr); 107e5c89e4eSSatish Balay # endif 108fc2a7144SHong Zhang #endif 109e5c89e4eSSatish Balay return 0; 110e5c89e4eSSatish Balay } 111e5c89e4eSSatish Balay 11295c0884eSLisandro Dalcin PETSC_EXTERN PetscErrorCode PetscReallocAlign(size_t mem, int line, const char func[], const char file[], void **result) 1133221ece2SMatthew G. Knepley { 114c22f1541SToby Isaac PetscErrorCode ierr; 115c22f1541SToby Isaac 116c22f1541SToby Isaac if (!mem) { 117c22f1541SToby Isaac ierr = PetscFreeAlign(*result, line, func, file); 118c22f1541SToby Isaac if (ierr) return ierr; 119c22f1541SToby Isaac *result = NULL; 120c22f1541SToby Isaac return 0; 121c22f1541SToby Isaac } 122fc2a7144SHong Zhang #if defined(PETSC_HAVE_MEMKIND) 123fc2a7144SHong Zhang if (!currentmktype) *result = memkind_realloc(MEMKIND_DEFAULT,*result,mem); 124e3acc61dSHong Zhang else *result = memkind_realloc(MEMKIND_HBW_PREFERRED,*result,mem); 125fc2a7144SHong Zhang #else 1263221ece2SMatthew G. Knepley # if (!(defined(PETSC_HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)) && !defined(PETSC_HAVE_MEMALIGN)) 1273221ece2SMatthew G. Knepley { 1283221ece2SMatthew G. Knepley /* 1293221ece2SMatthew G. Knepley Previous int tells us how many ints the pointer has been shifted from 1303221ece2SMatthew G. Knepley the original address provided by the system malloc(). 1313221ece2SMatthew G. Knepley */ 1323221ece2SMatthew G. Knepley int shift = *(((int*)*result)-1) - SHIFT_CLASSID; 1333221ece2SMatthew G. Knepley if (shift > PETSC_MEMALIGN-1) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_PLIB,PETSC_ERROR_INITIAL,"Likely memory corruption in heap"); 1343221ece2SMatthew G. Knepley if (shift < 0) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_PLIB,PETSC_ERROR_INITIAL,"Likely memory corruption in heap"); 1353221ece2SMatthew G. Knepley *result = (void*)(((int*)*result) - shift); 1363221ece2SMatthew G. Knepley } 1373221ece2SMatthew G. Knepley # endif 1383221ece2SMatthew G. Knepley 139c22f1541SToby Isaac # if (defined(PETSC_HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)) || defined(PETSC_HAVE_MEMALIGN) 14041605b92SBarry Smith *result = realloc(*result, mem); 1413221ece2SMatthew G. Knepley # else 1423221ece2SMatthew G. Knepley { 1433221ece2SMatthew G. Knepley /* 1443221ece2SMatthew G. Knepley malloc space for two extra chunks and shift ptr 1 + enough to get it PetscScalar aligned 1453221ece2SMatthew G. Knepley */ 1463221ece2SMatthew G. Knepley int *ptr = (int *) realloc(*result, mem + 2*PETSC_MEMALIGN); 1473221ece2SMatthew G. Knepley if (ptr) { 1483221ece2SMatthew G. Knepley int shift = (int)(((PETSC_UINTPTR_T) ptr) % PETSC_MEMALIGN); 1493221ece2SMatthew G. Knepley shift = (2*PETSC_MEMALIGN - shift)/sizeof(int); 1503221ece2SMatthew G. Knepley ptr[shift-1] = shift + SHIFT_CLASSID; 1513221ece2SMatthew G. Knepley ptr += shift; 1523221ece2SMatthew G. Knepley *result = (void*)ptr; 1533221ece2SMatthew G. Knepley } else { 1543221ece2SMatthew G. Knepley *result = NULL; 1553221ece2SMatthew G. Knepley } 1563221ece2SMatthew G. Knepley } 1573221ece2SMatthew G. Knepley # endif 158fc2a7144SHong Zhang #endif 1593221ece2SMatthew G. Knepley if (!*result) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_MEM,PETSC_ERROR_INITIAL,"Memory requested %.0f",(PetscLogDouble)mem); 160c22f1541SToby Isaac #if defined(PETSC_HAVE_MEMALIGN) 161c22f1541SToby Isaac /* There are no standard guarantees that realloc() maintains the alignment of memalign(), so I think we have to 162c22f1541SToby Isaac * realloc and, if the alignment is wrong, malloc/copy/free. */ 163c22f1541SToby Isaac if (((size_t) (*result)) % PETSC_MEMALIGN) { 164c22f1541SToby Isaac void *newResult; 165fc2a7144SHong Zhang # if defined(PETSC_HAVE_MEMKIND) 166fc2a7144SHong Zhang { 1672da392ccSBarry Smith int err; 1682da392ccSBarry Smith if (!currentmktype) err = memkind_posix_memalign(MEMKIND_DEFAULT,&newResult,PETSC_MEMALIGN,mem); 1692da392ccSBarry Smith else err = memkind_posix_memalign(MEMKIND_HBW_PREFERRED,&newResult,PETSC_MEMALIGN,mem); 1702da392ccSBarry Smith if (err == EINVAL) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"Memkind: invalid 3rd or 4th argument of memkind_posix_memalign()"); 1712da392ccSBarry Smith if (err == ENOMEM) PetscInfo1(0,"Memkind: fail to request HBW memory %.0f, falling back to normal memory\n",(PetscLogDouble)mem); 172fc2a7144SHong Zhang } 173fc2a7144SHong Zhang # else 174c22f1541SToby Isaac newResult = memalign(PETSC_MEMALIGN,mem); 175fc2a7144SHong Zhang # endif 176c22f1541SToby Isaac if (!newResult) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_MEM,PETSC_ERROR_INITIAL,"Memory requested %.0f",(PetscLogDouble)mem); 177c22f1541SToby Isaac ierr = PetscMemcpy(newResult,*result,mem); 178c22f1541SToby Isaac if (ierr) return ierr; 179c22f1541SToby Isaac # if defined(PETSC_HAVE_FREE_RETURN_INT) 180c22f1541SToby Isaac { 181c22f1541SToby Isaac int err = free(*result); 182c22f1541SToby Isaac if (err) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_PLIB,PETSC_ERROR_INITIAL,"System free returned error %d\n",err); 183c22f1541SToby Isaac } 184c22f1541SToby Isaac # else 185de1d6c17SHong Zhang # if defined(PETSC_HAVE_MEMKIND) 186de1d6c17SHong Zhang memkind_free(0,*result); 187de1d6c17SHong Zhang # else 188c22f1541SToby Isaac free(*result); 189c22f1541SToby Isaac # endif 190de1d6c17SHong Zhang # endif 191c22f1541SToby Isaac *result = newResult; 192c22f1541SToby Isaac } 193c22f1541SToby Isaac #endif 1943221ece2SMatthew G. Knepley return 0; 1953221ece2SMatthew G. Knepley } 1963221ece2SMatthew G. Knepley 197071fcb05SBarry Smith PetscErrorCode (*PetscTrMalloc)(size_t,PetscBool,int,const char[],const char[],void**) = PetscMallocAlign; 198efca3c55SSatish Balay PetscErrorCode (*PetscTrFree)(void*,int,const char[],const char[]) = PetscFreeAlign; 1993221ece2SMatthew G. Knepley PetscErrorCode (*PetscTrRealloc)(size_t,int,const char[],const char[],void**) = PetscReallocAlign; 200e5c89e4eSSatish Balay 20195c0884eSLisandro Dalcin PETSC_INTERN PetscBool petscsetmallocvisited; 202ace3abfcSBarry Smith PetscBool petscsetmallocvisited = PETSC_FALSE; 203e5c89e4eSSatish Balay 204e5c89e4eSSatish Balay /*@C 2051d1a0024SBarry Smith PetscMallocSet - Sets the routines used to do mallocs and frees. 206e5c89e4eSSatish Balay This routine MUST be called before PetscInitialize() and may be 207e5c89e4eSSatish Balay called only once. 208e5c89e4eSSatish Balay 209e5c89e4eSSatish Balay Not Collective 210e5c89e4eSSatish Balay 211e5c89e4eSSatish Balay Input Parameters: 21292f119d6SBarry Smith + imalloc - the routine that provides the malloc (also provides calloc(), which is used depends on the second argument) 21392f119d6SBarry Smith . ifree - the routine that provides the free 21492f119d6SBarry Smith - iralloc - the routine that provides the realloc 215e5c89e4eSSatish Balay 216e5c89e4eSSatish Balay Level: developer 217e5c89e4eSSatish Balay 218e5c89e4eSSatish Balay @*/ 219071fcb05SBarry Smith PetscErrorCode PetscMallocSet(PetscErrorCode (*imalloc)(size_t,PetscBool,int,const char[],const char[],void**), 22092f119d6SBarry Smith PetscErrorCode (*ifree)(void*,int,const char[],const char[]), 22192f119d6SBarry Smith PetscErrorCode (*iralloc)(size_t, int, const char[], const char[], void **)) 222e5c89e4eSSatish Balay { 223e5c89e4eSSatish Balay PetscFunctionBegin; 224e32f2f54SBarry Smith if (petscsetmallocvisited && (imalloc != PetscTrMalloc || ifree != PetscTrFree)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"cannot call multiple times"); 225e5c89e4eSSatish Balay PetscTrMalloc = imalloc; 226e5c89e4eSSatish Balay PetscTrFree = ifree; 22792f119d6SBarry Smith PetscTrRealloc = iralloc; 228e5c89e4eSSatish Balay petscsetmallocvisited = PETSC_TRUE; 229e5c89e4eSSatish Balay PetscFunctionReturn(0); 230e5c89e4eSSatish Balay } 231e5c89e4eSSatish Balay 232e5c89e4eSSatish Balay /*@C 23392f119d6SBarry Smith PetscMallocClear - Resets the routines used to do mallocs and frees to the defaults. 234e5c89e4eSSatish Balay 235e5c89e4eSSatish Balay Not Collective 236e5c89e4eSSatish Balay 237e5c89e4eSSatish Balay Level: developer 238e5c89e4eSSatish Balay 239e5c89e4eSSatish Balay Notes: 240e5c89e4eSSatish Balay In general one should never run a PETSc program with different malloc() and 241e5c89e4eSSatish Balay free() settings for different parts; this is because one NEVER wants to 242e5c89e4eSSatish Balay free() an address that was malloced by a different memory management system 243e5c89e4eSSatish Balay 24492f119d6SBarry Smith Called in PetscFinalize() so that if PetscInitialize() is called again it starts with a fresh slate of allocation information 24592f119d6SBarry Smith 246e5c89e4eSSatish Balay @*/ 2477087cfbeSBarry Smith PetscErrorCode PetscMallocClear(void) 248e5c89e4eSSatish Balay { 249e5c89e4eSSatish Balay PetscFunctionBegin; 250e5c89e4eSSatish Balay PetscTrMalloc = PetscMallocAlign; 251e5c89e4eSSatish Balay PetscTrFree = PetscFreeAlign; 25292f119d6SBarry Smith PetscTrRealloc = PetscReallocAlign; 253e5c89e4eSSatish Balay petscsetmallocvisited = PETSC_FALSE; 254e5c89e4eSSatish Balay PetscFunctionReturn(0); 255e5c89e4eSSatish Balay } 256b44d5720SBarry Smith 257b44d5720SBarry Smith PetscErrorCode PetscMemoryTrace(const char label[]) 258b44d5720SBarry Smith { 259b44d5720SBarry Smith PetscErrorCode ierr; 260b44d5720SBarry Smith PetscLogDouble mem,mal; 261b44d5720SBarry Smith static PetscLogDouble oldmem = 0,oldmal = 0; 262b44d5720SBarry Smith 263b44d5720SBarry Smith PetscFunctionBegin; 264b44d5720SBarry Smith ierr = PetscMemoryGetCurrentUsage(&mem);CHKERRQ(ierr); 265b44d5720SBarry Smith ierr = PetscMallocGetCurrentUsage(&mal);CHKERRQ(ierr); 266b44d5720SBarry Smith 267b44d5720SBarry Smith ierr = PetscPrintf(PETSC_COMM_WORLD,"%s High water %8.3f MB increase %8.3f MB Current %8.3f MB increase %8.3f MB\n",label,mem*1e-6,(mem - oldmem)*1e-6,mal*1e-6,(mal - oldmal)*1e-6);CHKERRQ(ierr); 268b44d5720SBarry Smith oldmem = mem; 269b44d5720SBarry Smith oldmal = mal; 270b44d5720SBarry Smith PetscFunctionReturn(0); 271b44d5720SBarry Smith } 27213850c04SHong Zhang 273071fcb05SBarry Smith static PetscErrorCode (*PetscTrMallocOld)(size_t,PetscBool,int,const char[],const char[],void**) = PetscMallocAlign; 27492f119d6SBarry Smith static PetscErrorCode (*PetscTrReallocOld)(size_t,int,const char[],const char[],void**) = PetscReallocAlign; 27550a41461SHong Zhang static PetscErrorCode (*PetscTrFreeOld)(void*,int,const char[],const char[]) = PetscFreeAlign; 276de1d6c17SHong Zhang 277de1d6c17SHong Zhang /*@C 278de1d6c17SHong Zhang PetscMallocSetDRAM - Set PetscMalloc to use DRAM. 279de1d6c17SHong Zhang If memkind is available, change the memkind type. Otherwise, switch the 280de1d6c17SHong Zhang current malloc and free routines to the PetscMallocAlign and 281de1d6c17SHong Zhang PetscFreeAlign (PETSc default). 282de1d6c17SHong Zhang 283de1d6c17SHong Zhang Not Collective 284de1d6c17SHong Zhang 285de1d6c17SHong Zhang Level: developer 286de1d6c17SHong Zhang 287de1d6c17SHong Zhang Notes: 288de1d6c17SHong Zhang This provides a way to do the allocation on DRAM temporarily. One 289de1d6c17SHong Zhang can switch back to the previous choice by calling PetscMallocReset(). 290de1d6c17SHong Zhang 291de1d6c17SHong Zhang .seealso: PetscMallocReset() 292de1d6c17SHong Zhang @*/ 29313850c04SHong Zhang PetscErrorCode PetscMallocSetDRAM(void) 29413850c04SHong Zhang { 29513850c04SHong Zhang PetscFunctionBegin; 296de1d6c17SHong Zhang if (PetscTrMalloc == PetscMallocAlign) { 297de1d6c17SHong Zhang #if defined(PETSC_HAVE_MEMKIND) 298de1d6c17SHong Zhang previousmktype = currentmktype; 299de1d6c17SHong Zhang currentmktype = PETSC_MK_DEFAULT; 300de1d6c17SHong Zhang #endif 301de1d6c17SHong Zhang } else { 30213850c04SHong Zhang /* Save the previous choice */ 30313850c04SHong Zhang PetscTrMallocOld = PetscTrMalloc; 30492f119d6SBarry Smith PetscTrReallocOld = PetscTrRealloc; 30513850c04SHong Zhang PetscTrFreeOld = PetscTrFree; 30613850c04SHong Zhang PetscTrMalloc = PetscMallocAlign; 30713850c04SHong Zhang PetscTrFree = PetscFreeAlign; 30892f119d6SBarry Smith PetscTrRealloc = PetscReallocAlign; 309de1d6c17SHong Zhang } 31013850c04SHong Zhang PetscFunctionReturn(0); 31113850c04SHong Zhang } 31213850c04SHong Zhang 313de1d6c17SHong Zhang /*@C 314de1d6c17SHong Zhang PetscMallocResetDRAM - Reset the changes made by PetscMallocSetDRAM 315de1d6c17SHong Zhang 316de1d6c17SHong Zhang Not Collective 317de1d6c17SHong Zhang 318de1d6c17SHong Zhang Level: developer 319de1d6c17SHong Zhang 320de1d6c17SHong Zhang .seealso: PetscMallocSetDRAM() 321de1d6c17SHong Zhang @*/ 32213850c04SHong Zhang PetscErrorCode PetscMallocResetDRAM(void) 32313850c04SHong Zhang { 32413850c04SHong Zhang PetscFunctionBegin; 325de1d6c17SHong Zhang if (PetscTrMalloc == PetscMallocAlign) { 326de1d6c17SHong Zhang #if defined(PETSC_HAVE_MEMKIND) 327de1d6c17SHong Zhang currentmktype = previousmktype; 328de1d6c17SHong Zhang #endif 329de1d6c17SHong Zhang } else { 33013850c04SHong Zhang /* Reset to the previous choice */ 33113850c04SHong Zhang PetscTrMalloc = PetscTrMallocOld; 33292f119d6SBarry Smith PetscTrRealloc = PetscTrReallocOld; 33313850c04SHong Zhang PetscTrFree = PetscTrFreeOld; 334de1d6c17SHong Zhang } 33513850c04SHong Zhang PetscFunctionReturn(0); 33613850c04SHong Zhang } 337ba282f50SJed Brown 338ba282f50SJed Brown static PetscBool petscmalloccoalesce = 339ba282f50SJed Brown #if defined(PETSC_USE_MALLOC_COALESCED) 340ba282f50SJed Brown PETSC_TRUE; 341ba282f50SJed Brown #else 342ba282f50SJed Brown PETSC_FALSE; 343ba282f50SJed Brown #endif 344ba282f50SJed Brown 345ba282f50SJed Brown /*@C 346ba282f50SJed Brown PetscMallocSetCoalesce - Use coalesced malloc when allocating groups of objects 347ba282f50SJed Brown 348ba282f50SJed Brown Not Collective 349ba282f50SJed Brown 350ba282f50SJed Brown Input Parameters: 351ba282f50SJed Brown . coalesce - PETSC_TRUE to use coalesced malloc for multi-object allocation. 352ba282f50SJed Brown 353ba282f50SJed Brown Options Database Keys: 354ba282f50SJed Brown . -malloc_coalesce - turn coalesced malloc on or off 355ba282f50SJed Brown 356ba282f50SJed Brown Note: 357ba282f50SJed Brown PETSc uses coalesced malloc by default for optimized builds and not for debugging builds. This default can be changed via the command-line option -malloc_coalesce or by calling this function. 35892f119d6SBarry Smith This function can only be called immediately after PetscInitialize() 359ba282f50SJed Brown 360ba282f50SJed Brown Level: developer 361ba282f50SJed Brown 362ba282f50SJed Brown .seealso: PetscMallocA() 363ba282f50SJed Brown @*/ 364ba282f50SJed Brown PetscErrorCode PetscMallocSetCoalesce(PetscBool coalesce) 365ba282f50SJed Brown { 366ba282f50SJed Brown PetscFunctionBegin; 367ba282f50SJed Brown petscmalloccoalesce = coalesce; 368ba282f50SJed Brown PetscFunctionReturn(0); 369ba282f50SJed Brown } 370ba282f50SJed Brown 371ba282f50SJed Brown /*@C 372ba282f50SJed Brown PetscMallocA - Allocate and optionally clear one or more objects, possibly using coalesced malloc 373ba282f50SJed Brown 374ba282f50SJed Brown Not Collective 375ba282f50SJed Brown 376ba282f50SJed Brown Input Parameters: 377ba282f50SJed Brown + n - number of objects to allocate (at least 1) 37889407d75SBarry Smith . clear - use calloc() to allocate space initialized to zero 379ba282f50SJed Brown . lineno - line number to attribute allocation (typically __LINE__) 380ba282f50SJed Brown . function - function to attribute allocation (typically PETSC_FUNCTION_NAME) 381ba282f50SJed Brown . filename - file name to attribute allocation (typically __FILE__) 382ba282f50SJed Brown - bytes0 - first of n object sizes 383ba282f50SJed Brown 384ba282f50SJed Brown Output Parameters: 385ba282f50SJed Brown . ptr0 - first of n pointers to allocate 386ba282f50SJed Brown 387ba282f50SJed Brown Notes: 388ba282f50SJed Brown This function is not normally called directly by users, but rather via the macros PetscMalloc1(), PetscMalloc2(), or PetscCalloc1(), etc. 389ba282f50SJed Brown 390ba282f50SJed Brown Level: developer 391ba282f50SJed Brown 392ba282f50SJed Brown .seealso: PetscMallocAlign(), PetscMallocSet(), PetscMalloc1(), PetscMalloc2(), PetscMalloc3(), PetscMalloc4(), PetscMalloc5(), PetscMalloc6(), PetscMalloc7(), PetscCalloc1(), PetscCalloc2(), PetscCalloc3(), PetscCalloc4(), PetscCalloc5(), PetscCalloc6(), PetscCalloc7(), PetscFreeA() 393ba282f50SJed Brown @*/ 394ba282f50SJed Brown PetscErrorCode PetscMallocA(int n,PetscBool clear,int lineno,const char *function,const char *filename,size_t bytes0,void *ptr0,...) 395ba282f50SJed Brown { 396ba282f50SJed Brown PetscErrorCode ierr; 397ba282f50SJed Brown va_list Argp; 398ba282f50SJed Brown size_t bytes[8],sumbytes; 399ba282f50SJed Brown void **ptr[8]; 400ba282f50SJed Brown int i; 401ba282f50SJed Brown 402ba282f50SJed Brown PetscFunctionBegin; 403ba282f50SJed Brown if (n > 8) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Attempt to allocate %d objects but only 8 supported",n); 404ba282f50SJed Brown bytes[0] = bytes0; 405ba282f50SJed Brown ptr[0] = (void**)ptr0; 406ba282f50SJed Brown sumbytes = (bytes0 + PETSC_MEMALIGN-1) & ~(PETSC_MEMALIGN-1); 407ba282f50SJed Brown va_start(Argp,ptr0); 408ba282f50SJed Brown for (i=1; i<n; i++) { 409ba282f50SJed Brown bytes[i] = va_arg(Argp,size_t); 410ba282f50SJed Brown ptr[i] = va_arg(Argp,void**); 411ba282f50SJed Brown sumbytes += (bytes[i] + PETSC_MEMALIGN-1) & ~(PETSC_MEMALIGN-1); 412ba282f50SJed Brown } 413ba282f50SJed Brown va_end(Argp); 414ba282f50SJed Brown if (petscmalloccoalesce) { 415ba282f50SJed Brown char *p; 416071fcb05SBarry Smith ierr = (*PetscTrMalloc)(sumbytes,clear,lineno,function,filename,(void**)&p);CHKERRQ(ierr); 417ba282f50SJed Brown for (i=0; i<n; i++) { 418ba282f50SJed Brown *ptr[i] = bytes[i] ? p : NULL; 419ba282f50SJed Brown p = (char*)PetscAddrAlign(p + bytes[i]); 420ba282f50SJed Brown } 421ba282f50SJed Brown } else { 422ba282f50SJed Brown for (i=0; i<n; i++) { 423071fcb05SBarry Smith ierr = (*PetscTrMalloc)(bytes[i],clear,lineno,function,filename,(void**)ptr[i]);CHKERRQ(ierr); 424ba282f50SJed Brown } 425ba282f50SJed Brown } 426ba282f50SJed Brown PetscFunctionReturn(0); 427ba282f50SJed Brown } 428ba282f50SJed Brown 429ba282f50SJed Brown /*@C 430ba282f50SJed Brown PetscFreeA - Free one or more objects, possibly allocated using coalesced malloc 431ba282f50SJed Brown 432ba282f50SJed Brown Not Collective 433ba282f50SJed Brown 434ba282f50SJed Brown Input Parameters: 435ba282f50SJed Brown + n - number of objects to free (at least 1) 436ba282f50SJed Brown . lineno - line number to attribute deallocation (typically __LINE__) 437ba282f50SJed Brown . function - function to attribute deallocation (typically PETSC_FUNCTION_NAME) 438ba282f50SJed Brown . filename - file name to attribute deallocation (typically __FILE__) 439ba282f50SJed Brown - ptr0 ... - first of n pointers to free 440ba282f50SJed Brown 441ba282f50SJed Brown Note: 442071fcb05SBarry Smith This function is not normally called directly by users, but rather via the macros PetscFree(), PetscFree2(), etc. 443ba282f50SJed Brown 44489407d75SBarry Smith The pointers are zeroed to prevent users from accidently reusing space that has been freed. 44589407d75SBarry Smith 446ba282f50SJed Brown Level: developer 447ba282f50SJed Brown 448ba282f50SJed Brown .seealso: PetscMallocAlign(), PetscMallocSet(), PetscMallocA(), PetscFree1(), PetscFree2(), PetscFree3(), PetscFree4(), PetscFree5(), PetscFree6(), PetscFree7() 449ba282f50SJed Brown @*/ 450ba282f50SJed Brown PetscErrorCode PetscFreeA(int n,int lineno,const char *function,const char *filename,void *ptr0,...) 451ba282f50SJed Brown { 452ba282f50SJed Brown PetscErrorCode ierr; 453ba282f50SJed Brown va_list Argp; 454ba282f50SJed Brown void **ptr[8]; 455ba282f50SJed Brown int i; 456ba282f50SJed Brown 457ba282f50SJed Brown PetscFunctionBegin; 45889407d75SBarry Smith if (n > 8) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Attempt to allocate %d objects but only up to 8 supported",n); 459ba282f50SJed Brown ptr[0] = (void**)ptr0; 460ba282f50SJed Brown va_start(Argp,ptr0); 461ba282f50SJed Brown for (i=1; i<n; i++) { 462ba282f50SJed Brown ptr[i] = va_arg(Argp,void**); 463ba282f50SJed Brown } 464ba282f50SJed Brown va_end(Argp); 465ba282f50SJed Brown if (petscmalloccoalesce) { 466ba282f50SJed Brown for (i=0; i<n; i++) { /* Find first nonempty allocation */ 467ba282f50SJed Brown if (*ptr[i]) break; 468ba282f50SJed Brown } 469ba282f50SJed Brown while (--n > i) { 470ba282f50SJed Brown *ptr[n] = NULL; 471ba282f50SJed Brown } 472c53cf884SJed Brown ierr = (*PetscTrFree)(*ptr[n],lineno,function,filename);CHKERRQ(ierr); 473ba282f50SJed Brown *ptr[n] = NULL; 474ba282f50SJed Brown } else { 475ba282f50SJed Brown while (--n >= 0) { 476c53cf884SJed Brown ierr = (*PetscTrFree)(*ptr[n],lineno,function,filename);CHKERRQ(ierr); 477ba282f50SJed Brown *ptr[n] = NULL; 478ba282f50SJed Brown } 479ba282f50SJed Brown } 480ba282f50SJed Brown PetscFunctionReturn(0); 481ba282f50SJed Brown } 482