17d0a6c19SBarry Smith 2e5c89e4eSSatish Balay /* 3e5c89e4eSSatish Balay Code that allows a user to dictate what malloc() PETSc uses. 4e5c89e4eSSatish Balay */ 5c6db04a5SJed Brown #include <petscsys.h> /*I "petscsys.h" I*/ 6e5c89e4eSSatish Balay #if defined(PETSC_HAVE_MALLOC_H) 7e5c89e4eSSatish Balay #include <malloc.h> 8e5c89e4eSSatish Balay #endif 9de1d6c17SHong Zhang #if defined(PETSC_HAVE_MEMKIND) 10de1d6c17SHong Zhang #include <memkind.h> 11de1d6c17SHong Zhang typedef enum {PETSC_MK_DEFAULT=0,PETSC_MK_HBW_PREFERRED=1} PetscMemkindType; 12de1d6c17SHong Zhang PetscMemkindType currentmktype = PETSC_MK_HBW_PREFERRED; 13de1d6c17SHong Zhang PetscMemkindType previousmktype = PETSC_MK_HBW_PREFERRED; 14de1d6c17SHong Zhang #endif 15e5c89e4eSSatish Balay /* 16e5c89e4eSSatish Balay We want to make sure that all mallocs of double or complex numbers are complex aligned. 17e5c89e4eSSatish Balay 1) on systems with memalign() we call that routine to get an aligned memory location 18e5c89e4eSSatish Balay 2) on systems without memalign() we 19e5c89e4eSSatish Balay - allocate one sizeof(PetscScalar) extra space 20e5c89e4eSSatish Balay - we shift the pointer up slightly if needed to get PetscScalar aligned 210700a824SBarry Smith - if shifted we store at ptr[-1] the amount of shift (plus a classid) 22e5c89e4eSSatish Balay */ 230700a824SBarry Smith #define SHIFT_CLASSID 456123 24e5c89e4eSSatish Balay 25efca3c55SSatish Balay PetscErrorCode PetscMallocAlign(size_t mem,int line,const char func[],const char file[],void **result) 26e5c89e4eSSatish Balay { 27f0ba7cfcSLisandro Dalcin if (!mem) { *result = NULL; return 0; } 28*fc2a7144SHong Zhang #if defined(PETSC_HAVE_MEMKIND) 29*fc2a7144SHong Zhang { 30*fc2a7144SHong Zhang int ierr; 31*fc2a7144SHong Zhang if (!currentmktype) ierr = memkind_posix_memalign(MEMKIND_DEFAULT,result,PETSC_MEMALIGN,mem); 32*fc2a7144SHong Zhang else ierr = memkind_posix_memalign(MEMKIND_HBW_PREFERRED,result,PETSC_MEMALIGN,mem); 33*fc2a7144SHong Zhang if (ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MEM,"Memory requested with memkind %.0f",(PetscLogDouble)mem); 34*fc2a7144SHong Zhang } 35*fc2a7144SHong Zhang #else 36e5c89e4eSSatish Balay # if defined(PETSC_HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8) 37e5c89e4eSSatish Balay *result = malloc(mem); 38e5c89e4eSSatish Balay # elif defined(PETSC_HAVE_MEMALIGN) 39e5c89e4eSSatish Balay *result = memalign(PETSC_MEMALIGN,mem); 40e5c89e4eSSatish Balay # else 41e5c89e4eSSatish Balay { 42e5c89e4eSSatish Balay /* 43e5c89e4eSSatish Balay malloc space for two extra chunks and shift ptr 1 + enough to get it PetscScalar aligned 44e5c89e4eSSatish Balay */ 45e5c89e4eSSatish Balay if (ptr) { 46f0ba7cfcSLisandro Dalcin int shift = (int)(((PETSC_UINTPTR_T) ptr) % PETSC_MEMALIGN); 47e5c89e4eSSatish Balay shift = (2*PETSC_MEMALIGN - shift)/sizeof(int); 480700a824SBarry Smith ptr[shift-1] = shift + SHIFT_CLASSID; 49e5c89e4eSSatish Balay ptr += shift; 50e5c89e4eSSatish Balay *result = (void*)ptr; 51e5c89e4eSSatish Balay } else { 52f0ba7cfcSLisandro Dalcin *result = NULL; 53e5c89e4eSSatish Balay } 54e5c89e4eSSatish Balay } 55e5c89e4eSSatish Balay # endif 56*fc2a7144SHong Zhang #endif 57f0ba7cfcSLisandro Dalcin if (!*result) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_MEM,PETSC_ERROR_INITIAL,"Memory requested %.0f",(PetscLogDouble)mem); 58e5c89e4eSSatish Balay return 0; 59e5c89e4eSSatish Balay } 60e5c89e4eSSatish Balay 61efca3c55SSatish Balay PetscErrorCode PetscFreeAlign(void *ptr,int line,const char func[],const char file[]) 62e5c89e4eSSatish Balay { 63f0ba7cfcSLisandro Dalcin if (!ptr) return 0; 64*fc2a7144SHong Zhang #if defined(PETSC_HAVE_MEMKIND) 65*fc2a7144SHong Zhang memkind_free(0,ptr); /* specify the kind to 0 so that memkind will look up for the right type */ 66*fc2a7144SHong Zhang #else 67e5c89e4eSSatish Balay # if (!(defined(PETSC_HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)) && !defined(PETSC_HAVE_MEMALIGN)) 68f0ba7cfcSLisandro Dalcin { 69e5c89e4eSSatish Balay /* 70e5c89e4eSSatish Balay Previous int tells us how many ints the pointer has been shifted from 71e5c89e4eSSatish Balay the original address provided by the system malloc(). 72e5c89e4eSSatish Balay */ 73f0ba7cfcSLisandro Dalcin int shift = *(((int*)ptr)-1) - SHIFT_CLASSID; 74efca3c55SSatish Balay if (shift > PETSC_MEMALIGN-1) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_PLIB,PETSC_ERROR_INITIAL,"Likely memory corruption in heap"); 75efca3c55SSatish Balay if (shift < 0) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_PLIB,PETSC_ERROR_INITIAL,"Likely memory corruption in heap"); 76e5c89e4eSSatish Balay ptr = (void*)(((int*)ptr) - shift); 77e5c89e4eSSatish Balay } 78f0ba7cfcSLisandro Dalcin # endif 79e5c89e4eSSatish Balay 80e5c89e4eSSatish Balay # if defined(PETSC_HAVE_FREE_RETURN_INT) 81e5c89e4eSSatish Balay int err = free(ptr); 82efca3c55SSatish Balay if (err) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_PLIB,PETSC_ERROR_INITIAL,"System free returned error %d\n",err); 83e5c89e4eSSatish Balay # else 84e5c89e4eSSatish Balay free(ptr); 85e5c89e4eSSatish Balay # endif 86*fc2a7144SHong Zhang #endif 87e5c89e4eSSatish Balay return 0; 88e5c89e4eSSatish Balay } 89e5c89e4eSSatish Balay 903221ece2SMatthew G. Knepley PetscErrorCode PetscReallocAlign(size_t mem, int line, const char func[], const char file[], void **result) 913221ece2SMatthew G. Knepley { 92c22f1541SToby Isaac PetscErrorCode ierr; 93c22f1541SToby Isaac 94c22f1541SToby Isaac if (!mem) { 95c22f1541SToby Isaac ierr = PetscFreeAlign(*result, line, func, file); 96c22f1541SToby Isaac if (ierr) return ierr; 97c22f1541SToby Isaac *result = NULL; 98c22f1541SToby Isaac return 0; 99c22f1541SToby Isaac } 100*fc2a7144SHong Zhang #if defined(PETSC_HAVE_MEMKIND) 101*fc2a7144SHong Zhang if (!currentmktype) *result = memkind_realloc(MEMKIND_DEFAULT,*result,mem); 102*fc2a7144SHong Zhang else *result = memkind_realloc(MEMKIND_HBW_PREFERRED,*result,mem); 103*fc2a7144SHong Zhang #else 1043221ece2SMatthew G. Knepley # if (!(defined(PETSC_HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)) && !defined(PETSC_HAVE_MEMALIGN)) 1053221ece2SMatthew G. Knepley { 1063221ece2SMatthew G. Knepley /* 1073221ece2SMatthew G. Knepley Previous int tells us how many ints the pointer has been shifted from 1083221ece2SMatthew G. Knepley the original address provided by the system malloc(). 1093221ece2SMatthew G. Knepley */ 1103221ece2SMatthew G. Knepley int shift = *(((int*)*result)-1) - SHIFT_CLASSID; 1113221ece2SMatthew G. Knepley if (shift > PETSC_MEMALIGN-1) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_PLIB,PETSC_ERROR_INITIAL,"Likely memory corruption in heap"); 1123221ece2SMatthew G. Knepley if (shift < 0) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_PLIB,PETSC_ERROR_INITIAL,"Likely memory corruption in heap"); 1133221ece2SMatthew G. Knepley *result = (void*)(((int*)*result) - shift); 1143221ece2SMatthew G. Knepley } 1153221ece2SMatthew G. Knepley # endif 1163221ece2SMatthew G. Knepley 117c22f1541SToby Isaac # if (defined(PETSC_HAVE_DOUBLE_ALIGN_MALLOC) && (PETSC_MEMALIGN == 8)) || defined(PETSC_HAVE_MEMALIGN) 1183221ece2SMatthew G. Knepley *result = realloc(*result, mem); 1193221ece2SMatthew G. Knepley # else 1203221ece2SMatthew G. Knepley { 1213221ece2SMatthew G. Knepley /* 1223221ece2SMatthew G. Knepley malloc space for two extra chunks and shift ptr 1 + enough to get it PetscScalar aligned 1233221ece2SMatthew G. Knepley */ 1243221ece2SMatthew G. Knepley int *ptr = (int *) realloc(*result, mem + 2*PETSC_MEMALIGN); 1253221ece2SMatthew G. Knepley if (ptr) { 1263221ece2SMatthew G. Knepley int shift = (int)(((PETSC_UINTPTR_T) ptr) % PETSC_MEMALIGN); 1273221ece2SMatthew G. Knepley shift = (2*PETSC_MEMALIGN - shift)/sizeof(int); 1283221ece2SMatthew G. Knepley ptr[shift-1] = shift + SHIFT_CLASSID; 1293221ece2SMatthew G. Knepley ptr += shift; 1303221ece2SMatthew G. Knepley *result = (void*)ptr; 1313221ece2SMatthew G. Knepley } else { 1323221ece2SMatthew G. Knepley *result = NULL; 1333221ece2SMatthew G. Knepley } 1343221ece2SMatthew G. Knepley } 1353221ece2SMatthew G. Knepley # endif 136*fc2a7144SHong Zhang #endif 1373221ece2SMatthew G. Knepley if (!*result) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_MEM,PETSC_ERROR_INITIAL,"Memory requested %.0f",(PetscLogDouble)mem); 138c22f1541SToby Isaac #if defined(PETSC_HAVE_MEMALIGN) 139c22f1541SToby Isaac /* There are no standard guarantees that realloc() maintains the alignment of memalign(), so I think we have to 140c22f1541SToby Isaac * realloc and, if the alignment is wrong, malloc/copy/free. */ 141c22f1541SToby Isaac if (((size_t) (*result)) % PETSC_MEMALIGN) { 142c22f1541SToby Isaac void *newResult; 143*fc2a7144SHong Zhang # if defined(PETSC_HAVE_MEMKIND) 144*fc2a7144SHong Zhang { 145*fc2a7144SHong Zhang int ierr; 146*fc2a7144SHong Zhang if (!currentmktype) ierr = memkind_posix_memalign(MEMKIND_DEFAULT,&newResult,PETSC_MEMALIGN,mem); 147*fc2a7144SHong Zhang else ierr = memkind_posix_memalign(MEMKIND_HBW_PREFERRED,&newResult,PETSC_MEMALIGN,mem); 148*fc2a7144SHong Zhang if (ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MEM,"Memory requested with memkind %.0f",(PetscLogDouble)mem); 149*fc2a7144SHong Zhang } 150*fc2a7144SHong Zhang # else 151c22f1541SToby Isaac newResult = memalign(PETSC_MEMALIGN,mem); 152*fc2a7144SHong Zhang # endif 153c22f1541SToby Isaac if (!newResult) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_MEM,PETSC_ERROR_INITIAL,"Memory requested %.0f",(PetscLogDouble)mem); 154c22f1541SToby Isaac ierr = PetscMemcpy(newResult,*result,mem); 155c22f1541SToby Isaac if (ierr) return ierr; 156c22f1541SToby Isaac # if defined(PETSC_HAVE_FREE_RETURN_INT) 157c22f1541SToby Isaac { 158c22f1541SToby Isaac int err = free(*result); 159c22f1541SToby Isaac if (err) return PetscError(PETSC_COMM_SELF,line,func,file,PETSC_ERR_PLIB,PETSC_ERROR_INITIAL,"System free returned error %d\n",err); 160c22f1541SToby Isaac } 161c22f1541SToby Isaac # else 162de1d6c17SHong Zhang # if defined(PETSC_HAVE_MEMKIND) 163de1d6c17SHong Zhang memkind_free(0,*result); 164de1d6c17SHong Zhang # else 165c22f1541SToby Isaac free(*result); 166c22f1541SToby Isaac # endif 167de1d6c17SHong Zhang # endif 168c22f1541SToby Isaac *result = newResult; 169c22f1541SToby Isaac } 170c22f1541SToby Isaac #endif 1713221ece2SMatthew G. Knepley return 0; 1723221ece2SMatthew G. Knepley } 1733221ece2SMatthew G. Knepley 174efca3c55SSatish Balay PetscErrorCode (*PetscTrMalloc)(size_t,int,const char[],const char[],void**) = PetscMallocAlign; 175efca3c55SSatish Balay PetscErrorCode (*PetscTrFree)(void*,int,const char[],const char[]) = PetscFreeAlign; 1763221ece2SMatthew G. Knepley PetscErrorCode (*PetscTrRealloc)(size_t,int,const char[],const char[],void**) = PetscReallocAlign; 177e5c89e4eSSatish Balay 178ace3abfcSBarry Smith PetscBool petscsetmallocvisited = PETSC_FALSE; 179e5c89e4eSSatish Balay 180e5c89e4eSSatish Balay /*@C 1811d1a0024SBarry Smith PetscMallocSet - Sets the routines used to do mallocs and frees. 182e5c89e4eSSatish Balay This routine MUST be called before PetscInitialize() and may be 183e5c89e4eSSatish Balay called only once. 184e5c89e4eSSatish Balay 185e5c89e4eSSatish Balay Not Collective 186e5c89e4eSSatish Balay 187e5c89e4eSSatish Balay Input Parameters: 188e5c89e4eSSatish Balay + malloc - the malloc routine 189e5c89e4eSSatish Balay - free - the free routine 190e5c89e4eSSatish Balay 191e5c89e4eSSatish Balay Level: developer 192e5c89e4eSSatish Balay 193e5c89e4eSSatish Balay Concepts: malloc 194e5c89e4eSSatish Balay Concepts: memory^allocation 195e5c89e4eSSatish Balay 196e5c89e4eSSatish Balay @*/ 197efca3c55SSatish Balay PetscErrorCode PetscMallocSet(PetscErrorCode (*imalloc)(size_t,int,const char[],const char[],void**), 198efca3c55SSatish Balay PetscErrorCode (*ifree)(void*,int,const char[],const char[])) 199e5c89e4eSSatish Balay { 200e5c89e4eSSatish Balay PetscFunctionBegin; 201e32f2f54SBarry Smith if (petscsetmallocvisited && (imalloc != PetscTrMalloc || ifree != PetscTrFree)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"cannot call multiple times"); 202e5c89e4eSSatish Balay PetscTrMalloc = imalloc; 203e5c89e4eSSatish Balay PetscTrFree = ifree; 204e5c89e4eSSatish Balay petscsetmallocvisited = PETSC_TRUE; 205e5c89e4eSSatish Balay PetscFunctionReturn(0); 206e5c89e4eSSatish Balay } 207e5c89e4eSSatish Balay 208e5c89e4eSSatish Balay /*@C 2091d1a0024SBarry Smith PetscMallocClear - Resets the routines used to do mallocs and frees to the 210e5c89e4eSSatish Balay defaults. 211e5c89e4eSSatish Balay 212e5c89e4eSSatish Balay Not Collective 213e5c89e4eSSatish Balay 214e5c89e4eSSatish Balay Level: developer 215e5c89e4eSSatish Balay 216e5c89e4eSSatish Balay Notes: 217e5c89e4eSSatish Balay In general one should never run a PETSc program with different malloc() and 218e5c89e4eSSatish Balay free() settings for different parts; this is because one NEVER wants to 219e5c89e4eSSatish Balay free() an address that was malloced by a different memory management system 220e5c89e4eSSatish Balay 221e5c89e4eSSatish Balay @*/ 2227087cfbeSBarry Smith PetscErrorCode PetscMallocClear(void) 223e5c89e4eSSatish Balay { 224e5c89e4eSSatish Balay PetscFunctionBegin; 225e5c89e4eSSatish Balay PetscTrMalloc = PetscMallocAlign; 226e5c89e4eSSatish Balay PetscTrFree = PetscFreeAlign; 227e5c89e4eSSatish Balay petscsetmallocvisited = PETSC_FALSE; 228e5c89e4eSSatish Balay PetscFunctionReturn(0); 229e5c89e4eSSatish Balay } 230b44d5720SBarry Smith 231b44d5720SBarry Smith PetscErrorCode PetscMemoryTrace(const char label[]) 232b44d5720SBarry Smith { 233b44d5720SBarry Smith PetscErrorCode ierr; 234b44d5720SBarry Smith PetscLogDouble mem,mal; 235b44d5720SBarry Smith static PetscLogDouble oldmem = 0,oldmal = 0; 236b44d5720SBarry Smith 237b44d5720SBarry Smith PetscFunctionBegin; 238b44d5720SBarry Smith ierr = PetscMemoryGetCurrentUsage(&mem);CHKERRQ(ierr); 239b44d5720SBarry Smith ierr = PetscMallocGetCurrentUsage(&mal);CHKERRQ(ierr); 240b44d5720SBarry Smith 241b44d5720SBarry Smith ierr = PetscPrintf(PETSC_COMM_WORLD,"%s High water %8.3f MB increase %8.3f MB Current %8.3f MB increase %8.3f MB\n",label,mem*1e-6,(mem - oldmem)*1e-6,mal*1e-6,(mal - oldmal)*1e-6);CHKERRQ(ierr); 242b44d5720SBarry Smith oldmem = mem; 243b44d5720SBarry Smith oldmal = mal; 244b44d5720SBarry Smith PetscFunctionReturn(0); 245b44d5720SBarry Smith } 24613850c04SHong Zhang 24750a41461SHong Zhang static PetscErrorCode (*PetscTrMallocOld)(size_t,int,const char[],const char[],void**) = PetscMallocAlign; 24850a41461SHong Zhang static PetscErrorCode (*PetscTrFreeOld)(void*,int,const char[],const char[]) = PetscFreeAlign; 249de1d6c17SHong Zhang 250de1d6c17SHong Zhang /*@C 251de1d6c17SHong Zhang PetscMallocSetDRAM - Set PetscMalloc to use DRAM. 252de1d6c17SHong Zhang If memkind is available, change the memkind type. Otherwise, switch the 253de1d6c17SHong Zhang current malloc and free routines to the PetscMallocAlign and 254de1d6c17SHong Zhang PetscFreeAlign (PETSc default). 255de1d6c17SHong Zhang 256de1d6c17SHong Zhang Not Collective 257de1d6c17SHong Zhang 258de1d6c17SHong Zhang Level: developer 259de1d6c17SHong Zhang 260de1d6c17SHong Zhang Notes: 261de1d6c17SHong Zhang This provides a way to do the allocation on DRAM temporarily. One 262de1d6c17SHong Zhang can switch back to the previous choice by calling PetscMallocReset(). 263de1d6c17SHong Zhang 264de1d6c17SHong Zhang .seealso: PetscMallocReset() 265de1d6c17SHong Zhang @*/ 26613850c04SHong Zhang PetscErrorCode PetscMallocSetDRAM(void) 26713850c04SHong Zhang { 26813850c04SHong Zhang PetscFunctionBegin; 269de1d6c17SHong Zhang if (PetscTrMalloc == PetscMallocAlign) { 270de1d6c17SHong Zhang #if defined(PETSC_HAVE_MEMKIND) 271de1d6c17SHong Zhang previousmktype = currentmktype; 272de1d6c17SHong Zhang currentmktype = PETSC_MK_DEFAULT; 273de1d6c17SHong Zhang #endif 274de1d6c17SHong Zhang } else { 27513850c04SHong Zhang /* Save the previous choice */ 27613850c04SHong Zhang PetscTrMallocOld = PetscTrMalloc; 27713850c04SHong Zhang PetscTrFreeOld = PetscTrFree; 27813850c04SHong Zhang PetscTrMalloc = PetscMallocAlign; 27913850c04SHong Zhang PetscTrFree = PetscFreeAlign; 280de1d6c17SHong Zhang } 28113850c04SHong Zhang PetscFunctionReturn(0); 28213850c04SHong Zhang } 28313850c04SHong Zhang 284de1d6c17SHong Zhang /*@C 285de1d6c17SHong Zhang PetscMallocResetDRAM - Reset the changes made by PetscMallocSetDRAM 286de1d6c17SHong Zhang 287de1d6c17SHong Zhang Not Collective 288de1d6c17SHong Zhang 289de1d6c17SHong Zhang Level: developer 290de1d6c17SHong Zhang 291de1d6c17SHong Zhang .seealso: PetscMallocSetDRAM() 292de1d6c17SHong Zhang @*/ 29313850c04SHong Zhang PetscErrorCode PetscMallocResetDRAM(void) 29413850c04SHong Zhang { 29513850c04SHong Zhang PetscFunctionBegin; 296de1d6c17SHong Zhang if (PetscTrMalloc == PetscMallocAlign) { 297de1d6c17SHong Zhang #if defined(PETSC_HAVE_MEMKIND) 298de1d6c17SHong Zhang currentmktype = previousmktype; 299de1d6c17SHong Zhang #endif 300de1d6c17SHong Zhang } else { 30113850c04SHong Zhang /* Reset to the previous choice */ 30213850c04SHong Zhang PetscTrMalloc = PetscTrMallocOld; 30313850c04SHong Zhang PetscTrFree = PetscTrFreeOld; 304de1d6c17SHong Zhang } 30513850c04SHong Zhang PetscFunctionReturn(0); 30613850c04SHong Zhang } 307