1 static const char help[] = "Tests PetscDeviceAllocate().\n\n"; 2 3 #include "petscdevicetestcommon.h" 4 5 #define DebugPrintf(comm, ...) PetscPrintf((comm), "[DEBUG OUTPUT] " __VA_ARGS__) 6 7 static PetscErrorCode IncrementSize(PetscRandom rand, PetscInt *value) { 8 PetscReal rval; 9 10 PetscFunctionBegin; 11 // set the interval such that *value += rval never goes below 0 or above 500 12 PetscCall(PetscRandomSetInterval(rand, -(*value), 500 - (*value))); 13 PetscCall(PetscRandomGetValueReal(rand, &rval)); 14 *value += (PetscInt)rval; 15 PetscCall(DebugPrintf(PetscObjectComm((PetscObject)rand), "n: %" PetscInt_FMT "\n", *value)); 16 PetscFunctionReturn(0); 17 } 18 19 static PetscErrorCode TestAllocate(PetscDeviceContext dctx, PetscRandom rand, PetscMemType mtype) { 20 PetscScalar *ptr, *tmp_ptr; 21 PetscInt n = 10; 22 23 PetscFunctionBegin; 24 if (PetscMemTypeDevice(mtype)) { 25 PetscDeviceType dtype; 26 27 PetscCall(PetscDeviceContextGetDeviceType(dctx, &dtype)); 28 // host device context cannot handle this 29 if (dtype == PETSC_DEVICE_HOST) PetscFunctionReturn(0); 30 } 31 // test basic allocation, deallocation 32 PetscCall(IncrementSize(rand, &n)); 33 PetscCall(PetscDeviceMalloc(dctx, mtype, n, &ptr)); 34 PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_POINTER, "PetscDeviceMalloc() return NULL pointer for %s allocation size %" PetscInt_FMT, PetscMemTypeToString(mtype), n); 35 // this ensures the host pointer is at least valid 36 if (PetscMemTypeHost(mtype)) { 37 for (PetscInt i = 0; i < n; ++i) ptr[i] = (PetscScalar)i; 38 } 39 PetscCall(PetscDeviceFree(dctx, ptr)); 40 41 // test alignment of various types 42 { 43 char *char_ptr; 44 short *short_ptr; 45 int *int_ptr; 46 double *double_ptr; 47 long int *long_int_ptr; 48 49 PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &char_ptr)); 50 PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &short_ptr)); 51 PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &int_ptr)); 52 PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &double_ptr)); 53 PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &long_int_ptr)); 54 55 // if an error occurs here, it means the alignment system is broken! 56 PetscCall(PetscDeviceFree(dctx, char_ptr)); 57 PetscCall(PetscDeviceFree(dctx, short_ptr)); 58 PetscCall(PetscDeviceFree(dctx, int_ptr)); 59 PetscCall(PetscDeviceFree(dctx, double_ptr)); 60 PetscCall(PetscDeviceFree(dctx, long_int_ptr)); 61 } 62 63 // test that calloc() produces cleared memory 64 PetscCall(IncrementSize(rand, &n)); 65 PetscCall(PetscDeviceCalloc(dctx, mtype, n, &ptr)); 66 PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_POINTER, "PetscDeviceCalloc() returned NULL pointer for %s allocation size %" PetscInt_FMT, PetscMemTypeToString(mtype), n); 67 if (PetscMemTypeHost(mtype)) { 68 tmp_ptr = ptr; 69 } else { 70 PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n, &tmp_ptr)); 71 PetscCall(PetscDeviceArrayCopy(dctx, tmp_ptr, ptr, n)); 72 } 73 PetscCall(PetscDeviceContextSynchronize(dctx)); 74 for (PetscInt i = 0; i < n; ++i) PetscCheck(tmp_ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceCalloc() returned memory that was not cleared, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(tmp_ptr[i])); 75 if (tmp_ptr == ptr) { 76 tmp_ptr = NULL; 77 } else { 78 PetscCall(PetscDeviceFree(dctx, tmp_ptr)); 79 } 80 PetscCall(PetscDeviceFree(dctx, ptr)); 81 82 // test that devicearrayzero produces cleared memory 83 PetscCall(IncrementSize(rand, &n)); 84 PetscCall(PetscDeviceMalloc(dctx, mtype, n, &ptr)); 85 PetscCall(PetscDeviceArrayZero(dctx, ptr, n)); 86 PetscCall(PetscMalloc1(n, &tmp_ptr)); 87 PetscCall(PetscDeviceRegisterMemory(tmp_ptr, PETSC_MEMTYPE_HOST, n * sizeof(*tmp_ptr))); 88 for (PetscInt i = 0; i < n; ++i) tmp_ptr[i] = (PetscScalar)i; 89 PetscCall(PetscDeviceArrayCopy(dctx, tmp_ptr, ptr, n)); 90 PetscCall(PetscDeviceContextSynchronize(dctx)); 91 for (PetscInt i = 0; i < n; ++i) PetscCheck(tmp_ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceArrayZero() did not not clear memory, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(tmp_ptr[i])); 92 PetscCall(PetscDeviceFree(dctx, tmp_ptr)); 93 PetscCall(PetscDeviceFree(dctx, ptr)); 94 PetscFunctionReturn(0); 95 } 96 97 static PetscErrorCode TestAsyncCoherence(PetscDeviceContext dctx, PetscRandom rand) { 98 const PetscInt nsub = 2; 99 const PetscInt n = 1024; 100 PetscScalar *ptr, *tmp_ptr; 101 PetscDeviceType dtype; 102 PetscDeviceContext *sub; 103 104 PetscFunctionBegin; 105 PetscCall(PetscDeviceContextGetDeviceType(dctx, &dtype)); 106 // ensure the streams are nonblocking 107 PetscCall(PetscDeviceContextForkWithStreamType(dctx, PETSC_STREAM_GLOBAL_NONBLOCKING, nsub, &sub)); 108 // do a warmup to ensure each context acquires any necessary data structures 109 for (PetscInt i = 0; i < nsub; ++i) { 110 PetscCall(PetscDeviceMalloc(sub[i], PETSC_MEMTYPE_HOST, n, &ptr)); 111 PetscCall(PetscDeviceFree(sub[i], ptr)); 112 if (dtype != PETSC_DEVICE_HOST) { 113 PetscCall(PetscDeviceMalloc(sub[i], PETSC_MEMTYPE_DEVICE, n, &ptr)); 114 PetscCall(PetscDeviceFree(sub[i], ptr)); 115 } 116 } 117 118 // allocate on one 119 PetscCall(PetscDeviceMalloc(sub[0], PETSC_MEMTYPE_HOST, n, &ptr)); 120 // free on the other 121 PetscCall(PetscDeviceFree(sub[1], ptr)); 122 123 // allocate on one 124 PetscCall(PetscDeviceMalloc(sub[0], PETSC_MEMTYPE_HOST, n, &ptr)); 125 // zero on the other 126 PetscCall(PetscDeviceArrayZero(sub[1], ptr, n)); 127 PetscCall(PetscDeviceContextSynchronize(sub[1])); 128 for (PetscInt i = 0; i < n; ++i) { 129 for (PetscInt i = 0; i < n; ++i) PetscCheck(ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceArrayZero() was not properly serialized, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(ptr[i])); 130 } 131 PetscCall(PetscDeviceFree(sub[1], ptr)); 132 133 // test the transfers are serialized 134 if (dtype != PETSC_DEVICE_HOST) { 135 PetscCall(PetscDeviceCalloc(dctx, PETSC_MEMTYPE_DEVICE, n, &ptr)); 136 PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n, &tmp_ptr)); 137 PetscCall(PetscDeviceArrayCopy(sub[0], tmp_ptr, ptr, n)); 138 PetscCall(PetscDeviceContextSynchronize(sub[0])); 139 for (PetscInt i = 0; i < n; ++i) { 140 for (PetscInt i = 0; i < n; ++i) PetscCheck(tmp_ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceArrayCopt() was not properly serialized, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(tmp_ptr[i])); 141 } 142 PetscCall(PetscDeviceFree(sub[1], ptr)); 143 } 144 145 PetscCall(PetscDeviceContextJoin(dctx, nsub, PETSC_DEVICE_CONTEXT_JOIN_DESTROY, &sub)); 146 PetscFunctionReturn(0); 147 } 148 149 int main(int argc, char *argv[]) { 150 PetscDeviceContext dctx; 151 PetscRandom rand; 152 153 PetscFunctionBeginUser; 154 PetscCall(PetscInitialize(&argc, &argv, NULL, help)); 155 156 // A vile hack. The -info output is used to test correctness in this test which prints -- 157 // among other things -- the PetscObjectId of the PetscDevicContext and the allocated memory. 158 // 159 // Due to device and host creating slightly different number of objects on startup there will 160 // be a mismatch in the ID's. So for the tests involving the host we sit here creating 161 // PetscContainers (and incrementing the global PetscObjectId counter) until it reaches some 162 // arbitrarily high number to ensure that our first PetscDeviceContext has the same ID across 163 // systems. 164 if (PETSC_DEVICE_DEFAULT() == PETSC_DEVICE_HOST) { 165 PetscObjectId id, prev_id = 0; 166 167 do { 168 PetscContainer c; 169 170 PetscCall(PetscContainerCreate(PETSC_COMM_WORLD, &c)); 171 PetscCall(PetscObjectGetId((PetscObject)c, &id)); 172 // sanity check, in case PetscContainer ever stops being a PetscObject 173 PetscCheck(id > prev_id, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscObjectIds are not increasing for successively created PetscContainers! current: %" PetscInt64_FMT ", previous: %" PetscInt64_FMT, id, prev_id); 174 prev_id = id; 175 PetscCall(PetscContainerDestroy(&c)); 176 } while (id < 10); 177 } 178 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 179 180 PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rand)); 181 // this seed just so happens to keep the allocation size increasing 182 PetscCall(PetscRandomSetSeed(rand, 123)); 183 PetscCall(PetscRandomSeed(rand)); 184 PetscCall(PetscRandomSetFromOptions(rand)); 185 186 PetscCall(TestAllocate(dctx, rand, PETSC_MEMTYPE_HOST)); 187 PetscCall(TestAllocate(dctx, rand, PETSC_MEMTYPE_DEVICE)); 188 PetscCall(TestAsyncCoherence(dctx, rand)); 189 190 PetscCall(PetscRandomDestroy(&rand)); 191 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "EXIT_SUCCESS\n")); 192 PetscCall(PetscFinalize()); 193 return 0; 194 } 195 196 /*TEST 197 198 build: 199 requires: defined(PETSC_HAVE_CXX) 200 201 testset: 202 requires: defined(PETSC_USE_INFO), defined(PETSC_USE_DEBUG) 203 args: -info :device 204 suffix: with_info 205 test: 206 requires: !device 207 suffix: host_no_device 208 test: 209 requires: device 210 args: -default_device_type host 211 filter: sed -e 's/host/IMPL/g' -e 's/cuda/IMPL/g' -e 's/hip/IMPL/g' -e 's/sycl/IMPL/g' 212 suffix: host_with_device 213 test: 214 requires: cuda 215 args: -default_device_type cuda 216 suffix: cuda 217 test: 218 requires: hip 219 args: -default_device_type hip 220 suffix: hip 221 test: 222 requires: sycl 223 args: -default_device_type sycl 224 suffix: sycl 225 226 testset: 227 output_file: ./output/ExitSuccess.out 228 requires: !defined(PETSC_USE_DEBUG) 229 filter: grep -v "\[DEBUG OUTPUT\]" 230 suffix: no_info 231 test: 232 requires: !device 233 suffix: host_no_device 234 test: 235 requires: device 236 args: -default_device_type host 237 suffix: host_with_device 238 test: 239 requires: cuda 240 args: -default_device_type cuda 241 suffix: cuda 242 test: 243 requires: hip 244 args: -default_device_type hip 245 suffix: hip 246 test: 247 requires: sycl 248 args: -default_device_type sycl 249 suffix: sycl 250 TEST*/ 251