1 static const char help[] = "Tests PetscDeviceAllocate().\n\n"; 2 3 #include "petscdevicetestcommon.h" 4 5 #define DebugPrintf(comm, ...) PetscPrintf((comm), "[DEBUG OUTPUT] " __VA_ARGS__) 6 7 static PetscErrorCode IncrementSize(PetscRandom rand, PetscInt *value) { 8 PetscReal rval; 9 10 PetscFunctionBegin; 11 // set the interval such that *value += rval never goes below 0 or above 500 12 PetscCall(PetscRandomSetInterval(rand, -(*value), 500 - (*value))); 13 PetscCall(PetscRandomGetValueReal(rand, &rval)); 14 *value += (PetscInt)rval; 15 PetscCall(DebugPrintf(PetscObjectComm((PetscObject)rand), "n: %" PetscInt_FMT "\n", *value)); 16 PetscFunctionReturn(0); 17 } 18 19 static PetscErrorCode TestAllocate(PetscDeviceContext dctx, PetscRandom rand, PetscMemType mtype) { 20 PetscScalar *ptr, *tmp_ptr; 21 PetscInt n = 10; 22 23 PetscFunctionBegin; 24 if (PetscMemTypeDevice(mtype)) { 25 PetscDeviceType dtype; 26 27 PetscCall(PetscDeviceContextGetDeviceType(dctx, &dtype)); 28 // host device context cannot handle this 29 if (dtype == PETSC_DEVICE_HOST) PetscFunctionReturn(0); 30 } 31 // test basic allocation, deallocation 32 PetscCall(IncrementSize(rand, &n)); 33 PetscCall(PetscDeviceMalloc(dctx, mtype, n, &ptr)); 34 PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_POINTER, "PetscDeviceMalloc() return NULL pointer for %s allocation size %" PetscInt_FMT, PetscMemTypeToString(mtype), n); 35 if (PetscMemTypeHost(mtype)) { 36 for (PetscInt i = 0; i < n; ++i) ptr[i] = (PetscScalar)i; 37 } 38 PetscCall(PetscDeviceFree(dctx, ptr)); 39 40 // test that calloc() produces cleared memory 41 PetscCall(IncrementSize(rand, &n)); 42 PetscCall(PetscDeviceCalloc(dctx, mtype, n, &ptr)); 43 PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_POINTER, "PetscDeviceCalloc() return NULL pointer for %s allocation size %" PetscInt_FMT, PetscMemTypeToString(mtype), n); 44 if (PetscMemTypeHost(mtype)) { 45 tmp_ptr = ptr; 46 } else { 47 PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n, &tmp_ptr)); 48 PetscCall(PetscDeviceArrayCopy(dctx, tmp_ptr, ptr, n)); 49 PetscCall(PetscDeviceContextSynchronize(dctx)); 50 } 51 for (PetscInt i = 0; i < n; ++i) PetscCheck(tmp_ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceCalloc() returned memory that was not cleared, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(tmp_ptr[i])); 52 if (tmp_ptr == ptr) { 53 tmp_ptr = NULL; 54 } else { 55 PetscCall(PetscDeviceFree(dctx, tmp_ptr)); 56 } 57 PetscCall(PetscDeviceFree(dctx, ptr)); 58 59 // test that devicearrayzero produces cleared memory 60 PetscCall(IncrementSize(rand, &n)); 61 PetscCall(PetscDeviceMalloc(dctx, mtype, n, &ptr)); 62 PetscCall(PetscDeviceArrayZero(dctx, ptr, n)); 63 PetscCall(PetscMalloc1(n, &tmp_ptr)); 64 PetscCall(PetscDeviceRegisterMemory(tmp_ptr, PETSC_MEMTYPE_HOST, n * sizeof(*tmp_ptr))); 65 for (PetscInt i = 0; i < n; ++i) tmp_ptr[i] = (PetscScalar)i; 66 PetscCall(PetscDeviceArrayCopy(dctx, tmp_ptr, ptr, n)); 67 PetscCall(PetscDeviceContextSynchronize(dctx)); 68 for (PetscInt i = 0; i < n; ++i) PetscCheck(tmp_ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceArrayZero() did not not clear memory, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(tmp_ptr[i])); 69 PetscCall(PetscDeviceFree(dctx, tmp_ptr)); 70 PetscCall(PetscDeviceFree(dctx, ptr)); 71 PetscFunctionReturn(0); 72 } 73 74 static PetscErrorCode TestAsyncCoherence(PetscDeviceContext dctx, PetscRandom rand) { 75 const PetscInt nsub = 2; 76 const PetscInt n = 1024; 77 PetscScalar *ptr, *tmp_ptr; 78 PetscDeviceType dtype; 79 PetscDeviceContext *sub; 80 81 PetscFunctionBegin; 82 PetscCall(PetscDeviceContextGetDeviceType(dctx, &dtype)); 83 // ensure the streams are nonblocking 84 PetscCall(PetscDeviceContextForkWithStreamType(dctx, PETSC_STREAM_GLOBAL_NONBLOCKING, nsub, &sub)); 85 // do a warmup to ensure each context acquires any necessary data structures 86 for (PetscInt i = 0; i < nsub; ++i) { 87 PetscCall(PetscDeviceMalloc(sub[i], PETSC_MEMTYPE_HOST, n, &ptr)); 88 PetscCall(PetscDeviceFree(sub[i], ptr)); 89 if (dtype != PETSC_DEVICE_HOST) { 90 PetscCall(PetscDeviceMalloc(sub[i], PETSC_MEMTYPE_DEVICE, n, &ptr)); 91 PetscCall(PetscDeviceFree(sub[i], ptr)); 92 } 93 } 94 95 // allocate on one 96 PetscCall(PetscDeviceMalloc(sub[0], PETSC_MEMTYPE_HOST, n, &ptr)); 97 // free on the other 98 PetscCall(PetscDeviceFree(sub[1], ptr)); 99 100 // allocate on one 101 PetscCall(PetscDeviceMalloc(sub[0], PETSC_MEMTYPE_HOST, n, &ptr)); 102 // zero on the other 103 PetscCall(PetscDeviceArrayZero(sub[1], ptr, n)); 104 PetscCall(PetscDeviceContextSynchronize(sub[1])); 105 for (PetscInt i = 0; i < n; ++i) { 106 for (PetscInt i = 0; i < n; ++i) PetscCheck(ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceArrayZero() was not properly serialized, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(ptr[i])); 107 } 108 PetscCall(PetscDeviceFree(sub[1], ptr)); 109 110 // test the transfers are serialized 111 if (dtype != PETSC_DEVICE_HOST) { 112 PetscCall(PetscDeviceCalloc(dctx, PETSC_MEMTYPE_DEVICE, n, &ptr)); 113 PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n, &tmp_ptr)); 114 PetscCall(PetscDeviceArrayCopy(sub[0], tmp_ptr, ptr, n)); 115 PetscCall(PetscDeviceContextSynchronize(sub[0])); 116 for (PetscInt i = 0; i < n; ++i) { 117 for (PetscInt i = 0; i < n; ++i) PetscCheck(tmp_ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceArrayCopt() was not properly serialized, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(tmp_ptr[i])); 118 } 119 PetscCall(PetscDeviceFree(sub[1], ptr)); 120 } 121 122 PetscCall(PetscDeviceContextJoin(dctx, nsub, PETSC_DEVICE_CONTEXT_JOIN_DESTROY, &sub)); 123 PetscFunctionReturn(0); 124 } 125 126 int main(int argc, char *argv[]) { 127 PetscDeviceContext dctx; 128 PetscRandom rand; 129 130 PetscFunctionBeginUser; 131 PetscCall(PetscInitialize(&argc, &argv, NULL, help)); 132 133 // A vile hack. The -info output is used to test correctness in this test which prints -- 134 // among other things -- the PetscObjectId of the PetscDevicContext and the allocated memory. 135 // 136 // Due to device and host creating slightly different number of objects on startup there will 137 // be a mismatch in the ID's. So for the tests involving the host we sit here creating 138 // PetscContainers (and incrementing the global PetscObjectId counter) until it reaches some 139 // arbitrarily high number to ensure that our first PetscDeviceContext has the same ID across 140 // systems. 141 if (PETSC_DEVICE_DEFAULT() == PETSC_DEVICE_HOST) { 142 PetscObjectId id, prev_id = 0; 143 144 do { 145 PetscContainer c; 146 147 PetscCall(PetscContainerCreate(PETSC_COMM_WORLD, &c)); 148 PetscCall(PetscObjectGetId((PetscObject)c, &id)); 149 // sanity check, in case PetscContainer ever stops being a PetscObject 150 PetscCheck(id > prev_id, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscObjectIds are not increasing for successively created PetscContainers! current: %" PetscInt64_FMT ", previous: %" PetscInt64_FMT, id, prev_id); 151 prev_id = id; 152 PetscCall(PetscContainerDestroy(&c)); 153 } while (id < 10); 154 } 155 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 156 157 PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rand)); 158 // this seed just so happens to keep the allocation size increasing 159 PetscCall(PetscRandomSetSeed(rand, 123)); 160 PetscCall(PetscRandomSeed(rand)); 161 PetscCall(PetscRandomSetFromOptions(rand)); 162 163 PetscCall(TestAllocate(dctx, rand, PETSC_MEMTYPE_HOST)); 164 PetscCall(TestAllocate(dctx, rand, PETSC_MEMTYPE_DEVICE)); 165 PetscCall(TestAsyncCoherence(dctx, rand)); 166 167 PetscCall(PetscRandomDestroy(&rand)); 168 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "EXIT_SUCCESS\n")); 169 PetscCall(PetscFinalize()); 170 return 0; 171 } 172 173 /*TEST 174 175 build: 176 requires: defined(PETSC_HAVE_CXX) 177 178 testset: 179 requires: defined(PETSC_USE_INFO), defined(PETSC_USE_DEBUG) 180 args: -info :device 181 suffix: with_info 182 test: 183 requires: !device 184 suffix: host_no_device 185 test: 186 requires: device 187 args: -default_device_type host 188 filter: sed -e 's/host/IMPL/g' -e 's/cuda/IMPL/g' -e 's/hip/IMPL/g' -e 's/sycl/IMPL/g' 189 suffix: host_with_device 190 test: 191 requires: cuda 192 args: -default_device_type cuda 193 suffix: cuda 194 test: 195 requires: hip 196 args: -default_device_type hip 197 suffix: hip 198 test: 199 requires: sycl 200 args: -default_device_type sycl 201 suffix: sycl 202 203 testset: 204 output_file: ./output/ExitSuccess.out 205 requires: !defined(PETSC_USE_DEBUG) 206 filter: grep -v "\[DEBUG OUTPUT\]" 207 suffix: no_info 208 test: 209 requires: !device 210 suffix: host_no_device 211 test: 212 requires: device 213 args: -default_device_type host 214 suffix: host_with_device 215 test: 216 requires: cuda 217 args: -default_device_type cuda 218 suffix: cuda 219 test: 220 requires: hip 221 args: -default_device_type hip 222 suffix: hip 223 test: 224 requires: sycl 225 args: -default_device_type sycl 226 suffix: sycl 227 TEST*/ 228