1 static const char help[] = "Tests PetscDeviceAllocate().\n\n"; 2 3 #include "petscdevicetestcommon.h" 4 5 #define DebugPrintf(comm, ...) PetscPrintf((comm), "[DEBUG OUTPUT] " __VA_ARGS__) 6 7 static PetscErrorCode IncrementSize(PetscRandom rand, PetscInt *value) 8 { 9 PetscReal rval; 10 11 PetscFunctionBegin; 12 // set the interval such that *value += rval never goes below 0 or above 500 13 PetscCall(PetscRandomSetInterval(rand, -(*value), 500 - (*value))); 14 PetscCall(PetscRandomGetValueReal(rand, &rval)); 15 *value += (PetscInt)rval; 16 PetscCall(DebugPrintf(PetscObjectComm((PetscObject)rand), "n: %" PetscInt_FMT "\n", *value)); 17 PetscFunctionReturn(PETSC_SUCCESS); 18 } 19 20 static PetscErrorCode TestAllocate(PetscDeviceContext dctx, PetscRandom rand, PetscMemType mtype) 21 { 22 PetscScalar *ptr, *tmp_ptr; 23 PetscInt n = 10; 24 25 PetscFunctionBegin; 26 if (PetscMemTypeDevice(mtype)) { 27 PetscDeviceType dtype; 28 29 PetscCall(PetscDeviceContextGetDeviceType(dctx, &dtype)); 30 // host device context cannot handle this 31 if (dtype == PETSC_DEVICE_HOST) PetscFunctionReturn(PETSC_SUCCESS); 32 } 33 // test basic allocation, deallocation 34 PetscCall(IncrementSize(rand, &n)); 35 PetscCall(PetscDeviceMalloc(dctx, mtype, n, &ptr)); 36 PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_POINTER, "PetscDeviceMalloc() return NULL pointer for %s allocation size %" PetscInt_FMT, PetscMemTypeToString(mtype), n); 37 // this ensures the host pointer is at least valid 38 if (PetscMemTypeHost(mtype)) { 39 for (PetscInt i = 0; i < n; ++i) ptr[i] = (PetscScalar)i; 40 } 41 PetscCall(PetscDeviceFree(dctx, ptr)); 42 43 // test alignment of various types 44 { 45 char *char_ptr; 46 short *short_ptr; 47 int *int_ptr; 48 double *double_ptr; 49 long int *long_int_ptr; 50 51 PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &char_ptr)); 52 PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &short_ptr)); 53 PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &int_ptr)); 54 PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &double_ptr)); 55 PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &long_int_ptr)); 56 57 // if an error occurs here, it means the alignment system is broken! 58 PetscCall(PetscDeviceFree(dctx, char_ptr)); 59 PetscCall(PetscDeviceFree(dctx, short_ptr)); 60 PetscCall(PetscDeviceFree(dctx, int_ptr)); 61 PetscCall(PetscDeviceFree(dctx, double_ptr)); 62 PetscCall(PetscDeviceFree(dctx, long_int_ptr)); 63 } 64 65 // test that calloc() produces cleared memory 66 PetscCall(IncrementSize(rand, &n)); 67 PetscCall(PetscDeviceCalloc(dctx, mtype, n, &ptr)); 68 PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_POINTER, "PetscDeviceCalloc() returned NULL pointer for %s allocation size %" PetscInt_FMT, PetscMemTypeToString(mtype), n); 69 if (PetscMemTypeHost(mtype)) { 70 tmp_ptr = ptr; 71 } else { 72 PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n, &tmp_ptr)); 73 PetscCall(PetscDeviceArrayCopy(dctx, tmp_ptr, ptr, n)); 74 } 75 PetscCall(PetscDeviceContextSynchronize(dctx)); 76 for (PetscInt i = 0; i < n; ++i) PetscCheck(tmp_ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceCalloc() returned memory that was not cleared, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(tmp_ptr[i])); 77 if (tmp_ptr == ptr) { 78 tmp_ptr = NULL; 79 } else { 80 PetscCall(PetscDeviceFree(dctx, tmp_ptr)); 81 } 82 PetscCall(PetscDeviceFree(dctx, ptr)); 83 84 // test that devicearrayzero produces cleared memory 85 PetscCall(IncrementSize(rand, &n)); 86 PetscCall(PetscDeviceMalloc(dctx, mtype, n, &ptr)); 87 PetscCall(PetscDeviceArrayZero(dctx, ptr, n)); 88 PetscCall(PetscMalloc1(n, &tmp_ptr)); 89 PetscCall(PetscDeviceRegisterMemory(tmp_ptr, PETSC_MEMTYPE_HOST, n * sizeof(*tmp_ptr))); 90 for (PetscInt i = 0; i < n; ++i) tmp_ptr[i] = (PetscScalar)i; 91 PetscCall(PetscDeviceArrayCopy(dctx, tmp_ptr, ptr, n)); 92 PetscCall(PetscDeviceContextSynchronize(dctx)); 93 for (PetscInt i = 0; i < n; ++i) PetscCheck(tmp_ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceArrayZero() did not clear memory, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(tmp_ptr[i])); 94 PetscCall(PetscDeviceFree(dctx, tmp_ptr)); 95 PetscCall(PetscDeviceFree(dctx, ptr)); 96 PetscFunctionReturn(PETSC_SUCCESS); 97 } 98 99 static PetscErrorCode TestAsyncCoherence(PetscDeviceContext dctx, PetscRandom rand) 100 { 101 const PetscInt nsub = 2; 102 const PetscInt n = 1024; 103 PetscScalar *ptr, *tmp_ptr; 104 PetscDeviceType dtype; 105 PetscDeviceContext *sub; 106 107 PetscFunctionBegin; 108 PetscCall(PetscDeviceContextGetDeviceType(dctx, &dtype)); 109 // ensure the streams are nonblocking 110 PetscCall(PetscDeviceContextForkWithStreamType(dctx, PETSC_STREAM_NONBLOCKING, nsub, &sub)); 111 // do a warmup to ensure each context acquires any necessary data structures 112 for (PetscInt i = 0; i < nsub; ++i) { 113 PetscCall(PetscDeviceMalloc(sub[i], PETSC_MEMTYPE_HOST, n, &ptr)); 114 PetscCall(PetscDeviceFree(sub[i], ptr)); 115 if (dtype != PETSC_DEVICE_HOST) { 116 PetscCall(PetscDeviceMalloc(sub[i], PETSC_MEMTYPE_DEVICE, n, &ptr)); 117 PetscCall(PetscDeviceFree(sub[i], ptr)); 118 } 119 } 120 121 // allocate on one 122 PetscCall(PetscDeviceMalloc(sub[0], PETSC_MEMTYPE_HOST, n, &ptr)); 123 // free on the other 124 PetscCall(PetscDeviceFree(sub[1], ptr)); 125 126 // allocate on one 127 PetscCall(PetscDeviceMalloc(sub[0], PETSC_MEMTYPE_HOST, n, &ptr)); 128 // zero on the other 129 PetscCall(PetscDeviceArrayZero(sub[1], ptr, n)); 130 PetscCall(PetscDeviceContextSynchronize(sub[1])); 131 for (PetscInt i = 0; i < n; ++i) { 132 for (PetscInt i = 0; i < n; ++i) PetscCheck(ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceArrayZero() was not properly serialized, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(ptr[i])); 133 } 134 PetscCall(PetscDeviceFree(sub[1], ptr)); 135 136 // test the transfers are serialized 137 if (dtype != PETSC_DEVICE_HOST) { 138 PetscCall(PetscDeviceCalloc(dctx, PETSC_MEMTYPE_DEVICE, n, &ptr)); 139 PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n, &tmp_ptr)); 140 PetscCall(PetscDeviceArrayCopy(sub[0], tmp_ptr, ptr, n)); 141 PetscCall(PetscDeviceContextSynchronize(sub[0])); 142 for (PetscInt i = 0; i < n; ++i) { 143 for (PetscInt i = 0; i < n; ++i) PetscCheck(tmp_ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceArrayCopt() was not properly serialized, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(tmp_ptr[i])); 144 } 145 PetscCall(PetscDeviceFree(sub[1], ptr)); 146 } 147 148 PetscCall(PetscDeviceContextJoin(dctx, nsub, PETSC_DEVICE_CONTEXT_JOIN_DESTROY, &sub)); 149 PetscFunctionReturn(PETSC_SUCCESS); 150 } 151 152 int main(int argc, char *argv[]) 153 { 154 PetscDeviceContext dctx; 155 PetscRandom rand; 156 157 PetscFunctionBeginUser; 158 PetscCall(PetscInitialize(&argc, &argv, NULL, help)); 159 160 // A vile hack. The -info output is used to test correctness in this test which prints -- 161 // among other things -- the PetscObjectId of the PetscDevicContext and the allocated memory. 162 // 163 // Due to device and host creating slightly different number of objects on startup there will 164 // be a mismatch in the ID's. So for the tests involving the host we sit here creating 165 // PetscContainers (and incrementing the global PetscObjectId counter) until it reaches some 166 // arbitrarily high number to ensure that our first PetscDeviceContext has the same ID across 167 // systems. 168 { 169 PetscObjectId prev_id = 0; 170 171 do { 172 PetscContainer c; 173 PetscObjectId id; 174 175 PetscCall(PetscContainerCreate(PETSC_COMM_WORLD, &c)); 176 PetscCall(PetscObjectGetId((PetscObject)c, &id)); 177 // sanity check, in case PetscContainer ever stops being a PetscObject 178 PetscCheck(id > prev_id, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscObjectIds are not increasing for successively created PetscContainers! current: %" PetscInt64_FMT ", previous: %" PetscInt64_FMT, id, prev_id); 179 prev_id = id; 180 PetscCall(PetscContainerDestroy(&c)); 181 } while (prev_id < 50); 182 } 183 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 184 185 PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rand)); 186 // this seed just so happens to keep the allocation size increasing 187 PetscCall(PetscRandomSetSeed(rand, 123)); 188 PetscCall(PetscRandomSeed(rand)); 189 PetscCall(PetscRandomSetFromOptions(rand)); 190 191 PetscCall(TestAllocate(dctx, rand, PETSC_MEMTYPE_HOST)); 192 PetscCall(TestAllocate(dctx, rand, PETSC_MEMTYPE_DEVICE)); 193 PetscCall(TestAsyncCoherence(dctx, rand)); 194 195 PetscCall(PetscRandomDestroy(&rand)); 196 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "EXIT_SUCCESS\n")); 197 PetscCall(PetscFinalize()); 198 return 0; 199 } 200 201 /*TEST 202 203 testset: 204 requires: defined(PETSC_USE_INFO) defined(PETSC_USE_DEBUG) cxx 205 args: -info :device 206 suffix: with_info 207 test: 208 requires: !device 209 suffix: host_no_device 210 test: 211 requires: device 212 args: -default_device_type host 213 filter: sed -e 's/host/IMPL/g' -e 's/cuda/IMPL/g' -e 's/hip/IMPL/g' -e 's/sycl/IMPL/g' 214 suffix: host_with_device 215 test: 216 requires: cuda 217 args: -default_device_type cuda 218 suffix: cuda 219 test: 220 requires: hip 221 args: -default_device_type hip 222 suffix: hip 223 test: 224 requires: sycl 225 args: -default_device_type sycl 226 suffix: sycl 227 228 testset: 229 output_file: output/ExitSuccess.out 230 requires: !defined(PETSC_USE_DEBUG) 231 filter: grep -v "\[DEBUG OUTPUT\]" 232 suffix: no_info 233 test: 234 requires: !device 235 suffix: host_no_device 236 test: 237 requires: device 238 args: -default_device_type host 239 suffix: host_with_device 240 test: 241 requires: cuda 242 args: -default_device_type cuda 243 suffix: cuda 244 test: 245 requires: hip 246 args: -default_device_type hip 247 suffix: hip 248 test: 249 requires: sycl 250 args: -default_device_type sycl 251 suffix: sycl 252 253 test: 254 requires: !cxx 255 output_file: output/ExitSuccess.out 256 filter: grep -v "\[DEBUG OUTPUT\]" 257 suffix: no_cxx 258 259 TEST*/ 260