1 static const char help[] = "Tests PetscDeviceAllocate().\n\n";
2
3 #include "petscdevicetestcommon.h"
4
// Wrapper around PetscPrintf() that prepends "[DEBUG OUTPUT] " to the format string. The prefix is
// joined to the format by string-literal concatenation, so the first variadic argument must be a
// string literal. The marker allows the test filters at the bottom of this file to strip these
// lines with grep -v.
5 #define DebugPrintf(comm, ...) PetscPrintf((comm), "[DEBUG OUTPUT] " __VA_ARGS__)
6
IncrementSize(PetscRandom rand,PetscInt * value)7 static PetscErrorCode IncrementSize(PetscRandom rand, PetscInt *value)
8 {
9 PetscReal rval;
10
11 PetscFunctionBegin;
12 // set the interval such that *value += rval never goes below 0 or above 500
13 PetscCall(PetscRandomSetInterval(rand, -(*value), 500 - (*value)));
14 PetscCall(PetscRandomGetValueReal(rand, &rval));
15 *value += (PetscInt)rval;
16 PetscCall(DebugPrintf(PetscObjectComm((PetscObject)rand), "n: %" PetscInt_FMT "\n", *value));
17 PetscFunctionReturn(PETSC_SUCCESS);
18 }
19
TestAllocate(PetscDeviceContext dctx,PetscRandom rand,PetscMemType mtype)20 static PetscErrorCode TestAllocate(PetscDeviceContext dctx, PetscRandom rand, PetscMemType mtype)
21 {
22 PetscScalar *ptr, *tmp_ptr;
23 PetscInt n = 10;
24
25 PetscFunctionBegin;
26 if (PetscMemTypeDevice(mtype)) {
27 PetscDeviceType dtype;
28
29 PetscCall(PetscDeviceContextGetDeviceType(dctx, &dtype));
30 // host device context cannot handle this
31 if (dtype == PETSC_DEVICE_HOST) PetscFunctionReturn(PETSC_SUCCESS);
32 }
33 // test basic allocation, deallocation
34 PetscCall(IncrementSize(rand, &n));
35 PetscCall(PetscDeviceMalloc(dctx, mtype, n, &ptr));
36 PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_POINTER, "PetscDeviceMalloc() return NULL pointer for %s allocation size %" PetscInt_FMT, PetscMemTypeToString(mtype), n);
37 // this ensures the host pointer is at least valid
38 if (PetscMemTypeHost(mtype)) {
39 for (PetscInt i = 0; i < n; ++i) ptr[i] = (PetscScalar)i;
40 }
41 PetscCall(PetscDeviceFree(dctx, ptr));
42
43 // test alignment of various types
44 {
45 char *char_ptr;
46 short *short_ptr;
47 int *int_ptr;
48 double *double_ptr;
49 long int *long_int_ptr;
50
51 PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &char_ptr));
52 PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &short_ptr));
53 PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &int_ptr));
54 PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &double_ptr));
55 PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &long_int_ptr));
56
57 // if an error occurs here, it means the alignment system is broken!
58 PetscCall(PetscDeviceFree(dctx, char_ptr));
59 PetscCall(PetscDeviceFree(dctx, short_ptr));
60 PetscCall(PetscDeviceFree(dctx, int_ptr));
61 PetscCall(PetscDeviceFree(dctx, double_ptr));
62 PetscCall(PetscDeviceFree(dctx, long_int_ptr));
63 }
64
65 // test that calloc() produces cleared memory
66 PetscCall(IncrementSize(rand, &n));
67 PetscCall(PetscDeviceCalloc(dctx, mtype, n, &ptr));
68 PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_POINTER, "PetscDeviceCalloc() returned NULL pointer for %s allocation size %" PetscInt_FMT, PetscMemTypeToString(mtype), n);
69 if (PetscMemTypeHost(mtype)) {
70 tmp_ptr = ptr;
71 } else {
72 PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n, &tmp_ptr));
73 PetscCall(PetscDeviceArrayCopy(dctx, tmp_ptr, ptr, n));
74 }
75 PetscCall(PetscDeviceContextSynchronize(dctx));
76 for (PetscInt i = 0; i < n; ++i) PetscCheck(tmp_ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceCalloc() returned memory that was not cleared, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(tmp_ptr[i]));
77 if (tmp_ptr == ptr) {
78 tmp_ptr = NULL;
79 } else {
80 PetscCall(PetscDeviceFree(dctx, tmp_ptr));
81 }
82 PetscCall(PetscDeviceFree(dctx, ptr));
83
84 // test that devicearrayzero produces cleared memory
85 PetscCall(IncrementSize(rand, &n));
86 PetscCall(PetscDeviceMalloc(dctx, mtype, n, &ptr));
87 PetscCall(PetscDeviceArrayZero(dctx, ptr, n));
88 PetscCall(PetscMalloc1(n, &tmp_ptr));
89 PetscCall(PetscDeviceRegisterMemory(tmp_ptr, PETSC_MEMTYPE_HOST, n * sizeof(*tmp_ptr)));
90 for (PetscInt i = 0; i < n; ++i) tmp_ptr[i] = (PetscScalar)i;
91 PetscCall(PetscDeviceArrayCopy(dctx, tmp_ptr, ptr, n));
92 PetscCall(PetscDeviceContextSynchronize(dctx));
93 for (PetscInt i = 0; i < n; ++i) PetscCheck(tmp_ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceArrayZero() did not clear memory, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(tmp_ptr[i]));
94 PetscCall(PetscDeviceFree(dctx, tmp_ptr));
95 PetscCall(PetscDeviceFree(dctx, ptr));
96 PetscFunctionReturn(PETSC_SUCCESS);
97 }
98
TestAsyncCoherence(PetscDeviceContext dctx,PetscRandom rand)99 static PetscErrorCode TestAsyncCoherence(PetscDeviceContext dctx, PetscRandom rand)
100 {
101 const PetscInt nsub = 2;
102 const PetscInt n = 1024;
103 PetscScalar *ptr, *tmp_ptr;
104 PetscDeviceType dtype;
105 PetscDeviceContext *sub;
106
107 PetscFunctionBegin;
108 PetscCall(PetscDeviceContextGetDeviceType(dctx, &dtype));
109 // ensure the streams are nonblocking
110 PetscCall(PetscDeviceContextForkWithStreamType(dctx, PETSC_STREAM_NONBLOCKING, nsub, &sub));
111 // do a warmup to ensure each context acquires any necessary data structures
112 for (PetscInt i = 0; i < nsub; ++i) {
113 PetscCall(PetscDeviceMalloc(sub[i], PETSC_MEMTYPE_HOST, n, &ptr));
114 PetscCall(PetscDeviceFree(sub[i], ptr));
115 if (dtype != PETSC_DEVICE_HOST) {
116 PetscCall(PetscDeviceMalloc(sub[i], PETSC_MEMTYPE_DEVICE, n, &ptr));
117 PetscCall(PetscDeviceFree(sub[i], ptr));
118 }
119 }
120
121 // allocate on one
122 PetscCall(PetscDeviceMalloc(sub[0], PETSC_MEMTYPE_HOST, n, &ptr));
123 // free on the other
124 PetscCall(PetscDeviceFree(sub[1], ptr));
125
126 // allocate on one
127 PetscCall(PetscDeviceMalloc(sub[0], PETSC_MEMTYPE_HOST, n, &ptr));
128 // zero on the other
129 PetscCall(PetscDeviceArrayZero(sub[1], ptr, n));
130 PetscCall(PetscDeviceContextSynchronize(sub[1]));
131 for (PetscInt i = 0; i < n; ++i) {
132 for (PetscInt i = 0; i < n; ++i) PetscCheck(ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceArrayZero() was not properly serialized, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(ptr[i]));
133 }
134 PetscCall(PetscDeviceFree(sub[1], ptr));
135
136 // test the transfers are serialized
137 if (dtype != PETSC_DEVICE_HOST) {
138 PetscCall(PetscDeviceCalloc(dctx, PETSC_MEMTYPE_DEVICE, n, &ptr));
139 PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n, &tmp_ptr));
140 PetscCall(PetscDeviceArrayCopy(sub[0], tmp_ptr, ptr, n));
141 PetscCall(PetscDeviceContextSynchronize(sub[0]));
142 for (PetscInt i = 0; i < n; ++i) {
143 for (PetscInt i = 0; i < n; ++i) PetscCheck(tmp_ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceArrayCopt() was not properly serialized, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(tmp_ptr[i]));
144 }
145 PetscCall(PetscDeviceFree(sub[1], ptr));
146 }
147
148 PetscCall(PetscDeviceContextJoin(dctx, nsub, PETSC_DEVICE_CONTEXT_JOIN_DESTROY, &sub));
149 PetscFunctionReturn(PETSC_SUCCESS);
150 }
151
main(int argc,char * argv[])152 int main(int argc, char *argv[])
153 {
154 PetscDeviceContext dctx;
155 PetscRandom rand;
156
157 PetscFunctionBeginUser;
158 PetscCall(PetscInitialize(&argc, &argv, NULL, help));
159
160 // A vile hack. The -info output is used to test correctness in this test which prints --
161 // among other things -- the PetscObjectId of the PetscDevicContext and the allocated memory.
162 //
163 // Due to device and host creating slightly different number of objects on startup there will
164 // be a mismatch in the ID's. So for the tests involving the host we sit here creating
165 // PetscContainers (and incrementing the global PetscObjectId counter) until it reaches some
166 // arbitrarily high number to ensure that our first PetscDeviceContext has the same ID across
167 // systems.
168 {
169 PetscObjectId prev_id = 0;
170
171 do {
172 PetscContainer c;
173 PetscObjectId id;
174
175 PetscCall(PetscContainerCreate(PETSC_COMM_WORLD, &c));
176 PetscCall(PetscObjectGetId((PetscObject)c, &id));
177 // sanity check, in case PetscContainer ever stops being a PetscObject
178 PetscCheck(id > prev_id, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscObjectIds are not increasing for successively created PetscContainers! current: %" PetscInt64_FMT ", previous: %" PetscInt64_FMT, id, prev_id);
179 prev_id = id;
180 PetscCall(PetscContainerDestroy(&c));
181 } while (prev_id < 50);
182 }
183 PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
184
185 PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rand));
186 // this seed just so happens to keep the allocation size increasing
187 PetscCall(PetscRandomSetSeed(rand, 123));
188 PetscCall(PetscRandomSeed(rand));
189 PetscCall(PetscRandomSetFromOptions(rand));
190
191 PetscCall(TestAllocate(dctx, rand, PETSC_MEMTYPE_HOST));
192 PetscCall(TestAllocate(dctx, rand, PETSC_MEMTYPE_DEVICE));
193 PetscCall(TestAsyncCoherence(dctx, rand));
194
195 PetscCall(PetscRandomDestroy(&rand));
196 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "EXIT_SUCCESS\n"));
197 PetscCall(PetscFinalize());
198 return 0;
199 }
200
201 /*TEST
202
203 testset:
204 requires: defined(PETSC_USE_INFO) defined(PETSC_USE_DEBUG) defined(PETSC_DEVICELANGUAGE_CXX)
205 args: -info :device
206 suffix: with_info
207 test:
208 requires: !device
209 suffix: host_no_device
210 test:
211 requires: device
212 args: -default_device_type host
213 filter: sed -e 's/host/IMPL/g' -e 's/cuda/IMPL/g' -e 's/hip/IMPL/g' -e 's/sycl/IMPL/g'
214 suffix: host_with_device
215 test:
216 requires: cuda
217 args: -default_device_type cuda
218 suffix: cuda
219 test:
220 requires: hip
221 args: -default_device_type hip
222 suffix: hip
223 test:
224 requires: sycl
225 args: -default_device_type sycl
226 suffix: sycl
227
228 testset:
229 output_file: output/ExitSuccess.out
230 requires: !defined(PETSC_USE_DEBUG) defined(PETSC_DEVICELANGUAGE_CXX)
231 filter: grep -v "\[DEBUG OUTPUT\]"
232 suffix: no_info
233 test:
234 requires: !device
235 suffix: host_no_device
236 test:
237 requires: device
238 args: -default_device_type host
239 suffix: host_with_device
240 test:
241 requires: cuda
242 args: -default_device_type cuda
243 suffix: cuda
244 test:
245 requires: hip
246 args: -default_device_type hip
247 suffix: hip
248 test:
249 requires: sycl
250 args: -default_device_type sycl
251 suffix: sycl
252
253 test:
254 requires: !defined(PETSC_DEVICELANGUAGE_CXX)
255 output_file: output/ExitSuccess.out
256 filter: grep -v "\[DEBUG OUTPUT\]"
257 suffix: no_cxx
258
259 TEST*/
260