xref: /petsc/src/sys/objects/device/tests/ex7.c (revision 34c645fd3b0199e05bec2fcc32d3597bfeb7f4f2)
// Help text handed to PetscInitialize() in main().
static const char help[] = "Tests PetscDeviceAllocate().\n\n";
2 
3 #include "petscdevicetestcommon.h"
4 
// Forwards to PetscPrintf() on comm with "[DEBUG OUTPUT] " prepended to the format string. The
// prefix is joined by string-literal concatenation, so the first variadic argument must be a
// string literal. The test configurations at the bottom of this file that filter with
// grep -v "\[DEBUG OUTPUT\]" strip these lines from the compared output.
#define DebugPrintf(comm, ...) PetscPrintf((comm), "[DEBUG OUTPUT] " __VA_ARGS__)
6 
7 static PetscErrorCode IncrementSize(PetscRandom rand, PetscInt *value)
8 {
9   PetscReal rval;
10 
11   PetscFunctionBegin;
12   // set the interval such that *value += rval never goes below 0 or above 500
13   PetscCall(PetscRandomSetInterval(rand, -(*value), 500 - (*value)));
14   PetscCall(PetscRandomGetValueReal(rand, &rval));
15   *value += (PetscInt)rval;
16   PetscCall(DebugPrintf(PetscObjectComm((PetscObject)rand), "n: %" PetscInt_FMT "\n", *value));
17   PetscFunctionReturn(PETSC_SUCCESS);
18 }
19 
20 static PetscErrorCode TestAllocate(PetscDeviceContext dctx, PetscRandom rand, PetscMemType mtype)
21 {
22   PetscScalar *ptr, *tmp_ptr;
23   PetscInt     n = 10;
24 
25   PetscFunctionBegin;
26   if (PetscMemTypeDevice(mtype)) {
27     PetscDeviceType dtype;
28 
29     PetscCall(PetscDeviceContextGetDeviceType(dctx, &dtype));
30     // host device context cannot handle this
31     if (dtype == PETSC_DEVICE_HOST) PetscFunctionReturn(PETSC_SUCCESS);
32   }
33   // test basic allocation, deallocation
34   PetscCall(IncrementSize(rand, &n));
35   PetscCall(PetscDeviceMalloc(dctx, mtype, n, &ptr));
36   PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_POINTER, "PetscDeviceMalloc() return NULL pointer for %s allocation size %" PetscInt_FMT, PetscMemTypeToString(mtype), n);
37   // this ensures the host pointer is at least valid
38   if (PetscMemTypeHost(mtype)) {
39     for (PetscInt i = 0; i < n; ++i) ptr[i] = (PetscScalar)i;
40   }
41   PetscCall(PetscDeviceFree(dctx, ptr));
42 
43   // test alignment of various types
44   {
45     char     *char_ptr;
46     short    *short_ptr;
47     int      *int_ptr;
48     double   *double_ptr;
49     long int *long_int_ptr;
50 
51     PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &char_ptr));
52     PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &short_ptr));
53     PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &int_ptr));
54     PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &double_ptr));
55     PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &long_int_ptr));
56 
57     // if an error occurs here, it means the alignment system is broken!
58     PetscCall(PetscDeviceFree(dctx, char_ptr));
59     PetscCall(PetscDeviceFree(dctx, short_ptr));
60     PetscCall(PetscDeviceFree(dctx, int_ptr));
61     PetscCall(PetscDeviceFree(dctx, double_ptr));
62     PetscCall(PetscDeviceFree(dctx, long_int_ptr));
63   }
64 
65   // test that calloc() produces cleared memory
66   PetscCall(IncrementSize(rand, &n));
67   PetscCall(PetscDeviceCalloc(dctx, mtype, n, &ptr));
68   PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_POINTER, "PetscDeviceCalloc() returned NULL pointer for %s allocation size %" PetscInt_FMT, PetscMemTypeToString(mtype), n);
69   if (PetscMemTypeHost(mtype)) {
70     tmp_ptr = ptr;
71   } else {
72     PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n, &tmp_ptr));
73     PetscCall(PetscDeviceArrayCopy(dctx, tmp_ptr, ptr, n));
74   }
75   PetscCall(PetscDeviceContextSynchronize(dctx));
76   for (PetscInt i = 0; i < n; ++i) PetscCheck(tmp_ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceCalloc() returned memory that was not cleared, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(tmp_ptr[i]));
77   if (tmp_ptr == ptr) {
78     tmp_ptr = NULL;
79   } else {
80     PetscCall(PetscDeviceFree(dctx, tmp_ptr));
81   }
82   PetscCall(PetscDeviceFree(dctx, ptr));
83 
84   // test that devicearrayzero produces cleared memory
85   PetscCall(IncrementSize(rand, &n));
86   PetscCall(PetscDeviceMalloc(dctx, mtype, n, &ptr));
87   PetscCall(PetscDeviceArrayZero(dctx, ptr, n));
88   PetscCall(PetscMalloc1(n, &tmp_ptr));
89   PetscCall(PetscDeviceRegisterMemory(tmp_ptr, PETSC_MEMTYPE_HOST, n * sizeof(*tmp_ptr)));
90   for (PetscInt i = 0; i < n; ++i) tmp_ptr[i] = (PetscScalar)i;
91   PetscCall(PetscDeviceArrayCopy(dctx, tmp_ptr, ptr, n));
92   PetscCall(PetscDeviceContextSynchronize(dctx));
93   for (PetscInt i = 0; i < n; ++i) PetscCheck(tmp_ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceArrayZero() did not clear memory, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(tmp_ptr[i]));
94   PetscCall(PetscDeviceFree(dctx, tmp_ptr));
95   PetscCall(PetscDeviceFree(dctx, ptr));
96   PetscFunctionReturn(PETSC_SUCCESS);
97 }
98 
99 static PetscErrorCode TestAsyncCoherence(PetscDeviceContext dctx, PetscRandom rand)
100 {
101   const PetscInt      nsub = 2;
102   const PetscInt      n    = 1024;
103   PetscScalar        *ptr, *tmp_ptr;
104   PetscDeviceType     dtype;
105   PetscDeviceContext *sub;
106 
107   PetscFunctionBegin;
108   PetscCall(PetscDeviceContextGetDeviceType(dctx, &dtype));
109   // ensure the streams are nonblocking
110   PetscCall(PetscDeviceContextForkWithStreamType(dctx, PETSC_STREAM_NONBLOCKING, nsub, &sub));
111   // do a warmup to ensure each context acquires any necessary data structures
112   for (PetscInt i = 0; i < nsub; ++i) {
113     PetscCall(PetscDeviceMalloc(sub[i], PETSC_MEMTYPE_HOST, n, &ptr));
114     PetscCall(PetscDeviceFree(sub[i], ptr));
115     if (dtype != PETSC_DEVICE_HOST) {
116       PetscCall(PetscDeviceMalloc(sub[i], PETSC_MEMTYPE_DEVICE, n, &ptr));
117       PetscCall(PetscDeviceFree(sub[i], ptr));
118     }
119   }
120 
121   // allocate on one
122   PetscCall(PetscDeviceMalloc(sub[0], PETSC_MEMTYPE_HOST, n, &ptr));
123   // free on the other
124   PetscCall(PetscDeviceFree(sub[1], ptr));
125 
126   // allocate on one
127   PetscCall(PetscDeviceMalloc(sub[0], PETSC_MEMTYPE_HOST, n, &ptr));
128   // zero on the other
129   PetscCall(PetscDeviceArrayZero(sub[1], ptr, n));
130   PetscCall(PetscDeviceContextSynchronize(sub[1]));
131   for (PetscInt i = 0; i < n; ++i) {
132     for (PetscInt i = 0; i < n; ++i) PetscCheck(ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceArrayZero() was not properly serialized, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(ptr[i]));
133   }
134   PetscCall(PetscDeviceFree(sub[1], ptr));
135 
136   // test the transfers are serialized
137   if (dtype != PETSC_DEVICE_HOST) {
138     PetscCall(PetscDeviceCalloc(dctx, PETSC_MEMTYPE_DEVICE, n, &ptr));
139     PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n, &tmp_ptr));
140     PetscCall(PetscDeviceArrayCopy(sub[0], tmp_ptr, ptr, n));
141     PetscCall(PetscDeviceContextSynchronize(sub[0]));
142     for (PetscInt i = 0; i < n; ++i) {
143       for (PetscInt i = 0; i < n; ++i) PetscCheck(tmp_ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceArrayCopt() was not properly serialized, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(tmp_ptr[i]));
144     }
145     PetscCall(PetscDeviceFree(sub[1], ptr));
146   }
147 
148   PetscCall(PetscDeviceContextJoin(dctx, nsub, PETSC_DEVICE_CONTEXT_JOIN_DESTROY, &sub));
149   PetscFunctionReturn(PETSC_SUCCESS);
150 }
151 
152 int main(int argc, char *argv[])
153 {
154   PetscDeviceContext dctx;
155   PetscRandom        rand;
156 
157   PetscFunctionBeginUser;
158   PetscCall(PetscInitialize(&argc, &argv, NULL, help));
159 
160   // A vile hack. The -info output is used to test correctness in this test which prints --
161   // among other things -- the PetscObjectId of the PetscDevicContext and the allocated memory.
162   //
163   // Due to device and host creating slightly different number of objects on startup there will
164   // be a mismatch in the ID's. So for the tests involving the host we sit here creating
165   // PetscContainers (and incrementing the global PetscObjectId counter) until it reaches some
166   // arbitrarily high number to ensure that our first PetscDeviceContext has the same ID across
167   // systems.
168   {
169     PetscObjectId prev_id = 0;
170 
171     do {
172       PetscContainer c;
173       PetscObjectId  id;
174 
175       PetscCall(PetscContainerCreate(PETSC_COMM_WORLD, &c));
176       PetscCall(PetscObjectGetId((PetscObject)c, &id));
177       // sanity check, in case PetscContainer ever stops being a PetscObject
178       PetscCheck(id > prev_id, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscObjectIds are not increasing for successively created PetscContainers! current: %" PetscInt64_FMT ", previous: %" PetscInt64_FMT, id, prev_id);
179       prev_id = id;
180       PetscCall(PetscContainerDestroy(&c));
181     } while (prev_id < 50);
182   }
183   PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
184 
185   PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rand));
186   // this seed just so happens to keep the allocation size increasing
187   PetscCall(PetscRandomSetSeed(rand, 123));
188   PetscCall(PetscRandomSeed(rand));
189   PetscCall(PetscRandomSetFromOptions(rand));
190 
191   PetscCall(TestAllocate(dctx, rand, PETSC_MEMTYPE_HOST));
192   PetscCall(TestAllocate(dctx, rand, PETSC_MEMTYPE_DEVICE));
193   PetscCall(TestAsyncCoherence(dctx, rand));
194 
195   PetscCall(PetscRandomDestroy(&rand));
196   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "EXIT_SUCCESS\n"));
197   PetscCall(PetscFinalize());
198   return 0;
199 }
200 
201 /*TEST
202 
203   testset:
204     requires: defined(PETSC_USE_INFO) defined(PETSC_USE_DEBUG) cxx
205     args: -info :device
206     suffix: with_info
207     test:
208       requires: !device
209       suffix: host_no_device
210     test:
211       requires: device
212       args: -default_device_type host
213       filter: sed -e 's/host/IMPL/g' -e 's/cuda/IMPL/g' -e 's/hip/IMPL/g' -e 's/sycl/IMPL/g'
214       suffix: host_with_device
215     test:
216       requires: cuda
217       args: -default_device_type cuda
218       suffix: cuda
219     test:
220       requires: hip
221       args: -default_device_type hip
222       suffix: hip
223     test:
224       requires: sycl
225       args: -default_device_type sycl
226       suffix: sycl
227 
228   testset:
229     output_file: ./output/ExitSuccess.out
230     requires: !defined(PETSC_USE_DEBUG)
231     filter: grep -v "\[DEBUG OUTPUT\]"
232     suffix: no_info
233     test:
234       requires: !device
235       suffix: host_no_device
236     test:
237       requires: device
238       args: -default_device_type host
239       suffix: host_with_device
240     test:
241       requires: cuda
242       args: -default_device_type cuda
243       suffix: cuda
244     test:
245       requires: hip
246       args: -default_device_type hip
247       suffix: hip
248     test:
249       requires: sycl
250       args: -default_device_type sycl
251       suffix: sycl
252 
253   test:
254     requires: !cxx
255     output_file: ./output/ExitSuccess.out
256     filter: grep -v "\[DEBUG OUTPUT\]"
257     suffix: no_cxx
258 
259 TEST*/
260