xref: /petsc/src/sys/objects/device/tests/ex7.c (revision f1580f4e3ce5d5b2393648fd039d0d41b440385d)
1 static const char help[] = "Tests PetscDeviceAllocate().\n\n";
2 
3 #include "petscdevicetestcommon.h"
4 
// Forwards to PetscPrintf() with every message tagged "[DEBUG OUTPUT] ". The tag is attached by
// string-literal concatenation, so the first variadic argument has to be a string-literal format.
// The "no_info" test set at the bottom of this file strips lines carrying this tag with grep.
#define DebugPrintf(comm, ...) PetscPrintf((comm), "[DEBUG OUTPUT] " __VA_ARGS__)
6 
7 static PetscErrorCode IncrementSize(PetscRandom rand, PetscInt *value) {
8   PetscReal rval;
9 
10   PetscFunctionBegin;
11   // set the interval such that *value += rval never goes below 0 or above 500
12   PetscCall(PetscRandomSetInterval(rand, -(*value), 500 - (*value)));
13   PetscCall(PetscRandomGetValueReal(rand, &rval));
14   *value += (PetscInt)rval;
15   PetscCall(DebugPrintf(PetscObjectComm((PetscObject)rand), "n: %" PetscInt_FMT "\n", *value));
16   PetscFunctionReturn(0);
17 }
18 
19 static PetscErrorCode TestAllocate(PetscDeviceContext dctx, PetscRandom rand, PetscMemType mtype) {
20   PetscScalar *ptr, *tmp_ptr;
21   PetscInt     n = 10;
22 
23   PetscFunctionBegin;
24   if (PetscMemTypeDevice(mtype)) {
25     PetscDeviceType dtype;
26 
27     PetscCall(PetscDeviceContextGetDeviceType(dctx, &dtype));
28     // host device context cannot handle this
29     if (dtype == PETSC_DEVICE_HOST) PetscFunctionReturn(0);
30   }
31   // test basic allocation, deallocation
32   PetscCall(IncrementSize(rand, &n));
33   PetscCall(PetscDeviceMalloc(dctx, mtype, n, &ptr));
34   PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_POINTER, "PetscDeviceMalloc() return NULL pointer for %s allocation size %" PetscInt_FMT, PetscMemTypeToString(mtype), n);
35   // this ensures the host pointer is at least valid
36   if (PetscMemTypeHost(mtype)) {
37     for (PetscInt i = 0; i < n; ++i) ptr[i] = (PetscScalar)i;
38   }
39   PetscCall(PetscDeviceFree(dctx, ptr));
40 
41   // test alignment of various types
42   {
43     char     *char_ptr;
44     short    *short_ptr;
45     int      *int_ptr;
46     double   *double_ptr;
47     long int *long_int_ptr;
48 
49     PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &char_ptr));
50     PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &short_ptr));
51     PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &int_ptr));
52     PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &double_ptr));
53     PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &long_int_ptr));
54 
55     // if an error occurs here, it means the alignment system is broken!
56     PetscCall(PetscDeviceFree(dctx, char_ptr));
57     PetscCall(PetscDeviceFree(dctx, short_ptr));
58     PetscCall(PetscDeviceFree(dctx, int_ptr));
59     PetscCall(PetscDeviceFree(dctx, double_ptr));
60     PetscCall(PetscDeviceFree(dctx, long_int_ptr));
61   }
62 
63   // test that calloc() produces cleared memory
64   PetscCall(IncrementSize(rand, &n));
65   PetscCall(PetscDeviceCalloc(dctx, mtype, n, &ptr));
66   PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_POINTER, "PetscDeviceCalloc() returned NULL pointer for %s allocation size %" PetscInt_FMT, PetscMemTypeToString(mtype), n);
67   if (PetscMemTypeHost(mtype)) {
68     tmp_ptr = ptr;
69   } else {
70     PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n, &tmp_ptr));
71     PetscCall(PetscDeviceArrayCopy(dctx, tmp_ptr, ptr, n));
72   }
73   PetscCall(PetscDeviceContextSynchronize(dctx));
74   for (PetscInt i = 0; i < n; ++i) PetscCheck(tmp_ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceCalloc() returned memory that was not cleared, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(tmp_ptr[i]));
75   if (tmp_ptr == ptr) {
76     tmp_ptr = NULL;
77   } else {
78     PetscCall(PetscDeviceFree(dctx, tmp_ptr));
79   }
80   PetscCall(PetscDeviceFree(dctx, ptr));
81 
82   // test that devicearrayzero produces cleared memory
83   PetscCall(IncrementSize(rand, &n));
84   PetscCall(PetscDeviceMalloc(dctx, mtype, n, &ptr));
85   PetscCall(PetscDeviceArrayZero(dctx, ptr, n));
86   PetscCall(PetscMalloc1(n, &tmp_ptr));
87   PetscCall(PetscDeviceRegisterMemory(tmp_ptr, PETSC_MEMTYPE_HOST, n * sizeof(*tmp_ptr)));
88   for (PetscInt i = 0; i < n; ++i) tmp_ptr[i] = (PetscScalar)i;
89   PetscCall(PetscDeviceArrayCopy(dctx, tmp_ptr, ptr, n));
90   PetscCall(PetscDeviceContextSynchronize(dctx));
91   for (PetscInt i = 0; i < n; ++i) PetscCheck(tmp_ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceArrayZero() did not not clear memory, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(tmp_ptr[i]));
92   PetscCall(PetscDeviceFree(dctx, tmp_ptr));
93   PetscCall(PetscDeviceFree(dctx, ptr));
94   PetscFunctionReturn(0);
95 }
96 
97 static PetscErrorCode TestAsyncCoherence(PetscDeviceContext dctx, PetscRandom rand) {
98   const PetscInt      nsub = 2;
99   const PetscInt      n    = 1024;
100   PetscScalar        *ptr, *tmp_ptr;
101   PetscDeviceType     dtype;
102   PetscDeviceContext *sub;
103 
104   PetscFunctionBegin;
105   PetscCall(PetscDeviceContextGetDeviceType(dctx, &dtype));
106   // ensure the streams are nonblocking
107   PetscCall(PetscDeviceContextForkWithStreamType(dctx, PETSC_STREAM_GLOBAL_NONBLOCKING, nsub, &sub));
108   // do a warmup to ensure each context acquires any necessary data structures
109   for (PetscInt i = 0; i < nsub; ++i) {
110     PetscCall(PetscDeviceMalloc(sub[i], PETSC_MEMTYPE_HOST, n, &ptr));
111     PetscCall(PetscDeviceFree(sub[i], ptr));
112     if (dtype != PETSC_DEVICE_HOST) {
113       PetscCall(PetscDeviceMalloc(sub[i], PETSC_MEMTYPE_DEVICE, n, &ptr));
114       PetscCall(PetscDeviceFree(sub[i], ptr));
115     }
116   }
117 
118   // allocate on one
119   PetscCall(PetscDeviceMalloc(sub[0], PETSC_MEMTYPE_HOST, n, &ptr));
120   // free on the other
121   PetscCall(PetscDeviceFree(sub[1], ptr));
122 
123   // allocate on one
124   PetscCall(PetscDeviceMalloc(sub[0], PETSC_MEMTYPE_HOST, n, &ptr));
125   // zero on the other
126   PetscCall(PetscDeviceArrayZero(sub[1], ptr, n));
127   PetscCall(PetscDeviceContextSynchronize(sub[1]));
128   for (PetscInt i = 0; i < n; ++i) {
129     for (PetscInt i = 0; i < n; ++i) PetscCheck(ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceArrayZero() was not properly serialized, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(ptr[i]));
130   }
131   PetscCall(PetscDeviceFree(sub[1], ptr));
132 
133   // test the transfers are serialized
134   if (dtype != PETSC_DEVICE_HOST) {
135     PetscCall(PetscDeviceCalloc(dctx, PETSC_MEMTYPE_DEVICE, n, &ptr));
136     PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n, &tmp_ptr));
137     PetscCall(PetscDeviceArrayCopy(sub[0], tmp_ptr, ptr, n));
138     PetscCall(PetscDeviceContextSynchronize(sub[0]));
139     for (PetscInt i = 0; i < n; ++i) {
140       for (PetscInt i = 0; i < n; ++i) PetscCheck(tmp_ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceArrayCopt() was not properly serialized, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(tmp_ptr[i]));
141     }
142     PetscCall(PetscDeviceFree(sub[1], ptr));
143   }
144 
145   PetscCall(PetscDeviceContextJoin(dctx, nsub, PETSC_DEVICE_CONTEXT_JOIN_DESTROY, &sub));
146   PetscFunctionReturn(0);
147 }
148 
149 int main(int argc, char *argv[]) {
150   PetscDeviceContext dctx;
151   PetscRandom        rand;
152 
153   PetscFunctionBeginUser;
154   PetscCall(PetscInitialize(&argc, &argv, NULL, help));
155 
156   // A vile hack. The -info output is used to test correctness in this test which prints --
157   // among other things -- the PetscObjectId of the PetscDevicContext and the allocated memory.
158   //
159   // Due to device and host creating slightly different number of objects on startup there will
160   // be a mismatch in the ID's. So for the tests involving the host we sit here creating
161   // PetscContainers (and incrementing the global PetscObjectId counter) until it reaches some
162   // arbitrarily high number to ensure that our first PetscDeviceContext has the same ID across
163   // systems.
164   if (PETSC_DEVICE_DEFAULT() == PETSC_DEVICE_HOST) {
165     PetscObjectId id, prev_id = 0;
166 
167     do {
168       PetscContainer c;
169 
170       PetscCall(PetscContainerCreate(PETSC_COMM_WORLD, &c));
171       PetscCall(PetscObjectGetId((PetscObject)c, &id));
172       // sanity check, in case PetscContainer ever stops being a PetscObject
173       PetscCheck(id > prev_id, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscObjectIds are not increasing for successively created PetscContainers! current: %" PetscInt64_FMT ", previous: %" PetscInt64_FMT, id, prev_id);
174       prev_id = id;
175       PetscCall(PetscContainerDestroy(&c));
176     } while (id < 10);
177   }
178   PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
179 
180   PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rand));
181   // this seed just so happens to keep the allocation size increasing
182   PetscCall(PetscRandomSetSeed(rand, 123));
183   PetscCall(PetscRandomSeed(rand));
184   PetscCall(PetscRandomSetFromOptions(rand));
185 
186   PetscCall(TestAllocate(dctx, rand, PETSC_MEMTYPE_HOST));
187   PetscCall(TestAllocate(dctx, rand, PETSC_MEMTYPE_DEVICE));
188   PetscCall(TestAsyncCoherence(dctx, rand));
189 
190   PetscCall(PetscRandomDestroy(&rand));
191   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "EXIT_SUCCESS\n"));
192   PetscCall(PetscFinalize());
193   return 0;
194 }
195 
196 /*TEST
197 
198   build:
199    requires: defined(PETSC_HAVE_CXX)
200 
201   testset:
202    requires: defined(PETSC_USE_INFO), defined(PETSC_USE_DEBUG)
203    args: -info :device
204    suffix: with_info
205    test:
206      requires: !device
207      suffix: host_no_device
208    test:
209      requires: device
210      args: -default_device_type host
211      filter: sed -e 's/host/IMPL/g' -e 's/cuda/IMPL/g' -e 's/hip/IMPL/g' -e 's/sycl/IMPL/g'
212      suffix: host_with_device
213    test:
214      requires: cuda
215      args: -default_device_type cuda
216      suffix: cuda
217    test:
218      requires: hip
219      args: -default_device_type hip
220      suffix: hip
221    test:
222      requires: sycl
223      args: -default_device_type sycl
224      suffix: sycl
225 
226   testset:
227    output_file: ./output/ExitSuccess.out
228    requires: !defined(PETSC_USE_DEBUG)
229    filter: grep -v "\[DEBUG OUTPUT\]"
230    suffix: no_info
231    test:
232      requires: !device
233      suffix: host_no_device
234    test:
235      requires: device
236      args: -default_device_type host
237      suffix: host_with_device
238    test:
239      requires: cuda
240      args: -default_device_type cuda
241      suffix: cuda
242    test:
243      requires: hip
244      args: -default_device_type hip
245      suffix: hip
246    test:
247      requires: sycl
248      args: -default_device_type sycl
249      suffix: sycl
250 TEST*/
251