xref: /honee/src/smartsim/smartsim.c (revision 8fc6ab98e527355f6c031781f7c3aa8a26105128)
1 // SPDX-FileCopyrightText: Copyright (c) 2017-2024, HONEE contributors.
2 // SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause
3 // Based on the instructions from https://www.craylabs.org/docs/sr_integration.html and PHASTA implementation
4 
5 #include <smartsim-impl.h>
6 
7 #include <navierstokes.h>
8 
9 #define SMARTSIM_KEY "SmartSimData"
10 
11 static PetscErrorCode SmartSimDataDestroy(SmartSimData *smartsim) {
12   SmartSimData smartsim_ = *smartsim;
13   PetscFunctionBeginUser;
14   if (!smartsim_) PetscFunctionReturn(PETSC_SUCCESS);
15 
16   PetscCallSmartRedis(DeleteCClient(&smartsim_->client));
17   PetscCall(PetscFree(smartsim_));
18   *smartsim = NULL;
19   PetscFunctionReturn(PETSC_SUCCESS);
20 }
21 
22 static PetscErrorCode SmartSimTrainingSetup(Honee honee) {
23   SmartSimData smartsim;
24   PetscMPIInt  rank;
25   PetscReal    checkrun[2] = {1};
26   size_t       dim_2[1]    = {2};
27 
28   PetscFunctionBeginUser;
29   PetscCall(HoneeGetSmartSimData(honee, &smartsim));
30   PetscCallMPI(MPI_Comm_rank(honee->comm, &rank));
31 
32   if (rank % smartsim->collocated_database_num_ranks == 0) {
33     // -- Send array that communicates when ML is done training
34     PetscCall(PetscLogEventBegin(HONEE_SmartRedis_Meta, 0, 0, 0, 0));
35     PetscCallSmartRedis(put_tensor(smartsim->client, "check-run", 9, checkrun, dim_2, 1, SRTensorTypeDouble, SRMemLayoutContiguous));
36     PetscCall(SmartRedisVerifyPutTensor(smartsim->client, "check-run", 9));
37     PetscCall(PetscLogEventEnd(HONEE_SmartRedis_Meta, 0, 0, 0, 0));
38   }
39   PetscFunctionReturn(PETSC_SUCCESS);
40 }
41 
42 static PetscErrorCode SmartSimSetup(Honee honee) {
43   PetscMPIInt  rank;
44   PetscInt     num_orchestrator_nodes = 1;
45   SmartSimData smartsim;
46 
47   PetscFunctionBeginUser;
48   PetscCall(PetscNew(&smartsim));
49 
50   smartsim->collocated_database_num_ranks = 1;
51   PetscOptionsBegin(honee->comm, NULL, "Options for SmartSim integration", NULL);
52   PetscCall(PetscOptionsInt("-smartsim_collocated_database_num_ranks", "Number of ranks per collocated database instance", NULL,
53                             smartsim->collocated_database_num_ranks, &smartsim->collocated_database_num_ranks, NULL));
54   PetscOptionsEnd();
55 
56   // Create prefix to be put on tensor names
57   PetscCallMPI(MPI_Comm_rank(honee->comm, &rank));
58   PetscCall(PetscSNPrintf(smartsim->rank_id_name, sizeof(smartsim->rank_id_name), "y.%d", rank));
59 
60   PetscCall(PetscLogEventBegin(HONEE_SmartRedis_Init, 0, 0, 0, 0));
61   PetscCallSmartRedis(SmartRedisCClient(num_orchestrator_nodes != 1, smartsim->rank_id_name, strlen(smartsim->rank_id_name), &smartsim->client));
62   PetscCall(PetscLogEventEnd(HONEE_SmartRedis_Init, 0, 0, 0, 0));
63 
64   PetscCall(HoneeSetContainer(honee, SMARTSIM_KEY, smartsim, (PetscCtxDestroyFn *)SmartSimDataDestroy));
65 
66   PetscCall(SmartSimTrainingSetup(honee));
67   PetscFunctionReturn(PETSC_SUCCESS);
68 }
69 
70 /**
71   @brief Obtains the `SmartSimData` from the `Honee` object
72 
73   If `SmartSimData` has not already been initialized, this will initialize and create the struct.
74 
75   @param[in]  honee `Honee` object containing the SmartSim data
76   @param[out] smartsim `SmartSimData` containing the data
77 **/
78 PetscErrorCode HoneeGetSmartSimData(Honee honee, SmartSimData *smartsim) {
79   PetscBool has_smartsim;
80 
81   PetscFunctionBeginUser;
82   PetscCall(HoneeHasContainer(honee, SMARTSIM_KEY, &has_smartsim));
83   if (!has_smartsim) PetscCall(SmartSimSetup(honee));
84   PetscCall(HoneeGetContainer(honee, SMARTSIM_KEY, smartsim));
85   PetscFunctionReturn(PETSC_SUCCESS);
86 }
87 
88 /**
89   @brief Checks if a tensor with `name` is in the SmartRedis database
90 
91   Function will error out if tensor does not exist.
92 
93   @param[in] c_client SmartRedis client object
94   @param[in] name Name of the tensor
95   @param[in] name_length Length of the tensor name
96   @return An error code: 0 - success, otherwise - failure
97 **/
98 PetscErrorCode SmartRedisVerifyPutTensor(void *c_client, const char *name, const size_t name_length) {
99   bool does_exist = true;
100 
101   PetscFunctionBeginUser;
102   PetscCall(PetscLogEventBegin(HONEE_SmartRedis_Meta, 0, 0, 0, 0));
103   PetscCallSmartRedis(tensor_exists(c_client, name, name_length, &does_exist));
104   PetscCheck(does_exist, PETSC_COMM_SELF, -1, "Tensor of name '%s' was not written to the database successfully", name);
105   PetscCall(PetscLogEventEnd(HONEE_SmartRedis_Meta, 0, 0, 0, 0));
106   PetscFunctionReturn(PETSC_SUCCESS);
107 }
108