xref: /honee/src/smartsim/smartsim.c (revision a32db64d340db16914d4892be21e91c50f2a7cbd)
1 // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 // Based on the instructions from https://www.craylabs.org/docs/sr_integration.html and PHASTA implementation
8 
9 #include "../../include/smartsim.h"
10 
11 #include "../../navierstokes.h"
12 
13 PetscErrorCode SmartRedisVerifyPutTensor(void *c_client, const char *name, const size_t name_length) {
14   bool does_exist = true;
15 
16   PetscFunctionBeginUser;
17   PetscCall(PetscLogEventBegin(FLUIDS_SmartRedis_Meta, 0, 0, 0, 0));
18   PetscCallSmartRedis(tensor_exists(c_client, name, name_length, &does_exist));
19   PetscCheck(does_exist, PETSC_COMM_SELF, -1, "Tensor of name '%s' was not written to the database successfully", name);
20   PetscCall(PetscLogEventEnd(FLUIDS_SmartRedis_Meta, 0, 0, 0, 0));
21   PetscFunctionReturn(PETSC_SUCCESS);
22 }
23 
24 PetscErrorCode SmartSimTrainingSetup(User user) {
25   SmartSimData smartsim = user->smartsim;
26   PetscMPIInt  rank;
27   PetscReal    checkrun[2] = {1};
28   size_t       dim_2[1]    = {2};
29 
30   PetscFunctionBeginUser;
31   PetscCallMPI(MPI_Comm_rank(user->comm, &rank));
32 
33   if (rank % smartsim->collocated_database_num_ranks == 0) {
34     // -- Send array that communicates when ML is done training
35     PetscCall(PetscLogEventBegin(FLUIDS_SmartRedis_Meta, 0, 0, 0, 0));
36     PetscCallSmartRedis(put_tensor(smartsim->client, "check-run", 9, checkrun, dim_2, 1, SRTensorTypeDouble, SRMemLayoutContiguous));
37     PetscCall(SmartRedisVerifyPutTensor(smartsim->client, "check-run", 9));
38     PetscCall(PetscLogEventEnd(FLUIDS_SmartRedis_Meta, 0, 0, 0, 0));
39   }
40   PetscFunctionReturn(PETSC_SUCCESS);
41 }
42 
43 PetscErrorCode SmartSimSetup(User user) {
44   PetscMPIInt rank;
45   PetscInt    num_orchestrator_nodes = 1;
46 
47   PetscFunctionBeginUser;
48   PetscCall(PetscNew(&user->smartsim));
49   SmartSimData smartsim = user->smartsim;
50 
51   smartsim->collocated_database_num_ranks = 1;
52   PetscOptionsBegin(user->comm, NULL, "Options for SmartSim integration", NULL);
53   PetscCall(PetscOptionsInt("-smartsim_collocated_database_num_ranks", "Number of ranks per collocated database instance", NULL,
54                             smartsim->collocated_database_num_ranks, &smartsim->collocated_database_num_ranks, NULL));
55   PetscOptionsEnd();
56 
57   // Create prefix to be put on tensor names
58   PetscCallMPI(MPI_Comm_rank(user->comm, &rank));
59   PetscCall(PetscSNPrintf(smartsim->rank_id_name, sizeof(smartsim->rank_id_name), "y.%d", rank));
60 
61   PetscCall(PetscLogEventBegin(FLUIDS_SmartRedis_Init, 0, 0, 0, 0));
62   PetscCallSmartRedis(SmartRedisCClient(num_orchestrator_nodes != 1, smartsim->rank_id_name, strlen(smartsim->rank_id_name), &smartsim->client));
63   PetscCall(PetscLogEventEnd(FLUIDS_SmartRedis_Init, 0, 0, 0, 0));
64 
65   PetscCall(SmartSimTrainingSetup(user));
66   PetscFunctionReturn(PETSC_SUCCESS);
67 }
68 
69 PetscErrorCode SmartSimDataDestroy(SmartSimData smartsim) {
70   PetscFunctionBeginUser;
71   if (!smartsim) PetscFunctionReturn(PETSC_SUCCESS);
72 
73   PetscCallSmartRedis(DeleteCClient(&smartsim->client));
74   PetscCall(PetscFree(smartsim));
75   PetscFunctionReturn(PETSC_SUCCESS);
76 }
77