xref: /honee/src/smartsim/smartsim.c (revision 16cb6b6b239e8baff914b40a55e48473a01b3121)
1 // SPDX-FileCopyrightText: Copyright (c) 2017-2024, HONEE contributors.
2 // SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause
3 // Based on the instructions from https://www.craylabs.org/docs/sr_integration.html and PHASTA implementation
4 
5 #include <smartsim.h>
6 
7 #include <navierstokes.h>
8 
9 PetscErrorCode SmartRedisVerifyPutTensor(void *c_client, const char *name, const size_t name_length) {
10   bool does_exist = true;
11 
12   PetscFunctionBeginUser;
13   PetscCall(PetscLogEventBegin(HONEE_SmartRedis_Meta, 0, 0, 0, 0));
14   PetscCallSmartRedis(tensor_exists(c_client, name, name_length, &does_exist));
15   PetscCheck(does_exist, PETSC_COMM_SELF, -1, "Tensor of name '%s' was not written to the database successfully", name);
16   PetscCall(PetscLogEventEnd(HONEE_SmartRedis_Meta, 0, 0, 0, 0));
17   PetscFunctionReturn(PETSC_SUCCESS);
18 }
19 
20 PetscErrorCode SmartSimTrainingSetup(Honee honee) {
21   SmartSimData smartsim = honee->smartsim;
22   PetscMPIInt  rank;
23   PetscReal    checkrun[2] = {1};
24   size_t       dim_2[1]    = {2};
25 
26   PetscFunctionBeginUser;
27   PetscCallMPI(MPI_Comm_rank(honee->comm, &rank));
28 
29   if (rank % smartsim->collocated_database_num_ranks == 0) {
30     // -- Send array that communicates when ML is done training
31     PetscCall(PetscLogEventBegin(HONEE_SmartRedis_Meta, 0, 0, 0, 0));
32     PetscCallSmartRedis(put_tensor(smartsim->client, "check-run", 9, checkrun, dim_2, 1, SRTensorTypeDouble, SRMemLayoutContiguous));
33     PetscCall(SmartRedisVerifyPutTensor(smartsim->client, "check-run", 9));
34     PetscCall(PetscLogEventEnd(HONEE_SmartRedis_Meta, 0, 0, 0, 0));
35   }
36   PetscFunctionReturn(PETSC_SUCCESS);
37 }
38 
39 PetscErrorCode SmartSimSetup(Honee honee) {
40   PetscMPIInt rank;
41   PetscInt    num_orchestrator_nodes = 1;
42 
43   PetscFunctionBeginUser;
44   PetscCall(PetscNew(&honee->smartsim));
45   SmartSimData smartsim = honee->smartsim;
46 
47   smartsim->collocated_database_num_ranks = 1;
48   PetscOptionsBegin(honee->comm, NULL, "Options for SmartSim integration", NULL);
49   PetscCall(PetscOptionsInt("-smartsim_collocated_database_num_ranks", "Number of ranks per collocated database instance", NULL,
50                             smartsim->collocated_database_num_ranks, &smartsim->collocated_database_num_ranks, NULL));
51   PetscOptionsEnd();
52 
53   // Create prefix to be put on tensor names
54   PetscCallMPI(MPI_Comm_rank(honee->comm, &rank));
55   PetscCall(PetscSNPrintf(smartsim->rank_id_name, sizeof(smartsim->rank_id_name), "y.%d", rank));
56 
57   PetscCall(PetscLogEventBegin(HONEE_SmartRedis_Init, 0, 0, 0, 0));
58   PetscCallSmartRedis(SmartRedisCClient(num_orchestrator_nodes != 1, smartsim->rank_id_name, strlen(smartsim->rank_id_name), &smartsim->client));
59   PetscCall(PetscLogEventEnd(HONEE_SmartRedis_Init, 0, 0, 0, 0));
60 
61   PetscCall(SmartSimTrainingSetup(honee));
62   PetscFunctionReturn(PETSC_SUCCESS);
63 }
64 
65 PetscErrorCode SmartSimDataDestroy(SmartSimData smartsim) {
66   PetscFunctionBeginUser;
67   if (!smartsim) PetscFunctionReturn(PETSC_SUCCESS);
68 
69   PetscCallSmartRedis(DeleteCClient(&smartsim->client));
70   PetscCall(PetscFree(smartsim));
71   PetscFunctionReturn(PETSC_SUCCESS);
72 }
73