1 // SPDX-FileCopyrightText: Copyright (c) 2017-2024, HONEE contributors. 2 // SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause 3 // Based on the instructions from https://www.craylabs.org/docs/sr_integration.html and PHASTA implementation 4 5 #include <smartsim.h> 6 7 #include <navierstokes.h> 8 9 #define SMARTSIM_KEY "SmartSimData" 10 11 static PetscErrorCode SmartSimDataDestroy(SmartSimData *smartsim) { 12 SmartSimData smartsim_ = *smartsim; 13 PetscFunctionBeginUser; 14 if (!smartsim_) PetscFunctionReturn(PETSC_SUCCESS); 15 16 PetscCallSmartRedis(DeleteCClient(&smartsim_->client)); 17 PetscCall(PetscFree(smartsim_)); 18 *smartsim = NULL; 19 PetscFunctionReturn(PETSC_SUCCESS); 20 } 21 22 static PetscErrorCode SmartSimTrainingSetup(Honee honee) { 23 SmartSimData smartsim; 24 PetscMPIInt rank; 25 PetscReal checkrun[2] = {1}; 26 size_t dim_2[1] = {2}; 27 28 PetscFunctionBeginUser; 29 PetscCall(HoneeGetSmartSimData(honee, &smartsim)); 30 PetscCallMPI(MPI_Comm_rank(honee->comm, &rank)); 31 32 if (rank % smartsim->collocated_database_num_ranks == 0) { 33 // -- Send array that communicates when ML is done training 34 PetscCall(PetscLogEventBegin(HONEE_SmartRedis_Meta, 0, 0, 0, 0)); 35 PetscCallSmartRedis(put_tensor(smartsim->client, "check-run", 9, checkrun, dim_2, 1, SRTensorTypeDouble, SRMemLayoutContiguous)); 36 PetscCall(SmartRedisVerifyPutTensor(smartsim->client, "check-run", 9)); 37 PetscCall(PetscLogEventEnd(HONEE_SmartRedis_Meta, 0, 0, 0, 0)); 38 } 39 PetscFunctionReturn(PETSC_SUCCESS); 40 } 41 42 static PetscErrorCode SmartSimSetup(Honee honee) { 43 PetscMPIInt rank; 44 PetscInt num_orchestrator_nodes = 1; 45 SmartSimData smartsim; 46 47 PetscFunctionBeginUser; 48 PetscCall(PetscNew(&smartsim)); 49 50 smartsim->collocated_database_num_ranks = 1; 51 PetscOptionsBegin(honee->comm, NULL, "Options for SmartSim integration", NULL); 52 PetscCall(PetscOptionsInt("-smartsim_collocated_database_num_ranks", "Number of ranks per collocated database instance", NULL, 53 smartsim->collocated_database_num_ranks, &smartsim->collocated_database_num_ranks, NULL)); 54 PetscOptionsEnd(); 55 56 // Create prefix to be put on tensor names 57 PetscCallMPI(MPI_Comm_rank(honee->comm, &rank)); 58 PetscCall(PetscSNPrintf(smartsim->rank_id_name, sizeof(smartsim->rank_id_name), "y.%d", rank)); 59 60 PetscCall(PetscLogEventBegin(HONEE_SmartRedis_Init, 0, 0, 0, 0)); 61 PetscCallSmartRedis(SmartRedisCClient(num_orchestrator_nodes != 1, smartsim->rank_id_name, strlen(smartsim->rank_id_name), &smartsim->client)); 62 PetscCall(PetscLogEventEnd(HONEE_SmartRedis_Init, 0, 0, 0, 0)); 63 64 PetscCall(HoneeSetContainer(honee, SMARTSIM_KEY, smartsim, (PetscCtxDestroyFn *)SmartSimDataDestroy)); 65 66 PetscCall(SmartSimTrainingSetup(honee)); 67 PetscFunctionReturn(PETSC_SUCCESS); 68 } 69 70 PetscErrorCode HoneeGetSmartSimData(Honee honee, SmartSimData *smartsim) { 71 PetscBool has_smartsim; 72 73 PetscFunctionBeginUser; 74 PetscCall(HoneeHasContainer(honee, SMARTSIM_KEY, &has_smartsim)); 75 if (!has_smartsim) PetscCall(SmartSimSetup(honee)); 76 PetscCall(HoneeGetContainer(honee, SMARTSIM_KEY, smartsim)); 77 PetscFunctionReturn(PETSC_SUCCESS); 78 } 79 80 PetscErrorCode SmartRedisVerifyPutTensor(void *c_client, const char *name, const size_t name_length) { 81 bool does_exist = true; 82 83 PetscFunctionBeginUser; 84 PetscCall(PetscLogEventBegin(HONEE_SmartRedis_Meta, 0, 0, 0, 0)); 85 PetscCallSmartRedis(tensor_exists(c_client, name, name_length, &does_exist)); 86 PetscCheck(does_exist, PETSC_COMM_SELF, -1, "Tensor of name '%s' was not written to the database successfully", name); 87 PetscCall(PetscLogEventEnd(HONEE_SmartRedis_Meta, 0, 0, 0, 0)); 88 PetscFunctionReturn(PETSC_SUCCESS); 89 } 90