llama-server: optional systemd socket activation and readiness notification

Adds the LLAMA_SERVER_SYSTEMD build option and the --systemd server flag.
With the flag set, llama-server adopts the listening socket passed by systemd
and reports READY=1 once the model is loaded; if no usable socket was passed
it falls back to binding host:port as before.

Notes for reviewers:
  * common_params::use_systemd is declared unconditionally. The feature macro
    is only defined for the `common` and `llama-server` targets, so guarding
    the member would give other targets a different common_params definition.
  * svr is only a SystemdServer when SSL is not in use, so the downcast in
    main() is checked and socket activation is skipped otherwise.
  * Whitespace inside context lines was collapsed in the copy this patch was
    reconstructed from, and a few context lines lost angle-bracket text
    (bare `#include`, `std::vector api_keys`, ...). Refresh the context
    against the target tree before applying.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 36a2078e4c9fa..18d5c0348a91d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -84,8 +84,9 @@ option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
 option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
 
 # 3rd party libs
 option(LLAMA_CURL "llama: use libcurl to download model from an URL" ON)
 option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)
+option(LLAMA_SERVER_SYSTEMD "llama-server: support systemd socket activation and readiness notification (linux only)" OFF)
 
 # Required for relocatable CMake package
 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
index 0ae4d698f080c..61caf7ef36562 100644
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
@@ -133,6 +133,10 @@ if (LLAMA_LLGUIDANCE)
     set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
 endif ()
 
+if (UNIX AND NOT APPLE AND LLAMA_SERVER_SYSTEMD)
+    target_compile_definitions(${TARGET} PRIVATE LLAMA_CPP_SYSTEMD_SUPPORT)
+endif()
+
 target_include_directories(${TARGET} PUBLIC . ../vendor)
 target_compile_features (${TARGET} PUBLIC cxx_std_17)
 target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
diff --git a/common/arg.cpp b/common/arg.cpp
index c15008fe79b4d..68e9d6995c84a 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -2978,6 +2978,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.port = value;
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_PORT"));
+#ifdef LLAMA_CPP_SYSTEMD_SUPPORT
+    add_opt(common_arg({ "--systemd" },
+                       string_format("use systemd socket and readiness notification (default: %s)",
+                                     params.use_systemd ? "enabled" : "disabled"),
+                       [](common_params & params) { params.use_systemd = true; })
+                .set_examples({ LLAMA_EXAMPLE_SERVER })
+                .set_env("LLAMA_ARG_SYSTEMD"));
+#endif // LLAMA_CPP_SYSTEMD_SUPPORT
     add_opt(common_arg(
         {"--path"}, "PATH",
         string_format("path to serve static files from (default: %s)", params.public_path.c_str()),
diff --git a/common/common.h b/common/common.h
index 5063d73f96369..ef1cd734958fb 100644
--- a/common/common.h
+++ b/common/common.h
@@ -436,6 +436,9 @@ struct common_params {
     int reasoning_budget = -1;
     bool prefill_assistant = true; // if true, any trailing assistant message will be prefilled into the response
 
+    // declared unconditionally so that common_params is defined identically in every target
+    bool use_systemd = false; // use systemd socket and readiness notification
+
     std::vector api_keys;
 
     std::string ssl_file_key = ""; // NOLINT
diff --git a/tools/server/CMakeLists.txt b/tools/server/CMakeLists.txt
index 83b608c32a9c7..6b983c01e2676 100644
--- a/tools/server/CMakeLists.txt
+++ b/tools/server/CMakeLists.txt
@@ -47,4 +47,13 @@ if (WIN32)
     TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
 endif()
 
+if (UNIX AND NOT APPLE AND LLAMA_SERVER_SYSTEMD)
+    message(STATUS "LLAMA_SERVER_SYSTEMD is ON, enabling systemd support")
+    find_package(PkgConfig REQUIRED)
+    pkg_check_modules(SYSTEMD REQUIRED libsystemd)
+    target_include_directories(${TARGET} PRIVATE ${SYSTEMD_INCLUDE_DIRS})
+    target_link_libraries(${TARGET} PRIVATE ${SYSTEMD_LIBRARIES})
+    target_compile_definitions(${TARGET} PRIVATE LLAMA_CPP_SYSTEMD_SUPPORT)
+endif()
+
 target_compile_features(${TARGET} PRIVATE cxx_std_17)
diff --git a/tools/server/server.cpp b/tools/server/server.cpp
index 519704fad7930..16ef0f07317e4 100644
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@@ -31,6 +31,11 @@
 #include
 #include
 
+#ifdef LLAMA_CPP_SYSTEMD_SUPPORT
+#    include <sys/socket.h>
+#    include <systemd/sd-daemon.h>
+#endif // LLAMA_CPP_SYSTEMD_SUPPORT
+
 using json = nlohmann::ordered_json;
 
 constexpr int HTTP_POLLING_SECONDS = 1;
@@ -4068,6 +4073,42 @@ inline void signal_handler(int signal) {
     shutdown_handler(signal);
 }
 
+#ifdef LLAMA_CPP_SYSTEMD_SUPPORT
+// httplib::Server subclass that can adopt a listening socket handed over by systemd
+// (socket activation) instead of binding one itself.
+class SystemdServer : public httplib::Server {
+  public:
+    // Adopts the socket passed by systemd as the server's listening socket.
+    // Returns false, after logging the reason, unless exactly one listening
+    // stream socket was passed; the caller can then bind a socket itself.
+    bool setup_sd_socket() {
+        // 0: leave the LISTEN_* variables in the environment
+        const int n_fds = sd_listen_fds(0);
+        if (n_fds != 1) {
+            LOG_ERR("%s: sd_listen_fds() returned %d\n", __func__, n_fds);
+            return false;
+        }
+
+        const int fd = SD_LISTEN_FDS_START;
+        // AF_UNSPEC: any address family; 1: the socket must already be listening
+        const int is_listener = sd_is_socket(fd, AF_UNSPEC, SOCK_STREAM, 1);
+        if (is_listener <= 0) {
+            LOG_ERR("%s: fd %d is not a listening stream socket (sd_is_socket() returned %d)\n", __func__, fd, is_listener);
+            return false;
+        }
+
+        LOG_INF("%s: using systemd socket fd %d\n", __func__, fd);
+        svr_sock_ = fd;
+        return true;
+    }
+};
+
+// server type instantiated when SSL is not in use
+using plain_server_t = SystemdServer;
+#else
+using plain_server_t = httplib::Server;
+#endif // LLAMA_CPP_SYSTEMD_SUPPORT
+
 int main(int argc, char ** argv) {
     // own arguments required by this example
     common_params params;
@@ -4098,14 +4139,14 @@ int main(int argc, char ** argv) {
         );
     } else {
         LOG_INF("Running without SSL\n");
-        svr.reset(new httplib::Server());
+        svr.reset(new plain_server_t());
     }
 #else
     if (params.ssl_file_key != "" && params.ssl_file_cert != "") {
         LOG_ERR("Server is built without SSL support\n");
         return 1;
     }
-    svr.reset(new httplib::Server());
+    svr.reset(new plain_server_t());
 #endif
 
     std::atomic state{SERVER_STATE_LOADING_MODEL};
@@ -5280,24 +5321,44 @@ int main(int argc, char ** argv) {
     };
 
     bool was_bound = false;
     bool is_sock = false;
-    if (string_ends_with(std::string(params.hostname), ".sock")) {
-        is_sock = true;
-        LOG_INF("%s: setting address family to AF_UNIX\n", __func__);
-        svr->set_address_family(AF_UNIX);
-        // bind_to_port requires a second arg, any value other than 0 should
-        // simply get ignored
-        was_bound = svr->bind_to_port(params.hostname, 8080);
-    } else {
-        LOG_INF("%s: binding port with default address family\n", __func__);
-        // bind HTTP listen port
-        if (params.port == 0) {
-            int bound_port = svr->bind_to_any_port(params.hostname);
-            if ((was_bound = (bound_port >= 0))) {
-                params.port = bound_port;
-            }
+    bool using_sd_socket = false; // true once the listening socket was inherited from systemd
+
+#ifdef LLAMA_CPP_SYSTEMD_SUPPORT
+    if (params.use_systemd) {
+        // with SSL configured svr is not a SystemdServer, so the downcast has to be checked
+        auto * sd_svr = dynamic_cast<SystemdServer *>(svr.get());
+        if (sd_svr == nullptr) {
+            LOG_ERR("%s: systemd socket activation is not supported by this server type\n", __func__);
+        } else {
+            was_bound = sd_svr->setup_sd_socket();
+        }
+        using_sd_socket = was_bound;
+        if (!was_bound) {
+            LOG_INF("%s: couldn't set up systemd socket; falling back to opening host:port socket\n", __func__);
+        }
+    }
+#endif // LLAMA_CPP_SYSTEMD_SUPPORT
+
+    if (!was_bound) {
+        if (string_ends_with(std::string(params.hostname), ".sock")) {
+            is_sock = true;
+            LOG_INF("%s: setting address family to AF_UNIX\n", __func__);
+            svr->set_address_family(AF_UNIX);
+            // bind_to_port requires a second arg, any value other than 0 should
+            // simply get ignored
+            was_bound = svr->bind_to_port(params.hostname, 8080);
         } else {
-            was_bound = svr->bind_to_port(params.hostname, params.port);
+            LOG_INF("%s: binding port with default address family\n", __func__);
+            // bind HTTP listen port
+            if (params.port == 0) {
+                int bound_port = svr->bind_to_any_port(params.hostname);
+                if ((was_bound = (bound_port >= 0))) {
+                    params.port = bound_port;
+                }
+            } else {
+                was_bound = svr->bind_to_port(params.hostname, params.port);
+            }
         }
     }
 
@@ -5326,6 +5387,16 @@ int main(int argc, char ** argv) {
     ctx_server.init();
     state.store(SERVER_STATE_READY);
 
+#ifdef LLAMA_CPP_SYSTEMD_SUPPORT
+    if (params.use_systemd) {
+        // tell the service manager that start-up has finished
+        const int rc = sd_notify(0, "READY=1");
+        if (rc < 0) {
+            LOG_ERR("%s: sd_notify() failed: %d\n", __func__, rc);
+        }
+    }
+#endif
+
     LOG_INF("%s: model loaded\n", __func__);
 
     // print sample chat example to make it clear which template is used
@@ -5360,9 +5431,13 @@ int main(int argc, char ** argv) {
     SetConsoleCtrlHandler(reinterpret_cast(console_ctrl_handler), true);
 #endif
 
-    LOG_INF("%s: server is listening on %s - starting the main loop\n", __func__,
-            is_sock ? string_format("unix://%s", params.hostname.c_str()).c_str() :
-                      string_format("http://%s:%d", params.hostname.c_str(), params.port).c_str());
+    if (using_sd_socket) {
+        LOG_INF("%s: server is listening on systemd socket - starting the main loop\n", __func__);
+    } else {
+        LOG_INF("%s: server is listening on %s - starting the main loop\n", __func__,
+                is_sock ? string_format("unix://%s", params.hostname.c_str()).c_str() :
+                          string_format("http://%s:%d", params.hostname.c_str(), params.port).c_str());
+    }
 
     // this call blocks the main thread until queue_tasks.terminate() is called
     ctx_server.queue_tasks.start_loop();