Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,9 @@ option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})

# 3rd party libs
option(LLAMA_CURL "llama: use libcurl to download model from an URL" ON)
option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)
option(LLAMA_CURL "llama: use libcurl to download model from an URL" ON)
option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)
option(LLAMA_SERVER_SYSTEMD "llama-server: support systemd socket activation and readiness notification (linux only)" OFF)

# Required for relocatable CMake package
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
Expand Down
4 changes: 4 additions & 0 deletions common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,10 @@ if (LLAMA_LLGUIDANCE)
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
endif ()

# Expose LLAMA_CPP_SYSTEMD_SUPPORT to this target AND to everything that links
# against it. The macro gates a data member of `common_params` in common.h, so
# every translation unit that includes that header must agree on whether it is
# defined; a PRIVATE definition would give consumers a different struct layout
# than the one this library was compiled with.
if (UNIX AND NOT APPLE AND LLAMA_SERVER_SYSTEMD)
    target_compile_definitions(${TARGET} PUBLIC LLAMA_CPP_SYSTEMD_SUPPORT)
endif()

target_include_directories(${TARGET} PUBLIC . ../vendor)
target_compile_features (${TARGET} PUBLIC cxx_std_17)
target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
Expand Down
8 changes: 8 additions & 0 deletions common/arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2978,6 +2978,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.port = value;
}
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_PORT"));
#ifdef LLAMA_CPP_SYSTEMD_SUPPORT
add_opt(common_arg({ "--systemd" },
string_format("use systemd socket and readiness notification (default: %s)",
params.use_systemd ? "enabled" : "disabled"),
[](common_params & params) { params.use_systemd = true; })
.set_examples({ LLAMA_EXAMPLE_SERVER })
.set_env("LLAMA_ARG_SYSTEMD"));
#endif // LLAMA_CPP_SYSTEMD_SUPPORT
add_opt(common_arg(
{"--path"}, "PATH",
string_format("path to serve static files from (default: %s)", params.public_path.c_str()),
Expand Down
4 changes: 4 additions & 0 deletions common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,10 @@ struct common_params {
int reasoning_budget = -1;
bool prefill_assistant = true; // if true, any trailing assistant message will be prefilled into the response

#ifdef LLAMA_CPP_SYSTEMD_SUPPORT
bool use_systemd = false; // use systemd socket and readiness notification
#endif

std::vector<std::string> api_keys;

std::string ssl_file_key = ""; // NOLINT
Expand Down
8 changes: 8 additions & 0 deletions tools/server/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,12 @@ if (WIN32)
TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
endif()

# Optional systemd integration (socket activation + readiness notification).
# Only attempted on non-Apple UNIX platforms and only when the user opted in
# through the LLAMA_SERVER_SYSTEMD option; libsystemd is then a hard requirement.
if (UNIX AND NOT APPLE AND LLAMA_SERVER_SYSTEMD)
    message(STATUS "LLAMA_SERVER_SYSTEMD is ON, enabling systemd support")
    find_package(PkgConfig REQUIRED)
    # IMPORTED_TARGET creates PkgConfig::SYSTEMD, which carries the include
    # directories, link directories and compile flags reported by pkg-config
    # in addition to the library names, so a libsystemd installed outside the
    # default search paths is still found at compile and link time.
    pkg_check_modules(SYSTEMD REQUIRED IMPORTED_TARGET libsystemd)
    target_link_libraries(${TARGET} PRIVATE PkgConfig::SYSTEMD)
    target_compile_definitions(${TARGET} PRIVATE LLAMA_CPP_SYSTEMD_SUPPORT)
endif()

target_compile_features(${TARGET} PRIVATE cxx_std_17)
110 changes: 88 additions & 22 deletions tools/server/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@
#include <unordered_map>
#include <unordered_set>

#ifdef LLAMA_CPP_SYSTEMD_SUPPORT
# include <sys/socket.h>
# include <sys/stat.h>
# include <systemd/sd-daemon.h>
#endif // LLAMA_CPP_SYSTEMD_SUPPORT

using json = nlohmann::ordered_json;

constexpr int HTTP_POLLING_SECONDS = 1;
Expand Down Expand Up @@ -4068,6 +4074,38 @@ inline void signal_handler(int signal) {
shutdown_handler(signal);
}

#ifdef LLAMA_CPP_SYSTEMD_SUPPORT
// Subclass of httplib::Server that adds systemd socket activation support on systems
// where that's available.
class SystemdServer : public httplib::Server {
  public:
    // Adopt the socket handed over by systemd as this server's listening socket.
    //
    // Succeeds only when systemd passed exactly one file descriptor and that
    // descriptor is a stream socket in listening mode. On success the descriptor
    // is stored in svr_sock_ (not declared in this class; presumably the base
    // class's listening-socket member) and no bind is performed here.
    //
    // Returns true when the systemd socket was adopted, false otherwise (the
    // reason is logged); the server is left untouched on failure.
    bool setup_sd_socket() {
        // Passing 0 asks sd_listen_fds() not to unset the activation environment.
        const int n_fds = sd_listen_fds(0);
        if (n_fds != 1) {
            LOG_ERR("%s: sd_listen_fds() returned %d\n", __func__, n_fds);
            return false;
        }

        // Passed descriptors are numbered consecutively from SD_LISTEN_FDS_START.
        const int fd = SD_LISTEN_FDS_START;

        // S_ISSOCK alone would also accept datagram or already-connected sockets,
        // which can never be accepted on. sd_is_socket() additionally checks the
        // socket type and that the socket is in listening mode; it returns > 0 on
        // a match, 0 on a mismatch and a negative errno-style code on failure.
        const int rc = sd_is_socket(fd, AF_UNSPEC, SOCK_STREAM, 1);
        if (rc <= 0) {
            LOG_ERR("%s: fd %d is not a listening stream socket (sd_is_socket() returned %d)\n", __func__, fd, rc);
            return false;
        }

        LOG_INF("%s: using systemd socket fd %d\n", __func__, fd);
        svr_sock_ = fd;
        return true;
    }
};
#endif // LLAMA_CPP_SYSTEMD_SUPPORT

// NEW_SERVER expands to a heap-allocating expression for the plain HTTP server
// object: a SystemdServer when systemd support is compiled in, otherwise a
// stock httplib::Server. The result is a raw pointer; the caller takes ownership.
// NOTE(review): code that downcasts the server to SystemdServer is only valid
// for objects created through this macro - confirm at each cast site.
#if defined(LLAMA_CPP_SYSTEMD_SUPPORT)
#    define NEW_SERVER (new SystemdServer())
#else
#    define NEW_SERVER (new httplib::Server())
#endif // defined(LLAMA_CPP_SYSTEMD_SUPPORT)

int main(int argc, char ** argv) {
// own arguments required by this example
common_params params;
Expand Down Expand Up @@ -4098,14 +4136,14 @@ int main(int argc, char ** argv) {
);
} else {
LOG_INF("Running without SSL\n");
svr.reset(new httplib::Server());
svr.reset(NEW_SERVER);
}
#else
if (params.ssl_file_key != "" && params.ssl_file_cert != "") {
LOG_ERR("Server is built without SSL support\n");
return 1;
}
svr.reset(new httplib::Server());
svr.reset(NEW_SERVER);
#endif

std::atomic<server_state> state{SERVER_STATE_LOADING_MODEL};
Expand Down Expand Up @@ -5280,24 +5318,38 @@ int main(int argc, char ** argv) {
};

bool was_bound = false;
bool is_sock = false;
if (string_ends_with(std::string(params.hostname), ".sock")) {
is_sock = true;
LOG_INF("%s: setting address family to AF_UNIX\n", __func__);
svr->set_address_family(AF_UNIX);
// bind_to_port requires a second arg, any value other than 0 should
// simply get ignored
was_bound = svr->bind_to_port(params.hostname, 8080);
} else {
LOG_INF("%s: binding port with default address family\n", __func__);
// bind HTTP listen port
if (params.port == 0) {
int bound_port = svr->bind_to_any_port(params.hostname);
if ((was_bound = (bound_port >= 0))) {
params.port = bound_port;
}
bool is_sock = false;

#ifdef LLAMA_CPP_SYSTEMD_SUPPORT
bool using_sd_socket = false;
if (params.use_systemd) {
was_bound = static_cast<SystemdServer *>(svr.get())->setup_sd_socket();
using_sd_socket = was_bound;
if (!was_bound) {
LOG_INF("%s: couldn't set up systemd socket; falling back to opening host:port socket\n", __func__);
}
}
#endif // LLAMA_CPP_SYSTEMD_SUPPORT

if (!was_bound) {
if (string_ends_with(std::string(params.hostname), ".sock")) {
is_sock = true;
LOG_INF("%s: setting address family to AF_UNIX\n", __func__);
svr->set_address_family(AF_UNIX);
// bind_to_port requires a second arg, any value other than 0 should
// simply get ignored
was_bound = svr->bind_to_port(params.hostname, 8080);
} else {
was_bound = svr->bind_to_port(params.hostname, params.port);
LOG_INF("%s: binding port with default address family\n", __func__);
// bind HTTP listen port
if (params.port == 0) {
int bound_port = svr->bind_to_any_port(params.hostname);
if ((was_bound = (bound_port >= 0))) {
params.port = bound_port;
}
} else {
was_bound = svr->bind_to_port(params.hostname, params.port);
}
}
}

Expand Down Expand Up @@ -5326,6 +5378,12 @@ int main(int argc, char ** argv) {
ctx_server.init();
state.store(SERVER_STATE_READY);

#ifdef LLAMA_CPP_SYSTEMD_SUPPORT
if (params.use_systemd) {
sd_notify(0, "READY=1");
}
#endif

LOG_INF("%s: model loaded\n", __func__);

// print sample chat example to make it clear which template is used
Expand Down Expand Up @@ -5360,9 +5418,17 @@ int main(int argc, char ** argv) {
SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
#endif

LOG_INF("%s: server is listening on %s - starting the main loop\n", __func__,
is_sock ? string_format("unix://%s", params.hostname.c_str()).c_str() :
string_format("http://%s:%d", params.hostname.c_str(), params.port).c_str());
#ifdef LLAMA_CPP_SYSTEMD_SUPPORT
if (using_sd_socket) {
LOG_INF("%s: server is listening on systemd socket - starting the main loop\n", __func__);
} else {
#endif // LLAMA_CPP_SYSTEMD_SUPPORT
LOG_INF("%s: server is listening on %s - starting the main loop\n", __func__,
is_sock ? string_format("unix://%s", params.hostname.c_str()).c_str() :
string_format("http://%s:%d", params.hostname.c_str(), params.port).c_str());
#ifdef LLAMA_CPP_SYSTEMD_SUPPORT
}
#endif // LLAMA_CPP_SYSTEMD_SUPPORT

// this call blocks the main thread until queue_tasks.terminate() is called
ctx_server.queue_tasks.start_loop();
Expand Down