|
31 | 31 | #include <unordered_map>
|
32 | 32 | #include <unordered_set>
|
33 | 33 |
|
| 34 | +#ifdef LLAMA_CPP_SYSTEMD_SUPPORT |
| 35 | +# include <sys/socket.h> |
| 36 | +# include <sys/stat.h> |
| 37 | +# include <systemd/sd-daemon.h> |
| 38 | +#endif // LLAMA_CPP_SYSTEMD_SUPPORT |
| 39 | + |
34 | 40 | using json = nlohmann::ordered_json;
|
35 | 41 |
|
36 | 42 | constexpr int HTTP_POLLING_SECONDS = 1;
|
@@ -4068,6 +4074,38 @@ inline void signal_handler(int signal) {
|
4068 | 4074 | shutdown_handler(signal);
|
4069 | 4075 | }
|
4070 | 4076 |
|
| 4077 | +#ifdef LLAMA_CPP_SYSTEMD_SUPPORT |
| 4078 | +// Subclass of httplib::Server that adds systemd socket activation support on systems |
| 4079 | +// where that's available. |
| 4080 | +class SystemdServer : public httplib::Server { |
| 4081 | + public: |
| 4082 | + bool setup_sd_socket() { |
| 4083 | + int n = sd_listen_fds(0); |
| 4084 | + if (n != 1) { |
| 4085 | + LOG_ERR("%s: sd_listen_fds() returned %d\n", __func__, n); |
| 4086 | + return false; |
| 4087 | + } |
| 4088 | + |
| 4089 | + int fd = SD_LISTEN_FDS_START; |
| 4090 | + struct stat statbuf; |
| 4091 | + if (fstat(fd, &statbuf) == -1 || !S_ISSOCK(statbuf.st_mode)) { |
| 4092 | + LOG_ERR("%s: fstat() failed or fd is not a socket\n", __func__); |
| 4093 | + return false; |
| 4094 | + } |
| 4095 | + |
| 4096 | + LOG_INF("%s: using systemd socket fd %d\n", __func__, fd); |
| 4097 | + svr_sock_ = fd; |
| 4098 | + return true; |
| 4099 | + } |
| 4100 | +}; |
| 4101 | +#endif // LLAMA_CPP_SYSTEMD_SUPPORT |
| 4102 | + |
| 4103 | +#ifdef LLAMA_CPP_SYSTEMD_SUPPORT |
| 4104 | +# define NEW_SERVER (new SystemdServer()) |
| 4105 | +#else |
| 4106 | +# define NEW_SERVER (new httplib::Server()) |
| 4107 | +#endif // LLAMA_CPP_SYSTEMD_SUPPORT |
| 4108 | + |
4071 | 4109 | int main(int argc, char ** argv) {
|
4072 | 4110 | // own arguments required by this example
|
4073 | 4111 | common_params params;
|
@@ -4098,14 +4136,14 @@ int main(int argc, char ** argv) {
|
4098 | 4136 | );
|
4099 | 4137 | } else {
|
4100 | 4138 | LOG_INF("Running without SSL\n");
|
4101 |
| - svr.reset(new httplib::Server()); |
| 4139 | + svr.reset(NEW_SERVER); |
4102 | 4140 | }
|
4103 | 4141 | #else
|
4104 | 4142 | if (params.ssl_file_key != "" && params.ssl_file_cert != "") {
|
4105 | 4143 | LOG_ERR("Server is built without SSL support\n");
|
4106 | 4144 | return 1;
|
4107 | 4145 | }
|
4108 |
| - svr.reset(new httplib::Server()); |
| 4146 | + svr.reset(NEW_SERVER); |
4109 | 4147 | #endif
|
4110 | 4148 |
|
4111 | 4149 | std::atomic<server_state> state{SERVER_STATE_LOADING_MODEL};
|
@@ -5280,24 +5318,38 @@ int main(int argc, char ** argv) {
|
5280 | 5318 | };
|
5281 | 5319 |
|
5282 | 5320 | bool was_bound = false;
|
5283 |
| - bool is_sock = false; |
5284 |
| - if (string_ends_with(std::string(params.hostname), ".sock")) { |
5285 |
| - is_sock = true; |
5286 |
| - LOG_INF("%s: setting address family to AF_UNIX\n", __func__); |
5287 |
| - svr->set_address_family(AF_UNIX); |
5288 |
| - // bind_to_port requires a second arg, any value other than 0 should |
5289 |
| - // simply get ignored |
5290 |
| - was_bound = svr->bind_to_port(params.hostname, 8080); |
5291 |
| - } else { |
5292 |
| - LOG_INF("%s: binding port with default address family\n", __func__); |
5293 |
| - // bind HTTP listen port |
5294 |
| - if (params.port == 0) { |
5295 |
| - int bound_port = svr->bind_to_any_port(params.hostname); |
5296 |
| - if ((was_bound = (bound_port >= 0))) { |
5297 |
| - params.port = bound_port; |
5298 |
| - } |
| 5321 | + bool is_sock = false; |
| 5322 | + |
| 5323 | +#ifdef LLAMA_CPP_SYSTEMD_SUPPORT |
| 5324 | + bool using_sd_socket = false; |
| 5325 | + if (params.use_systemd) { |
| 5326 | + was_bound = static_cast<SystemdServer *>(svr.get())->setup_sd_socket(); |
| 5327 | + using_sd_socket = was_bound; |
| 5328 | + if (!was_bound) { |
| 5329 | + LOG_INF("%s: couldn't set up systemd socket; falling back to opening host:port socket\n", __func__); |
| 5330 | + } |
| 5331 | + } |
| 5332 | +#endif // LLAMA_CPP_SYSTEMD_SUPPORT |
| 5333 | + |
| 5334 | + if (!was_bound) { |
| 5335 | + if (string_ends_with(std::string(params.hostname), ".sock")) { |
| 5336 | + is_sock = true; |
| 5337 | + LOG_INF("%s: setting address family to AF_UNIX\n", __func__); |
| 5338 | + svr->set_address_family(AF_UNIX); |
| 5339 | + // bind_to_port requires a second arg, any value other than 0 should |
| 5340 | + // simply get ignored |
| 5341 | + was_bound = svr->bind_to_port(params.hostname, 8080); |
5299 | 5342 | } else {
|
5300 |
| - was_bound = svr->bind_to_port(params.hostname, params.port); |
| 5343 | + LOG_INF("%s: binding port with default address family\n", __func__); |
| 5344 | + // bind HTTP listen port |
| 5345 | + if (params.port == 0) { |
| 5346 | + int bound_port = svr->bind_to_any_port(params.hostname); |
| 5347 | + if ((was_bound = (bound_port >= 0))) { |
| 5348 | + params.port = bound_port; |
| 5349 | + } |
| 5350 | + } else { |
| 5351 | + was_bound = svr->bind_to_port(params.hostname, params.port); |
| 5352 | + } |
5301 | 5353 | }
|
5302 | 5354 | }
|
5303 | 5355 |
|
@@ -5326,6 +5378,12 @@ int main(int argc, char ** argv) {
|
5326 | 5378 | ctx_server.init();
|
5327 | 5379 | state.store(SERVER_STATE_READY);
|
5328 | 5380 |
|
| 5381 | +#ifdef LLAMA_CPP_SYSTEMD_SUPPORT |
| 5382 | + if (params.use_systemd) { |
| 5383 | + sd_notify(0, "READY=1"); |
| 5384 | + } |
| 5385 | +#endif |
| 5386 | + |
5329 | 5387 | LOG_INF("%s: model loaded\n", __func__);
|
5330 | 5388 |
|
5331 | 5389 | // print sample chat example to make it clear which template is used
|
@@ -5360,9 +5418,17 @@ int main(int argc, char ** argv) {
|
5360 | 5418 | SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
|
5361 | 5419 | #endif
|
5362 | 5420 |
|
5363 |
| - LOG_INF("%s: server is listening on %s - starting the main loop\n", __func__, |
5364 |
| - is_sock ? string_format("unix://%s", params.hostname.c_str()).c_str() : |
5365 |
| - string_format("http://%s:%d", params.hostname.c_str(), params.port).c_str()); |
| 5421 | +#ifdef LLAMA_CPP_SYSTEMD_SUPPORT |
| 5422 | + if (using_sd_socket) { |
| 5423 | + LOG_INF("%s: server is listening on systemd socket - starting the main loop\n", __func__); |
| 5424 | + } else { |
| 5425 | +#endif // LLAMA_CPP_SYSTEMD_SUPPORT |
| 5426 | + LOG_INF("%s: server is listening on %s - starting the main loop\n", __func__, |
| 5427 | + is_sock ? string_format("unix://%s", params.hostname.c_str()).c_str() : |
| 5428 | + string_format("http://%s:%d", params.hostname.c_str(), params.port).c_str()); |
| 5429 | +#ifdef LLAMA_CPP_SYSTEMD_SUPPORT |
| 5430 | + } |
| 5431 | +#endif // LLAMA_CPP_SYSTEMD_SUPPORT |
5366 | 5432 |
|
5367 | 5433 | // this call blocks the main thread until queue_tasks.terminate() is called
|
5368 | 5434 | ctx_server.queue_tasks.start_loop();
|
|
0 commit comments