
Commit a3e9807

Add support for hidden streaming.
1 parent b49b8c9 commit a3e9807

3 files changed: +41 −4 lines changed

llms_wrapper/llms.py

Lines changed: 39 additions & 3 deletions
@@ -42,6 +42,7 @@
     "max_output_tokens",
     "max_input_tokens",
     "use_phoenix",
+    "via_streaming",
     "min_delay", # minimum delay between queries for that model
 ]

@@ -641,6 +642,7 @@ def query(
         debug=False,
         litellm_debug=None,
         stream=False,
+        via_stream=False,
         recursive_call_info: Optional[Dict[str, any]] = None,
         **kwargs,
     ) -> Dict[str, any]:
@@ -657,8 +659,10 @@ def query(
             return_response: whether or not the complete reponse should get returned
             debug: if True, emits debug messages to aid development and debugging
             litellm_debug: if True, litellm debug logging is enabled, if False, disabled, if None, use debug setting
-            stream: if True, the returned object containst the stream that can be iterated over. Streaming
+            stream: if True, the returned object contains the stream that can be iterated over. Streaming
                 may not work for all models.
+            via_stream: if True, ignores the stream parameters, the response data is retrieved internally via streaming.
+                This may be useful if the non-streaming response keeps timing out.
             recursive_call_info: internal use only
             kwargs: any additional keyword arguments to pass on to the LLM
@@ -708,7 +712,10 @@ def query(
             fmap = toolnames2funcs(tools)
         else:
             fmap = {}
-        if stream:
+        if via_stream:
+            # TODO: check if model supports streaming
+            completion_kwargs["stream"] = True
+        elif stream:
             # TODO: check if model supports streaming
             # if streaming is enabled, we always return the original response
             return_response = True
@@ -743,7 +750,36 @@ def query(
             model=llm["llm"],
             messages=messages,
             **completion_kwargs)
-        if stream:
+        if via_stream:
+            # retrieve the response using streaming, return once we have everything
+            try:
+                answer = ""
+                for chunk in response:
+                    choice0 = chunk["choices"][0]
+                    if choice0.finish_reason == "stop":
+                        logger.debug(f"DEBUG: streaming got stop. Chunk {chunk['index']}: {chunk['value']}")
+                        break
+                    content = choice0["delta"].get("content", "")
+                    logger.debug(f"DEBUG: streaming content: {content}")
+                    answer += content
+                    answer += content
+                if return_response:
+                    ret["response"] = response
+                ret["cost"] = None
+                ret["elapsed_time"] = time.time() - start
+                ret["ok"] = True
+                ret["error"] = ""
+                return ret
+            except Exception as e:
+                tb = traceback.extract_tb(e.__traceback__)
+                filename, lineno, funcname, text = tb[-1]
+                ret["error"] = str(e) + f" in {filename}:{lineno} {funcname}"
+                if debug:
+                    logger.error(f"Returning error: {e}")
+                ret["answer"] = ""
+                ret["ok"] = False
+                return ret
+        elif stream:
             def chunk_generator(model_generator, retobj):
                 try:
                     for chunk in model_generator:
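
For readers who want the underlying pattern in isolation: the new via_stream branch simply drains the streamed response and glues the chunk deltas back together before returning. Below is a minimal standalone sketch of that accumulation loop, assuming a litellm-style streaming interface; the model name and messages are placeholders and not part of this commit.

import litellm

def collect_streamed_answer(model: str, messages: list) -> str:
    """Request a streaming completion and assemble the full answer locally."""
    # stream=True makes litellm return an iterator of partial-delta chunks
    response = litellm.completion(model=model, messages=messages, stream=True)
    answer = ""
    for chunk in response:
        choice0 = chunk.choices[0]
        if choice0.finish_reason == "stop":
            break
        # delta.content can be None for some chunks (e.g. role-only deltas)
        answer += choice0.delta.content or ""
    return answer

# Placeholder usage (model and prompt are illustrative only):
# print(collect_streamed_answer("mistral/mistral-large-latest",
#                               [{"role": "user", "content": "Hello"}]))

The commit's implementation does the same work inside query(), additionally recording elapsed time, cost, and error information in the returned dict.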

llms_wrapper/version.py

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
 import importlib.metadata
-__version__ = "0.5.2"
+__version__ = "0.5.3"

test-chatbot-config.hjson

Lines changed: 1 addition & 0 deletions
@@ -72,6 +72,7 @@
         api_key_env: MY_MISTRAL_API_KEY
         alias: mistral-large
         temperature: 0
+        via_streaming: true
     }
     {
         llm: anthropic/claude-sonnet-4-20250514
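
A hypothetical end-to-end usage sketch follows. Only the via_stream keyword, the via_streaming config key, and the result keys ("answer", "ok", "error") come from this commit; the wrapper class name, its constructor, and the positional arguments of query() are assumptions made for illustration.

from llms_wrapper.llms import LLMS   # assumed import path and class name

# Assumed config shape, modelled on the hjson entry above
config = {
    "llms": [
        {
            "llm": "mistral/mistral-large-latest",
            "api_key_env": "MY_MISTRAL_API_KEY",
            "alias": "mistral-large",
            "temperature": 0,
            "via_streaming": True,
        }
    ]
}

llms = LLMS(config)          # assumed constructor
llm = llms["mistral-large"]  # assumed lookup by alias

# via_stream=True: query() consumes the stream internally and returns the
# fully assembled answer, just like a normal (non-streaming) call
result = llms.query(llm,
                    messages=[{"role": "user", "content": "Hello!"}],
                    via_stream=True)
if result["ok"]:
    print(result["answer"])
else:
    print("query failed:", result["error"])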
