Commit 7c973fe

Fix logging and via_streaming, bump version
1 parent 9cd6109 commit 7c973fe

6 files changed: +434 -36 lines changed

conda-create.sourceme

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@ conda create -y -n llms_wrapper python=3.11
 
 conda activate llms_wrapper
 
-pip install -e .
+pip install -e .[all]
 
 python -m ipykernel install --user --name=llms_wrapper

llms_wrapper/llms.py

Lines changed: 55 additions & 23 deletions
@@ -20,6 +20,8 @@
 from litellm import completion, completion_cost, token_counter
 from litellm.utils import get_model_info, get_supported_openai_params, supports_response_schema
 from litellm.utils import supports_function_calling, supports_parallel_function_calling
+from litellm._logging import _enable_debugging as litellm_enable_debugging
+from litellm._logging import _disable_debugging as litellm_disable_debugging
 from llms_wrapper.utils import dict_except
 from llms_wrapper.model_list import model_list

@@ -642,8 +644,8 @@ def query(
         debug=False,
         litellm_debug=None,
         stream=False,
-        via_stream=False,
-        recursive_call_info: Optional[Dict[str, any]] = None,
+        via_streaming=False,
+        recursive_call_info: Optional[Dict[str, any]] = None,
         **kwargs,
     ) -> Dict[str, any]:
         """
@@ -661,7 +663,7 @@ def query(
            litellm_debug: if True, litellm debug logging is enabled, if False, disabled, if None, use debug setting
            stream: if True, the returned object contains the stream that can be iterated over. Streaming
                may not work for all models.
-           via_stream: if True, ignores the stream parameters, the response data is retrieved internally via streaming.
+           via_streaming: if True, ignores the stream parameters, the response data is retrieved internally via streaming.
                This may be useful if the non-streaming response keeps timing out.
            recursive_call_info: internal use only
            kwargs: any additional keyword arguments to pass on to the LLM
@@ -672,12 +674,30 @@ def query(
            otherwise answer contains the response and error is the empty string.
            The boolean key "ok" is True if there is no error, False otherwise.
        """
+        def cleaned_args(args: dict):
+            """If there is an API key in the dict, censor it"""
+            args = args.copy()
+            if "api_key" in args:
+                args["api_key"] = "***"
+            return args
        if self.debug:
            debug = True
        if litellm_debug is None and debug or litellm_debug:
            # litellm.set_verbose = True ## deprecated!
            os.environ['LITELLM_LOG'] = 'DEBUG'
+            litellm_enable_debugging()
+            litellm._turn_on_debug()
+        else:
+            # make sure we turn off debugging if it is still on from a previous call
+            litellm_disable_debugging()
+            os.environ['LITELLM_LOG'] = 'INFO'
        llm = self.llms[llmalias].config
+        logger.debug(f"llm config: {cleaned_args(llm)}")
+        # allow to specify via_streaming and stream in the llm config as well, the value in the config will override the call
+        if "via_streaming" in llm and llm["via_streaming"]:
+            via_streaming = True
+        if "stream" in llm and llm["stream"]:
+            stream = True
        if not messages:
            raise ValueError(f"Error: No messages to send to the LLM: {llmalias}, messages: {messages}")
        if debug:
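
The hunk above is also where the new config-level switches take effect: a truthy "via_streaming" or "stream" entry in an LLM's config now forces that mode regardless of what the caller passed to query(). A minimal sketch of such a config entry; the surrounding "llms" list layout and the model name are assumptions for illustration, while "via_streaming" and "stream" are the keys read by the new code:

# Sketch only: the "llms" list layout and the model name are assumed, not shown in this diff.
config = {
    "llms": [
        {
            "llm": "openai/gpt-4o-mini",   # hypothetical model entry
            "via_streaming": True,         # forces via_streaming for every query() call
        },
    ],
}
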
@@ -688,6 +708,8 @@ def query(
            KNOWN_LLM_CONFIG_FIELDS,
            ignore_underscored=True,
        )
+        logger.debug(f"Options: via_streaming: {via_streaming}, stream: {stream}")
+        logger.debug(f"Initial completion kwargs: {cleaned_args(completion_kwargs)}")
        if recursive_call_info is None:
            recursive_call_info = {}
        if llm.get("api_key"):
@@ -712,22 +734,24 @@ def query(
            fmap = toolnames2funcs(tools)
        else:
            fmap = {}
-       if via_stream:
+       if via_streaming:
            # TODO: check if model supports streaming
            completion_kwargs["stream"] = True
+           logger.debug(f"completion kwargs after detecting via_streaming: {cleaned_args(completion_kwargs)}")
        elif stream:
            # TODO: check if model supports streaming
            # if streaming is enabled, we always return the original response
            return_response = True
            completion_kwargs["stream"] = True
+           logger.debug(f"completion kwargs after detecting stream: {cleaned_args(completion_kwargs)}")
        ret = {}
        # before adding the kwargs, save the recursive_call_info and remove it from kwargs
        if debug:
-           print(f"DEBUG: Received recursive call info: {recursive_call_info}")
+           logger.debug(f"Received recursive call info: {recursive_call_info}")
        if kwargs:
            completion_kwargs.update(dict_except(kwargs, KNOWN_LLM_CONFIG_FIELDS, ignore_underscored=True))
        if debug:
-           print(f"DEBUG: Calling completion with kwargs {completion_kwargs}")
+           logger.debug(f"calling query with completion kwargs: {cleaned_args(completion_kwargs)}")
        # if we have min_delay set, we look at the _last_request_time for the LLM and caclulate the time
        # to wait until we can send the next request and then just wait
        min_delay = llm.get("min_delay", kwargs.get("min_delay", 0.0))
@@ -749,23 +773,29 @@ def query(
            response = litellm.completion(
                model=llm["llm"],
                messages=messages,
-               drop_params=True,
+               drop_params=False, # we do not drop, so typos in the query call can be detected easier!
                **completion_kwargs)
-           if via_stream:
+           logger.debug(f"Received response from litellm")
+           if via_streaming:
                # retrieve the response using streaming, return once we have everything
                try:
                    answer = ""
+                   logger.debug(f"Retrieving chunks ...")
+                   n_chunks = 0
                    for chunk in response:
                        choice0 = chunk["choices"][0]
                        if choice0.finish_reason == "stop":
-                           logger.debug(f"DEBUG: streaming got stop. Chunk {chunk['index']}: {chunk['value']}")
+                           logger.debug(f"Streaming got stop. Chunk {chunk}")
                            break
+                       n_chunks += 1
                        content = choice0["delta"].get("content", "")
-                       logger.debug(f"DEBUG: streaming content: {content}")
+                       logger.debug(f"Got streaming content: {content}")
                        answer += content
                        answer += content
                    if return_response:
                        ret["response"] = response
+                   ret["answer"] = answer
+                   ret["n_chunks"] = n_chunks
                    ret["cost"] = None
                    ret["elapsed_time"] = time.time() - start
                    ret["ok"] = True
@@ -808,6 +838,7 @@ def chunk_generator(model_generator, retobj):
            logger.debug(f"Full Response: {response}")
            llm["_elapsed_time"] += elapsed
            ret["elapsed_time"] = elapsed
+           ret["n_chunks"] = 1
            if return_response:
                ret["response"] = response
            # prevent the api key from leaking out
@@ -825,7 +856,7 @@ def chunk_generator(model_generator, retobj):
                    messages=messages,
                )
                if debug:
-                   print(f"DEBUG: cost for this call {ret['cost']}")
+                   logger.debug(f"Cost for this call {ret['cost']}")
            except Exception as e:
                logger.debug(f"Error in completion_cost for model {llm['llm']}: {e}")
                ret["cost"] = 0.0
@@ -839,7 +870,7 @@ def chunk_generator(model_generator, retobj):
            if recursive_call_info.get("cost") is not None:
                ret["cost"] += recursive_call_info["cost"]
            if debug:
-               print(f"DEBUG: cost for this and previous calls {ret['cost']}")
+               logger.debug(f"Cost for this and previous calls {ret['cost']}")
            if recursive_call_info.get("n_completion_tokens") is not None:
                ret["n_completion_tokens"] += recursive_call_info["n_completion_tokens"]
            if recursive_call_info.get("n_prompt_tokens") is not None:
@@ -861,7 +892,7 @@ def chunk_generator(model_generator, retobj):
            # TODO: if feasable handle all tool calling here or in a separate method which does
            # all the tool calling steps (up to a specified maximum).
            if debug:
-               print(f"DEBUG: checking for tool_calls: {response_message}, have tools: {tools is not None}")
+               logger.debug(f"Checking for tool_calls: {response_message}, have tools: {tools is not None}")
            if tools is not None:
                # TODO: if streaming is enabled we need to gather the complete response before
                # we can process the tool calls
@@ -872,17 +903,17 @@ def chunk_generator(model_generator, retobj):
                if stream:
                    raise ValueError("Error: streaming is not supported for tool calls yet")
                if debug:
-                   print(f"DEBUG: got {len(tool_calls)} tool calls:")
+                   logger.debug(f"Got {len(tool_calls)} tool calls:")
                    for tool_call in tool_calls:
-                       print(f"DEBUG: {tool_call}")
+                       logger.debug(f"Tool call: {tool_call}")
                if len(tool_calls) > 0: # not an empty list
                    if debug:
-                       print(f"DEBUG: appending response message: {response_message}")
+                       logger.debug(f"Appending response message: {response_message}")
                    messages.append(response_message)
                    for tool_call in tool_calls:
                        function_name = tool_call.function.name
                        if debug:
-                           print(f"DEBUG: tool call {function_name}")
+                           logger.debug(f"Tool call {function_name}")
                        fun2call = fmap.get(function_name)
                        if fun2call is None:
                            ret["error"] = f"Unknown tooling function name: {function_name}"
@@ -892,15 +923,15 @@ def chunk_generator(model_generator, retobj):
                        function_args = json.loads(tool_call.function.arguments)
                        try:
                            if debug:
-                               print(f"DEBUG: calling {function_name} with args {function_args}")
+                               logger.debug(f"Calling {function_name} with args {function_args}")
                            function_response = fun2call(**function_args)
                            if debug:
-                               print(f"DEBUG: got response {function_response}")
+                               logger.debug(f"Got response {function_response}")
                        except Exception as e:
                            tb = traceback.extract_tb(e.__traceback__)
                            filename, lineno, funcname, text = tb[-1]
                            if debug:
-                               print(f"DEBUG: function call got error {e}")
+                               logger.debug(f"Function call got error {e}")
                            ret["error"] = f"Error executing tool function {function_name}: {str(e)} in {filename}:{lineno} {funcname}"
                            if debug:
                                logger.error(f"Returning error: {e}")
@@ -914,10 +945,10 @@ def chunk_generator(model_generator, retobj):
                            content=json.dumps(function_response)))
                        # recursively call query
                        if debug:
-                           print(f"DEBUG: recursively calling query with messages:")
+                           logger.debug(f"Recursively calling query with messages:")
                            for idx, msg in enumerate(messages):
-                               print(f"DEBUG: Message {idx}: {msg}")
-                           print(f"DEBUG: recursively_call_info is {recursive_call_info}")
+                               logger.debug(f"Message {idx}: {msg}")
+                           logger.debug(f"Recursively_call_info is {recursive_call_info}")
                        return self.query(
                            llmalias,
                            messages,
@@ -929,6 +960,7 @@ def chunk_generator(model_generator, retobj):
                            recursive_call_info=recursive_call_info,
                            **kwargs)
        except Exception as e:
+           logger.debug(f"Exception in query from litellm: {e}")
            tb = traceback.extract_tb(e.__traceback__)
            filename, lineno, funcname, text = tb[-1]
            ret["error"] = str(e) + f" in {filename}:{lineno} {funcname}"

llms_wrapper/version.py

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
 import importlib.metadata
-__version__ = "0.5.4"
+__version__ = "0.5.5"

notebooks/test-streaming.ipynb

Lines changed: 1 addition & 1 deletion
@@ -189,7 +189,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "llms_wrapper",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
