Skip to content

Commit bcfaedb

Browse files
committed
Updated Documentation and Documentation browser
Sources and Segments now use typing.Annotated to document individual parameters rather than putting the descriptions directly in the code comments. The documentation browser takes advantage of this to produce cleaner parameter docs.
1 parent dfdb3e3 commit bcfaedb

22 files changed

+417
-598
lines changed

CHANGELOG.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,11 @@
1212
- changed asFunction to as_function
1313
- Added a check that throws an exception in simplevectordb if the user has clustered and then tries to use cosine for search
1414
- Added a plugin system so it is easier for external whl files to add commands to chatterlang.
15-
- Refactored the documentation system to pull from the registry in real time. This ensures that plugin commands are
15+
- Refactored the documentation system.
16+
- Pulls from the registry in real time. This ensures that plugin commands are
1617
included in the documentation system. It also reduces potential problems from bad parsing of source code.
18+
- Pulls "Annotated" typing from parameter names to create the Parameters section of the documentation.
19+
Makes for cleaner, more consistently up to date documentation. The use of Annotated is optional.
1720
- Updated **isIn** and **isNotIn** to function like **isTrue** so that they need not always be filters.
1821

1922
## 0.8.1

src/talkpipe/app/chatterlang_reference_browser.py

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,36 @@ def _load_component_from_info(self, component_info):
6565
try:
6666
# Convert parameters from ParamSpec list to dict for browser compatibility
6767
parameters = {}
68+
69+
# First pass: calculate max widths for alignment
70+
max_name_width = 0
71+
max_type_width = 0
72+
max_default_width = 0
73+
74+
for param in component_info.parameters:
75+
max_name_width = max(max_name_width, len(param.name))
76+
if param.annotation:
77+
max_type_width = max(max_type_width, len(str(param.annotation)))
78+
if param.default:
79+
max_default_width = max(max_default_width, len(str(param.default)))
80+
81+
# Second pass: format with proper alignment
6882
for param in component_info.parameters:
69-
param_str = param.name
83+
param_str = param.name.ljust(max_name_width)
84+
7085
if param.annotation:
71-
param_str += f": {param.annotation}"
86+
param_str += f": {str(param.annotation).ljust(max_type_width)}"
87+
elif max_type_width > 0: # Add spacing even if no type for this param
88+
param_str += f" {' ' * max_type_width}"
89+
7290
if param.default:
73-
param_str += f" = {param.default}"
91+
param_str += f" = {str(param.default).ljust(max_default_width)}"
92+
elif max_default_width > 0: # Add spacing even if no default for this param
93+
param_str += f" {' ' * max_default_width}"
94+
95+
if param.description:
96+
param_str += f" // {param.description}"
97+
7498
parameters[param.name] = param_str
7599

76100
# Create component

src/talkpipe/app/chatterlang_serve.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
Receives JSON data via HTTP and processes it with a configurable function
44
Multi-user support with session isolation
55
"""
6-
from typing import Union
6+
from typing import Union, Annotated, Optional
77
import logging
88
import argparse
99
import yaml
@@ -1697,9 +1697,9 @@ def load_form_config(config_path: str) -> Dict[str, Any]:
16971697
class ChatterlangServerSegment(AbstractSource):
16981698
"""Segment for receiving JSON data via FastAPI with configurable form"""
16991699

1700-
def __init__(self, port: Union[int,str] = 9999, host: str = "localhost",
1701-
api_key: str = None, require_auth: bool = False,
1702-
form_config: Union[str, Dict[str, Any]] = None):
1700+
def __init__(self, port: Annotated[Union[int,str], "Port number for the server"] = 9999, host: Annotated[str, "Host address to bind to"] = "localhost",
1701+
api_key: Annotated[Optional[str], "API key for authentication"] = None, require_auth: Annotated[bool, "Whether to require authentication"] = False,
1702+
form_config: Annotated[Union[str, Dict[str, Any], None], "Form configuration as dict, config variable, or file path"] = None):
17031703
super().__init__()
17041704
self.port = int(port)
17051705
self.host = host

src/talkpipe/data/email.py

Lines changed: 18 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from email.mime.text import MIMEText
88
from email.mime.multipart import MIMEMultipart
99
from email.header import decode_header
10+
from typing import Annotated
1011
from talkpipe.pipe import core
1112
from talkpipe.chatterlang import registry
1213
from talkpipe.util.config import parse_key_value_str
@@ -140,21 +141,18 @@ def item_to_text(item, body_fields):
140141

141142
@registry.register_segment("sendEmail")
142143
@core.segment(subject_field=None, body_fields=None, sender_email=None, recipient_email=None)
143-
def sendEmail(items, subject_field, body_fields, sender_email, recipient_email, smtp_server=None, port=587):
144-
"""
145-
Send emails for each item in the input iterable using SMTP.
144+
def sendEmail(items,
145+
subject_field: Annotated[str, "Field name in the item to use as email subject"],
146+
body_fields: Annotated[str, "Comma-separated list of field names to include in email body"],
147+
sender_email: Annotated[str, "Sender's email address. If None, uses config value"],
148+
recipient_email: Annotated[str, "Recipient's email address. If None, uses config value"],
149+
smtp_server: Annotated[str, "SMTP server address. Defaults to 'smtp.gmail.com'"] = None,
150+
port: Annotated[int, "SMTP server port"] = 587):
151+
"""Send emails for each item in the input iterable using SMTP.
146152
147153
This function processes a list of items and sends an email for each one, using the specified
148154
fields for subject and body content. It supports both HTML and plain text email formats.
149155
150-
Args:
151-
subject_field (str): Field name in the item to use as email subject
152-
body_fields (list[str]): List of field names to include in email body
153-
sender_email (str, optional): Sender's email address. If None, uses config value
154-
recipient_email (str, optional): Recipient's email address. If None, uses config value
155-
smtp_server (str, optional): SMTP server address. Defaults to 'smtp.gmail.com'
156-
port (int, optional): SMTP server port. Defaults to 587
157-
158156
Yields:
159157
item: Returns each processed item after sending its corresponding email
160158
@@ -389,24 +387,19 @@ def fetch_emails(
389387

390388
@registry.register_source("readEmail")
391389
@core.source(poll_interval_minutes=10, folder='INBOX', mark_as_read=True, limit=100, unseen_only=True)
392-
def readEmail(poll_interval_minutes=10, folder='INBOX', mark_as_read=True, limit=100, unseen_only=True,
393-
imap_server=None, email_address=None, password=None):
394-
"""
395-
A source that monitors an email inbox and yields new unread emails.
390+
def readEmail(poll_interval_minutes: Annotated[int, "Minutes between email checks"] = 10,
391+
folder: Annotated[str, "Mailbox folder to check"] = 'INBOX',
392+
mark_as_read: Annotated[bool, "Whether to mark emails as read"] = True,
393+
limit: Annotated[int, "Maximum number of emails to fetch per check. If -1, fetch all"] = 100,
394+
unseen_only: Annotated[bool, "Whether to only fetch unseen emails"] = True,
395+
imap_server: Annotated[str, "IMAP server address. If None, uses config"] = None,
396+
email_address: Annotated[str, "Email address. If None, uses config"] = None,
397+
password: Annotated[str, "Password. If None, uses config"] = None):
398+
"""A source that monitors an email inbox and yields new unread emails.
396399
397400
This source periodically checks for new unread emails, marks them as read,
398401
and yields their content and metadata. It connects using IMAP and can be
399402
configured to poll at specific intervals.
400-
401-
Args:
402-
poll_interval_minutes (int, optional): Minutes between email checks. Defaults to 10.
403-
folder (str, optional): Mailbox folder to check. Defaults to 'INBOX'.
404-
mark_as_read (bool, optional): Whether to mark emails as read. Defaults to True.
405-
limit (int, optional): Maximum number of emails to fetch per check. Defaults to 100.
406-
if -1, fetch all.
407-
imap_server (str, optional): IMAP server address. If None, uses config.
408-
email_address (str, optional): Email address. If None, uses config.
409-
password (str, optional): Password. If None, uses config.
410403
411404
Yields:
412405
dict: Email metadata and content including:

src/talkpipe/data/extraction.py

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""This module contains segments for extracting text from files."""
22

3-
from typing import Union, Iterable
3+
from typing import Union, Iterable, Annotated
44
from pathlib import PosixPath
55
from docx import Document
66
from talkpipe.pipe.core import segment, AbstractSegment, field_segment
@@ -12,17 +12,13 @@
1212

1313
@register_segment("readtxt")
1414
@field_segment()
15-
def readtxt(file_path):
15+
def readtxt(file_path: Annotated[str, "Path to the text file to read"]):
1616
"""
1717
Reads text files from given file paths or directories and yields their contents.
1818
1919
If an item is a directory, it will scan the directory (recursively by default)
2020
and read all .txt files.
2121
22-
Args:
23-
items (Iterable[str]): Iterable of file or directory paths.
24-
recursive (bool): Whether to scan directories recursively for .txt files.
25-
2622
Yields:
2723
str: The contents of each text file.
2824
@@ -47,16 +43,12 @@ def readtxt(file_path):
4743

4844
@register_segment("readdocx")
4945
@field_segment()
50-
def readdocx(file_path):
46+
def readdocx(file_path: Annotated[str, "Path to the .docx file to read"]):
5147
"""Read and extract text from Microsoft Word (.docx) files.
5248
5349
If an item is a directory, it will scan the directory (recursively by default)
5450
and read all .docx files.
5551
56-
Args:
57-
items (Iterable[str]): Iterable of file or directory paths.
58-
recursive (bool): Whether to scan directories recursively for .docx files.
59-
6052
Yields:
6153
str: The full text content of each document with paragraphs joined by spaces
6254
@@ -84,19 +76,13 @@ def readdocx(file_path):
8476

8577
@register_segment("listFiles")
8678
@segment()
87-
def listFiles(patterns: Iterable[str], full_path: bool = True, files_only: bool = False):
79+
def listFiles(patterns: Annotated[Iterable[str], "Iterable of file patterns or paths (supports wildcards like *, ?, [])"], full_path: Annotated[bool, "Whether to yield full absolute paths or just filenames"] = True, files_only: Annotated[bool, "Whether to include only files (excluding directories)"] = False):
8880
"""
8981
Lists files matching given patterns (potentially with wildcards) and yields their paths.
9082
91-
Args:
92-
patterns (Iterable[str]): Iterable of file patterns or paths (supports wildcards like *, ?, []).
93-
full_path (bool): Whether to yield full absolute paths or just filenames.
94-
files_only (bool): Whether to include only files (excluding directories).
95-
9683
Yields:
9784
str: File paths (absolute if full_path=True, filenames if full_path=False).
9885
99-
10086
Raises:
10187
None: This function does not raise exceptions for non-matching patterns.
10288
"""

src/talkpipe/data/html.py

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Utility functions for processing HTML content"""
22

3+
from typing import Optional, Annotated
34
import logging
45
import re
56
import gzip
@@ -71,20 +72,14 @@ def htmlToText(html, cleanText=True):
7172

7273
@register_segment("htmlToText")
7374
@core.field_segment()
74-
def htmlToTextSegment(raw, cleanText=True):
75+
def htmlToTextSegment(raw: Annotated[str, "The raw HTML content to be converted"], cleanText: Annotated[bool, "Whether to clean and normalize the output text"] = True):
7576
"""
7677
Converts HTML content to text segment.
7778
7879
This function takes HTML content and converts it to plain text format.
7980
If cleanText is enabled, the resulting text will also be cleaned so it
8081
tries to retain only the main body content.
8182
82-
Args:
83-
raw (str): The raw HTML content to be converted
84-
cleanText (bool, optional): Whether to clean and normalize the output text. Defaults to True.
85-
field (str): The field name to be used for the segment. If None, assuming the incoming item is html.
86-
set_as (str): The name of the field to append the text to. If None, just pass on the cleaned text.
87-
8883
Returns:
8984
str: The extracted text content from the HTML
9085
@@ -226,19 +221,16 @@ def downloadURL(url, fail_on_error=True, user_agent=None, timeout=10):
226221

227222
@register_segment("downloadURL")
228223
@core.field_segment()
229-
def downloadURLSegment(item, fail_on_error=True, timeout=10, user_agent=None):
224+
def downloadURLSegment(item: Annotated[str, "The URL to download"],
225+
fail_on_error: Annotated[bool, "If True, raises exceptions on download errors. If False, returns None on errors"] = True,
226+
timeout: Annotated[int, "The timeout in seconds for the download request"] = 10,
227+
user_agent: Annotated[Optional[str], "User agent string to use for the request"] = None):
230228
"""Download a URL segment and return its content.
231229
232230
This function is a wrapper around downloadURL that specifically handles URL segments.
233231
It attempts to download content from the specified URL with configurable error handling
234232
and timeout settings.
235233
236-
Args:
237-
fail_on_error (bool, optional): If True, raises exceptions on download errors.
238-
If False, returns None on errors. Defaults to True.
239-
timeout (int, optional): The timeout in seconds for the download request.
240-
Defaults to 10 seconds.
241-
242234
Returns:
243235
bytes|None: The downloaded content as bytes if successful, None if fail_on_error
244236
is False and an error occurs.

src/talkpipe/data/mongo.py

Lines changed: 18 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import logging
77
import json
88
import re
9-
from typing import Iterable, Iterator, Optional, Union, Dict, Any
9+
from typing import Iterable, Iterator, Optional, Union, Dict, Any, Annotated
1010
from pymongo import MongoClient
1111
from pymongo.collection import Collection
1212
from pymongo.database import Database
@@ -26,35 +26,18 @@ class MongoInsert(core.AbstractSegment):
2626
For each item received, this segment inserts it into the specified MongoDB collection
2727
and then yields the item back to the pipeline. This allows for both persisting data
2828
and continuing to process it in subsequent pipeline stages.
29-
30-
Args:
31-
connection_string (str, optional): MongoDB connection string. If not provided,
32-
will attempt to get from config using the key "mongo_connection_string".
33-
database (str): Name of the MongoDB database to use.
34-
collection (str): Name of the MongoDB collection to use.
35-
field (str, optional): Field to extract from each item for insertion.
36-
If not provided, inserts the entire item. Default is "_".
37-
fields (str, optional): Comma-separated list of fields to extract and include in the
38-
document, in the format "field1:name1,field2:name2". If provided, this creates a
39-
new document with the specified fields. Cannot be used with 'field' parameter.
40-
set_as (str, optional): If provided, adds the MongoDB insertion result
41-
to the item using this field name. Default is None.
42-
create_index (str, optional): If provided, creates an index on this field.
43-
Default is None.
44-
unique_index (bool, optional): If True and create_index is provided,
45-
creates a unique index. Default is False.
4629
"""
4730

4831
def __init__(
4932
self,
50-
connection_string: Optional[str] = None,
51-
database: Optional[str] = None,
52-
collection: Optional[str] = None,
53-
field: str = "_",
54-
fields: Optional[str] = None,
55-
set_as: Optional[str] = None,
56-
create_index: Optional[str] = None,
57-
unique_index: bool = False
33+
connection_string: Annotated[Optional[str], "MongoDB connection string"] = None,
34+
database: Annotated[Optional[str], "Name of the MongoDB database to use"] = None,
35+
collection: Annotated[Optional[str], "Name of the MongoDB collection to use"] = None,
36+
field: Annotated[str, "Field to extract from each item for insertion"] = "_",
37+
fields: Annotated[Optional[str], "Comma-separated list of fields to extract"] = None,
38+
set_as: Annotated[Optional[str], "Field name to add MongoDB insertion result to item"] = None,
39+
create_index: Annotated[Optional[str], "Field to create an index on"] = None,
40+
unique_index: Annotated[bool, "Whether to create a unique index"] = False
5841
):
5942
super().__init__()
6043

@@ -125,9 +108,6 @@ def _close_connection(self):
125108
def transform(self, input_iter: Iterable[Any]) -> Iterator[Any]:
126109
"""Insert each item into the MongoDB collection.
127110
128-
Args:
129-
input_iter: Iterable of items to process.
130-
131111
Yields:
132112
Each item from the input stream after inserting it into MongoDB.
133113
If set_as is specified, the MongoDB result is added to the item.
@@ -200,35 +180,19 @@ class MongoSearch(core.AbstractSegment):
200180
201181
This segment performs a query against a MongoDB collection and yields
202182
the matching documents one by one as they are returned from the database.
203-
204-
Args:
205-
field(str): the field in the incoming item to use as a query. Defaults is "_"
206-
connection_string (str, optional): MongoDB connection string. If not provided,
207-
will attempt to get from config using the key "mongo_connection_string".
208-
database (str): Name of the MongoDB database to use.
209-
collection (str): Name of the MongoDB collection to use.
210-
project (str, optional): JSON string defining the projection for returned documents.
211-
Default is None (returns all fields).
212-
sort (str, optional): JSON string defining the sort order. Default is None.
213-
limit (int, optional): Maximum number of results to return per query. Default is 0 (no limit).
214-
skip (int, optional): Number of documents to skip. Default is 0.
215-
set_as (str, optional): If provided, adds the MongoDB results to the incoming item
216-
using this field name. If not provided, the results themselves are yielded.
217-
as_list (bool, optional): If True and set_as is provided, all results are collected
218-
into a list and appended to the incoming item. Default is False.
219183
"""
220184

221185
def __init__(
222186
self,
223-
field: str = "_",
224-
connection_string: Optional[str] = None,
225-
database: Optional[str] = None,
226-
collection: Optional[str] = None,
227-
project: Optional[str] = None,
228-
sort: Optional[str] = None,
229-
limit: int = 0,
230-
skip: int = 0,
231-
set_as: Optional[str] = None
187+
field: Annotated[str, "Field in the incoming item to use as a query"] = "_",
188+
connection_string: Annotated[Optional[str], "MongoDB connection string"] = None,
189+
database: Annotated[Optional[str], "Name of the MongoDB database to use"] = None,
190+
collection: Annotated[Optional[str], "Name of the MongoDB collection to use"] = None,
191+
project: Annotated[Optional[str], "JSON string defining projection for returned documents"] = None,
192+
sort: Annotated[Optional[str], "JSON string defining sort order"] = None,
193+
limit: Annotated[int, "Maximum number of results to return per query"] = 0,
194+
skip: Annotated[int, "Number of documents to skip"] = 0,
195+
set_as: Annotated[Optional[str], "Field name to add MongoDB results to incoming item"] = None
232196
):
233197
super().__init__()
234198

@@ -283,9 +247,6 @@ def _close_connection(self):
283247
def transform(self, input_iter: Iterable[Any]) -> Iterator[Any]:
284248
"""Search the MongoDB collection based on query parameters.
285249
286-
Args:
287-
input_iter: Iterable of items to process.
288-
289250
Yields:
290251
If set_as is specified, yields each input item with results appended.
291252
Otherwise, yields the MongoDB results directly.

0 commit comments

Comments
 (0)