sandialabs
diff --git a/‎CHANGELOG.md‎
Lines changed: 4 additions & 1 deletion b/‎CHANGELOG.md‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎src/talkpipe/app/chatterlang_reference_browser.py‎
Lines changed: 27 additions & 3 deletions b/‎src/talkpipe/app/chatterlang_reference_browser.py‎
Lines changed: 27 additions & 3 deletions
diff --git a/‎src/talkpipe/app/chatterlang_serve.py‎
Lines changed: 4 additions & 4 deletions b/‎src/talkpipe/app/chatterlang_serve.py‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎src/talkpipe/data/email.py‎
Lines changed: 18 additions & 25 deletions b/‎src/talkpipe/data/email.py‎
Lines changed: 18 additions & 25 deletions
diff --git a/‎src/talkpipe/data/extraction.py‎
Lines changed: 4 additions & 18 deletions b/‎src/talkpipe/data/extraction.py‎
Lines changed: 4 additions & 18 deletions
diff --git a/‎src/talkpipe/data/html.py‎
Lines changed: 6 additions & 14 deletions b/‎src/talkpipe/data/html.py‎
Lines changed: 6 additions & 14 deletions
diff --git a/‎src/talkpipe/data/mongo.py‎
Lines changed: 18 additions & 57 deletions b/‎src/talkpipe/data/mongo.py‎
Lines changed: 18 additions & 57 deletions
@@ -12,8 +12,11 @@
 - changed asFunction to as_function
 - Added a check and throw an exception in simplevectordb if the user has clustered and then tries to use cosine for search
 - Added a plugin system so it is easier for external whl files to add commands to chatterlang.
-- Refactored the documentation system to pull from the registry in real time.  This ensures that plugin commands are 
+- Refactored the documentation system.
+  - pulls from the registry in real time.  This ensures that plugin commands are 
   included in the documentation system.  It also reduces potential problems from bad parsing of source code.
+  - Pulls "Annotated" typing from parameter names to create the Parameters section of the documentation.
+  Makes for cleaner, more consistently up to date documentation.  The use of Annotated is optional.
 - Updated **isIn** and **isNotIn** to function like **isTrue** so that they need not always be filters.
 
 ## 0.8.1
 
@@ -65,12 +65,36 @@ def _load_component_from_info(self, component_info):
         try:
             # Convert parameters from ParamSpec list to dict for browser compatibility
             parameters = {}
+            
+            # First pass: calculate max widths for alignment
+            max_name_width = 0
+            max_type_width = 0
+            max_default_width = 0
+            
+            for param in component_info.parameters:
+                max_name_width = max(max_name_width, len(param.name))
+                if param.annotation:
+                    max_type_width = max(max_type_width, len(str(param.annotation)))
+                if param.default:
+                    max_default_width = max(max_default_width, len(str(param.default)))
+            
+            # Second pass: format with proper alignment
             for param in component_info.parameters:
-                param_str = param.name
+                param_str = param.name.ljust(max_name_width)
+                
                 if param.annotation:
-                    param_str += f": {param.annotation}"
+                    param_str += f": {str(param.annotation).ljust(max_type_width)}"
+                elif max_type_width > 0:  # Add spacing even if no type for this param
+                    param_str += f"  {' ' * max_type_width}"
+                
                 if param.default:
-                    param_str += f" = {param.default}"
+                    param_str += f" = {str(param.default).ljust(max_default_width)}"
+                elif max_default_width > 0:  # Add spacing even if no default for this param
+                    param_str += f"   {' ' * max_default_width}"
+                
+                if param.description:
+                    param_str += f"  // {param.description}"
+                
                 parameters[param.name] = param_str
 
             # Create component
 
@@ -3,7 +3,7 @@
 Receives JSON data via HTTP and processes it with a configurable function
 Multi-user support with session isolation
 """
-from typing import Union
+from typing import Union, Annotated, Optional
 import logging
 import argparse
 import yaml
@@ -1697,9 +1697,9 @@ def load_form_config(config_path: str) -> Dict[str, Any]:
 class ChatterlangServerSegment(AbstractSource):
     """Segment for receiving JSON data via FastAPI with configurable form"""
 
-    def __init__(self, port: Union[int,str] = 9999, host: str = "localhost", 
-                 api_key: str = None, require_auth: bool = False,
-                 form_config: Union[str, Dict[str, Any]] = None):
+    def __init__(self, port: Annotated[Union[int,str], "Port number for the server"] = 9999, host: Annotated[str, "Host address to bind to"] = "localhost", 
+                 api_key: Annotated[Optional[str], "API key for authentication"] = None, require_auth: Annotated[bool, "Whether to require authentication"] = False,
+                 form_config: Annotated[Union[str, Dict[str, Any], None], "Form configuration as dict, config variable, or file path"] = None):
         super().__init__()
         self.port = int(port)
         self.host = host
 
@@ -7,6 +7,7 @@
 from email.mime.text import MIMEText
 from email.mime.multipart import MIMEMultipart
 from email.header import decode_header
+from typing import Annotated
 from talkpipe.pipe import core
 from talkpipe.chatterlang import registry
 from talkpipe.util.config import parse_key_value_str
@@ -140,21 +141,18 @@ def item_to_text(item, body_fields):
 
 @registry.register_segment("sendEmail")
 @core.segment(subject_field=None, body_fields=None, sender_email=None, recipient_email=None)
-def sendEmail(items, subject_field, body_fields, sender_email, recipient_email, smtp_server=None, port=587):
-    """
-    Send emails for each item in the input iterable using SMTP.
+def sendEmail(items, 
+              subject_field: Annotated[str, "Field name in the item to use as email subject"], 
+              body_fields: Annotated[str, "Comma-separated list of field names to include in email body"], 
+              sender_email: Annotated[str, "Sender's email address. If None, uses config value"], 
+              recipient_email: Annotated[str, "Recipient's email address. If None, uses config value"], 
+              smtp_server: Annotated[str, "SMTP server address. Defaults to 'smtp.gmail.com'"] = None, 
+              port: Annotated[int, "SMTP server port"] = 587):
+    """Send emails for each item in the input iterable using SMTP.
 
     This function processes a list of items and sends an email for each one, using the specified
     fields for subject and body content. It supports both HTML and plain text email formats.
 
-    Args:
-        subject_field (str): Field name in the item to use as email subject
-        body_fields (list[str]): List of field names to include in email body
-        sender_email (str, optional): Sender's email address. If None, uses config value
-        recipient_email (str, optional): Recipient's email address. If None, uses config value
-        smtp_server (str, optional): SMTP server address. Defaults to 'smtp.gmail.com'
-        port (int, optional): SMTP server port. Defaults to 587
-
     Yields:
         item: Returns each processed item after sending its corresponding email
 
@@ -389,24 +387,19 @@ def fetch_emails(
 
 @registry.register_source("readEmail")
 @core.source(poll_interval_minutes=10, folder='INBOX', mark_as_read=True, limit=100, unseen_only=True)
-def readEmail(poll_interval_minutes=10, folder='INBOX', mark_as_read=True, limit=100, unseen_only=True, 
-             imap_server=None, email_address=None, password=None):
-    """
-    A source that monitors an email inbox and yields new unread emails.
+def readEmail(poll_interval_minutes: Annotated[int, "Minutes between email checks"] = 10, 
+              folder: Annotated[str, "Mailbox folder to check"] = 'INBOX', 
+              mark_as_read: Annotated[bool, "Whether to mark emails as read"] = True, 
+              limit: Annotated[int, "Maximum number of emails to fetch per check. If -1, fetch all"] = 100, 
+              unseen_only: Annotated[bool, "Whether to only fetch unseen emails"] = True,
+              imap_server: Annotated[str, "IMAP server address. If None, uses config"] = None, 
+              email_address: Annotated[str, "Email address. If None, uses config"] = None, 
+              password: Annotated[str, "Password. If None, uses config"] = None):
+    """A source that monitors an email inbox and yields new unread emails.
     
     This source periodically checks for new unread emails, marks them as read,
     and yields their content and metadata. It connects using IMAP and can be
     configured to poll at specific intervals.
-    
-    Args:
-        poll_interval_minutes (int, optional): Minutes between email checks. Defaults to 10.
-        folder (str, optional): Mailbox folder to check. Defaults to 'INBOX'.
-        mark_as_read (bool, optional): Whether to mark emails as read. Defaults to True.
-        limit (int, optional): Maximum number of emails to fetch per check. Defaults to 100. 
-            if -1, fetch all.
-        imap_server (str, optional): IMAP server address. If None, uses config.
-        email_address (str, optional): Email address. If None, uses config.
-        password (str, optional): Password. If None, uses config.
         
     Yields:
         dict: Email metadata and content including:
 
@@ -1,6 +1,6 @@
 """This module contains segments for extracting text from files."""
 
-from typing import Union, Iterable
+from typing import Union, Iterable, Annotated
 from pathlib import PosixPath
 from docx import Document
 from talkpipe.pipe.core import segment, AbstractSegment, field_segment
@@ -12,17 +12,13 @@
 
 @register_segment("readtxt")
 @field_segment()
-def readtxt(file_path):
+def readtxt(file_path: Annotated[str, "Path to the text file to read"]):
     """
     Reads text files from given file paths or directories and yields their contents.
 
     If an item is a directory, it will scan the directory (recursively by default)
     and read all .txt files.
 
-    Args:
-        items (Iterable[str]): Iterable of file or directory paths.
-        recursive (bool): Whether to scan directories recursively for .txt files.
-
     Yields:
         str: The contents of each text file.
 
@@ -47,16 +43,12 @@ def readtxt(file_path):
 
 @register_segment("readdocx")
 @field_segment()
-def readdocx(file_path):
+def readdocx(file_path: Annotated[str, "Path to the .docx file to read"]):
     """Read and extract text from Microsoft Word (.docx) files.
 
     If an item is a directory, it will scan the directory (recursively by default)
     and read all .docx files.
 
-    Args:
-        items (Iterable[str]): Iterable of file or directory paths.
-        recursive (bool): Whether to scan directories recursively for .docx files.
-
     Yields:
         str: The full text content of each document with paragraphs joined by spaces
 
@@ -84,19 +76,13 @@ def readdocx(file_path):
 
 @register_segment("listFiles")
 @segment()
-def listFiles(patterns: Iterable[str], full_path: bool = True, files_only: bool = False):
+def listFiles(patterns: Annotated[Iterable[str], "Iterable of file patterns or paths (supports wildcards like *, ?, [])"], full_path: Annotated[bool, "Whether to yield full absolute paths or just filenames"] = True, files_only: Annotated[bool, "Whether to include only files (excluding directories)"] = False):
     """
     Lists files matching given patterns (potentially with wildcards) and yields their paths.
 
-    Args:
-        patterns (Iterable[str]): Iterable of file patterns or paths (supports wildcards like *, ?, []).
-        full_path (bool): Whether to yield full absolute paths or just filenames.
-        files_only (bool): Whether to include only files (excluding directories).
-
     Yields:
         str: File paths (absolute if full_path=True, filenames if full_path=False).
 
-
     Raises:
         None: This function does not raise exceptions for non-matching patterns.
     """
 
@@ -1,5 +1,6 @@
 """Utility functions for processing HTML content"""
 
+from typing import Optional, Annotated
 import logging
 import re
 import gzip
@@ -71,20 +72,14 @@ def htmlToText(html, cleanText=True):
 
 @register_segment("htmlToText")
 @core.field_segment()
-def htmlToTextSegment(raw, cleanText=True):
+def htmlToTextSegment(raw: Annotated[str, "The raw HTML content to be converted"], cleanText: Annotated[bool, "Whether to clean and normalize the output text"] = True):
     """
     Converts HTML content to text segment.
 
     This function takes HTML content and converts it to plain text format.
     If cleanText is enabled, the resulting text will also be cleaned so it 
     tries to retain only the main body content.
 
-    Args:
-        raw (str): The raw HTML content to be converted
-        cleanText (bool, optional): Whether to clean and normalize the output text. Defaults to True.
-        field (str): The field name to be used for the segment. If None, assuming the incoming item is html.
-        set_as (str): The name of the field to append the text to.  If None, just pass on the cleaned text.
-
     Returns:
         str: The extracted text content from the HTML
 
@@ -226,19 +221,16 @@ def downloadURL(url, fail_on_error=True, user_agent=None, timeout=10):
 
 @register_segment("downloadURL")
 @core.field_segment()
-def downloadURLSegment(item, fail_on_error=True, timeout=10, user_agent=None):
+def downloadURLSegment(item: Annotated[str, "The URL to download"], 
+                       fail_on_error: Annotated[bool, "If True, raises exceptions on download errors. If False, returns None on errors"] = True, 
+                       timeout: Annotated[int, "The timeout in seconds for the download request"] = 10, 
+                       user_agent: Annotated[Optional[str], "User agent string to use for the request"] = None):
     """Download a URL segment and return its content.
 
     This function is a wrapper around downloadURL that specifically handles URL segments.
     It attempts to download content from the specified URL with configurable error handling
     and timeout settings.
 
-    Args:
-        fail_on_error (bool, optional): If True, raises exceptions on download errors.
-            If False, returns None on errors. Defaults to True.
-        timeout (int, optional): The timeout in seconds for the download request. 
-            Defaults to 10 seconds.
-
     Returns:
         bytes|None: The downloaded content as bytes if successful, None if fail_on_error
             is False and an error occurs.
 
@@ -6,7 +6,7 @@
 import logging
 import json
 import re
-from typing import Iterable, Iterator, Optional, Union, Dict, Any
+from typing import Iterable, Iterator, Optional, Union, Dict, Any, Annotated
 from pymongo import MongoClient
 from pymongo.collection import Collection
 from pymongo.database import Database
@@ -26,35 +26,18 @@ class MongoInsert(core.AbstractSegment):
     For each item received, this segment inserts it into the specified MongoDB collection
     and then yields the item back to the pipeline. This allows for both persisting data
     and continuing to process it in subsequent pipeline stages.
-    
-    Args:
-        connection_string (str, optional): MongoDB connection string. If not provided,
-            will attempt to get from config using the key "mongo_connection_string".
-        database (str): Name of the MongoDB database to use.
-        collection (str): Name of the MongoDB collection to use.
-        field (str, optional): Field to extract from each item for insertion. 
-            If not provided, inserts the entire item. Default is "_".
-        fields (str, optional): Comma-separated list of fields to extract and include in the 
-            document, in the format "field1:name1,field2:name2". If provided, this creates a 
-            new document with the specified fields. Cannot be used with 'field' parameter.
-        set_as (str, optional): If provided, adds the MongoDB insertion result
-            to the item using this field name. Default is None.
-        create_index (str, optional): If provided, creates an index on this field.
-            Default is None.
-        unique_index (bool, optional): If True and create_index is provided, 
-            creates a unique index. Default is False.
     """
 
     def __init__(
         self,
-        connection_string: Optional[str] = None,
-        database: Optional[str] = None,
-        collection: Optional[str] = None,
-        field: str = "_",
-        fields: Optional[str] = None,
-        set_as: Optional[str] = None,
-        create_index: Optional[str] = None,
-        unique_index: bool = False
+        connection_string: Annotated[Optional[str], "MongoDB connection string"] = None,
+        database: Annotated[Optional[str], "Name of the MongoDB database to use"] = None,
+        collection: Annotated[Optional[str], "Name of the MongoDB collection to use"] = None,
+        field: Annotated[str, "Field to extract from each item for insertion"] = "_",
+        fields: Annotated[Optional[str], "Comma-separated list of fields to extract"] = None,
+        set_as: Annotated[Optional[str], "Field name to add MongoDB insertion result to item"] = None,
+        create_index: Annotated[Optional[str], "Field to create an index on"] = None,
+        unique_index: Annotated[bool, "Whether to create a unique index"] = False
     ):
         super().__init__()
 
@@ -125,9 +108,6 @@ def _close_connection(self):
     def transform(self, input_iter: Iterable[Any]) -> Iterator[Any]:
         """Insert each item into the MongoDB collection.
         
-        Args:
-            input_iter: Iterable of items to process.
-            
         Yields:
             Each item from the input stream after inserting it into MongoDB.
             If set_as is specified, the MongoDB result is added to the item.
@@ -200,35 +180,19 @@ class MongoSearch(core.AbstractSegment):
     
     This segment performs a query against a MongoDB collection and yields
     the matching documents one by one as they are returned from the database.
-    
-    Args:
-        field(str): the field in the incoming item to use as a query.  Defaults is "_"
-        connection_string (str, optional): MongoDB connection string. If not provided,
-            will attempt to get from config using the key "mongo_connection_string".
-        database (str): Name of the MongoDB database to use.
-        collection (str): Name of the MongoDB collection to use.
-        project (str, optional): JSON string defining the projection for returned documents.
-            Default is None (returns all fields).
-        sort (str, optional): JSON string defining the sort order. Default is None.
-        limit (int, optional): Maximum number of results to return per query. Default is 0 (no limit).
-        skip (int, optional): Number of documents to skip. Default is 0.
-        set_as (str, optional): If provided, adds the MongoDB results to the incoming item
-            using this field name. If not provided, the results themselves are yielded.
-        as_list (bool, optional): If True and set_as is provided, all results are collected
-            into a list and appended to the incoming item. Default is False.
     """
 
     def __init__(
         self,
-        field: str = "_",
-        connection_string: Optional[str] = None,
-        database: Optional[str] = None,
-        collection: Optional[str] = None,
-        project: Optional[str] = None,
-        sort: Optional[str] = None,
-        limit: int = 0,
-        skip: int = 0,
-        set_as: Optional[str] = None
+        field: Annotated[str, "Field in the incoming item to use as a query"] = "_",
+        connection_string: Annotated[Optional[str], "MongoDB connection string"] = None,
+        database: Annotated[Optional[str], "Name of the MongoDB database to use"] = None,
+        collection: Annotated[Optional[str], "Name of the MongoDB collection to use"] = None,
+        project: Annotated[Optional[str], "JSON string defining projection for returned documents"] = None,
+        sort: Annotated[Optional[str], "JSON string defining sort order"] = None,
+        limit: Annotated[int, "Maximum number of results to return per query"] = 0,
+        skip: Annotated[int, "Number of documents to skip"] = 0,
+        set_as: Annotated[Optional[str], "Field name to add MongoDB results to incoming item"] = None
     ):
         super().__init__()
 
@@ -283,9 +247,6 @@ def _close_connection(self):
     def transform(self, input_iter: Iterable[Any]) -> Iterator[Any]:
         """Search the MongoDB collection based on query parameters.
         
-        Args:
-            input_iter: Iterable of items to process.
-            
         Yields:
             If set_as is specified, yields each input item with results appended.
             Otherwise, yields the MongoDB results directly.