From b9b966a1589f849963c3d2821a0bb4951818d042 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 18 Sep 2025 17:42:32 +0000 Subject: [PATCH 1/6] Initial plan From bb044cc5ad9f2533a89d9b73e5880fce7eecde59 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 18 Sep 2025 17:45:37 +0000 Subject: [PATCH 2/6] Initial exploration and plan for adding ALDashboard documentation Co-authored-by: nonprofittechy <7645641+nonprofittechy@users.noreply.github.com> --- docs/components/sidebar.json | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/docs/components/sidebar.json b/docs/components/sidebar.json index 7ed7287b1..1dd9f9fc0 100644 --- a/docs/components/sidebar.json +++ b/docs/components/sidebar.json @@ -1,19 +1,5 @@ { - "items": [ - "components/ALToolbox/Addup", - "components/ALToolbox/ThreePartsDate", - "components/ALToolbox/addenda", - "components/ALToolbox/al_income", - "components/ALToolbox/business_days", - "components/ALToolbox/copy_button", - "components/ALToolbox/display_template", - "components/ALToolbox/llms", - "components/ALToolbox/misc", - "components/ALToolbox/save_input_data", - "components/ALToolbox/test_al_income", - "components/ALToolbox/test_altoolbox", - "components/ALToolbox/test_misc" - ], - "label": "ALToolbox", + "items": [], + "label": null, "type": "category" } \ No newline at end of file From c2fce48ed5a81b5ea5886ee43a5db43c38de136c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 18 Sep 2025 17:53:50 +0000 Subject: [PATCH 3/6] Add ALDashboard to documentation generation and navigation Co-authored-by: nonprofittechy <7645641+nonprofittechy@users.noreply.github.com> --- .github/workflows/deploy.yml | 4 ++ .github/workflows/test-deploy.yml | 4 ++ .../ALDashboard/aldashboard_overview.md | 68 +++++++++++++++++++ pydoc-markdown.yml | 1 + 
sidebars.js | 1 + 5 files changed, 78 insertions(+) create mode 100644 docs/components/ALDashboard/aldashboard_overview.md diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 4b6ab5073..0e858642c 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -38,6 +38,10 @@ jobs: with: repository: SuffolkLITLab/docassemble-EFSPIntegration path: docassemble-EFSPIntegration + - uses: actions/checkout@v3 + with: + repository: SuffolkLITLab/docassemble-ALDashboard + path: docassemble-ALDashboard - name: Go to Docs directory run: | cd docs diff --git a/.github/workflows/test-deploy.yml b/.github/workflows/test-deploy.yml index c6cfbd79b..dc9f14f8a 100644 --- a/.github/workflows/test-deploy.yml +++ b/.github/workflows/test-deploy.yml @@ -38,6 +38,10 @@ jobs: with: repository: SuffolkLITLab/docassemble-EFSPIntegration path: docassemble-EFSPIntegration + - uses: actions/checkout@v3 + with: + repository: SuffolkLITLab/docassemble-ALDashboard + path: docassemble-ALDashboard - name: Go to Docs directory run: | cd docs diff --git a/docs/components/ALDashboard/aldashboard_overview.md b/docs/components/ALDashboard/aldashboard_overview.md new file mode 100644 index 000000000..bf5837e97 --- /dev/null +++ b/docs/components/ALDashboard/aldashboard_overview.md @@ -0,0 +1,68 @@ +--- +id: aldashboard_overview +title: ALDashboard overview +sidebar_label: Overview +slug: /components/ALDashboard/overview +--- + +ALDashboard is a collection of tools to help administer a Docassemble server and debug interviews. It provides utilities for package management, server maintenance, translation workflows, and debugging tools specifically designed for the Document Assembly Line ecosystem. 
+ +## Key Features + +ALDashboard provides several categories of functionality: + +### Package Management +- Create and manage Docassemble packages +- Scan and analyze package contents +- Automated package building and deployment tools + +### Translation and Internationalization +- Translation workflow management +- Multi-language support for interviews +- Translation validation and quality assurance + +### Server Administration +- Server maintenance utilities +- Package installation and updates +- System diagnostics and monitoring + +### Document Processing +- DOCX file validation and processing +- Attachment validation tools +- Document template analysis + +## Main Modules + +ALDashboard consists of several Python modules, each providing specific functionality: + +- **aldashboard.py** - Core dashboard functionality and server management +- **create_package.py** - Package creation and management utilities +- **translation.py** - Translation workflow and management tools +- **project_maintenance.py** - Project maintenance and update utilities +- **docx_wrangling.py** - DOCX document processing and validation +- **package_scanner.py** - Package analysis and scanning tools +- **validate_docx.py** - DOCX validation utilities +- **validate_attachment.py** - Attachment validation tools + +## Installation + +ALDashboard is typically installed as part of the Document Assembly Line setup: + +```bash +pip install docassemble-ALDashboard +``` + +Or install from the Docassemble package management interface by searching for "ALDashboard". + +## Usage + +ALDashboard provides both programmatic APIs and interview-based interfaces for server administration. Most functionality is accessed through the Docassemble web interface after installation. 
+ +## Related Documentation + +- [Assembly Line Project Architecture](/docs/get_started/al_project_architecture) +- [Installation Guide](/docs/get_started/installation) + +## Repository + +The ALDashboard source code is available at: https://github.com/SuffolkLITLab/docassemble-ALDashboard \ No newline at end of file diff --git a/pydoc-markdown.yml b/pydoc-markdown.yml index c1b0794a9..55e259bab 100644 --- a/pydoc-markdown.yml +++ b/pydoc-markdown.yml @@ -5,6 +5,7 @@ loaders: - "../FormFyxer" - "../docassemble-ALToolbox/docassemble/" - "../docassemble-EFSPIntegration/docassemble" + - "../docassemble-ALDashboard/docassemble/" processors: - type: filter skip_empty_modules: true diff --git a/sidebars.js b/sidebars.js index d073c53ee..706008c4c 100644 --- a/sidebars.js +++ b/sidebars.js @@ -164,6 +164,7 @@ module.exports = { 'components/RateMyPDF/ratemypdf_overview', 'components/GithubFeedbackForm/githubfeedbackform_overview', 'components/InterviewStats/interviewstats_overview', + 'components/ALDashboard/aldashboard_overview', { "label": "FormFyxer", "type": "category", From 9f45852a1df9ef9aa9cf5d652d8b486f3fe599ff Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 18 Sep 2025 17:57:45 +0000 Subject: [PATCH 4/6] Complete ALDashboard documentation integration with project architecture link Co-authored-by: nonprofittechy <7645641+nonprofittechy@users.noreply.github.com> --- docs/get_started/al_project_architecture.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/get_started/al_project_architecture.md b/docs/get_started/al_project_architecture.md index de6bf0e90..e33f4fb35 100644 --- a/docs/get_started/al_project_architecture.md +++ b/docs/get_started/al_project_architecture.md @@ -19,7 +19,7 @@ Below is a snapshot of the Project's overall architecture as of **June 2022**. W | [ALWeaver](../authoring/weaver_overview.md) | Helps create a draft interviews from PDF and DOCX forms. 
| [suffolkLITLab/docassemble-ALWeaver](https://github.com/SuffolkLITLab/docassemble-ALWeaver) | | [ALToolbox](https://github.com/SuffolkLITLab/docassemble-ALToolbox) | Utility functions and components that are used in AssemblyLine but that can be easily used by Docassemble developers who use other platforms. | [SuffolkLITLab/docassemble-ALToolbox](https://github.com/SuffolkLITLab/docassemble-ALToolbox) | | [ALKiln](../components/ALKiln/intro.mdx) | Automated testing framework that acts like a user, following your instructions to do things like fill in fields, upload files, sign in, and check your PDFs for accuracy. | [SuffolkLITLab/ALKiln](https://github.com/SuffolkLITLab/ALKiln) | -| ALDashboard | A collection of tools to help administer a Docassemble server and debug interviews. | [SuffolkLITLab/docassemble-ALDashboard](https://github.com/SuffolkLITLab/docassemble-ALDashboard) | +| [ALDashboard](/docs/components/ALDashboard/overview) | A collection of tools to help administer a Docassemble server and debug interviews. | [SuffolkLITLab/docassemble-ALDashboard](https://github.com/SuffolkLITLab/docassemble-ALDashboard) | | [ALRecipes](../components/ALRecipes/alrecipes_overview.md) | Examples for Document Assembly Line interviews, plus generic Docassemble examples addressing specific needs. | [SuffolkLITLab/docassemble-ALRecipes](https://github.com/SuffolkLITLab/docassemble-ALRecipes) | | [InterviewStats](https://github.com/SuffolkLITLab/docassemble-InterviewStats/) | A docassemble interview that lets you view statistics from other saved interview responses. | [SuffolkLITLab/InterviewStats/docassemble-InterviewStats](https://github.com/SuffolkLITLab/docassemble-InterviewStats/) | | Documentation | This website. 
| [SuffolkLITLab/docassemble-AssemblyLine-documentation](https://github.com/SuffolkLITLab/docassemble-AssemblyLine-documentation) | From 018d776aa6afcafc68cb178ea6ab319b319f291f Mon Sep 17 00:00:00 2001 From: Quinten Steenhuis Date: Thu, 18 Sep 2025 15:05:33 -0400 Subject: [PATCH 5/6] Simplify ALDashboard overview and remove stubs --- .../ALDashboard/aldashboard_overview.md | 44 +++---------------- 1 file changed, 5 insertions(+), 39 deletions(-) diff --git a/docs/components/ALDashboard/aldashboard_overview.md b/docs/components/ALDashboard/aldashboard_overview.md index bf5837e97..c1fbf29f9 100644 --- a/docs/components/ALDashboard/aldashboard_overview.md +++ b/docs/components/ALDashboard/aldashboard_overview.md @@ -7,29 +7,10 @@ slug: /components/ALDashboard/overview ALDashboard is a collection of tools to help administer a Docassemble server and debug interviews. It provides utilities for package management, server maintenance, translation workflows, and debugging tools specifically designed for the Document Assembly Line ecosystem. -## Key Features +:::warning This page is a stub -ALDashboard provides several categories of functionality: - -### Package Management -- Create and manage Docassemble packages -- Scan and analyze package contents -- Automated package building and deployment tools - -### Translation and Internationalization -- Translation workflow management -- Multi-language support for interviews -- Translation validation and quality assurance - -### Server Administration -- Server maintenance utilities -- Package installation and updates -- System diagnostics and monitoring - -### Document Processing -- DOCX file validation and processing -- Attachment validation tools -- Document template analysis +We have not yet documented the key modules in the ALDashboard. 
+::: ## Main Modules @@ -44,25 +25,10 @@ ALDashboard consists of several Python modules, each providing specific function - **validate_docx.py** - DOCX validation utilities - **validate_attachment.py** - Attachment validation tools +Some of this functionality is useful outside of the ALDashboard. `project_maintenance.py` is specifically a tool to run from the commandline and is not available as a "widget" you can interact with from the Docassemble frontend. + ## Installation ALDashboard is typically installed as part of the Document Assembly Line setup: -```bash -pip install docassemble-ALDashboard -``` - -Or install from the Docassemble package management interface by searching for "ALDashboard". - -## Usage - -ALDashboard provides both programmatic APIs and interview-based interfaces for server administration. Most functionality is accessed through the Docassemble web interface after installation. - -## Related Documentation - -- [Assembly Line Project Architecture](/docs/get_started/al_project_architecture) - [Installation Guide](/docs/get_started/installation) - -## Repository - -The ALDashboard source code is available at: https://github.com/SuffolkLITLab/docassemble-ALDashboard \ No newline at end of file From 53d6b2e486165fe6ac0c8f4553a6b779fd983890 Mon Sep 17 00:00:00 2001 From: Quinten Steenhuis Date: Thu, 18 Sep 2025 15:25:30 -0400 Subject: [PATCH 6/6] Add ALDashboard to sidebar --- docs/components/ALDashboard/aldashboard.md | 167 ++++++ .../ALDashboard/aldashboard_overview.md | 2 +- docs/components/ALDashboard/create_package.md | 67 +++ docs/components/ALDashboard/docx_wrangling.md | 80 +++ .../components/ALDashboard/package_scanner.md | 52 ++ .../ALDashboard/project_maintenance.md | 222 ++++++++ docs/components/ALDashboard/translation.md | 133 +++++ docs/components/ALDashboard/validate_docx.md | 40 ++ docs/components/ALToolbox/al_income.md | 146 ++--- docs/components/AssemblyLine/al_general.md | 28 + .../components/EFSPIntegration/conversions.md | 8 +- 
docs/components/EFSPIntegration/efm_client.md | 21 - .../EFSPIntegration/py_efsp_client.md | 28 +- docs/components/formfyxer/docx_wrangling.md | 185 ++++++ docs/components/formfyxer/lit_explorer.md | 528 +++++++++++++++++ docs/components/formfyxer/pdf_wrangling.md | 529 ++++++++++++++++++ docs/components/sidebar.json | 12 +- sidebars.js | 13 + 18 files changed, 2140 insertions(+), 121 deletions(-) create mode 100644 docs/components/ALDashboard/aldashboard.md create mode 100644 docs/components/ALDashboard/create_package.md create mode 100644 docs/components/ALDashboard/docx_wrangling.md create mode 100644 docs/components/ALDashboard/package_scanner.md create mode 100644 docs/components/ALDashboard/project_maintenance.md create mode 100644 docs/components/ALDashboard/translation.md create mode 100644 docs/components/ALDashboard/validate_docx.md create mode 100644 docs/components/formfyxer/docx_wrangling.md create mode 100644 docs/components/formfyxer/lit_explorer.md create mode 100644 docs/components/formfyxer/pdf_wrangling.md diff --git a/docs/components/ALDashboard/aldashboard.md b/docs/components/ALDashboard/aldashboard.md new file mode 100644 index 000000000..72843872f --- /dev/null +++ b/docs/components/ALDashboard/aldashboard.md @@ -0,0 +1,167 @@ +# Table of Contents + +* [ALDashboard.aldashboard](#ALDashboard.aldashboard) + * [speedy\_get\_users](#ALDashboard.aldashboard.speedy_get_users) + * [speedy\_get\_sessions](#ALDashboard.aldashboard.speedy_get_sessions) + * [dashboard\_get\_session\_variables](#ALDashboard.aldashboard.dashboard_get_session_variables) + * [ALPackageInstaller](#ALDashboard.aldashboard.ALPackageInstaller) + * [get\_validated\_github\_username](#ALDashboard.aldashboard.ALPackageInstaller.get_validated_github_username) + * [ErrorList](#ALDashboard.aldashboard.ErrorList) + * [ErrorLikeObject](#ALDashboard.aldashboard.ErrorLikeObject) + * [install\_fonts](#ALDashboard.aldashboard.install_fonts) + * 
[list\_installed\_fonts](#ALDashboard.aldashboard.list_installed_fonts) + * [nicer\_interview\_filename](#ALDashboard.aldashboard.nicer_interview_filename) + * [list\_question\_files\_in\_package](#ALDashboard.aldashboard.list_question_files_in_package) + * [list\_question\_files\_in\_docassemble\_packages](#ALDashboard.aldashboard.list_question_files_in_docassemble_packages) + +--- +sidebar_label: aldashboard +title: ALDashboard.aldashboard +--- + + + +#### speedy\_get\_users() + +```python +def speedy_get_users() -> List[Dict[int, str]] +``` + +Return a list of all users in the database. Possibly faster than get_user_list(). + + + +#### speedy\_get\_sessions(user\_id: Optional[int] = None, filename: Optional[str] = None, filter\_step1: bool = True, metadata\_key\_name: str = "metadata") + +```python +def speedy_get_sessions(user_id: Optional[int] = None, + filename: Optional[str] = None, + filter_step1: bool = True, + metadata_key_name: str = "metadata") -> List[Tuple] +``` + +Return a list of the most recent 500 sessions, optionally tied to a specific user ID. + +Each session is a tuple with named columns: +filename, +user_id, +modtime, +key + + + +#### dashboard\_get\_session\_variables(session\_id: str, filename: str) + +```python +def dashboard_get_session_variables(session_id: str, filename: str) +``` + +Return the variables and steps for a given session ID and YAML filename in serializable dictionary format. + + + +## ALPackageInstaller Objects + +```python +class ALPackageInstaller(DAObject) +``` + +Methods and state for installing AssemblyLine. + + + +#### get\_validated\_github\_username(access\_token: str) + +```python +def get_validated_github_username(access_token: str) +``` + +Given a valid GitHub `access_token`, returns the username associated with it. +Otherwise, adds one or more errors to the installer. + + + +## ErrorList Objects + +```python +class ErrorList(DAList) +``` + +Contains `ErrorLikeObject`s so they can be recognized by docassemble. 
+ + + +## ErrorLikeObject Objects + +```python +class ErrorLikeObject(DAObject) +``` + +An object with a `template_name` that identifies the DALazyTemplate that will +show its error. It can contain any other attributes so its template can access them +as needed. DAObject doesn't seem to be enough to allow template definition. + + + +#### install\_fonts(the\_font\_files: DAFileList) + +```python +def install_fonts(the_font_files: DAFileList) +``` + +Install fonts to the server and restart both supervisor and unoconv. + + + +#### list\_installed\_fonts() + +```python +def list_installed_fonts() +``` + +List the fonts installed on the server. + + + +#### nicer\_interview\_filename(filename: str) + +```python +def nicer_interview_filename(filename: str) -> str +``` + +Given a filename like docassemble.playground10ALWeaver:data/questions/assembly_line.yml, +return a less cluttered name like: playground10ALWeaver:assembly_line + + + +#### list\_question\_files\_in\_package(package\_name: str) + +```python +def list_question_files_in_package(package_name: str) -> Optional[List[str]] +``` + +List all the files in the 'data/questions' directory of a package. + +**Arguments**: + +- `package_name` _str_ - The name of the package to list files from. + + +**Returns**: + +- `List[str]` - A list of filenames in the 'data/questions' directory of the package. + + + +#### list\_question\_files\_in\_docassemble\_packages() + +```python +def list_question_files_in_docassemble_packages() +``` + +List all the files in the 'data/questions' directory of all docassemble packages. + +**Returns**: + + Dict[str, List[str]]: A dictionary where the keys are package names and the values are lists of filenames in the 'data/questions' directory of the package. 
+ diff --git a/docs/components/ALDashboard/aldashboard_overview.md b/docs/components/ALDashboard/aldashboard_overview.md index c1fbf29f9..02adffbc1 100644 --- a/docs/components/ALDashboard/aldashboard_overview.md +++ b/docs/components/ALDashboard/aldashboard_overview.md @@ -1,7 +1,7 @@ --- id: aldashboard_overview title: ALDashboard overview -sidebar_label: Overview +sidebar_label: About ALDashboard slug: /components/ALDashboard/overview --- diff --git a/docs/components/ALDashboard/create_package.md b/docs/components/ALDashboard/create_package.md new file mode 100644 index 000000000..f0e0a5ceb --- /dev/null +++ b/docs/components/ALDashboard/create_package.md @@ -0,0 +1,67 @@ +# Table of Contents + +* [ALDashboard.create\_package](#ALDashboard.create_package) + * [create\_package\_zip](#ALDashboard.create_package.create_package_zip) + +--- +sidebar_label: create_package +title: ALDashboard.create_package +--- + + + +#### create\_package\_zip(pkgname: str, info: dict, author\_info: dict, folders\_and\_files: dict, fileobj: Optional[DAFile] = None) + +```python +def create_package_zip(pkgname: str, + info: dict, + author_info: dict, + folders_and_files: dict, + fileobj: Optional[DAFile] = None) -> DAFile +``` + +Given a dictionary of lists, with the keys representing folders and the values +representing a list of DAFiles, create a Python package with Docassemble conventions. 
+info: (created by DAInterview.package_info()) +license +author_name +readme +description +url +version +dependencies +// interview_files replaced with folders_and_files +// template_files +// module_files +// static_files +author_info: +author name and email +folders_and_files: +questions->list of absolute file paths on the local filesystem +templates +modules +static +sources + +Strucure of a docassemble package: ++ docassemble-PKGNAME/ +LICENSE +MANIFEST.in +README.md +setup.cfg +setup.py ++-------docassemble +__init__.py ++------PKGNAME +__init__.py +SOME_MODULE.py ++------data ++------questions +README.md ++------sources +README.md ++------static +README.md ++------templates +README.md + diff --git a/docs/components/ALDashboard/docx_wrangling.md b/docs/components/ALDashboard/docx_wrangling.md new file mode 100644 index 000000000..0c08978f8 --- /dev/null +++ b/docs/components/ALDashboard/docx_wrangling.md @@ -0,0 +1,80 @@ +# Table of Contents + +* [ALDashboard.docx\_wrangling](#ALDashboard.docx_wrangling) + * [update\_docx](#ALDashboard.docx_wrangling.update_docx) + * [get\_labeled\_docx\_runs](#ALDashboard.docx_wrangling.get_labeled_docx_runs) + * [modify\_docx\_with\_openai\_guesses](#ALDashboard.docx_wrangling.modify_docx_with_openai_guesses) + +--- +sidebar_label: docx_wrangling +title: ALDashboard.docx_wrangling +--- + + + +#### update\_docx(document: Union[docx.document.Document, str], modified\_runs: List[Tuple[int, int, str, int]]) + +```python +def update_docx( + document: Union[docx.document.Document, str], + modified_runs: List[Tuple[int, int, str, + int]]) -> docx.document.Document +``` + +Update the document with modified runs. + +**Arguments**: + +- `document` - the docx.Document object, or the path to the DOCX file +- `modified_runs` - a tuple of paragraph number, run number, the modified text, and + a number from -1 to 1 indicating whether a new paragraph should be inserted + before or after the current paragraph. 
+ + +**Returns**: + + The modified document. + + + +#### get\_labeled\_docx\_runs(docx\_path: str, custom\_people\_names: Optional[Tuple[str, str]] = None, openai\_client: Optional[OpenAI] = None) + +```python +def get_labeled_docx_runs( + docx_path: str, + custom_people_names: Optional[Tuple[str, str]] = None, + openai_client: Optional[OpenAI] = None +) -> List[Tuple[int, int, str, int]] +``` + +Scan the DOCX and return a list of modified text with Jinja2 variable names inserted. + +**Arguments**: + +- `docx_path` - path to the DOCX file +- `custom_people_names` - a tuple of custom names and descriptions to use in addition to the default ones. Like: ("clients", "the person benefiting from the form") + + +**Returns**: + + A list of tuples, each containing a paragraph number, run number, and the modified text of the run. + + + +#### modify\_docx\_with\_openai\_guesses(docx\_path: str) + +```python +def modify_docx_with_openai_guesses(docx_path: str) -> docx.document.Document +``` + +Uses OpenAI to guess the variable names for a document and then modifies the document with the guesses. + +**Arguments**: + +- `docx_path` _str_ - Path to the DOCX file to modify. 
+ + +**Returns**: + +- `docx.Document` - The modified document, ready to be saved to the same or a new path + diff --git a/docs/components/ALDashboard/package_scanner.md b/docs/components/ALDashboard/package_scanner.md new file mode 100644 index 000000000..4a98a74ce --- /dev/null +++ b/docs/components/ALDashboard/package_scanner.md @@ -0,0 +1,52 @@ +# Table of Contents + +* [ALDashboard.package\_scanner](#ALDashboard.package_scanner) + * [URL](#ALDashboard.package_scanner.URL) + * [PARAMETERS](#ALDashboard.package_scanner.PARAMETERS) + * [DELAY\_BETWEEN\_QUERYS](#ALDashboard.package_scanner.DELAY_BETWEEN_QUERYS) + * [getUrl](#ALDashboard.package_scanner.getUrl) + * [fetch\_github\_repos](#ALDashboard.package_scanner.fetch_github_repos) + +--- +sidebar_label: package_scanner +title: ALDashboard.package_scanner +--- + + + +#### URL + +The basic URL to use the GitHub API + + + +#### PARAMETERS + +Additional parameters for the query (by default 100 items per page) + + + +#### DELAY\_BETWEEN\_QUERYS + +The time to wait between different queries to GitHub + + + +#### getUrl(url) + +```python +def getUrl(url) +``` + +Given a URL it returns its body + + + +#### fetch\_github\_repos(github\_user, sub\_queries) + +```python +def fetch_github_repos(github_user, sub_queries) -> dict +``` + +Given a github user input, returns soughted info. It doesn't contain version number. 
+ diff --git a/docs/components/ALDashboard/project_maintenance.md b/docs/components/ALDashboard/project_maintenance.md new file mode 100644 index 000000000..31264bf87 --- /dev/null +++ b/docs/components/ALDashboard/project_maintenance.md @@ -0,0 +1,222 @@ +# Table of Contents + +* [ALDashboard.project\_maintenance](#ALDashboard.project_maintenance) + * [get\_package\_names](#ALDashboard.project_maintenance.get_package_names) + * [add\_tag\_to\_repos](#ALDashboard.project_maintenance.add_tag_to_repos) + * [process\_packages\_and\_add\_tag](#ALDashboard.project_maintenance.process_packages_and_add_tag) + * [get\_project\_by\_name](#ALDashboard.project_maintenance.get_project_by_name) + * [get\_repos\_by\_topic](#ALDashboard.project_maintenance.get_repos_by_topic) + * [add\_issues\_and\_create\_cards](#ALDashboard.project_maintenance.add_issues_and_create_cards) + * [find\_issues\_by\_title](#ALDashboard.project_maintenance.find_issues_by_title) + * [add\_issue\_to\_project](#ALDashboard.project_maintenance.add_issue_to_project) + * [link\_issue\_title\_to\_project](#ALDashboard.project_maintenance.link_issue_title_to_project) + * [main](#ALDashboard.project_maintenance.main) + +--- +sidebar_label: project_maintenance +title: ALDashboard.project_maintenance +--- + + + +#### get\_package\_names(server\_name: str) + +```python +def get_package_names(server_name: str) -> List[str] +``` + +Fetches the JSON file from the given Docassemble server and extracts package names. + +**Arguments**: + +- `server_name` _str_ - Name or IP address of the Docassemble server. + + +**Returns**: + +- `List[str]` - List of package names. + + + +#### add\_tag\_to\_repos(token: str, org\_name: str, repo\_names: List[str], tag: str) + +```python +def add_tag_to_repos(token: str, org_name: str, repo_names: List[str], + tag: str) -> None +``` + +Adds a specific tag to each repository in the given list. 
+ +**Arguments**: + +- `token` _str_ - GitHub Personal Access Token (PAT) with appropriate permissions. +- `org_name` _str_ - Name of the GitHub organization. +- `repo_names` _List[str]_ - List of repository names to which the tag will be added. +- `tag` _str_ - The tag to be added to the repositories. + + This function iterates through each repository in the provided list, fetching the + current topics (tags) of the repository. If the specified tag is not already present, + it adds the tag to the repository. The function includes error handling to catch and + print any errors that occur while processing each repository. + + Example usage: + personal_access_token = "YOUR_PERSONAL_ACCESS_TOKEN" + organization_name = "YourOrgName" + repositories = ["repo1", "repo2", "repo3"] + tag_to_add = "your-tag" + + add_tag_to_repos(personal_access_token, organization_name, repositories, tag_to_add) + + + +#### process\_packages\_and\_add\_tag(server\_name: str, token: str, org\_name: str, tag: str) + +```python +def process_packages_and_add_tag(server_name: str, token: str, org_name: str, + tag: str) -> None +``` + +Fetches package names from a Docassemble server, transforms them into repository names, +and adds a specified tag to each repository. + +**Arguments**: + +- `server_name` _str_ - Name or IP address of the Docassemble server. +- `token` _str_ - GitHub Personal Access Token. +- `org_name` _str_ - Name of the GitHub organization. +- `tag` _str_ - Tag to be added to each repository. + + + +#### get\_project\_by\_name(token: str, org\_name: str, project\_name: str) + +```python +def get_project_by_name(token: str, org_name: str, + project_name: str) -> Optional[dict] +``` + +Finds a GitHub Next-Generation project by its name within an organization using GraphQL API. + +**Arguments**: + +- `token` _str_ - GitHub Personal Access Token. +- `org_name` _str_ - Name of the GitHub organization. +- `project_name` _str_ - Name of the GitHub project. 
+ + +**Returns**: + +- `dict` - The GitHub project object, or None if not found. + + + +#### get\_repos\_by\_topic(token: str, org\_name: str, topic: str) + +```python +def get_repos_by_topic(token: str, org_name: str, + topic: str) -> List[Repository] +``` + +Fetches repositories in an organization that have a specific topic. + +**Arguments**: + +- `token` _str_ - GitHub Personal Access Token. +- `org_name` _str_ - Name of the GitHub organization. +- `topic` _str_ - The GitHub topic to filter repositories by. + + +**Returns**: + +- `List[Repository]` - A list of repository objects that have the specified topic. + + + +#### add\_issues\_and\_create\_cards(token: str, org\_name: str, project\_name: str, topic: str, issue\_title: str, issue\_body: str) + +```python +def add_issues_and_create_cards(token: str, org_name: str, project_name: str, + topic: str, issue_title: str, + issue_body: str) -> None +``` + +Adds an issue to each repository with a specific topic and creates a card for each issue in a Next-Generation GitHub project. + +**Arguments**: + +- `token` _str_ - GitHub Personal Access Token. +- `org_name` _str_ - Name of the GitHub organization. +- `project_name` _str_ - Name of the GitHub project. +- `topic` _str_ - The GitHub topic to filter repositories by. +- `issue_title` _str_ - Title of the issue. +- `issue_body` _str_ - Body of the issue. + + + +#### find\_issues\_by\_title(token: str, org\_name: str, repo\_names: List[str], issue\_title: str) + +```python +def find_issues_by_title(token: str, org_name: str, repo_names: List[str], + issue_title: str) -> List[str] +``` + +Finds issues in a list of repositories with a specific title. + +**Arguments**: + +- `token` _str_ - GitHub Personal Access Token. +- `org_name` _str_ - Name of the GitHub organization. +- `repo_names` _list_ - List of repository names. +- `issue_title` _str_ - Title of the issue to be found. + + +**Returns**: + +- `list` - A list of issue node IDs. 
+ + + +#### add\_issue\_to\_project(token: str, project\_id: str, issue\_node\_id: str) + +```python +def add_issue_to_project(token: str, project_id: str, + issue_node_id: str) -> None +``` + +Adds an issue to a Next-Generation GitHub project. + +**Arguments**: + +- `token` _str_ - GitHub Personal Access Token. +- `project_id` _str_ - Node ID of the GitHub project. +- `issue_node_id` _str_ - Node ID of the GitHub issue. + + + +#### link\_issue\_title\_to\_project(token: str, org\_name: str, project\_name: str, topic: str, issue\_title: str) + +```python +def link_issue_title_to_project(token: str, org_name: str, project_name: str, + topic: str, issue_title: str) -> None +``` + +Links issues with a specific title in repositories with a certain topic to a Next-Generation project. + +**Arguments**: + +- `token` _str_ - GitHub Personal Access Token. +- `org_name` _str_ - Name of the GitHub organization. +- `project_name` _str_ - Name of the GitHub project. +- `topic` _str_ - The GitHub topic to filter repositories by. +- `issue_title` _str_ - Title of the issue to link. + + + +#### main() + +```python +def main() -> None +``` + +Main function to run the specified project maintenance command. 
+ diff --git a/docs/components/ALDashboard/translation.md b/docs/components/ALDashboard/translation.md new file mode 100644 index 000000000..a617dc70a --- /dev/null +++ b/docs/components/ALDashboard/translation.md @@ -0,0 +1,133 @@ +# Table of Contents + +* [ALDashboard.translation](#ALDashboard.translation) + * [gpt\_is\_available](#ALDashboard.translation.gpt_is_available) + * [may\_have\_mako](#ALDashboard.translation.may_have_mako) + * [may\_have\_html](#ALDashboard.translation.may_have_html) + * [translate\_fragments\_gpt](#ALDashboard.translation.translate_fragments_gpt) + * [Translation](#ALDashboard.translation.Translation) + * [file](#ALDashboard.translation.Translation.file) + * [untranslated\_segments](#ALDashboard.translation.Translation.untranslated_segments) + * [translation\_file](#ALDashboard.translation.translation_file) + +--- +sidebar_label: translation +title: ALDashboard.translation +--- + + + +#### gpt\_is\_available() + +```python +def gpt_is_available() -> bool +``` + +Return True if the GPT API is available. + + + +#### may\_have\_mako(text: str) + +```python +def may_have_mako(text: str) -> bool +``` + +Return True if the text appears to contain any Mako code, such as $\{...\} or % at the beginning of a line. + + + +#### may\_have\_html(text: str) + +```python +def may_have_html(text: str) -> bool +``` + +Return True if the text appears to contain any HTML code, such as <p> or <div>. 
+ + + +#### translate\_fragments\_gpt(fragments: Union[str, List[Tuple[int, str]]], source\_language: str, tr\_lang: str, interview\_context: Optional[str] = None, special\_words: Optional[Dict[int, str]] = None, model="gpt-4.1-nano", openai\_base\_url: Optional[str] = None, max\_output\_tokens: Optional[int] = None, max\_input\_tokens: Optional[int] = None, openai\_api: Optional[str] = None) + +```python +def translate_fragments_gpt( + fragments: Union[str, List[Tuple[int, str]]], + source_language: str, + tr_lang: str, + interview_context: Optional[str] = None, + special_words: Optional[Dict[int, str]] = None, + model="gpt-4.1-nano", + openai_base_url: Optional[str] = None, + max_output_tokens: Optional[int] = None, + max_input_tokens: Optional[int] = None, + openai_api: Optional[str] = None) -> Dict[int, str] +``` + +Use an AI model to translate a list of fragments (strings) from one language to another and provide a dictionary +with the original text and the translated text. + +You can optionally provide an alternative model, but it must support JSON mode. + +**Arguments**: + +- `fragments` - A list of strings to be translated. +- `source_language` - The language of the original text. +- `tr_lang` - The language to translate the text into. +- `special_words` - A dictionary of special words that should be translated in a specific way. +- `model` - The GPT model to use. The default is "gpt-4.1-nano" +- `openai_base_url` - The base URL for the OpenAI API. If not provided, the default OpenAI URL will be used. +- `max_output_tokens` - The maximum number of tokens to generate in the output. +- `max_input_tokens` - The maximum number of tokens in the input. If not provided, it will be set to 4000. +- `openai_api` - The OpenAI API key. If not provided, it will use the key from the configuration. + +**Returns**: + + A dictionary where the keys are the indices of the fragments and the values are the translated text. 
+ + + +## Translation Objects + +```python +class Translation(NamedTuple) +``` + + + +#### file: `DAFile` + +an XLSX or XLIFF file + + + +#### untranslated\_segments: `int` + +Number of rows in the output that have untranslated text - one for each question, subquestion, field, etc. + + + +#### translation\_file(yaml\_filename: str, tr\_lang: str, use\_gpt=False, use\_google\_translate=False, openai\_api: Optional[str] = None, max\_tokens=4000, interview\_context: Optional[str] = None, special\_words: Optional[Dict[int, str]] = None, model: Optional[str] = None, openai\_base\_url: Optional[str] = None, max\_input\_tokens: Optional[int] = None, max\_output\_tokens: Optional[int] = None) + +```python +def translation_file(yaml_filename: str, + tr_lang: str, + use_gpt=False, + use_google_translate=False, + openai_api: Optional[str] = None, + max_tokens=4000, + interview_context: Optional[str] = None, + special_words: Optional[Dict[int, str]] = None, + model: Optional[str] = None, + openai_base_url: Optional[str] = None, + max_input_tokens: Optional[int] = None, + max_output_tokens: Optional[int] = None) -> Translation +``` + +Return a tuple of the translation file in XLSX format, plus a count of the +number of words and segments that need to be translated. + +The word and segment count only apply when filetype="XLSX". + +This code was adjusted from the Flask endpoint-only version in server.py. XLIFF support was removed +for now but can be added later. 
+ diff --git a/docs/components/ALDashboard/validate_docx.md b/docs/components/ALDashboard/validate_docx.md new file mode 100644 index 000000000..0f47d4d65 --- /dev/null +++ b/docs/components/ALDashboard/validate_docx.md @@ -0,0 +1,40 @@ +# Table of Contents + +* [ALDashboard.validate\_docx](#ALDashboard.validate_docx) + * [CallAndDebugUndefined](#ALDashboard.validate_docx.CallAndDebugUndefined) + * [\_\_getitem\_\_](#ALDashboard.validate_docx.CallAndDebugUndefined.__getitem__) + * [get\_jinja\_errors](#ALDashboard.validate_docx.get_jinja_errors) + +--- +sidebar_label: validate_docx +title: ALDashboard.validate_docx +--- + + + +## CallAndDebugUndefined Objects + +```python +class CallAndDebugUndefined(DebugUndefined) +``` + +Handles Jinja2 undefined errors by printing the name of the undefined variable. +Extended to handle callable methods. + + + +#### \_\_getitem\_\_ + +type: ignore + + + +#### get\_jinja\_errors(the\_file: str) + +```python +def get_jinja_errors(the_file: str) -> Optional[str] +``` + +Just try rendering the DOCX file as a Jinja2 template and catch any errors. +Returns a string with the errors, if any. + diff --git a/docs/components/ALToolbox/al_income.md b/docs/components/ALToolbox/al_income.md index b232cadf9..fdcbc107a 100644 --- a/docs/components/ALToolbox/al_income.md +++ b/docs/components/ALToolbox/al_income.md @@ -117,7 +117,7 @@ the nearest whole integer. description if not found in the list. -**Example**: +**Examples**: >>> times_per_year([(12, "Monthly"), (1, "Annually")], 12) 'monthly' @@ -153,7 +153,7 @@ birthdate. - `List[int]` - List of years in the specified order. -**Example**: +**Examples**: >>> recent_years(past=3, future=1) # if current year is 2023 [2024, 2023, 2022, 2021] @@ -174,13 +174,13 @@ is 1 (a year). 
**Attributes**: - .value \{str | float | Decimal\} A number representing an amount of money accumulated during +- `value` _str | float | Decimal_ - A number representing an amount of money accumulated during the `times_per_year` of this income. - .times_per_year \{float | Decimal\} Represents a number of the annual frequency of +- `times_per_year` _float | Decimal_ - Represents a number of the annual frequency of the income. E.g. 12 for a monthly income. - .source \{str\} (Optional) The "source" of the income, like a "job" or a "house". - .display_name \{str\} (Optional) If present, will have a translated string to show the - user, as opposed to a raw english string from the program +- `source` _str, optional_ - The "source" of the income, like a "job" or a "house". +- `display_name` _str, optional_ - If present, will have a translated string to show the + user, as opposed to a raw english string from the program. @@ -219,7 +219,7 @@ To calculate `.total()`, an ALPeriodicAmount must have a `.times_per_year` and ` - `Decimal` - The calculated income amount for the specified frequency. -**Example**: +**Examples**: >>> income = ALPeriodicAmount(value=1000, times_per_year=12) # $1000/month >>> income.total(1) # Annual total @@ -242,17 +242,17 @@ is 1 (a year). **Attributes**: - .value \{str | float | Decimal\} A number representing an amount of money accumulated during +- `value` _str | float | Decimal_ - A number representing an amount of money accumulated during the `times_per_year` of this income. - .times_per_year \{float | Decimal\} Represents a number of the annual frequency of +- `times_per_year` _float | Decimal_ - Represents a number of the annual frequency of the income. E.g. 12 for a monthly income. - .is_hourly \{bool\} (Optional) True if the income is hourly. - .hours_per_period \{float | Decimal\} (Optional) If the income is hourly, the number of +- `is_hourly` _bool, optional_ - True if the income is hourly. 
+- `hours_per_period` _float | Decimal, optional_ - If the income is hourly, the number of hours during the annual frequency of this job. E.g. if the annual frequency is 52 (weekly), the hours per week might be 50. That is, 50 hours per week. This attribute is required if `.is_hourly` is True. - .source \{str\} (Optional) The "source" of the income, like a "job" or a "house". - .owner \{str\} (Optional) Full name of the income's owner as a single string. +- `source` _str, optional_ - The "source" of the income, like a "job" or a "house". +- `owner` _str, optional_ - Full name of the income's owner as a single string. @@ -300,14 +300,14 @@ Not much changes from ALPeriodic Amount, just the generic object questions class ALIncomeList(DAList) ``` -Represents a filterable DAList of incomes-type items. It can make -use of these attributes and methods in its items: +Represents a filterable DAList of incomes-type items. -.source -.owner -.times_per_year -.value -.total() +This list expects its items to have the following attributes and methods: +- source: Source identifier for filtering +- owner: Owner name for filtering +- times_per_year: Frequency of the income +- value: Amount value +- total(): Calculate total amount for a given frequency @@ -342,7 +342,7 @@ Returns a set of the unique sources in the ALIncomeList. - `Set[str]` - A set containing all unique source names from items in the list. -**Example**: +**Examples**: >>> income_list = ALIncomeList([ ... ALIncome(source="wages"), @@ -378,7 +378,7 @@ by their source. The source parameter may be a string or a list. - `ALIncomeList` - A new ALIncomeList containing only items with matching sources. -**Example**: +**Examples**: >>> income_list = ALIncomeList([ ... ALIncome(source="wages", value=1000), @@ -424,7 +424,7 @@ exclude deductions. - `Decimal` - The total income amount for the specified frequency and filters. 
-**Example**: +**Examples**: >>> income_list = ALIncomeList([wages_income, tips_income]) >>> income_list.total(times_per_year=12) # Monthly total @@ -473,20 +473,20 @@ Can be stored in an ALJobList. **Attributes**: - .value \{float | Decimal\} A number representing an amount of money accumulated during +- `value` _float | Decimal_ - A number representing an amount of money accumulated during the `times_per_year` of this income. - .times_per_year \{float\} Represents a number of the annual frequency of +- `times_per_year` _float_ - Represents a number of the annual frequency of the value. E.g. 12 for a monthly value. - .is_hourly \{bool\} (Optional): Whether the gross total should be calculated based on hours - worked per week - .hours_per_period \{float\} (Optional) The number of hours during the annual +- `is_hourly` _bool, optional_ - Whether the gross total should be calculated based on hours + worked per week. +- `hours_per_period` _float, optional_ - The number of hours during the annual frequency of this job. E.g. if the annual frequency is 52 (weekly), the hours per week might be 50. That is, 50 hours per week. - .deduction \{float\} (Optional) The amount of money deducted from the total value each period. +- `deduction` _float, optional_ - The amount of money deducted from the total value each period. If this job is hourly, deduction is still from each period, not each hour. Used to calculate the net income in `net_income()`. - .employer \{Individual\} (Optional) A docassemble Individual object, employer.address is the address - and employer.phone is the phone +- `employer` _Individual, optional_ - A docassemble Individual object, employer.address is the address + and employer.phone is the phone. @@ -624,7 +624,7 @@ based on the self.hours_per_period and self.times_per_year attributes. The normalized number of hours worked for the specified frequency. 
-**Example**: +**Examples**: If the person works 10 hours a week, it will return 520 when the times_per_year parameter is 1. @@ -766,12 +766,12 @@ will use all sources. class ALExpenseList(ALIncomeList) ``` -A list of expenses +A list of expenses. -* each element has a: -* value -* source -* display name +Each element has: +value: The monetary value of the expense +source: The source category of the expense +display_name: Human-readable name for display @@ -789,18 +789,18 @@ Can be stored in an ALAssetList. **Attributes**: - .market_value \{float | Decimal\} Market value of the asset. - .balance \{float | Decimal \} Current balance of the account, e.g., like +- `market_value` _float | Decimal_ - Market value of the asset. +- `balance` _float | Decimal_ - Current balance of the account, e.g., like the balance in a checking account, but could also represent a loan amount. - .value \{float | Decimal\} (Optional) Represents the income the asset earns +- `value` _float | Decimal, optional_ - Represents the income the asset earns for a given `times_per_year`, such as interest earned in a checking account. If not defined, the income will be set to 0, to simplify representing the many common assets that do not earn any income. - .times_per_year \{float\} (Optional) Number of times per year the asset +- `times_per_year` _float, optional_ - Number of times per year the asset earns the income listed in the `value` attribute. - .owner \{str\} (Optional) Full name of the asset owner as a single string. - .source \{str\} (Optional) The "source" of the asset, like "vase". +- `owner` _str, optional_ - Full name of the asset owner as a single string. +- `source` _str, optional_ - The "source" of the asset, like "vase". @@ -1020,7 +1020,7 @@ Triggers gathering those attributes and formats them as a single string. - `str` - A formatted string combining year, make, and model of the vehicle. 
-**Example**: +**Examples**: >>> vehicle = ALVehicle(year=2020, make="Toyota", model="Camry") >>> vehicle.year_make_model() @@ -1051,10 +1051,10 @@ item in an ALSimpleValueList. **Attributes**: - .value \{str | float \} The monetary value of the item. - .transaction_type \{str\} (Optional) Can be "expense", which will give a +- `value` _str | float_ - The monetary value of the item. +- `transaction_type` _str, optional_ - Can be "expense", which will give a negative value to the total of the item. - .source \{str\} (Optional) The "source" of the item, like "vase". +- `source` _str, optional_ - The "source" of the item, like "vase". @@ -1144,18 +1144,21 @@ An item in an ALItemizedValueDict (a line item like wages, tips or union dues). Should be a positive number, even if it will later be subtracted from the job's net total. -WARNING: This item's period-based value can't be calculated correctly -outside of an ALItemizedJob. Its value should only be accessed through the -filtering methods of the ALItemizedJob that contains it. +**Warnings**: + + This item's period-based value can't be calculated correctly + outside of an ALItemizedJob. Its value should only be accessed through the + filtering methods of the ALItemizedJob that contains it. + **Attributes**: - .value \{float | Decimal\} A number representing an amount of money accumulated +- `value` _float | Decimal_ - A number representing an amount of money accumulated during the `times_per_year` of this item or this item's job. - .is_hourly \{bool\} Whether this particular item is calculated hourly. - .times_per_year \{ float\} A denominator of a year representing the annual +- `is_hourly` _bool_ - Whether this particular item is calculated hourly. +- `times_per_year` _float_ - A denominator of a year representing the annual frequency of the job. E.g. 12 for monthly. 
- .exists \{bool\} (Optional) Allows an interview author to pre-define some common +- `exists` _bool, optional_ - Allows an interview author to pre-define some common descriptors, like "wages" or "union dues" without requiring the user to provide a value for each item. @@ -1201,7 +1204,7 @@ returns 0. Otherwise returns the decimal value of the item. or has no value. -**Example**: +**Examples**: >>> item = ALItemizedValue(value=1500, exists=True) >>> item.total() @@ -1234,8 +1237,10 @@ ALItemizedJob. E.g., wages, tips and deductions being the most common. An ALItemizedJob will have two ALItemizedValueDicts, one for income and one for deductions. -WARNING: Should only be accessed through an ALItemizedJob. Otherwise -you may get unexpected results. +**Warnings**: + + Should only be accessed through an ALItemizedJob. Otherwise + you may get unexpected results. @@ -1271,7 +1276,7 @@ as not existing (exists=False). - `Decimal` - The sum of all existing item values in the dictionary. -**Example**: +**Examples**: >>> value_dict = ALItemizedValueDict() >>> value_dict['wages'] = ALItemizedValue(value=1000, exists=True) @@ -1319,22 +1324,25 @@ income in code. **Attributes**: - .to_add \{ALItemizedValueDict\} Dict of ALItemizedValues that would be added +- `to_add` _ALItemizedValueDict_ - Dict of ALItemizedValues that would be added to a job's net total, like wages and tips. - .to_subtract \{ALItemizedValueDict\} Dict of ALItemizedValues that would be +- `to_subtract` _ALItemizedValueDict_ - Dict of ALItemizedValues that would be subtracted from a net total, like union dues or insurance premiums. 
- .times_per_year \{float\} A denominator of a year, like 12 for monthly, that - represents how frequently the income is earned - .is_hourly \{bool\} (Optional) Whether the value represents a figure that the - user earns on an hourly basis, rather than for the full time period - .hours_per_period \{int\} (Optional) If the job is hourly, how many hours the +- `times_per_year` _float_ - A denominator of a year, like 12 for monthly, that + represents how frequently the income is earned. +- `is_hourly` _bool, optional_ - Whether the value represents a figure that the + user earns on an hourly basis, rather than for the full time period. +- `hours_per_period` _int, optional_ - If the job is hourly, how many hours the user works per period. - .employer \{Individual\} (Optional) Individual assumed to have a name and, +- `employer` _Individual, optional_ - Individual assumed to have a name and, optionally, an address and phone. - .source \{str\} (Optional) The category of this item, like "public service". +- `source` _str, optional_ - The category of this item, like "public service". Defaults to "job". -- `WARNING` - Individual items in `.to_add` and `.to_subtract` should not be used + +**Warnings**: + + Individual items in `.to_add` and `.to_subtract` should not be used directly. They should only be accessed through the filtering methods of this job. 
diff --git a/docs/components/AssemblyLine/al_general.md b/docs/components/AssemblyLine/al_general.md index d54b9f491..1fe4cab75 100644 --- a/docs/components/AssemblyLine/al_general.md +++ b/docs/components/AssemblyLine/al_general.md @@ -64,6 +64,7 @@ * [section\_links](#AssemblyLine.al_general.section_links) * [will\_send\_to\_real\_court](#AssemblyLine.al_general.will_send_to_real_court) * [filter\_letters](#AssemblyLine.al_general.filter_letters) + * [fa\_icon](#AssemblyLine.al_general.fa_icon) * [is\_sms\_enabled](#AssemblyLine.al_general.is_sms_enabled) * [is\_phone\_or\_email](#AssemblyLine.al_general.is_phone_or_email) * [github\_modified\_date](#AssemblyLine.al_general.github_modified_date) @@ -1429,6 +1430,33 @@ Avoid using, this is created for 209A. - `str` - A string of unique letters. + + +#### fa\_icon(icon: str, color: str = "primary", color\_css: Optional[str] = None, size: str = "sm") + +```python +def fa_icon(icon: str, + color: str = "primary", + color_css: Optional[str] = None, + size: str = "sm") -> str +``` + +Return HTML for a font-awesome icon of the specified size and color. You can reference +a CSS variable (such as Bootstrap theme color) or a true CSS color reference, such as 'blue' or +'`DDDDDD`'. Defaults to Bootstrap theme color "primary". + +**Arguments**: + +- `icon` _str_ - The name of the icon to use. See https://fontawesome.com/icons for a list of icons. +- `color` _str_ - The color of the icon. Defaults to "primary". +- `color_css` _Optional[str]_ - A CSS variable or color reference. Defaults to None. +- `size` _str_ - The size of the icon. Defaults to "sm". + + +**Returns**: + +- `str` - HTML for the icon. 
+ #### is\_sms\_enabled() diff --git a/docs/components/EFSPIntegration/conversions.md b/docs/components/EFSPIntegration/conversions.md index a722257ed..44282137c 100644 --- a/docs/components/EFSPIntegration/conversions.md +++ b/docs/components/EFSPIntegration/conversions.md @@ -27,10 +27,14 @@ Functions that help convert the JSON-ized XML from the proxy server into usable -#### error\_notification(err, message=None, trace=None, referer=None) +#### error\_notification(err, message=None, trace=None, referer=None, the\_vars=None) ```python -def error_notification(err, message=None, trace=None, referer=None) +def error_notification(err, + message=None, + trace=None, + referer=None, + the_vars=None) ``` Copied from docassemble.webapp.server.error_notification, since: diff --git a/docs/components/EFSPIntegration/efm_client.md b/docs/components/EFSPIntegration/efm_client.md index 86387fa37..174c2e8a3 100644 --- a/docs/components/EFSPIntegration/efm_client.md +++ b/docs/components/EFSPIntegration/efm_client.md @@ -1,8 +1,6 @@ # Table of Contents * [EFSPIntegration.efm\_client](#EFSPIntegration.efm_client) - * [DALogger](#EFSPIntegration.efm_client.DALogger) - * [log](#EFSPIntegration.efm_client.DALogger.log) * [ProxyConnection](#EFSPIntegration.efm_client.ProxyConnection) * [\_\_init\_\_](#EFSPIntegration.efm_client.ProxyConnection.__init__) * [authenticate\_user](#EFSPIntegration.efm_client.ProxyConnection.authenticate_user) @@ -15,25 +13,6 @@ sidebar_label: efm_client title: EFSPIntegration.efm_client --- - - -## DALogger Objects - -```python -class DALogger(LoggerAdapter) -``` - - - -#### log(level, msg, \*args, \*\*kwargs) - -```python -def log(level, msg, *args, **kwargs) -``` - -Delegate a log call to Docassemble's `log` function, after adding -contextual information from this adapter instance. 
- ## ProxyConnection Objects diff --git a/docs/components/EFSPIntegration/py_efsp_client.md b/docs/components/EFSPIntegration/py_efsp_client.md index 805abcbf7..5559030a2 100644 --- a/docs/components/EFSPIntegration/py_efsp_client.md +++ b/docs/components/EFSPIntegration/py_efsp_client.md @@ -1,8 +1,6 @@ # Table of Contents * [EFSPIntegration.py\_efsp\_client](#EFSPIntegration.py_efsp_client) - * [CORR\_ID\_HEADER](#EFSPIntegration.py_efsp_client.CORR_ID_HEADER) - * [LoggerWithContext](#EFSPIntegration.py_efsp_client.LoggerWithContext) * [EfspConnection](#EFSPIntegration.py_efsp_client.EfspConnection) * [\_\_init\_\_](#EFSPIntegration.py_efsp_client.EfspConnection.__init__) * [authenticate\_user](#EFSPIntegration.py_efsp_client.EfspConnection.authenticate_user) @@ -27,24 +25,6 @@ The base python client used to communicate with the E-file proxy server. Doesn't include anything from docassemble, and can be used without having it installed. - - -#### CORR\_ID\_HEADER - -TODO(brycew): Figure out how to add - - - -## LoggerWithContext Objects - -```python -class LoggerWithContext(LoggerAdapter) -``` - -Acts like the `merge_extra` feature from LoggerAdapter (python 3.13) is always on. - -See https://github.com/python/cpython/pull/107292/files. - ## EfspConnection Objects @@ -57,14 +37,10 @@ A python client that communicates with the E-file proxy server. 
-#### \_\_init\_\_(\*, url: str, api\_key: str, default\_jurisdiction: str = None, logger=None) +#### \_\_init\_\_(\*, url: str, api\_key: str, default\_jurisdiction: str = None) ```python -def __init__(*, - url: str, - api_key: str, - default_jurisdiction: str = None, - logger=None) +def __init__(*, url: str, api_key: str, default_jurisdiction: str = None) ``` **Arguments**: diff --git a/docs/components/formfyxer/docx_wrangling.md b/docs/components/formfyxer/docx_wrangling.md new file mode 100644 index 000000000..a444a2602 --- /dev/null +++ b/docs/components/formfyxer/docx_wrangling.md @@ -0,0 +1,185 @@ +# Table of Contents + +* [formfyxer.docx\_wrangling](#formfyxer.docx_wrangling) + * [update\_docx](#formfyxer.docx_wrangling.update_docx) + * [get\_docx\_repr](#formfyxer.docx_wrangling.get_docx_repr) + * [get\_labeled\_docx\_runs](#formfyxer.docx_wrangling.get_labeled_docx_runs) + * [get\_modified\_docx\_runs](#formfyxer.docx_wrangling.get_modified_docx_runs) + * [make\_docx\_plain\_language](#formfyxer.docx_wrangling.make_docx_plain_language) + * [modify\_docx\_with\_openai\_guesses](#formfyxer.docx_wrangling.modify_docx_with_openai_guesses) + +--- +sidebar_label: docx_wrangling +title: formfyxer.docx_wrangling +--- + + + +#### update\_docx(document: Union[docx.document.Document, str], modified\_runs: List[Tuple[int, int, str, int]]) + +```python +def update_docx( + document: Union[docx.document.Document, str], + modified_runs: List[Tuple[int, int, str, + int]]) -> docx.document.Document +``` + +Update the document with the modified runs. + +Note: OpenAI is probabilistic, so the modified run indices may not be correct. +When the index of a run or paragraph is out of range, a new paragraph +will be inserted at the end of the document or a new run at the end of the +paragraph's runs. + +Take a careful look at the output document to make sure it is still correct. 
+ +**Arguments**: + +- `document` - the docx.Document object, or the path to the DOCX file +- `modified_runs` - a tuple of paragraph number, run number, the modified text, a question (not used), and whether a new paragraph should be inserted (for conditional text) + + +**Returns**: + + The modified document. + + + +#### get\_docx\_repr(docx\_path: str, paragraph\_start: int = 0, paragraph\_end: Optional[int] = None) + +```python +def get_docx_repr(docx_path: str, + paragraph_start: int = 0, + paragraph_end: Optional[int] = None) +``` + +Return a JSON representation of the paragraphs and runs in the DOCX file. + +**Arguments**: + +- `docx_path` - path to the DOCX file + + +**Returns**: + + A JSON representation of the paragraphs and runs in the DOCX file. + + + +#### get\_labeled\_docx\_runs(docx\_path: Optional[str] = None, docx\_repr=Optional[str], custom\_people\_names: Optional[Tuple[str, str]] = None, openai\_client: Optional[OpenAI] = None, api\_key: Optional[str] = None) + +```python +def get_labeled_docx_runs( + docx_path: Optional[str] = None, + docx_repr=Optional[str], + custom_people_names: Optional[Tuple[str, str]] = None, + openai_client: Optional[OpenAI] = None, + api_key: Optional[str] = None) -> List[Tuple[int, int, str, int]] +``` + +Scan the DOCX and return a list of modified text with Jinja2 variable names inserted. + +**Arguments**: + +- `docx_path` - path to the DOCX file +- `docx_repr` - a string representation of the paragraphs and runs in the DOCX file, if docx_path is not provided. This might be useful if you want +- `custom_people_names` - a tuple of custom names and descriptions to use in addition to the default ones. Like: ("clients", "the person benefiting from the form") + + +**Returns**: + + A list of tuples, each containing a paragraph number, run number, and the modified text of the run. 
+ + + +#### get\_modified\_docx\_runs(docx\_path: Optional[str] = None, docx\_repr: Optional[str] = None, custom\_example: str = "", instructions: str = "", openai\_client: Optional[OpenAI] = None, api\_key: Optional[str] = None, temperature=0.5) + +```python +def get_modified_docx_runs(docx_path: Optional[str] = None, + docx_repr: Optional[str] = None, + custom_example: str = "", + instructions: str = "", + openai_client: Optional[OpenAI] = None, + api_key: Optional[str] = None, + temperature=0.5) -> List[Tuple[int, int, str, int]] +``` + +Use GPT to rewrite the contents of a DOCX file paragraph by paragraph. Does not handle tables, footers, or +other structures yet. + +This is a light wrapper that provides the structure of DOCX paragraphs and runs to your prompt +to OpenAI to facilitate the rewriting of the document without disrupting formatting. + +For example, this could be used to: +* Remove any passive voice +* Replace placeholder text with variable names +* Rewrite to a 6th grade reading level +* Do an advanced search and replace, without requiring you to use a regex + +By default, the example prompt includes a sample like this: + +[ +[0, 0, "Dear "], +[0, 1, "John Smith:"], +[1, 0, "I hope this letter finds you well."], +] + +Your custom instructions should include an example of how the sample will be modified, like the one below: + +Example reply, indicating paragraph, run, the new text, and a number indicating if this changes the +current paragraph, adds one before, or adds one after (-1, 0, 1): + +\{"results": +[ +[0, 1, "Dear \{\{ other_parties[0] \}\}:", 0], +[2, 0, "\{%p if is_tenant %\}", -1], +[3, 0, "\{%p endif %\}", 1], +] +\} + +You may also want to customize the input example to better match your use case. + +**Arguments**: + +- `docx_path` _str_ - path to the DOCX file +- `docx_repr` _str_ - a string representation of the paragraphs and runs in the DOCX file, if docx_path is not provided. 
+- `custom_example` _Optional[str]_ - a string containing the purpose and overview of the task
+- `instructions` _str_ - a string containing specific instructions for the task
+- `openai_client` _Optional[OpenAI]_ - an OpenAI client object. If not provided a new one will be created.
+- `api_key` _Optional[str]_ - an OpenAI API key. If not provided, it will be obtained from the environment
+- `temperature` _float_ - the temperature to use when generating text. Lower temperatures are more conservative.
+
+
+**Returns**:
+
+  A list of tuples, each containing a paragraph number, run number, and the modified text of the run.
+
+<a id="formfyxer.docx_wrangling.make_docx_plain_language"></a>
+
+#### make\_docx\_plain\_language(docx\_path: str)
+
+```python
+def make_docx_plain_language(docx_path: str) -> docx.document.Document
+```
+
+Convert a DOCX file to plain language with the help of OpenAI.
+
+<a id="formfyxer.docx_wrangling.modify_docx_with_openai_guesses"></a>
+
+#### modify\_docx\_with\_openai\_guesses(docx\_path: str)
+
+```python
+def modify_docx_with_openai_guesses(docx_path: str) -> docx.document.Document
+```
+
+Uses OpenAI to guess the variable names for a document and then modifies the document with the guesses.
+
+**Arguments**:
+
+- `docx_path` _str_ - Path to the DOCX file to modify.
+ + +**Returns**: + +- `docx.Document` - The modified document, ready to be saved to the same or a new path + diff --git a/docs/components/formfyxer/lit_explorer.md b/docs/components/formfyxer/lit_explorer.md new file mode 100644 index 000000000..bd2548754 --- /dev/null +++ b/docs/components/formfyxer/lit_explorer.md @@ -0,0 +1,528 @@ +# Table of Contents + +* [formfyxer.lit\_explorer](#formfyxer.lit_explorer) + * [recursive\_get\_id](#formfyxer.lit_explorer.recursive_get_id) + * [spot](#formfyxer.lit_explorer.spot) + * [re\_case](#formfyxer.lit_explorer.re_case) + * [regex\_norm\_field](#formfyxer.lit_explorer.regex_norm_field) + * [reformat\_field](#formfyxer.lit_explorer.reformat_field) + * [norm](#formfyxer.lit_explorer.norm) + * [vectorize](#formfyxer.lit_explorer.vectorize) + * [normalize\_name](#formfyxer.lit_explorer.normalize_name) + * [cluster\_screens](#formfyxer.lit_explorer.cluster_screens) + * [InputType](#formfyxer.lit_explorer.InputType) + * [field\_types\_and\_sizes](#formfyxer.lit_explorer.field_types_and_sizes) + * [AnswerType](#formfyxer.lit_explorer.AnswerType) + * [classify\_field](#formfyxer.lit_explorer.classify_field) + * [get\_adjusted\_character\_count](#formfyxer.lit_explorer.get_adjusted_character_count) + * [time\_to\_answer\_field](#formfyxer.lit_explorer.time_to_answer_field) + * [time\_to\_answer\_form](#formfyxer.lit_explorer.time_to_answer_form) + * [cleanup\_text](#formfyxer.lit_explorer.cleanup_text) + * [text\_complete](#formfyxer.lit_explorer.text_complete) + * [complete\_with\_command](#formfyxer.lit_explorer.complete_with_command) + * [needs\_calculations](#formfyxer.lit_explorer.needs_calculations) + * [tools\_passive](#formfyxer.lit_explorer.tools_passive) + * [get\_passive\_sentences](#formfyxer.lit_explorer.get_passive_sentences) + * [get\_citations](#formfyxer.lit_explorer.get_citations) + * [get\_sensitive\_data\_types](#formfyxer.lit_explorer.get_sensitive_data_types) + * 
[substitute\_phrases](#formfyxer.lit_explorer.substitute_phrases) + * [substitute\_neutral\_gender](#formfyxer.lit_explorer.substitute_neutral_gender) + * [substitute\_plain\_language](#formfyxer.lit_explorer.substitute_plain_language) + * [transformed\_sentences](#formfyxer.lit_explorer.transformed_sentences) + * [parse\_form](#formfyxer.lit_explorer.parse_form) + * [form\_complexity](#formfyxer.lit_explorer.form_complexity) + +--- +sidebar_label: lit_explorer +title: formfyxer.lit_explorer +--- + + + +#### recursive\_get\_id(values\_to\_unpack: Union[dict, list], tmpl: Optional[set] = None) + +```python +def recursive_get_id(values_to_unpack: Union[dict, list], + tmpl: Optional[set] = None) +``` + +Pull ID values out of the LIST/NSMI results from Spot. + + + +#### spot(text: str, lower: float = 0.25, pred: float = 0.5, upper: float = 0.6, verbose: float = 0, token: str = "") + +```python +def spot(text: str, + lower: float = 0.25, + pred: float = 0.5, + upper: float = 0.6, + verbose: float = 0, + token: str = "") +``` + +Call the Spot API (https://spot.suffolklitlab.org) to classify the text of a PDF using +the NSMIv2/LIST taxonomy (https://taxonomy.legal/), but returns only the IDs of issues found in the text. + + + +#### re\_case(text: str) + +```python +def re_case(text: str) -> str +``` + +Capture PascalCase, snake_case and kebab-case terms and add spaces to separate the joined words + + + +#### regex\_norm\_field(text: str) + +```python +def regex_norm_field(text: str) +``` + +Apply some heuristics to a field name to see if we can get it to match AssemblyLine conventions. 
+See: https://suffolklitlab.org/docassemble-AssemblyLine-documentation/docs/document_variables
+
+<a id="formfyxer.lit_explorer.reformat_field"></a>
+
+#### reformat\_field(text: str, max\_length: int = 30, tools\_token: Optional[str] = None)
+
+```python
+def reformat_field(text: str,
+                   max_length: int = 30,
+                   tools_token: Optional[str] = None)
+```
+
+Transforms a string of text into a snake_case variable close in length to `max_length` name by
+summarizing the string and stitching the summary together in snake_case.
+h/t https://towardsdatascience.com/nlp-building-a-summariser-68e0c19e3a93
+
+<a id="formfyxer.lit_explorer.norm"></a>
+
+#### norm(row)
+
+```python
+def norm(row)
+```
+
+Normalize a word vector.
+
+<a id="formfyxer.lit_explorer.vectorize"></a>
+
+#### vectorize(text: Union[List[str], str], tools\_token: Optional[str] = None)
+
+```python
+def vectorize(text: Union[List[str], str], tools_token: Optional[str] = None)
+```
+
+Vectorize a string of text.
+
+**Arguments**:
+
+- `text` - a string of multiple words to vectorize
+- `tools_token` - the token to tools.suffolklitlab.org, used for micro-service
+  to reduce the amount of memory you need on your machine. If
+  not passed, you need to have `en_core_web_lg` installed. NOTE: this
+  last bit is no longer correct, you have to use the micro-service
+  as we have had to remove SpaCY due to a breaking change
+
+<a id="formfyxer.lit_explorer.normalize_name"></a>
+
+#### normalize\_name(jur: str, group: str, n: int, per, last\_field: str, this\_field: str, tools\_token: Optional[str] = None)
+
+```python
+def normalize_name(jur: str,
+                   group: str,
+                   n: int,
+                   per,
+                   last_field: str,
+                   this_field: str,
+                   tools_token: Optional[str] = None) -> Tuple[str, float]
+```
+
+Normalize a field name, if possible to the Assembly Line conventions, and if
+not, to a snake_case variable name of appropriate length.
+
+HACK: temporarily all we do is re-case it and normalize it using regex rules.
+Will be replaced with call to LLM soon.
+ + + +#### cluster\_screens(fields: List[str] = [], damping: float = 0.7, tools\_token: Optional[str] = None) + +```python +def cluster_screens(fields: List[str] = [], + damping: float = 0.7, + tools_token: Optional[str] = None) -> Dict[str, List[str]] +``` + +Groups the given fields into screens based on how much they are related. + +**Arguments**: + +- `fields` - a list of field names +- `damping` - a value >= 0.5 and < 1. Tunes how related screens should be +- `tools_token` - the token to tools.suffolklitlab.org, needed of doing + micro-service vectorization + +- `Returns` - a suggested screen grouping, each screen name mapped to the list of fields on it + + + +## InputType Objects + +```python +class InputType(Enum) +``` + +Input type maps onto the type of input the PDF author chose for the field. We only +handle text, checkbox, and signature fields. + + + +#### field\_types\_and\_sizes(fields: Optional[Iterable[FormField]]) + +```python +def field_types_and_sizes( + fields: Optional[Iterable[FormField]]) -> List[FieldInfo] +``` + +Transform the fields provided by get_existing_pdf_fields into a summary format. +Result will look like: +[ +\{ +"var_name": var_name, +"type": "text | checkbox | signature", +"max_length": n +\} +] + + + +## AnswerType Objects + +```python +class AnswerType(Enum) +``` + +Answer type describes the effort the user answering the form will require. +"Slot-in" answers are a matter of almost instantaneous recall, e.g., name, address, etc. +"Gathered" answers require looking around one's desk, for e.g., a health insurance number. +"Third party" answers require picking up the phone to call someone else who is the keeper +of the information. +"Created" answers don't exist before the user is presented with the question. They may include +a choice, creating a narrative, or even applying legal reasoning. "Affidavits" are a special +form of created answers. 
+See Jarret and Gaffney, Forms That Work (2008) + + + +#### classify\_field(field: FieldInfo, new\_name: str) + +```python +def classify_field(field: FieldInfo, new_name: str) -> AnswerType +``` + +Apply heuristics to the field's original and "normalized" name to classify +it as either a "slot-in", "gathered", "third party" or "created" field type. + + + +#### get\_adjusted\_character\_count(field: FieldInfo) + +```python +def get_adjusted_character_count(field: FieldInfo) -> float +``` + +Determines the bracketed length of an input field based on its max_length attribute, +returning a float representing the approximate length of the field content. + +The function chunks the answers into 5 different lengths (checkboxes, 2 words, short, medium, and long) +instead of directly using the character count, as forms can allocate different spaces +for the same data without considering the space the user actually needs. + +**Arguments**: + +- `field` _FieldInfo_ - An object containing information about the input field, + including the "max_length" attribute. + + +**Returns**: + +- `float` - The approximate length of the field content, categorized into checkboxes, 2 words, short, + medium, or long based on the max_length attribute. + + +**Examples**: + + >>> get_adjusted_character_count(\{"type"\}: InputType.CHECKBOX) + 4.7 + >>> get_adjusted_character_count(\{"max_length": 100\}) + 9.4 + >>> get_adjusted_character_count(\{"max_length": 300\}) + 230 + >>> get_adjusted_character_count(\{"max_length": 600\}) + 115 + >>> get_adjusted_character_count(\{"max_length": 1200\}) + 1150 + + + +#### time\_to\_answer\_field(field: FieldInfo, new\_name: str, cpm: int = 40, cpm\_std\_dev: int = 17) + +```python +def time_to_answer_field(field: FieldInfo, + new_name: str, + cpm: int = 40, + cpm_std_dev: int = 17) -> Callable[[int], np.ndarray] +``` + +Apply a heuristic for the time it takes to answer the given field, in minutes. +It is hand-written for now. 
+It will factor in the input type, the answer type (slot in, gathered, third party or created), and the +amount of input text allowed in the field. +The return value is a function that can return N samples of how long it will take to answer the field (in minutes) + + + +#### time\_to\_answer\_form(processed\_fields, normalized\_fields) + +```python +def time_to_answer_form(processed_fields, + normalized_fields) -> Tuple[float, float] +``` + +Provide an estimate of how long it would take an average user to respond to the questions +on the provided form. +We use signals such as the field type, name, and space provided for the response to come up with a +rough estimate, based on whether the field is: +1. fill in the blank +2. gathered - e.g., an id number, case number, etc. +3. third party: need to actually ask someone the information - e.g., income of not the user, anything else? +4. created: +a. short created (3 lines or so?) +b. long created (anything over 3 lines) + + + +#### cleanup\_text(text: str, fields\_to\_sentences: bool = False) + +```python +def cleanup_text(text: str, fields_to_sentences: bool = False) -> str +``` + +Apply cleanup routines to text to provide more accurate readability statistics. + + + +#### text\_complete(prompt: str, max\_tokens: int = 500, creds: Optional[OpenAiCreds] = None, temperature: float = 0) + +```python +def text_complete(prompt: str, + max_tokens: int = 500, + creds: Optional[OpenAiCreds] = None, + temperature: float = 0) -> str +``` + +Run a prompt via openAI's API and return the result. + +**Arguments**: + +- `prompt` _str_ - The prompt to send to the API. +- `max_tokens` _int, optional_ - The number of tokens to generate. Defaults to 500. +- `creds` _Optional[OpenAiCreds], optional_ - The credentials to use. Defaults to None. +- `temperature` _float, optional_ - The temperature to use. Defaults to 0. 
+ + + +#### complete\_with\_command(text, command, tokens, creds: Optional[OpenAiCreds] = None) + +```python +def complete_with_command(text, + command, + tokens, + creds: Optional[OpenAiCreds] = None) -> str +``` + +Combines some text with a command to send to open ai. + + + +#### needs\_calculations(text: Union[str]) + +```python +def needs_calculations(text: Union[str]) -> bool +``` + +A conservative guess at if a given form needs the filler to make math calculations, +something that should be avoided. If + + + +#### tools\_passive(input: Union[List[str], str], tools\_token: Optional[str] = None) + +```python +def tools_passive(input: Union[List[str], str], + tools_token: Optional[str] = None) +``` + +Ping passive voice API for list of sentences using the passive voice + + + +#### get\_passive\_sentences(text: Union[List, str], tools\_token: Optional[str] = None) + +```python +def get_passive_sentences( + text: Union[List, str], + tools_token: Optional[str] = None +) -> List[Tuple[str, List[Tuple[int, int]]]] +``` + +Return a list of tuples, where each tuple represents a +sentence in which passive voice was detected along with a list of the +starting and ending position of each fragment that is phrased in the passive voice. +The combination of the two can be used in the PDFStats frontend to highlight the +passive text in an individual sentence. + +Text can either be a string or a list of strings. +If provided a single string, it will be tokenized with NTLK and +sentences containing fewer than 2 words will be ignored. 
+ + + +#### get\_citations(text: str, tokenized\_sentences: List[str]) + +```python +def get_citations(text: str, tokenized_sentences: List[str]) -> List[str] +``` + +Get citations and some extra surrounding context (the full sentence), if the citation is +fewer than 5 characters (often eyecite only captures a section symbol +for state-level short citation formats) + + + +#### get\_sensitive\_data\_types(fields: List[str], fields\_old: Optional[List[str]] = None) + +```python +def get_sensitive_data_types( + fields: List[str], + fields_old: Optional[List[str]] = None) -> Dict[str, List[str]] +``` + +Given a list of fields, identify those related to sensitive information and return a dictionary with the sensitive +fields grouped by type. A list of the old field names can also be provided. These fields should be in the same +order. Passing the old field names allows the sensitive field algorithm to match more accurately. The return value +will not contain the old field name, only the corresponding field name from the first parameter. + +The sensitive data types are: Bank Account Number, Credit Card Number, Driver's License Number, and Social Security +Number. + + + +#### substitute\_phrases(input\_string: str, substitution\_phrases: Dict[str, str]) + +```python +def substitute_phrases( + input_string: str, + substitution_phrases: Dict[str, + str]) -> Tuple[str, List[Tuple[int, int]]] +``` + +Substitute phrases in the input string and return the new string and positions of substituted phrases. + +**Arguments**: + +- `input_string` _str_ - The input string containing phrases to be replaced. +- `substitution_phrases` _Dict[str, str]_ - A dictionary mapping original phrases to their replacement phrases. + + +**Returns**: + + Tuple[str, List[Tuple[int, int]]]: A tuple containing the new string with substituted phrases and a list of + tuples, each containing the start and end positions of the substituted + phrases in the new string. 
+ + +**Example**: + + >>> input_string = "The quick brown fox jumped over the lazy dog." + >>> substitution_phrases = \{"quick brown": "swift reddish", "lazy dog": "sleepy canine"\} + >>> new_string, positions = substitute_phrases(input_string, substitution_phrases) + >>> print(new_string) + "The swift reddish fox jumped over the sleepy canine." + >>> print(positions) + [(4, 17), (35, 48)] + + + +#### substitute\_neutral\_gender(input\_string: str) + +```python +def substitute_neutral_gender( + input_string: str) -> Tuple[str, List[Tuple[int, int]]] +``` + +Substitute gendered phrases with neutral phrases in the input string. +Primary source is https://github.com/joelparkerhenderson/inclusive-language + + + +#### substitute\_plain\_language(input\_string: str) + +```python +def substitute_plain_language( + input_string: str) -> Tuple[str, List[Tuple[int, int]]] +``` + +Substitute complex phrases with simpler alternatives. +Source of terms is drawn from https://www.plainlanguage.gov/guidelines/words/ + + + +#### transformed\_sentences(sentence\_list: List[str], fun: Callable) + +```python +def transformed_sentences( + sentence_list: List[str], + fun: Callable) -> List[Tuple[str, str, List[Tuple[int, int]]]] +``` + +Apply a function to a list of sentences and return only the sentences with changed terms. +The result is a tuple of the original sentence, new sentence, and the starting and ending position +of each changed fragment in the sentence. 
+ + + +#### parse\_form(in\_file: str, title: Optional[str] = None, jur: Optional[str] = None, cat: Optional[str] = None, normalize: bool = True, spot\_token: Optional[str] = None, tools\_token: Optional[str] = None, openai\_creds: Optional[OpenAiCreds] = None, rewrite: bool = False, debug: bool = False) + +```python +def parse_form(in_file: str, + title: Optional[str] = None, + jur: Optional[str] = None, + cat: Optional[str] = None, + normalize: bool = True, + spot_token: Optional[str] = None, + tools_token: Optional[str] = None, + openai_creds: Optional[OpenAiCreds] = None, + rewrite: bool = False, + debug: bool = False) +``` + +Read in a pdf, pull out basic stats, attempt to normalize its form fields, and re-write the +in_file with the new fields (if `rewrite=1`). If you pass a spot token, we will guess the +NSMI code. If you pass openai creds, we will give suggestions for the title and description. + + + +#### form\_complexity(stats) + +```python +def form_complexity(stats) +``` + +Gets a single number of how hard the form is to complete. Higher is harder. 
+ diff --git a/docs/components/formfyxer/pdf_wrangling.md b/docs/components/formfyxer/pdf_wrangling.md new file mode 100644 index 000000000..7268a80d7 --- /dev/null +++ b/docs/components/formfyxer/pdf_wrangling.md @@ -0,0 +1,529 @@ +# Table of Contents + +* [formfyxer.pdf\_wrangling](#formfyxer.pdf_wrangling) + * [FieldType](#formfyxer.pdf_wrangling.FieldType) + * [TEXT](#formfyxer.pdf_wrangling.FieldType.TEXT) + * [AREA](#formfyxer.pdf_wrangling.FieldType.AREA) + * [LIST\_BOX](#formfyxer.pdf_wrangling.FieldType.LIST_BOX) + * [CHOICE](#formfyxer.pdf_wrangling.FieldType.CHOICE) + * [FormField](#formfyxer.pdf_wrangling.FormField) + * [\_\_init\_\_](#formfyxer.pdf_wrangling.FormField.__init__) + * [set\_fields](#formfyxer.pdf_wrangling.set_fields) + * [rename\_pdf\_fields](#formfyxer.pdf_wrangling.rename_pdf_fields) + * [unlock\_pdf\_in\_place](#formfyxer.pdf_wrangling.unlock_pdf_in_place) + * [has\_fields](#formfyxer.pdf_wrangling.has_fields) + * [get\_existing\_pdf\_fields](#formfyxer.pdf_wrangling.get_existing_pdf_fields) + * [swap\_pdf\_page](#formfyxer.pdf_wrangling.swap_pdf_page) + * [copy\_pdf\_fields](#formfyxer.pdf_wrangling.copy_pdf_fields) + * [get\_textboxes\_in\_pdf](#formfyxer.pdf_wrangling.get_textboxes_in_pdf) + * [get\_bracket\_chars\_in\_pdf](#formfyxer.pdf_wrangling.get_bracket_chars_in_pdf) + * [intersect\_bbox](#formfyxer.pdf_wrangling.intersect_bbox) + * [intersect\_bboxs](#formfyxer.pdf_wrangling.intersect_bboxs) + * [contain\_boxes](#formfyxer.pdf_wrangling.contain_boxes) + * [get\_dist\_sq](#formfyxer.pdf_wrangling.get_dist_sq) + * [get\_dist](#formfyxer.pdf_wrangling.get_dist) + * [get\_connected\_edges](#formfyxer.pdf_wrangling.get_connected_edges) + * [bbox\_distance](#formfyxer.pdf_wrangling.bbox_distance) + * [get\_possible\_fields](#formfyxer.pdf_wrangling.get_possible_fields) + * [get\_possible\_checkboxes](#formfyxer.pdf_wrangling.get_possible_checkboxes) + * [get\_possible\_radios](#formfyxer.pdf_wrangling.get_possible_radios) + * 
[get\_possible\_text\_fields](#formfyxer.pdf_wrangling.get_possible_text_fields) + * [auto\_add\_fields](#formfyxer.pdf_wrangling.auto_add_fields) + * [is\_tagged](#formfyxer.pdf_wrangling.is_tagged) + +--- +sidebar_label: pdf_wrangling +title: formfyxer.pdf_wrangling +--- + + + +## FieldType Objects + +```python +class FieldType(Enum) +``` + + + +#### TEXT + +Text input Field + + + +#### AREA + +Text input Field, but an area + + + +#### LIST\_BOX + +allows multiple selection + + + +#### CHOICE + +allows only one selection + + + +## FormField Objects + +```python +class FormField() +``` + +A data holding class, used to easily specify how a PDF form field should be created. + + + +#### \_\_init\_\_(field\_name: str, type\_name: Union[FieldType, str], x: int, y: int, font\_size: Optional[int] = None, tooltip: str = "", configs: Optional[Dict[str, Any]] = None) + +```python +def __init__(field_name: str, + type_name: Union[FieldType, str], + x: int, + y: int, + font_size: Optional[int] = None, + tooltip: str = "", + configs: Optional[Dict[str, Any]] = None) +``` + +Constructor + +**Arguments**: + +- `x` - the x position of the lower left corner of the field. Should be in X,Y coordinates, + where (0, 0) is the lower left of the page, x goes to the right, and units are in + points (1/72th of an inch) +- `y` - the y position of the lower left corner of the field. Should be in X,Y coordinates, + where (0, 0) is the lower left of the page, y goes up, and units are in points + (1/72th of an inch) +- `config` - a dictionary containing any keyword argument to the reportlab field functions, + which will vary depending on what type of field this is. See section 4.7 of the + [reportlab User Guide](https://www.reportlab.com/docs/reportlab-userguide.pdf) +- `field_name` - the name of the field, exposed to via most APIs. 
Not the tooltip, but `users1_name__0` + + + +#### set\_fields(in\_file: Union[str, Path, BinaryIO], out\_file: Union[str, Path, BinaryIO], fields\_per\_page: Iterable[Iterable[FormField]], \*, overwrite=False) + +```python +def set_fields(in_file: Union[str, Path, BinaryIO], + out_file: Union[str, Path, BinaryIO], + fields_per_page: Iterable[Iterable[FormField]], + *, + overwrite=False) +``` + +Adds fields per page to the in_file PDF, writing the new PDF to a new file. + +Example usage: + +```python +set_fields('no_fields.pdf', 'four_fields_on_second_page.pdf', + [ + [], # nothing on the first page + [ # Second page + FormField('new_field', 'text', 110, 105, configs=\{'width': 200, 'height': 30\}), + # Choice needs value to be one of the possible options, and options to be a list of strings or tuples + FormField('new_choices', 'choice', 110, 400, configs=\{'value': 'Option 1', 'options': ['Option 1', 'Option 2']\}), + # Radios need to have the same name, with different values + FormField('new_radio1', 'radio', 110, 600, configs=\{'value': 'option a'\}), + FormField('new_radio1', 'radio', 110, 500, configs=\{'value': 'option b'\}) + ] + ] +) +``` + +**Arguments**: + +- `in_file` - the input file name or path of a PDF that we're adding the fields to +- `out_file` - the output file name or path where the new version of in_file will + be written. Doesn't need to exist. +- `fields_per_page` - for each page, a series of fields that should be added to that + page. +- `owerwrite` - if the input file already some fields (AcroForm fields specifically) + and this value is true, it will erase those existing fields and just add + `fields_per_page`. If not true and the input file has fields, this won't generate + a PDF, since there isn't currently a way to merge AcroForm fields from + different PDFs. + + +**Returns**: + + Nothing. 
+
+<a id="formfyxer.pdf_wrangling.rename_pdf_fields"></a>
+
+#### rename\_pdf\_fields(in\_file: Union[str, Path, BinaryIO], out\_file: Union[str, Path, BinaryIO], mapping: Mapping[str, str])
+
+```python
+def rename_pdf_fields(in_file: Union[str, Path, BinaryIO],
+                      out_file: Union[str, Path, BinaryIO],
+                      mapping: Mapping[str, str]) -> None
+```
+
+Given a dictionary that maps old to new field names, rename the AcroForm
+field with a matching key to the specified value.
+
+**Example**:
+
+```python
+rename_pdf_fields('current.pdf', 'new_field_names.pdf',
+    \{'abc123': 'user1_name', 'abc124': 'user1_address_city'\})
+```
+
+Args:
+    in_file: the filename of an input file
+    out_file: the filename of the output file. Doesn't need to exist,
+        will be overwritten if it does exist.
+    mapping: the python dict that maps from a current field name to the desired name
+
+Returns:
+    Nothing
+
+<a id="formfyxer.pdf_wrangling.unlock_pdf_in_place"></a>
+
+#### unlock\_pdf\_in\_place(in\_file: Union[str, Path, BinaryIO])
+
+```python
+def unlock_pdf_in_place(in_file: Union[str, Path, BinaryIO]) -> None
+```
+
+Try using pikePDF to unlock the PDF if it is locked. This won't work if it has a non-zero length password.
+
+<a id="formfyxer.pdf_wrangling.has_fields"></a>
+
+#### has\_fields(pdf\_file: str)
+
+```python
+def has_fields(pdf_file: str) -> bool
+```
+
+Check if a PDF has at least one form field using PikePDF.
+
+**Arguments**:
+
+- `pdf_file` _str_ - The path to the PDF file.
+
+
+**Returns**:
+
+- `bool` - True if the PDF has at least one form field, False otherwise.
+ + + +#### get\_existing\_pdf\_fields(in\_file: Union[str, Path, BinaryIO, Pdf]) + +```python +def get_existing_pdf_fields( + in_file: Union[str, Path, BinaryIO, Pdf]) -> List[List[FormField]] +``` + +Use PikePDF to get fields from the PDF + + + +#### swap\_pdf\_page(\*, source\_pdf: Union[str, Path, Pdf], destination\_pdf: Union[str, Path, Pdf], source\_offset: int = 0, destination\_offset: int = 0, append\_fields: bool = False) + +```python +def swap_pdf_page(*, + source_pdf: Union[str, Path, Pdf], + destination_pdf: Union[str, Path, Pdf], + source_offset: int = 0, + destination_offset: int = 0, + append_fields: bool = False) -> Pdf +``` + +(DEPRECATED: use copy_pdf_fields) Copies the AcroForm fields from one PDF to another blank PDF form. Optionally, choose a starting page for both +the source and destination PDFs. By default, it will remove any existing annotations (which include form fields) +in the destination PDF. If you wish to append annotations instead, specify `append_fields = True` + + + +#### copy\_pdf\_fields(\*, source\_pdf: Union[str, Path, Pdf], destination\_pdf: Union[str, Path, Pdf], source\_offset: int = 0, destination\_offset: int = 0, append\_fields: bool = False) + +```python +def copy_pdf_fields(*, + source_pdf: Union[str, Path, Pdf], + destination_pdf: Union[str, Path, Pdf], + source_offset: int = 0, + destination_offset: int = 0, + append_fields: bool = False) -> Pdf +``` + +Copies the AcroForm fields from one PDF to another blank PDF form (without AcroForm fields). +Useful for getting started with an updated PDF form, where the old fields are pretty close to where +they should go on the new document. + +Optionally, you can choose a starting page for both +the source and destination PDFs. By default, it will remove any existing annotations (which include form fields) +in the destination PDF. 
If you wish to append annotations instead, specify `append_fields = True`
+
+**Example**:
+
+```python
+new_pdf_with_fields = copy_pdf_fields(
+    source_pdf="old_pdf.pdf",
+    destination_pdf="new_pdf_with_no_fields.pdf")
+new_pdf_with_fields.save("new_pdf_with_fields.pdf")
+```
+
+
+**Arguments**:
+
+- `source_pdf` - a file name or path to a PDF that has AcroForm fields
+- `destination_pdf` - a file name or path to a PDF without AcroForm fields. Existing fields will be removed.
+- `source_offset` - the starting page that fields will be copied from. Defaults to 0.
+- `destination_offset` - the starting page that fields will be copied to. Defaults to 0.
+- `append_fields` - controls whether formfyxer will try to append form fields instead of
+  overwriting. Defaults to false; when enabled may lead to undefined behavior.
+
+
+**Returns**:
+
+  A pikepdf.Pdf object with new fields. If `destination_pdf` was a pikepdf.Pdf object, the
+  same object is returned.
+
+<a id="formfyxer.pdf_wrangling.get_textboxes_in_pdf"></a>
+
+#### get\_textboxes\_in\_pdf(in\_file: Union[str, Path, BinaryIO], line\_margin=0.02, char\_margin=2.0)
+
+```python
+def get_textboxes_in_pdf(in_file: Union[str, Path, BinaryIO],
+                         line_margin=0.02,
+                         char_margin=2.0) -> List[List[Textbox]]
+```
+
+Gets all of the text boxes found by pdfminer in a PDF, as well as their bounding boxes
+
+<a id="formfyxer.pdf_wrangling.get_bracket_chars_in_pdf"></a>
+
+#### get\_bracket\_chars\_in\_pdf(in\_file: Union[str, Path, BinaryIO], line\_margin=0.02, char\_margin=0.0)
+
+```python
+def get_bracket_chars_in_pdf(in_file: Union[str, Path, BinaryIO],
+                             line_margin=0.02,
+                             char_margin=0.0) -> List
+```
+
+Gets all of the bracket characters ('[' and ']') found by pdfminer in a PDF, as well as their bounding boxes
+TODO: Will eventually be used to find [ ] as checkboxes, but right now we can't tell the difference between [ ] and [i].
+This simply gets all of the brackets, and the characters of [hi] in a PDF and [ ] are the exact same distance apart.
+Currently going with just "[hi]" doesn't happen, let's hope that assumption holds.
+ + + +#### intersect\_bbox(bbox\_a, bbox\_b, vert\_dilation=2, horiz\_dilation=2) + +```python +def intersect_bbox(bbox_a, bbox_b, vert_dilation=2, horiz_dilation=2) -> bool +``` + +bboxes are [left edge, bottom edge, horizontal length, vertical length] + + + +#### intersect\_bboxs(bbox\_a, bboxes, vert\_dilation=2, horiz\_dilation=2) + +```python +def intersect_bboxs(bbox_a, + bboxes, + vert_dilation=2, + horiz_dilation=2) -> Iterable[bool] +``` + +Returns an iterable of booleans, one of each of the input bboxes, true if it collides with bbox_a + + + +#### contain\_boxes(bbox\_a: BoundingBoxF, bbox\_b: BoundingBoxF) + +```python +def contain_boxes(bbox_a: BoundingBoxF, bbox_b: BoundingBoxF) -> BoundingBoxF +``` + +Given two bounding boxes, return a single bounding box that contains both of them. + + + +#### get\_dist\_sq(point\_a: XYPair, point\_b: XYPair) + +```python +def get_dist_sq(point_a: XYPair, point_b: XYPair) -> float +``` + +returns the distance squared between two points. Faster than the true euclidean dist + + + +#### get\_dist(point\_a: XYPair, point\_b: XYPair) + +```python +def get_dist(point_a: XYPair, point_b: XYPair) -> float +``` + +euclidean (L^2 norm) distance between two points + + + +#### get\_connected\_edges(point: XYPair, point\_list: Sequence) + +```python +def get_connected_edges(point: XYPair, point_list: Sequence) +``` + +point list is always ordered clockwise from the bottom left, +i.e. bottom left, top left, top right, bottom right + + + +#### bbox\_distance(bbox\_a: BoundingBoxF, bbox\_b: BoundingBoxF) + +```python +def bbox_distance( + bbox_a: BoundingBoxF, bbox_b: BoundingBoxF +) -> Tuple[float, Tuple[XYPair, XYPair], Tuple[XYPair, XYPair]] +``` + +Gets our specific "distance measure" between two different bounding boxes. +This distance is roughly the sum of the horizontal and vertical difference in alignment of +the closest shared field-bounding box edge. 
We are trying to find which, given a list of text boxes +around a field, is the most likely to be the actual text label for the PDF field. + +bboxes are 4 floats, x, y, width and height + + + +#### get\_possible\_fields(in\_pdf\_file: Union[str, Path], textboxes: Optional[List[List[Textbox]]] = None) + +```python +def get_possible_fields( + in_pdf_file: Union[str, Path], + textboxes: Optional[List[List[Textbox]]] = None +) -> List[List[FormField]] +``` + +Given an input PDF, runs a series of heuristics to predict where there +might be places for user enterable information (i.e. PDF fields), and returns +those predictions. + +**Example**: + +```python +fields = get_possible_fields('no_field.pdf') +print(fields[0][0]) +# Type: FieldType.TEXT, Name: name, User name: , X: 67.68, Y: 666.0, Configs: \{'fieldFlags': 'doNotScroll', 'width': 239.4, 'height': 16\} +``` + + +**Arguments**: + +- `in_pdf_file` - the input PDF +- `textboxes` _optional_ - the location of various lines of text in the PDF. + If not given, will be calculated automatically. This allows us to + pass through expensive info to calculate through several functions. + + +**Returns**: + + For each page in the input PDF, a list of predicted form fields + + + +#### get\_possible\_checkboxes(img: Union[str, cv2.Mat], find\_small=False) + +```python +def get_possible_checkboxes(img: Union[str, cv2.Mat], + find_small=False) -> Union[np.ndarray, List] +``` + +Uses boxdetect library to determine if there are checkboxes on an image of a PDF page. +Assumes the checkbox is square. + +find_small: if true, finds smaller checkboxes. Sometimes will "find" a checkbox in letters, +like O and D, if the font is too small + + + +#### get\_possible\_radios(img: Union[str, BinaryIO, cv2.Mat]) + +```python +def get_possible_radios(img: Union[str, BinaryIO, cv2.Mat]) +``` + +Even though it's called "radios", it just gets things shaped like circles, not +doing any semantic analysis yet. 
+
+<a id="formfyxer.pdf_wrangling.get_possible_text_fields"></a>
+
+#### get\_possible\_text\_fields(img: Union[str, BinaryIO, cv2.Mat], text\_lines: List[Textbox], default\_line\_height: int = 44)
+
+```python
+def get_possible_text_fields(
+        img: Union[str, BinaryIO, cv2.Mat],
+        text_lines: List[Textbox],
+        default_line_height: int = 44) -> List[Tuple[BoundingBox, int]]
+```
+
+Uses openCV to attempt to find places where a PDF could expect an input text field.
+
+Caveats so far: only considers straight, normal horizontal lines that don't touch any vertical lines as fields
+Won't find field inputs as boxes
+
+default_line_height: the default height (16 pt), in pixels (at 200 dpi), which is 45
+
+<a id="formfyxer.pdf_wrangling.auto_add_fields"></a>
+
+#### auto\_add\_fields(in\_pdf\_file: Union[str, Path], out\_pdf\_file: Union[str, Path])
+
+```python
+def auto_add_fields(in_pdf_file: Union[str, Path], out_pdf_file: Union[str,
+                                                                       Path])
+```
+
+Uses [get_possible_fields](#formfyxer.pdf_wrangling.get_possible_fields) and
+[set_fields](#formfyxer.pdf_wrangling.set_fields) to automatically add new detected fields
+to an input PDF.
+
+**Example**:
+
+```python
+auto_add_fields('no_fields.pdf', 'newly_added_fields.pdf')
+```
+
+
+**Arguments**:
+
+- `in_pdf_file` - the input file name or path of the PDF where we'll try to find possible fields
+- `out_pdf_file` - the output file name or path of the PDF where a new version of `in_pdf_file` will
+  be stored, with the new fields. Doesn't need to exist, but if a file does exist at that
+  filename, it will be overwritten.
+
+
+**Returns**:
+
+  Nothing
+
+<a id="formfyxer.pdf_wrangling.is_tagged"></a>
+
+#### is\_tagged(in\_pdf\_file: Union[str, Path, pikepdf.Pdf])
+
+```python
+def is_tagged(in_pdf_file: Union[str, Path, pikepdf.Pdf]) -> bool
+```
+
+Determines if the input PDF file is tagged for accessibility.
+
+**Arguments**:
+
+- `in_pdf_file` _Union[str, Path]_ - The path to the PDF file, as a string or a Path object.
+
+
+**Returns**:
+
+- `bool` - True if the PDF is tagged, False otherwise.
+ diff --git a/docs/components/sidebar.json b/docs/components/sidebar.json index 1dd9f9fc0..188b9c53d 100644 --- a/docs/components/sidebar.json +++ b/docs/components/sidebar.json @@ -1,5 +1,13 @@ { - "items": [], - "label": null, + "items": [ + "components/ALDashboard/aldashboard", + "components/ALDashboard/create_package", + "components/ALDashboard/docx_wrangling", + "components/ALDashboard/package_scanner", + "components/ALDashboard/project_maintenance", + "components/ALDashboard/translation", + "components/ALDashboard/validate_docx" + ], + "label": "ALDashboard", "type": "category" } \ No newline at end of file diff --git a/sidebars.js b/sidebars.js index 706008c4c..6382dab7b 100644 --- a/sidebars.js +++ b/sidebars.js @@ -165,6 +165,19 @@ module.exports = { 'components/GithubFeedbackForm/githubfeedbackform_overview', 'components/InterviewStats/interviewstats_overview', 'components/ALDashboard/aldashboard_overview', + { + label: 'ALDashboard modules', + type: 'category', + items: [ + 'components/ALDashboard/aldashboard', + 'components/ALDashboard/create_package', + 'components/ALDashboard/docx_wrangling', + 'components/ALDashboard/package_scanner', + 'components/ALDashboard/project_maintenance', + 'components/ALDashboard/translation', + 'components/ALDashboard/validate_docx', + ], + }, { "label": "FormFyxer", "type": "category",