From d937291e74a9e7613c54bbc4df6f3299e37b3730 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 4 Dec 2024 14:38:02 -0800 Subject: [PATCH 1/6] add LaTeX file upload functionality and configure source directory --- requirements.txt | 1 + src/config.py | 11 +++++ src/latex.py | 108 +++++++++++++++++++++++++++++++++++++++++++++++ src/main.py | 17 +++++++- 4 files changed, 136 insertions(+), 1 deletion(-) create mode 100644 src/latex.py diff --git a/requirements.txt b/requirements.txt index c1ce9dc..15d8695 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,4 @@ gunicorn uvicorn python-multipart tenacity +mystmd diff --git a/src/config.py b/src/config.py index 9dbe3ff..519d2b8 100644 --- a/src/config.py +++ b/src/config.py @@ -1,3 +1,6 @@ +import os +import pathlib + import pydantic_settings from .log import get_logger @@ -37,6 +40,13 @@ def format_bytes(num: int) -> str: ) +def latex_source_directory(): + directory = pathlib.Path(os.environ.get('TMPDIR', '')).resolve() + directory = directory / 'myst-latex-sources' + directory.mkdir(parents=True, exist_ok=True) + return directory + + class Settings(pydantic_settings.BaseSettings): model_config = pydantic_settings.SettingsConfigDict( env_file=('.env', '.env.prod', '.env.local'), extra='ignore' @@ -45,6 +55,7 @@ class Settings(pydantic_settings.BaseSettings): ZENODO_ACCESS_TOKEN: str | None ZENODO_MAX_FILE_SIZE: int = 15 * 1024 * 1024 * 1024 JANEWAY_URL: str | None + LATEX_SOURCE_DIRECTORY: pathlib.Path = latex_source_directory() def get_settings() -> Settings: diff --git a/src/latex.py b/src/latex.py new file mode 100644 index 0000000..c2aa7fc --- /dev/null +++ b/src/latex.py @@ -0,0 +1,108 @@ +import asyncio +import mimetypes +import shutil + +import yaml +from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile + +from .config import Settings, get_settings +from .log import get_logger + +logger = get_logger() +router = APIRouter() + + +def validate_file(file: 
UploadFile): + mime_type, _ = mimetypes.guess_type(file.filename) + if mime_type is None: + raise HTTPException( + status_code=400, detail='Could not determine mime type of file' + ) + + if mime_type != 'application/zip': + raise HTTPException( + status_code=400, + detail=f'Invalid file type: {mime_type} for LaTeX source: {file.filename}. Must be a ZIP archive', + ) + + +@router.post('/latex/upload-file') +async def upload_file( + request: Request, + preprint_id: str, + file: UploadFile = File(...), + settings: Settings = Depends(get_settings), +): + logger.info('Uploading file') + validate_file(file) + file_path = settings.LATEX_SOURCE_DIRECTORY / preprint_id / file.filename + file_path.parent.mkdir(parents=True, exist_ok=True) + with file_path.open('wb') as buffer: + shutil.copyfileobj(file.file, buffer) + + # unzip the file + logger.info(f'Unzipping file: {file_path}') + shutil.unpack_archive(file_path, file_path.parent) + # get the path to the unzipped directory + unzipped_directory = file_path.parent / file.filename.replace('.zip', '') + + # write a yaml file (myst.ym) in the same directory. 
This file will contain the metadata for the preprint as following + + myst_file = unzipped_directory / 'myst.yml' + with myst_file.open('w') as buffer: + yaml.dump( + { + 'version': 1, + 'project': { + 'id': preprint_id, + 'title': '', + 'description': '', + 'keywords': [], + 'authors': [], + 'subject': 'Article', + 'open_access': True, + 'license': '', + }, + 'site': {'template': 'article-theme'}, + }, + buffer, + ) + + myst_executable = shutil.which('myst') + if myst_executable is None: + raise HTTPException(status_code=500, detail='myst executable not found in PATH') + + # run myst to convert the latex source to html + logger.info(f'Converting LaTeX source to HTML: {unzipped_directory}') + + myst_command = [myst_executable, 'build', '--site', '--ci'] + logger.info(f'Running myst command: {myst_command}') + process = await asyncio.create_subprocess_exec( + *myst_command, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + cwd=str(unzipped_directory), + ) + stdout, stderr = await process.communicate() + logger.info(f'myst stdout: {stdout.decode()}') + logger.info(f'myst stderr: {stderr.decode()}') + if process.returncode != 0: + raise HTTPException( + status_code=500, + detail=f'myst command failed with return code: {process.returncode}', + ) + + build_directory = unzipped_directory / '_build' / 'site' + parent_directory = unzipped_directory.parent / 'site' + parent_directory.mkdir(parents=True, exist_ok=True) + + # Now we need to move the contents of the _build directory to the parent directory + for item in build_directory.iterdir(): + logger.info(f'Moving item: {item} to {parent_directory}') + shutil.move(item, parent_directory) + + return { + 'status': 'ok', + 'filename': file.filename, + 'path': parent_directory, + } diff --git a/src/main.py b/src/main.py index 31eebdf..237be59 100644 --- a/src/main.py +++ b/src/main.py @@ -4,9 +4,11 @@ import tempfile from contextlib import asynccontextmanager -from fastapi import FastAPI +from fastapi 
import FastAPI, staticfiles from fastapi.middleware.cors import CORSMiddleware +from .config import latex_source_directory +from .latex import router as latex_router from .log import get_logger from .zenodo import router as zenodo_router @@ -14,6 +16,13 @@ logger = get_logger() +directory = latex_source_directory() +directory.mkdir(parents=True, exist_ok=True) +logger.info( + f'Resolved directory: {directory} | {directory.exists()} | {list(directory.iterdir())}' +) + + @asynccontextmanager async def lifespan_event(app: FastAPI): logger.info('⏱️ Application startup...') @@ -39,6 +48,12 @@ def create_application() -> FastAPI: allow_headers=['*'], ) app.include_router(zenodo_router, tags=['zenodo']) + app.include_router(latex_router, tags=['latex']) + app.mount( + '/myst', + staticfiles.StaticFiles(directory=directory, html=True), + name='myst', + ) return app From b22f9486e6e7f70d0a8194bc8312497a88e3ac71 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 4 Dec 2024 14:39:12 -0800 Subject: [PATCH 2/6] add pull request trigger to GitHub Actions workflow --- .github/workflows/fly.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/fly.yml b/.github/workflows/fly.yml index c8a6ef9..f282a31 100644 --- a/.github/workflows/fly.yml +++ b/.github/workflows/fly.yml @@ -3,6 +3,9 @@ on: push: branches: - main + pull_request: + branches: + - main workflow_dispatch: env: From c9f6bfebf2db8c7c5516a09fbaa141a6f693a3c1 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 4 Dec 2024 14:42:43 -0800 Subject: [PATCH 3/6] add pyyaml dependency to requirements --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 15d8695..d6eb970 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,4 @@ uvicorn python-multipart tenacity mystmd +pyyaml From 268b0ab53b2b781ae42855109504ca4ba6ec6887 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 4 Dec 2024 14:54:06 -0800 Subject: [PATCH 
4/6] add Node.js buildpack to production and staging configurations --- fly.prod.toml | 2 +- fly.staging.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fly.prod.toml b/fly.prod.toml index 7ac48d5..ce3276d 100644 --- a/fly.prod.toml +++ b/fly.prod.toml @@ -7,7 +7,7 @@ primary_region = "dfw" [build] builder = "heroku/builder:24" -buildpacks = ["heroku/buildpack-python:0.19.1"] +buildpacks = ["heroku/buildpack-python:0.19.1", "heroku/buildpack-nodejs:3.3.3"] [[vm]] diff --git a/fly.staging.toml b/fly.staging.toml index 594df02..94e0521 100644 --- a/fly.staging.toml +++ b/fly.staging.toml @@ -7,7 +7,7 @@ primary_region = "dfw" [build] builder = "heroku/builder:24" -buildpacks = ["heroku/buildpack-python:0.19.1"] +buildpacks = ["heroku/buildpack-python:0.19.1", "heroku/buildpack-nodejs:3.3.3"] [[vm]] From 341ecb29d5fcc4ecb75ea9dc8948a35784bacb28 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 4 Dec 2024 15:13:23 -0800 Subject: [PATCH 5/6] add Node.js engine specification to package.json --- package.json | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 package.json diff --git a/package.json b/package.json new file mode 100644 index 0000000..698a85c --- /dev/null +++ b/package.json @@ -0,0 +1,5 @@ +{ + "engines": { + "node": "22.x" + } +} From 9c441dd570dc7e556ee0d06cac766962907982c0 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 10 Dec 2024 12:11:03 -0800 Subject: [PATCH 6/6] update README.md to include file uploader service details and installation instructions --- README.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/README.md b/README.md index ed304b2..45a638a 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,39 @@ # cdrxiv / file-uploader +A minimal file uploader service built with FastAPI. 
Currently, this service has two main endpoints: + +- `/zenodo/upload-file`: used to upload files to Zenodo +- `/latex/upload-file`: used to upload LaTeX source files (as a ZIP archive) + [![Fly.io Deployment](https://github.com/cdrxiv/file-uploader/actions/workflows/fly.yml/badge.svg)](https://github.com/cdrxiv/file-uploader/actions/workflows/fly.yml) - staging instance: [cdrxiv-file-uploader-staging.fly.dev](https://cdrxiv-file-uploader-staging.fly.dev/docs) - production instance: [cdrxiv-file-uploader.fly.dev](https://cdrxiv-file-uploader.fly.dev/docs) + +## installation + +To install and run this service locally, you can use the following commands: + +```bash +git clone https://github.com/cdrxiv/file-uploader +cd file-uploader +python -m pip install -r requirements.txt +``` + +## running the service + +To run the service locally, you can use the following command: + +```bash +uvicorn src.main:app --reload +``` + +## license + +All the code in this repository is [MIT](https://choosealicense.com/licenses/mit/) licensed. + +CDRXIV is a registered trademark (application pending). CDRXIV’s digital assets (graphics, logo, etc.) are licensed as [CC-BY](https://creativecommons.org/licenses/by/4.0/deed.en). + +> [!IMPORTANT] +> Content and data associated with this repository and hosted on CDRXIV are subject to additional [terms of use](https://cdrxiv.org/terms-of-use). See the [FAQ](https://cdrxiv.org/about/faq) for more information on how CDRXIV content is licensed.