Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/fly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ on:
push:
branches:
- main
pull_request:
branches:
- main
workflow_dispatch:

env:
Expand Down
32 changes: 32 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,39 @@

# cdrxiv / file-uploader

A minimal file uploader service built with FastAPI. Currently, this service has two main endpoints:

- `/zenodo/upload-file`: used to upload files to Zenodo
- `/latex/upload-file`: used to upload LaTeX source files (ZIP archives); the built sites are served as static files under `/myst`

[![Fly.io Deployment](https://github.com/cdrxiv/file-uploader/actions/workflows/fly.yml/badge.svg)](https://github.com/cdrxiv/file-uploader/actions/workflows/fly.yml)

- staging instance: [cdrxiv-file-uploader-staging.fly.dev](https://cdrxiv-file-uploader-staging.fly.dev/docs)
- production instance: [cdrxiv-file-uploader.fly.dev](https://cdrxiv-file-uploader.fly.dev/docs)

## installation

To install and run this service locally, you can use the following commands:

```bash
git clone https://github.com/cdrxiv/file-uploader
cd file-uploader
python -m pip install -r requirements.txt
```

## running the service

To run the service locally, you can use the following command:

```bash
uvicorn src.main:app --reload
```

## license

All the code in this repository is [MIT](https://choosealicense.com/licenses/mit/) licensed.

CDRXIV is a registered trademark (application pending). CDRXIV’s digital assets (graphics, logo, etc.) are licensed as [CC-BY](https://creativecommons.org/licenses/by/4.0/deed.en).

> [!IMPORTANT]
> Content and data associated with this repository and hosted on CDRXIV are subject to additional [terms of use](https://cdrxiv.org/terms-of-use). See the [FAQ](https://cdrxiv.org/about/faq) for more information on how CDRXIV content is licensed.
2 changes: 1 addition & 1 deletion fly.prod.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ primary_region = "dfw"

[build]
builder = "heroku/builder:24"
buildpacks = ["heroku/buildpack-python:0.19.1"]
buildpacks = ["heroku/buildpack-python:0.19.1", "heroku/buildpack-nodejs:3.3.3"]


[[vm]]
Expand Down
2 changes: 1 addition & 1 deletion fly.staging.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ primary_region = "dfw"

[build]
builder = "heroku/builder:24"
buildpacks = ["heroku/buildpack-python:0.19.1"]
buildpacks = ["heroku/buildpack-python:0.19.1", "heroku/buildpack-nodejs:3.3.3"]


[[vm]]
Expand Down
5 changes: 5 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"engines": {
"node": "22.x"
}
}
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ gunicorn
uvicorn
python-multipart
tenacity
mystmd
pyyaml
11 changes: 11 additions & 0 deletions src/config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import os
import pathlib

import pydantic_settings

from .log import get_logger
Expand Down Expand Up @@ -37,6 +40,13 @@ def format_bytes(num: int) -> str:
)


def latex_source_directory():
    """Return the directory used to store LaTeX sources, creating it if needed.

    The directory is ``myst-latex-sources`` underneath the resolved value of
    the ``TMPDIR`` environment variable. When ``TMPDIR`` is not set, the empty
    path is resolved instead, which yields the current working directory.

    Returns:
        The absolute ``pathlib.Path`` of the (now existing) directory.
    """
    base = pathlib.Path(os.environ.get('TMPDIR', ''))
    target = base.resolve() / 'myst-latex-sources'
    # Idempotent: safe to call repeatedly and from several modules.
    target.mkdir(parents=True, exist_ok=True)
    return target


class Settings(pydantic_settings.BaseSettings):
model_config = pydantic_settings.SettingsConfigDict(
env_file=('.env', '.env.prod', '.env.local'), extra='ignore'
Expand All @@ -45,6 +55,7 @@ class Settings(pydantic_settings.BaseSettings):
ZENODO_ACCESS_TOKEN: str | None
ZENODO_MAX_FILE_SIZE: int = 15 * 1024 * 1024 * 1024
JANEWAY_URL: str | None
LATEX_SOURCE_DIRECTORY: pathlib.Path = latex_source_directory()


def get_settings() -> Settings:
Expand Down
108 changes: 108 additions & 0 deletions src/latex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import asyncio
import mimetypes
import shutil

import yaml
from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile

from .config import Settings, get_settings
from .log import get_logger

# Module-level logger obtained from the project's logging helper.
logger = get_logger()
# Router on which this module's endpoints are registered.
router = APIRouter()


def validate_file(file: UploadFile):
    """Reject uploads whose file name does not indicate a ZIP archive.

    The check relies solely on the file name (via ``mimetypes.guess_type``);
    the content of the upload is not inspected.

    Args:
        file: The uploaded file to validate.

    Raises:
        HTTPException: 400 when the upload has no file name, when no MIME
            type can be guessed from the name, or when the guessed type is
            not ``application/zip``.
    """
    # ``UploadFile.filename`` is optional. ``mimetypes.guess_type(None)``
    # raises ``TypeError``, which would surface as a 500, so a missing name
    # is reported the same way as an undeterminable type.
    mime_type = None
    if file.filename:
        mime_type, _ = mimetypes.guess_type(file.filename)

    if mime_type is None:
        raise HTTPException(
            status_code=400, detail='Could not determine mime type of file'
        )

    if mime_type != 'application/zip':
        raise HTTPException(
            status_code=400,
            detail=f'Invalid file type: {mime_type} for LaTeX source: {file.filename}. Must be a ZIP archive',
        )


@router.post('/latex/upload-file')
async def upload_file(
    request: Request,
    preprint_id: str,
    file: UploadFile = File(...),
    settings: Settings = Depends(get_settings),
):
    """Store an uploaded LaTeX ZIP archive and build it into a site with MyST.

    Steps performed:

    1. Validate the upload (must look like a ZIP archive).
    2. Save it to ``LATEX_SOURCE_DIRECTORY/<preprint_id>/<filename>`` and
       unpack it next to the archive.
    3. Write a ``myst.yml`` project file into the unpacked directory, which
       is expected to be named after the archive (without its extension).
    4. Run ``myst build --site --ci`` inside that directory.
    5. Move the contents of ``_build/site`` into
       ``LATEX_SOURCE_DIRECTORY/<preprint_id>/site``.

    Args:
        request: The incoming request (currently unused).
        preprint_id: Identifier of the preprint; used as a directory name,
            so it must be a single path component.
        file: The uploaded ZIP archive.
        settings: Application settings providing ``LATEX_SOURCE_DIRECTORY``.

    Returns:
        A dict with ``status``, the uploaded ``filename`` and the ``path`` of
        the directory holding the built site.

    Raises:
        HTTPException: 400 for an invalid file name, ``preprint_id`` or
            archive; 500 when ``myst`` is missing, fails, or produces no
            site output.
    """
    # Local import: this module does not import pathlib at file level.
    from pathlib import PurePath

    logger.info('Uploading file')
    validate_file(file)

    # Both values are client-controlled and end up in filesystem paths.
    # Reduce the file name to its final component and require the preprint id
    # to be a single component so neither can escape the storage directory
    # (e.g. via "../" or an absolute path).
    safe_filename = PurePath(file.filename or '').name
    if not safe_filename:
        raise HTTPException(
            status_code=400, detail='Uploaded file has no usable file name'
        )
    if preprint_id in ('', '.', '..') or PurePath(preprint_id).name != preprint_id:
        raise HTTPException(
            status_code=400, detail=f'Invalid preprint_id: {preprint_id!r}'
        )

    file_path = settings.LATEX_SOURCE_DIRECTORY / preprint_id / safe_filename
    file_path.parent.mkdir(parents=True, exist_ok=True)
    with file_path.open('wb') as buffer:
        shutil.copyfileobj(file.file, buffer)

    # unzip the file; the format is given explicitly so that extensions such
    # as ".ZIP" (accepted by validate_file) are handled as well
    logger.info(f'Unzipping file: {file_path}')
    try:
        shutil.unpack_archive(file_path, file_path.parent, format='zip')
    except shutil.ReadError as exc:
        raise HTTPException(
            status_code=400,
            detail=f'Could not unpack {safe_filename}: not a valid ZIP archive',
        ) from exc

    # get the path to the unzipped directory: only the trailing extension is
    # stripped, not every occurrence of ".zip" inside the name
    archive_stem = PurePath(safe_filename).stem
    unzipped_directory = file_path.parent / archive_stem
    if not unzipped_directory.is_dir():
        raise HTTPException(
            status_code=400,
            detail=f'Archive {safe_filename} must contain a top-level directory named {archive_stem}',
        )

    # write a yaml file (myst.yml) in the same directory. This file contains
    # the metadata for the preprint.
    myst_file = unzipped_directory / 'myst.yml'
    with myst_file.open('w') as buffer:
        yaml.dump(
            {
                'version': 1,
                'project': {
                    'id': preprint_id,
                    'title': '',
                    'description': '',
                    'keywords': [],
                    'authors': [],
                    'subject': 'Article',
                    'open_access': True,
                    'license': '',
                },
                'site': {'template': 'article-theme'},
            },
            buffer,
        )

    myst_executable = shutil.which('myst')
    if myst_executable is None:
        raise HTTPException(status_code=500, detail='myst executable not found in PATH')

    # run myst to convert the latex source to html
    logger.info(f'Converting LaTeX source to HTML: {unzipped_directory}')

    myst_command = [myst_executable, 'build', '--site', '--ci']
    logger.info(f'Running myst command: {myst_command}')
    process = await asyncio.create_subprocess_exec(
        *myst_command,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
        cwd=str(unzipped_directory),
    )
    stdout, stderr = await process.communicate()
    # errors='replace' keeps logging from raising on non-UTF-8 tool output
    logger.info(f"myst stdout: {stdout.decode(errors='replace')}")
    logger.info(f"myst stderr: {stderr.decode(errors='replace')}")
    if process.returncode != 0:
        raise HTTPException(
            status_code=500,
            detail=f'myst command failed with return code: {process.returncode}',
        )

    build_directory = unzipped_directory / '_build' / 'site'
    if not build_directory.is_dir():
        raise HTTPException(
            status_code=500, detail='myst build did not produce a site directory'
        )
    parent_directory = unzipped_directory.parent / 'site'
    parent_directory.mkdir(parents=True, exist_ok=True)

    # Now we need to move the contents of the _build directory to the parent directory
    for item in build_directory.iterdir():
        # shutil.move refuses to overwrite an existing entry, which would make
        # every re-upload for the same preprint fail; drop the stale copy first.
        destination = parent_directory / item.name
        if destination.is_dir() and not destination.is_symlink():
            shutil.rmtree(destination)
        elif destination.exists() or destination.is_symlink():
            destination.unlink()
        logger.info(f'Moving item: {item} to {parent_directory}')
        shutil.move(item, parent_directory)

    return {
        'status': 'ok',
        'filename': file.filename,
        'path': parent_directory,
    }
17 changes: 16 additions & 1 deletion src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,25 @@
import tempfile
from contextlib import asynccontextmanager

from fastapi import FastAPI
from fastapi import FastAPI, staticfiles
from fastapi.middleware.cors import CORSMiddleware

from .config import latex_source_directory
from .latex import router as latex_router
from .log import get_logger
from .zenodo import router as zenodo_router

# Origin allow-list.
# NOTE(review): presumably consumed by the CORS middleware setup — confirm.
origins = ['*']
logger = get_logger()


# Resolve the LaTeX source directory at import time and make sure it exists,
# then log what it currently contains.
directory = latex_source_directory()
directory.mkdir(exist_ok=True, parents=True)
_resolved_message = (
    f'Resolved directory: {directory} | {directory.exists()} | {list(directory.iterdir())}'
)
logger.info(_resolved_message)


@asynccontextmanager
async def lifespan_event(app: FastAPI):
logger.info('⏱️ Application startup...')
Expand All @@ -39,6 +48,12 @@ def create_application() -> FastAPI:
allow_headers=['*'],
)
app.include_router(zenodo_router, tags=['zenodo'])
app.include_router(latex_router, tags=['latex'])
app.mount(
'/myst',
staticfiles.StaticFiles(directory=directory, html=True),
name='myst',
)
return app


Expand Down
Loading