Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions files/config_template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,20 @@ MATTERMOST:
team: "your-mattermost-team-name" # The team name (not display name)
channel: "your-mattermost-channel-name" # The channel name (not display name)

# Basecamp configuration
BASECAMP:
is_posting_on: true
access_token: "your-basecamp-access-token"
refresh_token: "your-basecamp-refresh-token"
client_id: "your-basecamp-client-id"
client_secret: "your-basecamp-client-secret"
account_id: "your-basecamp-account-id"
user_agent: "your-basecamp-user-agent"
bucket_id: "your-basecamp-bucket-id"
board_id: "your-basecamp-board-id"


SLACK_TEST_CHANNEL_ID: "your-slack-test-channel-id" # not required so left outside of dictionary
TELEGRAM_TEST_CHANNEL_ID: "your-telegram-test-channel-id" # not required so left outside of dictionary
MATTERMOST_TEST_CHANNEL_ID: "your-mattermost-test-channel-id" # not required so left outside of dictionary
GOOGLE_TEST_SPREADSHEET_ID: "your-google-test-spreadsheet-id" # not required so left outside of dictionary

GOOGLE_TEST_SPREADSHEET_ID: "your-google-test-spreadsheet-id" # not required so left outside of dictionary
36 changes: 32 additions & 4 deletions src/PaperBee/daily_posting.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ async def daily_papers_search(
zulip_args = validate_platform_args(config, "ZULIP")
telegram_args = validate_platform_args(config, "TELEGRAM")
mattermost_args = validate_platform_args(config, "MATTERMOST")
basecamp_args = validate_platform_args(config, "BASECAMP")

if telegram_args == {}:
telegram_args = {"bot_token": "", "channel_id": "", "is_posting_on": False}
Expand All @@ -48,7 +49,18 @@ async def daily_papers_search(
if slack_args == {}:
slack_args = {"bot_token": "", "channel_id": "", "is_posting_on": False}
if mattermost_args == {}:
mattermost_args = {"bot_token": "", "channel_id": "", "is_posting_on": False}
mattermost_args = {"url": "", "token": "", "team": "", "channel": "", "is_posting_on": False}

if basecamp_args == {}:
    # Basecamp is not configured. Provide every key that is read further down
    # when the finder is built -- including the OAuth tokens -- so that those
    # lookups do not raise KeyError for users without a BASECAMP section.
    basecamp_args = {
        "account_id": "",
        "client_id": "",
        "client_secret": "",
        "user_agent": "",
        "bucket_id": "",
        "board_id": "",
        "access_token": "",
        "refresh_token": "",
        "is_posting_on": False,
    }

llm_filtering = config.get("LLM_FILTERING", False)
if llm_filtering:
Expand Down Expand Up @@ -85,16 +97,32 @@ async def daily_papers_search(
mattermost_token=mattermost_args["token"],
mattermost_team=mattermost_args["team"],
mattermost_channel=mattermost_args["channel"],
basecamp_client_id=basecamp_args["client_id"],
basecamp_client_secret=basecamp_args["client_secret"],
basecamp_account_id=basecamp_args["account_id"],
basecamp_user_agent=basecamp_args["user_agent"],
basecamp_bucket_id=basecamp_args["bucket_id"],
basecamp_board_id=basecamp_args["board_id"],
basecamp_access_token=basecamp_args["access_token"],
basecamp_refresh_token=basecamp_args["refresh_token"],
databases=databases,
)
papers, response_slack, response_telegram, response_zulip, response_mattermost = await finder.run_daily(
(
papers,
response_slack,
response_telegram,
response_zulip,
response_mattermost,
response_basecamp,
) = await finder.run_daily(
post_to_slack=slack_args["is_posting_on"],
post_to_telegram=telegram_args["is_posting_on"],
post_to_zulip=zulip_args["is_posting_on"],
post_to_mattermost=mattermost_args["is_posting_on"],
post_to_basecamp=basecamp_args["is_posting_on"],
)

return papers, response_slack, response_telegram, response_zulip, response_mattermost
return papers, response_slack, response_telegram, response_zulip, response_mattermost, response_basecamp


def main() -> None:
Expand Down Expand Up @@ -133,7 +161,7 @@ def main() -> None:
# Dispatch to the appropriate subcommand
if args.command == "post":
config = load_config(args.config)
papers, _, _, _, _ = asyncio.run(
papers, _, _, _, _, _ = asyncio.run(
daily_papers_search(
config,
interactive=args.interactive,
Expand Down
137 changes: 137 additions & 0 deletions src/PaperBee/papers/basecamp_papers_formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import html
import time
from datetime import datetime
from logging import Logger
from typing import Any, Dict, List

import requests


class BasecampPaperPublisher:
    """
    Publish papers (from a spreadsheet) to a Basecamp Message Board.

    Args:
        logger: logging.Logger instance.
        account_id: Basecamp account id (the {ACCOUNT_ID} in URLs).
        client_id: OAuth client id (from launchpad.37signals.com).
        client_secret: OAuth client secret (from launchpad.37signals.com).
        user_agent: string identifying your app (required by Basecamp).
        bucket_id: Basecamp bucket id (the {BUCKET_ID} in URLs).
        board_id: Basecamp board id (the {BOARD_ID} in URLs).
        access_token: optional initial access token.
        refresh_token: refresh token (used to obtain new access tokens).
    """

    LAUNCHPAD_AUTH_URL = "https://launchpad.37signals.com/authorization/token"
    API_BASE = "https://3.basecampapi.com"
    # Seconds to wait on a single HTTP call before giving up.
    REQUEST_TIMEOUT = 30

    def __init__(
        self,
        logger: Logger,
        account_id: str,
        client_id: str,
        client_secret: str,
        user_agent: str,
        bucket_id: str,
        board_id: str,
        access_token: str,
        refresh_token: str,
    ) -> None:
        self.account_id = account_id
        self.client_id = client_id
        self.client_secret = client_secret
        self.user_agent = user_agent
        self.logger = logger
        self.bucket_id = bucket_id
        self.board_id = board_id
        self.access_token = access_token
        self.refresh_token = refresh_token
        self._access_expires_at = 0  # epoch seconds when token expires (0 = unknown)

        # small session for connection pooling
        self._session = requests.Session()
        self._session.headers.update({"User-Agent": user_agent, "Accept": "application/json"})
        if self.access_token:
            # Make a caller-supplied token usable even if no refresh ever happens.
            self._apply_auth_headers()

    def _apply_auth_headers(self) -> None:
        """Install the current access token on the shared session."""
        self._session.headers.update({
            "Authorization": f"Bearer {self.access_token}",
            "Content-Type": "application/json; charset=utf-8",
        })

    def _ensure_access_token(self) -> None:
        """
        Ensure we have an access token to send; refresh if needed.

        Raises:
            RuntimeError: if neither an access token nor a refresh token is available.
        """
        if self.access_token and not self.refresh_token:
            # Nothing to refresh with: rely on the supplied access token
            # instead of failing before even trying the request.
            self._apply_auth_headers()
            return
        if not self.access_token or time.time() >= self._access_expires_at - 30:
            self.logger.debug("Refreshing Basecamp access token...")
            self._refresh_access_token()

    def _refresh_access_token(self) -> None:
        """
        Refresh access token using refresh_token.

        Raises:
            RuntimeError: if there is no refresh token, or the response carries
                no access token.
            requests.HTTPError: if the token endpoint answers with an error status.
        """
        if not self.refresh_token:
            msg = "No refresh_token available."
            raise RuntimeError(msg)

        # NOTE(Rodrigo): {"type": "refresh"} as in https://github.com/basecamp/api/blob/master/sections/authentication.md
        data = {
            "type": "refresh",  # community examples use this type for refresh
            "client_id": self.client_id,
            "client_secret": self.client_secret,
            "refresh_token": self.refresh_token,
        }
        resp = requests.post(
            self.LAUNCHPAD_AUTH_URL,
            data=data,
            headers={"User-Agent": self.user_agent},
            timeout=self.REQUEST_TIMEOUT,
        )
        if resp.status_code != 200:
            self.logger.error("Failed to refresh Basecamp token: %s %s", resp.status_code, resp.text)
            resp.raise_for_status()
        payload = resp.json()

        new_token = payload.get("access_token")
        if not new_token:
            # Without this check the session would send "Bearer None".
            msg = "Basecamp token refresh response did not include an access_token."
            raise RuntimeError(msg)
        self.access_token = new_token

        expires_in = payload.get("expires_in")
        if expires_in:
            self._access_expires_at = time.time() + int(expires_in)
        # if the server returned a new refresh_token, update it
        if payload.get("refresh_token"):
            self.refresh_token = payload["refresh_token"]

        # update session auth header
        self._apply_auth_headers()

    @staticmethod
    def _escape_html(text: str) -> str:
        """Escape &, <, > and both quote characters for safe use in HTML."""
        return html.escape(text)

    def build_message(
        self,
        papers: List[List[str]],
    ) -> str:
        """
        Build a simple HTML body for a Basecamp Message's `content` field.
        Basecamp uses HTML rich text for message content.

        Each row is read positionally: the title is at index 4 and the link is
        the last element, e.g.
        ['10.1101/2025.09.10.674954', '2025-09-17', '2025-09-16', 'TRUE',
         'Differentiation hierarchy in adult B cell acute lymphoblastic leukemia at clonal resolution',
         '', None, 'https://doi.org/10.1101/2025.09.10.674954']

        Args:
            papers: rows describing the papers to list.

        Returns:
            The HTML string: a greeting paragraph followed by one list item per paper.
        """
        parts = ["<p><strong>Good morning ☕ Here are today's papers!</strong></p>", "<ul>"]
        for row in papers:
            # Cells may be None (see the example row), so coerce before escaping.
            # The link is escaped as well because it is placed inside a quoted
            # attribute; an unescaped quote would break out of the href.
            link = self._escape_html(str(row[-1] or ""))
            # Fall back to the link text so the anchor is never empty.
            title = self._escape_html(str(row[4] or "")) or link
            parts.append(f"<li><a href='{link}'>{title}</a></li>")
        parts.append("</ul>")
        return "".join(parts)

    async def publish_papers(self, papers_list: List[List[str]]) -> Dict[str, Any]:
        """
        Create one message on the configured board listing all given papers.

        NOTE: the HTTP calls go through `requests`, which is synchronous, so
        this coroutine does not yield while the requests are in flight.

        Args:
            papers_list: rows in the layout expected by `build_message`.

        Returns:
            The decoded JSON body of Basecamp's response.

        Raises:
            RuntimeError: if no usable token is available.
            requests.HTTPError: if Basecamp answers with an error status.
        """
        self._ensure_access_token()

        content_html = self.build_message(papers_list)

        today_str = datetime.now().strftime("%d-%m-%Y")
        body = {"subject": f"Papers from {today_str}", "content": content_html, "status": "active"}

        url = f"{self.API_BASE}/{self.account_id}/buckets/{self.bucket_id}/message_boards/{self.board_id}/messages.json"
        # The session already carries the auth/content headers. An explicit
        # timeout keeps a stalled connection from hanging the daily run forever.
        r = self._session.post(url, json=body, timeout=self.REQUEST_TIMEOUT)
        if r.status_code not in (200, 201):
            self.logger.error("Failed to create message: %s %s", r.status_code, r.text)
            r.raise_for_status()
        self.logger.info("Posted message to Basecamp board")
        return r.json()
46 changes: 45 additions & 1 deletion src/PaperBee/papers/papers_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from slack_sdk import WebClient
from tqdm import tqdm

from .basecamp_papers_formatter import BasecampPaperPublisher
from .cli import InteractiveCLIFilter
from .google_sheet import GoogleSheetsUpdater
from .llm_filtering import LLMFilter
Expand Down Expand Up @@ -77,6 +78,14 @@ def __init__(
mattermost_token: str = "",
mattermost_team: str = "",
mattermost_channel: str = "",
basecamp_account_id: str = "",
basecamp_client_id: str = "",
basecamp_client_secret: str = "",
basecamp_user_agent: str = "",
basecamp_bucket_id: str = "",
basecamp_board_id: str = "",
basecamp_access_token: str = "",
basecamp_refresh_token: str = "",
ncbi_api_key: str = "",
databases: Optional[List[str]] = None,
) -> None:
Expand Down Expand Up @@ -127,6 +136,14 @@ def __init__(
self.mattermost_token: str = mattermost_token
self.mattermost_team: str = mattermost_team
self.mattermost_channel: str = mattermost_channel
self.basecamp_account_id: str = basecamp_account_id
self.basecamp_client_id: str = basecamp_client_id
self.basecamp_client_secret: str = basecamp_client_secret
self.basecamp_user_agent: str = basecamp_user_agent
self.basecamp_bucket_id: str = basecamp_bucket_id
self.basecamp_board_id: str = basecamp_board_id
self.basecamp_access_token: str = basecamp_access_token
self.basecamp_refresh_token: str = basecamp_refresh_token
# Logger
self.logger = Logger("PapersFinder")
# NCBI API
Expand Down Expand Up @@ -238,6 +255,7 @@ def update_google_sheet(self, processed_articles: pd.DataFrame, row: int = 2) ->
credentials_json_path=self.google_credentials_json,
)
gsheet_cache = gsheet_updater.read_sheet_data(sheet_name=self.sheet_name)

if gsheet_cache:
published_dois = [article["DOI"] for article in gsheet_cache]

Expand Down Expand Up @@ -323,6 +341,27 @@ async def post_paper_to_mattermost(self, papers: List[List[str]]) -> Any:
response = await mattermost_publisher.publish_papers(papers)
return response

async def post_paper_to_basecamp(self, papers: List[List[str]]) -> Any:
    """
    Publish the given papers to Basecamp.

    Builds a `BasecampPaperPublisher` from the Basecamp settings stored on
    this instance and hands the papers to it.

    Args:
        papers (List[List[str]]): Rows describing the papers to post.

    Returns:
        Any: Whatever `BasecampPaperPublisher.publish_papers` returns.
    """
    publisher_logger = Logger("BasecampPaperPublisher")
    settings = {
        "account_id": self.basecamp_account_id,
        "client_id": self.basecamp_client_id,
        "client_secret": self.basecamp_client_secret,
        "user_agent": self.basecamp_user_agent,
        "bucket_id": self.basecamp_bucket_id,
        "board_id": self.basecamp_board_id,
        "access_token": self.basecamp_access_token,
        "refresh_token": self.basecamp_refresh_token,
    }
    publisher = BasecampPaperPublisher(publisher_logger, **settings)
    return await publisher.publish_papers(papers)

def cleanup_files(self) -> None:
"""
Deletes the search result files from the previous day to keep the directory clean.
Expand Down Expand Up @@ -352,6 +391,7 @@ async def run_daily(
post_to_telegram: bool = False,
post_to_zulip: bool = False,
post_to_mattermost: bool = False,
post_to_basecamp: bool = False,
) -> Tuple[List[List[Any]], Any | None, Any | None, Any | None, Any | None]:
"""
The main method to orchestrate finding, processing, and updating papers in a Google Sheet on a daily schedule.
Expand All @@ -372,6 +412,7 @@ async def run_daily(
response_telegram = None
response_zulip = None
response_mattermost = None
response_basecamp = None

if post_to_slack:
response_slack = self.post_paper_to_slack(papers)
Expand All @@ -385,9 +426,12 @@ async def run_daily(
if post_to_mattermost:
response_mattermost = await self.post_paper_to_mattermost(papers)

if post_to_basecamp:
response_basecamp = await self.post_paper_to_basecamp(papers)

self.cleanup_files()

return papers, response_slack, response_telegram, response_zulip, response_mattermost
return papers, response_slack, response_telegram, response_zulip, response_mattermost, response_basecamp

def send_csv(self, user_id: str, user_query: str) -> Tuple[pd.DataFrame, Any]:
"""
Expand Down