42 changes: 27 additions & 15 deletions notebooks/adhoc/coingecko/adhoc_price_data_run.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -20,19 +20,6 @@
"from op_analytics.coreutils.request import new_session"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 1. Load chain metadata to get token IDs\n",
"from op_analytics.datapipeline.chains.load import load_chain_metadata\n",
"\n",
"# Load chain metadata\n",
"chain_metadata = load_chain_metadata()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -174,8 +161,9 @@
"source": [
"with write_to_prod():\n",
" # Run the full pipeline, including extra tokens\n",
" result = execute_pull(days=365, extra_token_ids_file=extra_token_ids_file, include_top_tokens=25, fetch_metadata=True)\n",
" result = execute_pull(days=365, extra_token_ids_file=extra_token_ids_file, include_top_tokens=0, fetch_metadata=False, skip_existing_partitions=False)\n",
" # result = execute_pull(days=365, fetch_metadata=True)\n",
" # result = execute_pull(days=365, skip_existing_partitions=True, fetch_metadata=False, token_id='ethereum')\n",
" #Metadata Only\n",
" # result = execute_metadata_pull(extra_token_ids_file=extra_token_ids_file, include_top_tokens=25)\n",
" # result = execute_metadata_pull()"
@@ -198,6 +186,30 @@
"source": [
"print(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 1. Load chain metadata to get token IDs\n",
"from op_analytics.datapipeline.chains.load import load_chain_metadata\n",
"\n",
"# Load chain metadata\n",
"chain_metadata = load_chain_metadata()\n",
"\n",
"# Get unique non-null CoinGecko API keys\n",
"token_ids = (\n",
" chain_metadata.filter(pl.col(\"cgt_coingecko_api\").is_not_null())\n",
" .select(\"cgt_coingecko_api\")\n",
" .unique()\n",
" .to_series()\n",
" .to_list()\n",
")\n",
"\n",
"print(f\"Found {len(token_ids)} unique tokens with CoinGecko API keys\")"
]
}
],
"metadata": {
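Reviewer note: the token-id extraction in the new cell is a plain polars chain, so it can be sanity-checked in isolation. A minimal sketch below; the sample rows are made-up stand-ins for the real load_chain_metadata() output.

```python
import polars as pl

# Hypothetical stand-in for load_chain_metadata(); real rows come from
# op_chains_tracking/inputs/chain_metadata_raw.csv.
chain_metadata = pl.DataFrame(
    {
        "chain": ["op", "op_sepolia", "ozean", "campaign"],
        "cgt_coingecko_api": ["optimism", "optimism", "clearpool", None],
    }
)

# Same chain as in the notebook: drop nulls, deduplicate, collect to a list.
token_ids = (
    chain_metadata.filter(pl.col("cgt_coingecko_api").is_not_null())
    .select("cgt_coingecko_api")
    .unique()
    .to_series()
    .to_list()
)

print(f"Found {len(token_ids)} unique tokens with CoinGecko IDs")  # -> 2
```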
316 changes: 316 additions & 0 deletions notebooks/adhoc/defillama/defillama_price_puller_example.ipynb

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions notebooks/adhoc/dune/dune_uni_lm.ipynb
@@ -30,7 +30,7 @@
"\n",
"from op_analytics.datasources.dune.unichain_lm import DuneUniLMSummary\n",
"\n",
"# result = DuneUniLMSummary.fetch()"
"result = DuneUniLMSummary.fetch()"
]
},
{
@@ -44,7 +44,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -68,7 +68,7 @@
"\n",
"from op_analytics.coreutils.partitioned.location import DataLocation\n",
"\n",
"# read_df = Dune.UNI_LM_2025.read_polars(min_date=\"2025-05-07\", max_date=\"2025-05-11\", location=DataLocation.LOCAL)"
"read_df = Dune.UNI_LM_2025.read_polars(min_date=\"2025-05-07\", max_date=\"2025-05-11\", location=DataLocation.LOCAL)"
]
},
{
2 changes: 1 addition & 1 deletion op_chains_tracking/inputs/chain_metadata_raw.csv

Medium Sensitive Data Finding (Financial)

Attribute           Value
Data Classifier     Financial/Ethereum Address
Data Classifier ID  BUILTIN-388

Sampled Examples

Key       Value                    Row Number
ethereum  0x*AB...**86 (42 chars)  2
ethereum  0x*79...**72 (42 chars)  3
ethereum  0x*43...**21 (42 chars)  4
ethereum  0x*00...**59 (42 chars)  5
ethereum  0x*2E...**0e (42 chars)  5

Rule ID: BUILTIN-388

To ignore this finding as an exception, reply to this conversation with #wiz_ignore reason. To ignore it in all future scans, add an exception in the .wiz file or create an Ignore Rule.

@@ -16,7 +16,7 @@ campaign,Camp Network,,,,modern,Standard,,,,yes,,,https://www.campnetwork.xyz/,,
ozean,Ozean,,,,,Standard,,,,yes,Caldera,,https://ozean.finance/,,,,ozean,,CPOOL,clearpool,L2,2,ethereum,ethereum,,,,,,,,,,,,,,
soneium,Soneium,1868,5E5E5F,1/14/25,modern,Standard,12/2/24,,,yes,Self-Hosted,https://rpc.soneium.org,https://soneium.org/,https://soneium.blockscout.com/,,Soneium,soneium,,ETH,,L2,2,ethereum,ethereum,0x7A8Ed66B319911A0F3E7288BDdAB30d9c0C875c3,soneium,,soneium,,,,0x6776BE80dBAda6A02B5F2095cF13734ac303B8d1,0x008dC74CecC9dedA8595B2Fe210cE5979F0BfA8e,0x400c164C4a8cA84385B70EEd6eB03ea847c8E1b8,,0xeb9bf100225c214Efc3E7C651ebbaDcF85177607,0x88e529A6ccd302c948689Cd5156C83D4614FAE92,0x512A3d2c7a43BD9261d2B8E8C9c70D4bd4D503C0
op,OP Mainnet,10,FF0420,11/11/21,modern,OP Mainnet,6/23/21,,6/23/21,no,OP Mainnet,https://mainnet.optimism.io,https://optimism.io,https://explorer.optimism.io/,,"Optimism,OP Mainnet","optimism,op-mainnet",optimism,ETH,,L2,2,ethereum,ethereum,0x229047fed2591dbec1eF1118d64F7aF3dB9EB290,op,op_sepolia,optimism,optimism,optimism,optimism,0x6887246668a3b87F54DeB3b94Ba47a6f63F32985,0xff00000000000000000000000000000000000010,0x473300df21D047806A082244b417f96b32f13A33,0xdfe97868233d1aa22e815a266982f2cf17685a27,0x99C9fc46f92E8a1c0deC1b1747d010903E884bE1,,0xe5965Ab5962eDc7477C8520243A95517CD252fA9
fraxtal,Fraxtal,252,979797,3/11/24,modern,Non-Standard,2/1/24,,,yes,Self-Hosted,https://rpc.frax.com,https://www.frax.com/,,,Fraxtal,fraxtal,,"{0:frxETH,19571245:frax}","{0:frax-ether,19571245:frax-share}",L2,2,fraxtalda,ethereum,0x34a9f273cbD847d49c3De015FC26c3E66825f8b2,fraxtal,,frax,,,frax,0x6017f75108f251a488B045A7ce2a7C15b179d1f2,0xfF000000000000000000000000000000000420fC,0xFb90465f3064fF63FC460F01A6307eC73d64bc50,0x66CC916Ed5C6C2FA97014f7D1cD141528Ae171e4,0x34C0bD5877A5Ee7099D0f5688D65F4bB9158BDE2,,
fraxtal,Fraxtal,252,979797,3/11/24,modern,Non-Standard,2/1/24,,,yes,Self-Hosted,https://rpc.frax.com,https://www.frax.com/,,,Fraxtal,fraxtal,,"{0:""frxETH"",19571245:""FRAX""}","{0:""frax-ether"",19571245:""frax-share""}",L2,2,fraxtalda,ethereum,0x34a9f273cbD847d49c3De015FC26c3E66825f8b2,fraxtal,,frax,,,frax,0x6017f75108f251a488B045A7ce2a7C15b179d1f2,0xfF000000000000000000000000000000000420fC,0xFb90465f3064fF63FC460F01A6307eC73d64bc50,0x66CC916Ed5C6C2FA97014f7D1cD141528Ae171e4,0x34C0bD5877A5Ee7099D0f5688D65F4bB9158BDE2,,
redstone,Redstone,690,f34242,5/1/24,modern,Non-Standard,5/1/24,,,yes,Self-Hosted,https://rpc.redstonechain.com,https://redstone.xyz/,,,Redstone,redstone,redstone,ETH,,L2,2,op-plasma,ethereum,0x8f2428F7189c0d92D1c4a5358903A8c80Ec6a69D,redstone,,redstone,,,,0xA31cb9Bc414601171D4537580f98F66C03aECd43,0xff00000000000000000000000000000000000690,,0xa426A052f657AEEefc298b3B5c35a470e4739d69,0xc473ca7E02af24c129c2eEf51F2aDf0411c1Df69,,
cyber,Cyber,7560,00D82F,5/15/24,modern,Non-Standard,5/15/24,,,yes,AltLayer,https://cyber.alt.technology/,https://cyber.co/,,,Cyber,cyber,,ETH,,L2,2,eigenda,ethereum,0x5D1F4bbaF6D484fA9D5D9705f92dE6063bff6055,cyber,,cyber,,,cyber,0xf0748C52EDC23135d9845CDFB91279Cf61ee14b4,0xfF00000000000000000000000000000000001d88,0xF2987f0A626c8D29dFB2E0A21144ca3026d6F1E1,0xa669A743b065828682eE16109273F5CFeF5e676d,0x12a580c05466eefb2c467C6b115844cDaF55B255,,
kroma,Kroma,255,7fe342,9/6/23,modern,Non-Standard,5/29/24,,,yes,Self-Hosted,https://api.kroma.network/,https://kroma.network/,,,Kroma,kroma,,ETH,,L2,2,ethereum,ethereum,,kroma,,kroma,,,,0x41b8cD6791De4D8f9E0eaF7861aC506822AdcE12,0xfF00000000000000000000000000000000000255,0x,0x180c77aE51a9c505a43A2C7D81f8CE70cacb93A6,0x827962404D7104202C5aaa6b929115C8211d9596,,
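Reviewer note: the fraxtal row now quotes the values inside its {block_height: symbol} cells, which makes each cell a valid Python dict literal. Assuming downstream code wants these as mappings from activation block to token symbol, a hedged parsing sketch (parse_token_mapping is a hypothetical helper, not something in this repo):

```python
import ast

def parse_token_mapping(cell: str) -> dict[int, str]:
    # Hypothetical helper: with the quoting added in this diff, a cell like
    # {0:"frxETH",19571245:"FRAX"} is a valid Python dict literal mapping
    # activation block height to token symbol; a bare symbol applies from 0.
    cell = cell.strip()
    if cell.startswith("{"):
        return ast.literal_eval(cell)
    return {0: cell}

print(parse_token_mapping('{0:"frxETH",19571245:"FRAX"}'))
# -> {0: 'frxETH', 19571245: 'FRAX'}
print(parse_token_mapping("ETH"))
# -> {0: 'ETH'}
```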
24 changes: 22 additions & 2 deletions reference_data/historical_dumps/backfill_market_data.ipynb
@@ -2,9 +2,29 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"ERROR:yfinance:Failed to get ticker 'ETH-USD' reason: Expecting value: line 1 column 1 (char 0)\n",
"ERROR:yfinance:ETH-USD: No timezone found, symbol may be delisted\n"
]
},
{
"ename": "AttributeError",
"evalue": "'Index' object has no attribute 'tz_convert'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[4], line 18\u001b[0m\n\u001b[1;32m 14\u001b[0m hist \u001b[38;5;241m=\u001b[39m eth\u001b[38;5;241m.\u001b[39mhistory(period\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmax\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 16\u001b[0m \u001b[38;5;66;03m# Create a DataFrame with date and average daily price\u001b[39;00m\n\u001b[1;32m 17\u001b[0m df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame({\n\u001b[0;32m---> 18\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtimestamp\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[43mhist\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtz_convert\u001b[49m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mUTC\u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;241m.\u001b[39mtz_localize(\u001b[38;5;28;01mNone\u001b[39;00m), \u001b[38;5;66;03m# Convert to UTC then remove timezone info\u001b[39;00m\n\u001b[1;32m 19\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124meth_usd\u001b[39m\u001b[38;5;124m'\u001b[39m: (hist[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mHigh\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m+\u001b[39m hist[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mLow\u001b[39m\u001b[38;5;124m'\u001b[39m]) \u001b[38;5;241m/\u001b[39m \u001b[38;5;241m2\u001b[39m\n\u001b[1;32m 20\u001b[0m })\n\u001b[1;32m 22\u001b[0m \u001b[38;5;66;03m# Reset index to make Date a column\u001b[39;00m\n\u001b[1;32m 23\u001b[0m df \u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mreset_index(drop\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
"\u001b[0;31mAttributeError\u001b[0m: 'Index' object has no attribute 'tz_convert'"
]
}
],
"source": [
"import pandas as pd\n",
"import yfinance as yf\n",
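Reviewer note: the committed output above shows the failure mode worth guarding against. When the yfinance request fails, eth.history() returns an empty frame whose index is a plain Index rather than a DatetimeIndex, so index.tz_convert('UTC') raises the captured AttributeError. A defensive sketch of that cell, assuming the goal is naive-UTC daily midpoints:

```python
import pandas as pd
import yfinance as yf

eth = yf.Ticker("ETH-USD")
hist = eth.history(period="max")

# When the request fails, history() returns an empty frame whose index is a
# plain Index, and Index has no tz_convert. Guard both cases before converting.
if hist.empty:
    raise RuntimeError("yfinance returned no data for ETH-USD; retry later")

idx = hist.index
if isinstance(idx, pd.DatetimeIndex) and idx.tz is not None:
    idx = idx.tz_convert("UTC").tz_localize(None)  # naive UTC timestamps

df = pd.DataFrame({
    "timestamp": idx,
    "eth_usd": (hist["High"] + hist["Low"]) / 2,  # daily midpoint price
}).reset_index(drop=True)
```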
115 changes: 96 additions & 19 deletions src/op_analytics/coreutils/request.py
@@ -1,8 +1,8 @@
import time
import re
from urllib3.util.retry import Retry

import requests
import stamina
from requests.adapters import HTTPAdapter
from typing import Any
from op_analytics.coreutils.logger import structlog
@@ -17,6 +17,40 @@
)


def mask_api_key_in_text(text: str) -> str:
"""
Mask API keys in any text (error messages, URLs, etc.).

This function looks for common API key patterns and masks them for secure logging.

Args:
text: Text that may contain API keys

Returns:
Text with API keys masked
"""
# Pattern to find DeFiLlama API keys
defillama_pattern = r"(https://pro-api\.llama\.fi/)([^/\s]+)"

def replace_defillama_match(match):
base_url, api_key = match.groups()
if len(api_key) > 8:
masked_key = api_key[:4] + "*" * (len(api_key) - 8) + api_key[-4:]
else:
masked_key = "*" * len(api_key)
return f"{base_url}{masked_key}"

# Apply DeFiLlama masking
text = re.sub(defillama_pattern, replace_defillama_match, text)

# Add more patterns here for other APIs as needed
# Example for generic API keys in URLs:
# generic_pattern = r"(https?://[^/]+/)([a-zA-Z0-9]{20,})"
# text = re.sub(generic_pattern, replace_generic_match, text)

return text


def new_session() -> requests.Session:
session = requests.Session()
adapter = HTTPAdapter(max_retries=DEFAULT_RETRY_STRATEGY)
@@ -26,11 +60,6 @@ def new_session() -> requests.Session:
return session


def retry_logger(exc: Exception) -> bool:
log.error(f"retrying exception {exc}")
return True


def get_data(
session: requests.Session,
url: str,
@@ -62,17 +91,18 @@ def get_data(
timeout=timeout,
)

# Retry on exceptions.
for attempt in stamina.retry_context(
on=retry_logger,
attempts=retry_attempts,
timeout=retries_timeout,
wait_initial=retries_wait_initial,
wait_max=retries_wait_max,
):
with attempt:
if attempt.num > 1:
log.warning(f"retry attempt {url}", attempt=attempt.num)
# Custom retry logic to avoid stamina's API key exposure
last_exception = None
wait_time = retries_wait_initial
total_wait_time = 0

for attempt in range(1, retry_attempts + 1):
try:
if attempt > 1:
# Mask API keys in the URL before logging retry attempts
masked_url = mask_api_key_in_text(url)
log.warning(f"retry attempt {masked_url}", attempt=attempt)

return _get_data(
session=session,
url=url,
@@ -82,6 +112,49 @@
timeout=timeout,
)

except Exception as e:
last_exception = e

# Mask API keys in the exception message before logging
masked_error = mask_api_key_in_text(str(e))
log.error(f"retrying exception {masked_error}")

# If this is the last attempt, don't wait
if attempt == retry_attempts:
break

# Check if we've exceeded the total timeout
if total_wait_time >= retries_timeout:
log.error(
"retry timeout exceeded",
total_wait_time=total_wait_time,
retries_timeout=retries_timeout,
)
break

# Wait before the next attempt
actual_wait = min(wait_time, retries_wait_max)
if total_wait_time + actual_wait > retries_timeout:
actual_wait = retries_timeout - total_wait_time

if actual_wait > 0:
log.warning(
f"waiting {actual_wait}s before retry", attempt=attempt, wait_time=actual_wait
)
time.sleep(actual_wait)
total_wait_time += actual_wait

# Exponential backoff
wait_time *= 2

# If we get here, all attempts failed
if last_exception:
# Mask API keys in the final exception message
masked_error = mask_api_key_in_text(str(last_exception))
raise type(last_exception)(masked_error) from last_exception
else:
raise Exception("All retry attempts failed")


def _get_data(
session: requests.Session,
@@ -97,8 +170,12 @@ def _get_data(
resp.raise_for_status()

if resp.status_code != 200:
raise Exception(f"status={resp.status_code}, url={url!r}")
# Mask API keys in the URL before including in exception
masked_url = mask_api_key_in_text(url)
raise Exception(f"status={resp.status_code}, url={masked_url!r}")

if emit_log:
log.info(f"Fetched from {url}: {time.time() - start:.2f} seconds")
# Mask API keys in the URL before logging
masked_url = mask_api_key_in_text(url)
log.info(f"Fetched from {masked_url}: {time.time() - start:.2f} seconds")
return resp.json()
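Reviewer note: a quick, self-contained sketch of the two behaviors this diff adds to request.py. The mask_api_key_in_text import matches the function added above; the backoff parameters are illustrative, since the defaults (retries_wait_initial and friends) are defined outside the hunks shown here.

```python
from op_analytics.coreutils.request import mask_api_key_in_text

# The DeFiLlama pattern keeps the first and last 4 characters of the key
# and masks the middle, so logs stay correlatable without leaking secrets.
url = "https://pro-api.llama.fi/abcd1234efgh5678/yields/pools"
print(mask_api_key_in_text(url))
# -> https://pro-api.llama.fi/abcd********5678/yields/pools

# Rough shape of the new retry schedule in get_data: doubling waits capped
# at wait_max, stopping once cumulative waiting would exceed the timeout.
# These parameter values are illustrative, not the module's defaults.
wait, total, waits = 1.0, 0.0, []
wait_max, timeout, attempts = 8.0, 20.0, 6
for _ in range(attempts - 1):
    step = min(wait, wait_max, timeout - total)
    if step <= 0:
        break
    waits.append(step)
    total += step
    wait *= 2
print(waits)
# -> [1.0, 2.0, 4.0, 8.0, 5.0]
```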