Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
73 commits
Select commit Hold shift + click to select a range
f714c7f
wip
lucia-sb Jun 18, 2025
391a360
Merge branch 'master' into lucia/improve-package-size-analyzer-accuracy
lucia-sb Jun 24, 2025
71f8c84
Merge branch 'master' into lucia/improve-package-size-analyzer-accuracy
lucia-sb Jul 1, 2025
38bc7c0
remove changes
lucia-sb Jul 1, 2025
5e442f0
remove tests
lucia-sb Jul 1, 2025
5b4db44
remove funcions
lucia-sb Jul 2, 2025
fdb7e1e
patch
lucia-sb Jul 2, 2025
0672b2e
remove imports for patch
lucia-sb Jul 2, 2025
2c63f2c
patch
lucia-sb Jul 2, 2025
9cd4abe
print in patch
lucia-sb Jul 3, 2025
fbad066
fix typo
lucia-sb Jul 3, 2025
e11957c
repack wheels
lucia-sb Jul 3, 2025
cd33605
fix
lucia-sb Jul 3, 2025
3681ff6
add wheel library
lucia-sb Jul 3, 2025
ecc2fd9
comment patch
lucia-sb Jul 3, 2025
34136f9
after repair
lucia-sb Jul 3, 2025
b6e8713
patch
lucia-sb Jul 4, 2025
40dfd38
uncomment patch call
lucia-sb Jul 4, 2025
989dcd2
skip unchanged
lucia-sb Jul 8, 2025
8508bb2
skip unchanged
lucia-sb Jul 8, 2025
c8cdc85
skip unchanged
lucia-sb Jul 8, 2025
623c0b4
debug
lucia-sb Jul 8, 2025
343a13a
debug
lucia-sb Jul 9, 2025
925ed4b
debug
lucia-sb Jul 9, 2025
a711b96
after repair
lucia-sb Jul 9, 2025
e8891c4
after repair
lucia-sb Jul 9, 2025
4e9bdc8
fix
lucia-sb Jul 9, 2025
fd2b507
classify wheels
lucia-sb Jul 11, 2025
7b9041f
fix
lucia-sb Jul 11, 2025
3ae7429
Merge branch 'master' into lucia/improve-package-size-analyzer-accuracy
lucia-sb Jul 14, 2025
242dc24
classify wheels
lucia-sb Jul 14, 2025
495b81c
Merge remote-tracking branch 'refs/remotes/origin/lucia/improve-packa…
lucia-sb Jul 14, 2025
a395b57
remove import
lucia-sb Jul 14, 2025
3d268c2
patch
lucia-sb Jul 14, 2025
2649bfa
patch fix
lucia-sb Jul 14, 2025
bb898e0
fix
lucia-sb Jul 14, 2025
2cd2e51
Merge branch 'master' into lucia/improve-package-size-analyzer-accuracy
lucia-sb Jul 15, 2025
f53a55c
remove quotes
lucia-sb Jul 18, 2025
ec9ca1b
Merge branch 'lucia/improve-package-size-analyzer-accuracy' of github…
lucia-sb Jul 18, 2025
7e1fb3b
replace quote
lucia-sb Jul 18, 2025
fb3cc71
fix quotes in patch
lucia-sb Jul 23, 2025
e626c0c
remove patch
lucia-sb Jul 23, 2025
9461354
remove patch
lucia-sb Jul 23, 2025
a7303bd
simplify utils and .toml
lucia-sb Jul 23, 2025
667e53c
toml
lucia-sb Jul 23, 2025
46c9bfb
toml
lucia-sb Jul 23, 2025
32abda2
toml
lucia-sb Jul 23, 2025
57dc620
typo
lucia-sb Aug 19, 2025
ef51ad4
change toml format to gitignore patterns
lucia-sb Aug 21, 2025
5aa5eb4
publish wheels for testing
lucia-sb Aug 28, 2025
51f0a9b
Merge branch 'master' into lucia/improve-package-size-analyzer-accuracy
lucia-sb Aug 28, 2025
df287e2
test
lucia-sb Aug 28, 2025
cb03b6a
rename wheels
lucia-sb Aug 29, 2025
76db4bf
uncomment
lucia-sb Sep 3, 2025
7efbd39
Merge branch 'master' into lucia/improve-package-size-analyzer-accuracy
lucia-sb Sep 10, 2025
221352f
remove built extra index
lucia-sb Sep 10, 2025
9579f04
remove built flag
lucia-sb Sep 10, 2025
bbdfb50
Merge branch 'master' into lucia/improve-package-size-analyzer-accuracy
lucia-sb Sep 29, 2025
bd853e1
upload wheels
lucia-sb Sep 29, 2025
7f212fe
Uncomment condition to publish wheels
lucia-sb Oct 31, 2025
301658e
Merge branch 'master' into lucia/improve-package-size-analyzer-accuracy
lucia-sb Oct 31, 2025
ca20180
Use PEP 517
lucia-sb Oct 31, 2025
ba09017
Add debugging logs
lucia-sb Oct 31, 2025
b74f756
Add debugging logs
lucia-sb Oct 31, 2025
804bc7b
Change parameter format
lucia-sb Oct 31, 2025
661846c
Add pyproject
lucia-sb Oct 31, 2025
e3ff87a
remove flag
lucia-sb Oct 31, 2025
fd23319
Change flag
lucia-sb Oct 31, 2025
5bf7360
Fix typo
lucia-sb Oct 31, 2025
6c49f37
Add debug logs
lucia-sb Oct 31, 2025
f2f00af
remove pyproject
lucia-sb Oct 31, 2025
b3d36c5
Change flag
lucia-sb Oct 31, 2025
a8f32c2
Remove flag
lucia-sb Oct 31, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .builders/images/runner_dependencies.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@ urllib3==2.2.0
auditwheel==6.0.0; sys_platform == 'linux'
delvewheel==1.5.2; sys_platform == 'win32'
delocate==0.13.0; sys_platform == 'darwin'
wheel==0.45.1
pathspec==0.12.1
96 changes: 96 additions & 0 deletions .builders/scripts/build_backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import inspect
import shutil
import sys
import tomllib
import zipfile
from functools import cache
from pathlib import Path

import pathspec
from setuptools import build_meta as _orig


def remove_test_files(wheel_path: Path) -> None:
    """
    Remove excluded files and directories from a built wheel, in place.

    Every archive entry for which `is_excluded_from_wheel` returns True is dropped.
    The remaining entries are copied to a temporary archive next to the wheel, which
    then replaces the original file. The rows of the removed files are also dropped
    from the `.dist-info/RECORD` manifest so that it keeps matching the archive.
    Prints the number of entries removed.
    """
    tmp_wheel = wheel_path.with_suffix(".tmp.whl")
    removed_count = 0

    try:
        with (
            zipfile.ZipFile(wheel_path, "r") as zin,
            zipfile.ZipFile(tmp_wheel, "w", compression=zipfile.ZIP_DEFLATED) as zout,
        ):
            entries = zin.infolist()
            # Decide up front what goes away: RECORD may be stored before or after
            # the entries it describes.
            removed = {info.filename for info in entries if is_excluded_from_wheel(info.filename)}

            for info in entries:
                rel = info.filename
                if rel in removed:
                    removed_count += 1
                    continue  # skip excluded file or directory

                data = zin.read(rel)
                if removed and _is_wheel_record(rel):
                    data = _drop_record_rows(data, removed)
                zout.writestr(info, data)
    except BaseException:
        # Do not leave a partially written archive next to the wheel.
        tmp_wheel.unlink(missing_ok=True)
        raise

    shutil.move(tmp_wheel, wheel_path)
    print(f"Removed {removed_count} files from {wheel_path.name}")


def _is_wheel_record(name: str) -> bool:
    """Return True if `name` is the `RECORD` file of a top-level `.dist-info` directory."""
    directory, _, filename = name.partition("/")
    return filename == "RECORD" and directory.endswith(".dist-info")


def _drop_record_rows(record: bytes, removed: set[str]) -> bytes:
    """Return the `RECORD` content without the rows whose path is in `removed`."""
    import csv
    import io

    kept = io.StringIO(newline="")
    writer = csv.writer(kept, lineterminator="\n")
    for row in csv.reader(io.StringIO(record.decode("utf-8"), newline="")):
        # Blank lines carry no information; rows are `path,hash,size`.
        if row and row[0] not in removed:
            writer.writerow(row)
    return kept.getvalue().encode("utf-8")


def is_excluded_from_wheel(path: str | Path) -> bool:
    """
    Tell whether `path` (file or directory) is excluded per files_to_remove.toml.

    The path is normalized to POSIX form and checked against the compiled spec
    twice: as given, and with a trailing '/' appended so that patterns written
    for directories (listed with a trailing '/') can match as well.
    Covers, for example:
    - type annotation files: **/*.pyi, **/py.typed
    - test directories listed with a trailing '/'
    """
    excluded = _load_excluded_spec()
    posix_path = Path(path).as_posix()
    candidates = (posix_path, posix_path + "/")
    return any(excluded.match_file(candidate) for candidate in candidates)


@cache
def _load_excluded_spec() -> pathspec.PathSpec:
    """
    Build the exclusion spec from files_to_remove.toml.

    The file is read from the directory containing this module; its
    `excluded_paths` list (empty when the key is absent) is compiled with
    .gitignore-style semantics. The result is cached, so the file is parsed
    only once per process.
    """
    toml_file = Path(__file__).parent / "files_to_remove.toml"
    with toml_file.open("rb") as stream:
        excluded_paths = tomllib.load(stream).get("excluded_paths", [])

    return pathspec.PathSpec.from_lines("gitignore", excluded_paths)


def build_wheel(wheel_directory, config_settings=None, metadata_directory=None):
    """
    Build a wheel with setuptools, then strip the excluded files from it.

    Delegates to the setuptools backend's `build_wheel` with the same arguments,
    post-processes the resulting file inside `wheel_directory` with
    `remove_test_files`, and returns the wheel's file name as reported by setuptools.
    """
    built_name = _orig.build_wheel(wheel_directory, config_settings, metadata_directory)

    # The backend reports a bare file name; it lives inside wheel_directory.
    remove_test_files(Path(wheel_directory, built_name))

    return built_name


# Proxy the remaining PEP 517 / PEP 660 hooks to setuptools so that only `build_wheel`
# is customized by this module.
# `setuptools.build_meta` exposes its hooks as bound methods of a backend instance, which
# `inspect.isfunction` does not match, so the hooks are looked up explicitly by name.
# Optional hooks that the installed setuptools does not provide are skipped, and names
# already defined in this module (the custom `build_wheel`) are never overwritten.
_PROXIED_HOOK_NAMES = (
    "get_requires_for_build_wheel",
    "get_requires_for_build_sdist",
    "prepare_metadata_for_build_wheel",
    "build_sdist",
    "get_requires_for_build_editable",
    "prepare_metadata_for_build_editable",
    "build_editable",
)

for _hook_name in _PROXIED_HOOK_NAMES:
    if _hook_name not in globals() and hasattr(_orig, _hook_name):
        globals()[_hook_name] = getattr(_orig, _hook_name)
129 changes: 117 additions & 12 deletions .builders/scripts/build_wheels.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,38 @@
from __future__ import annotations

import argparse
import email
import json
import os
import re
import shutil
import subprocess
import sys
import time
import tomllib
from functools import cache
from hashlib import sha256
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import TypedDict
from zipfile import ZipFile

import pathspec
import urllib3
from dotenv import dotenv_values
from utils import extract_metadata, normalize_project_name
from utils import iter_wheels

# Base URL under which the custom package indexes below are located.
INDEX_BASE_URL = 'https://agent-int-packages.datadoghq.com'
# Index URLs for the `external` and `built` paths under the base URL.
CUSTOM_EXTERNAL_INDEX = f'{INDEX_BASE_URL}/external'
CUSTOM_BUILT_INDEX = f'{INDEX_BASE_URL}/built'
# Matches runs of `-`, `_` and `.`; `normalize_project_name` replaces each run with a single `-`.
UNNORMALIZED_PROJECT_NAME_CHARS = re.compile(r'[-_.]+')


class WheelSizes(TypedDict):
    """Integer sizes recorded for a single wheel file."""

    # Size of the wheel archive file itself.
    compressed: int
    # Presumably the total size of the archive's contents once extracted
    # (computed by `calculate_wheel_sizes`) -- TODO confirm.
    uncompressed: int


if sys.platform == 'win32':
PY3_PATH = Path('C:\\py3\\Scripts\\python.exe')
PY2_PATH = Path('C:\\py2\\Scripts\\python.exe')
Expand Down Expand Up @@ -62,6 +74,83 @@ def check_process(*args, **kwargs) -> subprocess.CompletedProcess:
return process


def extract_metadata(wheel: Path) -> email.message.Message:
    """
    Parse and return the metadata stored in a wheel's `.dist-info/METADATA` file.

    The `.dist-info` directory is the first top-level archive directory whose name
    ends with `.dist-info`. The `METADATA` file inside it is decoded as UTF-8 and
    parsed as an email-style message, so fields are read like `metadata['Name']`.

    Raises:
        RuntimeError: if the archive has no `.dist-info` directory, or that
            directory has no `METADATA` file.
    """
    with ZipFile(str(wheel)) as zip_archive:
        for path in zip_archive.namelist():
            root = path.split('/', 1)[0]
            if root.endswith('.dist-info'):
                dist_info_dir = root
                break
        else:
            message = f'Could not find the `.dist-info` directory in wheel: {wheel.name}'
            raise RuntimeError(message)

        try:
            with zip_archive.open(f'{dist_info_dir}/METADATA') as zip_file:
                metadata_file_contents = zip_file.read().decode('utf-8')
        except KeyError:
            # ZipFile.open raises KeyError for a missing member; report it as a
            # malformed wheel without the noisy chained traceback.
            message = f'Could not find a `METADATA` file in the `{dist_info_dir}` directory'
            raise RuntimeError(message) from None

    return email.message_from_string(metadata_file_contents)


def normalize_project_name(name: str) -> str:
    """Return `name` lowercased, with each run of `-`, `_` or `.` collapsed to one `-`."""
    # https://peps.python.org/pep-0503/#normalized-names
    collapsed = UNNORMALIZED_PROJECT_NAME_CHARS.sub('-', name)
    return collapsed.lower()


@cache
def get_wheel_hashes(project) -> dict[str, str]:
    """
    Return the sha256 hash of every wheel PyPI publishes for `project`.

    The result maps wheel file names to their hex digests, taken from the JSON
    form of PyPI's simple index. Files without a sha256 hash are omitted.
    A 404 response means the project is not on PyPI and yields an empty mapping.
    Other failures are retried with exponential backoff a bounded number of
    times. Results are cached per project for the life of the process.

    Raises:
        RuntimeError: if the index could not be fetched after all attempts.
    """
    max_attempts = 5
    retry_wait = 2
    for attempt in range(1, max_attempts + 1):
        try:
            response = urllib3.request(
                'GET',
                f'https://pypi.org/simple/{project}',
                headers={"Accept": "application/vnd.pypi.simple.v1+json"},
            )
        except urllib3.exceptions.HTTPError as e:
            err_msg = f'Failed to fetch hashes for `{project}`: {e}'
        else:
            if response.status == 200:
                data = response.json()
                return {
                    file['filename']: file['hashes']['sha256']
                    for file in data['files']
                    if file['filename'].endswith('.whl') and 'sha256' in file['hashes']
                }

            if response.status == 404:
                # Retrying cannot help: the project is simply not published on PyPI.
                return {}

            err_msg = f'Failed to fetch hashes for `{project}`, status code: {response.status}'

        print(err_msg)
        if attempt < max_attempts:
            print(f'Retrying in {retry_wait} seconds')
            time.sleep(retry_wait)
            retry_wait *= 2

    raise RuntimeError(f'Giving up fetching hashes for `{project}` after {max_attempts} attempts')


def wheel_was_built(wheel: Path) -> bool:
    """
    Tell whether `wheel` differs from what PyPI publishes under the same file name.

    Returns True when PyPI lists no wheel with this file name for the project, or
    when the sha256 digest of the local file differs from the published one.
    Returns False only when the digests are equal.
    """
    metadata = extract_metadata(wheel)
    published_hashes = get_wheel_hashes(normalize_project_name(metadata['Name']))

    published_hash = published_hashes.get(wheel.name)
    if published_hash is None:
        return True

    local_hash = sha256(wheel.read_bytes()).hexdigest()
    return local_hash != published_hash


def add_dependency(dependencies: dict[str, str], sizes: dict[str, WheelSizes], wheel: Path) -> None:
    """
    Record the version and sizes of `wheel` in the given mappings.

    Both mappings are updated in place under the wheel's normalized project name:
    `dependencies` receives the version, and `sizes` receives the version together
    with the values returned by `calculate_wheel_sizes`.
    """
    metadata = extract_metadata(wheel)
    name = normalize_project_name(metadata['Name'])
    version = metadata['Version']

    dependencies[name] = version

    wheel_sizes = calculate_wheel_sizes(wheel)
    sizes[name] = {'version': version, **wheel_sizes}


def calculate_wheel_sizes(wheel_path: Path) -> WheelSizes:
compressed_size = wheel_path.stat(follow_symlinks=True).st_size
with ZipFile(wheel_path) as zf:
Expand Down Expand Up @@ -92,6 +181,13 @@ def main():

with TemporaryDirectory() as d:
staged_wheel_dir = Path(d).resolve()
staged_built_wheels_dir = staged_wheel_dir / 'built'
staged_external_wheels_dir = staged_wheel_dir / 'external'

# Create the directories
staged_built_wheels_dir.mkdir(parents=True, exist_ok=True)
staged_external_wheels_dir.mkdir(parents=True, exist_ok=True)

env_vars = dict(os.environ)
env_vars['PATH'] = f'{python_path.parent}{os.pathsep}{env_vars["PATH"]}'
env_vars['PIP_WHEEL_DIR'] = str(staged_wheel_dir)
Expand Down Expand Up @@ -121,35 +217,46 @@ def main():
if constraints_file := env_vars.get('PIP_CONSTRAINT'):
env_vars['PIP_CONSTRAINT'] = path_to_uri(constraints_file)

print("--------------------------------")
print("Building wheels")
print("--------------------------------")
# Fetch or build wheels
command_args = [
str(python_path),
'-m',
'pip',
'wheel',
'--config-settings',
f'--build-backend={MOUNT_DIR / "scripts" / "build_backend.py"}',
'-r',
str(MOUNT_DIR / 'requirements.in'),
'--wheel-dir',
str(staged_wheel_dir),
# Temporarily removing extra index urls. See below.
# '--extra-index-url', CUSTOM_EXTERNAL_INDEX,
# '--extra-index-url',
# CUSTOM_EXTERNAL_INDEX,
]
# Temporarily disable extra index urls. There are broken wheels in the gcloud bucket
# while working on removing tests from them. Adding extra indices causes undefined behavior
# and can pull a broken image, preventing the building from running.
# if args.use_built_index:
# command_args.extend(['--extra-index-url', CUSTOM_BUILT_INDEX])

check_process(command_args, env=env_vars)
print("--------------------------------")
print("Finished building wheels")
print("--------------------------------")
# Classify wheels
for wheel in iter_wheels(staged_wheel_dir):
if wheel_was_built(wheel):
shutil.move(wheel, staged_built_wheels_dir)
else:
shutil.move(wheel, staged_external_wheels_dir)

# Repair wheels
check_process(
[
sys.executable,
'-u',
str(MOUNT_DIR / 'scripts' / 'repair_wheels.py'),
'--source-dir',
str(staged_wheel_dir),
'--source-built-dir',
str(staged_built_wheels_dir),
'--source-external-dir',
str(staged_external_wheels_dir),
'--built-dir',
str(built_wheels_dir),
'--external-dir',
Expand All @@ -166,8 +273,6 @@ def main():
project_name = normalize_project_name(project_metadata['Name'])
project_version = project_metadata['Version']
dependencies[project_name] = project_version


sizes[project_name] = {'version': project_version, **calculate_wheel_sizes(wheel)}

output_path = MOUNT_DIR / 'sizes.json'
Expand Down
45 changes: 45 additions & 0 deletions .builders/scripts/files_to_remove.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
excluded_paths = [
# --- Type annotation ---
"krb5/**/*.pyi",
"krb5/**/py.typed",

"Cryptodome/**/*.pyi",
"Cryptodome/**/py.typed",

"ddtrace/**/*.pyi",
"ddtrace/**/py.typed",

"pyVmomi/**/*.pyi",
"pyVmomi/**/py.typed",

"gssapi/**/*.pyi",
"gssapi/**/py.typed",

# --- Tests ---

"idlelib/idle_test/",
"bs4/tests/",
"Cryptodome/SelfTest/",
"gssapi/tests/",
"keystoneauth1/tests/",
"lazy_loader/tests/",
"openstack/tests/",
"os_service_types/tests/",
"pbr/tests/",
"pkg_resources/tests/",
"pip/_vendor/colorama/tests/",
"psutil/tests/",
"requests_unixsocket/tests/",
"securesystemslib/_vendor/ed25519/test_data/",
"setuptools/_distutils/compilers/C/tests/",
"setuptools/_vendor/packaging/tests/",
"setuptools/_distutils/tests/",
"setuptools/tests/",
"simplejson/tests/",
"stevedore/tests/",
"supervisor/tests/",
"/test/",
"vertica_python/tests/",
"websocket/tests/",
"win32com/test/",
]
Loading
Loading