Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class PythonDependencyTransformer {

Expand All @@ -16,6 +18,15 @@ public class PythonDependencyTransformer {
private static final List<String> TOKEN_CLEANUP_CHARS = Arrays.asList("\"", "'");
private static final List<String> TOKEN_IGNORE_AFTER_CHARS = Arrays.asList(",", "[", "==", ">=", "~=", "<=", ">", "<");

// Matching version from URI of direct reference like "https://download.pytorch.org/whl/cpu/torch-2.6.0%2Bcpu-cp310-cp310-linux_x86_64.whl"
// Capture groups: 1 = distribution name, 2 = version, 3 = file extension.
// A '-' is accepted inside a version segment only when the URI does NOT end in ".whl". Wheel file names use '-'
// strictly as a field separator (name-version[-build]-python-abi-platform.whl), so accepting it there would
// swallow the trailing tags, e.g. "2.31.0-py3-none-any" for "requests-2.31.0-py3-none-any.whl".
// For every other extension the hyphen is still allowed, so results for non-wheel URIs are unchanged.
private static final Pattern URI_VERSION_PATTERN = Pattern.compile(".*/([A-Za-z0-9_.-]+)-([0-9]+(?:\\.(?:[0-9A-Za-z_]|-(?!.*\\.whl$))+)*).*\\.(whl|zip|tar\\.gz|tar\\.bz2|tar)$");

// Matching version from VCS URL of direct reference like "git+https://github.com/pallets/flask@2.3.3"
private static final Pattern VCS_VERSION_PATTERN = Pattern.compile(".*@([0-9]+(?:\\.[0-9]+)*(?:[A-Za-z0-9._-]*)?).*");

// Matching version from archive or release URL of direct reference like "https://github.com/pypa/pip/archive/1.3.1.zip"
private static final Pattern ARCHIVE_VERSION_PATTERN = Pattern.compile(".*/(?:archive|releases)/([0-9]+(?:\\.[0-9]+)+).*\\.(zip|tar\\.gz|tar\\.bz2|tar).*");

public List<PythonDependency> transform(File requirementsFile) throws IOException {

List<PythonDependency> dependencies = new LinkedList<>();
Expand All @@ -39,6 +50,22 @@ public PythonDependency transformLine(String line) {
return null;
}

// Case 1: Handle PEP 508 direct references (name @ url)
if (formattedLine.contains("@")) {
String[] parts = formattedLine.split("@", 2);
String dependency = parts[0].trim();
String uri = parts[1].trim();

String version = extractVersionFromUri(uri);

if (!dependency.isEmpty()) {
return new PythonDependency(dependency, version);
} else {
return null;
}
}

// Case 2: Normal operator-based dependency (==, >=, etc.)
// Extract tokens before and after the operator that was found in the line
List<List<String>> extractedTokens = extractTokens(formattedLine);
List<String> tokensBeforeOperator = extractedTokens.get(0);
Expand Down Expand Up @@ -66,6 +93,34 @@ public PythonDependency transformLine(String line) {
}
}

/**
 * Extracts a version string from the URI part of a direct reference ("name @ uri").
 *
 * The three class-level patterns are tried in a fixed order using {@code find()}, and the first
 * one that matches decides the result: URI_VERSION_PATTERN, then VCS_VERSION_PATTERN, then
 * ARCHIVE_VERSION_PATTERN.
 *
 * @param uri the text that followed the '@' separator; may be null or empty
 * @return the captured version, or an empty string when {@code uri} is null/empty or when none
 *         of the patterns match
 */
private String extractVersionFromUri(String uri) {
if (uri == null || uri.isEmpty()) {
return "";
}

// Case 1: wheel/archive style like "https://download.pytorch.org/whl/cpu/torchvision-0.21.0%2Bcpu-cp310-cp310-linux_x86_64.whl"
Matcher matcher = URI_VERSION_PATTERN.matcher(uri);
if (matcher.find()) {
// In URI_VERSION_PATTERN group 1 is the name in front of the version, group 2 is the version itself.
return matcher.group(2);
}
Comment on lines +102 to +105
Copy link
Preview

Copilot AI Sep 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The magic number 2 refers to the second capture group. Consider using a named constant like VERSION_GROUP_INDEX = 2 to make the code more self-documenting.

Copilot uses AI. Check for mistakes.


// Case 2: VCS reference with @<version/tag>
Matcher vcsMatcher = VCS_VERSION_PATTERN.matcher(uri);
if (vcsMatcher.find()) {
// VCS_VERSION_PATTERN has a single capture group: the text following '@'.
return vcsMatcher.group(1);
}
Comment on lines +108 to +111
Copy link
Preview

Copilot AI Sep 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The magic number 1 refers to the first capture group. Consider using a named constant like VCS_VERSION_GROUP_INDEX = 1 to make the code more self-documenting.

Copilot uses AI. Check for mistakes.


// Case 3: Generic archive URL with version in path (like pip archive)
Matcher archiveMatcher = ARCHIVE_VERSION_PATTERN.matcher(uri);
if (archiveMatcher.find()) {
// In ARCHIVE_VERSION_PATTERN group 1 is the dotted version after "archive/" or "releases/"; group 2 is the extension.
return archiveMatcher.group(1);
}
Comment on lines +114 to +117
Copy link
Preview

Copilot AI Sep 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The magic number 1 refers to the first capture group. Consider using a named constant like ARCHIVE_VERSION_GROUP_INDEX = 1 to make the code more self-documenting.

Copilot uses AI. Check for mistakes.


// Case 4: fallback – no version found
return "";
}


public List<List<String>> extractTokens(String formattedLine) {
// Note: The line is always a valid line to extract from at this point since it has passed all the checks
// Hence it will contain at least the dependency. Version may or may not be present.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
package com.blackduck.integration.detectable.detectables.setuptools.unit;

import static org.junit.jupiter.api.Assertions.*;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

import org.junit.jupiter.api.Test;
import org.tomlj.Toml;
import org.tomlj.TomlParseResult;

import com.blackduck.integration.detectable.detectables.setuptools.parse.SetupToolsParsedResult;
import com.blackduck.integration.detectable.detectables.setuptools.parse.SetupToolsTomlParser;

/**
 * Verifies that {@link SetupToolsTomlParser} reads project metadata and direct dependencies from a
 * pyproject.toml that mixes plain version specifiers, extras, environment markers and
 * PEP 508 direct references (wheel URL and VCS URL).
 */
class PyprojectTomlParserTest {

    // Sample pyproject.toml content; written to a temporary file and parsed from there by the test.
    private static final String COMPLEX_PYPROJECT_TOML = "[build-system]\n" +
        "requires = [\"setuptools>=61.0\", \"wheel>=0.37.1\"]\n" +
        "build-backend = \"setuptools.build_meta\"\n\n" +
        "[project]\n" +
        "name = \"complex-setuptools-project\"\n" +
        "version = \"0.1.0\"\n" +
        "description = \"Sample project testing complex PEP 508 dependencies\"\n" +
        "authors = [\n" +
        "  { name = \"Example User\", email = \"user@example.com\" }\n" +
        "]\n" +
        "readme = \"README.md\"\n" +
        "license = { file = \"LICENSE\" }\n" +
        "keywords = [\"python\", \"pep508\", \"dependencies\", \"testing\"]\n" +
        "classifiers = [\n" +
        "  \"Programming Language :: Python :: 3\",\n" +
        "  \"License :: OSI Approved :: MIT License\",\n" +
        "  \"Operating System :: OS Independent\"\n" +
        "]\n\n" +
        "dependencies = [\n" +
        "  \"requests>=2.31.0,<3.0\",\n" +
        "  \"alembic==1.12.0\",\n" +
        "  \"beautifulsoup4==4.13.3\",\n" +
        "  \"six==1.16.0\",\n" +
        "  \"torch @ https://download.pytorch.org/whl/cpu/torch-2.6.0%2Bcpu-cp310-cp310-linux_x86_64.whl\",\n" +
        "  \"torchvision @ https://download.pytorch.org/whl/cpu/torchvision-0.21.0%2Bcpu-cp310-cp310-linux_x86_64.whl\",\n" +
        "  \"flask @ git+https://github.com/pallets/flask@2.3.3\",\n" +
        "  \"requests[security,socks]==2.31.0\",\n" +
        "  \"pandas[all]>=2.1.0,<3.0; python_version>'3.8'\"\n" +
        "]\n\n" +
        "[project.optional-dependencies]\n" +
        "dev = [\n" +
        "  \"pytest>=7.4.0\",\n" +
        "  \"black==24.3.0\",\n" +
        "  \"mypy>=1.5.1\"\n" +
        "]\n" +
        "docs = [\n" +
        "  \"sphinx>=7.0.0\",\n" +
        "  \"sphinx-rtd-theme>=1.2.0\"\n" +
        "]\n\n" +
        "[tool.setuptools]\n" +
        "py-modules = [\"main\"]\n\n" +
        "[project.scripts]\n" +
        "complex-setuptools-project = \"main:main\"\n";

    /**
     * Writes the sample content to a temporary pyproject.toml, parses that file and checks the
     * project name, project version, number of direct dependencies and two representative
     * name/version pairs (one operator-based, one direct reference).
     *
     * @throws IOException if the temporary file cannot be created, written, read or deleted
     */
    @Test
    void testParseComplexPyprojectToml() throws IOException {
        Path pyProjectFile = Files.createTempFile("pyproject", ".toml");
        try {
            // Explicit charset so the bytes on disk do not depend on the platform default.
            Files.write(pyProjectFile, COMPLEX_PYPROJECT_TOML.getBytes(java.nio.charset.StandardCharsets.UTF_8));

            // Parse the file that was just written so the temporary file is actually exercised.
            TomlParseResult result = Toml.parse(pyProjectFile);

            SetupToolsTomlParser tomlParser = new SetupToolsTomlParser(result);
            SetupToolsParsedResult parsedResult = tomlParser.parse();

            // Assertions for project metadata
            assertEquals("complex-setuptools-project", parsedResult.getProjectName());
            assertEquals("0.1.0", parsedResult.getProjectVersion());

            // Assertions for dependencies
            assertEquals(9, parsedResult.getDirectDependencies().size());
            assertTrue(parsedResult.getDirectDependencies().stream()
                .anyMatch(dep -> dep.getName().equals("requests") && dep.getVersion().equals("2.31.0")));
            assertTrue(parsedResult.getDirectDependencies().stream()
                .anyMatch(dep -> dep.getName().equals("torch") && dep.getVersion().equals("2.6.0")));

            // Assertions for optional dependencies
            assertTrue(result.contains("project.optional-dependencies.dev"));
            assertTrue(result.contains("project.optional-dependencies.docs"));
        } finally {
            // Always remove the temporary file, including when an assertion above fails.
            Files.deleteIfExists(pyProjectFile);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package com.blackduck.integration.detectable.detectables.setuptools.unit;

import static org.junit.jupiter.api.Assertions.assertEquals;

import com.blackduck.integration.detectable.python.util.PythonDependency;
import com.blackduck.integration.detectable.python.util.PythonDependencyTransformer;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;

import java.util.stream.Stream;

/**
 * Parameterized checks that {@link PythonDependencyTransformer#transformLine(String)} returns the
 * expected dependency name and version for operator-based specifiers and for PEP 508 direct
 * references (wheel URL, archive URL and VCS URL).
 */
class PythonDependencyTransformerTest {

    private final PythonDependencyTransformer transformer = new PythonDependencyTransformer();

    /**
     * Supplies the input lines together with the name and version each one is expected to produce.
     */
    static Stream<TestCase> dependencyCases() {
        return Stream.of(
            // Operator-based specifiers: the version following the first operator is expected.
            new TestCase("alembic==1.12.0", "alembic", "1.12.0"),
            new TestCase("darkgraylib>=2.31.0,<3.0", "darkgraylib", "2.31.0"),
            new TestCase("requests>=2.4.0,<3.0.dev0", "requests", "2.4.0"),
            new TestCase("toml>=0.10.0", "toml", "0.10.0"),
            // Direct references to wheel files: the version is taken from the file name.
            new TestCase("torch @ https://download.pytorch.org/whl/cpu/torch-2.6.0%2Bcpu-cp310-cp310-linux_x86_64.whl", "torch", "2.6.0"),
            new TestCase("torchvision @ https://download.pytorch.org/whl/cpu/torchvision-0.21.0%2Bcpu-cp310-cp310-linux_x86_64.whl", "torchvision", "0.21.0"),
            // Direct reference to an archive URL: the version is a path component.
            new TestCase("pip @ https://github.com/pypa/pip/archive/1.3.1.zip", "pip", "1.3.1"),
            // Direct reference to a VCS URL: the version is the tag after the trailing '@'.
            new TestCase("flask @ git+https://github.com/pallets/flask@2.3.3", "flask", "2.3.3")
        );
    }

    @ParameterizedTest
    @MethodSource("dependencyCases")
    void testTransformLine(TestCase testCase) {
        PythonDependency dependency = transformer.transformLine(testCase.line);
        assertEquals(testCase.expectedName, dependency.getName());
        assertEquals(testCase.expectedVersion, dependency.getVersion());
    }

    /**
     * One parameterized case: the raw requirement line and the name/version it should yield.
     */
    static class TestCase {
        final String line;
        final String expectedName;
        final String expectedVersion;

        TestCase(String line, String expectedName, String expectedVersion) {
            this.line = line;
            this.expectedName = expectedName;
            this.expectedVersion = expectedVersion;
        }

        // Readable rendering of the case, e.g. for use as the parameterized invocation label.
        @Override
        public String toString() {
            return String.format("line='%s', expectedName=%s, expectedVersion=%s", line, expectedName, expectedVersion);
        }
    }
}
2 changes: 1 addition & 1 deletion documentation/src/main/markdown/currentreleasenotes.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
* (IDETECT-4799) When constructing the BDIO, ignore the Go toolchain directive, as it is the Go project's build-time configuration setting and not a module dependency.
* (IDETECT-4813) Fix Gradle Native Inspector to correctly identify projects with only settings.gradle or settings.gradle.kts file in the root directory.
* (IDETECT-4812) Gradle Native Inspector now supports configuration cache (refactored init-detect.gradle to add support for configuration cache in Gradle projects).

* (IDETECT-4845) Added support for extracting Python package versions from direct references ([PEP 508 URIs](https://packaging.python.org/en/latest/specifications/dependency-specifiers/#environment-markers)) in `pyproject.toml` files. [detect_product_short] now correctly parses versions from wheel URLs, archive URLs, and VCS references for the impacted detectors (Setuptools Pip Detector, Setuptools Detector, and UV Lock Detector). When the version is missing or badly formatted, these detectors fall back to reporting only the package name.

### Dependency updates

Expand Down