-
Notifications
You must be signed in to change notification settings - Fork 80
Python Package Version Extraction from URIs in pyproject.toml (PEP508) #1525
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ee7f064
daac7f9
5939e4c
84efbcf
c36f86e
d22a067
3c6d776
8c2c572
009370a
d57cff5
f59e8df
dd759c6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,8 @@ | |
import java.util.Arrays; | ||
import java.util.LinkedList; | ||
import java.util.List; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
||
public class PythonDependencyTransformer { | ||
|
||
|
@@ -16,6 +18,15 @@ public class PythonDependencyTransformer { | |
private static final List<String> TOKEN_CLEANUP_CHARS = Arrays.asList("\"", "'"); | ||
private static final List<String> TOKEN_IGNORE_AFTER_CHARS = Arrays.asList(",", "[", "==", ">=", "~=", "<=", ">", "<"); | ||
|
||
// Matching version from URI of direct reference like "https://download.pytorch.org/whl/cpu/torch-2.6.0%2Bcpu-cp310-cp310-linux_x86_64.whl" | ||
private static final Pattern URI_VERSION_PATTERN = Pattern.compile(".*/([A-Za-z0-9_.-]+)-([0-9]+(?:\\.[0-9A-Za-z_-]+)*).*\\.(whl|zip|tar\\.gz|tar\\.bz2|tar)$"); | ||
|
||
// Matching version from VCS URL of direct reference like "git+https://github.com/pallets/[email protected]" | ||
private static final Pattern VCS_VERSION_PATTERN = Pattern.compile(".*@([0-9]+(?:\\.[0-9]+)*(?:[A-Za-z0-9._-]*)?).*"); | ||
|
||
// Matching version from archive or release URL of direct reference like "https://github.com/pypa/pip/archive/1.3.1.zip" | ||
private static final Pattern ARCHIVE_VERSION_PATTERN = Pattern.compile(".*/(?:archive|releases)/([0-9]+(?:\\.[0-9]+)+).*\\.(zip|tar\\.gz|tar\\.bz2|tar).*"); | ||
|
||
public List<PythonDependency> transform(File requirementsFile) throws IOException { | ||
|
||
List<PythonDependency> dependencies = new LinkedList<>(); | ||
|
@@ -39,6 +50,22 @@ public PythonDependency transformLine(String line) { | |
return null; | ||
} | ||
|
||
// Case 1: Handle PEP 508 direct references (name @ url) | ||
if (formattedLine.contains("@")) { | ||
String[] parts = formattedLine.split("@", 2); | ||
String dependency = parts[0].trim(); | ||
String uri = parts[1].trim(); | ||
|
||
String version = extractVersionFromUri(uri); | ||
|
||
if (!dependency.isEmpty()) { | ||
return new PythonDependency(dependency, version); | ||
} else { | ||
return null; | ||
} | ||
} | ||
|
||
// Case 2: Normal operator-based dependency (==, >=, etc.) | ||
// Extract tokens before and after the operator that was found in the line | ||
List<List<String>> extractedTokens = extractTokens(formattedLine); | ||
List<String> tokensBeforeOperator = extractedTokens.get(0); | ||
|
@@ -66,6 +93,34 @@ public PythonDependency transformLine(String line) { | |
} | ||
} | ||
|
||
private String extractVersionFromUri(String uri) { | ||
if (uri == null || uri.isEmpty()) { | ||
return ""; | ||
} | ||
|
||
// Case 1: wheel/archive style like "https://download.pytorch.org/whl/cpu/torchvision-0.21.0%2Bcpu-cp310-cp310-linux_x86_64.whl" | ||
Matcher matcher = URI_VERSION_PATTERN.matcher(uri); | ||
if (matcher.find()) { | ||
return matcher.group(2); | ||
} | ||
|
||
// Case 2: VCS reference with @<version/tag> | ||
Matcher vcsMatcher = VCS_VERSION_PATTERN.matcher(uri); | ||
if (vcsMatcher.find()) { | ||
return vcsMatcher.group(1); | ||
} | ||
Comment on lines
+108
to
+111
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The magic number Copilot uses AI. Check for mistakes. Positive FeedbackNegative Feedback |
||
|
||
// Case 3: Generic archive URL with version in path (like pip archive) | ||
Matcher archiveMatcher = ARCHIVE_VERSION_PATTERN.matcher(uri); | ||
if (archiveMatcher.find()) { | ||
return archiveMatcher.group(1); | ||
} | ||
Comment on lines
+114
to
+117
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The magic number Copilot uses AI. Check for mistakes. Positive FeedbackNegative Feedback |
||
|
||
// Case 4: fallback – no version found | ||
return ""; | ||
} | ||
|
||
|
||
public List<List<String>> extractTokens(String formattedLine) { | ||
// Note: The line is always a valid line to extract from at this point since it has passed all the checks | ||
// Hence it will contain at least the dependency. Version may or may not be present. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
package com.blackduck.integration.detectable.detectables.setuptools.unit; | ||
|
||
import static org.junit.jupiter.api.Assertions.*; | ||
|
||
import java.io.IOException; | ||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
|
||
import org.junit.jupiter.api.Test; | ||
import org.tomlj.Toml; | ||
import org.tomlj.TomlParseResult; | ||
|
||
import com.blackduck.integration.detectable.detectables.setuptools.parse.SetupToolsParsedResult; | ||
import com.blackduck.integration.detectable.detectables.setuptools.parse.SetupToolsTomlParser; | ||
|
||
class PyprojectTomlParserTest { | ||
|
||
@Test | ||
void testParseComplexPyprojectToml() throws IOException { | ||
String tomlContent = "[build-system]\n" + | ||
"requires = [\"setuptools>=61.0\", \"wheel>=0.37.1\"]\n" + | ||
"build-backend = \"setuptools.build_meta\"\n\n" + | ||
"[project]\n" + | ||
"name = \"complex-setuptools-project\"\n" + | ||
"version = \"0.1.0\"\n" + | ||
"description = \"Sample project testing complex PEP 508 dependencies\"\n" + | ||
"authors = [\n" + | ||
" { name = \"Example User\", email = \"[email protected]\" }\n" + | ||
"]\n" + | ||
"readme = \"README.md\"\n" + | ||
"license = { file = \"LICENSE\" }\n" + | ||
"keywords = [\"python\", \"pep508\", \"dependencies\", \"testing\"]\n" + | ||
"classifiers = [\n" + | ||
" \"Programming Language :: Python :: 3\",\n" + | ||
" \"License :: OSI Approved :: MIT License\",\n" + | ||
" \"Operating System :: OS Independent\"\n" + | ||
"]\n\n" + | ||
"dependencies = [\n" + | ||
" \"requests>=2.31.0,<3.0\",\n" + | ||
" \"alembic==1.12.0\",\n" + | ||
" \"beautifulsoup4==4.13.3\",\n" + | ||
" \"six==1.16.0\",\n" + | ||
" \"torch @ https://download.pytorch.org/whl/cpu/torch-2.6.0%2Bcpu-cp310-cp310-linux_x86_64.whl\",\n" + | ||
" \"torchvision @ https://download.pytorch.org/whl/cpu/torchvision-0.21.0%2Bcpu-cp310-cp310-linux_x86_64.whl\",\n" + | ||
" \"flask @ git+https://github.com/pallets/[email protected]\",\n" + | ||
" \"requests[security,socks]==2.31.0\",\n" + | ||
" \"pandas[all]>=2.1.0,<3.0; python_version>'3.8'\"\n" + | ||
"]\n\n" + | ||
"[project.optional-dependencies]\n" + | ||
"dev = [\n" + | ||
" \"pytest>=7.4.0\",\n" + | ||
" \"black==24.3.0\",\n" + | ||
" \"mypy>=1.5.1\"\n" + | ||
"]\n" + | ||
"docs = [\n" + | ||
" \"sphinx>=7.0.0\",\n" + | ||
" \"sphinx-rtd-theme>=1.2.0\"\n" + | ||
"]\n\n" + | ||
"[tool.setuptools]\n" + | ||
"py-modules = [\"main\"]\n\n" + | ||
"[project.scripts]\n" + | ||
"complex-setuptools-project = \"main:main\"\n"; | ||
|
||
Path pyProjectFile = Files.createTempFile("pyproject", ".toml"); | ||
Files.write(pyProjectFile, tomlContent.getBytes()); | ||
|
||
TomlParseResult result = Toml.parse(tomlContent); | ||
|
||
SetupToolsTomlParser tomlParser = new SetupToolsTomlParser(result); | ||
SetupToolsParsedResult parsedResult = tomlParser.parse(); | ||
|
||
// Assertions for project metadata | ||
assertEquals("complex-setuptools-project", parsedResult.getProjectName()); | ||
assertEquals("0.1.0", parsedResult.getProjectVersion()); | ||
|
||
// Assertions for dependencies | ||
assertEquals(9, parsedResult.getDirectDependencies().size()); | ||
assertTrue(parsedResult.getDirectDependencies().stream() | ||
.anyMatch(dep -> dep.getName().equals("requests") && dep.getVersion().equals("2.31.0"))); | ||
assertTrue(parsedResult.getDirectDependencies().stream() | ||
.anyMatch(dep -> dep.getName().equals("torch") && dep.getVersion().equals("2.6.0"))); | ||
|
||
// Assertions for optional dependencies | ||
assertTrue(result.contains("project.optional-dependencies.dev")); | ||
assertTrue(result.contains("project.optional-dependencies.docs")); | ||
|
||
Files.delete(pyProjectFile); // Clean up the temporary file | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
package com.blackduck.integration.detectable.detectables.setuptools.unit; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
|
||
import com.blackduck.integration.detectable.python.util.PythonDependency; | ||
import com.blackduck.integration.detectable.python.util.PythonDependencyTransformer; | ||
import org.junit.jupiter.params.ParameterizedTest; | ||
import org.junit.jupiter.params.provider.MethodSource; | ||
|
||
import java.util.stream.Stream; | ||
|
||
class PythonDependencyTransformerTest { | ||
|
||
private final PythonDependencyTransformer transformer = new PythonDependencyTransformer(); | ||
|
||
static Stream<TestCase> dependencyCases() { | ||
return Stream.of( | ||
new TestCase("alembic==1.12.0", "alembic", "1.12.0"), | ||
new TestCase("darkgraylib>=2.31.0,<3.0", "darkgraylib", "2.31.0"), | ||
new TestCase("requests>=2.4.0,<3.0.dev0", "requests", "2.4.0"), | ||
new TestCase("toml>=0.10.0", "toml", "0.10.0"), | ||
new TestCase("torch @ https://download.pytorch.org/whl/cpu/torch-2.6.0%2Bcpu-cp310-cp310-linux_x86_64.whl", "torch", "2.6.0"), | ||
new TestCase("torchvision @ https://download.pytorch.org/whl/cpu/torchvision-0.21.0%2Bcpu-cp310-cp310-linux_x86_64.whl", "torchvision", "0.21.0"), | ||
new TestCase("pip @ https://github.com/pypa/pip/archive/1.3.1.zip", "pip", "1.3.1"), | ||
new TestCase("flask @ git+https://github.com/pallets/[email protected]", "flask", "2.3.3") | ||
); | ||
} | ||
|
||
@ParameterizedTest | ||
@MethodSource("dependencyCases") | ||
void testTransformLine(TestCase testCase) { | ||
PythonDependency dependency = transformer.transformLine(testCase.line); | ||
assertEquals(testCase.expectedName, dependency.getName()); | ||
assertEquals(testCase.expectedVersion, dependency.getVersion()); | ||
} | ||
|
||
static class TestCase { | ||
final String line; | ||
final String expectedName; | ||
final String expectedVersion; | ||
|
||
TestCase(String line, String expectedName, String expectedVersion) { | ||
this.line = line; | ||
this.expectedName = expectedName; | ||
this.expectedVersion = expectedVersion; | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return String.format("line='%s', expectedName=%s, expectedVersion=%s", line, expectedName, expectedVersion); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The magic number `2` refers to the second capture group. Consider using a named constant like `VERSION_GROUP_INDEX = 2` to make the code more self-documenting.
Copilot uses AI. Check for mistakes.