Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,18 @@
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class PythonDependencyTransformer {

private static final List<String> OPERATORS_IN_PRIORITY_ORDER = Arrays.asList("==", ">=", "~=", "<=", ">", "<");
private static final List<String> IGNORE_AFTER_CHARS = Arrays.asList("#", ";");
private static final List<String> TOKEN_CLEANUP_CHARS = Arrays.asList("\"", "'");
private static final List<String> TOKEN_IGNORE_AFTER_CHARS = Arrays.asList(",", "[", "==", ">=", "~=", "<=", ">", "<");
// Matches a distribution file name at the end of a URI: <name>-<version>...<.whl|.zip|.tar.gz|.tar.bz2|.tar>
// Group 1 is the name, group 2 the version, group 3 the file extension.
// The version is a run of digits followed by dot-separated segments. Segments deliberately
// exclude '-', because in a wheel file name '-' separates the version from the python/abi/platform
// tags; allowing it made "requests-2.25.1-py2.py3-none-any.whl" capture "2.25.1-py2.py3-none-any".
// Trade-off: a non-normalized sdist name such as "pkg-1.0.0-rc1.tar.gz" now yields "1.0.0".
// Example: .../whl/cpu/torch-2.6.0%2Bcpu-cp310-cp310-linux_x86_64.whl -> name "torch", version "2.6.0"
// Example: .../requests-2.25.1-py2.py3-none-any.whl                   -> name "requests", version "2.25.1"
private static final Pattern URI_VERSION_PATTERN = Pattern.compile(".*/([A-Za-z0-9_.-]+)-([0-9]+(?:\\.[0-9A-Za-z_]+)*).*\\.(whl|zip|tar\\.gz|tar\\.bz2|tar)$");
// Matches a URI containing "@<ref>" where the ref starts with a digit; group 1 is the ref.
// The leading ".*" is greedy, so the last '@' that is followed by a digit is the one used.
// A ref such as "v2.3.3" does not match because the character right after '@' must be a digit.
// Example: git+https://github.com/pallets/flask.git@2.3.3 -> "2.3.3"
private static final Pattern VCS_VERSION_PATTERN = Pattern.compile(".*@([0-9]+(?:\\.[0-9]+)*(?:[A-Za-z0-9._-]*)?).*");
Copy link
Preview

Copilot AI Sep 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These regex patterns are complex and lack documentation. Consider adding inline comments explaining what each pattern matches and providing examples of URLs they're designed to parse.

Suggested change
private static final List<String> TOKEN_IGNORE_AFTER_CHARS = Arrays.asList(",", "[", "==", ">=", "~=", "<=", ">", "<");
private static final Pattern URI_VERSION_PATTERN = Pattern.compile(".*/([A-Za-z0-9_.-]+)-([0-9]+(?:\\.[0-9A-Za-z_-]+)*).*\\.(whl|zip|tar\\.gz|tar\\.bz2|tar)$");
private static final Pattern VCS_VERSION_PATTERN = Pattern.compile(".*@([0-9]+(?:\\.[0-9]+)*(?:[A-Za-z0-9._-]*)?).*");
private static final List<String> TOKEN_IGNORE_AFTER_CHARS = Arrays.asList(",", "[", "==", ">=", "~=", "<=", ">", "<");
// Matches package filenames in URIs, extracting the package name and version.
// Example: https://download.pytorch.org/whl/cpu/torch-2.6.0%2Bcpu-cp310-cp310-linux_x86_64.whl
// Captures: "torch" as name, "2.6.0" as version
private static final Pattern URI_VERSION_PATTERN = Pattern.compile(".*/([A-Za-z0-9_.-]+)-([0-9]+(?:\\.[0-9A-Za-z_-]+)*).*\\.(whl|zip|tar\\.gz|tar\\.bz2|tar)$");
// Matches VCS (Version Control System) URIs with an @version suffix.
// Example: git+https://github.com/psf/requests.git@2.25.1
// Captures: "2.25.1" as version (the ref must start with a digit, so "@v2.25.1" would not match)
private static final Pattern VCS_VERSION_PATTERN = Pattern.compile(".*@([0-9]+(?:\\.[0-9]+)*(?:[A-Za-z0-9._-]*)?).*");
// Matches archive or release URLs, extracting the version from the path.
// Example: https://github.com/psf/requests/archive/2.25.1.zip
// Captures: "2.25.1" as version

Copilot uses AI. Check for mistakes.

// Matches a URI whose path has "/archive/" or "/releases/" immediately followed by a dotted numeric
// version with at least two components, and an archive extension somewhere after it.
// Group 1 is the version, group 2 the extension.
// Example: https://github.com/pypa/pip/archive/1.3.1.zip -> "1.3.1"
private static final Pattern ARCHIVE_VERSION_PATTERN = Pattern.compile(".*/(?:archive|releases)/([0-9]+(?:\\.[0-9]+)+).*\\.(zip|tar\\.gz|tar\\.bz2|tar).*");

public List<PythonDependency> transform(File requirementsFile) throws IOException {

Expand All @@ -39,6 +44,22 @@ public PythonDependency transformLine(String line) {
return null;
}

// Case 1: Handle PEP 508 direct references (name @ url)
if (formattedLine.contains("@")) {
String[] parts = formattedLine.split("@", 2);
String dependency = parts[0].trim();
String uri = parts[1].trim();

String version = extractVersionFromUri(uri);

if (!dependency.isEmpty()) {
return new PythonDependency(dependency, version);
} else {
return null;
}
}

// Case 2: Normal operator-based dependency (==, >=, etc.)
// Extract tokens before and after the operator that was found in the line
List<List<String>> extractedTokens = extractTokens(formattedLine);
List<String> tokensBeforeOperator = extractedTokens.get(0);
Expand Down Expand Up @@ -66,6 +87,34 @@ public PythonDependency transformLine(String line) {
}
}

/**
 * Extracts a version string from the URI part of a direct reference ("name @ uri").
 *
 * The URI is tested against three patterns in a fixed order and the first one that
 * matches wins: URI_VERSION_PATTERN, then VCS_VERSION_PATTERN, then ARCHIVE_VERSION_PATTERN.
 *
 * @param uri the URI text; may be null or empty
 * @return the captured version, or an empty string when uri is null/empty or no pattern matches
 */
private String extractVersionFromUri(String uri) {
// Nothing to inspect: "no version" is reported as an empty string, never null.
if (uri == null || uri.isEmpty()) {
return "";
}

// Case 1: distribution file name (<name>-<version>...<ext>) at the end of the URI
Matcher matcher = URI_VERSION_PATTERN.matcher(uri);
if (matcher.find()) {
// Group 1 of URI_VERSION_PATTERN is the package name; group 2 is the version.
return matcher.group(2);
}
Comment on lines +102 to +105
Copy link
Preview

Copilot AI Sep 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The magic number 2 refers to the second capture group. Consider using a named constant like VERSION_GROUP_INDEX = 2 to make the code more self-documenting.

Copilot uses AI. Check for mistakes.


// Case 2: VCS reference with @<version/tag>
Matcher vcsMatcher = VCS_VERSION_PATTERN.matcher(uri);
if (vcsMatcher.find()) {
// VCS_VERSION_PATTERN has a single capture group: the text after '@'.
return vcsMatcher.group(1);
}
Comment on lines +108 to +111
Copy link
Preview

Copilot AI Sep 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The magic number 1 refers to the first capture group. Consider using a named constant like VCS_VERSION_GROUP_INDEX = 1 to make the code more self-documenting.

Copilot uses AI. Check for mistakes.


// Case 3: Generic archive URL with version in path (like pip archive)
Matcher archiveMatcher = ARCHIVE_VERSION_PATTERN.matcher(uri);
if (archiveMatcher.find()) {
// Group 1 of ARCHIVE_VERSION_PATTERN is the version; group 2 (the extension) is unused here.
return archiveMatcher.group(1);
}
Comment on lines +114 to +117
Copy link
Preview

Copilot AI Sep 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The magic number 1 refers to the first capture group. Consider using a named constant like ARCHIVE_VERSION_GROUP_INDEX = 1 to make the code more self-documenting.

Copilot uses AI. Check for mistakes.


// Case 4: fallback - none of the patterns matched, so no version is known
return "";
}


public List<List<String>> extractTokens(String formattedLine) {
// Note: The line is always a valid line to extract from at this point since it has passed all the checks
// Hence it will contain at least the dependency. Version may or may not be present.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
package com.blackduck.integration.detectable.detectables.setuptools.unit;

import static org.junit.jupiter.api.Assertions.*;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

import org.junit.jupiter.api.Test;
import org.tomlj.Toml;
import org.tomlj.TomlParseResult;

import com.blackduck.integration.detectable.detectables.setuptools.parse.SetupToolsParsedResult;
import com.blackduck.integration.detectable.detectables.setuptools.parse.SetupToolsTomlParser;

/**
 * Verifies that SetupToolsTomlParser reads project metadata and PEP 508 dependencies
 * (plain specifiers, extras, environment markers and "name @ url" direct references)
 * from a pyproject.toml file.
 */
class PyprojectTomlParserTest {

    @Test
    void testParseComplexPyprojectToml() throws IOException {
        String tomlContent = "[build-system]\n" +
            "requires = [\"setuptools>=61.0\", \"wheel>=0.37.1\"]\n" +
            "build-backend = \"setuptools.build_meta\"\n\n" +
            "[project]\n" +
            "name = \"complex-setuptools-project\"\n" +
            "version = \"0.1.0\"\n" +
            "description = \"Sample project testing complex PEP 508 dependencies\"\n" +
            "authors = [\n" +
            "  { name = \"Example User\", email = \"user@example.com\" }\n" +
            "]\n" +
            "readme = \"README.md\"\n" +
            "license = { file = \"LICENSE\" }\n" +
            "keywords = [\"python\", \"pep508\", \"dependencies\", \"testing\"]\n" +
            "classifiers = [\n" +
            "  \"Programming Language :: Python :: 3\",\n" +
            "  \"License :: OSI Approved :: MIT License\",\n" +
            "  \"Operating System :: OS Independent\"\n" +
            "]\n\n" +
            "dependencies = [\n" +
            "  \"requests>=2.31.0,<3.0\",\n" +
            "  \"alembic==1.12.0\",\n" +
            "  \"beautifulsoup4==4.13.3\",\n" +
            "  \"six==1.16.0\",\n" +
            "  \"torch @ https://download.pytorch.org/whl/cpu/torch-2.6.0%2Bcpu-cp310-cp310-linux_x86_64.whl\",\n" +
            "  \"torchvision @ https://download.pytorch.org/whl/cpu/torchvision-0.21.0%2Bcpu-cp310-cp310-linux_x86_64.whl\",\n" +
            "  \"flask @ git+https://github.com/pallets/flask.git@2.3.3\",\n" +
            "  \"requests[security,socks]==2.31.0\",\n" +
            "  \"pandas[all]>=2.1.0,<3.0; python_version>'3.8'\"\n" +
            "]\n\n" +
            "[project.optional-dependencies]\n" +
            "dev = [\n" +
            "  \"pytest>=7.4.0\",\n" +
            "  \"black==24.3.0\",\n" +
            "  \"mypy>=1.5.1\"\n" +
            "]\n" +
            "docs = [\n" +
            "  \"sphinx>=7.0.0\",\n" +
            "  \"sphinx-rtd-theme>=1.2.0\"\n" +
            "]\n\n" +
            "[tool.setuptools]\n" +
            "py-modules = [\"main\"]\n\n" +
            "[project.scripts]\n" +
            "complex-setuptools-project = \"main:main\"\n";

        Path pyProjectFile = Files.createTempFile("pyproject", ".toml");
        try {
            // Write with an explicit charset so the bytes do not depend on the platform default.
            Files.write(pyProjectFile, tomlContent.getBytes(java.nio.charset.StandardCharsets.UTF_8));

            // Parse the file that was just written, so the temp file is actually exercised.
            TomlParseResult result = Toml.parse(pyProjectFile);

            SetupToolsTomlParser tomlParser = new SetupToolsTomlParser(result);
            SetupToolsParsedResult parsedResult = tomlParser.parse();

            // Assertions for project metadata
            assertEquals("complex-setuptools-project", parsedResult.getProjectName());
            assertEquals("0.1.0", parsedResult.getProjectVersion());

            // Assertions for dependencies
            assertEquals(9, parsedResult.getDirectDependencies().size());
            assertTrue(parsedResult.getDirectDependencies().stream()
                .anyMatch(dep -> dep.getName().equals("requests") && dep.getVersion().equals("2.31.0")));
            assertTrue(parsedResult.getDirectDependencies().stream()
                .anyMatch(dep -> dep.getName().equals("torch") && dep.getVersion().equals("2.6.0")));

            // Assertions for optional dependencies
            assertTrue(result.contains("project.optional-dependencies.dev"));
            assertTrue(result.contains("project.optional-dependencies.docs"));
        } finally {
            // Remove the temporary file even when an assertion above fails.
            Files.deleteIfExists(pyProjectFile);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package com.blackduck.integration.detectable.detectables.setuptools.unit;

import static org.junit.jupiter.api.Assertions.assertEquals;

import com.blackduck.integration.detectable.python.util.PythonDependency;
import com.blackduck.integration.detectable.python.util.PythonDependencyTransformer;
import org.junit.jupiter.api.Test;

// Exercises PythonDependencyTransformer.transformLine on operator-based requirement lines
// and on PEP 508 direct references ("name @ url").
class PythonDependencyTransformerTest {

@Test
void testTransformLine() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great to see unit tests for this complex parsing.

There are many cases being tested by this test. It would be a bit nicer if it was implemented using @ParameterizedTest. Otherwise, if there's ever a problem and the test starts to fail, only one failing assertion will be reported. With a parameterized test it becomes more clear what other types of cases might be broken.

Copy link
Collaborator Author

@zahidblackduck zahidblackduck Sep 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a great suggestion. I've updated the test with @ParameterizedTest.

PythonDependencyTransformer transformer = new PythonDependencyTransformer();

// Case 1: Normal dependency with exact version
PythonDependency alembic = transformer.transformLine("alembic==1.12.0");
assertEquals("alembic", alembic.getName());
assertEquals("1.12.0", alembic.getVersion());

// Case 2: Normal dependency with version range (the first bound is the one reported)
PythonDependency darkgraylib = transformer.transformLine("darkgraylib>=2.31.0,<3.0");
assertEquals("darkgraylib", darkgraylib.getName());
assertEquals("2.31.0", darkgraylib.getVersion());

PythonDependency requests = transformer.transformLine("requests>=2.4.0,<3.0.dev0");
assertEquals("requests", requests.getName());
assertEquals("2.4.0", requests.getVersion());

// Case 3: Normal dependency with single version constraint
PythonDependency toml = transformer.transformLine("toml>=0.10.0");
assertEquals("toml", toml.getName());
assertEquals("0.10.0", toml.getVersion());

// Case 4: Dependency with direct URL (HTTP/HTTPS) pointing at a wheel file
PythonDependency torch = transformer.transformLine("torch @ https://download.pytorch.org/whl/cpu/torch-2.6.0%2Bcpu-cp310-cp310-linux_x86_64.whl");
assertEquals("torch", torch.getName());
assertEquals("2.6.0", torch.getVersion());

PythonDependency torchvision = transformer.transformLine("torchvision @ https://download.pytorch.org/whl/cpu/torchvision-0.21.0%2Bcpu-cp310-cp310-linux_x86_64.whl");
assertEquals("torchvision", torchvision.getName());
assertEquals("0.21.0", torchvision.getVersion());

// Case 5: Archive dependency (version taken from the /archive/ path segment)
PythonDependency pip = transformer.transformLine("pip @ https://github.com/pypa/pip/archive/1.3.1.zip");
assertEquals("pip", pip.getName());
assertEquals("1.3.1", pip.getVersion());

// Case 6: Git dependency (version taken from the ref after the URL's '@')
PythonDependency flask = transformer.transformLine("flask @ git+https://github.com/pallets/flask.git@2.3.3");
assertEquals("flask", flask.getName());
assertEquals("2.3.3", flask.getVersion());
}
}