From e006f3b5df18cae1b5071471778c734a67f18e5c Mon Sep 17 00:00:00 2001 From: Dawid Weiss Date: Fri, 22 Aug 2025 09:14:12 +0200 Subject: [PATCH 01/11] Correct package path in ProfileResults. --- .../org/apache/lucene/gradle/plugins/java/ProfileResults.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/ProfileResults.java b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/ProfileResults.java index e4073c9e56fe..802cef5f6d55 100644 --- a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/ProfileResults.java +++ b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/ProfileResults.java @@ -85,7 +85,8 @@ static String frameToString(RecordedFrame frame, boolean lineNumbers, boolean fr * Driver method, for testing standalone. * *
-   * java -Dtests.profile.count=5 buildSrc/src/main/java/org/apache/lucene/gradle/ProfileResults.java \
+   * java -Dtests.profile.count=5 \
+   *   build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/ProfileResults.java \
    *   ./lucene/core/build/tmp/tests-cwd/somefile.jfr ...
    * 
*/ From d519c154debd8c0cda9fa43fdd25935dff5417b7 Mon Sep 17 00:00:00 2001 From: Dawid Weiss Date: Fri, 22 Aug 2025 14:08:20 +0200 Subject: [PATCH 02/11] Moved lucene.java.slowest-tests-at-end.gradle, modernizing the implementation a bit. --- .../lucene.java.slowest-tests-at-end.gradle | 89 ------- .../java/JavaProjectConventionsPlugin.java | 1 + .../java/ShowSlowestTestsAtEndPlugin.java | 217 ++++++++++++++++++ build.gradle | 1 - 4 files changed, 218 insertions(+), 90 deletions(-) delete mode 100644 build-tools/build-infra/src/main/groovy/lucene.java.slowest-tests-at-end.gradle create mode 100644 build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/ShowSlowestTestsAtEndPlugin.java diff --git a/build-tools/build-infra/src/main/groovy/lucene.java.slowest-tests-at-end.gradle b/build-tools/build-infra/src/main/groovy/lucene.java.slowest-tests-at-end.gradle deleted file mode 100644 index a6f1750a425b..000000000000 --- a/build-tools/build-infra/src/main/groovy/lucene.java.slowest-tests-at-end.gradle +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Add test duration summary at the end of the build. 
- -if (project != project.rootProject) { - throw new GradleException("Applicable to rootProject only: " + project.path) -} - -def allTests = [] -def allSuites = [] - -allprojects { - plugins.withType(JavaPlugin).configureEach { - Provider slowestTestsOption = buildOptions.addBooleanOption("tests.slowestTests", "Print the summary of the slowest tests.", true) - int slowestTestsMinTime = buildOptions.addIntOption("tests.slowestTests.minTime", "Minimum test time to consider a test slow (millis).", 500).get() - Provider slowestSuitesOption = buildOptions.addBooleanOption("tests.slowestSuites", "Print the summary of the slowest suites.", true) - int slowestSuitesMinTime = buildOptions.addIntOption("tests.slowestSuites.minTime", "Minimum suite time to consider a suite slow (millis).", 1000).get() - - tasks.withType(Test).configureEach { task -> - if (slowestTestsOption.get()) { - afterTest { desc, result -> - def duration = (result.getEndTime() - result.getStartTime()) - if (duration >= slowestTestsMinTime) { - allTests << [ - name : "${desc.className.replaceAll('.+\\.', "")}.${desc.name} (${project.path})", - duration: duration - ] - } - } - } - - if (slowestSuitesOption.get()) { - afterSuite { desc, result -> - // Gradle reports runner times as well, omit anything that isn't attached to a concrete class. 
- if (desc.className != null) { - def duration = (result.getEndTime() - result.getStartTime()) - if (duration >= slowestSuitesMinTime) { - allSuites << [ - name : "${desc.className.replaceAll('.+\\.', "")} (${project.path})", - duration: duration - ] - } - } - } - } - } - } -} - -gradle.buildFinished { result -> - if (result.getFailure() == null) { - if (allTests) { - def slowest = allTests - .sort { a, b -> b.duration.compareTo(a.duration) } - .take(10) - .collect { e -> String.format(Locale.ROOT, "%5.2fs %s", e.duration / 1000d, e.name) } - - if (slowest) { - logger.lifecycle("The slowest tests during this run:\n " + slowest.join("\n ")) - } - } - - if (allSuites) { - def slowest = allSuites - .sort { a, b -> b.duration.compareTo(a.duration) } - .take(10) - .collect { e -> String.format(Locale.ROOT, "%5.2fs %s", e.duration / 1000d, e.name) } - - if (slowest) { - logger.lifecycle("The slowest suites during this run:\n " + slowest.join("\n ")) - } - } - } -} diff --git a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/JavaProjectConventionsPlugin.java b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/JavaProjectConventionsPlugin.java index 0d5b50cc6917..4dd5c5446840 100644 --- a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/JavaProjectConventionsPlugin.java +++ b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/JavaProjectConventionsPlugin.java @@ -54,5 +54,6 @@ private void applyJavaPlugins(Project project) { plugins.apply(CodeProfilingPlugin.class); plugins.apply(FailOnNoMatchingFilteredTestsPlugin.class); plugins.apply(CodeCoveragePlugin.class); + plugins.apply(ShowSlowestTestsAtEndPlugin.class); } } diff --git a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/ShowSlowestTestsAtEndPlugin.java b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/ShowSlowestTestsAtEndPlugin.java new file mode 100644 
index 000000000000..0a74445df7a7 --- /dev/null +++ b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/ShowSlowestTestsAtEndPlugin.java @@ -0,0 +1,217 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.gradle.plugins.java; + +import java.util.Collection; +import java.util.Locale; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.stream.Collectors; +import javax.inject.Inject; +import org.apache.lucene.gradle.plugins.LuceneGradlePlugin; +import org.gradle.api.Project; +import org.gradle.api.logging.Logger; +import org.gradle.api.logging.Logging; +import org.gradle.api.plugins.JavaPlugin; +import org.gradle.api.provider.Provider; +import org.gradle.api.services.BuildService; +import org.gradle.api.services.BuildServiceParameters; +import org.gradle.api.tasks.testing.Test; +import org.gradle.api.tasks.testing.TestDescriptor; +import org.gradle.api.tasks.testing.TestListener; +import org.gradle.api.tasks.testing.TestResult; +import org.gradle.tooling.events.FinishEvent; +import org.gradle.tooling.events.OperationCompletionListener; +import org.gradle.tooling.events.task.TaskFailureResult; + +/** Display the slowest tests at the end of the build. 
*/
+public class ShowSlowestTestsAtEndPlugin extends LuceneGradlePlugin {
+  private static final String SERVICE_NAME = "slowestTestsTrackingService";
+
+  public abstract static class RootExtPlugin extends LuceneGradlePlugin {
+    @Override
+    public void apply(Project project) {
+      applicableToRootProjectOnly(project);
+
+      // Register the shared build service that will do the end-of-build check
+      project
+          .getGradle()
+          .getSharedServices()
+          .registerIfAbsent(SERVICE_NAME, TestStatsService.class, _ -> {});
+    }
+  }
+
+  public record Entry(String name, long duration) {}
+
+  @Override
+  public void apply(Project project) {
+    requiresAppliedPlugin(project, JavaPlugin.class);
+
+    var buildOptions = getBuildOptions(project);
+    Provider<Boolean> slowestTestsOption =
+        buildOptions.addBooleanOption(
+            "tests.slowestTests", "Print the summary of the slowest tests.", true);
+    int slowestTestsMinTime =
+        buildOptions
+            .addIntOption(
+                "tests.slowestTests.minTime",
+                "Minimum test time to consider a test slow (millis).",
+                500)
+            .get();
+    Provider<Boolean> slowestSuitesOption =
+        buildOptions.addBooleanOption(
+            "tests.slowestSuites", "Print the summary of the slowest suites.", true);
+    int slowestSuitesMinTime =
+        buildOptions
+            .addIntOption(
+                "tests.slowestSuites.minTime",
+                "Minimum suite time to consider a suite slow (millis).",
+                1000)
+            .get();
+
+    boolean collectSlowestTests = slowestTestsOption.get();
+    boolean collectSlowestSuites = slowestSuitesOption.get();
+
+    if (!collectSlowestTests && !collectSlowestSuites) {
+      // Nothing to do.
+      return;
+    }
+
+    if (project != project.getRootProject()) {
+      project.getRootProject().getPlugins().apply(RootExtPlugin.class);
+    }
+
+    project
+        .getTasks()
+        .withType(Test.class)
+        .configureEach(
+            task -> {
+              @SuppressWarnings("unchecked")
+              Provider<TestStatsService> service =
+                  (Provider<TestStatsService>)
+                      project
+                          .getGradle()
+                          .getSharedServices()
+                          .getRegistrations()
+                          .getByName(SERVICE_NAME)
+                          .getService();
+              task.usesService(service);
+
+              var projectPath = project.getPath();
+
+              task.addTestListener(
+                  new TestListener() {
+                    @Override
+                    public void beforeSuite(TestDescriptor suite) {}
+
+                    @Override
+                    public void afterSuite(TestDescriptor suite, TestResult result) {
+                      // Gradle reports runner times as well, omit anything that isn't attached to a
+                      // concrete class.
+                      if (collectSlowestSuites && suite.getClassName() != null) {
+                        long duration = (result.getEndTime() - result.getStartTime());
+                        if (duration >= slowestSuitesMinTime) { // suite threshold (millis)
+                          service
+                              .get()
+                              .addSuiteEntry(
+                                  new Entry(
+                                      lastNameComponent(suite) + (" (" + projectPath + ")"),
+                                      duration));
+                        }
+                      }
+                    }
+
+                    @Override
+                    public void beforeTest(TestDescriptor testDescriptor) {}
+
+                    @Override
+                    public void afterTest(TestDescriptor testDescriptor, TestResult result) {
+                      if (collectSlowestTests) {
+                        long duration = (result.getEndTime() - result.getStartTime());
+                        if (duration >= slowestTestsMinTime) { // test threshold (millis)
+                          service
+                              .get()
+                              .addTestEntry(
+                                  new Entry(
+                                      lastNameComponent(testDescriptor)
+                                          + ("." + testDescriptor.getName())
+                                          + (" (" + projectPath + ")"),
+                                      duration));
+                        }
+                      }
+                    }
+                  });
+            });
+  }
+
+  private static String lastNameComponent(TestDescriptor suite) {
+    return suite.getClassName().replaceAll(".+\\.", "");
+  }
+
+  /** Build service that keeps track of test and suite times.
*/
+  public abstract static class TestStatsService
+      implements BuildService<BuildServiceParameters.None>,
+          OperationCompletionListener,
+          AutoCloseable {
+    private static final Logger LOGGER = Logging.getLogger(TestStatsService.class);
+
+    private final ConcurrentLinkedQueue<Entry> allTests = new ConcurrentLinkedQueue<>();
+    private final ConcurrentLinkedQueue<Entry> allSuites = new ConcurrentLinkedQueue<>();
+
+    private volatile boolean hadFailedTask;
+
+    @Inject
+    public TestStatsService() {}
+
+    public void addTestEntry(Entry entry) {
+      allTests.add(entry);
+    }
+
+    public void addSuiteEntry(Entry entry) {
+      allSuites.add(entry);
+    }
+
+    @Override
+    public void onFinish(FinishEvent event) {
+      if (event.getResult() instanceof TaskFailureResult) {
+        hadFailedTask = true;
+      }
+    }
+
+    @Override
+    public void close() {
+      if (hadFailedTask || (allTests.isEmpty() && allSuites.isEmpty())) {
+        return;
+      }
+
+      if (!allTests.isEmpty()) {
+        LOGGER.lifecycle("The slowest tests during this run:\n " + toString(allTests));
+      }
+
+      if (!allSuites.isEmpty()) {
+        LOGGER.lifecycle("The slowest suites during this run:\n " + toString(allSuites));
+      }
+    }
+
+    private static String toString(Collection<Entry> entries) {
+      return entries.stream()
+          .sorted((a, b) -> Long.compare(b.duration, a.duration))
+          .limit(10)
+          .map(e -> String.format(Locale.ROOT, "%5.2fs %s", e.duration / 1000d, e.name))
+          .collect(Collectors.joining("\n "));
+    }
+  }
+}
diff --git a/build.gradle b/build.gradle index 05b3d5425e2f..a6546acecfdb 100644 --- a/build.gradle +++ b/build.gradle @@ -27,7 +27,6 @@ plugins { id "lucene.root-project.setup" id "lucene.java-projects.conventions" - id "lucene.java.slowest-tests-at-end" id "lucene.java.show-failed-tests-at-end" id "lucene.java.modules" From 93c213f32701e3f77a9e69905c19de9e6616fc16 Mon Sep 17 00:00:00 2001 From: Dawid Weiss Date: Fri, 22 Aug 2025 19:47:51 +0200 Subject: [PATCH 03/11] Moved lucene.java.show-failed-tests-at-end.gradle to java. 
--- ...ucene.java.show-failed-tests-at-end.gradle | 93 -------- .../java/JavaProjectConventionsPlugin.java | 1 + .../java/ShowFailedTestsAtEndPlugin.java | 211 ++++++++++++++++++ .../java/ShowSlowestTestsAtEndPlugin.java | 18 +- .../java/TestsAndRandomizationPlugin.java | 10 +- build.gradle | 1 - 6 files changed, 233 insertions(+), 101 deletions(-) delete mode 100644 build-tools/build-infra/src/main/groovy/lucene.java.show-failed-tests-at-end.gradle create mode 100644 build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/ShowFailedTestsAtEndPlugin.java diff --git a/build-tools/build-infra/src/main/groovy/lucene.java.show-failed-tests-at-end.gradle b/build-tools/build-infra/src/main/groovy/lucene.java.show-failed-tests-at-end.gradle deleted file mode 100644 index 4de4f5520714..000000000000 --- a/build-tools/build-infra/src/main/groovy/lucene.java.show-failed-tests-at-end.gradle +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.util.regex.Pattern -import org.apache.lucene.gradle.plugins.java.ErrorReportingTestListener - -// Display all failed tests at the end of the build. 
- -if (project != project.rootProject) { - throw new GradleException("Applicable to rootProject only: " + project.path) -} - -def failedTests = [] - -allprojects { - plugins.withType(JavaPlugin).configureEach { - tasks.withType(Test).configureEach { Test task -> - def testOutputsDir = task.ext.testOutputsDir - def reproLine = ErrorReportingTestListener.getReproLineOptions(task) - - afterTest { desc, result -> - if (result.resultType == TestResult.ResultType.FAILURE) { - // check if it's a constructor or a before/after class hook that failed. - def qTestName - if (desc.name == "classMethod") { - qTestName = desc.className - } else { - qTestName = "${desc.className}.${desc.name}" - } - - def randomizationParameters = "" - def p = Pattern.compile(/.+ (?[{].*[}])$/) - def matcher = p.matcher(qTestName) - if (matcher.matches()) { - randomizationParameters = matcher.group("params") - qTestName = qTestName.replace(randomizationParameters, "").trim() - } - - failedTests << [ - "name" : qTestName, - "randomizationParameters": randomizationParameters, - "project" : "${test.project.path}", - "output" : file("${testOutputsDir}/${ErrorReportingTestListener.getOutputLogName(desc.parent)}"), - "reproduce": "gradlew ${project.path}:test --tests \"${qTestName}\" ${reproLine}" - ] - } - } - - afterSuite { desc, result -> - if (result.exceptions) { - failedTests << [ - "name" : "${desc.name}", - "project" : "${test.project.path}", - "output" : file("${testOutputsDir}/${ErrorReportingTestListener.getOutputLogName(desc)}"), - "reproduce": "gradlew ${project.path}:test --tests \"${desc.name}\" ${reproLine}" - ] - } - } - } - } -} - -gradle.buildFinished { result -> - if (failedTests) { - def formatted = failedTests - .sort { a, b -> b.project.compareTo(a.project) } - .collect { e -> - String.format(Locale.ROOT, - " - %s (%s)%s\n Test output: %s\n Reproduce with: %s\n", - e.name, e.project, - e.containsKey("randomizationParameters") && - !e.randomizationParameters.isBlank() ? 
"\n Context parameters: ${e.randomizationParameters}" : "", - e.output, e.reproduce) - } - .join("\n") - - logger.error("\nERROR: The following {} failed:\n\n{}", failedTests.size() == 1 ? "test has" : "tests have", formatted) - } -} diff --git a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/JavaProjectConventionsPlugin.java b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/JavaProjectConventionsPlugin.java index 4dd5c5446840..dedb8499df40 100644 --- a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/JavaProjectConventionsPlugin.java +++ b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/JavaProjectConventionsPlugin.java @@ -55,5 +55,6 @@ private void applyJavaPlugins(Project project) { plugins.apply(FailOnNoMatchingFilteredTestsPlugin.class); plugins.apply(CodeCoveragePlugin.class); plugins.apply(ShowSlowestTestsAtEndPlugin.class); + plugins.apply(ShowFailedTestsAtEndPlugin.class); } } diff --git a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/ShowFailedTestsAtEndPlugin.java b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/ShowFailedTestsAtEndPlugin.java new file mode 100644 index 000000000000..a44e56bcad3d --- /dev/null +++ b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/ShowFailedTestsAtEndPlugin.java @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.gradle.plugins.java; + +import java.io.File; +import java.util.Locale; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import javax.inject.Inject; +import org.apache.lucene.gradle.plugins.LuceneGradlePlugin; +import org.gradle.api.Project; +import org.gradle.api.file.DirectoryProperty; +import org.gradle.api.logging.Logger; +import org.gradle.api.logging.Logging; +import org.gradle.api.plugins.JavaPlugin; +import org.gradle.api.provider.Provider; +import org.gradle.api.services.BuildService; +import org.gradle.api.services.BuildServiceParameters; +import org.gradle.api.tasks.testing.Test; +import org.gradle.api.tasks.testing.TestDescriptor; +import org.gradle.api.tasks.testing.TestListener; +import org.gradle.api.tasks.testing.TestResult; + +/** Display any failing tests, along with their reproduce-line at the end of the build. 
*/
+public class ShowFailedTestsAtEndPlugin extends LuceneGradlePlugin {
+  private static final String SERVICE_NAME = "failingTestsTrackingService";
+
+  public abstract static class RootExtPlugin extends LuceneGradlePlugin {
+    @Override
+    public void apply(Project project) {
+      applicableToRootProjectOnly(project);
+
+      // Register the shared build service that will do the end-of-build check
+      project
+          .getGradle()
+          .getSharedServices()
+          .registerIfAbsent(SERVICE_NAME, FailingTestsReportingService.class, _ -> {});
+    }
+  }
+
+  public record Entry(
+      String name,
+      String randomizationParameters,
+      String projectPath,
+      String reproLine,
+      File testOutput) {}
+
+  @Override
+  public void apply(Project project) {
+    requiresAppliedPlugin(project, JavaPlugin.class);
+
+    if (project != project.getRootProject()) {
+      project.getRootProject().getPlugins().apply(RootExtPlugin.class);
+    }
+
+    project
+        .getTasks()
+        .withType(Test.class)
+        .configureEach(
+            task -> {
+              @SuppressWarnings("unchecked")
+              Provider<FailingTestsReportingService> service =
+                  (Provider<FailingTestsReportingService>)
+                      project
+                          .getGradle()
+                          .getSharedServices()
+                          .getRegistrations()
+                          .getByName(SERVICE_NAME)
+                          .getService();
+              task.usesService(service);
+
+              String projectPath = project.getPath();
+              String reproLine = ErrorReportingTestListener.getReproLineOptions(task);
+              DirectoryProperty testOutputsDir =
+                  task.getExtensions()
+                      .getByType(TestsAndRandomizationPlugin.TestOutputsExtension.class)
+                      .getTestOutputsDir();
+
+              task.addTestListener(
+                  new TestListener() {
+                    @Override
+                    public void beforeTest(TestDescriptor testDescriptor) {}
+
+                    @Override
+                    public void afterTest(TestDescriptor desc, TestResult result) {
+                      if (result.getResultType() == TestResult.ResultType.FAILURE) {
+                        // check if it's a constructor or a before/after class hook that failed.
+                        String qTestName;
+                        if (desc.getName().equals("classMethod")) {
+                          qTestName = desc.getClassName();
+                        } else {
+                          qTestName = desc.getClassName() + "." + desc.getName();
+                        }
+
+                        var randomizationParameters = "";
+                        var p = Pattern.compile(".+ (?<params>[{].*[}])$");
+                        var matcher = p.matcher(qTestName);
+                        if (matcher.matches()) {
+                          randomizationParameters = matcher.group("params");
+                          qTestName = qTestName.replace(randomizationParameters, "").trim();
+                        }
+
+                        service
+                            .get()
+                            .addFailedTest(
+                                new Entry(
+                                    qTestName,
+                                    randomizationParameters,
+                                    projectPath,
+                                    "gradlew "
+                                        + (projectPath + ":test")
+                                        + (" --tests \"" + qTestName + "\" ")
+                                        + reproLine,
+                                    testOutputsDir
+                                        .file(
+                                            ErrorReportingTestListener.getOutputLogName(
+                                                desc.getParent()))
+                                        .get()
+                                        .getAsFile()));
+                      }
+                    }
+
+                    @Override
+                    public void beforeSuite(TestDescriptor suite) {}
+
+                    @Override
+                    public void afterSuite(TestDescriptor desc, TestResult result) {
+                      if (result.getExceptions() != null && !result.getExceptions().isEmpty()) {
+                        service
+                            .get()
+                            .addFailedTest(
+                                new Entry(
+                                    desc.getName(),
+                                    null,
+                                    projectPath,
+                                    "gradlew "
+                                        + (projectPath + ":test")
+                                        + (" --tests \"" + desc.getName() + "\" ")
+                                        + reproLine,
+                                    testOutputsDir
+                                        .file(ErrorReportingTestListener.getOutputLogName(desc))
+                                        .get()
+                                        .getAsFile()));
+                      }
+                    }
+                  });
+            });
+  }
+
+  /** Build service that collects failed tests and reports them on close.
*/
+  public abstract static class FailingTestsReportingService
+      implements BuildService<BuildServiceParameters.None>, AutoCloseable {
+    private static final Logger LOGGER = Logging.getLogger(FailingTestsReportingService.class);
+
+    private final ConcurrentLinkedQueue<Entry> failedTests = new ConcurrentLinkedQueue<>();
+
+    @Inject
+    public FailingTestsReportingService() {}
+
+    public void addFailedTest(Entry entry) {
+      failedTests.add(entry);
+    }
+
+    @Override
+    public void close() {
+      if (!failedTests.isEmpty()) {
+        var limit = 10;
+        var formatted =
+            failedTests.stream()
+                .sorted((a, b) -> b.projectPath.compareTo(a.projectPath))
+                .map(
+                    e ->
+                        String.format(
+                            Locale.ROOT,
+                            " - %s (%s)%s\n Test output: %s\n Reproduce with: %s\n",
+                            e.name,
+                            e.projectPath,
+                            e.randomizationParameters != null
+                                    && !e.randomizationParameters.isBlank()
+                                ? "\n Context parameters: " + e.randomizationParameters
+                                : "",
+                            e.testOutput,
+                            e.reproLine))
+                .limit(limit)
+                .collect(Collectors.joining("\n"));
+
+        LOGGER.error(
+            "\nERROR: {} {} failed{}:\n\n{}",
+            failedTests.size(),
+            failedTests.size() == 1 ? "test has" : "tests have", // count is the first {} already
+            failedTests.size() > limit ?
" (top " + limit + " shown)" : "", + formatted); + } + } + } +} diff --git a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/ShowSlowestTestsAtEndPlugin.java b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/ShowSlowestTestsAtEndPlugin.java index 0a74445df7a7..c8cebccf325f 100644 --- a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/ShowSlowestTestsAtEndPlugin.java +++ b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/ShowSlowestTestsAtEndPlugin.java @@ -33,6 +33,7 @@ import org.gradle.api.tasks.testing.TestDescriptor; import org.gradle.api.tasks.testing.TestListener; import org.gradle.api.tasks.testing.TestResult; +import org.gradle.build.event.BuildEventsListenerRegistry; import org.gradle.tooling.events.FinishEvent; import org.gradle.tooling.events.OperationCompletionListener; import org.gradle.tooling.events.task.TaskFailureResult; @@ -42,15 +43,24 @@ public class ShowSlowestTestsAtEndPlugin extends LuceneGradlePlugin { private static final String SERVICE_NAME = "slowestTestsTrackingService"; public abstract static class RootExtPlugin extends LuceneGradlePlugin { + /** + * @return Returns the injected build events listener registry. 
+ */ + @Inject + protected abstract BuildEventsListenerRegistry getListenerRegistry(); + @Override public void apply(Project project) { applicableToRootProjectOnly(project); // Register the shared build service that will do the end-of-build check - project - .getGradle() - .getSharedServices() - .registerIfAbsent(SERVICE_NAME, TestStatsService.class, _ -> {}); + var service = + project + .getGradle() + .getSharedServices() + .registerIfAbsent(SERVICE_NAME, TestStatsService.class, _ -> {}); + + getListenerRegistry().onTaskCompletion(service); } } diff --git a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/TestsAndRandomizationPlugin.java b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/TestsAndRandomizationPlugin.java index 865c0ac2fadb..9b434b4af591 100644 --- a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/TestsAndRandomizationPlugin.java +++ b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/java/TestsAndRandomizationPlugin.java @@ -36,7 +36,6 @@ import org.gradle.api.file.DirectoryProperty; import org.gradle.api.file.RegularFileProperty; import org.gradle.api.invocation.Gradle; -import org.gradle.api.plugins.ExtraPropertiesExtension; import org.gradle.api.plugins.JavaPlugin; import org.gradle.api.provider.Provider; import org.gradle.api.tasks.InputFile; @@ -365,8 +364,9 @@ public void apply(Project project) { .getAsFile(); task.getExtensions() - .getByType(ExtraPropertiesExtension.class) - .set("testOutputsDir", testOutputsDir); + .create("testOutputsExtension", TestOutputsExtension.class) + .getTestOutputsDir() + .set(testOutputsDir); // LUCENE-9660: Make it possible to always rerun tests, even if they're incrementally // up-to-date. 
@@ -513,6 +513,10 @@ public void apply(Project project) { }); } + public abstract static class TestOutputsExtension { + abstract DirectoryProperty getTestOutputsDir(); + } + public abstract static class LoggingFileArgumentProvider implements CommandLineArgumentProvider { @InputFile @PathSensitive(PathSensitivity.RELATIVE) diff --git a/build.gradle b/build.gradle index a6546acecfdb..57d43a5b1412 100644 --- a/build.gradle +++ b/build.gradle @@ -27,7 +27,6 @@ plugins { id "lucene.root-project.setup" id "lucene.java-projects.conventions" - id "lucene.java.show-failed-tests-at-end" id "lucene.java.modules" id "lucene.validation.gradle-versions-cleanup" From 0d28474f5237b398dacb2362a33affa0241725d8 Mon Sep 17 00:00:00 2001 From: Dawid Weiss Date: Fri, 22 Aug 2025 20:01:28 +0200 Subject: [PATCH 04/11] Remove owasp plugin support without a replacement #15114 --- build-tools/build-infra/build.gradle | 1 - .../groovy/lucene.validation.owasp.gradle | 64 ------------------- .../plugins/help/BuildOptionGroupsPlugin.java | 3 - build.gradle | 1 - gradle/libs.versions.toml | 1 - help/workflow.txt | 8 --- 6 files changed, 78 deletions(-) delete mode 100644 build-tools/build-infra/src/main/groovy/lucene.validation.owasp.gradle diff --git a/build-tools/build-infra/build.gradle b/build-tools/build-infra/build.gradle index 6d2a390976ad..016817b82d1c 100644 --- a/build-tools/build-infra/build.gradle +++ b/build-tools/build-infra/build.gradle @@ -91,7 +91,6 @@ dependencies { implementation plugin(deps.plugins.carrotsearch.dependencychecks) implementation plugin(deps.plugins.forbiddenapis) implementation plugin(deps.plugins.spotless) - implementation plugin(deps.plugins.owasp.dependencycheck) implementation plugin(deps.plugins.undercouch.download) implementation plugin(deps.plugins.errorprone) implementation plugin(deps.plugins.jacocolog) diff --git a/build-tools/build-infra/src/main/groovy/lucene.validation.owasp.gradle 
b/build-tools/build-infra/src/main/groovy/lucene.validation.owasp.gradle deleted file mode 100644 index 32c7c6283668..000000000000 --- a/build-tools/build-infra/src/main/groovy/lucene.validation.owasp.gradle +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -// This adds OWASP vulnerability validation of project dependencies - -if (project != project.rootProject) { - throw new GradleException("Applicable to rootProject only: " + project.path) -} - -// If -Pvalidation.owasp=true is set the validation will also run as part of the check task. 
-Provider owaspOption = buildOptions.addBooleanOption("validation.owasp", - "Enable OWASP vulnerability check for dependencies.", false) - -Provider owaspThresholdOption = buildOptions.addIntOption("validation.owasp.threshold", - "Owasp validation threshold to fail the build, if exceeded.", 7) - -Provider owaspApiKey = buildOptions.addOption("validation.owasp.apikey", - "Owasp validation API key (https://nvd.nist.gov/developers/request-an-api-key).") - -if (owaspOption.get() || gradle.startParameter.taskNames.contains("owasp")) { - plugins.apply(deps.plugins.owasp.dependencycheck.get().pluginId) - - dependencyCheck { - failBuildOnCVSS = owaspThresholdOption.get() - formats = ['HTML', 'JSON'] - skipProjects = [] - skipConfigurations = ['unifiedClasspath'] - suppressionFile = rootProject.layout.projectDirectory.file("gradle/validation/owasp/exclusions.xml") - - if (owaspApiKey.isPresent()) { - nvd.apiKey = owaspApiKey.get() - } - } - - def owaspTask = tasks.register("owasp", { - group = "Verification" - description = "Check project dependencies against OWASP vulnerability database." - dependsOn "dependencyCheckAggregate" - }) - - // Unless explicitly enabled, do not attach owasp to check. It has a large download - // footprint and takes a significant amount of time. This should be enabled for - // nightly CI runs only, I think. 
- if (owaspOption.get()) { - tasks.named("check").configure { - dependsOn owaspTask - } - } -} diff --git a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/help/BuildOptionGroupsPlugin.java b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/help/BuildOptionGroupsPlugin.java index 27b13c884476..26b1de13a789 100644 --- a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/help/BuildOptionGroupsPlugin.java +++ b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/help/BuildOptionGroupsPlugin.java @@ -53,9 +53,6 @@ public void apply(Project project) { "hunspell.corpora", "hunspell.dictionaries", "hunspell.repo.path", - "validation.owasp", - "validation.owasp.apikey", - "validation.owasp.threshold", "tests.linedocsfile", "tests.LUCENE_VERSION", "tests.bwcdir")); diff --git a/build.gradle b/build.gradle index 57d43a5b1412..d95068898a4a 100644 --- a/build.gradle +++ b/build.gradle @@ -33,7 +33,6 @@ plugins { id "lucene.validation.error-prone" id "lucene.validation.jar-checks" id "lucene.validation.rat-sources" - id "lucene.validation.owasp" id "lucene.publications.maven" id "lucene.publications.maven-to-nexus-releases" diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index fb02209f744d..c915e9b6ec50 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -105,7 +105,6 @@ carrotsearch-dependencychecks = "com.carrotsearch.gradle.dependencychecks:0.1.0" errorprone = "net.ltgt.errorprone:4.3.0" forbiddenapis = "de.thetaphi.forbiddenapis:3.9" jacocolog = "org.barfuin.gradle.jacocolog:3.1.0" -owasp-dependencycheck = "org.owasp.dependencycheck:12.1.3" randomizedtesting = "com.carrotsearch.gradle.randomizedtesting:0.0.6" spotless = "com.diffplug.spotless:7.2.1" undercouch-download = "de.undercouch.download:5.6.0" diff --git a/help/workflow.txt b/help/workflow.txt index 8b35247fcfe7..f69966d5abcc 100644 --- a/help/workflow.txt +++ b/help/workflow.txt @@ -40,11 
+40,3 @@ ls lucene/core/build/docs Assemble entire documentation (including javadocs): gradlew documentation ls lucene/documentation/build/site - - -Other validation and checks -=========================== - -Generate a report of dependencies with known OWASP vulnerabilities: -gradlew :dependencyCheckAnalyze -open ./build/reports/dependency-check-report.html From 0cbfa178ef8e3f2d31ce025745d9a2b521b21a6b Mon Sep 17 00:00:00 2001 From: Dawid Weiss Date: Wed, 27 Aug 2025 08:49:17 +0200 Subject: [PATCH 05/11] Interim. --- .../plugins/licenses/LicenceCheckTask.java | 239 ++++++++++++++++++ lucene/benchmark/build.gradle | 8 + 2 files changed, 247 insertions(+) create mode 100644 build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/LicenceCheckTask.java diff --git a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/LicenceCheckTask.java b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/LicenceCheckTask.java new file mode 100644 index 000000000000..15612cf29ad0 --- /dev/null +++ b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/LicenceCheckTask.java @@ -0,0 +1,239 @@ +package org.apache.lucene.gradle.plugins.licenses; + +import java.io.*; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.*; +import java.util.stream.Collectors; +import javax.inject.Inject; +import javax.xml.XMLConstants; +import javax.xml.parsers.DocumentBuilderFactory; +import org.apache.rat.Defaults; +import org.apache.rat.ReportConfiguration; +import org.apache.rat.analysis.IHeaderMatcher; +import org.apache.rat.analysis.util.HeaderMatcherMultiplexer; +import org.apache.rat.anttasks.SubstringLicenseMatcher; +import org.apache.rat.api.RatException; +import org.apache.rat.document.impl.FileDocument; +import org.apache.rat.license.SimpleLicenseFamily; +import org.apache.rat.report.RatReport; +import org.apache.rat.report.claim.ClaimStatistic; 
+import org.apache.rat.report.xml.XmlReportFactory; +import org.apache.rat.report.xml.writer.impl.base.XmlWriter; +import org.gradle.api.DefaultTask; +import org.gradle.api.GradleException; +import org.gradle.api.file.ConfigurableFileCollection; +import org.gradle.api.file.ProjectLayout; +import org.gradle.api.file.RegularFileProperty; +import org.gradle.api.tasks.*; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.NodeList; + +/** Java port of the Groovy/Ant-based RAT task using Apache RAT Core API (0.16.1). */ +@CacheableTask +public abstract class LicenceCheckTask extends DefaultTask { + @InputFiles + @PathSensitive(PathSensitivity.RELATIVE) + @IgnoreEmptyDirectories + public abstract ConfigurableFileCollection getInputFileTrees(); + + @OutputFile + public abstract RegularFileProperty getXmlReport(); + + @Inject + public LicenceCheckTask(ProjectLayout layout) { + getXmlReport().convention(layout.getBuildDirectory().file("rat/rat-report.xml")); + setGroup("Verification"); + setDescription( + "Runs Apache RAT on the configured sources and fails on unknown/unapproved licenses."); + } + + @TaskAction + public void run() { + final String origEncoding = System.getProperty("file.encoding"); + File reportFile = getXmlReport().get().getAsFile(); + try { + generateReport(reportFile); + printUnknownFiles(reportFile); + } finally { + if (!Objects.equals(System.getProperty("file.encoding"), origEncoding)) { + throw new GradleException( + "Something is wrong: Apache RAT changed file.encoding to " + + System.getProperty("file.encoding") + + "?"); + } + } + } + + private void generateReport(File reportFile) { + try { + Files.createDirectories(reportFile.getParentFile().toPath()); + // Write the input file list for debugging + String inputFileList = + getInputFileTrees().getFiles().stream() + .map(File::getPath) + .sorted() + .collect(Collectors.joining("\n")); + File listFile = new File(reportFile.getPath().replaceAll("\\.xml$", "-filelist.txt")); 
+ try (Writer w = + new OutputStreamWriter(new FileOutputStream(listFile), StandardCharsets.UTF_8)) { + w.write(inputFileList); + } + + ReportConfiguration config = new ReportConfiguration(); + + List matchers = new ArrayList<>(); + matchers.add(Defaults.createDefaultMatcher()); + + matchers.add( + subStringMatcher( + "BSD4 ", + "Original BSD License (with advertising clause)", + "All advertising materials")); + matchers.add( + subStringMatcher( + "BSD ", "Modified BSD License", "Copyright (c) 2001-2009 Anders Moeller")); + matchers.add( + subStringMatcher( + "BSD ", "Modified BSD License", "Copyright (c) 2001, Dr Martin Porter")); + matchers.add( + subStringMatcher( + "BSD ", + "Modified BSD License", + "THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS")); + matchers.add( + subStringMatcher( + "BSD ", "Modified BSD License", "Egothor Software License version 1.00")); + matchers.add( + subStringMatcher("BSD ", "Modified BSD License", "Copyright (c) 2005 Bruno Martins")); + matchers.add( + subStringMatcher( + "BSD ", + "Modified BSD License", + "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS")); + matchers.add( + subStringMatcher( + "BSD ", + "Modified BSD License", + "THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS")); + + matchers.add( + subStringMatcher( + "MIT ", + "Modified BSD License", + "Permission is hereby granted, free of charge, to any person obtaining a copy")); + matchers.add(subStringMatcher("MIT ", "Modified BSD License", " ; License: MIT")); + + matchers.add( + subStringMatcher( + "AL ", "Apache", "Licensed to the Apache Software Foundation (ASF) under")); + matchers.add( + subStringMatcher( + "AL ", + "Apache", + "Licensed under the Apache License, Version 2.0 (the \"License\")")); + + matchers.add(subStringMatcher("GEN ", "Generated", "Produced by GNUPLOT")); + matchers.add(subStringMatcher("GEN ", "Generated", "Generated by Snowball")); + + config.setHeaderMatcher(new 
HeaderMatcherMultiplexer(matchers)); + + config.setApprovedLicenseNames( + new SimpleLicenseFamily[] { + simpleFamily("Apache"), + simpleFamily("The MIT License"), + simpleFamily("Modified BSD License"), + simpleFamily("Generated") + }); + + Files.deleteIfExists(reportFile.toPath()); + try (Writer writer = + new BufferedWriter( + new OutputStreamWriter(new FileOutputStream(reportFile), StandardCharsets.UTF_8))) { + toXmlReportFile(config, writer); + } + } catch (IOException | RatException e) { + throw new GradleException("Cannot generate RAT report", e); + } + } + + private static SimpleLicenseFamily simpleFamily(String name) { + SimpleLicenseFamily fam = new SimpleLicenseFamily(); + fam.setFamilyName(name); + return fam; + } + + private static IHeaderMatcher subStringMatcher( + String licenseFamilyCategory, String licenseFamilyName, String substringPattern) { + SubstringLicenseMatcher substringLicenseMatcher = new SubstringLicenseMatcher(); + substringLicenseMatcher.setLicenseFamilyCategory(licenseFamilyCategory); + substringLicenseMatcher.setLicenseFamilyName(licenseFamilyName); + SubstringLicenseMatcher.Pattern p = new SubstringLicenseMatcher.Pattern(); + p.setSubstring(substringPattern); + substringLicenseMatcher.addConfiguredPattern(p); + return substringLicenseMatcher; + } + + private void toXmlReportFile(ReportConfiguration config, Writer writer) + throws RatException, IOException { + ClaimStatistic stats = new ClaimStatistic(); + RatReport standardReport = + XmlReportFactory.createStandardReport(new XmlWriter(writer), stats, config); + standardReport.startReport(); + for (File f : getInputFileTrees().getFiles()) { + standardReport.report(new FileDocument(f)); + } + standardReport.endReport(); + writer.flush(); + } + + private void printUnknownFiles(File reportFile) { + List errors = parseUnknowns(reportFile); + if (!errors.isEmpty()) { + String msg = + "Found " + + errors.size() + + " file(s) with errors:\n" + + errors.stream().map(e -> " - " + 
e).collect(Collectors.joining("\n")); + throw new GradleException(msg); + } + } + + private static List parseUnknowns(File reportFile) { + try { + DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + dbf.setXIncludeAware(false); + dbf.setIgnoringComments(true); + dbf.setExpandEntityReferences(false); + dbf.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, ""); + dbf.setAttribute(XMLConstants.ACCESS_EXTERNAL_SCHEMA, ""); + dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); + dbf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + dbf.setFeature("http://xml.org/sax/features/external-general-entities", false); + dbf.setFeature("http://xml.org/sax/features/external-parameter-entities", false); + dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + + Document doc = dbf.newDocumentBuilder().parse(reportFile); + NodeList resources = doc.getElementsByTagName("resource"); + List bad = new ArrayList<>(); + for (int i = 0; i < resources.getLength(); i++) { + Element res = (Element) resources.item(i); + NodeList children = res.getChildNodes(); + for (int j = 0; j < children.getLength(); j++) { + if (children.item(j) instanceof Element el) { + if ("license-approval".equals(el.getTagName()) + && "false".equals(el.getAttribute("name"))) { + bad.add("Unknown license: " + res.getAttribute("name")); + break; + } + } + } + } + Collections.sort(bad); + return bad; + } catch (Exception e) { + throw new GradleException("Error parsing RAT XML report: " + reportFile.getAbsolutePath(), e); + } + } +} diff --git a/lucene/benchmark/build.gradle b/lucene/benchmark/build.gradle index 4e98fc344d8d..3fcd0f733c06 100644 --- a/lucene/benchmark/build.gradle +++ b/lucene/benchmark/build.gradle @@ -15,6 +15,8 @@ * limitations under the License. 
*/ +import org.apache.lucene.gradle.plugins.licenses.LicenceCheckTask + plugins { id "java" } @@ -90,3 +92,9 @@ tasks.register("run", JavaExec, { suspend = true } }) + +tasks.register("foo", LicenceCheckTask.class, {task -> + task.getInputFileTrees().from(project.fileTree("src", { + include "**/*.java" + })) +}) From 1bc1e978c4a38652617561d8e053c7fdb9e489ed Mon Sep 17 00:00:00 2001 From: Dawid Weiss Date: Mon, 15 Sep 2025 20:42:37 +0200 Subject: [PATCH 06/11] Playing with new rat's api. To be reverted. --- lucene/benchmark/build.gradle | 2 + lucene/benchmark/src/java/module-info.java | 2 + .../lucene/benchmark/LicenseCheckDummy.java | 119 ++++++++++++++++++ 3 files changed, 123 insertions(+) create mode 100644 lucene/benchmark/src/java/org/apache/lucene/benchmark/LicenseCheckDummy.java diff --git a/lucene/benchmark/build.gradle b/lucene/benchmark/build.gradle index 3fcd0f733c06..7acf26eeadda 100644 --- a/lucene/benchmark/build.gradle +++ b/lucene/benchmark/build.gradle @@ -51,6 +51,8 @@ dependencies { moduleRuntimeOnly project(':lucene:analysis:icu') moduleTestImplementation project(':lucene:test-framework') + + moduleImplementation "org.apache.rat:apache-rat-core:0.16.1" } // We add 'conf' to resources because we validate *.alg script correctness in one of the tests. 
diff --git a/lucene/benchmark/src/java/module-info.java b/lucene/benchmark/src/java/module-info.java index e22470b71ecf..e8ae492b35e9 100644 --- a/lucene/benchmark/src/java/module-info.java +++ b/lucene/benchmark/src/java/module-info.java @@ -28,6 +28,8 @@ requires org.apache.commons.compress; requires nekohtml; requires com.ibm.icu; + requires apache.rat.core; + requires org.apache.commons.io; exports org.apache.lucene.benchmark; exports org.apache.lucene.benchmark.byTask; diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/LicenseCheckDummy.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/LicenseCheckDummy.java new file mode 100644 index 000000000000..edad6a72a1f0 --- /dev/null +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/LicenseCheckDummy.java @@ -0,0 +1,119 @@ +package org.apache.lucene.benchmark; + +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.List; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.stream.Collectors; +import org.apache.rat.*; +import org.apache.rat.analysis.IHeaderMatcher; +import org.apache.rat.analysis.matchers.CopyrightMatcher; +import org.apache.rat.analysis.matchers.OrMatcher; +import org.apache.rat.analysis.matchers.SimpleTextMatcher; +import org.apache.rat.api.RatException; +import org.apache.rat.document.impl.FileDocument; +import org.apache.rat.license.ILicense; +import org.apache.rat.license.ILicenseFamily; +import org.apache.rat.report.IReportable; +import org.apache.rat.report.RatReport; +import org.apache.rat.report.claim.ClaimStatistic; +import org.apache.rat.report.claim.impl.ClaimAggregator; +import org.apache.rat.utils.DefaultLog; + +public class LicenseCheckDummy { + public static void main(String[] args) throws Exception { + final ReportConfiguration configuration = new ReportConfiguration(DefaultLog.INSTANCE); + + 
Defaults.Builder defaultBuilder = Defaults.builder(); + Defaults defaults = defaultBuilder.build(); + configuration.setFrom(defaults); + + SortedSet family = + new TreeSet<>( + Set.of( + ILicenseFamily.builder() + .setLicenseFamilyCategory("BSD") + .setLicenseFamilyName("BSD license") + .build())); + configuration.addLicense( + ILicense.builder() + .setId("xyz") + .setName("Name xyz") + .setLicenseFamilyCategory("BSD") + .setMatcher(IHeaderMatcher.Builder.text().setText("license xyz").build()) + .build(family)); + + configuration.setStyleReport(false); + + List paths; + try (var s = Files.walk(Paths.get("/home/dweiss/tmp/rat/"))) { + paths = s.filter(p -> Files.isRegularFile(p)).toList(); + } + + configuration.setReportable( + new IReportable() { + @Override + public void run(RatReport report) throws RatException { + for (var path : paths) { + report.report(new FileDocument(path.toFile())); + } + } + }); + + final IHeaderMatcher asf1Matcher = + new SimpleTextMatcher("http://www.apache.org/licenses/LICENSE-2.0"); + final IHeaderMatcher asf2Matcher = new SimpleTextMatcher("https://www.apache.org/licenses/LICENSE-2.0.txt"); + final IHeaderMatcher asfMatcher = new OrMatcher(Arrays.asList(asf1Matcher, asf2Matcher)); + + final IHeaderMatcher qosMatcher = new CopyrightMatcher("2004", "2011", "QOS.ch"); + final ILicense qosLic = new TestingLicense("QOS", qosMatcher); + + IDocumentAnalyser analyser = DefaultAnalyserFactory.createDefaultAnalyser(DefaultLog.INSTANCE,Arrays.asList(asfLic, qosLic)); + final List reporters = new ArrayList<>(); + reporters.add(reporter); + report = new ClaimReporterMultiplexer(analyser, reporters); + + configuration.setOut(OutputStream::nullOutputStream); + ClaimStatistic statistic = new ClaimStatistic(); + var report = new ClaimAggregator(statistic); + report.startReport(); + configuration.getReportable().run(report); + report.endReport(); + + System.out.println("Approved: " + statistic.getNumApproved()); + System.out.println("Unapproved: " + 
statistic.getNumUnApproved()); + System.out.println("Unknown: " + statistic.getNumUnknown()); + + System.out.println("Document categories:"); + System.out.println( + statistic.getDocumentCategoryMap().entrySet().stream() + .map(e -> e.getKey() + ": " + e.getValue()) + .collect(Collectors.joining("\n"))); + + System.out.println(); + System.out.println("License file names:"); + System.out.println( + statistic.getLicenseFileNameMap().entrySet().stream() + .map(e -> e.getKey() + ": " + e.getValue()) + .collect(Collectors.joining("\n"))); + + System.out.println(); + System.out.println("License codes:"); + System.out.println( + statistic.getLicenseFileCodeMap().entrySet().stream() + .map(e -> e.getKey() + ": " + e.getValue()) + .collect(Collectors.joining("\n"))); + + System.out.println(); + System.out.println("Doc categories:"); + System.out.println( + statistic.getDocumentCategoryMap().entrySet().stream() + .map(e -> e.getKey() + ": " + e.getValue()) + .collect(Collectors.joining("\n"))); + } +} From 0406d4ed4742b9291294f4b9737d4dd14e752ffb Mon Sep 17 00:00:00 2001 From: Dawid Weiss Date: Mon, 15 Sep 2025 21:33:40 +0200 Subject: [PATCH 07/11] Remove apache rat dependency entirely (license check temporarily turned off). --- build-tools/build-infra-shadow/build.gradle | 1 - build-tools/build-infra/build.gradle | 1 - ... 
=> lucene.validation.rat-sources.gradle_} | 0 .../plugins/licenses/LicenceCheckTask.java | 239 ------------------ .../ValidateSourcePatternsPlugin.java | 44 +--- build.gradle | 1 - gradle/libs.versions.toml | 1 - lucene/benchmark/build.gradle | 10 - .../lucene/benchmark/LicenseCheckDummy.java | 119 --------- 9 files changed, 9 insertions(+), 407 deletions(-) rename build-tools/build-infra/src/main/groovy/{lucene.validation.rat-sources.gradle => lucene.validation.rat-sources.gradle_} (100%) delete mode 100644 build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/LicenceCheckTask.java delete mode 100644 lucene/benchmark/src/java/org/apache/lucene/benchmark/LicenseCheckDummy.java diff --git a/build-tools/build-infra-shadow/build.gradle b/build-tools/build-infra-shadow/build.gradle index 845a7c51d43c..c6291c696246 100644 --- a/build-tools/build-infra-shadow/build.gradle +++ b/build-tools/build-infra-shadow/build.gradle @@ -44,7 +44,6 @@ dependencies { implementation localGroovy() implementation deps.commons.codec implementation deps.randomizedtesting.runner - implementation deps.rat implementation deps.zstd implementation deps.gjf diff --git a/build-tools/build-infra/build.gradle b/build-tools/build-infra/build.gradle index 016817b82d1c..e6a4c4504115 100644 --- a/build-tools/build-infra/build.gradle +++ b/build-tools/build-infra/build.gradle @@ -75,7 +75,6 @@ dependencies { implementation localGroovy() implementation deps.commons.codec implementation deps.randomizedtesting.runner - implementation deps.rat implementation deps.zstd implementation deps.flexmark.core diff --git a/build-tools/build-infra/src/main/groovy/lucene.validation.rat-sources.gradle b/build-tools/build-infra/src/main/groovy/lucene.validation.rat-sources.gradle_ similarity index 100% rename from build-tools/build-infra/src/main/groovy/lucene.validation.rat-sources.gradle rename to build-tools/build-infra/src/main/groovy/lucene.validation.rat-sources.gradle_ diff --git 
a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/LicenceCheckTask.java b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/LicenceCheckTask.java deleted file mode 100644 index 15612cf29ad0..000000000000 --- a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/LicenceCheckTask.java +++ /dev/null @@ -1,239 +0,0 @@ -package org.apache.lucene.gradle.plugins.licenses; - -import java.io.*; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.util.*; -import java.util.stream.Collectors; -import javax.inject.Inject; -import javax.xml.XMLConstants; -import javax.xml.parsers.DocumentBuilderFactory; -import org.apache.rat.Defaults; -import org.apache.rat.ReportConfiguration; -import org.apache.rat.analysis.IHeaderMatcher; -import org.apache.rat.analysis.util.HeaderMatcherMultiplexer; -import org.apache.rat.anttasks.SubstringLicenseMatcher; -import org.apache.rat.api.RatException; -import org.apache.rat.document.impl.FileDocument; -import org.apache.rat.license.SimpleLicenseFamily; -import org.apache.rat.report.RatReport; -import org.apache.rat.report.claim.ClaimStatistic; -import org.apache.rat.report.xml.XmlReportFactory; -import org.apache.rat.report.xml.writer.impl.base.XmlWriter; -import org.gradle.api.DefaultTask; -import org.gradle.api.GradleException; -import org.gradle.api.file.ConfigurableFileCollection; -import org.gradle.api.file.ProjectLayout; -import org.gradle.api.file.RegularFileProperty; -import org.gradle.api.tasks.*; -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.w3c.dom.NodeList; - -/** Java port of the Groovy/Ant-based RAT task using Apache RAT Core API (0.16.1). 
*/ -@CacheableTask -public abstract class LicenceCheckTask extends DefaultTask { - @InputFiles - @PathSensitive(PathSensitivity.RELATIVE) - @IgnoreEmptyDirectories - public abstract ConfigurableFileCollection getInputFileTrees(); - - @OutputFile - public abstract RegularFileProperty getXmlReport(); - - @Inject - public LicenceCheckTask(ProjectLayout layout) { - getXmlReport().convention(layout.getBuildDirectory().file("rat/rat-report.xml")); - setGroup("Verification"); - setDescription( - "Runs Apache RAT on the configured sources and fails on unknown/unapproved licenses."); - } - - @TaskAction - public void run() { - final String origEncoding = System.getProperty("file.encoding"); - File reportFile = getXmlReport().get().getAsFile(); - try { - generateReport(reportFile); - printUnknownFiles(reportFile); - } finally { - if (!Objects.equals(System.getProperty("file.encoding"), origEncoding)) { - throw new GradleException( - "Something is wrong: Apache RAT changed file.encoding to " - + System.getProperty("file.encoding") - + "?"); - } - } - } - - private void generateReport(File reportFile) { - try { - Files.createDirectories(reportFile.getParentFile().toPath()); - // Write the input file list for debugging - String inputFileList = - getInputFileTrees().getFiles().stream() - .map(File::getPath) - .sorted() - .collect(Collectors.joining("\n")); - File listFile = new File(reportFile.getPath().replaceAll("\\.xml$", "-filelist.txt")); - try (Writer w = - new OutputStreamWriter(new FileOutputStream(listFile), StandardCharsets.UTF_8)) { - w.write(inputFileList); - } - - ReportConfiguration config = new ReportConfiguration(); - - List matchers = new ArrayList<>(); - matchers.add(Defaults.createDefaultMatcher()); - - matchers.add( - subStringMatcher( - "BSD4 ", - "Original BSD License (with advertising clause)", - "All advertising materials")); - matchers.add( - subStringMatcher( - "BSD ", "Modified BSD License", "Copyright (c) 2001-2009 Anders Moeller")); - matchers.add( - 
subStringMatcher( - "BSD ", "Modified BSD License", "Copyright (c) 2001, Dr Martin Porter")); - matchers.add( - subStringMatcher( - "BSD ", - "Modified BSD License", - "THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS")); - matchers.add( - subStringMatcher( - "BSD ", "Modified BSD License", "Egothor Software License version 1.00")); - matchers.add( - subStringMatcher("BSD ", "Modified BSD License", "Copyright (c) 2005 Bruno Martins")); - matchers.add( - subStringMatcher( - "BSD ", - "Modified BSD License", - "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS")); - matchers.add( - subStringMatcher( - "BSD ", - "Modified BSD License", - "THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS")); - - matchers.add( - subStringMatcher( - "MIT ", - "Modified BSD License", - "Permission is hereby granted, free of charge, to any person obtaining a copy")); - matchers.add(subStringMatcher("MIT ", "Modified BSD License", " ; License: MIT")); - - matchers.add( - subStringMatcher( - "AL ", "Apache", "Licensed to the Apache Software Foundation (ASF) under")); - matchers.add( - subStringMatcher( - "AL ", - "Apache", - "Licensed under the Apache License, Version 2.0 (the \"License\")")); - - matchers.add(subStringMatcher("GEN ", "Generated", "Produced by GNUPLOT")); - matchers.add(subStringMatcher("GEN ", "Generated", "Generated by Snowball")); - - config.setHeaderMatcher(new HeaderMatcherMultiplexer(matchers)); - - config.setApprovedLicenseNames( - new SimpleLicenseFamily[] { - simpleFamily("Apache"), - simpleFamily("The MIT License"), - simpleFamily("Modified BSD License"), - simpleFamily("Generated") - }); - - Files.deleteIfExists(reportFile.toPath()); - try (Writer writer = - new BufferedWriter( - new OutputStreamWriter(new FileOutputStream(reportFile), StandardCharsets.UTF_8))) { - toXmlReportFile(config, writer); - } - } catch (IOException | RatException e) { - throw new GradleException("Cannot generate RAT report", e); - 
} - } - - private static SimpleLicenseFamily simpleFamily(String name) { - SimpleLicenseFamily fam = new SimpleLicenseFamily(); - fam.setFamilyName(name); - return fam; - } - - private static IHeaderMatcher subStringMatcher( - String licenseFamilyCategory, String licenseFamilyName, String substringPattern) { - SubstringLicenseMatcher substringLicenseMatcher = new SubstringLicenseMatcher(); - substringLicenseMatcher.setLicenseFamilyCategory(licenseFamilyCategory); - substringLicenseMatcher.setLicenseFamilyName(licenseFamilyName); - SubstringLicenseMatcher.Pattern p = new SubstringLicenseMatcher.Pattern(); - p.setSubstring(substringPattern); - substringLicenseMatcher.addConfiguredPattern(p); - return substringLicenseMatcher; - } - - private void toXmlReportFile(ReportConfiguration config, Writer writer) - throws RatException, IOException { - ClaimStatistic stats = new ClaimStatistic(); - RatReport standardReport = - XmlReportFactory.createStandardReport(new XmlWriter(writer), stats, config); - standardReport.startReport(); - for (File f : getInputFileTrees().getFiles()) { - standardReport.report(new FileDocument(f)); - } - standardReport.endReport(); - writer.flush(); - } - - private void printUnknownFiles(File reportFile) { - List errors = parseUnknowns(reportFile); - if (!errors.isEmpty()) { - String msg = - "Found " - + errors.size() - + " file(s) with errors:\n" - + errors.stream().map(e -> " - " + e).collect(Collectors.joining("\n")); - throw new GradleException(msg); - } - } - - private static List parseUnknowns(File reportFile) { - try { - DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); - dbf.setXIncludeAware(false); - dbf.setIgnoringComments(true); - dbf.setExpandEntityReferences(false); - dbf.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, ""); - dbf.setAttribute(XMLConstants.ACCESS_EXTERNAL_SCHEMA, ""); - dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); - dbf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", 
true); - dbf.setFeature("http://xml.org/sax/features/external-general-entities", false); - dbf.setFeature("http://xml.org/sax/features/external-parameter-entities", false); - dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); - - Document doc = dbf.newDocumentBuilder().parse(reportFile); - NodeList resources = doc.getElementsByTagName("resource"); - List bad = new ArrayList<>(); - for (int i = 0; i < resources.getLength(); i++) { - Element res = (Element) resources.item(i); - NodeList children = res.getChildNodes(); - for (int j = 0; j < children.getLength(); j++) { - if (children.item(j) instanceof Element el) { - if ("license-approval".equals(el.getTagName()) - && "false".equals(el.getAttribute("name"))) { - bad.add("Unknown license: " + res.getAttribute("name")); - break; - } - } - } - } - Collections.sort(bad); - return bad; - } catch (Exception e) { - throw new GradleException("Error parsing RAT XML report: " + reportFile.getAbsolutePath(), e); - } - } -} diff --git a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/spotless/ValidateSourcePatternsPlugin.java b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/spotless/ValidateSourcePatternsPlugin.java index 1eb57eeb7a77..ffe46f821a6e 100644 --- a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/spotless/ValidateSourcePatternsPlugin.java +++ b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/spotless/ValidateSourcePatternsPlugin.java @@ -33,9 +33,6 @@ import java.util.stream.Collectors; import javax.inject.Inject; import org.apache.lucene.gradle.plugins.LuceneGradlePlugin; -import org.apache.rat.Defaults; -import org.apache.rat.analysis.RatHeaderAnalysisException; -import org.apache.rat.document.impl.FileDocument; import org.gradle.api.DefaultTask; import org.gradle.api.GradleException; import org.gradle.api.Project; @@ -145,9 +142,8 @@ public void check() { String fileText = 
readUtf8WithValidation(file); if (file.getName().endsWith(".xml")) { - var ratDocument = new FileDocument(file); checkLicenseHeaderPrecedes( - file, "", xmlTagPattern, xmlCommentPattern, fileText, ratDocument, violations); + file, "", xmlTagPattern, xmlCommentPattern, fileText, violations); } } progress.completed(); @@ -168,22 +164,20 @@ private void checkLicenseHeaderPrecedes( Pattern contentPattern, Pattern commentPattern, String fileText, - FileDocument ratDocument, TreeSet violations) { Matcher contentMatcher = contentPattern.matcher(fileText); if (contentMatcher.find()) { int contentStartPos = contentMatcher.start(); Matcher commentMatcher = commentPattern.matcher(fileText); while (commentMatcher.find()) { - if (isLicense(file, commentMatcher.group(1), ratDocument)) { - if (commentMatcher.start() < contentStartPos) { - // This file is all good, so break the loop: - // license header precedes 'description' definition - break; - } else { - reportViolation( - violations, file, description + " declaration precedes license header"); - } + System.out.println("# " + file); + if (commentMatcher.start() < contentStartPos) { + // This file is all good, so break the loop: + // license header precedes 'description' definition + break; + } else { + reportViolation( + violations, file, description + " declaration precedes license header"); } } } @@ -195,26 +189,6 @@ private void reportViolation(TreeSet violations, File file, String name) violations.add(msg); } - // See LUCENE-10419 - rat is not thread safe. 
- private static final Object ratLockBug = new Object(); - private static final Splitter lineSplitter = Splitter.on(Pattern.compile("[\\r\\n]+")); - - private boolean isLicense(File file, String text, FileDocument ratDocument) { - synchronized (ratLockBug) { - var licenseMatcher = Defaults.createDefaultMatcher(); - licenseMatcher.reset(); - return lineSplitter.splitToList(text).stream() - .anyMatch( - it -> { - try { - return licenseMatcher.match(ratDocument, it); - } catch (RatHeaderAnalysisException e) { - throw new GradleException("Could not scan this file with rat: " + file, e); - } - }); - } - } - private static String readUtf8WithValidation(File file) { String fileText; CharsetDecoder validatingDecoder = diff --git a/build.gradle b/build.gradle index d95068898a4a..aa2ce376495c 100644 --- a/build.gradle +++ b/build.gradle @@ -32,7 +32,6 @@ plugins { id "lucene.validation.gradle-versions-cleanup" id "lucene.validation.error-prone" id "lucene.validation.jar-checks" - id "lucene.validation.rat-sources" id "lucene.publications.maven" id "lucene.publications.maven-to-nexus-releases" diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index b5e07215dfc4..6e046d430ed0 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -92,7 +92,6 @@ nekohtml = { module = "net.sourceforge.nekohtml:nekohtml", version.ref = "nekoht opennlp-tools = { module = "org.apache.opennlp:opennlp-tools", version.ref = "opennlp" } procfork = { module = "com.carrotsearch:procfork", version.ref = "procfork" } randomizedtesting-runner = { module = "com.carrotsearch.randomizedtesting:randomizedtesting-runner", version.ref = "randomizedtesting" } -rat = { module = "org.apache.rat:apache-rat", version.ref = "rat" } s2-geometry = { module = "io.sgr:s2-geometry-library-java", version.ref = "s2-geometry" } spatial4j = { module = "org.locationtech.spatial4j:spatial4j", version.ref = "spatial4j" } xerces = { module = "xerces:xercesImpl", version.ref = "xerces" } diff 
--git a/lucene/benchmark/build.gradle b/lucene/benchmark/build.gradle index 7acf26eeadda..4e98fc344d8d 100644 --- a/lucene/benchmark/build.gradle +++ b/lucene/benchmark/build.gradle @@ -15,8 +15,6 @@ * limitations under the License. */ -import org.apache.lucene.gradle.plugins.licenses.LicenceCheckTask - plugins { id "java" } @@ -51,8 +49,6 @@ dependencies { moduleRuntimeOnly project(':lucene:analysis:icu') moduleTestImplementation project(':lucene:test-framework') - - moduleImplementation "org.apache.rat:apache-rat-core:0.16.1" } // We add 'conf' to resources because we validate *.alg script correctness in one of the tests. @@ -94,9 +90,3 @@ tasks.register("run", JavaExec, { suspend = true } }) - -tasks.register("foo", LicenceCheckTask.class, {task -> - task.getInputFileTrees().from(project.fileTree("src", { - include "**/*.java" - })) -}) diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/LicenseCheckDummy.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/LicenseCheckDummy.java deleted file mode 100644 index edad6a72a1f0..000000000000 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/LicenseCheckDummy.java +++ /dev/null @@ -1,119 +0,0 @@ -package org.apache.lucene.benchmark; - -import java.io.OutputStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.Arrays; -import java.util.List; -import java.util.Set; -import java.util.SortedSet; -import java.util.TreeSet; -import java.util.stream.Collectors; -import org.apache.rat.*; -import org.apache.rat.analysis.IHeaderMatcher; -import org.apache.rat.analysis.matchers.CopyrightMatcher; -import org.apache.rat.analysis.matchers.OrMatcher; -import org.apache.rat.analysis.matchers.SimpleTextMatcher; -import org.apache.rat.api.RatException; -import org.apache.rat.document.impl.FileDocument; -import org.apache.rat.license.ILicense; -import org.apache.rat.license.ILicenseFamily; -import org.apache.rat.report.IReportable; -import 
org.apache.rat.report.RatReport; -import org.apache.rat.report.claim.ClaimStatistic; -import org.apache.rat.report.claim.impl.ClaimAggregator; -import org.apache.rat.utils.DefaultLog; - -public class LicenseCheckDummy { - public static void main(String[] args) throws Exception { - final ReportConfiguration configuration = new ReportConfiguration(DefaultLog.INSTANCE); - - Defaults.Builder defaultBuilder = Defaults.builder(); - Defaults defaults = defaultBuilder.build(); - configuration.setFrom(defaults); - - SortedSet family = - new TreeSet<>( - Set.of( - ILicenseFamily.builder() - .setLicenseFamilyCategory("BSD") - .setLicenseFamilyName("BSD license") - .build())); - configuration.addLicense( - ILicense.builder() - .setId("xyz") - .setName("Name xyz") - .setLicenseFamilyCategory("BSD") - .setMatcher(IHeaderMatcher.Builder.text().setText("license xyz").build()) - .build(family)); - - configuration.setStyleReport(false); - - List paths; - try (var s = Files.walk(Paths.get("/home/dweiss/tmp/rat/"))) { - paths = s.filter(p -> Files.isRegularFile(p)).toList(); - } - - configuration.setReportable( - new IReportable() { - @Override - public void run(RatReport report) throws RatException { - for (var path : paths) { - report.report(new FileDocument(path.toFile())); - } - } - }); - - final IHeaderMatcher asf1Matcher = - new SimpleTextMatcher("http://www.apache.org/licenses/LICENSE-2.0"); - final IHeaderMatcher asf2Matcher = new SimpleTextMatcher("https://www.apache.org/licenses/LICENSE-2.0.txt"); - final IHeaderMatcher asfMatcher = new OrMatcher(Arrays.asList(asf1Matcher, asf2Matcher)); - - final IHeaderMatcher qosMatcher = new CopyrightMatcher("2004", "2011", "QOS.ch"); - final ILicense qosLic = new TestingLicense("QOS", qosMatcher); - - IDocumentAnalyser analyser = DefaultAnalyserFactory.createDefaultAnalyser(DefaultLog.INSTANCE,Arrays.asList(asfLic, qosLic)); - final List reporters = new ArrayList<>(); - reporters.add(reporter); - report = new 
ClaimReporterMultiplexer(analyser, reporters); - - configuration.setOut(OutputStream::nullOutputStream); - ClaimStatistic statistic = new ClaimStatistic(); - var report = new ClaimAggregator(statistic); - report.startReport(); - configuration.getReportable().run(report); - report.endReport(); - - System.out.println("Approved: " + statistic.getNumApproved()); - System.out.println("Unapproved: " + statistic.getNumUnApproved()); - System.out.println("Unknown: " + statistic.getNumUnknown()); - - System.out.println("Document categories:"); - System.out.println( - statistic.getDocumentCategoryMap().entrySet().stream() - .map(e -> e.getKey() + ": " + e.getValue()) - .collect(Collectors.joining("\n"))); - - System.out.println(); - System.out.println("License file names:"); - System.out.println( - statistic.getLicenseFileNameMap().entrySet().stream() - .map(e -> e.getKey() + ": " + e.getValue()) - .collect(Collectors.joining("\n"))); - - System.out.println(); - System.out.println("License codes:"); - System.out.println( - statistic.getLicenseFileCodeMap().entrySet().stream() - .map(e -> e.getKey() + ": " + e.getValue()) - .collect(Collectors.joining("\n"))); - - System.out.println(); - System.out.println("Doc categories:"); - System.out.println( - statistic.getDocumentCategoryMap().entrySet().stream() - .map(e -> e.getKey() + ": " + e.getValue()) - .collect(Collectors.joining("\n"))); - } -} From dd4f8cd4ee2627443021e7d50e5776148cf1f00a Mon Sep 17 00:00:00 2001 From: Dawid Weiss Date: Tue, 16 Sep 2025 21:52:48 +0200 Subject: [PATCH 08/11] Added a custom check licenses task. Added an initial set of licenses. 
--- .../gradle/plugins/hacks/HacksPlugin.java | 4 +- .../plugins/licenses/CheckLicensesPlugin.java | 28 ++ .../plugins/licenses/CheckLicensesTask.java | 255 ++++++++++++++++++ .../plugins/misc/RootProjectSetupPlugin.java | 4 +- .../ValidateSourcePatternsPlugin.java | 210 --------------- build.gradle | 12 +- gradle/libs.versions.toml | 2 - lucene/benchmark/src/java/module-info.java | 2 +- 8 files changed, 297 insertions(+), 220 deletions(-) create mode 100644 build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/CheckLicensesPlugin.java create mode 100644 build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/CheckLicensesTask.java delete mode 100644 build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/spotless/ValidateSourcePatternsPlugin.java diff --git a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/hacks/HacksPlugin.java b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/hacks/HacksPlugin.java index 89967f68ac8a..b2d374bff5a5 100644 --- a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/hacks/HacksPlugin.java +++ b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/hacks/HacksPlugin.java @@ -24,7 +24,6 @@ import org.apache.lucene.gradle.plugins.LuceneGradlePlugin; import org.apache.lucene.gradle.plugins.java.EcjLintPlugin; import org.apache.lucene.gradle.plugins.misc.CheckGradlewScriptsTweakedPlugin; -import org.apache.lucene.gradle.plugins.spotless.ValidateSourcePatternsPlugin; import org.gradle.api.Project; /** This applies various odd hacks that we probably should not need. 
*/ @@ -56,8 +55,7 @@ private void addDummyOutputs(Project project) { task -> { var taskName = task.getName(); return taskName.startsWith(EcjLintPlugin.TASK_PREFIX) - || taskName.equals(CheckGradlewScriptsTweakedPlugin.TASK_NAME) - || taskName.equals(ValidateSourcePatternsPlugin.TASK_NAME); + || taskName.equals(CheckGradlewScriptsTweakedPlugin.TASK_NAME); }) .configureEach( task -> { diff --git a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/CheckLicensesPlugin.java b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/CheckLicensesPlugin.java new file mode 100644 index 000000000000..5c1f6cc94262 --- /dev/null +++ b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/CheckLicensesPlugin.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.gradle.plugins.licenses; + +import org.apache.lucene.gradle.plugins.LuceneGradlePlugin; +import org.gradle.api.Project; + +/** This configures ASL and other license checks. 
*/ +public class CheckLicensesPlugin extends LuceneGradlePlugin { + @Override + public void apply(Project project) { + applicableToRootProjectOnly(project); + } +} diff --git a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/CheckLicensesTask.java b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/CheckLicensesTask.java new file mode 100644 index 000000000000..eb9135f8c8e8 --- /dev/null +++ b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/CheckLicensesTask.java @@ -0,0 +1,255 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.gradle.plugins.licenses; + +import java.io.*; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.*; +import java.util.function.Predicate; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.gradle.api.DefaultTask; +import org.gradle.api.GradleException; +import org.gradle.api.file.ConfigurableFileCollection; +import org.gradle.api.file.RegularFileProperty; +import org.gradle.api.provider.Property; +import org.gradle.api.tasks.*; +import org.gradle.work.FileChange; +import org.gradle.work.Incremental; +import org.gradle.work.InputChanges; + +/** + * This task takes a set of files as input and verifies if their header contains any of the known + * license patterns. Files that don't have any permitted licenses will trigger an error. + */ +@CacheableTask +public abstract class CheckLicensesTask extends DefaultTask { + /** The default number of leading characters scanned in each file. 
*/ + private static final Integer DEFAULT_SCANNED_HEADER = 1024; + + private record LicenseFamily(String code, String name, Predicate matcherPredicate) {} + + private static List luceneAcceptedLicenses = + List.of( + new LicenseFamily( + "ASL", + "Apache Software License 2.0", + fixedSubstring("http://www.apache.org/licenses/LICENSE-2.0")), + new LicenseFamily( + "MIT", + "The MIT License", + // ICU license (ScriptIterator.java) + fixedSubstring( + "Permission is hereby granted, free of charge, to any person obtaining a copy") + .and( + fixedSubstring( + "THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR"))), + new LicenseFamily( + "BSD", + "Modified BSD License", + fixedSubstrings( + // brics automaton + "Copyright (c) 2001-2009 Anders Moeller", + // snowball + "Copyright (c) 2001, Dr Martin Porter", + // UMASS kstem + "THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS", + // Egothor + "Egothor Software License version 1.00", + // JaSpell + "Copyright (c) 2005 Bruno Martins", + // lz4 + "Copyright (c) 2011-2016, Yann Collet")), + new LicenseFamily( + "GENERATED", + "Generated files", + fixedSubstrings( + // snowball + "Generated by Snowball", + // javacc + "Generated By:JavaCC"))); + + /** The number of leading characters scanned in each file. */ + @Input + public abstract Property getMaxScannedHeaderSize(); + + @InputFiles + @Incremental + @PathSensitive(PathSensitivity.RELATIVE) + public abstract ConfigurableFileCollection getFiles(); + + /** + * An output report file to keep up-to-date checks sane. The output file is a sorted list of all + * input files and their license codes. 
+ */ + @OutputFile + public abstract RegularFileProperty getReportFile(); + + public CheckLicensesTask() { + getMaxScannedHeaderSize().convention(DEFAULT_SCANNED_HEADER); + } + + @TaskAction + public void run(InputChanges changes) throws IOException { + // load the current report (if any) into a sorted map + File reportFile = getReportFile().getAsFile().get(); + TreeMap report = readExistingReport(reportFile); + + // update the report with changes. + List missingLicense = new ArrayList<>(); + char[] scratch = new char[1024]; + StringBuilder buffer = new StringBuilder(); + for (FileChange fc : changes.getFileChanges(getFiles())) { + File file = fc.getFile(); + if (file.isDirectory()) { + continue; + } + + LicenseFamily licenseFamily = detectLicense(file, buffer, scratch); + if (licenseFamily == null) { + missingLicense.add(file); + } else { + String key = toRootRelative(file); + + switch (fc.getChangeType()) { + case REMOVED -> report.remove(key); + case ADDED, MODIFIED -> report.put(key, licenseFamily.code); + default -> throw new IOException("Unexpected change type: " + fc.getChangeType()); + } + } + } + + if (!missingLicense.isEmpty()) { + throw new GradleException( + String.format( + Locale.ROOT, + "The following files have no (acceptable) license header or the license header is not within the first %s header characters:\n%s", + getMaxScannedHeaderSize().get(), + missingLicense.stream() + .map(file -> " - " + toRootRelative(file)) + .collect(Collectors.joining("\n")))); + } + + // Also, in case the input set of files has itself changed, ensure the report only + // contains files that still exist under the current sources. 
+ Set current = new HashSet<>(); + for (File f : getFiles().getFiles()) { + if (f.isFile()) { + current.add(toRootRelative(f)); + } + } + report.keySet().removeIf(k -> !current.contains(k)); + + writeReport(report, reportFile); + } + + private TreeMap readExistingReport(File reportFile) throws IOException { + TreeMap report = new TreeMap<>(); + if (!reportFile.exists()) { + return report; + } + + try (var lineStream = Files.lines(reportFile.toPath())) { + lineStream.forEach( + line -> { + int idx = line.lastIndexOf(':'); + String path = line.substring(0, idx).trim(); + String state = line.substring(idx + 1).trim(); + if (!path.isEmpty() && !state.isEmpty()) { + report.put(path, state); + } + }); + } + + return report; + } + + private void writeReport(TreeMap report, File reportFile) throws IOException { + Files.createDirectories(reportFile.toPath().getParent()); + + try (BufferedWriter bw = Files.newBufferedWriter(reportFile.toPath())) { + for (Map.Entry e : report.entrySet()) { + bw.write(e.getKey()); + bw.write(": "); + bw.write(e.getValue()); + bw.write("\n"); + } + } + } + + /** Convert the given path to a root-project-relative path with unix path separators. */ + private String toRootRelative(File file) { + Path root = getProject().getRootDir().toPath().normalize(); + Path p = file.toPath().toAbsolutePath().normalize(); + String key = root.relativize(p).toString(); + if (File.separatorChar != '/') { + key = key.replace(File.separatorChar, '/'); + } + return key; + } + + private LicenseFamily detectLicense(File file, StringBuilder buffer, char[] scratch) + throws IOException { + // I assume all files are in UTF8... This is verified elsewhere (eclint). 
+ + buffer.setLength(0); + try (var reader = + new InputStreamReader(Files.newInputStream(file.toPath()), StandardCharsets.UTF_8)) { + + int maxChars = getMaxScannedHeaderSize().get(); + while (maxChars > 0) { + int readChars = reader.read(scratch); + if (readChars < 0) { + break; + } + buffer.append(scratch, 0, readChars); + maxChars -= readChars; + } + + String header = buffer.toString(); + + for (var licenseFamily : luceneAcceptedLicenses) { + if (licenseFamily.matcherPredicate.test(header)) { + return licenseFamily; + } + } + + return null; + } + } + + /** Matches a fixed substring. */ + private static Predicate fixedSubstring(String substring) { + return header -> header.contains(substring); + } + + /** Any of the provided substrings. */ + private static Predicate fixedSubstrings(String... otherSubstrings) { + return anyOf(Stream.of(otherSubstrings).map(CheckLicensesTask::fixedSubstring).toList()); + } + + private static Predicate anyOf(Predicate... list) { + return anyOf(List.of(list)); + } + + private static Predicate anyOf(List> list) { + return list.stream().reduce(v -> false, Predicate::or); + } +} diff --git a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/misc/RootProjectSetupPlugin.java b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/misc/RootProjectSetupPlugin.java index 3e44576a7d79..08349c1a247d 100644 --- a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/misc/RootProjectSetupPlugin.java +++ b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/misc/RootProjectSetupPlugin.java @@ -31,9 +31,9 @@ import org.apache.lucene.gradle.plugins.help.BuildOptionGroupsPlugin; import org.apache.lucene.gradle.plugins.ide.EclipseSupportPlugin; import org.apache.lucene.gradle.plugins.ide.IdeaSupportPlugin; +import org.apache.lucene.gradle.plugins.licenses.CheckLicensesPlugin; import org.apache.lucene.gradle.plugins.regenerate.RegenerateTasksSupportPlugin; import 
org.apache.lucene.gradle.plugins.spotless.GradleGroovyFormatPlugin; -import org.apache.lucene.gradle.plugins.spotless.ValidateSourcePatternsPlugin; import org.gradle.api.Project; import org.gradle.api.file.DuplicatesStrategy; import org.gradle.api.initialization.IncludedBuild; @@ -76,7 +76,7 @@ public void apply(Project rootProject) { plugins.apply(HacksPlugin.class); plugins.apply(WipeGradleTempPlugin.class); plugins.apply(GradleGroovyFormatPlugin.class); - plugins.apply(ValidateSourcePatternsPlugin.class); + plugins.apply(CheckLicensesPlugin.class); plugins.apply(ConfigureLockFilePlugin.class); plugins.apply(CheckGradlewScriptsTweakedPlugin.class); diff --git a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/spotless/ValidateSourcePatternsPlugin.java b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/spotless/ValidateSourcePatternsPlugin.java deleted file mode 100644 index ffe46f821a6e..000000000000 --- a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/spotless/ValidateSourcePatternsPlugin.java +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.lucene.gradle.plugins.spotless; - -import com.google.common.base.Splitter; -import java.io.File; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.StringWriter; -import java.nio.charset.CharsetDecoder; -import java.nio.charset.CodingErrorAction; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.util.Locale; -import java.util.Set; -import java.util.TreeSet; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import java.util.stream.Collectors; -import javax.inject.Inject; -import org.apache.lucene.gradle.plugins.LuceneGradlePlugin; -import org.gradle.api.DefaultTask; -import org.gradle.api.GradleException; -import org.gradle.api.Project; -import org.gradle.api.file.ConfigurableFileTree; -import org.gradle.api.tasks.CacheableTask; -import org.gradle.api.tasks.IgnoreEmptyDirectories; -import org.gradle.api.tasks.InputFiles; -import org.gradle.api.tasks.PathSensitive; -import org.gradle.api.tasks.PathSensitivity; -import org.gradle.api.tasks.TaskAction; -import org.gradle.api.tasks.compile.JavaCompile; -import org.gradle.internal.logging.progress.ProgressLogger; -import org.gradle.internal.logging.progress.ProgressLoggerFactory; - -/** Checks for invalid usage patterns in source files. 
*/ -public class ValidateSourcePatternsPlugin extends LuceneGradlePlugin { - public static final String TASK_NAME = "validateSourcePatterns"; - - @Override - public void apply(Project project) { - applicableToRootProjectOnly(project); - - project.allprojects(this::configureProject); - } - - private void configureProject(Project project) { - var validateSourcePatternsTask = - project - .getTasks() - .register( - TASK_NAME, - ValidateSourcePatternsTask.class, - task -> { - task.setGroup("Verification"); - task.setDescription("Validate Source Patterns"); - - ConfigurableFileTree sourceFiles = task.getSourceFiles(); - sourceFiles.setDir(project.getLayout().getProjectDirectory()); - - // it seems we only scan XML files - everything else has been moved - // to rat scanning or elsewhere. - sourceFiles.include("**/*.xml"); - - // default excludes. - sourceFiles.exclude("**/build/**"); - sourceFiles.exclude("**/.idea/**"); - sourceFiles.exclude("**/.gradle/**"); - sourceFiles.exclude("**/.git/**"); - - // Don't go into subproject folders (each is scanned individually). - sourceFiles.exclude( - project.getChildProjects().keySet().stream() - .map(name -> name + "/**") - .toList()); - }); - - // Add to all checks. - project.getTasks().named("check").configure(task -> task.dependsOn(validateSourcePatternsTask)); - - // Ensure validation runs prior to any compilation task. - project - .getTasks() - .withType(JavaCompile.class) - .configureEach( - task -> { - task.mustRunAfter(validateSourcePatternsTask); - }); - - // project-specific tuning. 
- project - .project(":lucene:benchmark") - .getTasks() - .withType(ValidateSourcePatternsTask.class) - .configureEach( - task -> { - task.getSourceFiles().exclude("data/**", "work/**"); - }); - } - - @CacheableTask - public abstract static class ValidateSourcePatternsTask extends DefaultTask { - @InputFiles - @PathSensitive(PathSensitivity.RELATIVE) - @IgnoreEmptyDirectories - public abstract ConfigurableFileTree getSourceFiles(); - - @Inject - protected abstract ProgressLoggerFactory getProgressLoggerFactory(); - - @TaskAction - public void check() { - Set files = getSourceFiles().getFiles(); - getLogger() - .info( - "Input files for scanning:\n{}", - files.stream().map(f -> " - " + f).collect(Collectors.joining("\n"))); - - var xmlCommentPattern = Pattern.compile("(?sm)\\Q\\E"); - var xmlTagPattern = Pattern.compile("(?m)\\s*<[a-zA-Z].*"); - var violations = new TreeSet(); - - ProgressLogger progress = getProgressLoggerFactory().newOperation(this.getClass()); - progress.start(this.getName(), this.getName()); - for (var file : files) { - progress.progress("Scanning " + file.getName()); - - String fileText = readUtf8WithValidation(file); - - if (file.getName().endsWith(".xml")) { - checkLicenseHeaderPrecedes( - file, "", xmlTagPattern, xmlCommentPattern, fileText, violations); - } - } - progress.completed(); - - if (!violations.isEmpty()) { - throw new GradleException( - String.format( - Locale.ROOT, - "Found %d source violation(s):\n %s", - violations.size(), - String.join("\n ", violations))); - } - } - - private void checkLicenseHeaderPrecedes( - File file, - String description, - Pattern contentPattern, - Pattern commentPattern, - String fileText, - TreeSet violations) { - Matcher contentMatcher = contentPattern.matcher(fileText); - if (contentMatcher.find()) { - int contentStartPos = contentMatcher.start(); - Matcher commentMatcher = commentPattern.matcher(fileText); - while (commentMatcher.find()) { - System.out.println("# " + file); - if 
(commentMatcher.start() < contentStartPos) { - // This file is all good, so break the loop: - // license header precedes 'description' definition - break; - } else { - reportViolation( - violations, file, description + " declaration precedes license header"); - } - } - } - } - - private void reportViolation(TreeSet violations, File file, String name) { - String msg = String.format(Locale.ROOT, "%s: %s", file, name); - getLogger().error(msg); - violations.add(msg); - } - - private static String readUtf8WithValidation(File file) { - String fileText; - CharsetDecoder validatingDecoder = - StandardCharsets.UTF_8 - .newDecoder() - .onMalformedInput(CodingErrorAction.REPORT) - .onUnmappableCharacter(CodingErrorAction.REPORT); - try (var is = Files.newInputStream(file.toPath()); - var sw = new StringWriter(); - var reader = new InputStreamReader(is, validatingDecoder)) { - reader.transferTo(sw); - fileText = sw.toString(); - } catch (IOException e) { - throw new GradleException("Could not read: " + file, e); - } - return fileText; - } - } -} diff --git a/build.gradle b/build.gradle index aa2ce376495c..e3f50b11c8aa 100644 --- a/build.gradle +++ b/build.gradle @@ -1,5 +1,4 @@ -import org.apache.lucene.gradle.plugins.misc.CheckEnvironmentPlugin -import org.apache.lucene.gradle.plugins.spotless.GoogleJavaFormatPlugin +import org.apache.lucene.gradle.plugins.licenses.CheckLicensesTask /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -60,3 +59,12 @@ plugins { } description = 'Grandparent project for Apache Lucene Core' + +tasks.register("checkLicenses", CheckLicensesTask, { + reportFile = project.layout.buildDirectory.file("licenses-report.txt"); + + files.from(fileTree(".", { + include "**/*.java" + exclude "**/build/**" + })) +}) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 6e046d430ed0..83e4b7d707fa 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -50,8 +50,6 @@ opennlp = "2.5.5" procfork = 
"1.0.6" # unit tests randomizedtesting = "2.8.3" -# license checks -rat = "0.15" # spatial-extras/ support s2-geometry = "1.0.0" # spatial-extras/ support diff --git a/lucene/benchmark/src/java/module-info.java b/lucene/benchmark/src/java/module-info.java index e8ae492b35e9..15b5d7b8c2e1 100644 --- a/lucene/benchmark/src/java/module-info.java +++ b/lucene/benchmark/src/java/module-info.java @@ -28,7 +28,7 @@ requires org.apache.commons.compress; requires nekohtml; requires com.ibm.icu; - requires apache.rat.core; + requires apache.rat.core; requires org.apache.commons.io; exports org.apache.lucene.benchmark; From 63c1a92966d617389d2dc21d7f3d66d4bcfad024 Mon Sep 17 00:00:00 2001 From: Dawid Weiss Date: Wed, 17 Sep 2025 09:48:57 +0200 Subject: [PATCH 09/11] Move license check configuration to a plugin. --- .../lucene.validation.rat-sources.gradle_ | 256 ------------------ .../plugins/licenses/CheckLicensesPlugin.java | 98 +++++++ .../plugins/licenses/CheckLicensesTask.java | 57 +++- build.gradle | 9 - 4 files changed, 140 insertions(+), 280 deletions(-) delete mode 100644 build-tools/build-infra/src/main/groovy/lucene.validation.rat-sources.gradle_ diff --git a/build-tools/build-infra/src/main/groovy/lucene.validation.rat-sources.gradle_ b/build-tools/build-infra/src/main/groovy/lucene.validation.rat-sources.gradle_ deleted file mode 100644 index c9817a054cef..000000000000 --- a/build-tools/build-infra/src/main/groovy/lucene.validation.rat-sources.gradle_ +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import groovy.xml.NamespaceBuilder - -if (project != project.rootProject) { - throw new GradleException("Applicable to rootProject only: " + project.path) -} - -// Configure the rat validation task and all scanned directories. -allprojects { - configurations { - ratDeps - } - - dependencies { - ratDeps deps.rat - } - - def ratTask = tasks.register("rat", RatTask, { - group = 'Verification' - description = 'Runs Apache Rat checks.' - - dependsOn configurations.ratDeps - - def defaultScanFileTree = project.fileTree(projectDir, { - // Don't check under the project's build folder. - exclude project.layout.buildDirectory.asFile.get().name - - // Exclude any generated stuff. - exclude "src/generated" - - // Don't check any of the subprojects - they have their own rat tasks. - if (project.path == ":") { - exclude "lucene" - exclude "**/build" - include "build-tools/build-infra/**" - } else { - exclude subprojects.collect { it.projectDir.name } - } - - // At the module scope we only check selected file patterns as folks have various .gitignore-d resources - // generated by IDEs, etc. - include "**/*.gradle" - include "**/*.xml" - include "**/*.md" - include "**/*.py" - include "**/*.sh" - include "**/*.bat" - - // Any .gradle folder in composite or top-level project. - exclude "**/.gradle/**" - - // Include selected patterns from any source folders. We could make this - // relative to source sets but it seems to be of little value - all our source sets - // live under 'src' anyway. 
- include "src/**" - exclude "src/**/*.png" - exclude "src/**/*.txt" - exclude "src/**/*.zip" - exclude "src/**/*.properties" - exclude "src/**/*.utf8" - - // Conditionally apply module-specific patterns. We do it here instead - // of reconfiguring each project because the provider can be made lazy - // and it's easier to manage this way. - switch (project.path) { - case ":": - include "gradlew" - include "gradlew.bat" - exclude ".gradle" - exclude ".idea" - exclude ".muse" - exclude ".git" - - // Exclude github stuff (templates, workflows). - exclude ".github" - - // do not let RAT attempt to scan a python venv, it gets lost and confused... - exclude "dev-tools/scripts/.venv/**" - break - - case ":lucene:analysis:morfologik": - exclude "src/**/*.info" - exclude "src/**/*.input" - break - - case ":lucene:analysis:opennlp": - exclude "src/**/en-test-lemmas.dict" - break - - case ":lucene:demo": - exclude "src/**/knn-token-vectors" - break - - case ":lucene:test-framework": - exclude "src/**/europarl.lines.txt.seek" - break - - case ":lucene:analysis:common": - case ":lucene:analysis.tests": - exclude "src/**/*.aff" - exclude "src/**/*.dic" - exclude "src/**/*.good" - exclude "src/**/*.sug" - exclude "src/**/*.wrong" - exclude "src/**/charfilter/*.htm*" - exclude "src/**/*LuceneResourcesWikiPage.html" - exclude "src/**/*.rslp" - break - - case ":lucene:build-tools:build-infra-shadow": - exclude "src/**/keep.me" - break - - case ":lucene:benchmark": - exclude "data/" - break - } - }) - inputFileTrees.add(defaultScanFileTree) - }) - - tasks.named("check").configure { - dependsOn ratTask - } -} - -/** - * An Apache RAT adapter that validates whether files contain acceptable licenses. 
- */ -@CacheableTask -class RatTask extends DefaultTask { - - @InputFiles - @PathSensitive(PathSensitivity.RELATIVE) - @IgnoreEmptyDirectories - final ListProperty inputFileTrees = project.objects.listProperty(ConfigurableFileTree) - - @OutputFile - final RegularFileProperty xmlReport = project.objects.fileProperty().convention( - project.layout.buildDirectory.file("rat/rat-report.xml")) - - def generateReport(File reportFile) { - // Set up ant rat task. - def ratClasspath = project.configurations.ratDeps.asPath - ant.setLifecycleLogLevel(AntBuilder.AntMessagePriority.ERROR) - ant.taskdef(resource: 'org/apache/rat/anttasks/antlib.xml', classpath: ratClasspath) - - // Collect all output files for debugging. - String inputFileList = inputFileTrees.get().collectMany { fileTree -> - fileTree.asList() - }.sort().join("\n") - project.file(reportFile.path.replaceAll('.xml$', '-filelist.txt')).setText(inputFileList, "UTF-8") - - // Run rat via ant. - ant.report(format: 'xml', reportFile: reportFile, addDefaultLicenseMatchers: true) { - // Pass all gradle file trees to the ant task (Gradle's internal adapters are used). 
- inputFileTrees.get().each { fileTree -> - fileTree.addToAntBuilder(ant, 'resources', FileCollection.AntType.ResourceCollection) - } - - // BSD 4-clause stuff (is disallowed below) - substringMatcher(licenseFamilyCategory: "BSD4 ", licenseFamilyName: "Original BSD License (with advertising clause)") { - pattern(substring: "All advertising materials") - } - - // BSD-like stuff - substringMatcher(licenseFamilyCategory: "BSD ", licenseFamilyName: "Modified BSD License") { - // brics automaton - pattern(substring: "Copyright (c) 2001-2009 Anders Moeller") - // snowball - pattern(substring: "Copyright (c) 2001, Dr Martin Porter") - // UMASS kstem - pattern(substring: "THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS") - // Egothor - pattern(substring: "Egothor Software License version 1.00") - // JaSpell - pattern(substring: "Copyright (c) 2005 Bruno Martins") - // d3.js - pattern(substring: "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS") - // highlight.js - pattern(substring: "THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS") - } - - // MIT-like - substringMatcher(licenseFamilyCategory: "MIT ", licenseFamilyName:"Modified BSD License") { - // ICU license - pattern(substring: "Permission is hereby granted, free of charge, to any person obtaining a copy") - // ui-grid - pattern(substring: " ; License: MIT") - } - - // Apache - substringMatcher(licenseFamilyCategory: "AL ", licenseFamilyName: "Apache") { - pattern(substring: "Licensed to the Apache Software Foundation (ASF) under") - // this is the old - school one under some files - pattern(substring: 'Licensed under the Apache License, Version 2.0 (the "License")') - } - - substringMatcher(licenseFamilyCategory: "GEN ", licenseFamilyName: "Generated") { - // svg files generated by gnuplot - pattern(substring: "Produced by GNUPLOT") - // snowball stemmers generated by snowball compiler - pattern(substring: "Generated by Snowball") - } - - 
approvedLicense(familyName: "Apache") - approvedLicense(familyName: "The MIT License") - approvedLicense(familyName: "Modified BSD License") - approvedLicense(familyName: "Generated") - } - } - - def printUnknownFiles(File reportFile) { - def ratXml = new XmlParser().parse(reportFile) - def errors = [] - ratXml.resource.each { resource -> - if (resource.'license-approval'.@name[0] == "false") { - errors << "Unknown license: ${resource.@name}" - } - } - if (errors) { - throw new GradleException("Found " + errors.size() + " file(s) with errors:\n" + - errors.collect{ msg -> " - ${msg}" }.join("\n")) - } - } - - @TaskAction - def execute() { - def origEncoding = System.getProperty("file.encoding") - try { - File reportFile = xmlReport.get().asFile - generateReport(reportFile) - printUnknownFiles(reportFile) - } finally { - if (System.getProperty("file.encoding") != origEncoding) { - throw new GradleException("Something is wrong: Apache RAT changed file.encoding to ${System.getProperty('file.encoding')}?") - } - } - } -} diff --git a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/CheckLicensesPlugin.java b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/CheckLicensesPlugin.java index 5c1f6cc94262..628c4904cda4 100644 --- a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/CheckLicensesPlugin.java +++ b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/CheckLicensesPlugin.java @@ -24,5 +24,103 @@ public class CheckLicensesPlugin extends LuceneGradlePlugin { @Override public void apply(Project project) { applicableToRootProjectOnly(project); + + // Add check licenses task to the top-level project, configure it to scan all files, including + // those from subprojects. It's fast and simple. 
+ var checkLicensesTask = + project + .getTasks() + .register("checkLicenses", CheckLicensesTask.class, this::configureCheckLicenses); + + // Link any 'check' task from any subproject to this top-level task. + project.subprojects( + subproject -> { + subproject + .getTasks() + .named("check") + .configure( + checkTask -> { + checkTask.dependsOn(checkLicensesTask); + }); + }); + } + + private void configureCheckLicenses(CheckLicensesTask task) { + Project project = task.getProject(); + + task.getReportFile().set(project.getLayout().getBuildDirectory().file("licenses-report.txt")); + task.getFiles() + .from( + project.fileTree( + ".", + tree -> { + // Exclude build outputs, ide files, .git. + tree.exclude(".git"); + tree.exclude(".idea"); + tree.exclude(".muse"); + tree.exclude("**/build/**"); + tree.exclude("**/.gradle"); + + // Exclude generated stuff. + tree.exclude("**/src/generated/**"); + + // Exclude github stuff (templates, workflows). + tree.exclude(".github"); + + // do not let RAT attempt to scan a python venv, it gets lost and confused... + tree.exclude("**/.venv/**"); + + // apache rat has a 'binary guesser'... I don't think this needs to be done at all + // - + // just exclude binaries here. + tree.exclude("**/*.dat"); + tree.exclude("**/*.brk"); + tree.exclude("**/*.gz"); + tree.exclude("**/*.bin"); + tree.exclude("**/*.bz2"); + tree.exclude("**/*.gif"); + tree.exclude("**/*.svg"); + tree.exclude("lucene/analysis/smartcn/src/**/*.mem"); + + // Only check these selected file patterns as folks have various .gitignore-d + // resources generated by IDEs, etc. + tree.include("**/*.gradle"); + tree.include("**/*.xml"); + tree.include("**/*.md"); + tree.include("**/*.py"); + tree.include("**/*.sh"); + tree.include("**/*.bat"); + + // Include selected patterns from any source folders. 
+ tree.include("**/src/**"); + tree.exclude("**/src/**/*.png"); + tree.exclude("**/src/**/*.txt"); + tree.exclude("**/src/**/*.zip"); + tree.exclude("**/src/**/*.properties"); + tree.exclude("**/src/**/*.utf8"); + + // project-specific exclusions. + tree.exclude("build-tools/build-infra-shadow/src/java/keep.me"); + tree.exclude("lucene/analysis/icu/src/**/utr30.nrm"); + tree.exclude("lucene/analysis/kuromoji/src/**/bocchan.utf-8"); + tree.exclude("lucene/analysis/morfologik/src/**/*.info"); + tree.exclude("lucene/analysis/morfologik/src/**/*.input"); + tree.exclude("lucene/analysis/morfologik/src/**/*.dict"); + tree.exclude("lucene/analysis/stempel/src/**/*.tbl"); + tree.exclude("lucene/analysis/opennlp/src/**/en-test-lemmas.dict"); + tree.exclude("lucene/demo/src/**/knn-token-vectors"); + tree.exclude("lucene/test-framework/src/**/europarl.lines.txt.seek"); + tree.exclude("lucene/analysis/common/src/**/*.aff"); + tree.exclude("lucene/analysis/common/src/**/*.dic"); + tree.exclude("lucene/analysis/common/src/**/*.good"); + tree.exclude("lucene/analysis/common/src/**/*.sug"); + tree.exclude("lucene/analysis/common/src/**/*.wrong"); + tree.exclude("lucene/analysis/common/src/**/*.rslp"); + tree.exclude("lucene/analysis/common/src/**/*.htm*"); + tree.exclude("lucene/analysis.tests/src/**/*.aff"); + tree.exclude("lucene/analysis.tests/src/**/*.dic"); + // Luke has an embedded ElegantIcons font (MIT licensed). 
+ tree.exclude("lucene/luke/src/**/ElegantIcons.ttf"); + })); } } diff --git a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/CheckLicensesTask.java b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/CheckLicensesTask.java index eb9135f8c8e8..e18584e5a5d3 100644 --- a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/CheckLicensesTask.java +++ b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/CheckLicensesTask.java @@ -51,7 +51,9 @@ private record LicenseFamily(String code, String name, Predicate matcher new LicenseFamily( "ASL", "Apache Software License 2.0", - fixedSubstring("http://www.apache.org/licenses/LICENSE-2.0")), + fixedSubstrings( + "http://www.apache.org/licenses/LICENSE-2.0", + "https://www.apache.org/licenses/LICENSE-2.0")), new LicenseFamily( "MIT", "The MIT License", @@ -108,6 +110,9 @@ public CheckLicensesTask() { @TaskAction public void run(InputChanges changes) throws IOException { + getLogger() + .info("Checking licenses {}", changes.isIncremental() ? 
"(incremental run)" : "(full run)"); + // load the current report (if any) into a sorted map File reportFile = getReportFile().getAsFile().get(); TreeMap report = readExistingReport(reportFile); @@ -116,26 +121,32 @@ public void run(InputChanges changes) throws IOException { List missingLicense = new ArrayList<>(); char[] scratch = new char[1024]; StringBuilder buffer = new StringBuilder(); + int count = 0; for (FileChange fc : changes.getFileChanges(getFiles())) { + count++; File file = fc.getFile(); if (file.isDirectory()) { continue; } - LicenseFamily licenseFamily = detectLicense(file, buffer, scratch); - if (licenseFamily == null) { - missingLicense.add(file); - } else { - String key = toRootRelative(file); - - switch (fc.getChangeType()) { - case REMOVED -> report.remove(key); - case ADDED, MODIFIED -> report.put(key, licenseFamily.code); - default -> throw new IOException("Unexpected change type: " + fc.getChangeType()); + String key = toRootRelative(file); + + switch (fc.getChangeType()) { + case REMOVED -> report.remove(key); + case ADDED, MODIFIED -> { + LicenseFamily licenseFamily = detectLicense(file, buffer, scratch); + if (licenseFamily == null) { + missingLicense.add(file); + } else { + report.put(key, licenseFamily.code); + } } + default -> throw new IOException("Unexpected change type: " + fc.getChangeType()); } } + getLogger().info("Checked {} {}", count, count == 1 ? 
"file" : "files"); + if (!missingLicense.isEmpty()) { throw new GradleException( String.format( @@ -157,6 +168,26 @@ public void run(InputChanges changes) throws IOException { } report.keySet().removeIf(k -> !current.contains(k)); + var logger = getLogger(); + if (logger.isInfoEnabled()) { + var counts = + report.entrySet().stream() + .collect(Collectors.groupingBy(Map.Entry::getValue, Collectors.counting())); + logger.info( + "License type counts:\n{}", + counts.entrySet().stream() + .sorted((a, b) -> Long.compare(b.getValue().longValue(), a.getValue().longValue())) + .map( + e -> + String.format( + Locale.ROOT, + " - %s: %,d %s", + e.getKey(), + e.getValue(), + e.getValue() == 1 ? "file" : "files")) + .collect(Collectors.joining("\n"))); + } + writeReport(report, reportFile); } @@ -245,10 +276,6 @@ private static Predicate fixedSubstrings(String... otherSubstrings) { return anyOf(Stream.of(otherSubstrings).map(CheckLicensesTask::fixedSubstring).toList()); } - private static Predicate anyOf(Predicate... list) { - return anyOf(List.of(list)); - } - private static Predicate anyOf(List> list) { return list.stream().reduce(v -> false, Predicate::or); } diff --git a/build.gradle b/build.gradle index e3f50b11c8aa..926c584bee7f 100644 --- a/build.gradle +++ b/build.gradle @@ -59,12 +59,3 @@ plugins { } description = 'Grandparent project for Apache Lucene Core' - -tasks.register("checkLicenses", CheckLicensesTask, { - reportFile = project.layout.buildDirectory.file("licenses-report.txt"); - - files.from(fileTree(".", { - include "**/*.java" - exclude "**/build/**" - })) -}) From f9f1a78566856fdef8b369f58a4d92bf7fc80256 Mon Sep 17 00:00:00 2001 From: Dawid Weiss Date: Wed, 17 Sep 2025 09:53:54 +0200 Subject: [PATCH 10/11] Linter changes. 
--- .../plugins/licenses/CheckLicensesTask.java | 27 ++++++++++++++----- lucene/benchmark/src/java/module-info.java | 1 - 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/CheckLicensesTask.java b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/CheckLicensesTask.java index e18584e5a5d3..77c928e67c3c 100644 --- a/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/CheckLicensesTask.java +++ b/build-tools/build-infra/src/main/java/org/apache/lucene/gradle/plugins/licenses/CheckLicensesTask.java @@ -17,11 +17,20 @@ package org.apache.lucene.gradle.plugins.licenses; -import java.io.*; +import java.io.BufferedWriter; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; -import java.util.*; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; import java.util.function.Predicate; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -30,7 +39,13 @@ import org.gradle.api.file.ConfigurableFileCollection; import org.gradle.api.file.RegularFileProperty; import org.gradle.api.provider.Property; -import org.gradle.api.tasks.*; +import org.gradle.api.tasks.CacheableTask; +import org.gradle.api.tasks.Input; +import org.gradle.api.tasks.InputFiles; +import org.gradle.api.tasks.OutputFile; +import org.gradle.api.tasks.PathSensitive; +import org.gradle.api.tasks.PathSensitivity; +import org.gradle.api.tasks.TaskAction; import org.gradle.work.FileChange; import org.gradle.work.Incremental; import org.gradle.work.InputChanges; @@ -46,7 +61,7 @@ public abstract class CheckLicensesTask extends DefaultTask { private record LicenseFamily(String code, String name, 
Predicate matcherPredicate) {} - private static List luceneAcceptedLicenses = + static final List LUCENE_ACCEPTED_LICENSES = List.of( new LicenseFamily( "ASL", @@ -256,7 +271,7 @@ private LicenseFamily detectLicense(File file, StringBuilder buffer, char[] scra String header = buffer.toString(); - for (var licenseFamily : luceneAcceptedLicenses) { + for (var licenseFamily : LUCENE_ACCEPTED_LICENSES) { if (licenseFamily.matcherPredicate.test(header)) { return licenseFamily; } @@ -277,6 +292,6 @@ private static Predicate fixedSubstrings(String... otherSubstrings) { } private static Predicate anyOf(List> list) { - return list.stream().reduce(v -> false, Predicate::or); + return list.stream().reduce(_ -> false, Predicate::or); } } diff --git a/lucene/benchmark/src/java/module-info.java b/lucene/benchmark/src/java/module-info.java index 15b5d7b8c2e1..f3f567031aff 100644 --- a/lucene/benchmark/src/java/module-info.java +++ b/lucene/benchmark/src/java/module-info.java @@ -28,7 +28,6 @@ requires org.apache.commons.compress; requires nekohtml; requires com.ibm.icu; - requires apache.rat.core; requires org.apache.commons.io; exports org.apache.lucene.benchmark; From c2e532694f356da6df03e842fd7e9ce9f93a7d0d Mon Sep 17 00:00:00 2001 From: Dawid Weiss Date: Wed, 17 Sep 2025 10:00:07 +0200 Subject: [PATCH 11/11] Remove a require that creeped in somehow. --- lucene/benchmark/src/java/module-info.java | 1 - 1 file changed, 1 deletion(-) diff --git a/lucene/benchmark/src/java/module-info.java b/lucene/benchmark/src/java/module-info.java index f3f567031aff..e22470b71ecf 100644 --- a/lucene/benchmark/src/java/module-info.java +++ b/lucene/benchmark/src/java/module-info.java @@ -28,7 +28,6 @@ requires org.apache.commons.compress; requires nekohtml; requires com.ibm.icu; - requires org.apache.commons.io; exports org.apache.lucene.benchmark; exports org.apache.lucene.benchmark.byTask;