260 lines
12 KiB
Java

/*
* Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package doccheckutils.checkers;
import doccheckutils.FileChecker;
import doccheckutils.Log;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import jtreg.SkippedException;
public class TidyChecker implements FileChecker, AutoCloseable {
private final Path TIDY;
final Map<Pattern, Integer> counts = new HashMap<>();
final Pattern okPattern = Pattern.compile("No warnings or errors were found.");
final Pattern countPattern = Pattern.compile("([0-9]+) warnings, ([0-9]+) errors were found!.*?(Not all warnings/errors were shown.)?");
final Pattern countPattern2 = Pattern.compile("Tidy found ([0-9]+) warning[s]? and ([0-9]+) error[s]?!.*?(Not all warnings/errors were shown.)?");
final Pattern cssPattern = Pattern.compile("You are recommended to use CSS.*");
final Pattern guardPattern = Pattern.compile("(line [0-9]+ column [0-9]+ - |[^:]+:[0-9]+:[0-9]+: )(Error|Warning):.*");
final Pattern[] patterns = {
Pattern.compile(".*Error: <.*> is not recognized!"),
Pattern.compile(".*Error: missing quote mark for attribute value"),
Pattern.compile(".*Warning: '<' \\+ '/' \\+ letter not allowed here"),
Pattern.compile(".*Warning: <.*> anchor \".*\" already defined"),
Pattern.compile(".*Warning: <.*> attribute \".*\" has invalid value \".*\""),
Pattern.compile(".*Warning: <.*> attribute \".*\" lacks value"),
Pattern.compile(".*Warning: <.*> attribute \".*\" lacks value"),
Pattern.compile(".*Warning: <.*> attribute with missing trailing quote mark"),
Pattern.compile(".*Warning: <.*> dropping value \".*\" for repeated attribute \".*\""),
Pattern.compile(".*Warning: <.*> inserting \".*\" attribute"),
Pattern.compile(".*Warning: <.*> is probably intended as </.*>"),
Pattern.compile(".*Warning: <.*> isn't allowed in <.*> elements"),
Pattern.compile(".*Warning: <.*> lacks \".*\" attribute"),
Pattern.compile(".*Warning: <.*> missing '>' for end of tag"),
Pattern.compile(".*Warning: <.*> proprietary attribute \".*\""),
Pattern.compile(".*Warning: <.*> unexpected or duplicate quote mark"),
Pattern.compile(".*Warning: <a> id and name attribute value mismatch"),
Pattern.compile(".*Warning: <a> cannot copy name attribute to id"),
Pattern.compile(".*Warning: <a> escaping malformed URI reference"),
Pattern.compile(".*Warning: <blockquote> proprietary attribute \"pre\""),
Pattern.compile(".*Warning: discarding unexpected <.*>"),
Pattern.compile(".*Warning: discarding unexpected </.*>"),
Pattern.compile(".*Warning: entity \".*\" doesn't end in ';'"),
Pattern.compile(".*Warning: inserting implicit <.*>"),
Pattern.compile(".*Warning: inserting missing 'title' element"),
Pattern.compile(".*Warning: missing <!DOCTYPE> declaration"),
Pattern.compile(".*Warning: missing <.*>"),
Pattern.compile(".*Warning: missing </.*> before <.*>"),
Pattern.compile(".*Warning: nested emphasis <.*>"),
Pattern.compile(".*Warning: plain text isn't allowed in <.*> elements"),
Pattern.compile(".*Warning: removing whitespace preceding XML Declaration"),
Pattern.compile(".*Warning: replacing <p> (by|with) <br>"),
Pattern.compile(".*Warning: replacing invalid numeric character reference .*"),
Pattern.compile(".*Warning: replacing obsolete element <xmp> with <pre>"),
Pattern.compile(".*Warning: replacing unexpected .* (by|with) </.*>"),
Pattern.compile(".*Warning: trimming empty <.*>"),
Pattern.compile(".*Warning: unescaped & or unknown entity \".*\""),
Pattern.compile(".*Warning: unescaped & which should be written as &amp;"),
Pattern.compile(".*Warning: using <br> in place of <p>"),
Pattern.compile(".*Warning: <.*> element removed from HTML5"),
Pattern.compile(".*Warning: <.*> attribute \".*\" not allowed for HTML5"),
Pattern.compile(".*Warning: The summary attribute on the <table> element is obsolete in HTML5"),
Pattern.compile(".*Warning: replacing invalid UTF-8 bytes \\(char. code U\\+.*\\)")
};
private final Log errors;
private int files = 0;
private int ok;
private int warns;
private int errs;
private int css;
private int overflow;
public TidyChecker() {
TIDY = initTidy();
errors = new Log();
}
@Override
public void checkFiles(List<Path> sb) {
files += sb.size();
try {
for (int i = 0; i < sb.size(); i += 1024) {
List<String> command = new ArrayList<>();
command.add(TIDY.toString());
command.add("-q");
command.add("-e");
command.add("--gnu-emacs");
command.add("true");
List<Path> sublist = sb.subList(i, Math.min(i + 1024, sb.size()));
for (Path p : sublist) {
command.add(p.toString());
}
Process p = new ProcessBuilder()
.command(command)
.redirectErrorStream(true)
.start();
try (BufferedReader r =
new BufferedReader(new InputStreamReader(p.getInputStream(), StandardCharsets.UTF_8))) {
String line;
while ((line = r.readLine()) != null) {
checkLine(line);
}
}
}
} catch (IOException e) {
throw new RuntimeException();
}
}
private Path initTidy() {
Path tidyExePath;
String tidyProperty = System.getProperty("tidy");
if (tidyProperty != null) {
tidyExePath = Path.of(tidyProperty);
if (!Files.exists(tidyExePath)) {
System.err.println("tidy not found: " + tidyExePath);
}
if (!Files.isExecutable(tidyExePath)) {
System.err.println("tidy not executable: " + tidyExePath);
}
} else {
boolean isWindows = System.getProperty("os.name")
.toLowerCase(Locale.US)
.startsWith("windows");
String tidyExe = isWindows ? "tidy.exe" : "tidy";
Optional<Path> p = Stream.of(System.getenv("PATH")
.split(File.pathSeparator))
.map(Path::of)
.map(d -> d.resolve(tidyExe))
.filter(Files::exists)
.filter(Files::isExecutable)
.findFirst();
if (p.isPresent()) {
tidyExePath = p.get();
} else {
throw new jtreg.SkippedException("tidy not found on PATH");
}
}
try {
Process p = new ProcessBuilder()
.command(tidyExePath.toString(), "-version")
.redirectErrorStream(true)
.start();
try (BufferedReader r =
new BufferedReader(new InputStreamReader(p.getInputStream(), StandardCharsets.UTF_8))) {
List<String> lines = r.lines().collect(Collectors.toList());
// Look for a line containing "version" and a dotted identifier beginning 5.
// If not found, look for known old/bad versions, to report in error message
Pattern version = Pattern.compile("version.* [5678]\\.\\d+(\\.\\d+)");
if (lines.stream().noneMatch(line -> version.matcher(line).find())) {
Pattern oldVersion = Pattern.compile("2006"); // 2006 implies old macOS version
String lineSep = System.lineSeparator();
String message = lines.stream().anyMatch(line -> oldVersion.matcher(line).find())
? "old version of 'tidy' found on the PATH\n"
: "could not determine the version of 'tidy' on the PATH\n";
System.err.println(message + String.join(lineSep, lines));
}
}
} catch (IOException e) {
System.err.println("Could not execute 'tidy -version': " + e);
}
return tidyExePath;
}
@Override
public void report() {
if (files > 0) {
System.err.println("Tidy found errors in the generated HTML");
if (!errors.noErrors()) {
for (String s : errors.getErrors()) {
System.err.println(s);
}
System.err.println("Tidy output end.");
System.err.println();
System.err.println();
throw new RuntimeException("Tidy found errors in the generated HTML");
}
}
}
@Override
public boolean isOK() {
return (ok == files)
&& (overflow == 0)
&& (errs == 0)
&& (warns == 0)
&& (css == 0);
}
void checkLine(String line) {
Matcher m;
if (okPattern.matcher(line).matches()) {
ok++;
} else if ((m = countPattern.matcher(line)).matches() || (m = countPattern2.matcher(line)).matches()) {
warns += Integer.parseInt(m.group(1));
errs += Integer.parseInt(m.group(2));
if (m.group(3) != null)
overflow++;
} else if (guardPattern.matcher(line).matches()) {
boolean found = false;
for (Pattern p : patterns) {
if (p.matcher(line).matches()) {
errors.log("%s", line);
found = true;
count(p);
break;
}
}
if (!found)
errors.log("unrecognized line: " + line);
} else if (cssPattern.matcher(line).matches()) {
css++;
}
}
void count(Pattern p) {
Integer i = counts.get(p);
counts.put(p, (i == null) ? 1 : i + 1);
}
@Override
public void close() {
report();
}
}