Loading src/main/java/org/torproject/metrics/collector/webstats/LogDescriptorImpl.javadeleted 100644 → 0 +0 −117 Original line number Diff line number Diff line /* Copyright 2017--2018 The Tor Project * See LICENSE for licensing information */ package org.torproject.metrics.collector.webstats; import org.torproject.descriptor.DescriptorParseException; import org.torproject.descriptor.LogDescriptor; import java.io.ByteArrayInputStream; import java.io.File; import java.io.InputStream; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Base class for log descriptors. * * @since 2.2.0 */ public abstract class LogDescriptorImpl implements LogDescriptor { /** Logfile name parts separator. */ public static final String SEP = "_"; /** The log's file name should contain this string. */ public static final String MARKER = ".log"; private static Pattern filenamePattern = Pattern.compile( "(?:\\S*)" + MARKER + SEP + "(?:[0-9a-zA-Z]*)(?:\\.?)([a-zA-Z2]*)"); private final File descriptorFile; /** Byte array for plain, i.e. uncompressed, log data. */ private byte[] logBytes; private FileType fileType; private List<String> unrecognizedLines = new ArrayList<>(); /** * This constructor performs basic operations on the given bytes. * * <p>An unknown compression type (see {@link #getCompressionType}) * is interpreted as missing compression. 
In this case the bytes * will be compressed to the given compression type.</p> * * @since 2.2.0 */ protected LogDescriptorImpl(byte[] logBytes, File descriptorFile, String logName) throws DescriptorParseException { this.logBytes = logBytes; this.descriptorFile = descriptorFile; try { Matcher mat = filenamePattern.matcher(logName); if (!mat.find()) { throw new DescriptorParseException( "Log file name doesn't comply to standard: " + logName); } this.fileType = FileType.findType(mat.group(1).toUpperCase()); if (FileType.PLAIN == this.fileType) { this.fileType = FileType.XZ; this.logBytes = this.fileType.compress(this.logBytes); } } catch (Exception ex) { throw new DescriptorParseException("Cannot parse file " + logName + " from file " + descriptorFile.getName(), ex); } } @Override public InputStream decompressedByteStream() throws DescriptorParseException { try { return this.fileType.decompress(new ByteArrayInputStream(this.logBytes)); } catch (Exception ex) { throw new DescriptorParseException("Cannot provide deflated stream of " + this.descriptorFile + ".", ex); } } public String getCompressionType() { return this.fileType.name().toLowerCase(); } @Override public byte[] getRawDescriptorBytes() { return this.logBytes; } public void setRawDescriptorBytes(byte[] bytes) { this.logBytes = bytes; } @Override public int getRawDescriptorLength() { return this.logBytes.length; } @Override public List<String> getAnnotations() { return Collections.emptyList(); } @Override public List<String> getUnrecognizedLines() { return this.unrecognizedLines; } @Override public File getDescriptorFile() { return descriptorFile; } } src/main/java/org/torproject/metrics/collector/webstats/SanitizeWeblogs.java +1 −1 Original line number Diff line number Diff line Loading @@ -135,7 +135,7 @@ public class SanitizeWeblogs extends CollecTorMain { private void storeSortedAndForget(String virtualHost, String physicalHost, LocalDate date, Map<String, Long> lineCounts) { String name = new 
StringJoiner(LogDescriptorImpl.SEP) String name = new StringJoiner(WebServerAccessLogImpl.SEP) .add(virtualHost).add(physicalHost) .add(WebServerAccessLogImpl.MARKER) .add(date.format(DateTimeFormatter.BASIC_ISO_DATE)) Loading src/main/java/org/torproject/metrics/collector/webstats/WebServerAccessLogImpl.java +72 −10 Original line number Diff line number Diff line Loading @@ -7,13 +7,15 @@ import org.torproject.descriptor.DescriptorParseException; import org.torproject.descriptor.WebServerAccessLog; import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.File; import java.io.InputStream; import java.io.InputStreamReader; import java.nio.file.Paths; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; Loading @@ -26,8 +28,10 @@ import java.util.stream.Stream; * * @since 2.2.0 */ public class WebServerAccessLogImpl extends LogDescriptorImpl implements WebServerAccessLog { public class WebServerAccessLogImpl implements WebServerAccessLog { /** Logfile name parts separator. */ public static final String SEP = "_"; /** The log's name should include this string. */ public static final String MARKER = "access.log"; Loading @@ -37,6 +41,15 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl = Pattern.compile("(\\S*)" + SEP + "(\\S*)" + SEP + "" + MARKER + SEP + "(\\d*)(?:\\.?)([a-zA-Z]*)"); private final File descriptorFile; /** Byte array for plain, i.e. uncompressed, log data. 
*/ private byte[] logBytes; private FileType fileType; private List<String> unrecognizedLines = new ArrayList<>(); private final String physicalHost; private final String virtualHost; Loading @@ -58,25 +71,31 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl * The immediate parent name is taken to be the physical host collecting the * logs.</p> */ protected WebServerAccessLogImpl(byte[] logBytes, File file, String logName) throws DescriptorParseException { super(logBytes, file, logName); protected WebServerAccessLogImpl(byte[] logBytes, File descriptorFile, String logName) throws DescriptorParseException { this.logBytes = logBytes; this.descriptorFile = descriptorFile; try { String fn = Paths.get(logName).getFileName().toString(); Matcher mat = filenamePattern.matcher(fn); Matcher mat = filenamePattern.matcher(logName); if (!mat.find()) { throw new DescriptorParseException( "WebServerAccessLog file name doesn't comply to standard: " + fn); "Log file name doesn't comply to standard: " + logName); } this.virtualHost = mat.group(1); this.physicalHost = mat.group(2); if (null == this.virtualHost || null == this.physicalHost || this.virtualHost.isEmpty() || this.physicalHost.isEmpty()) { throw new DescriptorParseException( "WebServerAccessLog file name doesn't comply to standard: " + fn); "WebServerAccessLog file name doesn't comply to standard: " + logName); } String ymd = mat.group(3); this.logDate = LocalDate.parse(ymd, DateTimeFormatter.BASIC_ISO_DATE); this.fileType = FileType.findType(mat.group(4).toUpperCase()); if (FileType.PLAIN == this.fileType) { this.fileType = FileType.XZ; this.logBytes = this.fileType.compress(this.logBytes); } } catch (DescriptorParseException dpe) { throw dpe; // escalate } catch (Exception pe) { Loading @@ -85,6 +104,49 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl } } @Override public InputStream decompressedByteStream() throws DescriptorParseException { try { return this.fileType.decompress(new 
ByteArrayInputStream(this.logBytes)); } catch (Exception ex) { throw new DescriptorParseException("Cannot provide deflated stream of " + this.descriptorFile + ".", ex); } } public String getCompressionType() { return this.fileType.name().toLowerCase(); } @Override public byte[] getRawDescriptorBytes() { return this.logBytes; } public void setRawDescriptorBytes(byte[] bytes) { this.logBytes = bytes; } @Override public int getRawDescriptorLength() { return this.logBytes.length; } @Override public List<String> getAnnotations() { return Collections.emptyList(); } @Override public List<String> getUnrecognizedLines() { return this.unrecognizedLines; } @Override public File getDescriptorFile() { return descriptorFile; } @Override public String getPhysicalHost() { return this.physicalHost; Loading Loading
// src/main/java/org/torproject/metrics/collector/webstats/LogDescriptorImpl.java
// (diff view: deleted 100644 → 0, +0 −117)

/* Copyright 2017--2018 The Tor Project
 * See LICENSE for licensing information */

package org.torproject.metrics.collector.webstats;

import org.torproject.descriptor.DescriptorParseException;
import org.torproject.descriptor.LogDescriptor;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Base class for log descriptors.
 *
 * <p>Holds the log's bytes together with the compression type derived from
 * the log's file name and offers access to the decompressed content.</p>
 *
 * @since 2.2.0
 */
public abstract class LogDescriptorImpl implements LogDescriptor {

  /** Logfile name parts separator. */
  public static final String SEP = "_";

  /** The log's file name should contain this string. */
  public static final String MARKER = ".log";

  /**
   * Expected shape of a log file name; group 1 captures the (possibly empty)
   * compression suffix.  The marker is quoted so that its leading dot only
   * matches a literal dot instead of any character.
   */
  private static final Pattern FILENAME_PATTERN = Pattern.compile(
      "(?:\\S*)" + Pattern.quote(MARKER) + SEP
      + "(?:[0-9a-zA-Z]*)(?:\\.?)([a-zA-Z2]*)");

  private final File descriptorFile;

  /**
   * Log data as handed out by {@link #getRawDescriptorBytes}.  After
   * construction these bytes are compressed according to {@link #fileType}:
   * plain input is compressed by the constructor, anything else is stored
   * as given.
   */
  private byte[] logBytes;

  private FileType fileType;

  private final List<String> unrecognizedLines = new ArrayList<>();

  /**
   * This constructor performs basic operations on the given bytes.
   *
   * <p>The compression type is looked up from the suffix of the given log
   * name.  If that lookup yields {@code FileType.PLAIN}, the bytes are
   * compressed and the type is switched to {@code FileType.XZ}.</p>
   *
   * @param logBytes the log's content, compressed as indicated by
   *     {@code logName}
   * @param descriptorFile the file the log was read from; only used for
   *     error messages and {@link #getDescriptorFile}
   * @param logName the log's file name, which has to contain {@link #MARKER}
   *     followed by {@link #SEP}
   * @throws DescriptorParseException if the name doesn't comply to the
   *     expected pattern or determining the type or compressing fails
   *
   * @since 2.2.0
   */
  protected LogDescriptorImpl(byte[] logBytes, File descriptorFile,
      String logName) throws DescriptorParseException {
    this.logBytes = logBytes;
    this.descriptorFile = descriptorFile;
    try {
      Matcher mat = FILENAME_PATTERN.matcher(logName);
      if (!mat.find()) {
        throw new DescriptorParseException(
            "Log file name doesn't comply to standard: " + logName);
      }
      // Locale.ROOT keeps the lookup key independent of the default locale
      // (e.g., Turkish upper-cases 'i' to a dotted capital I).
      this.fileType = FileType.findType(
          mat.group(1).toUpperCase(Locale.ROOT));
      if (FileType.PLAIN == this.fileType) {
        this.fileType = FileType.XZ;
        this.logBytes = this.fileType.compress(this.logBytes);
      }
    } catch (DescriptorParseException dpe) {
      throw dpe; // escalate unchanged instead of wrapping it a second time
    } catch (Exception ex) {
      // Guard against a null file, so the original cause isn't replaced by
      // a NullPointerException thrown from this handler.
      throw new DescriptorParseException("Cannot parse file " + logName
          + " from file "
          + (null == descriptorFile ? null : descriptorFile.getName()), ex);
    }
  }

  /**
   * Returns a stream providing the decompressed log content.
   *
   * @throws DescriptorParseException if the decompressing stream cannot be
   *     created
   */
  @Override
  public InputStream decompressedByteStream() throws DescriptorParseException {
    try {
      return this.fileType.decompress(new ByteArrayInputStream(this.logBytes));
    } catch (Exception ex) {
      throw new DescriptorParseException("Cannot provide deflated stream of "
          + this.descriptorFile + ".", ex);
    }
  }

  /** Returns the lower-case name of this log's compression type. */
  public String getCompressionType() {
    return this.fileType.name().toLowerCase(Locale.ROOT);
  }

  /** Returns the internally held bytes without copying them. */
  @Override
  public byte[] getRawDescriptorBytes() {
    return this.logBytes;
  }

  /** Replaces the internally held bytes; the compression type is kept. */
  public void setRawDescriptorBytes(byte[] bytes) {
    this.logBytes = bytes;
  }

  @Override
  public int getRawDescriptorLength() {
    return this.logBytes.length;
  }

  /** Returns an empty list. */
  @Override
  public List<String> getAnnotations() {
    return Collections.emptyList();
  }

  @Override
  public List<String> getUnrecognizedLines() {
    return this.unrecognizedLines;
  }

  @Override
  public File getDescriptorFile() {
    return descriptorFile;
  }
}
src/main/java/org/torproject/metrics/collector/webstats/SanitizeWeblogs.java +1 −1 Original line number Diff line number Diff line Loading @@ -135,7 +135,7 @@ public class SanitizeWeblogs extends CollecTorMain { private void storeSortedAndForget(String virtualHost, String physicalHost, LocalDate date, Map<String, Long> lineCounts) { String name = new StringJoiner(LogDescriptorImpl.SEP) String name = new StringJoiner(WebServerAccessLogImpl.SEP) .add(virtualHost).add(physicalHost) .add(WebServerAccessLogImpl.MARKER) .add(date.format(DateTimeFormatter.BASIC_ISO_DATE)) Loading
src/main/java/org/torproject/metrics/collector/webstats/WebServerAccessLogImpl.java +72 −10 Original line number Diff line number Diff line Loading @@ -7,13 +7,15 @@ import org.torproject.descriptor.DescriptorParseException; import org.torproject.descriptor.WebServerAccessLog; import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.File; import java.io.InputStream; import java.io.InputStreamReader; import java.nio.file.Paths; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; Loading @@ -26,8 +28,10 @@ import java.util.stream.Stream; * * @since 2.2.0 */ public class WebServerAccessLogImpl extends LogDescriptorImpl implements WebServerAccessLog { public class WebServerAccessLogImpl implements WebServerAccessLog { /** Logfile name parts separator. */ public static final String SEP = "_"; /** The log's name should include this string. */ public static final String MARKER = "access.log"; Loading @@ -37,6 +41,15 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl = Pattern.compile("(\\S*)" + SEP + "(\\S*)" + SEP + "" + MARKER + SEP + "(\\d*)(?:\\.?)([a-zA-Z]*)"); private final File descriptorFile; /** Byte array for plain, i.e. uncompressed, log data. 
*/ private byte[] logBytes; private FileType fileType; private List<String> unrecognizedLines = new ArrayList<>(); private final String physicalHost; private final String virtualHost; Loading @@ -58,25 +71,31 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl * The immediate parent name is taken to be the physical host collecting the * logs.</p> */ protected WebServerAccessLogImpl(byte[] logBytes, File file, String logName) throws DescriptorParseException { super(logBytes, file, logName); protected WebServerAccessLogImpl(byte[] logBytes, File descriptorFile, String logName) throws DescriptorParseException { this.logBytes = logBytes; this.descriptorFile = descriptorFile; try { String fn = Paths.get(logName).getFileName().toString(); Matcher mat = filenamePattern.matcher(fn); Matcher mat = filenamePattern.matcher(logName); if (!mat.find()) { throw new DescriptorParseException( "WebServerAccessLog file name doesn't comply to standard: " + fn); "Log file name doesn't comply to standard: " + logName); } this.virtualHost = mat.group(1); this.physicalHost = mat.group(2); if (null == this.virtualHost || null == this.physicalHost || this.virtualHost.isEmpty() || this.physicalHost.isEmpty()) { throw new DescriptorParseException( "WebServerAccessLog file name doesn't comply to standard: " + fn); "WebServerAccessLog file name doesn't comply to standard: " + logName); } String ymd = mat.group(3); this.logDate = LocalDate.parse(ymd, DateTimeFormatter.BASIC_ISO_DATE); this.fileType = FileType.findType(mat.group(4).toUpperCase()); if (FileType.PLAIN == this.fileType) { this.fileType = FileType.XZ; this.logBytes = this.fileType.compress(this.logBytes); } } catch (DescriptorParseException dpe) { throw dpe; // escalate } catch (Exception pe) { Loading @@ -85,6 +104,49 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl } } @Override public InputStream decompressedByteStream() throws DescriptorParseException { try { return this.fileType.decompress(new 
ByteArrayInputStream(this.logBytes)); } catch (Exception ex) { throw new DescriptorParseException("Cannot provide deflated stream of " + this.descriptorFile + ".", ex); } } public String getCompressionType() { return this.fileType.name().toLowerCase(); } @Override public byte[] getRawDescriptorBytes() { return this.logBytes; } public void setRawDescriptorBytes(byte[] bytes) { this.logBytes = bytes; } @Override public int getRawDescriptorLength() { return this.logBytes.length; } @Override public List<String> getAnnotations() { return Collections.emptyList(); } @Override public List<String> getUnrecognizedLines() { return this.unrecognizedLines; } @Override public File getDescriptorFile() { return descriptorFile; } @Override public String getPhysicalHost() { return this.physicalHost; Loading