From 517ad53e450fd88607ac16226a0450be6a9688e6 Mon Sep 17 00:00:00 2001 From: wenweihuang Date: Fri, 27 Oct 2023 11:14:43 +0800 Subject: [PATCH] [INLONG-9134][Agent] Add file related utils --- .../filecollect/ProxyMessageCache.java | 6 +- inlong-agent/agent-plugins/pom.xml | 6 + .../agent/plugin/sources/TextFileSource.java | 2 +- .../reader/file/FileReaderOperator.java | 2 +- .../agent/plugin/utils/MetaDataUtils.java | 24 +- .../agent/plugin/utils/file/DateUtils.java | 455 +++++++++ .../utils/{ => file}/FileDataUtils.java | 22 +- .../agent/plugin/utils/file/FilePathUtil.java | 170 ++++ .../plugin/utils/file/FileTimeComparator.java | 36 + .../inlong/agent/plugin/utils/file/Files.java | 72 ++ .../agent/plugin/utils/file/MatchPoint.java | 58 ++ .../agent/plugin/utils/file/NewDateUtils.java | 955 ++++++++++++++++++ .../utils/file/NonRegexPatternPosition.java | 41 + .../plugin/utils/file/PathDateExpression.java | 40 + 14 files changed, 1866 insertions(+), 23 deletions(-) create mode 100644 inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/DateUtils.java rename inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/{ => file}/FileDataUtils.java (86%) create mode 100644 inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/FilePathUtil.java create mode 100644 inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/FileTimeComparator.java create mode 100644 inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/Files.java create mode 100644 inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/MatchPoint.java create mode 100644 inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/NewDateUtils.java create mode 100644 inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/NonRegexPatternPosition.java create mode 100644 
inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/PathDateExpression.java diff --git a/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/message/filecollect/ProxyMessageCache.java b/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/message/filecollect/ProxyMessageCache.java index d392aebccee..5426c2eb548 100644 --- a/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/message/filecollect/ProxyMessageCache.java +++ b/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/message/filecollect/ProxyMessageCache.java @@ -42,7 +42,7 @@ import static org.apache.inlong.agent.constant.CommonConstants.PROXY_PACKAGE_MAX_TIMEOUT_MS; /** - * Handle List of BusMessage, which belong to the same stream id. + * Handle List of Proxy Message, which belong to the same stream id. */ public class ProxyMessageCache { @@ -67,9 +67,6 @@ public class ProxyMessageCache { */ private Map extraMap = new HashMap<>(); - /** - * Init PackBusMessage - */ public ProxyMessageCache(InstanceProfile instanceProfile, String groupId, String streamId) { this.taskId = instanceProfile.getTaskId(); this.instanceId = instanceProfile.getInstanceId(); @@ -77,7 +74,6 @@ public ProxyMessageCache(InstanceProfile instanceProfile, String groupId, String this.maxQueueNumber = instanceProfile.getInt(PROXY_INLONG_STREAM_ID_QUEUE_MAX_NUMBER, DEFAULT_PROXY_INLONG_STREAM_ID_QUEUE_MAX_NUMBER); this.cacheTimeout = instanceProfile.getInt(PROXY_PACKAGE_MAX_TIMEOUT_MS, DEFAULT_PROXY_PACKAGE_MAX_TIMEOUT_MS); - // double size of package this.messageQueue = new LinkedBlockingQueue<>(maxQueueNumber); this.groupId = groupId; this.streamId = streamId; diff --git a/inlong-agent/agent-plugins/pom.xml b/inlong-agent/agent-plugins/pom.xml index fe7760234f0..4aea08d5034 100644 --- a/inlong-agent/agent-plugins/pom.xml +++ b/inlong-agent/agent-plugins/pom.xml @@ -32,6 +32,7 @@ ${project.parent.parent.basedir} 1.8.0.Final + 1.5.1 @@ -202,6 +203,11 @@ 
powermock-api-mockito2 test + + com.darwinsys + hirondelle-date4j + ${darwinsys.version} + io.fabric8 kubernetes-client diff --git a/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/sources/TextFileSource.java b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/sources/TextFileSource.java index 4dfae0c06b8..133374da180 100755 --- a/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/sources/TextFileSource.java +++ b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/sources/TextFileSource.java @@ -21,8 +21,8 @@ import org.apache.inlong.agent.plugin.Reader; import org.apache.inlong.agent.plugin.sources.reader.file.FileReaderOperator; import org.apache.inlong.agent.plugin.sources.reader.file.TriggerFileReader; -import org.apache.inlong.agent.plugin.utils.FileDataUtils; import org.apache.inlong.agent.plugin.utils.PluginUtils; +import org.apache.inlong.agent.plugin.utils.file.FileDataUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/sources/reader/file/FileReaderOperator.java b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/sources/reader/file/FileReaderOperator.java index 24db072eed1..b1dfa1afcfe 100644 --- a/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/sources/reader/file/FileReaderOperator.java +++ b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/sources/reader/file/FileReaderOperator.java @@ -29,7 +29,7 @@ import org.apache.inlong.agent.metrics.audit.AuditUtils; import org.apache.inlong.agent.plugin.Message; import org.apache.inlong.agent.plugin.sources.reader.AbstractReader; -import org.apache.inlong.agent.plugin.utils.FileDataUtils; +import org.apache.inlong.agent.plugin.utils.file.FileDataUtils; import org.apache.inlong.agent.utils.AgentUtils; import com.google.gson.Gson; diff --git 
a/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/MetaDataUtils.java b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/MetaDataUtils.java index 8704c920048..aa215bb5393 100644 --- a/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/MetaDataUtils.java +++ b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/MetaDataUtils.java @@ -17,7 +17,7 @@ package org.apache.inlong.agent.plugin.utils; -import org.apache.inlong.agent.conf.JobProfile; +import org.apache.inlong.agent.conf.AbstractConfiguration; import org.apache.inlong.agent.constant.CommonConstants; import com.google.gson.Gson; @@ -33,12 +33,12 @@ import java.util.regex.Pattern; import java.util.stream.Collectors; -import static org.apache.inlong.agent.constant.JobConstants.JOB_FILE_META_FILTER_BY_LABELS; -import static org.apache.inlong.agent.constant.JobConstants.JOB_FILE_PROPERTIES; import static org.apache.inlong.agent.constant.KubernetesConstants.CONTAINER_ID; import static org.apache.inlong.agent.constant.KubernetesConstants.CONTAINER_NAME; import static org.apache.inlong.agent.constant.KubernetesConstants.NAMESPACE; import static org.apache.inlong.agent.constant.KubernetesConstants.POD_NAME; +import static org.apache.inlong.agent.constant.TaskConstants.JOB_FILE_META_FILTER_BY_LABELS; +import static org.apache.inlong.agent.constant.TaskConstants.JOB_FILE_PROPERTIES; /** * Metadata utils @@ -89,21 +89,21 @@ public static Map getLogInfo(String fileName) { * * get labels of pod */ - public static Map getPodLabels(JobProfile jobProfile) { - if (Objects.isNull(jobProfile) || !jobProfile.hasKey(JOB_FILE_META_FILTER_BY_LABELS)) { + public static Map getPodLabels(AbstractConfiguration taskProfile) { + if (Objects.isNull(taskProfile) || !taskProfile.hasKey(JOB_FILE_META_FILTER_BY_LABELS)) { return new HashMap<>(); } - String labels = jobProfile.get(JOB_FILE_META_FILTER_BY_LABELS); + String labels = 
taskProfile.get(JOB_FILE_META_FILTER_BY_LABELS); Type type = new TypeToken>() { }.getType(); return GSON.fromJson(labels, type); } - public static List getNamespace(JobProfile jobProfile) { - if (Objects.isNull(jobProfile) || !jobProfile.hasKey(JOB_FILE_PROPERTIES)) { + public static List getNamespace(AbstractConfiguration taskProfile) { + if (Objects.isNull(taskProfile) || !taskProfile.hasKey(JOB_FILE_PROPERTIES)) { return null; } - String property = jobProfile.get(JOB_FILE_PROPERTIES); + String property = taskProfile.get(JOB_FILE_PROPERTIES); Type type = new TypeToken>() { }.getType(); Map properties = GSON.fromJson(property, type); @@ -120,11 +120,11 @@ public static List getNamespace(JobProfile jobProfile) { * * get name of pod */ - public static String getPodName(JobProfile jobProfile) { - if (Objects.isNull(jobProfile) || !jobProfile.hasKey(JOB_FILE_PROPERTIES)) { + public static String getPodName(AbstractConfiguration taskProfile) { + if (Objects.isNull(taskProfile) || !taskProfile.hasKey(JOB_FILE_PROPERTIES)) { return null; } - String property = jobProfile.get(JOB_FILE_PROPERTIES); + String property = taskProfile.get(JOB_FILE_PROPERTIES); Type type = new TypeToken>() { }.getType(); Map properties = GSON.fromJson(property, type); diff --git a/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/DateUtils.java b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/DateUtils.java new file mode 100644 index 00000000000..2280b2db5a8 --- /dev/null +++ b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/DateUtils.java @@ -0,0 +1,455 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.agent.plugin.utils.file; + +import hirondelle.date4j.DateTime; +import org.apache.commons.lang.StringUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.Objects; +import java.util.TimeZone; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class DateUtils { + + private static final Logger logger = LogManager.getLogger(DateUtils.class); + private static final String TIME_REGEX = "YYYY(?:.MM|MM)?(?:.DD|DD)?(?:.hh|hh)?(?:.mm|mm)?(?:" + + ".ss|ss)?"; + private static final String LIMIT_SEP = "(?<=[a-zA-Z])"; + private static final String LETTER_STR = "\\D+"; + private static final String DIGIT_STR = "[0-9]+"; + private static final Pattern pattern = Pattern.compile(TIME_REGEX, + Pattern.CASE_INSENSITIVE | Pattern.DOTALL | Pattern.MULTILINE); + private String dateFormat = "YYYYMMDDhhmmss"; + + public DateUtils() { + + } + + public DateUtils(String timeFormat) { + if (timeFormat != null && !timeFormat.isEmpty()) { + dateFormat = timeFormat; + } + } + + public static String getSubTimeFormat(String format, int length) { + // format may be "YYYYMMDDhhmmss" | "YYYY_MM_DD_hh_mm_ss" + int formatLen = format.length(); + StringBuffer sb = new StringBuffer(); + + for (int i = 0; i < formatLen 
&& length > 0; ++i) { + if (Character.isLetter(format.charAt(i)) + || Character.isDigit(format.charAt(i))) { + length--; + } + sb.append(format.charAt(i)); + } + return sb.toString(); + } + + // only return the first matched + public static String extractLongestTimeRegex(String src) + throws IllegalArgumentException { + Matcher m = pattern.matcher(src); + String ret = ""; + while (m.find()) { + String oneMatch = m.group(0); + if (oneMatch.length() > ret.length()) { + ret = oneMatch; + } + } + if (ret.isEmpty()) { + throw new IllegalArgumentException("time pattern " + " not find in " + src); + } + return ret; + } + + public static PathDateExpression extractLongestTimeRegexWithPrefixOrSuffix(String src) + throws IllegalArgumentException { + if (src == null) { + return null; + } + + String longestPattern = extractLongestTimeRegex(src); + String regexSign = "\\^$*+?{(|[)]"; + + String range = "+?*{"; + + int beginIndex = src.indexOf(longestPattern); + int endIndex = beginIndex + longestPattern.length(); + String prefix = src.substring(beginIndex - 1, beginIndex); + + NonRegexPatternPosition position = NonRegexPatternPosition.NONE; + if (!regexSign.contains(prefix)) { + longestPattern = prefix + longestPattern; + position = NonRegexPatternPosition.PREFIX; + } + String suffix = ""; + if (src.length() > endIndex) { + suffix = src.substring(endIndex, endIndex + 1); + } + boolean bFlag = false; + + if (Objects.equals(suffix, ".") && src.length() > endIndex + 1) { + + char c = src.charAt(endIndex + 1); + if (StringUtils.indexOf(range, c) != -1) { + bFlag = true; + } + } + + if (!Objects.equals(suffix, "") && !regexSign.contains(suffix) && !bFlag) { + longestPattern = longestPattern + suffix; + if (position == NonRegexPatternPosition.PREFIX) { + position = NonRegexPatternPosition.BOTH; + } else { + position = NonRegexPatternPosition.SUFFIX; + } + } + if (Objects.equals(suffix, "")) { + if (position == NonRegexPatternPosition.PREFIX) { + position = 
NonRegexPatternPosition.ENDSUFFIX; + } else { + position = NonRegexPatternPosition.END; + } + } + + return ((position == NonRegexPatternPosition.NONE) ? null + : new PathDateExpression(longestPattern, position)); + } + + public static String formatTime(long time) { + SimpleDateFormat df = new SimpleDateFormat("yyyyMMddHHmm"); + df.setTimeZone(TimeZone.getTimeZone("GMT+8:00")); + return df.format(new Date(time)); + } + + public static boolean compare(String time, int offset) + throws ParseException { + long value = 1000 * 60 * 60 * 24; + SimpleDateFormat df = new SimpleDateFormat("yyyyMMdd"); + long to = System.currentTimeMillis(); + long from = df.parse(time.substring(0, 8)).getTime(); + if ((to - from) / value > offset) { + return true; + } else { + return false; + } + } + + public static boolean compare(long time, int offset) { + long value = 1000 * 60 * 60 * 24; + long to = System.currentTimeMillis(); + if ((to - time) / value > offset) { + return true; + } else { + return false; + } + } + + public void init(String timeFormat) { + if (timeFormat != null && !timeFormat.isEmpty()) { + dateFormat = timeFormat; + } + } + + // 20120812010203 ---> 2012-08-12 01:02:03 + private String normalizeDateStr(String src) { + src = src.replaceAll("[^a-zA-Z0-9]", ""); + int len = src.length(); + // if (!isTimeStrValid(src)) { + // return ""; + // } + StringBuffer sb = new StringBuffer(); + // year + sb.append(src.substring(0, 4)); + sb.append("-"); + if (len > 4) { + // month + sb.append(src.substring(4, 6)); + if (len > 6) { + sb.append("-"); + // day + sb.append(src.substring(6, 8)); + if (len > 8) { + sb.append(" "); + // hour + sb.append(src.substring(8, 10)); + if (len > 10) { + sb.append(":"); + // minute + sb.append(src.substring(10, 12)); + if (len > 12) { + sb.append(":"); + // seconds + sb.append(src.substring(12, 14)); + } else { + sb.append(":00"); + } + } else { + sb.append(":00:00"); + } + } else { + sb.append(" 00:00:00"); + } + } else { + sb.append("-01 
00:00:00"); + } + } else { + sb.append("-01-01 00:00:00"); + } + return sb.toString(); + } + + public String getFormatSpecifiedTime(String specifiedTime) { + if (specifiedTime == null || specifiedTime.length() == 0) { + return specifiedTime; + } + + int formatLen = dateFormat.length(); + + if (specifiedTime.length() == formatLen + && !specifiedTime.matches(DIGIT_STR)) { + return specifiedTime; + } + + StringBuilder retSb = new StringBuilder(); + int specifiedInx = 0; + for (int i = 0; i < formatLen; i++) { + char tmpChar = dateFormat.charAt(i); + + if (tmpChar != 'Y' && tmpChar != 'M' && tmpChar != 'D' + && tmpChar != 'h' && tmpChar != 'm') { + retSb.append(tmpChar); + } else { + retSb.append(specifiedTime.charAt(specifiedInx++)); + } + } + + logger.info( + "TimeRegex {} <> specifiedTime {} not match, format specifiedTime {}", + new Object[]{dateFormat, specifiedTime, retSb.toString()}); + + return retSb.toString(); + } + + public String getDate(String src, String limit) { + if (src == null || src.trim().isEmpty()) { + return ""; + } + + // TODO : verify format str + int year = 0; + int month = 0; + int day = 0; + int hour = 0; + int minute = 0; + int second = 0; + + // TODO : timezone + TimeZone tz = TimeZone.getTimeZone("GMT+8:00"); + DateTime dt = null; + String outputFormat = null; + if (src.matches(LETTER_STR)) { + // format str + // TODO : data format verify + dt = DateTime.now(tz); + outputFormat = src; + } else { + // time str + src = src.replaceAll("[^0-9]", ""); + outputFormat = getSubTimeFormat(dateFormat, src.length()); + src = normalizeDateStr(src); + if (src.isEmpty()) { + return ""; + } + dt = new DateTime(src); + } + + // System.out.println("outputformat: " + outputFormat); + + limit = limit.trim(); + String[] limitArr = limit.split(LIMIT_SEP); + + for (String onelimit : limitArr) { + year = 0; + month = 0; + day = 0; + hour = 0; + minute = 0; + second = 0; + // System.out.println("onelimit: " + onelimit); + int limitLen = onelimit.length(); + 
String type = onelimit.substring(limitLen - 1, limitLen); + int offset = Integer.parseInt(onelimit.substring(0, limitLen - 1)); + // System.out.println("type: " + type + ". offset: " + offset); + int sign = 1; + if (offset < 0) { + sign = -1; + } else { + sign = 1; + } + if (type.equalsIgnoreCase("Y")) { + year = sign * offset; + } else if (type.equals("M")) { + month = sign * offset; + } else if (type.equalsIgnoreCase("D")) { + day = sign * offset; + } else if (type.equalsIgnoreCase("h")) { + hour = sign * offset; + } else if (type.equals("m")) { + minute = sign * offset; + } else if (type.equalsIgnoreCase("s")) { + second = sign * offset; + } + if (sign < 0) { + dt = dt.minus(year, month, day, hour, minute, second, 0, + DateTime.DayOverflow.LastDay); + } else { + dt = dt.plus(year, month, day, hour, minute, second, 0, + DateTime.DayOverflow.LastDay); + } + + } + return dt.format(outputFormat); + } + + public String getAttrPunit(String attrs) { + String punit = null; + if (attrs != null && attrs.contains("&p=")) { + for (String attr : attrs.split("&")) { + if (attr.startsWith("p=") && attr.split("=").length == 2) { + punit = attr.split("=")[1]; + break; + } + } + } + + return punit; + } + + public String getSpecifiedDate(String src, String limit, String punit) { + String ret = getDate(src, limit); + return formatCurrPeriod(ret, punit); + } + + public String normalizeTimeRegex(String src) { + return getSubTimeFormat(dateFormat, src.length()); + } + + public String getCurrentDir(String src, String timeOffset) { + Matcher m = pattern.matcher(src); + StringBuffer sb = new StringBuffer(); + while (m.find()) { + String oneMatch = m.group(0); + String currTimeStr = getDate(oneMatch, timeOffset); + m.appendReplacement(sb, currTimeStr); + } + m.appendTail(sb); + return sb.toString(); + } + + public String getCurrentDirByPunit(String src, String timeOffset, + String punit) { + Matcher m = pattern.matcher(src); + StringBuffer sb = new StringBuffer(); + while (m.find()) { + 
String oneMatch = m.group(0); + String currTimeStr = getSpecifiedDate(oneMatch, timeOffset, punit); + m.appendReplacement(sb, currTimeStr); + } + m.appendTail(sb); + + return sb.toString(); + } + + public String getSpecifiedDir(String src, String specifiedDate) { + Matcher m = pattern.matcher(src); + StringBuffer sb = new StringBuffer(); + + while (m.find()) { + String oneMatch = m.group(0); + StringBuilder tmpSb = new StringBuilder(); + int specifiedDateIdx = 0; + + for (int i = 0; i < oneMatch.length(); i++) { + char matchChar = oneMatch.charAt(i); + if (matchChar != 'Y' && matchChar != 'M' && matchChar != 'D' + && matchChar != 'h' && matchChar != 'm') { + tmpSb.append(matchChar); + } else { + char dateChar = specifiedDate.charAt(specifiedDateIdx); + while (String.valueOf(dateChar).matches("\\D")) { + dateChar = specifiedDate.charAt(++specifiedDateIdx); + } + tmpSb.append(dateChar); + specifiedDateIdx++; + } + } + m.appendReplacement(sb, tmpSb.toString()); + } + m.appendTail(sb); + return sb.toString(); + } + + // format current period starting less-than-hour task + // * for example: ten-minute task: + // * currPeriodDataTime is 201303271905 + // * formated value is 201303271900 + public String formatCurrPeriod(String src, String punit) { + if (src == null || punit == null || src.length() != 12) { + return src; + } + + String prefixMinuteStr = src.substring(0, src.length() - 2); + String minuteStr = src.substring(src.length() - 2, src.length()); + + if ("n".equals(punit)) { + if (minuteStr.compareTo("30") < 0) { + minuteStr = "00"; + } else { + minuteStr = "30"; + } + } else if ("q".equals(punit)) { + if (minuteStr.compareTo("15") < 0) { + minuteStr = "00"; + } else if (minuteStr.compareTo("30") < 0) { + minuteStr = "15"; + } else if (minuteStr.compareTo("45") < 0) { + minuteStr = "30"; + } else { + minuteStr = "45"; + } + } else if ("t".equals(punit)) { + minuteStr = minuteStr.charAt(0) + "0"; + } else if ("f".equals(punit)) { + if 
(minuteStr.substring(1).compareTo("5") < 0) { + minuteStr = minuteStr.charAt(0) + "0"; + } else { + minuteStr = minuteStr.charAt(0) + "5"; + } + } + + return prefixMinuteStr + minuteStr; + } +} diff --git a/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/FileDataUtils.java b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/FileDataUtils.java similarity index 86% rename from inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/FileDataUtils.java rename to inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/FileDataUtils.java index 3a68e63985a..0b7d310a473 100644 --- a/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/FileDataUtils.java +++ b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/FileDataUtils.java @@ -15,9 +15,11 @@ * limitations under the License. */ -package org.apache.inlong.agent.plugin.utils; +package org.apache.inlong.agent.plugin.utils.file; -import org.apache.inlong.agent.conf.JobProfile; +import org.apache.inlong.agent.conf.AbstractConfiguration; +import org.apache.inlong.agent.plugin.utils.MetaDataUtils; +import org.apache.inlong.agent.plugin.utils.PluginUtils; import com.google.gson.Gson; import com.google.gson.JsonObject; @@ -32,6 +34,10 @@ import java.io.File; import java.io.IOException; import java.lang.reflect.Type; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.attribute.BasicFileAttributes; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -51,6 +57,13 @@ public class FileDataUtils { private static final Logger LOGGER = LoggerFactory.getLogger(FileDataUtils.class); private static final Gson GSON = new Gson(); + public static String getInodeInfo(String fileName) throws IOException { + BasicFileAttributes attributesAfter; + Path path = Paths.get(fileName); + 
attributesAfter = Files.readAttributes(path, BasicFileAttributes.class); + return attributesAfter.fileKey().toString(); + } + /** * Get standard log for k8s */ @@ -84,7 +97,7 @@ public static boolean isJSON(String json) { /** * Filter file by conditions */ - public static Collection filterFile(Collection allFiles, JobProfile jobConf) { + public static Collection filterFile(Collection allFiles, AbstractConfiguration jobConf) { // filter file by labels Collection files = null; try { @@ -98,7 +111,8 @@ public static Collection filterFile(Collection allFiles, JobProfile /** * Filter file by labels if standard log for k8s */ - private static Collection filterByLabels(Collection allFiles, JobProfile jobConf) throws IOException { + private static Collection filterByLabels(Collection allFiles, AbstractConfiguration jobConf) + throws IOException { Map labelsMap = MetaDataUtils.getPodLabels(jobConf); if (labelsMap.isEmpty()) { return allFiles; diff --git a/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/FilePathUtil.java b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/FilePathUtil.java new file mode 100644 index 00000000000..741aaed750c --- /dev/null +++ b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/FilePathUtil.java @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.agent.plugin.utils.file; + +import org.apache.commons.lang.StringUtils; + +import java.io.File; +import java.util.ArrayList; + +public class FilePathUtil { + + private static final String YEAR = "YYYY"; + private static final String MONTH = "MM"; + private static final String DAY = "DD"; + private static final String HOUR = "hh"; + + public static ArrayList cutDirectory(String directory) { + String baseDirectory; + String regixDirecotry; + String fileName; + + File file = new File(directory); + fileName = file.getName(); + + int fileNameIndex = directory.length() - fileName.length() - 1; + String sign = "\\^$*+?{(|[."; + String range = "+?*{"; + + int regixDirecotryIndex = StringUtils.indexOfAny(directory, sign); + if (regixDirecotryIndex != -1 + && directory.charAt(regixDirecotryIndex) == '.') { + if (regixDirecotryIndex != directory.length() - 1) { + char c = directory.charAt(regixDirecotryIndex + 1); + if (StringUtils.indexOf(range, c) == -1) { + regixDirecotryIndex = StringUtils.indexOfAny(directory, + sign.substring(0, sign.length() - 1)); + } + } + } + if (regixDirecotryIndex < fileNameIndex) { + int regixDirecotryBeginIndex = directory.lastIndexOf('/', + regixDirecotryIndex); + if (regixDirecotryBeginIndex == -1) { + baseDirectory = directory.substring(0, fileNameIndex); + regixDirecotry = ""; + } else { + + regixDirecotry = directory.substring( + regixDirecotryBeginIndex + 1, fileNameIndex); + if (regixDirecotryBeginIndex == 0) { + baseDirectory = "/"; + } else { + baseDirectory = directory.substring(0, + 
regixDirecotryBeginIndex); + } + } + } else { + baseDirectory = directory.substring(0, fileNameIndex); + regixDirecotry = ""; + } + + ArrayList ret = new ArrayList(); + ret.add(baseDirectory); + ret.add(regixDirecotry); + ret.add(fileName); + return ret; + } + + public static ArrayList getDirectoryLayers(String directory) { + String baseDirectory; + String regixDirectory; + String fileName; + + File file = new File(directory); + fileName = file.getName(); + + int fileNameIndex = directory.length() - fileName.length() - 1; + String sign = "\\^$*+?{(|[."; + + String range = "+?*{"; + + int regixDirecotryIndex = StringUtils.indexOfAny(directory, sign); + if (regixDirecotryIndex != -1 + && directory.charAt(regixDirecotryIndex) == '.') { + if (regixDirecotryIndex != directory.length() - 1) { + char c = directory.charAt(regixDirecotryIndex + 1); + if (StringUtils.indexOf(range, c) == -1) { + regixDirecotryIndex = StringUtils.indexOfAny(directory, + sign.substring(0, sign.length() - 1)); + } + } + } + if (regixDirecotryIndex < fileNameIndex) { + int regixDirecotryBeginIndex = directory.lastIndexOf('/', + regixDirecotryIndex); + if (regixDirecotryBeginIndex == -1) { + baseDirectory = directory.substring(0, fileNameIndex); + regixDirectory = ""; + } else { + regixDirectory = directory.substring( + regixDirecotryBeginIndex + 1, fileNameIndex); + if (regixDirecotryBeginIndex == 0) { + baseDirectory = "/"; + } else { + baseDirectory = directory.substring(0, + regixDirecotryBeginIndex); + } + } + } else { + baseDirectory = directory.substring(0, fileNameIndex); + regixDirectory = ""; + } + + int[] indexes = new int[]{ + (baseDirectory.contains(YEAR) ? baseDirectory.indexOf(YEAR) : Integer.MAX_VALUE), + (baseDirectory.contains(MONTH) ? baseDirectory.indexOf(MONTH) : Integer.MAX_VALUE), + (baseDirectory.contains(DAY) ? baseDirectory.indexOf(DAY) : Integer.MAX_VALUE), + (baseDirectory.contains(HOUR) ? 
baseDirectory.indexOf(HOUR) : Integer.MAX_VALUE)}; + + int minIndex = Integer.MAX_VALUE; + for (int i = 0; i < indexes.length; i++) { + if (minIndex > indexes[i]) { + minIndex = indexes[i]; + } + } + + if (minIndex != Integer.MAX_VALUE) { + int lastIndex = baseDirectory.lastIndexOf('/', minIndex); + if (regixDirectory.length() > 0) { + regixDirectory = baseDirectory.substring(lastIndex + 1, baseDirectory.length()) + + File.separator + regixDirectory; + } else { + regixDirectory = baseDirectory.substring(lastIndex + 1, baseDirectory.length()); + } + baseDirectory = baseDirectory.substring(0, lastIndex); + } + + ArrayList ret = new ArrayList(); + ret.add(baseDirectory); + ret.add(regixDirectory); + ret.add(fileName); + return ret; + } + + public static boolean isSameDir(String fileName1, String fileName2) { + ArrayList ret1 = FilePathUtil.cutDirectory(fileName1); + ArrayList ret2 = FilePathUtil.cutDirectory(fileName2); + if (ret1.get(0).equals(ret2.get(0))) { + return true; + } else { + return false; + } + } + +} diff --git a/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/FileTimeComparator.java b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/FileTimeComparator.java new file mode 100644 index 00000000000..949044d864a --- /dev/null +++ b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/FileTimeComparator.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.agent.plugin.utils.file; + +import java.io.File; +import java.util.Comparator; + +public class FileTimeComparator implements Comparator { + + @Override + public int compare(File f1, File f2) { + if (f1.lastModified() < f2.lastModified()) { + return -1; + } else if (f1.lastModified() == f2.lastModified()) { + return 0; + } else { + return 1; + } + } + +} \ No newline at end of file diff --git a/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/Files.java b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/Files.java new file mode 100644 index 00000000000..b4ddcfac530 --- /dev/null +++ b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/Files.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.agent.plugin.utils.file; + +import org.apache.inlong.agent.utils.file.FileFinder; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Paths; +import java.nio.file.attribute.BasicFileAttributes; + +public class Files { + + /** + * Finds files or sub directories within a given base directory. + * + * @param baseDirectory A path string representing a directory to search within. + * @return A {@link org.apache.inlong.agent.plugin.utils.FileFinder}-Object to specify the search + * parameters using a builder pattern. + */ + public static FileFinder find(String baseDirectory) { + return find(new File(baseDirectory)); + } + + /** + * Finds files or sub directories within a given base directory. + * + * @param baseDirectory A path as {@link File} object to search within. + * @return A {@link org.apache.inlong.agent.plugin.utils.FileFinder} object to specify the search + * parameters using a builder pattern. 
+ */ + public static FileFinder find(File baseDirectory) { + return new FileFinder(baseDirectory); + } + + public static long getFileCreationTime(String fileName) { + long createTime = 0L; + try { + createTime = java.nio.file.Files.readAttributes(Paths.get(fileName), + BasicFileAttributes.class).creationTime().toMillis(); + } catch (IOException ignore) { + + } + return createTime; + } + + public static long getFileLastModifyTime(String fileName) { + long lastModify = 0L; + try { + lastModify = java.nio.file.Files.getLastModifiedTime(Paths.get(fileName)).toMillis(); + } catch (IOException ioe) { + + } + return lastModify; + } + +} \ No newline at end of file diff --git a/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/MatchPoint.java b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/MatchPoint.java new file mode 100644 index 00000000000..9c261057172 --- /dev/null +++ b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/MatchPoint.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.agent.plugin.utils.file; + +/** + * Created by lamberliu on 2015/11/13. 
/**
 * A matched piece of text together with the start and end offsets at which it was found.
 */
public class MatchPoint {

    String str;
    int start;
    int end;

    /**
     * Creates a match record.
     *
     * @param str1 the matched text
     * @param start1 offset at which the match begins
     * @param end1 offset at which the match ends
     */
    MatchPoint(String str1, int start1, int end1) {
        str = str1;
        start = start1;
        end = end1;
    }

    /** Returns the matched text. */
    public String getStr() {
        return this.str;
    }

    /** Replaces the matched text. */
    public void setStr(String str1) {
        this.str = str1;
    }

    /** Returns the offset at which the match begins. */
    public int getStart() {
        return this.start;
    }

    /** Replaces the offset at which the match begins. */
    public void setStart(int start1) {
        this.start = start1;
    }

    /** Returns the offset at which the match ends. */
    public int getEnd() {
        return this.end;
    }

    /** Replaces the offset at which the match ends. */
    public void setEnd(int end1) {
        this.end = end1;
    }
}
+ */ + +package org.apache.inlong.agent.plugin.utils.file; + +import hirondelle.date4j.DateTime; +import org.apache.commons.lang.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Date; +import java.util.List; +import java.util.StringTokenizer; +import java.util.TimeZone; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class NewDateUtils { + + public static final String FULL_FORMAT = "yyyyMMddHHmmss"; + public static final String NULL_DATA_TIME = "000000000000"; + public static final String DEFAULT_FORMAT = "yyyyMMddHHmm"; + public static final String DEFAULT_TIME_ZONE = "Asia/Shanghai"; + private static final Logger logger = LoggerFactory.getLogger(NewDateUtils.class); + private static final String TIME_REGEX = "YYYY(?:.MM|MM)?(?:.DD|DD)?(?:.hh|hh)?(?:.mm|mm)?(?:" + + ".ss|ss)?"; + private static final String LIMIT_SEP = "(?<=[a-zA-Z])"; + private static final String LETTER_STR = "\\D+"; + private static final String DIGIT_STR = "[0-9]+"; + private static final Pattern pattern = Pattern.compile(TIME_REGEX, + Pattern.CASE_INSENSITIVE | Pattern.DOTALL | Pattern.MULTILINE); + private static final Pattern bracePatt = Pattern.compile("\\{(.*?)\\}"); + private static final int DEFAULT_LENGTH = "yyyyMMddHHmm".length(); + public static long DAY_TIMEOUT_INTERVAL = 2 * 24 * 3600 * 1000; + public static long HOUR_TIMEOUT_INTERVAL = 2 * 3600 * 1000; + // 数据源配置异常 */ + public static final String DATA_SOURCE_CONFIG_ERROR = "ERROR-0-TDAgent|10001|ERROR" + + "|ERROR_DATA_SOURCE_CONFIG|"; + + /* Return the time in milliseconds for a data time. 
*/ + /* + * public static long getTimeInMillis(String dataTime) { if (dataTime == null) { return 0; } + * + * try { SimpleDateFormat dateFormat = new SimpleDateFormat(DEFAULT_FORMAT); Date date = dateFormat.parse(dataTime); + * + * return date.getTime(); } catch (ParseException e) { return 0; } } + */ + /* Return the format data time string from milliseconds. */ + /* + * public static String getDataTimeFromTimeMillis(long dataTime) { SimpleDateFormat dateFormat = new + * SimpleDateFormat(DEFAULT_FORMAT); return dateFormat.format(new Date(dataTime)); } + */ + /* Return the should start time for a data time log file. */ + public static String getShouldStartTime(String dataTime, String cycleUnit, + String offset) { + if (dataTime == null || dataTime.length() > 12) { + return null; + } + + SimpleDateFormat dateFormat = new SimpleDateFormat(DEFAULT_FORMAT); + TimeZone timeZone = TimeZone.getTimeZone(NewDateUtils.DEFAULT_TIME_ZONE); + dateFormat.setTimeZone(timeZone); + + if (dataTime.length() < DEFAULT_LENGTH) { + StringBuffer sb = new StringBuffer(); + for (int i = 0; i < DEFAULT_LENGTH - dataTime.length(); i++) { + sb.append("0"); + } + dataTime = dataTime + sb.toString(); + } + + Calendar calendar = Calendar.getInstance(); + try { + calendar.setTimeInMillis(dateFormat.parse(dataTime).getTime()); + } catch (ParseException e) { + return null; + } + + /* + * The delay should be added to the data time, so remove the - from offset. 
+ */ + if (offset.startsWith("-")) { + offset = offset.substring(1, offset.length()); + } else { // 为正,配置提前读取文件 + offset = "-" + offset; + } + + return dateFormat + .format(new Date(getDateTime(calendar, cycleUnit, offset).getTimeInMillis())); + } + + private static Calendar getCurDate(String cycleUnit, String offset) { + if (cycleUnit == null || cycleUnit.length() == 0) { + return null; + } + + Calendar calendar = Calendar.getInstance(); + calendar.setTimeInMillis(System.currentTimeMillis()); + + return getDateTime(calendar, cycleUnit, offset); + } + + public static String getDateTime(String dataTime, String cycleUnit, String offset) { + String retTime = NewDateUtils.millSecConvertToTimeStr( + System.currentTimeMillis(), cycleUnit); + try { + long time = NewDateUtils.timeStrConvertTomillSec(dataTime, cycleUnit); + + Calendar calendar = Calendar.getInstance(); + calendar.setTimeInMillis(time); + Calendar retCalendar = getDateTime(calendar, cycleUnit, offset); + if (retCalendar == null) { + return dataTime; + } + + retTime = NewDateUtils.millSecConvertToTimeStr(retCalendar.getTime().getTime(), + cycleUnit); + } catch (Exception e) { + logger.error("getDateTime error: ", e); + } + return retTime; + } + + public static String getDateTime(long time, String cycleUnit, String offset) { + // String retTime = NewDateUtils.millSecConvertToTimeStr(System.currentTimeMillis(), + // cycleUnit); + + Calendar calendar = Calendar.getInstance(); + calendar.setTimeInMillis(time); + Calendar retCalendar = getDateTime(calendar, cycleUnit, offset); + return NewDateUtils.millSecConvertToTimeStr(retCalendar.getTime().getTime(), cycleUnit); + + // return retTime; + } + + private static Calendar getDateTime(Calendar calendar, String cycleUnit, String offset) { + int cycleNumber = (cycleUnit.length() <= 1 + ? 
1 + : Integer.parseInt(cycleUnit.substring(0, cycleUnit.length() - 1))); + + String offsetUnit = offset.substring(offset.length() - 1, offset.length()); + int offsetNumber = Integer.parseInt(offset.substring(0, offset.length() - 1)); + + /* + * For day task, the offset cycle unit can only be day; for hourly task, the offset can't be minute; for + * minutely task, the offset cycle unit can be day, hour and minute, but if the offset cycle unit is minute, the + * offset must be divided by cycle number. + */ + // if (("D".equalsIgnoreCase(cycleUnit) && !"D".equalsIgnoreCase(offsetUnit))) { + // return calendar; + // } + // + // if (("H".equalsIgnoreCase(cycleUnit) && "m".equals(offsetUnit))) { + // return calendar; + // } + // + // if (("m".equals(cycleUnit) && "m".equals(offsetUnit) && + // cycleNumber != 0 && offsetNumber % cycleNumber != 0)) { + // return calendar; + // } + + if (cycleUnit.length() > 1 && (StringUtils.endsWithIgnoreCase(cycleUnit, "M"))) { + calendar.set(Calendar.SECOND, 0); + int minTime = calendar.get(Calendar.MINUTE); + + int leftMin = minTime % cycleNumber; + minTime = minTime - leftMin; + calendar.set(Calendar.MINUTE, minTime); + + /* Calculate the offset. */ + if ("D".equalsIgnoreCase(offsetUnit)) { + calendar.add(Calendar.DAY_OF_YEAR, offsetNumber); + } + + if ("H".equalsIgnoreCase(offsetUnit)) { + calendar.add(Calendar.HOUR_OF_DAY, offsetNumber); + } + } else if (cycleUnit.length() == 1) { + if ("D".equalsIgnoreCase(cycleUnit)) { + calendar.set(Calendar.HOUR_OF_DAY, 0); + calendar.set(Calendar.MINUTE, 0); + calendar.set(Calendar.SECOND, 0); + } else if ("h".equalsIgnoreCase(cycleUnit)) { + calendar.set(Calendar.MINUTE, 0); + calendar.set(Calendar.SECOND, 0); + } + } + + /* Calculate the offset. 
*/ + if ("D".equalsIgnoreCase(offsetUnit)) { + calendar.add(Calendar.DAY_OF_YEAR, offsetNumber); + } + + if ("h".equalsIgnoreCase(offsetUnit)) { + calendar.add(Calendar.HOUR_OF_DAY, offsetNumber); + } + + if ("m".equals(offsetUnit)) { + calendar.add(Calendar.MINUTE, offsetNumber); + } + + return calendar; + } + + public static boolean isValidCreationTime(String dataTime, String cycleUnit, + String timeOffset) { + long timeInterval = 0; + if ("Y".equalsIgnoreCase(cycleUnit)) { + timeInterval = DAY_TIMEOUT_INTERVAL; + } else if ("M".equals(cycleUnit)) { + timeInterval = HOUR_TIMEOUT_INTERVAL; + } else if ("D".equalsIgnoreCase(cycleUnit)) { + timeInterval = DAY_TIMEOUT_INTERVAL; + } else if ("h".equalsIgnoreCase(cycleUnit)) { + timeInterval = HOUR_TIMEOUT_INTERVAL; + } else if (cycleUnit.contains("m")) { + timeInterval = HOUR_TIMEOUT_INTERVAL; + } else { + logger.error("cycleUnit {} can't parse!", cycleUnit); + timeInterval = DAY_TIMEOUT_INTERVAL; + } + + // 处理偏移量,超时周期要加上时间偏移偏移量 + if (timeOffset.startsWith("-")) { + timeInterval += caclOffset(timeOffset); + } else { // 处理向后偏移 + timeInterval -= caclOffset(timeOffset); + } + + return isValidCreationTime(dataTime, timeInterval); + } + + /** + * 根据偏移量计算偏移时间 + * 当前偏移只会向前偏移,也可向后偏移为兼容之前的计算方式(相减),当为向后偏移时,返回负;当向前偏移,返回正 + * + * @param timeOffset 偏移量,如-1d,-4h,-10m等; + * @return + */ + public static long caclOffset(String timeOffset) { + String offsetUnit = timeOffset.substring(timeOffset.length() - 1); + int startIndex = timeOffset.charAt(0) == '-' ? 
1 : 0; + // 默认向后偏移 + int symbol = 1; + if (startIndex == 1) { + symbol = 1; + } else if (startIndex == 0) { // 向前偏移 + symbol = -1; + } + int offsetTime = Integer + .parseInt(timeOffset.substring(startIndex, timeOffset.length() - 1)); + if ("d".equalsIgnoreCase(offsetUnit)) { + return offsetTime * 24 * 3600 * 1000 * symbol; + } else if ("h".equalsIgnoreCase(offsetUnit)) { + return offsetTime * 3600 * 1000 * symbol; + } else if ("m".equalsIgnoreCase(offsetUnit)) { + return offsetTime * 60 * 1000 * symbol; + } + return 0; + } + + /* + * Check whether the data time is between curTime - interval and curTime + interval. + */ + public static boolean isValidCreationTime(String dataTime, long timeInterval) { + long currentTime = System.currentTimeMillis(); + + long minTime = currentTime - timeInterval; + long maxTime = currentTime + timeInterval; + + SimpleDateFormat dateFormat = new SimpleDateFormat(DEFAULT_FORMAT); + if (dataTime.length() < DEFAULT_LENGTH) { + StringBuffer sb = new StringBuffer(); + for (int i = 0; i < DEFAULT_LENGTH - dataTime.length(); i++) { + sb.append("0"); + } + dataTime = dataTime + sb.toString(); + } + + Calendar calendar = Calendar.getInstance(); + try { + calendar.setTimeInMillis(dateFormat.parse(dataTime).getTime()); + } catch (ParseException e) { + return false; + } + + return calendar.getTimeInMillis() >= minTime + && calendar.getTimeInMillis() <= maxTime; + } + + // convert millSec to YYYMMDD by cycleUnit + public static String millSecConvertToTimeStr(long time, String cycleUnit, TimeZone tz) { + String retTime = null; + + Calendar calendarInstance = Calendar.getInstance(); + calendarInstance.setTimeInMillis(time); + + Date dateTime = calendarInstance.getTime(); + SimpleDateFormat df = null; + if ("Y".equalsIgnoreCase(cycleUnit)) { + df = new SimpleDateFormat("yyyy"); + } else if ("M".equals(cycleUnit)) { + df = new SimpleDateFormat("yyyyMM"); + } else if ("D".equalsIgnoreCase(cycleUnit)) { + df = new SimpleDateFormat("yyyyMMdd"); + } else 
if ("h".equalsIgnoreCase(cycleUnit)) { + df = new SimpleDateFormat("yyyyMMddHH"); + } else if (cycleUnit.contains("m")) { + df = new SimpleDateFormat("yyyyMMddHHmm"); + } else { + logger.error("cycleUnit {} can't parse!", cycleUnit); + df = new SimpleDateFormat("yyyyMMddHH"); + } + df.setTimeZone(tz); + retTime = df.format(dateTime); + + if (cycleUnit.contains("m")) { + + int cycleNum = Integer.parseInt(cycleUnit.substring(0, + cycleUnit.length() - 1)); + int mmTime = Integer.parseInt(retTime.substring( + retTime.length() - 2, retTime.length())); + String realMMTime = ""; + if (cycleNum * (mmTime / cycleNum) <= 0) { + realMMTime = "0" + cycleNum * (mmTime / cycleNum); + } else { + realMMTime = "" + cycleNum * (mmTime / cycleNum); + } + retTime = retTime.substring(0, retTime.length() - 2) + realMMTime; + } + + return retTime; + } + + // convert millSec to YYYMMDD by cycleUnit + public static String millSecConvertToTimeStr(long time, String cycleUnit) { + return millSecConvertToTimeStr(time, cycleUnit, TimeZone.getDefault()); + } + + // convert YYYMMDD to millSec by cycleUnit + public static long timeStrConvertTomillSec(String time, String cycleUnit) + throws ParseException { + return timeStrConvertTomillSec(time, cycleUnit, TimeZone.getDefault()); + } + + public static long timeStrConvertTomillSec(String time, String cycleUnit, TimeZone timeZone) + throws ParseException { + long retTime = 0; + // SimpleDateFormat df=new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + SimpleDateFormat df = null; + if (cycleUnit.equals("Y") && time.length() == 4) { + df = new SimpleDateFormat("yyyy"); + } else if (cycleUnit.equals("M") && time.length() == 6) { + df = new SimpleDateFormat("yyyyMM"); + } else if (cycleUnit.equals("D") && time.length() == 8) { + df = new SimpleDateFormat("yyyyMMdd"); + } else if (cycleUnit.equalsIgnoreCase("h") && time.length() == 10) { + df = new SimpleDateFormat("yyyyMMddHH"); + } else if (cycleUnit.contains("m") && time.length() == 12) { + df = new 
SimpleDateFormat("yyyyMMddHHmm"); + } else { + logger.error("time {},cycleUnit {} can't parse!", time, cycleUnit); + throw new ParseException(time, 0); + } + try { + df.setTimeZone(timeZone); + retTime = df.parse(time).getTime(); + if (cycleUnit.equals("10m")) { + + } + } catch (ParseException e) { + logger.error("convert time string error. ", e); + } + return retTime; + } + + public static boolean isBraceContain(String dataName) { + Matcher matcher = bracePatt.matcher(dataName); + return matcher.find(); + } + + public static String getDateTime(String fileName, String dataName, + PathDateExpression dateExpression) { + String dataTime = null; + + if (isBraceContain(dataName)) { + String fullRegx = replaceDateExpressionWithRegex(dataName, "dataTime"); + Pattern fullPatt = Pattern.compile(fullRegx); + Matcher matcher = fullPatt.matcher(fileName); + if (matcher.find()) { + dataTime = matcher.group("dataTime"); + } + } else { + dataTime = getDateTime(fileName, dateExpression); + } + + return dataTime; + + } + + public static String getDateTime(String fileName, PathDateExpression dateExpression) { + if (fileName == null || dateExpression == null + || dateExpression.getLongestDatePattern() == null) { + return null; + } + + String longestDatePattern = NewDateUtils + .replaceDateExpressionWithRegex(dateExpression.getLongestDatePattern()); + NonRegexPatternPosition patternPosition = dateExpression.getPatternPosition(); + + Matcher mat = Pattern.compile(longestDatePattern).matcher(fileName); + boolean find = mat.find(); + // TODO : 存在文件名中有多个部分匹配到时间表达式的情况("/data/joox_logs/2000701106/201602170040.log" YYYYMMDDhh) + if (!find) { + logger.error("Can't find the pattern {} for file name {}", longestDatePattern, + fileName); + return null; + } + + String dateTime = fileName.substring(mat.start(), mat.end()); + if (patternPosition == NonRegexPatternPosition.PREFIX) { + dateTime = dateTime.substring(1, dateTime.length()); + } else if (patternPosition == NonRegexPatternPosition.SUFFIX) 
{ + dateTime = dateTime.substring(0, dateTime.length() - 1); + } else if (patternPosition == NonRegexPatternPosition.BOTH) { + dateTime = dateTime.substring(1, dateTime.length() - 1); + } else if (patternPosition == NonRegexPatternPosition.END) { + dateTime = dateTime.substring(0, dateTime.length()); + } else if (patternPosition == NonRegexPatternPosition.ENDSUFFIX) { + dateTime = dateTime.substring(1, dateTime.length()); + } else if (patternPosition == NonRegexPatternPosition.NONE) { + logger.error("The data path configuration is invalid"); + dateTime = null; + } + + return dateTime; + } + + public static ArrayList extractAllTimeRegex(String src) { + // TODO : time regex error + Matcher m = pattern.matcher(src); + ArrayList arr = new ArrayList(); + while (m.find()) { + String oneMatch = m.group(0); + arr.add(new MatchPoint(oneMatch, m.start(), m.end())); + } + return arr; + } + + public static String replaceDateExpressionWithRegex(String dataPath) { + if (dataPath == null) { + return null; + } + StringBuffer sb = new StringBuffer(); + + // find longest DATEPATTERN + ArrayList mp = extractAllTimeRegex(dataPath); + + if (mp == null || mp.size() == 0) { + return dataPath; + } + + int lastIndex = 0; + for (MatchPoint m : mp) { + sb.append(dataPath.substring(lastIndex, m.getStart())); + + String longestPattern = m.getStr(); + int hhIndex = longestPattern.indexOf("hh"); + int mmIndex = longestPattern.indexOf("mm"); + longestPattern = longestPattern.replace("YYYY", "\\d{4}"); + longestPattern = longestPattern.replace("MM", "\\d{2}"); + longestPattern = longestPattern.replace("DD", "\\d{2}"); + longestPattern = longestPattern.replace("hh", "\\d{2}"); + + if (hhIndex != -1 && mmIndex != -1 + && mmIndex >= hhIndex + 2 && mmIndex < hhIndex + 4) { + longestPattern = longestPattern.replace("mm", "\\d{2}"); + } + sb.append(longestPattern); + lastIndex = m.getEnd(); + } + + sb.append(dataPath.substring(lastIndex)); + + return sb.toString(); + } + + public static String 
replaceDateExpressionWithRegex(String dataPath, String dateTimeGroupName) { + if (dataPath == null) { + return null; + } + + // \\d{4}\\d{2}\\d{2}\\d{2} --> (?\\d{4}\\d{2}\\d{2}\\d{2}) + if (isBraceContain(dataPath)) { + StringBuilder sb = new StringBuilder(); + sb.append(dataPath.substring(0, dataPath.indexOf('{'))); + sb.append("(?<").append(dateTimeGroupName).append('>'); + sb.append(dataPath.substring(dataPath.indexOf('{') + 1, dataPath.indexOf('}'))); + sb.append(')').append(dataPath.substring(dataPath.indexOf('}') + 1)); + dataPath = sb.toString(); + } + + StringBuffer sb = new StringBuffer(); + + // find longest DATEPATTERN + ArrayList mp = extractAllTimeRegex(dataPath); + + if (mp == null || mp.size() == 0) { + return dataPath; + } + + int lastIndex = 0; + for (int i = 0; i < mp.size(); i++) { + MatchPoint m = mp.get(i); + sb.append(dataPath.substring(lastIndex, m.getStart())); + + String longestPattern = m.getStr(); + int hhIndex = longestPattern.indexOf("hh"); + int mmIndex = longestPattern.indexOf("mm"); + longestPattern = longestPattern.replace("YYYY", "\\d{4}"); + longestPattern = longestPattern.replace("MM", "\\d{2}"); + longestPattern = longestPattern.replace("DD", "\\d{2}"); + longestPattern = longestPattern.replace("hh", "\\d{2}"); + + if (hhIndex != -1 && mmIndex != -1 + && mmIndex >= hhIndex + 2 && mmIndex < hhIndex + 4) { + longestPattern = longestPattern.replace("mm", "\\d{2}"); + } + + sb.append(longestPattern); + lastIndex = m.getEnd(); + } + + sb.append(dataPath.substring(lastIndex)); + + return sb.toString(); + } + + public static String replaceDateExpression(Calendar dateTime, + String dataPath) { + if (dataPath == null) { + return null; + } + + String year = String.valueOf(dateTime.get(Calendar.YEAR)); + String month = String.valueOf(dateTime.get(Calendar.MONTH) + 1); + String day = String.valueOf(dateTime.get(Calendar.DAY_OF_MONTH)); + String hour = String.valueOf(dateTime.get(Calendar.HOUR_OF_DAY)); + String minute = 
String.valueOf(dateTime.get(Calendar.MINUTE)); + + StringBuffer sb = new StringBuffer(); + + // find longest DATEPATTERN + ArrayList mp = extractAllTimeRegex(dataPath); + + if (mp == null || mp.size() == 0) { + return dataPath; + } + + int lastIndex = 0; + for (MatchPoint m : mp) { + sb.append(dataPath.substring(lastIndex, m.getStart())); + + String longestPattern = m.getStr(); + int hhIndex = longestPattern.indexOf("hh"); + int mmIndex = longestPattern.indexOf("mm"); + + longestPattern = longestPattern.replaceAll("YYYY", year); + longestPattern = longestPattern.replaceAll("MM", externDate(month)); + longestPattern = longestPattern.replaceAll("DD", externDate(day)); + longestPattern = longestPattern.replaceAll("hh", externDate(hour)); + + if (hhIndex != -1 && mmIndex != -1 && mmIndex >= hhIndex + 2 + && mmIndex < hhIndex + 4) { + longestPattern = longestPattern.replaceAll("mm", externDate(minute)); + } + + sb.append(longestPattern); + lastIndex = m.getEnd(); + } + + sb.append(dataPath.substring(lastIndex)); + + return sb.toString(); + } + + public static String replaceDateExpression1(Calendar dateTime, + String logFileName) { + if (dateTime == null || logFileName == null) { + return null; + } + + String year = String.valueOf(dateTime.get(Calendar.YEAR)); + String month = String.valueOf(dateTime.get(Calendar.MONTH) + 1); + String day = String.valueOf(dateTime.get(Calendar.DAY_OF_MONTH)); + String hour = String.valueOf(dateTime.get(Calendar.HOUR_OF_DAY)); + String minute = String.valueOf(dateTime.get(Calendar.MINUTE)); + + int hhIndex = logFileName.indexOf("hh"); + int mmIndex = logFileName.indexOf("mm"); + + logFileName = logFileName.replaceAll("YYYY", year); + logFileName = logFileName.replaceAll("MM", externDate(month)); + logFileName = logFileName.replaceAll("DD", externDate(day)); + logFileName = logFileName.replaceAll("hh", externDate(hour)); + + if (hhIndex != -1 && mmIndex != -1 && mmIndex >= hhIndex + 2 + && mmIndex < hhIndex + 4) { + logFileName = 
logFileName.replaceAll("mm", externDate(minute)); + } + + return logFileName; + } + + private static String externDate(String time) { + if (time.length() == 1) { + return "0" + time; + } else { + return time; + } + } + + public static String parseCycleUnit(String scheduleTime) { + String cycleUnit = "D"; + + StringTokenizer st = new StringTokenizer(scheduleTime, " "); + + if (st.countTokens() <= 0) { + return "D"; + } + + int index = 0; + while (st.hasMoreElements()) { + String currentString = st.nextToken(); + if (currentString.contains("/")) { + if (index == 1) { + cycleUnit = "10m"; + } else if (index == 2) { + cycleUnit = "h"; + } + break; + } + + if (currentString.equals("*")) { + if (index == 3) { + cycleUnit = "D"; + } + break; + } + + index++; + } + + logger.info("ScheduleTime: " + scheduleTime + ", cycleUnit: " + + cycleUnit); + + return cycleUnit; + } + + // start: 20120810 + // end: 20120817 + // timeval: YYYYMMDDhh + public static List getDateRegion(String start, String end, + String cycleUnit) { + // TODO : timeval verify + + List ret = new ArrayList(); + long startTime; + long endTime; + try { + startTime = NewDateUtils.timeStrConvertTomillSec(start, cycleUnit); + endTime = NewDateUtils.timeStrConvertTomillSec(end, cycleUnit); + } catch (ParseException e) { + logger.error("date format is error: ", e); + return ret; + } + DateTime dtStart = DateTime.forInstant(startTime, TimeZone.getDefault()); + DateTime dtEnd = DateTime.forInstant(endTime, TimeZone.getDefault()); + + if (cycleUnit.equals("M")) { + dtEnd = dtEnd.getEndOfMonth(); + } else if (cycleUnit.equals("D")) { + dtEnd = dtEnd.getEndOfDay(); + } + + int year = 0; + int month = 0; + int day = 0; + int hour = 0; + int minute = 0; + int second = 0; + if (cycleUnit.equalsIgnoreCase("Y")) { + year = 1; + } else if (cycleUnit.equals("M")) { + month = 1; + } else if (cycleUnit.equalsIgnoreCase("D")) { + day = 1; + } else if (cycleUnit.equalsIgnoreCase("h")) { + hour = 1; + } else if 
(cycleUnit.equals("10m")) { + minute = 10; + } else if (cycleUnit.equals("15m")) { + minute = 15; + } else if (cycleUnit.equals("30m")) { + minute = 30; + } else if (cycleUnit.equalsIgnoreCase("s")) { + second = 1; + } else { + logger.error("cycelUnit {} is error: ", cycleUnit); + return ret; + } + while (dtStart.lteq(dtEnd)) { + ret.add(dtStart.getMilliseconds(TimeZone.getDefault())); + dtStart = dtStart.plus(year, month, day, hour, minute, second, 0, + DateTime.DayOverflow.LastDay); + } + + return ret; + } + + public static void main(String[] args) throws Exception { + + // String aa = "/data/taox/YYYYMMDDt_log/[0-9]+_YYYYMMDD_hh00.log"; + /* + * String aa = "/data/taox/YYYYt_logMMDD/[0-9]+_YYYYMMDD_hh00.log"; String bb = + * replaceDateExpressionWithRegex(aa); System.out.println("---------: " + bb); + * + * String cc = replaceDateExpression(Calendar.getInstance(), aa); System.out.println("---------: " + cc); + * + * String dd = replaceDateExpression1(Calendar.getInstance(), aa); System.out.println("---------: " + dd); + */ + + // String text = "/data1/qq_BaseInfo/YYYY-MM/YYYY-MM-DD/gamedr.gamedb[0-9]+.minigame + // .db/YYYY-MM-DD-[0-9]+.txt"; + // System.out.println(replaceDateExpressionWithRegex(text)); + // + // int timeInterval = 1000; + // String timeOffset = "10H"; + // + // String offsetUnit = timeOffset.substring(timeOffset.length() - 1); + // int startIndex = timeOffset.charAt(0) == '-' ? 
1 : 0; + // int offsetTime = Integer.parseInt(timeOffset.substring(startIndex, timeOffset.length() + // - 1)); + // if("d".equalsIgnoreCase(offsetUnit)){ + // timeInterval += offsetTime * 24 * 3600 * 1000; + // }else if("h".equalsIgnoreCase(offsetUnit)){ + // timeInterval += offsetTime * 3600 * 1000; + // }else if("m".equalsIgnoreCase(offsetUnit)){ + // timeInterval += offsetTime * 60 * 1000; + // } + // + // System.out.println(timeInterval); + // + // SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMMdd HH:mm:ss"); + // + // Calendar calendar = NewDateUtils.getCurDate("D", "-10D"); + // System.out.println("year: " + calendar.get(Calendar.YEAR) + ", month: " + // + (calendar.get(Calendar.MONTH) + 1) + ", day: " + // + calendar.get(Calendar.DAY_OF_MONTH) + ", hour: " + // + calendar.get(Calendar.HOUR_OF_DAY) + ", minute: " + // + calendar.get(Calendar.MINUTE) + ", second: " + // + calendar.get(Calendar.SECOND)); + // System.out.println(dateFormat.format(calendar.getTimeInMillis())); + // + // calendar = getCurDate("H", "-2H"); + // System.out.println("year: " + calendar.get(Calendar.YEAR) + ", month: " + // + (calendar.get(Calendar.MONTH) + 1) + ", day: " + // + calendar.get(Calendar.DAY_OF_MONTH) + ", hour: " + // + calendar.get(Calendar.HOUR_OF_DAY) + ", minute: " + // + calendar.get(Calendar.MINUTE) + ", second: " + // + calendar.get(Calendar.SECOND)); + // System.out.println(dateFormat.format(calendar.getTimeInMillis())); + // + // calendar = getCurDate("H", "-2D"); + // System.out.println("year: " + calendar.get(Calendar.YEAR) + ", month: " + // + (calendar.get(Calendar.MONTH) + 1) + ", day: " + // + calendar.get(Calendar.DAY_OF_MONTH) + ", hour: " + // + calendar.get(Calendar.HOUR_OF_DAY) + ", minute: " + // + calendar.get(Calendar.MINUTE) + ", second: " + // + calendar.get(Calendar.SECOND)); + // System.out.println(dateFormat.format(calendar.getTimeInMillis())); + // + // calendar = getCurDate("5m", "-20m"); + // System.out.println("year: " + 
calendar.get(Calendar.YEAR) + ", month: " + // + (calendar.get(Calendar.MONTH) + 1) + ", day: " + // + calendar.get(Calendar.DAY_OF_MONTH) + ", hour: " + // + calendar.get(Calendar.HOUR_OF_DAY) + ", minute: " + // + calendar.get(Calendar.MINUTE) + ", second: " + // + calendar.get(Calendar.SECOND)); + // System.out.println(dateFormat.format(calendar.getTimeInMillis())); + // + // String directory = "/data/home/user00/xyshome/logsvr/log/YYYYMMDD/[0-9]+_YYYYMMDD_hh00 + // .log"; + // calendar = getCurDate("H", "-3H"); + // System.out.println(replaceDateExpression(calendar, directory)); + // + // System.out.println(NewDateUtils.timeStrConvertTomillSec("201404031105", + // "m")); + // System.out.println(NewDateUtils.timeStrConvertTomillSec("2014040223", + // "H")); + // System.out.println(NewDateUtils + // .timeStrConvertTomillSec("20140402", "D")); + // + // System.out.println(NewDateUtils.millSecConvertToTimeStr( + // System.currentTimeMillis(), "Y")); + // System.out.println(NewDateUtils.millSecConvertToTimeStr( + // System.currentTimeMillis(), "M")); + // System.out.println(NewDateUtils.millSecConvertToTimeStr( + // System.currentTimeMillis(), "D")); + // System.out.println(NewDateUtils.millSecConvertToTimeStr( + // System.currentTimeMillis(), "H")); + // System.out.println(NewDateUtils.millSecConvertToTimeStr( + // System.currentTimeMillis(), "10m")); + // System.out.println(NewDateUtils.millSecConvertToTimeStr( + // System.currentTimeMillis(), "15m")); + // System.out.println(NewDateUtils.millSecConvertToTimeStr( + // System.currentTimeMillis(), "30m")); + // System.out.println(NewDateUtils.millSecConvertToTimeStr( + // NewDateUtils.timeStrConvertTomillSec("201404121900", "10m"), + // "10m")); + // + // NewDateUtils.getDateRegion("20120810", "20120813", "D"); + // NewDateUtils.getDateRegion("2012081005", "2012081300", "H"); + // NewDateUtils.getDateRegion("201404111649", "201404111600", "10m"); + // String dataTime = "20160122"; + // 
System.out.println(NewDateUtils.getShouldStartTime(dataTime, "D", "-2h")); + + // String dataPath = "/data/herococo/YYYYMMDD_*/YYYYMMDDhhmm.log"; + // dataPath = NewDateUtils.replaceDateExpressionWithRegex(dataPath); + // System.out.println("dataPath: " + dataPath); + // + // Pattern pattern = Pattern.compile(dataPath, Pattern.CASE_INSENSITIVE + // | Pattern.DOTALL | Pattern.MULTILINE); + // // Pattern pattern = Pattern.compile("/data/herococo/\\d+/\\d+.log", + // // Pattern.CASE_INSENSITIVE | Pattern.DOTALL | Pattern.MULTILINE); + // Matcher m = pattern + // .matcher("/data/herococo/20140406_a/20140406152730.log"); + // System.out.println(m.matches()); + // + // dataPath = "/data/home/user00/xyshome/logsvr/log/YYYYMMDD/[0-9]+_YYYYMMDD_hh00.log"; + // dataPath = NewDateUtils.replaceDateExpressionWithRegex(dataPath); + // pattern = Pattern.compile(dataPath, Pattern.CASE_INSENSITIVE + // | Pattern.DOTALL | Pattern.MULTILINE); + // m = pattern + // .matcher("/data/home/user00/xyshome/logsvr/log/20140406/8_20140406_1600.log"); + // System.out.println(dataPath); + // System.out.println(m.matches()); + // + // dataPath = "/data/work/data2/abc/YYYYMMDDhh.*.txt"; + // dataPath = NewDateUtils.replaceDateExpressionWithRegex(dataPath); + // pattern = Pattern.compile(dataPath, Pattern.CASE_INSENSITIVE + // | Pattern.DOTALL | Pattern.MULTILINE); + // m = pattern.matcher("/data/work/data2/abc/201404102242.txt"); + // System.out.println(dataPath); + // System.out.println(m.matches()); + // + // List retTimeList = NewDateUtils.getDateRegion("20140411", + // "20140411", "D"); + // for (Long time : retTimeList) { + // System.out.println(NewDateUtils.millSecConvertToTimeStr(time, "D")); + // } + // + // pattern = Pattern + // .compile( + // "/data/home/tlog/logplat/log/tlogd_1/[0-9]+_\\d{4}\\d{2}\\d{2}_\\d{2}00 + // .log", + // Pattern.CASE_INSENSITIVE | Pattern.DOTALL + // | Pattern.MULTILINE); + // m = pattern + // 
.matcher("/data/home/tlog/logplat/log/tlogd_1/65535_20140506_1600.log.1"); + // System.out.println(m.matches()); + // System.out.println(m.lookingAt()); + // + // String unit = "h"; + // if (StringUtils.endsWithIgnoreCase("h", "H")) { + // System.out.println("yes"); + // } + // + // System.out.println(NewDateUtils.getDateTime("20160106", "D", "-4h")); + + // PathDateExpression dateExpression = DateUtils + // .extractLongestTimeRegexWithPrefixOrSuffix + // ("/data/log/qqtalk/[0-9]+_[0-9]+_id20522_[0-9]+_YYYYMMDD_hh.log"); + // System.out.println(dateExpression.getLongestDatePattern()); + // String fileTime = getDateTime("/data/log/qqtalk/3900626911_11217_id20522_17_20160420 + // .log", dateExpression); + // System.out.println(fileTime); + + // String dataTime = "20180411"; + // + // String shouldStart = getShouldStartTime(dataTime, "D", "4h"); + // System.out.println(shouldStart); + // + // String fileName = "rc_trade_water[0-9]*.YYYY-MM-DD-hh.[0-9]+"; + // + // String newFileName = "rc_trade_water.2016-11-20-12.9"; + // + // /** + // * 打印出文件名中 最长的时间表达式 + // */ + // PathDateExpression dateExpression = DateUtils.extractLongestTimeRegexWithPrefixOrSuffix + // (fileName); + // System.out.println(dateExpression.getLongestDatePattern()); + // + // /** + // * 检查正则表达式是否能匹配到文件 + // */ + // Pattern pattern = Pattern.compile(replaceDateExpressionWithRegex(fileName), + // Pattern.CASE_INSENSITIVE | Pattern.DOTALL | Pattern.MULTILINE); + // + // Matcher matcher = pattern.matcher(newFileName); + // + // if (matcher.matches() || matcher.lookingAt()) { + // System.out.println("Matched File"); + // } + // + // /** + // * 打印文件名的时间 + // */ + // String fileTime = getDateTime(newFileName, dateExpression); + // System.out.println(fileTime); + // + // + // String fileName1 = "/data/joox_logs/2000701106/201602170040.log"; + // String filePathRegx = "/data/joox_logs/2000701106/{YYYYMMDDhh}40.log"; + // String fullRegx = replaceDateExpressionWithRegex(filePathRegx, "dateTimeGN"); + // 
System.out.println(fullRegx); + // Pattern fullPattern = Pattern.compile(fullRegx); + // Matcher fullMatcher = fullPattern.matcher(fileName1); + // while (fullMatcher.find()) { + // System.out.println(fullMatcher.group("dateTimeGN")); + // } + + System.out.println( + timeStrConvertTomillSec("2018111209", "h", TimeZone.getTimeZone("GMT+8:00"))); + } +} diff --git a/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/NonRegexPatternPosition.java b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/NonRegexPatternPosition.java new file mode 100644 index 00000000000..0732d184e0d --- /dev/null +++ b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/NonRegexPatternPosition.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.agent.plugin.utils.file; + +/* + * Describe the nearest character around the date time expression. For example, for date source name + * work/[0-9]+_YYYYMMDD_[0-9]+.log, the nearest character around YYYYMMDD is _, and this source name + * has _ before and after it. For this data source name, the NonRegexPatternPosition should be BOTH. 
+ */ +public enum NonRegexPatternPosition { + NONE, /* Regex patterns sit on both sides of the time expression; such a path is invalid. */ + + PREFIX, /* The text before the time expression is not a regex pattern. */ + + SUFFIX, /* The text after the time expression is not a regex pattern. */ + + END, /* + * The time expression ends the path; judging by the example, a regex pattern precedes it: /data/work/[0-9]+YYYYMMDD + */ + + ENDSUFFIX, /* + * The time expression ends the path; judging by the example, a literal precedes it: /data/work/YYYYMMDD + */ + + BOTH; /* Neither the text before nor the text after the time expression is a regex pattern. */ +} diff --git a/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/PathDateExpression.java b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/PathDateExpression.java new file mode 100644 index 00000000000..c75e0398b8e --- /dev/null +++ b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/utils/file/PathDateExpression.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.agent.plugin.utils.file; + +/* The date expression in the file path.
*/ +public class PathDateExpression { + + /* Longest date-time expression found in the file path; it represents the task cycle unit. */ + private final String longestDatePattern; + /* Tells whether regex patterns exist before and/or after the longest date pattern. */ + private final NonRegexPatternPosition patternPosition; + + public PathDateExpression(String longestDatePattern, NonRegexPatternPosition patternPosition) { /* Stores both arguments as given; no validation is performed here. */ + this.longestDatePattern = longestDatePattern; + this.patternPosition = patternPosition; + } + + public String getLongestDatePattern() { /* Returns the pattern passed to the constructor. */ + return longestDatePattern; + } + + public NonRegexPatternPosition getPatternPosition() { /* Returns the position passed to the constructor. */ + return patternPosition; + } +}