Skip to content

Commit

Permalink
HADOOP-18257. S3AAuditLogMergerAndParser changes
Browse files Browse the repository at this point in the history
* dest is a path to the avro file to create containing all
  of the files parsed.
* tests updated with changed API

Change-Id: I810b165a4c6d2354bc91523594c7a24a4c031aee

HADOOP-18257. audit log: ParsedAuditLogEntry

Change-Id: Iba0d57357cc04aad571f9f0f4d69ee7f1dd11712

HADOOP-18257. audit log

* moved avro source
* WiP of a serializable/writable object for easy use in MR, spark

Change-Id: I56bbff0ad2d71b64984f4cc59e1f2c83ea25597a

HADOOP-18257. spotbugs

Change-Id: Ia0ab414bcca8a898eca95fe9ddd89b5bfcef3028

HADOOP-18257 audit log parser

* tool is invoked through hadoop s3guard command
* which can now also be invoked as "hadoop s3a"!
* tests are improved.
* OperationDuration implements DurationTracker for a bit more completeness

TODO
* split out the record parsing into a hadoop MR record reader/writer
* so as to support large-scale parsing
* cli tool just glues that together either for small parallelised extraction
  or for aggregation to one file.
* but a bulk job would work with a larger dataset

Change-Id: I25e333592d1058b460b0bfda5313a20de13c2e35

HADOOP-18257 audit log parser

Initial round of changes
* only log at debug during parsing
* cli to take <path to source> <path of output file>
  where the output is a filename not a dir.

Change-Id: Ibdba37e42a36b1933979d2a1e7265d07069ea0bd

HADOOP-18257. Merging and Parsing S3A audit logs into Avro format for analysis.
  • Loading branch information
Mehakmeet Singh authored and steveloughran committed Nov 18, 2024
1 parent 317db31 commit fb2a10e
Show file tree
Hide file tree
Showing 17 changed files with 1,808 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ public String toString() {

@Override
public void close() {
finished();
super.close();
if (logAtInfo) {
log.info("{}", this);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,14 @@

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.statistics.DurationTracker;

/**
* Little duration counter.
*/
@InterfaceAudience.Public
@InterfaceStability.Unstable
public class OperationDuration {
public class OperationDuration implements DurationTracker {

/**
* Time in millis when the operation started.
Expand Down Expand Up @@ -65,6 +66,16 @@ public void finished() {
finished = time();
}

/**
 * {@inheritDoc}
 * <p>
 * A failure is recorded the same way as success: the finish time is
 * set; no separate failure state is tracked by this class.
 */
@Override
public void failed() {
finished();
}

/**
 * {@inheritDoc}
 * <p>
 * Closing the tracker marks the duration as finished, so it can be
 * used in a try-with-resources block to time an operation.
 */
@Override
public void close() {
finished();
}

/**
* Return the duration as {@link #humanTime(long)}.
* @return a printable duration.
Expand Down
5 changes: 5 additions & 0 deletions hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml
Original file line number Diff line number Diff line change
Expand Up @@ -86,4 +86,9 @@
<Method name="submit"/>
<Bug pattern="RV_RETURN_VALUE_IGNORED_BAD_PRACTICE"/>
</Match>

<!-- AvroS3LogEntryRecord is generated from the Avro schema by the
     avro-maven-plugin; suppress the NP_NULL_INSTANCEOF warning raised
     by the generated code rather than patching generated sources. -->
<Match>
<Class name="org.apache.hadoop.fs.s3a.audit.AvroS3LogEntryRecord"/>
<Bug pattern="NP_NULL_INSTANCEOF"/>
</Match>
</FindBugsFilter>
20 changes: 20 additions & 0 deletions hadoop-tools/hadoop-aws/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,7 @@
<exclusion>org.apache.hadoop.fs.s3a.commit.impl.*</exclusion>
<exclusion>org.apache.hadoop.fs.s3a.commit.magic.*</exclusion>
<exclusion>org.apache.hadoop.fs.s3a.commit.staging.*</exclusion>
<exclusion>org.apache.hadoop.fs.s3a.audit.mapreduce.*</exclusion>
</exclusions>
<bannedImports>
<bannedImport>org.apache.hadoop.mapreduce.**</bannedImport>
Expand All @@ -481,6 +482,25 @@
</execution>
</executions>
</plugin>

<!-- create the avro records for the audit log parser -->
<plugin>
<groupId>org.apache.avro</groupId>
<artifactId>avro-maven-plugin</artifactId>
<!-- NOTE(review): no <version> element here; presumably the version is
     managed in a parent pom's pluginManagement - confirm. -->
<executions>
<execution>
<id>generate-avro-sources</id>
<phase>generate-sources</phase>
<goals>
<goal>schema</goal>
</goals>
</execution>
</executions>
<!-- read .avsc schemas from src/main/avro and emit the generated
     record classes under target/generated-sources/avro -->
<configuration>
<sourceDirectory>src/main/avro</sourceDirectory>
<outputDirectory>${project.build.directory}/generated-sources/avro</outputDirectory>
</configuration>
</plugin>
</plugins>
</build>

Expand Down
48 changes: 48 additions & 0 deletions hadoop-tools/hadoop-aws/src/main/avro/AvroDataSchema.avsc
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// AvroS3LogEntryRecord: one parsed entry of an S3 server access log,
// plus a "referrerMap" of key-value pairs extracted from the referrer
// header. The numeric fields (bytessent, objectsize, totaltime,
// turnaroundtime) are nullable unions because log entries may record
// no value for them.
// NOTE(review): the unions are ["long", "null"]; since no defaults are
// declared here the ordering has no effect, but ["null", "long"] is the
// usual convention if defaults are ever added - confirm.
{
"type" : "record", "name" : "AvroS3LogEntryRecord",
"namespace" : "org.apache.hadoop.fs.s3a.audit",
"fields" : [
{ "name" : "owner", "type" : "string" },
{ "name" : "bucket", "type" : "string" },
{ "name" : "timestamp", "type" : "string" },
{ "name" : "remoteip", "type" : "string" },
{ "name" : "requester", "type" : "string" },
{ "name" : "requestid", "type" : "string" },
{ "name" : "verb", "type" : "string" },
{ "name" : "key", "type" : "string" },
{ "name" : "requesturi", "type" : "string" },
{ "name" : "http", "type" : "string" },
{ "name" : "awserrorcode", "type" : "string" },
{ "name" : "bytessent", "type" : ["long", "null"] },
{ "name" : "objectsize", "type" : ["long", "null"] },
{ "name" : "totaltime", "type" : ["long", "null"] },
{ "name" : "turnaroundtime" , "type" : ["long", "null"] },
{ "name" : "referrer", "type" : "string" },
{ "name" : "useragent", "type" : "string" },
{ "name" : "version", "type" : "string" },
{ "name" : "hostid", "type" : "string" },
{ "name" : "sigv", "type" : "string" },
{ "name" : "cypher", "type" : "string" },
{ "name" : "auth", "type" : "string" },
{ "name" : "endpoint", "type" : "string" },
{ "name" : "tls", "type" : "string" },
{ "name" : "tail", "type" : "string" },
{ "name" : "referrerMap", "type" : {"type": "map", "values": "string"} }
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.fs.s3a.audit;

import java.io.IOException;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.audit.mapreduce.S3AAuditLogMergerAndParser;
import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool;
import org.apache.hadoop.util.ExitUtil;

import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_COMMAND_ARGUMENT_ERROR;
import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_FAIL;
import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_SUCCESS;

/**
 * AuditTool is a Command Line Interface.
 * Its functionality is to merge and parse S3A audit log files under a
 * source path and write all parsed entries to a single avro file.
 * <p>
 * It takes exactly two arguments: the path of the source log files and
 * the path of the avro file to create (a file name, not a directory).
 */
public class AuditTool extends S3GuardTool {

  private static final Logger LOG = LoggerFactory.getLogger(AuditTool.class);

  /**
   * Name of audit tool: {@value}.
   */
  public static final String AUDIT = "audit";

  /**
   * Name of this tool: {@value}.
   */
  public static final String AUDIT_TOOL =
      "org.apache.hadoop.fs.s3a.audit.AuditTool";

  /**
   * Purpose of this tool: {@value}.
   */
  public static final String PURPOSE =
      "\n\nUSAGE:\nMerge and parse audit log files and convert into avro files "
          + "for better visualization";

  // Exit codes
  private static final int SUCCESS = EXIT_SUCCESS;

  private static final int FAILURE = EXIT_FAIL;

  // NOTE(review): currently unused; kept for future argument validation.
  private static final int INVALID_ARGUMENT = EXIT_COMMAND_ARGUMENT_ERROR;

  /**
   * Sample value passed to the merger/parser; presumably a sampling
   * count/interval for log entries — TODO confirm against
   * {@code S3AAuditLogMergerAndParser}.
   */
  private static final int SAMPLE = 500;

  private static final String USAGE =
      "hadoop " + AUDIT_TOOL +
          " <path of source files>" +
          " <path of output file>"
          + "\n";

  /** Output stream for messages; set in {@link #run(String[], PrintStream)}. */
  private PrintStream out;

  /**
   * Instantiate with the given configuration.
   * @param conf configuration to use
   */
  public AuditTool(final Configuration conf) {
    super(conf);
  }

  /**
   * Tells us the usage of the AuditTool by commands.
   * @return the string USAGE
   */
  public String getUsage() {
    return USAGE + PURPOSE;
  }

  /**
   * Name of this tool.
   * @return the fully qualified class name of this tool
   */
  public String getName() {
    return AUDIT_TOOL;
  }

  /**
   * This run method in AuditTool takes source and destination path of bucket,
   * and passes these paths to merge and parse audit log files.
   * @param args argument list: source path, destination file path
   * @param stream output stream
   * @return SUCCESS i.e, '0', on success; FAILURE otherwise
   * @throws Exception on any failure.
   */
  @Override
  public int run(final String[] args, final PrintStream stream)
      throws Exception {

    this.out = stream;

    preConditionArgsSizeCheck(args);
    List<String> paths = Arrays.asList(args);

    // Path of audit log files
    Path logsPath = new Path(paths.get(0));

    // Path of destination file
    Path destPath = new Path(paths.get(1));
    LOG.debug("Merging and parsing audit logs under {} into {}",
        logsPath, destPath);

    final S3AAuditLogMergerAndParser auditLogMergerAndParser =
        new S3AAuditLogMergerAndParser(getConf(), SAMPLE);

    // Calls S3AAuditLogMergerAndParser for implementing merging, passing of
    // audit log files and converting into avro file
    boolean mergeAndParseResult =
        auditLogMergerAndParser.mergeAndParseAuditLogFiles(
            logsPath, destPath);

    // keep any buffered output in sync before returning
    flush();
    return mergeAndParseResult ? SUCCESS : FAILURE;
  }

  /**
   * Validate the argument count, printing the usage and raising an
   * invalid-arguments exception if it is not exactly two.
   * @param args the command line arguments
   */
  private void preConditionArgsSizeCheck(String[] args) {
    if (args.length != 2) {
      errorln(getUsage());
      throw invalidArgs("Invalid number of arguments");
    }
  }

  /**
   * Flush all active output channels, including {@code System.err},
   * so as to stay in sync with any JRE log messages.
   */
  private void flush() {
    if (out != null) {
      out.flush();
    } else {
      System.out.flush();
    }
    System.err.flush();
  }

  /**
   * Flush and close the output stream, if set.
   * @throws IOException on a failure to close the stream
   */
  public void closeOutput() throws IOException {
    flush();
    if (out != null) {
      out.close();
    }
  }

}
Loading

0 comments on commit fb2a10e

Please sign in to comment.