From a2c94303c66f2895f8075e3a9a1e81892834039b Mon Sep 17 00:00:00 2001 From: Anton_Achkasov Date: Fri, 22 Jul 2016 18:31:43 +0300 Subject: [PATCH] Generation X and printing Y sorted integers into HDFS via YARN application to make it work pay attention to https://github.com/sequenceiq/hadoop-docker/issues/56 issue description also minor pom-files fixes --- Homework1/pom.xml | 1 - Homework2/pom.xml | 1 - Homework3/pom.xml | 1 - Homework4/pom.xml | 1 - .../hello/container/ContainerApplication.java | 5 +- .../container/DigitsGeneratorSorter.java | 80 +++++++++++++++++++ .../main/java/hello/container/HelloPojo.java | 50 ------------ Homework5/gs-yarn-basic-dist/execute-job.sh | 12 --- Homework5/gs-yarn-basic-dist/pom.xml | 9 ++- Homework5/gs-yarn-basic-dist/yarn-site.xml | 44 ++++++++++ Homework5/pom.xml | 1 + 11 files changed, 135 insertions(+), 70 deletions(-) create mode 100644 Homework5/gs-yarn-basic-container/src/main/java/hello/container/DigitsGeneratorSorter.java delete mode 100644 Homework5/gs-yarn-basic-container/src/main/java/hello/container/HelloPojo.java create mode 100644 Homework5/gs-yarn-basic-dist/yarn-site.xml diff --git a/Homework1/pom.xml b/Homework1/pom.xml index cb5f3d2..ba2b298 100644 --- a/Homework1/pom.xml +++ b/Homework1/pom.xml @@ -140,7 +140,6 @@ timeout="120000" usepty="true" /> - diff --git a/Homework2/pom.xml b/Homework2/pom.xml index 614dc2f..35e6fe5 100644 --- a/Homework2/pom.xml +++ b/Homework2/pom.xml @@ -182,7 +182,6 @@ timeout="120000" usepty="true" /> - diff --git a/Homework3/pom.xml b/Homework3/pom.xml index 8bbf357..8572d7d 100644 --- a/Homework3/pom.xml +++ b/Homework3/pom.xml @@ -195,7 +195,6 @@ timeout="120000" usepty="true" /> - diff --git a/Homework4/pom.xml b/Homework4/pom.xml index 6c743ee..e02ab88 100644 --- a/Homework4/pom.xml +++ b/Homework4/pom.xml @@ -199,7 +199,6 @@ timeout="120000" usepty="true" /> - diff --git a/Homework5/gs-yarn-basic-container/src/main/java/hello/container/ContainerApplication.java 
b/Homework5/gs-yarn-basic-container/src/main/java/hello/container/ContainerApplication.java
index 1f44981..bad1695 100644
--- a/Homework5/gs-yarn-basic-container/src/main/java/hello/container/ContainerApplication.java
+++ b/Homework5/gs-yarn-basic-container/src/main/java/hello/container/ContainerApplication.java
@@ -14,8 +14,9 @@ public static void main(String[] args) {
     }
 
     @Bean
-    public HelloPojo helloPojo() {
-        return new HelloPojo();
+    public DigitsGeneratorSorter createDigitsGeneratorSorter() {
+// TODO YarnContainerBuilder
+        return new DigitsGeneratorSorter();
     }
 
 }
diff --git a/Homework5/gs-yarn-basic-container/src/main/java/hello/container/DigitsGeneratorSorter.java b/Homework5/gs-yarn-basic-container/src/main/java/hello/container/DigitsGeneratorSorter.java
new file mode 100644
index 0000000..a611342
--- /dev/null
+++ b/Homework5/gs-yarn-basic-container/src/main/java/hello/container/DigitsGeneratorSorter.java
@@ -0,0 +1,112 @@
+package hello.container;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.yarn.annotation.OnContainerStart;
+import org.springframework.yarn.annotation.YarnComponent;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Random;
+
+/**
+ * YARN container component that generates random non-negative integers, sorts them
+ * and writes the smallest of them to a file in HDFS, one value per line.
+ */
+@YarnComponent
+public class DigitsGeneratorSorter {
+
+    private static final int NUMBER_OF_GENERATED_RANDOM_DIGITS = 100000;
+    private static final int NUMBER_OF_DIGITS_WRITE_TO_HDFS = 100;
+    private static final Log LOGGER = LogFactory.getLog(DigitsGeneratorSorter.class);
+    private static final String OUTPUT_FILE_NAME = "result";
+    // TODO can be enhanced via System.getenv("HOSTNAME")
+    private static final String NAME_NODE_URI = "hdfs://172.17.0.2:9000";
+
+    @Autowired
+    private Configuration configuration;
+
+    /**
+     * Container start hook: generates and sorts the integers, then stores the first
+     * {@link #NUMBER_OF_DIGITS_WRITE_TO_HDFS} of them in HDFS.
+     *
+     * @throws Exception if the NameNode URI is malformed or writing to HDFS fails
+     */
+    @OnContainerStart
+    public void onContainerStart() throws Exception {
+        List<Integer> integerList = createAndSortDigits(NUMBER_OF_GENERATED_RANDOM_DIGITS);
+        writeRecordsToHDFS(integerList, NUMBER_OF_DIGITS_WRITE_TO_HDFS);
+    }
+
+    /**
+     * Generates the requested amount of random non-negative integers and sorts them ascending.
+     *
+     * @param numberOfDigits how many integers to generate
+     * @return the generated integers in ascending order
+     */
+    private List<Integer> createAndSortDigits(int numberOfDigits) {
+        LOGGER.info("Creating list of " + numberOfDigits + " sorted digits ...");
+        Random random = new Random();
+        List<Integer> list = new ArrayList<>(Math.max(numberOfDigits, 0));
+        for (int i = 0; i < numberOfDigits; i++) {
+            // Math.abs(Integer.MIN_VALUE) is still negative, so clear the sign bit instead.
+            list.add(random.nextInt() & Integer.MAX_VALUE);
+        }
+
+        LOGGER.info("Sorting created list of " + numberOfDigits + " ...");
+        Collections.sort(list);
+        LOGGER.info("Sorting created list of " + numberOfDigits + " complete");
+
+        LOGGER.info("Creation list of " + numberOfDigits + " sorted digits complete");
+        return list;
+    }
+
+    /**
+     * Writes the leading elements of the list to the {@code /result} file in HDFS,
+     * one value per line, replacing any previous output.
+     *
+     * @param integerList values to write, in the order they should appear in the file
+     * @param numberOfIntegerToWrite upper bound on how many values are written
+     * @throws URISyntaxException if the NameNode URI cannot be parsed
+     * @throws IOException if HDFS cannot be reached or written to
+     */
+    private void writeRecordsToHDFS(List<Integer> integerList, int numberOfIntegerToWrite)
+            throws URISyntaxException, IOException {
+        // Never read past the end of the list when it holds fewer values than requested.
+        int recordsToWrite = Math.min(numberOfIntegerToWrite, integerList.size());
+        LOGGER.info("Writing " + recordsToWrite + " in sorted list to HDFS in /" + OUTPUT_FILE_NAME + " ...");
+
+        Path file = new Path(NAME_NODE_URI + "/" + OUTPUT_FILE_NAME);
+        // newInstance() hands out a private FileSystem, so closing it here does not
+        // close the cached instance that FileSystem.get() shares across the JVM.
+        try (FileSystem fileSystem = FileSystem.newInstance(new URI(NAME_NODE_URI), configuration)) {
+            if (fileSystem.exists(file)) {
+                fileSystem.delete(file, true);
+            }
+
+            try (OutputStream outputStream = fileSystem.create(file);
+                 BufferedWriter bufferedWriter = new BufferedWriter(
+                         new OutputStreamWriter(outputStream, StandardCharsets.UTF_8))) {
+                for (int i = 0; i < recordsToWrite; i++) {
+                    bufferedWriter.write(String.valueOf(integerList.get(i)));
+                    bufferedWriter.write("\n");
+                }
+            }
+        }
+
+        LOGGER.info("Writing " + recordsToWrite + " in sorted list to HDFS in /" + OUTPUT_FILE_NAME + " complete");
+    }
+
+}
diff --git a/Homework5/gs-yarn-basic-container/src/main/java/hello/container/HelloPojo.java b/Homework5/gs-yarn-basic-container/src/main/java/hello/container/HelloPojo.java deleted file mode 100644 index df61984..0000000 --- a/Homework5/gs-yarn-basic-container/src/main/java/hello/container/HelloPojo.java +++ /dev/null @@ -1,50 +0,0 @@ -package hello.container; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.data.hadoop.fs.FsShell; -import org.springframework.yarn.annotation.OnContainerStart; -import org.springframework.yarn.annotation.YarnComponent; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Random; - -@YarnComponent -public class HelloPojo { - - private static final int NUMBER_OF_GENERATED_RANDOM_DIGITS = 100000; - private static final Log log = LogFactory.getLog(HelloPojo.class); - - @Autowired - private Configuration configuration; - - @OnContainerStart - public void publicVoidNoArgsMethod() throws Exception { - log.info("Hello from HelloPojo!"); - log.info("About to list from hdfs root content"); - - FsShell shell = new FsShell(configuration); - for (FileStatus s : shell.ls(false, "/")) { - log.info(s); - } - shell.close(); - -// createAndSortDigits(NUMBER_OF_GENERATED_RANDOM_DIGITS); - } - - private void createAndSortDigits(int numberOfDigits) { - Random random = new Random(); - List list = new ArrayList(); - for (int i = 0; i < numberOfDigits; i++) { - list.add(random.nextInt()); - } - - Collections.sort(list); - } - -} diff --git a/Homework5/gs-yarn-basic-dist/execute-job.sh b/Homework5/gs-yarn-basic-dist/execute-job.sh index 03a19b1..eb6db61 100644 --- a/Homework5/gs-yarn-basic-dist/execute-job.sh +++ b/Homework5/gs-yarn-basic-dist/execute-job.sh @@ -12,22 +12,10 @@ echo 
"NodeManager started. Hadoop cluster initialized. Leaving NameNode from Saf bin/hdfs dfsadmin -safemode leave echo "NameNode leaved SafeMode state" - -#echo "Port 9000" >> /etc/ssh/sshd_config -#service sshd restart - -#cd $HADOOP_PREFIX/etc/hadoop -#sed 's/.*/hdfs:\/\/localhost:9000<\/value>/' core-site.xml.template > core-site.xml -# -#cd $HADOOP_PREFIX/sbin -#sh stop-all.sh -#sh start-all.sh - echo "Staring history server" ./sbin/mr-jobhistory-daemon.sh start historyserver echo "History server started" -export HADOOP_CLIENT_OPTS="-Xmx4g -Xmn1g -Xms4g $HADOOP_CLIENT_OPTS" echo "Running a job ..." java -jar /opt/gs-yarn-basic-client-1.0-SNAPSHOT.jar echo "Job has finished" \ No newline at end of file diff --git a/Homework5/gs-yarn-basic-dist/pom.xml b/Homework5/gs-yarn-basic-dist/pom.xml index e2570ae..7cd5d13 100644 --- a/Homework5/gs-yarn-basic-dist/pom.xml +++ b/Homework5/gs-yarn-basic-dist/pom.xml @@ -111,6 +111,12 @@ todir="${remote.host.name}:${remote.host.password}@${remote.host.ip}:/root/" /> + + + + - diff --git a/Homework5/gs-yarn-basic-dist/yarn-site.xml b/Homework5/gs-yarn-basic-dist/yarn-site.xml new file mode 100644 index 0000000..a6db56b --- /dev/null +++ b/Homework5/gs-yarn-basic-dist/yarn-site.xml @@ -0,0 +1,44 @@ + + + + + + yarn.nodemanager.aux-services + mapreduce_shuffle + + + + yarn.application.classpath + /usr/local/hadoop/etc/hadoop, /usr/local/hadoop/share/hadoop/common/*, /usr/local/hadoop/share/hadoop/common/lib/*, /usr/local/hadoop/share/hadoop/hdfs/*, /usr/local/hadoop/share/hadoop/hdfs/lib/*, /usr/local/hadoop/share/hadoop/mapreduce/*, /usr/local/hadoop/share/hadoop/mapreduce/lib/*, /usr/local/hadoop/share/hadoop/yarn/*, /usr/local/hadoop/share/hadoop/yarn/lib/* + + + + + Number of seconds after an application finishes before the nodemanager's + DeletionService will delete the application's localized file directory + and log directory. 
+ + To diagnose Yarn application problems, set this property's value large + enough (for example, to 600 = 10 minutes) to permit examination of these + directories. After changing the property's value, you must restart the + nodemanager in order for it to have an effect. + + The roots of Yarn applications' work directories are configurable with + the yarn.nodemanager.local-dirs property (see below), and the roots + of the Yarn applications' log directories are configurable with the + yarn.nodemanager.log-dirs property (see also below). + + yarn.nodemanager.delete.debug-delay-sec + 600 + + + yarn.nodemanager.vmem-check-enabled + false + Whether virtual memory limits will be enforced for containers + + + yarn.nodemanager.vmem-pmem-ratio + 4 + Ratio of virtual memory to physical memory when setting memory limits for containers + + diff --git a/Homework5/pom.xml b/Homework5/pom.xml index acfe8bd..e8c1fcf 100644 --- a/Homework5/pom.xml +++ b/Homework5/pom.xml @@ -36,6 +36,7 @@ 12345678 homework5 execute-job.sh + yarn-site.xml gs-yarn-basic-container gs-yarn-basic-appmaster