Skip to content

Commit

Permalink
Generation X and printing Y sorted integers into HDFS via YARN applic…
Browse files Browse the repository at this point in the history
…ation

 to make it work pay attention to sequenceiq/hadoop-docker#56 issue description
 also minor pom-files fixes
  • Loading branch information
Hokan-Ashir committed Jul 22, 2016
1 parent 59b176c commit a2c9430
Show file tree
Hide file tree
Showing 11 changed files with 135 additions and 70 deletions.
1 change: 0 additions & 1 deletion Homework1/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,6 @@
timeout="120000"
usepty="true"
/>
<!--/etc/bootstrap.sh -bash-->
<echo message="Hadoop cluster in demon mode is running" />

<!--TODO: add a sed step that converts \r\n line endings to \n, in case the scripts are moved from Windows to Linux and fail for no obvious reason-->
Expand Down
1 change: 0 additions & 1 deletion Homework2/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,6 @@
timeout="120000"
usepty="true"
/>
<!--/etc/bootstrap.sh -bash-->
<echo message="Hadoop cluster in demon mode is running" />

<!--TODO: add a sed step that converts \r\n line endings to \n, in case the scripts are moved from Windows to Linux and fail for no obvious reason-->
Expand Down
1 change: 0 additions & 1 deletion Homework3/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,6 @@
timeout="120000"
usepty="true"
/>
<!--/etc/bootstrap.sh -bash-->
<echo message="Hadoop cluster in demon mode is running" />

<!--TODO: add a sed step that converts \r\n line endings to \n, in case the scripts are moved from Windows to Linux and fail for no obvious reason-->
Expand Down
1 change: 0 additions & 1 deletion Homework4/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,6 @@
timeout="120000"
usepty="true"
/>
<!--/etc/bootstrap.sh -bash-->
<echo message="Hadoop cluster in demon mode is running" />

<!--TODO: add a sed step that converts \r\n line endings to \n, in case the scripts are moved from Windows to Linux and fail for no obvious reason-->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ public static void main(String[] args) {
}

@Bean
public HelloPojo helloPojo() {
return new HelloPojo();
public DigitsGeneratorSorter createDigitsGeneratorSorter() {
// TODO YarnContainerBuilder
return new DigitsGeneratorSorter();
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package hello.container;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.yarn.annotation.OnContainerStart;
import org.springframework.yarn.annotation.YarnComponent;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.*;

/**
 * YARN container component that, when the container starts, generates a list of random
 * non-negative integers, sorts it in ascending order and writes the first (smallest)
 * entries, one per line, to a file in HDFS.
 */
@YarnComponent
public class DigitsGeneratorSorter {

    private static final int NUMBER_OF_GENERATED_RANDOM_DIGITS = 100000;
    private static final int NUMBER_OF_DIGITS_WRITE_TO_HDFS = 100;
    private static final Log LOGGER = LogFactory.getLog(DigitsGeneratorSorter.class);
    private static final String OUTPUT_FILE_NAME = "result";

    // Injected by Spring. It is not read by this class: writeRecordsToHDFS builds its own
    // Configuration instance. The field is kept so the bean wiring stays unchanged.
    @Autowired
    private Configuration configuration;

    /**
     * Container entry point: generates and sorts the integers, then writes the smallest
     * ones to HDFS.
     *
     * @throws Exception if the HDFS URI is malformed or any HDFS operation fails
     */
    @OnContainerStart
    public void onContainerStart() throws Exception {
        List<Integer> integerList = createAndSortDigits(NUMBER_OF_GENERATED_RANDOM_DIGITS);
        writeRecordsToHDFS(integerList, NUMBER_OF_DIGITS_WRITE_TO_HDFS);
    }

    /**
     * Creates {@code numberOfDigits} random non-negative integers and sorts them ascending.
     *
     * @param numberOfDigits how many integers to generate; values below one yield an empty list
     * @return the sorted list of generated integers
     */
    private List<Integer> createAndSortDigits(int numberOfDigits) {
        LOGGER.info("Creating list of " + numberOfDigits + " sorted digits ...");
        Random random = new Random();
        List<Integer> list = new ArrayList<Integer>(Math.max(numberOfDigits, 0));
        for (int i = 0; i < numberOfDigits; i++) {
            // Clearing the sign bit always yields a value in [0, Integer.MAX_VALUE].
            // Math.abs(random.nextInt()) would return a negative number for Integer.MIN_VALUE.
            list.add(random.nextInt() & Integer.MAX_VALUE);
        }

        LOGGER.info("Sorting created list of " + numberOfDigits + " ...");
        Collections.sort(list);
        LOGGER.info("Sorting created list of " + numberOfDigits + " complete");

        LOGGER.info("Creation list of " + numberOfDigits + " sorted digits complete");
        return list;
    }

    /**
     * Writes the first {@code numberOfIntegerToWrite} entries of {@code integerList} to the
     * HDFS file {@code /result}, one value per line, replacing the file if it already exists.
     * If the list holds fewer entries than requested, only the available entries are written.
     *
     * @param integerList            integers to write, in the order they should appear in the file
     * @param numberOfIntegerToWrite maximum number of entries to write
     * @throws URISyntaxException if the HDFS URI cannot be parsed
     * @throws IOException        if any HDFS operation fails
     */
    private void writeRecordsToHDFS(List<Integer> integerList, int numberOfIntegerToWrite) throws URISyntaxException, IOException {
        LOGGER.info("Writing " + numberOfIntegerToWrite + " in sorted list to HDFS in /" + OUTPUT_FILE_NAME + " ...");
        // TODO can be enhanced via
        // String hostname = System.getenv("HOSTNAME");

        String hostname = "172.17.0.2";
        String hdfsUri = "hdfs://" + hostname + ":9000";
        // Named differently from the injected field so it no longer shadows it.
        Configuration hdfsConfiguration = new Configuration();
        FileSystem fileSystem = FileSystem.get(new URI(hdfsUri), hdfsConfiguration);
        try {
            Path file = new Path(hdfsUri + "/" + OUTPUT_FILE_NAME);
            if (fileSystem.exists(file)) {
                fileSystem.delete(file, true);
            }

            // Never read past the end of the list.
            int recordsToWrite = Math.min(numberOfIntegerToWrite, integerList.size());

            OutputStream outputStream = fileSystem.create(file);
            try {
                BufferedWriter bufferedWriter = new BufferedWriter(new OutputStreamWriter(outputStream, "UTF-8"));
                // TODO can be rewritten via streams
                for (int i = 0; i < recordsToWrite; i++) {
                    bufferedWriter.write(String.valueOf(integerList.get(i)));
                    bufferedWriter.write("\n");
                }
                // Push buffered characters to the stream before it is closed below.
                bufferedWriter.flush();
            } finally {
                // Close the HDFS stream even when a write fails.
                outputStream.close();
            }
        } finally {
            // NOTE(review): FileSystem.get may hand out a shared, cached instance; closing it
            // here mirrors the original behavior - confirm nothing else in this JVM relies on it.
            fileSystem.close();
        }

        LOGGER.info("Writing " + numberOfIntegerToWrite + " in sorted list to HDFS in /" + OUTPUT_FILE_NAME + " complete");
    }

}

This file was deleted.

12 changes: 0 additions & 12 deletions Homework5/gs-yarn-basic-dist/execute-job.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,10 @@ echo "NodeManager started. Hadoop cluster initialized. Leaving NameNode from Saf
bin/hdfs dfsadmin -safemode leave
echo "NameNode leaved SafeMode state"


#echo "Port 9000" >> /etc/ssh/sshd_config
#service sshd restart

#cd $HADOOP_PREFIX/etc/hadoop
#sed 's/<value>.*/<value>hdfs:\/\/localhost:9000<\/value>/' core-site.xml.template > core-site.xml
#
#cd $HADOOP_PREFIX/sbin
#sh stop-all.sh
#sh start-all.sh

echo "Staring history server"
./sbin/mr-jobhistory-daemon.sh start historyserver
echo "History server started"

export HADOOP_CLIENT_OPTS="-Xmx4g -Xmn1g -Xms4g $HADOOP_CLIENT_OPTS"
echo "Running a job ..."
java -jar /opt/gs-yarn-basic-client-1.0-SNAPSHOT.jar
echo "Job has finished"
9 changes: 7 additions & 2 deletions Homework5/gs-yarn-basic-dist/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,12 @@
todir="${remote.host.name}:${remote.host.password}@${remote.host.ip}:/root/" />
<echo message="Coping ${file.name.to.copy.yarn.client} completed" />

<echo message="Coping ${yarn.site.settings.file.name} ..." />
<scp trust="true" failonerror="true" verbose="off" sftp="true"
file="./${yarn.site.settings.file.name}"
todir="${remote.host.name}:${remote.host.password}@${remote.host.ip}:/root/" />
<echo message="Coping ${yarn.site.settings.file.name} completed" />

<!--Copying job executor script -->
<echo message="Coping execute job file: ${job.executor.script.name} ..." />
<scp trust="true" failonerror="true" verbose="off" sftp="true"
Expand All @@ -134,11 +140,10 @@
host="${remote.host.ip}"
username="${remote.host.name}"
password="${remote.host.password}"
command="docker run -d -it -v $PWD:/opt --name ${docker.container.name} sequenceiq/hadoop-docker:2.7.1"
command="docker run -d -it -v $PWD:/opt -v $PWD/yarn-site.xml:/usr/local/hadoop/etc/hadoop/yarn-site.xml --name ${docker.container.name} -p 8088:8088 -p 8042:8042 sequenceiq/hadoop-docker:2.7.1"
timeout="120000"
usepty="true"
/>
<!--/etc/bootstrap.sh -bash-->
<echo message="Hadoop cluster in demon mode is running" />

<!--TODO: add a sed step that converts \r\n line endings to \n, in case the scripts are moved from Windows to Linux and fail for no obvious reason-->
Expand Down
44 changes: 44 additions & 0 deletions Homework5/gs-yarn-basic-dist/yarn-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
<!--Patched YARN settings: the NodeManager virtual-memory check is disabled, so virtual memory limits are not enforced for containers-->
<!--see https://github.com/sequenceiq/hadoop-docker/issues/56 for more info-->

<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>

<property>
<name>yarn.application.classpath</name>
<value>/usr/local/hadoop/etc/hadoop, /usr/local/hadoop/share/hadoop/common/*, /usr/local/hadoop/share/hadoop/common/lib/*, /usr/local/hadoop/share/hadoop/hdfs/*, /usr/local/hadoop/share/hadoop/hdfs/lib/*, /usr/local/hadoop/share/hadoop/mapreduce/*, /usr/local/hadoop/share/hadoop/mapreduce/lib/*, /usr/local/hadoop/share/hadoop/yarn/*, /usr/local/hadoop/share/hadoop/yarn/lib/*</value>
</property>

<property>
<description>
Number of seconds after an application finishes before the nodemanager's
DeletionService will delete the application's localized file directory
and log directory.

To diagnose Yarn application problems, set this property's value large
enough (for example, to 600 = 10 minutes) to permit examination of these
directories. After changing the property's value, you must restart the
nodemanager in order for it to have an effect.

The roots of Yarn applications' work directories is configurable with
the yarn.nodemanager.local-dirs property (see below), and the roots
of the Yarn applications' log directories is configurable with the
yarn.nodemanager.log-dirs property (see also below).
</description>
<name>yarn.nodemanager.delete.debug-delay-sec</name>
<value>600</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
<description>Whether virtual memory limits will be enforced for containers</description>
</property>
<property>
<name>yarn.nodemanager.vmem-pmem-ratio</name>
<value>4</value>
<description>Ratio between virtual memory to physical memory when setting memory limits for containers</description>
</property>
</configuration>
1 change: 1 addition & 0 deletions Homework5/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
<remote.host.password>12345678</remote.host.password>
<docker.container.name>homework5</docker.container.name>
<job.executor.script.name>execute-job.sh</job.executor.script.name>
<yarn.site.settings.file.name>yarn-site.xml</yarn.site.settings.file.name>

<container.module.name>gs-yarn-basic-container</container.module.name>
<app.master.module.name>gs-yarn-basic-appmaster</app.master.module.name>
Expand Down

0 comments on commit a2c9430

Please sign in to comment.