Skip to content
This repository has been archived by the owner on Nov 9, 2020. It is now read-only.

Commit

Permalink
Merge pull request #2 from blehman/master
Browse files Browse the repository at this point in the history
added path option
  • Loading branch information
DrSkippy committed Nov 27, 2013
2 parents 1e027b9 + 49f6cce commit db9b18b
Showing 1 changed file with 45 additions and 0 deletions.
45 changes: 45 additions & 0 deletions src/get_data_files.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/usr/bin/env bash
# utility to download historical data set from S3
# Block until fewer than MAXPROCS processes matching procName are running.
#
# Globals read:
#   procName  - pattern handed to `pgrep -f` (matched against full command
#               lines); set prior to calling, e.g. procName=$xmlparser
#   MAXPROCS  - maximum number of matching processes allowed at once (>= 1)
#   SLEEPTIME - seconds to sleep between polls
# Returns:
#   0 once a slot is free, 1 if procName is empty or unset.
waitForNProcs()
{
  local nprocs

  # Without a pattern there is nothing meaningful to count; report and bail
  # out instead of letting pgrep fail with a usage error.
  if [ -z "${procName:-}" ]; then
    echo "waitForNProcs: procName is not set" >&2
    return 1
  fi

  nprocs=$(pgrep -f "$procName" | wc -l)
  # Compare with -ge, not -gt: the caller starts one more process right after
  # this returns, so returning at nprocs == MAXPROCS would allow MAXPROCS + 1.
  while [ "$nprocs" -ge "$MAXPROCS" ]; do
    sleep "$SLEEPTIME"
    nprocs=$(pgrep -f "$procName" | wc -l)
  done
}
# Configure
# Interval waitForNProcs sleeps between checks of the running-process count.
SLEEPTIME=15 # seconds
# Threshold waitForNProcs compares the number of matching processes against.
MAXPROCS=8 # cores?
# Pattern waitForNProcs passes to `pgrep -f`; the download loop also runs it
# as the download command.
procName=curl
# To run from a different directory, update AUTOPATH below.
# NOTE(review): the timestamp on the next line suggests a tool rewrites
# AUTOPATH automatically - confirm before editing these lines by hand.
# Auto updated: 2013-08-01 03:47:32.150603
# AUTOPATH=.
AUTOPATH=/home/blehman/Gnip-Python-Historical-Utilities/src
#
# Append AUTOPATH to PYTHONPATH and locate the name-mangling script that the
# download loop pipes each data_files.txt entry through.
export PYTHONPATH=${PYTHONPATH}:$AUTOPATH
mangler="$AUTOPATH/name_mangle.py"

# Download every entry listed in ./data_files.txt into ./data, running the
# downloads in the background and throttling them with waitForNProcs.
#
# Arguments:
#   $1 - optional subdirectory of ./data to place the files in
# Reads: procName (download command), mangler (script that maps an entry read
#        on stdin to a local file name on stdout), ./data_files.txt.
data_list=./data_files.txt
if [[ ! -r "$data_list" ]]; then
  echo "Cannot read $data_list" >&2
  exit 1
fi

# The optional first argument selects a subdirectory below ./data.
out_dir=./data
if [[ -n "${1:-}" ]]; then
  out_dir=./data/$1
fi
mkdir -p ./data

echo "Starting download at $(date)"
# Redirect into wc so only the count (not the file name) lands in the message;
# the arithmetic expansion strips padding some wc implementations add.
echo "Copying $(( $(wc -l < "$data_list") )) files:"

# `|| [[ -n "$fn" ]]` keeps a final line that lacks a trailing newline.
while read -r fn || [[ -n "$fn" ]]; do
  # Skip blank lines instead of launching a download with no URL.
  [[ -n "$fn" ]] || continue
  waitForNProcs
  filen=$(printf '%s\n' "$fn" | "$mangler")
  echo " copying file $fn to $filen..."
  # Pass the URL and target as quoted arguments so characters such as ? and &
  # are not word-split or glob-expanded by the shell.
  "$procName" "$fn" --create-dirs -o "$out_dir/$filen" &
done < "$data_list"

# Block until every background download has exited before reporting completion.
wait
echo "Download completed at $(date)"

0 comments on commit db9b18b

Please sign in to comment.