Commit
Merge pull request apache#4 from pbailis/improving-harness
Adding point cloud generator, additional command line params
pbailis committed Jul 22, 2014
2 parents db14f64 + f97c2b6 commit 7fb92c8
Showing 4 changed files with 277 additions and 67 deletions.
47 changes: 47 additions & 0 deletions commands.txt
@@ -0,0 +1,47 @@

# launch a cluster of 5 m2.4xlarge spot slaves; if the launch is interrupted,
# run the same command again with the --resume flag added (example below)
ec2/spark-ec2 --slaves=5 --spot-price=1.0 --instance-type=m2.4xlarge --region=us-west-2 --key-pair kaiju --identity-file=/Users/pbailis/.ssh/kaiju_rsa.pub launch testing
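# e.g., to resume an interrupted launch (same arguments, plus --resume):
ec2/spark-ec2 --slaves=5 --spot-price=1.0 --instance-type=m2.4xlarge --region=us-west-2 --key-pair kaiju --identity-file=/Users/pbailis/.ssh/kaiju_rsa.pub --resume launch testing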

# set MASTER to the cluster master's public hostname (printed when the launch finishes)
export MASTER=ec2-50-112-10-225.us-west-2.compute.amazonaws.com

ssh root@$MASTER
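# alternatively, spark-ec2's login action opens the same ssh session to the master:
ec2/spark-ec2 --key-pair kaiju --identity-file=/Users/pbailis/.ssh/kaiju_rsa.pub --region=us-west-2 login testing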

# on MASTER

# check out, build, and ship the assembly jar to the slaves
mv spark spark-old
git clone https://github.com/pbailis/spark.git
cd spark
git checkout --track origin/improving-harness
sbt/sbt assembly
cp -r ~/spark-old/conf/* conf/
yum install -y pssh; pssh -h ~/spark-ec2/slaves rm -rf ~/spark
cd ~; spark-ec2/copy-dir spark
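# optional sanity check: the experiment commands below expect the examples
# assembly jar at this path, so confirm it was built before shipping
ls ~/spark/examples/target/scala-*/spark-examples-*.jar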

# to load the data into HDFS
# first, attach the volume containing the datasets as /dev/sdp via the AWS web console
mkdir /mnt/testdata; mount /dev/sdp /mnt/testdata; cd /mnt/testdata
~/ephemeral-hdfs/bin/hadoop fs -put ./flights flights
~/ephemeral-hdfs/bin/hadoop fs -put ./weather weather
~/ephemeral-hdfs/bin/hadoop fs -put ./bismarck_data bismarck_data
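# optional sanity check: the datasets should now be visible under /user/root
~/ephemeral-hdfs/bin/hadoop fs -ls /user/root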

# to (re)start the standalone master and workers
cd ~/spark; sbin/stop-all.sh; sleep 5; sbin/start-all.sh
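# optional: the standalone master's web UI should come up on port 8080
curl -s http://localhost:8080 > /dev/null && echo "master web UI is up"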


# to run the experiments; piping through grep RESULT | cut -c 9- keeps only the
# result lines and strips their 8-character prefix (e.g. "RESULT: ")

# SVM trained via ADMM with L2 regularization on the 2008 flight data
cd ~/spark; ./bin/spark-submit --class org.apache.spark.examples.mllib.research.SynchronousADMMTests examples/target/scala-*/spark-examples-*.jar --algorithm SVMADMM --regType L2 --regParam 1.0 --input hdfs://$MASTER:9000/user/root/flights/2008* --format flights

# SVM via ADMM on the bismarck forest data, sweeping iterations from 1 to 12 in steps of 2
cd ~/spark; ./bin/spark-submit --class org.apache.spark.examples.mllib.research.SynchronousADMMTests examples/target/scala-*/spark-examples-*.jar --algorithm SVMADMM --regType L2 --regParam 1.0 --format bismarck --input hdfs://$MASTER:9000/user/root/bismarck_data/forest* --numPartitions 40 --sweepIterationStart 1 --sweepIterationEnd 12 --sweepIterationStep 2 | grep RESULT | cut -c 9-

# SVM on a synthetic point cloud: 10000 points per partition, 100 dimensions, 10% label noise, no partition skew
cd ~/spark; ./bin/spark-submit --class org.apache.spark.examples.mllib.research.SynchronousADMMTests examples/target/scala-*/spark-examples-*.jar --algorithm SVM --regType L2 --regParam 1.0 --format cloud --numPartitions 40 --pointCloudPointsPerPartition 10000 --pointCloudPartitionSkew 0 --pointCloudLabelNoise 0.1 --pointCloudDimension 100 --sweepIterationStart 1 --sweepIterationEnd 12 --sweepIterationStep 2 | grep RESULT | cut -c 9-

# logistic regression (LR) on the same synthetic point cloud
cd ~/spark; ./bin/spark-submit --class org.apache.spark.examples.mllib.research.SynchronousADMMTests examples/target/scala-*/spark-examples-*.jar --algorithm LR --regType L2 --regParam 1.0 --format cloud --numPartitions 40 --pointCloudPointsPerPartition 10000 --pointCloudPartitionSkew 0 --pointCloudLabelNoise 0.1 --pointCloudDimension 100 --sweepIterationStart 1 --sweepIterationEnd 12 --sweepIterationStep 2 | grep RESULT | cut -c 9-



# to rebuild THE EXPERIMENTS only (the examples project), redeploy to the slaves, and restart the cluster
cd ~/spark; git pull; sbt/sbt "project examples" "assembly"; cd ~; spark-ec2/copy-dir spark; cd spark; sbin/stop-all.sh; sleep 5; sbin/start-all.sh

# to rebuild ALL of Spark, redeploy, and restart
cd ~/spark; git pull; sbt/sbt assembly; cd ~; spark-ec2/copy-dir spark; cd spark; sbin/stop-all.sh; sleep 5; sbin/start-all.sh