From f4c957256224e978d4280650e47e171909c886ca Mon Sep 17 00:00:00 2001 From: skoulouzis Date: Tue, 10 Jan 2017 19:23:02 +0100 Subject: [PATCH] Added sample script to demo usage --- .../eu/edisonproject/rest/ECO2Controller.java | 14 ++ .../rest/FolderWatcherRunnable.java | 6 + scripts/courseAvg.json | 32 ----- scripts/e-co2-serviceExample.sh | 126 +++++++++++++++++- scripts/spiderPlot.gnu | 31 ----- 5 files changed, 144 insertions(+), 65 deletions(-) delete mode 100644 scripts/courseAvg.json mode change 100644 => 100755 scripts/e-co2-serviceExample.sh delete mode 100755 scripts/spiderPlot.gnu diff --git a/rest/src/main/java/eu/edisonproject/rest/ECO2Controller.java b/rest/src/main/java/eu/edisonproject/rest/ECO2Controller.java index 46fc339..2330384 100644 --- a/rest/src/main/java/eu/edisonproject/rest/ECO2Controller.java +++ b/rest/src/main/java/eu/edisonproject/rest/ECO2Controller.java @@ -175,6 +175,20 @@ public String getClassification(@PathParam("id") final String classificationId) return rf.readFile(); } + /** + * Providing the unique classification id this method profiles the classified + * document (represented by the unique classification id) against the all jobs + * available to this service. + * + * The method is asynchronous. After the call is made an id is immediately + * returned. + * + * @param classificationId the unique classification id (obtained from the + * classification methods) + * @return unique profile id + * @throws ParseException + * @throws IOException + */ @GET @Path("/profile/jobs/{id}") @Produces(MediaType.APPLICATION_JSON) diff --git a/rest/src/main/java/eu/edisonproject/rest/FolderWatcherRunnable.java b/rest/src/main/java/eu/edisonproject/rest/FolderWatcherRunnable.java index ce89c18..dc5ab50 100644 --- a/rest/src/main/java/eu/edisonproject/rest/FolderWatcherRunnable.java +++ b/rest/src/main/java/eu/edisonproject/rest/FolderWatcherRunnable.java @@ -147,6 +147,12 @@ private File executeClassification(File inputFolder) throws Exception { } } else { + File v1 = new File(inputFolder.getAbsolutePath() + File.separator + CSV_FILE_NAME); + int count = 0; + while (!v1.exists() || count < 10) { + Thread.sleep(100); + count++; + } String[] args = new String[]{"-op", "p", "-v1", inputFolder.getAbsolutePath() + File.separator + CSV_FILE_NAME, "-v2", inputFolder.getAbsolutePath() + File.separator + "list.csv", "-o", inputFolder.getAbsolutePath(), "-p", propertiesFile.getAbsolutePath()}; BatchMain.main(args); diff --git a/scripts/courseAvg.json b/scripts/courseAvg.json deleted file mode 100644 index 6e2a64e..0000000 --- a/scripts/courseAvg.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "action plan strategies dsrm04": 0.14729358880341828, - "ipr and ethical issues dsdm06": 0.06979013802166481, - "complex data visulization dsda06": 0.05036823755848768, - "improve existing services dsdk02": 0.16214824940022457, - "new data analytics applications dseng06": 0.17088954165451378, - "contribution to research objectives dsrm05": 0.15212617127717568, - "customer data analysis dsdk05": 0.11748348126851409, - "data collection and integration dsdm03": 0.12012092376169685, - "participation financial decisions dsdk03": 0.3115664501338832, - "innovative ideas development dsrm06": 0.08759777728374989, - "systematic study dsrm02": 0.23488655591648602, - "data models dsdm02": 0.09176347394357641, - "engineering principles dseng01": 0.18127054565891734, - "analytic support to other organisation dsdk04": 0.16906242142163008, - "predictive analytics dsda01": 0.2629690532588154, - "relational and non-relational databases dseng04": 0.1615024645988167, - "statistical techniques dsda02": 0.24664371573846036, - "analytics for decision making dsda03": 0.14150408093732084, - "computational science dseng02": 0.18042291840332686, - "repository of analysis history dsdm04": 0.07265828141721652, - "scientific method dsrm01": 0.07274431582440816, - "data management plan dsdm01": 0.07348400822341768, - "big data analytics platform dsda05": 0.07685600164446506, - "devise new applications dsrm03": 0.23474809512535283, - "business process dsdk01": 0.2374214586636118, - "security service management dseng05": 0.09944975445102881, - "data blending dsda04": 0.11077596732331448, - "analysis tools for decision making dseng03": 0.19934145188896019, - "data curation dsdm05": 0.11508241305581697, - "marketing data analysis dsdk06": 0.18921848770830288 -} diff --git a/scripts/e-co2-serviceExample.sh b/scripts/e-co2-serviceExample.sh old mode 100644 new mode 100755 index bdeec93..0efb092 --- a/scripts/e-co2-serviceExample.sh +++ b/scripts/e-co2-serviceExample.sh @@ -1,7 +1,129 @@ #!/bin/bash + +# +# 1. An educator wants to create a course description and wants to see: +# How dose the job market and education align +# 2. An educator has created a course description and wants to see how it compares with: +# The average job market +# The average of the offered courses +# Top 3 most similar course +# 3. A practitioner has her/his CV and wants to compere it with: +# The average job market +# Top 3 most similar job posts + HOST=localhost PORT=9999 -curl -X GET http://$HOST:$PORT/e-co2/average/course | jq . > courseAvg.json -curl -X GET http://$HOST:$PORT/e-co2/average/job | jq . > jobAvg.json \ No newline at end of file + +function scenario1 { +# Get education average + curl -X GET http://$HOST:$PORT/e-co2/average/course -o courseAvg.json +# Build csv header + echo "field,action_plan_strategies_dsrm04,analysis_tools_for_decision_making_dseng03,analytic_support_to_other_organisation_dsdk04,analytics_for_decision_making_dsda03,big_data_analytics_platform_dsda05,business_process_dsdk01,complex_data_visulization_dsda06,computational_science_dseng02,contribution_to_research_objectives_dsrm05,customer_data_analysis_dsdk05,data_blending_dsda04,data_collection_and_integration_dsdm03,data_curation_dsdm05,data_management_plan_dsdm01,data_models_dsdm02,devise_new_applications_dsrm03,engineering_principles_dseng01,improve_existing_services_dsdk02,innovative_ideas_development_dsrm06,ipr_and_ethical_issues_dsdm06,marketing_data_analysis_dsdk06,new_data_analytics_applications_dseng06,participation_financial_decisions_dsdk03,predictive_analytics_dsda01,relational_and_non-relational_databases_dseng04,repository_of_analysis_history_dsdm04,scientific_method_dsrm01,security_service_management_dseng05,statistical_techniques_dsda02,systematic_study_dsrm02" > avg.csv +# Convert JSON to CSV + education=`jq -r '[.["action_plan_strategies_dsrm04"],.["analysis_tools_for_decision_making_dseng03"],.["analytic_support_to_other_organisation_dsdk04"],.["analytics_for_decision_making_dsda03"],.["big_data_analytics_platform_dsda05"],.["business_process_dsdk01"],.["complex_data_visulization_dsda06"],.["computational_science_dseng02"],.["contribution_to_research_objectives_dsrm05"],.["customer_data_analysis_dsdk05"],.["data_blending_dsda04"],.["data_collection_and_integration_dsdm03"],.["data_curation_dsdm05"],.["data_management_plan_dsdm01"],.["data_models_dsdm02"],.["devise_new_applications_dsrm03"],.["engineering_principles_dseng01"],.["improve_existing_services_dsdk02"],.["innovative_ideas_development_dsrm06"],.["ipr_and_ethical_issues_dsdm06"],.["marketing_data_analysis_dsdk06"],.["new_data_analytics_applications_dseng06"],.["participation_financial_decisions_dsdk03"],.["predictive_analytics_dsda01"],.["relational_and_non-relational_databases_dseng04"],.["repository_of_analysis_history_dsdm04"],.["scientific_method_dsrm01"],.["security_service_management_dseng05"],.["statistical_techniques_dsda02"],.["systematic_study_dsrm02"]] | @csv' courseAvg.json` + echo "education,$education" >> avg.csv + +# Get job market average + curl -X GET http://$HOST:$PORT/e-co2/average/job -o jobAvg.json + + # Convert JSON to CSV + jobMarket=`jq -r '[.["action_plan_strategies_dsrm04"],.["analysis_tools_for_decision_making_dseng03"],.["analytic_support_to_other_organisation_dsdk04"],.["analytics_for_decision_making_dsda03"],.["big_data_analytics_platform_dsda05"],.["business_process_dsdk01"],.["complex_data_visulization_dsda06"],.["computational_science_dseng02"],.["contribution_to_research_objectives_dsrm05"],.["customer_data_analysis_dsdk05"],.["data_blending_dsda04"],.["data_collection_and_integration_dsdm03"],.["data_curation_dsdm05"],.["data_management_plan_dsdm01"],.["data_models_dsdm02"],.["devise_new_applications_dsrm03"],.["engineering_principles_dseng01"],.["improve_existing_services_dsdk02"],.["innovative_ideas_development_dsrm06"],.["ipr_and_ethical_issues_dsdm06"],.["marketing_data_analysis_dsdk06"],.["new_data_analytics_applications_dseng06"],.["participation_financial_decisions_dsdk03"],.["predictive_analytics_dsda01"],.["relational_and_non-relational_databases_dseng04"],.["repository_of_analysis_history_dsdm04"],.["scientific_method_dsrm01"],.["security_service_management_dseng05"],.["statistical_techniques_dsda02"],.["systematic_study_dsrm02"]] | @csv' jobAvg.json` + echo "jobMarket,$jobMarket" >> avg.csv +} + + + function scenario2 { +# Classify course + cID=`curl -H "Content-Type: application/json" -X POST -d '{"id":"uid_3","title":"Data Analytics MSc","contents":" Core courses Preliminary Mathematics for Statisticians 1 Probability 2 Statistical Inference2 Regression Models2 Introduction to R Programming Data Management and Analytics using SAS Bayesian Statistics Generalised Linear Models Big Data Analytics One Course is optional for students with sufficient background in Linear Algebra and Calculus. Two students who have already completed an equivalent course can substitute this course by any other optional course, including optional courses offered as part of the MRes in Advanced Statistics (see the website for details). In your project (60 credits) you will model data collected from research in environmental science, assessed by a dissertation. Optional courses Choose two courses from group 1, one course from group 2 with the remaining courses coming from groups 1, 2 or 3. Group 1 Programming Artificial Intelligence Information Retrieval Machine Learning Group 2 Professional Skills Data Analysis Group 3 Biostatistics Multivariate Methods Time Series Design of Experiments Stochastic Processes Environmental Statistics Financial Statistics Statistical Genetics Spatial Statistics Functional Data Analysis In your project (60 credits) you will tackle a complex data analytical problem or develop novel approaches to solving data analytical challenges. The Data Lab We work closely with The Data Lab, an internationally leading research and innovation centre in data science."}' http://$HOST:$PORT/e-co2/classification/course` + + + + # Get job market average + curl -X GET http://$HOST:$PORT/e-co2/average/job -o jobAvg.json + # Get education average + curl -X GET http://$HOST:$PORT/e-co2/average/course -o courseAvg.json + + +# wait for classification to end + resp=`curl -X GET http://$HOST:$PORT/e-co2/classification/$cID` + while [ $resp == "202" ] + do + echo $cID not done + sleep 20 + resp=`curl -X GET http://$HOST:$PORT/e-co2/classification/$cID` + done + +# Get the classification + curl -X GET http://$HOST:$PORT/e-co2/classification/$cID -o $cID"_classification.json" + +# Now we can ask for profiling. Returns an ordered list of the most similar courses it also includes a "rank" field + pID=`curl -X GET http://$HOST:$PORT/e-co2/profile/courses/$cID` + + # wait for profiling to end + resp=`curl -X GET http://$HOST:$PORT/e-co2/profile/$pID` + while [ $resp == "202" ] + do + echo $pID not done + sleep 20 + resp=`curl -X GET http://$HOST:$PORT/e-co2/profile/$pID` + done + + curl -X GET http://$HOST:$PORT/e-co2/profile/$pID -o $pID"_profile.json" +} + + + + function scenario3We have the following scenarios in mind to demonstrate the e-co-2 service with the help of the portal: + + An educator wants to create a course description and wants to see: + How dose the job market and education align + An educator has created a course description and wants to see how it compares with: + The average job market + The average of the offered courses + Top 3 most similar course + A practitioner has her/his CV and wants to compere it with: + The average job market + Top 3 most similar job posts { +# Classify cv + cID=`curl -H "Content-Type: application/json" -X POST -d '{"id":"uid_4","title":"Sample Resume Data Scientist","contents":"Professional Profile A former Ruby and Java programmer with newly acquired skills, an insatiable intellectual curiosity, and the ability to mine hidden gems located within large sets of structured, semi-structured and unstructured data. Able to leverage a heavy dose of mathematics and applied statistics with visualization and a healthy sense of exploration. Education Grad Certificate, Data Mining 2012University of California, San Diego Relevant Courses: Data Mining Methods and Techniques, Data Preparation for Data Mining and Advanced Methods and Applications B.S. Computer Science 2009San Francisco State University Core Competencies Strategic Thinking: Able to influence the strategic direction of the company by identifying opportunities in large, rich data sets and creating and implementing data driven strategies that fuel growth including revenue and profits. Modeling: Design and implement statistical / predictive models and cutting edge algorithms utilizing diverse sources of data to predict demand, risk and price elasticity. Experience with creating ETL processes to source and link data. Analytics: Utilize analytical applications like SAS to identify trends and relationships between different pieces of data, draw appropriate conclusions and translate analytical findings into risk management and marketing strategies that drive value. Drive Enhancements: Develop tools and reports that help users access and analyze data resulting in higher revenues and margins and a better customer experience. Communications and Project Management: Capable of turning dry analysis into an exciting story that influences the direction of the business and communicating with diverse teams to take a project from start to finish. Collaborate with product teams to develop and support our internal data platform and to support ongoing analyses. Skills and Tools NoSQL data stores (Cassandra, MongoDB) Hadoop, MySQL, Big Table, MapReduce, SAS Large-scale, distributed systems design and development Scaling, performance and scheduling and ETL techniques C, C++, Java, Ruby on Rails Experience Cool Jeans San Francisco, CA Data Analyst2012-present Work closely with various teams across the company to identify and solve business challenges utilizing large structured, semi-structured, and unstructured data in a distributed processing environment. Develop a new pricing strategy for Total Jeans that boosted margins by 2 percent and analyzed customer buying habits which correctly predicated the resurgence of dark blue denim giving us a jump on the competition. Analyze large datasets to provide strategic direction to the company. Perform quantitative analysis of product sales trends to recommend pricing decisions. Conduct cost and benefit analysis on new ideas. Scrutinize and track customer behavior to identify trends and unmet needs. Develop statistical models to forecast inventory and procurement cycles. Assist in developing internal tools for data analysis. Programmer 2010-2011 Coded, tested, debugged, implemented and documented apps using Java and Ruby. Developed eCommerce solutions and social networking functionality. Designed, developed and maintained eCommerce and social networking applications. Built report interfaces and data feeds. Gathered and collected information from various programs, analyzed time requirements and prepared documentation to change existing programs. Professional Affiliations and Education NoSQL and Big Data Conference 2013 Hadoop Hackathon: Learn Map Reduce 2013 Member, Silicon Valley Big Data Meetup"}' http://$HOST:$PORT/e-co2/classification/cv` + + + # Get job market average + curl -X GET http://$HOST:$PORT/e-co2/average/job -o jobAvg.json + +# wait for classification to end + resp=`curl -X GET http://$HOST:$PORT/e-co2/classification/$cID` + while [ $resp == "202" ] + do + echo $cID not done + sleep 20 + resp=`curl -X GET http://$HOST:$PORT/e-co2/classification/$cID` + done + +# Get the classification + curl -X GET http://$HOST:$PORT/e-co2/classification/$cID -o $cID"_classification.json" + +# Now we can ask for profiling. Returns an ordered list of the most similar jobs it also includes a "rank" field + pID=`curl -X GET http://$HOST:$PORT/e-co2/profile/jobs/$cID` + + # wait for profiling to end + resp=`curl -X GET http://$HOST:$PORT/e-co2/profile/$pID` + while [ $resp == "202" ] + do + echo $pID not done + sleep 20 + resp=`curl -X GET http://$HOST:$PORT/e-co2/profile/$pID` + done + + curl -X GET http://$HOST:$PORT/e-co2/profile/$pID -o $pID"_profile.json" +} + + + + + +scenario1 +scenario2 +scenario3 \ No newline at end of file diff --git a/scripts/spiderPlot.gnu b/scripts/spiderPlot.gnu deleted file mode 100755 index 65fd7e7..0000000 --- a/scripts/spiderPlot.gnu +++ /dev/null @@ -1,31 +0,0 @@ -set terminal svg -set output 'simple.1.svg' - -unset border -set polar -set angles degrees -set term pngcairo enhanced size 800,800 -set output 'graph_eff_10_3_accuracy_spider.png' -set xtics axis -set ytics axis -set grid polar 90 -set style line 10 lt 1 lc 0 lw 0.3 # line style for the grid -set grid ls 10 -set xrange[-1.5:1.5] -set yrange[-1.5:1.5] -set size square -set lmargin 12 -set rmargin 12 -set key font ',12' -set title font ',20' - -set_label(x, text) = sprintf("set label '%s' at (1.8*cos(%f)), (1.7*sin(%f)) center", text, x, x) #this places a label on the outside -eval set_label(0, "Answer 1") -eval set_label(90, "Answer 2") -eval set_label(180, "Answer 3") -eval set_label(270, "Answer 4") - -set linetype 1 lc rgb 'blue' lw 2 pt 7 ps 2 # right, post -set linetype 2 lc rgb 'red' lw 2 pt 7 ps 2 # wrong, post - -plot "answers_in_post.csv" using 1:2 title '(IN, R2)' with lp lt 1 \ No newline at end of file