From 8e788000eda9699afb43438ae2b56e1b13b332a0 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Wed, 20 Sep 2023 13:52:54 -0500 Subject: [PATCH 01/19] updated docker compose using minio's default bucket generation (#64) * updated docker compose using minio's default bucket generation * fixed typo --- CHANGELOG.md | 3 ++- rabbitmq/docker-command-smile-traefik.sh | 5 +++++ rabbitmq/docker-compose-smile-traefik.yml | 12 +++++------- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dfa9084..d9f25a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,4 +18,5 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Docker compose file to work with new settings [42](https://github.com/ncsa/standalone-smm-analytics/issues/42) - Updated README with docker compose information [50](https://github.com/ncsa/standalone-smm-analytics/issues/50) - Created base image for sentiment analysis with model [55](https://github.com/ncsa/standalone-smm-analytics/issues/55) -- Created base image for name entity recognition with model [56](https://github.com/ncsa/standalone-smm-analytics/issues/56) \ No newline at end of file +- Created base image for name entity recognition with model [56](https://github.com/ncsa/standalone-smm-analytics/issues/56) +- Docker compose file updated to fix minio default bucket making [63](https://github.com/ncsa/standalone-smm-analytics/issues/63) \ No newline at end of file diff --git a/rabbitmq/docker-command-smile-traefik.sh b/rabbitmq/docker-command-smile-traefik.sh index ea1b9f4..5909fc7 100644 --- a/rabbitmq/docker-command-smile-traefik.sh +++ b/rabbitmq/docker-command-smile-traefik.sh @@ -18,9 +18,14 @@ export CILOGON_CLIENT_ID=<> export CILOGON_CLIENT_SECRET=<> export CILOGON_CALLBACK_URL=<> +# if the minio-api.server doesn't work, it should be ip address and port +# export MINIO_URL=http://xxx.xxx.xxx.xxx:9000 +# export MINIO_PUBLIC_ACCESS_URL=http://xxx.xxx.xxx.xxx:9000 export MINIO_URL=https://minio-api.${SERVER} export MINIO_PUBLIC_ACCESS_URL=https://minio-api.${SERVER} export BUCKET_NAME=macroscope-smile +# if the graphql.server doesn't work, it should use ip address and port +#export SMILE_GRAPHQL_URL=http://xxx.xxx.xxx.xxx:5050/graphql export SMILE_GRAPHQL_URL=https://graphql.${SERVER}/graphql # create mounted volumes on host machine diff --git a/rabbitmq/docker-compose-smile-traefik.yml b/rabbitmq/docker-compose-smile-traefik.yml index ea5f050..2dd95c7 100644 --- a/rabbitmq/docker-compose-smile-traefik.yml +++ b/rabbitmq/docker-compose-smile-traefik.yml @@ -48,14 +48,17 @@ services: - traefik.http.routers.rabbitmq.tls.certresolver=myresolver minio: - image: docker.io/bitnami/minio:2023.1.31-debian-11-r0 + image: minio/minio container_name: minio hostname: minio + ports: + - 9000:9000 volumes: - - "smile_content_data:/tmp" + - "smile_content_data:/data" environment: - MINIO_ROOT_USER=${AWS_ACCESSKEY} - MINIO_ROOT_PASSWORD=${AWS_ACCESSKEYSECRET} + command: server /data --console-address ":9001" restart: unless-stopped labels: - traefik.enable=true @@ -64,11 +67,6 @@ services: - traefik.http.routers.minio.service=minio - traefik.http.services.minio.loadbalancer.server.port=9001 - traefik.http.routers.minio.tls.certresolver=myresolver - - traefik.http.routers.minio-api.rule=Host(`minio-api.${SERVER}`) && PathPrefix(`/`) - - traefik.http.routers.minio-api.entrypoints=websecure - - traefik.http.routers.minio-api.service=minio-api - - traefik.http.services.minio-api.loadbalancer.server.port=9000 - - 
traefik.http.routers.minio-api.tls.certresolver=myresolver redis: image: redis From e2e0597195187d5f910a3308bc34bb4873690ee9 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Wed, 27 Sep 2023 13:48:48 -0500 Subject: [PATCH 02/19] 65 autophrase make an error (#66) * upgraded base ubuntu image for autophrase * added writeToS3.py file * updated writeToS3.py * fixed typo * added upload_results.py * use env variables instead of param * modified changelog --- CHANGELOG.md | 5 +- rabbitmq/autophrase/Dockerfile | 8 ++- rabbitmq/autophrase/generate_raw_train.py | 50 ++++++++++++++ rabbitmq/autophrase/requirement.txt | 4 +- rabbitmq/autophrase/upload_results.py | 63 +++++++++++++++++ rabbitmq/autophrase/writeToS3.py | 83 +++++++++++++++++++++++ 6 files changed, 208 insertions(+), 5 deletions(-) create mode 100644 rabbitmq/autophrase/generate_raw_train.py create mode 100644 rabbitmq/autophrase/upload_results.py create mode 100644 rabbitmq/autophrase/writeToS3.py diff --git a/CHANGELOG.md b/CHANGELOG.md index d9f25a6..77cd086 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,4 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Updated README with docker compose information [50](https://github.com/ncsa/standalone-smm-analytics/issues/50) - Created base image for sentiment analysis with model [55](https://github.com/ncsa/standalone-smm-analytics/issues/55) - Created base image for name entity recognition with model [56](https://github.com/ncsa/standalone-smm-analytics/issues/56) -- Docker compose file updated to fix minio default bucket making [63](https://github.com/ncsa/standalone-smm-analytics/issues/63) \ No newline at end of file +- Docker compose file updated to fix minio default bucket making [63](https://github.com/ncsa/standalone-smm-analytics/issues/63) + +### Fixed +- Autophrase error due to the code updates [65](https://github.com/ncsa/standalone-smm-analytics/issues/65) \ No newline at end of file diff --git a/rabbitmq/autophrase/Dockerfile b/rabbitmq/autophrase/Dockerfile index ff7981e..e79009c 100644 --- a/rabbitmq/autophrase/Dockerfile +++ b/rabbitmq/autophrase/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:18.04 +FROM ubuntu:20.04 # git clone autophrase algorithm RUN apt-get update \ && apt-get -y install git && apt-get -y install cron \ && cd / && git clone https://github.com/IllinoisSocialMediaMacroscope/SMILE-AutoPhrase.git AutoPhrase # overwrite WORKDIR /AutoPhrase COPY . 
./ - ENV RABBITMQ_HOST="rabbitmq" +# set environment variables to prevent interactive prompt during installation of openjdk +ENV DEBIAN_FRONTEND=noninteractive +ENV REGION=US +ENV TZ=America/Chicago + # install dependency libraries RUN apt-get -y update \ && apt-get -y install g++ openjdk-8-jdk curl python3-pip \ diff --git a/rabbitmq/autophrase/generate_raw_train.py b/rabbitmq/autophrase/generate_raw_train.py new file mode 100644 index 0000000..e112d52 --- /dev/null +++ b/rabbitmq/autophrase/generate_raw_train.py @@ -0,0 +1,50 @@ +import argparse +import csv + +import pandas as pd + +from writeToS3 import WriteToS3 + + +def main(s3, remoteReadPath, column): + filename = remoteReadPath.split('/')[-2] + '.csv' + s3.downloadToDisk(filename=filename, localpath='data/', remotepath=remoteReadPath) + + Array = [] + try: + with open('data/' + filename,'r',encoding="utf-8", errors="ignore") as f: + reader = csv.reader(f) + try: + for row in reader: + Array.append(row) + except Exception as e: + pass + except: + with open('data/' + filename,'r',encoding="ISO-8859-1", errors="ignore") as f: + reader = csv.reader(f) + try: + for row in reader: + Array.append(row) + except Exception as e: + pass + + df = pd.DataFrame(Array[1:],columns=Array[0]) + df[df[column]!=''][column].dropna().astype('str').to_csv('data/raw_train.txt', index=False) + + return None + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--remoteReadPath', required=True) + parser.add_argument('--column', required=True) + + # user specified parameters + parsed, unknown = parser.parse_known_args() + for arg in unknown: + if arg.startswith("--"): + parser.add_argument(arg, required=False) + + params = vars(parser.parse_args()) + + s3 = WriteToS3() + main(s3, params['remoteReadPath'], params['column']) diff --git a/rabbitmq/autophrase/requirement.txt b/rabbitmq/autophrase/requirement.txt index a8cd271..f8dbcb1 100644 --- a/rabbitmq/autophrase/requirement.txt +++ b/rabbitmq/autophrase/requirement.txt @@ -1,5 +1,5 @@ boto3>=1.6.11 -numpy==1.16.1 +numpy>=1.16.1 pandas>=0.24.2 plotly==2.7.0 -pika>=1.1.0 \ No newline at end of file +pika>=1.1.0 diff --git a/rabbitmq/autophrase/upload_results.py b/rabbitmq/autophrase/upload_results.py new file mode 100644 index 0000000..a7e4b05 --- /dev/null +++ b/rabbitmq/autophrase/upload_results.py @@ -0,0 +1,63 @@ +from os import listdir +from os.path import isfile, join +import argparse +import json +import notification as n +from writeToS3 import WriteToS3 + +def main(s3, remoteSavePath): + + output = {} + + for file in listdir('results'): + if isfile(join('results', file)): + s3.upload('results', remoteSavePath, file) + + if file == 'config.json': + output['config'] = s3.generate_downloads(remoteSavePath, file) + elif file == 'div.html': + output['visualization'] = s3.generate_downloads(remoteSavePath, file) + elif file == 'AutoPhrase_multi-words.txt': + output['multi-words'] = s3.generate_downloads(remoteSavePath, file) + elif file == 'AutoPhrase_single-word.txt': + output['single-word'] = s3.generate_downloads(remoteSavePath, file) + elif file == 'AutoPhrase.txt': + output['autophrase'] = s3.generate_downloads(remoteSavePath, file) + elif file == 'segmentation.model': + output['model'] = s3.generate_downloads(remoteSavePath, file) + elif file == 'token_mapping.txt': + output['token-mapping'] = s3.generate_downloads(remoteSavePath, file) + else: + output['misc'] = s3.generate_downloads(remoteSavePath, file) + + return output + +if __name__ == '__main__': + parser = 
argparse.ArgumentParser() + parser.add_argument('--uid', required=True) + parser.add_argument('--s3FolderName', required=True) + parser.add_argument('--remoteReadPath', required=True) + parser.add_argument('--column', required=True) + parser.add_argument('--minSup', required=True) + parser.add_argument('--email', required=True) + parser.add_argument('--sessionURL', required=True) + + # user specified parameters + parsed, unknown = parser.parse_known_args() + for arg in unknown: + if arg.startswith("--"): + parser.add_argument(arg, required=False) + + params = vars(parser.parse_args()) + + s3 = WriteToS3() + + # save parameters + fname = 'config.json' + with open(join('results', fname), "w") as f: + json.dump(params, f) + + awsPath = params['s3FolderName'] + '/NLP/autophrase/'+ params['uid'] + '/' + links = main(s3, awsPath) + n.notification(params['email'], case=3, filename=awsPath, links=links, + sessionURL=params['sessionURL']) \ No newline at end of file diff --git a/rabbitmq/autophrase/writeToS3.py b/rabbitmq/autophrase/writeToS3.py new file mode 100644 index 0000000..7e9c22d --- /dev/null +++ b/rabbitmq/autophrase/writeToS3.py @@ -0,0 +1,83 @@ +import mimetypes +import os + +import boto3 +from botocore.client import Config + + +class WriteToS3: + + def __init__(self): + + # local minio s3 + if os.environ['MINIO_PUBLIC_ACCESS_URL'] != "": + self.client = boto3.client('s3', endpoint_url=os.environ['MINIO_PUBLIC_ACCESS_URL'], + aws_access_key_id=os.environ['AWS_ACCESSKEY'], + aws_secret_access_key=os.environ['AWS_ACCESSKEYSECRET'], + config=Config(signature_version='s3v4')) + self.bucket_name = os.environ['BUCKET_NAME'] + + # remote aws s3 + else: + self.client = boto3.client('s3') + self.bucket_name = 'macroscope-smile' + + def upload(self, localpath, remotepath, filename): + content_type = mimetypes.guess_type(os.path.join(localpath, filename))[0] + print(filename, content_type) + if content_type == None: + extra_args = {'ContentType': 'application/octet-stream'} + else: + extra_args = {'ContentType': content_type} + + self.client.upload_file(os.path.join(localpath, filename), + self.bucket_name, + os.path.join(remotepath, filename), + ExtraArgs=extra_args) + + def createDirectory(self, DirectoryName): + self.client.put_object(Bucket=self.bucket_name, Key=DirectoryName) + + def generate_downloads(self, remotepath, filename): + url = self.client.generate_presigned_url( + ClientMethod='get_object', + Params={ + 'Bucket': self.bucket_name, + 'Key': os.path.join(remotepath, filename) + }, + ExpiresIn=604800 # one week + ) + + return url + + def downloadToDisk(self, filename, localpath, remotepath): + with open(os.path.join(localpath, filename), 'wb') as f: + self.client.download_fileobj(self.bucket_name, + os.path.join(remotepath, filename), f) + + def getObject(self, remoteKey): + obj = self.client.get_object(Bucket=self.bucket_name, Key=remoteKey) + + def putObject(self, body, remoteKey): + # bytes or seekable file-like object + obj = self.client.put_object(Bucket=self.bucket_name, + Body=body, Key=remoteKey) + print(obj['Body'].read()) + + def listDir(self, remoteClass): + objects = self.client.list_objects(Bucket=self.bucket_name, + Prefix=remoteClass, + Delimiter='/') + foldernames = [] + for o in objects.get('CommonPrefixes'): + foldernames.append(o.get('Prefix')) + + # only return the list of foldernames + return foldernames + + def listFiles(self, foldernames): + objects = self.client.list_objects(Bucket=self.bucket_name, + Prefix=foldernames) + + # return rich information about 
the files + return objects.get('Contents') \ No newline at end of file From 44f927b9ba8925f78a88da4d2be1912d06d13bcf Mon Sep 17 00:00:00 2001 From: Yong Wook Kim Date: Thu, 28 Sep 2023 11:59:59 -0500 Subject: [PATCH 03/19] created base docker images for autophrase --- CHANGELOG.md | 3 ++- rabbitmq/autophrase/Dockerfile | 7 +------ rabbitmq/autophrase/Dockerfile.base | 7 +++++++ 3 files changed, 10 insertions(+), 7 deletions(-) create mode 100644 rabbitmq/autophrase/Dockerfile.base diff --git a/CHANGELOG.md b/CHANGELOG.md index 77cd086..9206208 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Created base image for sentiment analysis with model [55](https://github.com/ncsa/standalone-smm-analytics/issues/55) - Created base image for name entity recognition with model [56](https://github.com/ncsa/standalone-smm-analytics/issues/56) - Docker compose file updated to fix minio default bucket making [63](https://github.com/ncsa/standalone-smm-analytics/issues/63) +- Autophrase uses base docker image [67](https://github.com/ncsa/standalone-smm-analytics/issues/67) ### Fixed -- Autophrase error due to the code updates [65](https://github.com/ncsa/standalone-smm-analytics/issues/65) \ No newline at end of file +- Autophrase error due to the code updates [65](https://github.com/ncsa/standalone-smm-analytics/issues/65) diff --git a/rabbitmq/autophrase/Dockerfile b/rabbitmq/autophrase/Dockerfile index e79009c..35021d9 100644 --- a/rabbitmq/autophrase/Dockerfile +++ b/rabbitmq/autophrase/Dockerfile @@ -1,9 +1,4 @@ -FROM ubuntu:20.04 - -# git clone autophrase algorithm -RUN apt-get update \ -&& apt-get -y install git && apt-get -y install cron \ -&& cd / && git clone https://github.com/IllinoisSocialMediaMacroscope/SMILE-AutoPhrase.git AutoPhrase +FROM socialmediamacroscope/autophrase:base # overwrite WORKDIR /AutoPhrase diff --git a/rabbitmq/autophrase/Dockerfile.base b/rabbitmq/autophrase/Dockerfile.base new file mode 100644 index 0000000..634d5e9 --- /dev/null +++ b/rabbitmq/autophrase/Dockerfile.base @@ -0,0 +1,7 @@ +FROM ubuntu:20.04 + +# git clone autophrase algorithm +RUN apt-get update \ +&& apt-get -y install git && apt-get -y install cron \ +&& cd / && git clone https://github.com/IllinoisSocialMediaMacroscope/SMILE-AutoPhrase.git AutoPhrase + From bc58b1588c28857db662de3fde972d9d88d72f1f Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Thu, 5 Oct 2023 16:57:21 -0500 Subject: [PATCH 04/19] Updated env variables to turn twitter and reddit on and off in the docker compose scripts --- CHANGELOG.md | 1 + rabbitmq/docker-command-smile-traefik.sh | 12 ++++++++---- rabbitmq/docker-command-smile.sh | 12 ++++++++---- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9206208..320a2ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Docker building script for whole components [23](https://github.com/ncsa/standalone-smm-analytics/issues/23) - Docker compose launch script [45](https://github.com/ncsa/standalone-smm-analytics/issues/45) - Docker compose file using traefik [46](https://github.com/ncsa/standalone-smm-analytics/issues/46) +- Environment variables to turn twitter and reddit on and off [73](https://github.com/ncsa/standalone-smm-analytics/issues/73) ### Changed - Hard coded rabbimq url changed to env variable [18](https://github.com/ncsa/standalone-smm-analytics/issues/18) diff 
--git a/rabbitmq/docker-command-smile-traefik.sh b/rabbitmq/docker-command-smile-traefik.sh index 5909fc7..c326db2 100644 --- a/rabbitmq/docker-command-smile-traefik.sh +++ b/rabbitmq/docker-command-smile-traefik.sh @@ -24,6 +24,8 @@ export CILOGON_CALLBACK_URL=<> export MINIO_URL=https://minio-api.${SERVER} export MINIO_PUBLIC_ACCESS_URL=https://minio-api.${SERVER} export BUCKET_NAME=macroscope-smile +export SHARE_EXPIRE_IN=1 + # if the graphql.server doesn't work, it should use ip address and port #export SMILE_GRAPHQL_URL=http://xxx.xxx.xxx.xxx:5050/graphql export SMILE_GRAPHQL_URL=https://graphql.${SERVER}/graphql @@ -38,19 +40,21 @@ export RABBITMQ_URL=amqp://${SERVER} export REDIS_URL=redis://redis # email notification -#export EMAIL_HOST=<> -#export EMAIL_PORT=465 -#export EMAIL_FROM_ADDRESS=<> -#export EMAIL_PASSWORD=<> +export EMAIL_HOST=<> +export EMAIL_PORT=465 +export EMAIL_FROM_ADDRESS=<> +export EMAIL_PASSWORD=<> # align with AWS export AWS_ACCESSKEY=<> export AWS_ACCESSKEYSECRET=<> # social media platforms +export REDDIT_ON=true export REDDIT_CLIENT_ID=<> export REDDIT_CLIENT_SECRET=<> export REDDIT_CALLBACK_URL=<> +export TWITTER_ON=true #export TWITTER_CONSUMER_KEY=<> #export TWITTER_CONSUMER_SECRET=<> export TWITTER_V2_CLIENT_ID=<> diff --git a/rabbitmq/docker-command-smile.sh b/rabbitmq/docker-command-smile.sh index 9f900b2..fcb184f 100644 --- a/rabbitmq/docker-command-smile.sh +++ b/rabbitmq/docker-command-smile.sh @@ -21,6 +21,8 @@ export CILOGON_CALLBACK_URL=<> export MINIO_URL=http://${SERVER}:9000/ export MINIO_PUBLIC_ACCESS_URL=http://${SERVER}:9000/ export BUCKET_NAME=macroscope-smile +export SHARE_EXPIRE_IN=1 + export SMILE_GRAPHQL_URL=http://${SERVER}:5050/graphql # create mounted volumes on host machine @@ -33,19 +35,21 @@ export RABBITMQ_HOST=${SERVER} export REDIS_URL=redis://redis:6379 # email notification -#export EMAIL_HOST=<> -#export EMAIL_PORT=465 -#export EMAIL_FROM_ADDRESS=<> -#export EMAIL_PASSWORD=<> +export EMAIL_HOST=<> +export EMAIL_PORT=465 +export EMAIL_FROM_ADDRESS=<> +export EMAIL_PASSWORD=<> # align with AWS export AWS_ACCESSKEY=<> export AWS_ACCESSKEYSECRET=<> # social media platforms +export REDDIT_ON=true export REDDIT_CLIENT_ID=<> export REDDIT_CLIENT_SECRET=<> export REDDIT_CALLBACK_URL=<> +export TWITTER_ON=true #export TWITTER_CONSUMER_KEY=<> #export TWITTER_CONSUMER_SECRET=<> export TWITTER_V2_CLIENT_ID=<> From d7406bf61e2954f7ee825878d8804d6aa559ea9a Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Fri, 6 Oct 2023 14:39:32 -0500 Subject: [PATCH 05/19] update --- .../name_entity_recognition/SmmExtractor.py | 125 ------------------ .../extractor.dockerfile | 23 ---- .../extractor_info.json | 38 ------ .../name_entity_recognition/requirement.txt | 1 - rabbitmq/network_analysis/SmmExtractor.py | 125 ------------------ .../network_analysis/extractor.dockerfile | 15 --- rabbitmq/network_analysis/extractor_info.json | 60 --------- rabbitmq/network_analysis/requirement.txt | 1 - rabbitmq/preprocessing/Dockerfile | 5 +- rabbitmq/preprocessing/SmmExtractor.py | 125 ------------------ rabbitmq/preprocessing/extractor.dockerfile | 18 --- rabbitmq/preprocessing/extractor_info.json | 64 --------- rabbitmq/preprocessing/requirement.txt | 1 - rabbitmq/sentiment_analysis/SmmExtractor.py | 125 ------------------ .../sentiment_analysis/extractor.dockerfile | 20 --- .../sentiment_analysis/extractor_info.json | 52 -------- rabbitmq/topic_modeling/Dockerfile | 5 +- rabbitmq/topic_modeling/SmmExtractor.py | 125 ------------------ 
rabbitmq/topic_modeling/extractor.dockerfile | 19 --- rabbitmq/topic_modeling/extractor_info.json | 47 ------- rabbitmq/topic_modeling/requirement.txt | 1 - 21 files changed, 8 insertions(+), 987 deletions(-) delete mode 100644 rabbitmq/name_entity_recognition/SmmExtractor.py delete mode 100644 rabbitmq/name_entity_recognition/extractor.dockerfile delete mode 100644 rabbitmq/name_entity_recognition/extractor_info.json delete mode 100644 rabbitmq/network_analysis/SmmExtractor.py delete mode 100644 rabbitmq/network_analysis/extractor.dockerfile delete mode 100644 rabbitmq/network_analysis/extractor_info.json delete mode 100644 rabbitmq/preprocessing/SmmExtractor.py delete mode 100644 rabbitmq/preprocessing/extractor.dockerfile delete mode 100644 rabbitmq/preprocessing/extractor_info.json delete mode 100644 rabbitmq/sentiment_analysis/SmmExtractor.py delete mode 100644 rabbitmq/sentiment_analysis/extractor.dockerfile delete mode 100644 rabbitmq/sentiment_analysis/extractor_info.json delete mode 100644 rabbitmq/topic_modeling/SmmExtractor.py delete mode 100644 rabbitmq/topic_modeling/extractor.dockerfile delete mode 100644 rabbitmq/topic_modeling/extractor_info.json diff --git a/rabbitmq/name_entity_recognition/SmmExtractor.py b/rabbitmq/name_entity_recognition/SmmExtractor.py deleted file mode 100644 index 6aefd2c..0000000 --- a/rabbitmq/name_entity_recognition/SmmExtractor.py +++ /dev/null @@ -1,125 +0,0 @@ -#!/usr/bin/env python - -"""Example extractor based on the clowder code.""" -import pandas as pd -import json -import os -import csv -import types -import pickle - -import logging -from pyclowder.extractors import Extractor -import pyclowder.files - -from algorithm import algorithm - - -def save_local_output(localSavePath, fname, output_data): - """ - save output in memory first to local file - :param localSavePath: local saved file - :param remoteSavePath: remote save file path - :param fname: filename - :param output_data: the actual data - :return: local saved file path - """ - # json - if isinstance(output_data, dict): - fname += '.json' - with open(os.path.join(localSavePath, fname), 'w') as f: - json.dump(output_data, f) - - # dataframe to csv - elif isinstance(output_data, pd.DataFrame): - fname += '.csv' - output_data.to_csv(fname, encoding='utf-8') - - # string to html - elif isinstance(output_data, str): - fname += '.html' - with open(os.path.join(localSavePath, fname), 'w') as f: - f.write(output_data) - - # list(list) to csv - elif isinstance(output_data, list) \ - and (isinstance(output_data[0], list) or isinstance(output_data[0], - tuple)): - fname += '.csv' - with open(os.path.join(localSavePath, fname), 'w', newline='', - encoding='utf-8') as f: - writer = csv.writer(f) - for row in output_data: - try: - writer.writerow(row) - except UnicodeEncodeError as e: - print(e) - - # special case - elif isinstance(output_data, types.GeneratorType): - if fname == 'gephi': - fname += '.gml' - elif fname == 'pajek': - fname += '.net' - else: - fname += '.unknown' - - with open(os.path.join(localSavePath, fname), 'w', newline='', - encoding='utf-8') as f: - for line in output_data: - f.write(line + '\n') - - # else pickle the object - else: - fname += '.pickle' - with open(os.path.join(localSavePath, fname), 'wb') as f: - pickle.dump(output_data, f) - - return os.path.join(localSavePath, fname) - - -class SmmExtractor(Extractor): - """Count the number of characters, words and lines in a text file.""" - def __init__(self): - Extractor.__init__(self) - - # parse command line and load 
default logging configuration - self.setup() - - # setup logging for the exctractor - logging.getLogger('pyclowder').setLevel(logging.DEBUG) - logging.getLogger('__main__').setLevel(logging.DEBUG) - - def process_message(self, connector, host, secret_key, resource, parameters): - # this extractor runs on dataset - # uncomment to see the resource - logger = logging.getLogger(__name__) - inputfile = resource["local_paths"][0] - dataset_id = resource['parent'].get('id') - - df = pd.read_csv(inputfile) - connector.message_process(resource, "Loading contents of file...") - - # execute the algorithm - # Parse user parameters to determine which column to analyze - userParams = parameters.get('parameters') - output = algorithm(df, userParams) - connector.message_process(resource, "Running the algorithm...") - - # upload object to s3 bucket and return the url - for fname, output_data in output.items(): - if fname != 'uid': - local_output_path = save_local_output("", fname, output_data) - uploaded_file_id = pyclowder.files.upload_to_dataset(connector, host, secret_key, dataset_id, - local_output_path) - connector.message_process(resource, "Saving " + local_output_path + "...") - - # write params to metadata - metadata = self.get_metadata(userParams, 'file', uploaded_file_id, host) - pyclowder.files.upload_metadata(connector, host, secret_key, uploaded_file_id, metadata) - connector.message_process(resource, "Writing metadata...") - - -if __name__ == "__main__": - extractor = SmmExtractor() - extractor.start() diff --git a/rabbitmq/name_entity_recognition/extractor.dockerfile b/rabbitmq/name_entity_recognition/extractor.dockerfile deleted file mode 100644 index 89dd362..0000000 --- a/rabbitmq/name_entity_recognition/extractor.dockerfile +++ /dev/null @@ -1,23 +0,0 @@ -FROM ubuntu:18.04 - -RUN mkdir -p /scripts -WORKDIR /scripts - -# copy paste python scripts -COPY . 
./ - -# install dependency libraries -RUN apt-get update -RUN apt-get -y install python3-pip wget unzip - -# install dependency libraries and download required data -RUN pip3 install -r requirement.txt - -# download glove data -RUN cd ./data && wget http://nlp.stanford.edu/data/glove.twitter.27B.zip && unzip glove.twitter.27B.zip - -# Command to be run when container is run -# Can add heartbeat to change the refresh rate -CMD python3 SmmExtractor.py --heartbeat 40 - -ENV MAIN_SCRIPT="SmmExtractor.py" diff --git a/rabbitmq/name_entity_recognition/extractor_info.json b/rabbitmq/name_entity_recognition/extractor_info.json deleted file mode 100644 index 5a7794f..0000000 --- a/rabbitmq/name_entity_recognition/extractor_info.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "@context": "http://clowder.ncsa.illinois.edu/contexts/extractors.jsonld", - "name": "smm.name-entity.recognition", - "version": "0.1", - "description": "Description on this extractor", - "author": "Wang, Chen ", - "contributors": [], - "contexts": [{}], - "repository": [ - { - "repType": "git", - "repUrl": "https://github.com/ncsa/standalone-smm-analytics" - } - ], - "process": { - "file": [ - "manual" - ] - }, - "external_services": [], - "dependencies": [], - "bibtex": [], - "parameters": { - "schema": { - "column": { - "type": "string", - "title": "Text Column Header", - "default": "text" - } - }, - "form": [ - { - "key": "column", - "type": "text" - } - ] - } -} diff --git a/rabbitmq/name_entity_recognition/requirement.txt b/rabbitmq/name_entity_recognition/requirement.txt index 180c265..380bdc7 100644 --- a/rabbitmq/name_entity_recognition/requirement.txt +++ b/rabbitmq/name_entity_recognition/requirement.txt @@ -13,4 +13,3 @@ six==1.12.0 sklearn-crfsuite==0.3.6 joblib==0.13.2 pika>=1.1.0 -pyclowder==2.7.0 diff --git a/rabbitmq/network_analysis/SmmExtractor.py b/rabbitmq/network_analysis/SmmExtractor.py deleted file mode 100644 index 6aefd2c..0000000 --- a/rabbitmq/network_analysis/SmmExtractor.py +++ /dev/null @@ -1,125 +0,0 @@ -#!/usr/bin/env python - -"""Example extractor based on the clowder code.""" -import pandas as pd -import json -import os -import csv -import types -import pickle - -import logging -from pyclowder.extractors import Extractor -import pyclowder.files - -from algorithm import algorithm - - -def save_local_output(localSavePath, fname, output_data): - """ - save output in memory first to local file - :param localSavePath: local saved file - :param remoteSavePath: remote save file path - :param fname: filename - :param output_data: the actual data - :return: local saved file path - """ - # json - if isinstance(output_data, dict): - fname += '.json' - with open(os.path.join(localSavePath, fname), 'w') as f: - json.dump(output_data, f) - - # dataframe to csv - elif isinstance(output_data, pd.DataFrame): - fname += '.csv' - output_data.to_csv(fname, encoding='utf-8') - - # string to html - elif isinstance(output_data, str): - fname += '.html' - with open(os.path.join(localSavePath, fname), 'w') as f: - f.write(output_data) - - # list(list) to csv - elif isinstance(output_data, list) \ - and (isinstance(output_data[0], list) or isinstance(output_data[0], - tuple)): - fname += '.csv' - with open(os.path.join(localSavePath, fname), 'w', newline='', - encoding='utf-8') as f: - writer = csv.writer(f) - for row in output_data: - try: - writer.writerow(row) - except UnicodeEncodeError as e: - print(e) - - # special case - elif isinstance(output_data, types.GeneratorType): - if fname == 'gephi': - fname += '.gml' - elif 
fname == 'pajek': - fname += '.net' - else: - fname += '.unknown' - - with open(os.path.join(localSavePath, fname), 'w', newline='', - encoding='utf-8') as f: - for line in output_data: - f.write(line + '\n') - - # else pickle the object - else: - fname += '.pickle' - with open(os.path.join(localSavePath, fname), 'wb') as f: - pickle.dump(output_data, f) - - return os.path.join(localSavePath, fname) - - -class SmmExtractor(Extractor): - """Count the number of characters, words and lines in a text file.""" - def __init__(self): - Extractor.__init__(self) - - # parse command line and load default logging configuration - self.setup() - - # setup logging for the exctractor - logging.getLogger('pyclowder').setLevel(logging.DEBUG) - logging.getLogger('__main__').setLevel(logging.DEBUG) - - def process_message(self, connector, host, secret_key, resource, parameters): - # this extractor runs on dataset - # uncomment to see the resource - logger = logging.getLogger(__name__) - inputfile = resource["local_paths"][0] - dataset_id = resource['parent'].get('id') - - df = pd.read_csv(inputfile) - connector.message_process(resource, "Loading contents of file...") - - # execute the algorithm - # Parse user parameters to determine which column to analyze - userParams = parameters.get('parameters') - output = algorithm(df, userParams) - connector.message_process(resource, "Running the algorithm...") - - # upload object to s3 bucket and return the url - for fname, output_data in output.items(): - if fname != 'uid': - local_output_path = save_local_output("", fname, output_data) - uploaded_file_id = pyclowder.files.upload_to_dataset(connector, host, secret_key, dataset_id, - local_output_path) - connector.message_process(resource, "Saving " + local_output_path + "...") - - # write params to metadata - metadata = self.get_metadata(userParams, 'file', uploaded_file_id, host) - pyclowder.files.upload_metadata(connector, host, secret_key, uploaded_file_id, metadata) - connector.message_process(resource, "Writing metadata...") - - -if __name__ == "__main__": - extractor = SmmExtractor() - extractor.start() diff --git a/rabbitmq/network_analysis/extractor.dockerfile b/rabbitmq/network_analysis/extractor.dockerfile deleted file mode 100644 index f99c472..0000000 --- a/rabbitmq/network_analysis/extractor.dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -FROM python:3.8.5 - -RUN mkdir -p /scripts -WORKDIR /scripts - -COPY . 
./ - -RUN pip install --no-cache-dir -r requirement.txt - - -# Command to be run when container is run -# Can add heartbeat to change the refresh rate -CMD python3 SmmExtractor.py --heartbeat 40 - -ENV MAIN_SCRIPT="SmmExtractor.py" diff --git a/rabbitmq/network_analysis/extractor_info.json b/rabbitmq/network_analysis/extractor_info.json deleted file mode 100644 index 6467cdd..0000000 --- a/rabbitmq/network_analysis/extractor_info.json +++ /dev/null @@ -1,60 +0,0 @@ -{ - "@context": "http://clowder.ncsa.illinois.edu/contexts/extractors.jsonld", - "name": "smm.network.analysis", - "version": "0.1", - "description": "Description on this extractor", - "author": "Wang, Chen ", - "contributors": [], - "contexts": [{}], - "repository": [ - { - "repType": "git", - "repUrl": "https://github.com/ncsa/standalone-smm-analytics" - } - ], - "process": { - "file": [ - "manual" - ] - }, - "external_services": [], - "dependencies": [], - "bibtex": [], - "parameters": { - "schema": { - "relations": { - "type": "string", - "title": "Network relations", - "enum": [ - "retweet_from", - "reply_to", - "mentions" - ], - "default": "retweet_from" - }, - "layout": { - "type": "string", - "title": "Network layout", - "enum": [ - "spring", - "circular", - "fruchterman", - "random", - "shell", - "spectral" - ], - "default": "circular" - } - }, - "form": [ - { - "key": "relations", - "type": "select" - }, - { - "key": "layout", - "type": "select" - } - ] - } -} diff --git a/rabbitmq/network_analysis/requirement.txt b/rabbitmq/network_analysis/requirement.txt index 6dd8636..b2b4a03 100644 --- a/rabbitmq/network_analysis/requirement.txt +++ b/rabbitmq/network_analysis/requirement.txt @@ -4,4 +4,3 @@ pandas>=0.24.1 networkx==1.11 plotly==2.7.0 pika>=1.1.0 -pyclowder==2.7.0 diff --git a/rabbitmq/preprocessing/Dockerfile b/rabbitmq/preprocessing/Dockerfile index 2b41be5..a4a0422 100644 --- a/rabbitmq/preprocessing/Dockerfile +++ b/rabbitmq/preprocessing/Dockerfile @@ -9,8 +9,11 @@ COPY . 
./ ENV RABBITMQ_HOST="rabbitmq" RUN pip install --no-cache-dir -r requirement.txt \ -&& python3 -m nltk.downloader -d /usr/local/share/nltk_data punkt stopwords averaged_perceptron_tagger \ +&& python3 -m nltk.downloader -d /usr/local/share/nltk_data punkt stopwords averaged_perceptron_tagger wordnet\ # cron job clean tmp folder && chmod u+x ./clear_cache.sh \ && chmod 0644 ./clear_cache_cron \ && crontab ./clear_cache_cron + +# wordnet cannot unzip fix +RUN unzip /usr/local/share/nltk_data/corpora/wordnet.zip -d /usr/local/share/nltk_data/corpora diff --git a/rabbitmq/preprocessing/SmmExtractor.py b/rabbitmq/preprocessing/SmmExtractor.py deleted file mode 100644 index 6aefd2c..0000000 --- a/rabbitmq/preprocessing/SmmExtractor.py +++ /dev/null @@ -1,125 +0,0 @@ -#!/usr/bin/env python - -"""Example extractor based on the clowder code.""" -import pandas as pd -import json -import os -import csv -import types -import pickle - -import logging -from pyclowder.extractors import Extractor -import pyclowder.files - -from algorithm import algorithm - - -def save_local_output(localSavePath, fname, output_data): - """ - save output in memory first to local file - :param localSavePath: local saved file - :param remoteSavePath: remote save file path - :param fname: filename - :param output_data: the actual data - :return: local saved file path - """ - # json - if isinstance(output_data, dict): - fname += '.json' - with open(os.path.join(localSavePath, fname), 'w') as f: - json.dump(output_data, f) - - # dataframe to csv - elif isinstance(output_data, pd.DataFrame): - fname += '.csv' - output_data.to_csv(fname, encoding='utf-8') - - # string to html - elif isinstance(output_data, str): - fname += '.html' - with open(os.path.join(localSavePath, fname), 'w') as f: - f.write(output_data) - - # list(list) to csv - elif isinstance(output_data, list) \ - and (isinstance(output_data[0], list) or isinstance(output_data[0], - tuple)): - fname += '.csv' - with open(os.path.join(localSavePath, fname), 'w', newline='', - encoding='utf-8') as f: - writer = csv.writer(f) - for row in output_data: - try: - writer.writerow(row) - except UnicodeEncodeError as e: - print(e) - - # special case - elif isinstance(output_data, types.GeneratorType): - if fname == 'gephi': - fname += '.gml' - elif fname == 'pajek': - fname += '.net' - else: - fname += '.unknown' - - with open(os.path.join(localSavePath, fname), 'w', newline='', - encoding='utf-8') as f: - for line in output_data: - f.write(line + '\n') - - # else pickle the object - else: - fname += '.pickle' - with open(os.path.join(localSavePath, fname), 'wb') as f: - pickle.dump(output_data, f) - - return os.path.join(localSavePath, fname) - - -class SmmExtractor(Extractor): - """Count the number of characters, words and lines in a text file.""" - def __init__(self): - Extractor.__init__(self) - - # parse command line and load default logging configuration - self.setup() - - # setup logging for the exctractor - logging.getLogger('pyclowder').setLevel(logging.DEBUG) - logging.getLogger('__main__').setLevel(logging.DEBUG) - - def process_message(self, connector, host, secret_key, resource, parameters): - # this extractor runs on dataset - # uncomment to see the resource - logger = logging.getLogger(__name__) - inputfile = resource["local_paths"][0] - dataset_id = resource['parent'].get('id') - - df = pd.read_csv(inputfile) - connector.message_process(resource, "Loading contents of file...") - - # execute the algorithm - # Parse user parameters to determine which column to 
analyze - userParams = parameters.get('parameters') - output = algorithm(df, userParams) - connector.message_process(resource, "Running the algorithm...") - - # upload object to s3 bucket and return the url - for fname, output_data in output.items(): - if fname != 'uid': - local_output_path = save_local_output("", fname, output_data) - uploaded_file_id = pyclowder.files.upload_to_dataset(connector, host, secret_key, dataset_id, - local_output_path) - connector.message_process(resource, "Saving " + local_output_path + "...") - - # write params to metadata - metadata = self.get_metadata(userParams, 'file', uploaded_file_id, host) - pyclowder.files.upload_metadata(connector, host, secret_key, uploaded_file_id, metadata) - connector.message_process(resource, "Writing metadata...") - - -if __name__ == "__main__": - extractor = SmmExtractor() - extractor.start() diff --git a/rabbitmq/preprocessing/extractor.dockerfile b/rabbitmq/preprocessing/extractor.dockerfile deleted file mode 100644 index f97e9ce..0000000 --- a/rabbitmq/preprocessing/extractor.dockerfile +++ /dev/null @@ -1,18 +0,0 @@ -FROM python:3.8.5 - -RUN mkdir -p /scripts -WORKDIR /scripts - -COPY . ./ - -RUN pip install --no-cache-dir -r requirement.txt -RUN python3 -m nltk.downloader -d /usr/local/share/nltk_data punkt stopwords averaged_perceptron_tagger wordnet - -# wordnet cannot unzip fix -RUN unzip /usr/local/share/nltk_data/corpora/wordnet.zip -d /usr/local/share/nltk_data/corpora - -# Command to be run when container is run -# Can add heartbeat to change the refresh rate -CMD python3 SmmExtractor.py --heartbeat 40 - -ENV MAIN_SCRIPT="SmmExtractor.py" diff --git a/rabbitmq/preprocessing/extractor_info.json b/rabbitmq/preprocessing/extractor_info.json deleted file mode 100644 index ec9affe..0000000 --- a/rabbitmq/preprocessing/extractor_info.json +++ /dev/null @@ -1,64 +0,0 @@ -{ - "@context": "http://clowder.ncsa.illinois.edu/contexts/extractors.jsonld", - "name": "smm.preprocessing.analysis", - "version": "0.1", - "description": "Description on this extractor", - "author": "Wang, Chen ", - "contributors": [], - "contexts": [{}], - "repository": [ - { - "repType": "git", - "repUrl": "https://github.com/ncsa/standalone-smm-analytics" - } - ], - "process": { - "file": [ - "manual" - ] - }, - "external_services": [], - "dependencies": [], - "bibtex": [], - "parameters": { - "schema": { - "column": { - "type": "string", - "title": "Text Column Header", - "default": "text" - }, - "process": { - "type": "string", - "title": "Sentiment Analysis Algorithms", - "enum": [ - "lemmatization", - "stemming", - "both" - ], - "default": "lemmatization" - }, - "tagger": { - "type": "string", - "title": "Sentiment Analysis Algorithms", - "enum": [ - "posTag" - ], - "default": "posTag" - } - }, - "form": [ - { - "key": "column", - "type": "text" - }, - { - "key": "process", - "type": "select" - }, - { - "key": "tagger", - "type": "select" - } - ] - } -} diff --git a/rabbitmq/preprocessing/requirement.txt b/rabbitmq/preprocessing/requirement.txt index 69c130c..42ec019 100644 --- a/rabbitmq/preprocessing/requirement.txt +++ b/rabbitmq/preprocessing/requirement.txt @@ -5,4 +5,3 @@ plotly==2.7.0 nltk>=3.2.5 pika>=1.1.0 networkx==1.11 -pyclowder==2.7.0 diff --git a/rabbitmq/sentiment_analysis/SmmExtractor.py b/rabbitmq/sentiment_analysis/SmmExtractor.py deleted file mode 100644 index 6aefd2c..0000000 --- a/rabbitmq/sentiment_analysis/SmmExtractor.py +++ /dev/null @@ -1,125 +0,0 @@ -#!/usr/bin/env python - -"""Example extractor based on the clowder 
code.""" -import pandas as pd -import json -import os -import csv -import types -import pickle - -import logging -from pyclowder.extractors import Extractor -import pyclowder.files - -from algorithm import algorithm - - -def save_local_output(localSavePath, fname, output_data): - """ - save output in memory first to local file - :param localSavePath: local saved file - :param remoteSavePath: remote save file path - :param fname: filename - :param output_data: the actual data - :return: local saved file path - """ - # json - if isinstance(output_data, dict): - fname += '.json' - with open(os.path.join(localSavePath, fname), 'w') as f: - json.dump(output_data, f) - - # dataframe to csv - elif isinstance(output_data, pd.DataFrame): - fname += '.csv' - output_data.to_csv(fname, encoding='utf-8') - - # string to html - elif isinstance(output_data, str): - fname += '.html' - with open(os.path.join(localSavePath, fname), 'w') as f: - f.write(output_data) - - # list(list) to csv - elif isinstance(output_data, list) \ - and (isinstance(output_data[0], list) or isinstance(output_data[0], - tuple)): - fname += '.csv' - with open(os.path.join(localSavePath, fname), 'w', newline='', - encoding='utf-8') as f: - writer = csv.writer(f) - for row in output_data: - try: - writer.writerow(row) - except UnicodeEncodeError as e: - print(e) - - # special case - elif isinstance(output_data, types.GeneratorType): - if fname == 'gephi': - fname += '.gml' - elif fname == 'pajek': - fname += '.net' - else: - fname += '.unknown' - - with open(os.path.join(localSavePath, fname), 'w', newline='', - encoding='utf-8') as f: - for line in output_data: - f.write(line + '\n') - - # else pickle the object - else: - fname += '.pickle' - with open(os.path.join(localSavePath, fname), 'wb') as f: - pickle.dump(output_data, f) - - return os.path.join(localSavePath, fname) - - -class SmmExtractor(Extractor): - """Count the number of characters, words and lines in a text file.""" - def __init__(self): - Extractor.__init__(self) - - # parse command line and load default logging configuration - self.setup() - - # setup logging for the exctractor - logging.getLogger('pyclowder').setLevel(logging.DEBUG) - logging.getLogger('__main__').setLevel(logging.DEBUG) - - def process_message(self, connector, host, secret_key, resource, parameters): - # this extractor runs on dataset - # uncomment to see the resource - logger = logging.getLogger(__name__) - inputfile = resource["local_paths"][0] - dataset_id = resource['parent'].get('id') - - df = pd.read_csv(inputfile) - connector.message_process(resource, "Loading contents of file...") - - # execute the algorithm - # Parse user parameters to determine which column to analyze - userParams = parameters.get('parameters') - output = algorithm(df, userParams) - connector.message_process(resource, "Running the algorithm...") - - # upload object to s3 bucket and return the url - for fname, output_data in output.items(): - if fname != 'uid': - local_output_path = save_local_output("", fname, output_data) - uploaded_file_id = pyclowder.files.upload_to_dataset(connector, host, secret_key, dataset_id, - local_output_path) - connector.message_process(resource, "Saving " + local_output_path + "...") - - # write params to metadata - metadata = self.get_metadata(userParams, 'file', uploaded_file_id, host) - pyclowder.files.upload_metadata(connector, host, secret_key, uploaded_file_id, metadata) - connector.message_process(resource, "Writing metadata...") - - -if __name__ == "__main__": - extractor = 
SmmExtractor() - extractor.start() diff --git a/rabbitmq/sentiment_analysis/extractor.dockerfile b/rabbitmq/sentiment_analysis/extractor.dockerfile deleted file mode 100644 index f46bfdb..0000000 --- a/rabbitmq/sentiment_analysis/extractor.dockerfile +++ /dev/null @@ -1,20 +0,0 @@ -FROM python:3.8.5 - -RUN mkdir -p /scripts -WORKDIR /scripts - -COPY . ./ - -# Install pyClowder and any other python dependencies -RUN pip install --no-cache-dir -r requirement.txt -RUN python3 -m nltk.downloader -d /usr/local/share/nltk_data stopwords wordnet punkt averaged_perceptron_tagger \ -vader_lexicon sentiwordnet - -# wordnet cannot unzip fix -RUN unzip /usr/local/share/nltk_data/corpora/wordnet.zip -d /usr/local/share/nltk_data/corpora - -# Command to be run when container is run -# Can add heartbeat to change the refresh rate -CMD python3 SmmExtractor.py --heartbeat 40 - -ENV MAIN_SCRIPT="SmmExtractor.py" diff --git a/rabbitmq/sentiment_analysis/extractor_info.json b/rabbitmq/sentiment_analysis/extractor_info.json deleted file mode 100644 index d0fd99b..0000000 --- a/rabbitmq/sentiment_analysis/extractor_info.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "@context": "http://clowder.ncsa.illinois.edu/contexts/extractors.jsonld", - "name": "smm.preprocessing.analysis", - "version": "0.8", - "description": "Description on this extractor", - "author": "Wang, Chen ", - "contributors": [], - "contexts": [{}], - "repository": [ - { - "repType": "git", - "repUrl": "https://github.com/ncsa/standalone-smm-analytics" - } - ], - "process": { - "file": [ - "manual" - ] - }, - "external_services": [], - "dependencies": [], - "bibtex": [], - "parameters": { - "schema": { - "column": { - "type": "string", - "title": "Text Column Header", - "default": "text" - }, - "algorithm": { - "type": "string", - "title": "Sentiment Analysis Algorithms", - "enum": [ - "vader", - "sentiWordNet", - "debias" - ], - "default": "vader" - } - }, - "form": [ - { - "key": "column", - "type": "text" - }, - { - "key": "algorithm", - "type": "select" - } - ] - } -} diff --git a/rabbitmq/topic_modeling/Dockerfile b/rabbitmq/topic_modeling/Dockerfile index b689109..82fb3be 100644 --- a/rabbitmq/topic_modeling/Dockerfile +++ b/rabbitmq/topic_modeling/Dockerfile @@ -15,4 +15,7 @@ RUN pip install --no-cache-dir -r requirement.txt \ # cron job clean tmp folder && chmod u+x ./clear_cache.sh \ && chmod 0644 ./clear_cache_cron \ -&& crontab ./clear_cache_cron \ No newline at end of file +&& crontab ./clear_cache_cron + +# wordnet cannot unzip fix +RUN unzip /usr/local/share/nltk_data/corpora/wordnet.zip -d /usr/local/share/nltk_data/corpora diff --git a/rabbitmq/topic_modeling/SmmExtractor.py b/rabbitmq/topic_modeling/SmmExtractor.py deleted file mode 100644 index 6aefd2c..0000000 --- a/rabbitmq/topic_modeling/SmmExtractor.py +++ /dev/null @@ -1,125 +0,0 @@ -#!/usr/bin/env python - -"""Example extractor based on the clowder code.""" -import pandas as pd -import json -import os -import csv -import types -import pickle - -import logging -from pyclowder.extractors import Extractor -import pyclowder.files - -from algorithm import algorithm - - -def save_local_output(localSavePath, fname, output_data): - """ - save output in memory first to local file - :param localSavePath: local saved file - :param remoteSavePath: remote save file path - :param fname: filename - :param output_data: the actual data - :return: local saved file path - """ - # json - if isinstance(output_data, dict): - fname += '.json' - with open(os.path.join(localSavePath, fname), 'w') as f: 
- json.dump(output_data, f) - - # dataframe to csv - elif isinstance(output_data, pd.DataFrame): - fname += '.csv' - output_data.to_csv(fname, encoding='utf-8') - - # string to html - elif isinstance(output_data, str): - fname += '.html' - with open(os.path.join(localSavePath, fname), 'w') as f: - f.write(output_data) - - # list(list) to csv - elif isinstance(output_data, list) \ - and (isinstance(output_data[0], list) or isinstance(output_data[0], - tuple)): - fname += '.csv' - with open(os.path.join(localSavePath, fname), 'w', newline='', - encoding='utf-8') as f: - writer = csv.writer(f) - for row in output_data: - try: - writer.writerow(row) - except UnicodeEncodeError as e: - print(e) - - # special case - elif isinstance(output_data, types.GeneratorType): - if fname == 'gephi': - fname += '.gml' - elif fname == 'pajek': - fname += '.net' - else: - fname += '.unknown' - - with open(os.path.join(localSavePath, fname), 'w', newline='', - encoding='utf-8') as f: - for line in output_data: - f.write(line + '\n') - - # else pickle the object - else: - fname += '.pickle' - with open(os.path.join(localSavePath, fname), 'wb') as f: - pickle.dump(output_data, f) - - return os.path.join(localSavePath, fname) - - -class SmmExtractor(Extractor): - """Count the number of characters, words and lines in a text file.""" - def __init__(self): - Extractor.__init__(self) - - # parse command line and load default logging configuration - self.setup() - - # setup logging for the exctractor - logging.getLogger('pyclowder').setLevel(logging.DEBUG) - logging.getLogger('__main__').setLevel(logging.DEBUG) - - def process_message(self, connector, host, secret_key, resource, parameters): - # this extractor runs on dataset - # uncomment to see the resource - logger = logging.getLogger(__name__) - inputfile = resource["local_paths"][0] - dataset_id = resource['parent'].get('id') - - df = pd.read_csv(inputfile) - connector.message_process(resource, "Loading contents of file...") - - # execute the algorithm - # Parse user parameters to determine which column to analyze - userParams = parameters.get('parameters') - output = algorithm(df, userParams) - connector.message_process(resource, "Running the algorithm...") - - # upload object to s3 bucket and return the url - for fname, output_data in output.items(): - if fname != 'uid': - local_output_path = save_local_output("", fname, output_data) - uploaded_file_id = pyclowder.files.upload_to_dataset(connector, host, secret_key, dataset_id, - local_output_path) - connector.message_process(resource, "Saving " + local_output_path + "...") - - # write params to metadata - metadata = self.get_metadata(userParams, 'file', uploaded_file_id, host) - pyclowder.files.upload_metadata(connector, host, secret_key, uploaded_file_id, metadata) - connector.message_process(resource, "Writing metadata...") - - -if __name__ == "__main__": - extractor = SmmExtractor() - extractor.start() diff --git a/rabbitmq/topic_modeling/extractor.dockerfile b/rabbitmq/topic_modeling/extractor.dockerfile deleted file mode 100644 index 14c201b..0000000 --- a/rabbitmq/topic_modeling/extractor.dockerfile +++ /dev/null @@ -1,19 +0,0 @@ -FROM python:3.8.5 - -RUN mkdir -p /scripts -WORKDIR /scripts - -COPY . 
./ - -# Install pyClowder and any other python dependencies -RUN pip install --no-cache-dir -r requirement.txt -RUN python3 -m nltk.downloader -d /usr/local/share/nltk_data stopwords wordnet - -# wordnet cannot unzip fix -RUN unzip /usr/local/share/nltk_data/corpora/wordnet.zip -d /usr/local/share/nltk_data/corpora - -# Command to be run when container is run -# Can add heartbeat to change the refresh rate -CMD python3 SmmExtractor.py --heartbeat 40 - -ENV MAIN_SCRIPT="SmmExtractor.py" diff --git a/rabbitmq/topic_modeling/extractor_info.json b/rabbitmq/topic_modeling/extractor_info.json deleted file mode 100644 index 4363476..0000000 --- a/rabbitmq/topic_modeling/extractor_info.json +++ /dev/null @@ -1,47 +0,0 @@ -{ - "@context": "http://clowder.ncsa.illinois.edu/contexts/extractors.jsonld", - "name": "smm.topic.modeling", - "version": "0.1", - "description": "Description on this extractor", - "author": "Wang, Chen ", - "contributors": [], - "contexts": [{}], - "repository": [ - { - "repType": "git", - "repUrl": "https://github.com/ncsa/standalone-smm-analytics" - } - ], - "process": { - "file": [ - "manual" - ] - }, - "external_services": [], - "dependencies": [], - "bibtex": [], - "parameters": { - "schema": { - "column": { - "type": "string", - "title": "Text Column Header", - "default": "text" - }, - "numTopics": { - "type": "number", - "title": "Number of Toipcs", - "default": 5 - } - }, - "form": [ - { - "key": "column", - "type": "text" - }, - { - "key": "numTopics", - "type": "text" - } - ] - } -} diff --git a/rabbitmq/topic_modeling/requirement.txt b/rabbitmq/topic_modeling/requirement.txt index 02c4398..ca7e73c 100644 --- a/rabbitmq/topic_modeling/requirement.txt +++ b/rabbitmq/topic_modeling/requirement.txt @@ -5,4 +5,3 @@ numpy>=1.18.1 pandas>=1.1.4 pyLDAvis==2.1.2 pika>=1.1.0 -pyclowder==2.7.0 From a2720ebf2480048d7d9f7830ecc72d97e206c6ad Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Fri, 6 Oct 2023 14:43:55 -0500 Subject: [PATCH 06/19] add changelog entry --- rabbitmq/name_entity_recognition/CHANGELOG.md | 7 +++++++ rabbitmq/network_analysis/CHANGELOG.md | 7 +++++++ rabbitmq/preprocessing/CHANGELOG.md | 7 +++++++ rabbitmq/sentiment_analysis/CHANGELOG.md | 7 +++++++ rabbitmq/topic_modeling/CHANGELOG.md | 7 +++++++ 5 files changed, 35 insertions(+) diff --git a/rabbitmq/name_entity_recognition/CHANGELOG.md b/rabbitmq/name_entity_recognition/CHANGELOG.md index db415bf..3726dc3 100644 --- a/rabbitmq/name_entity_recognition/CHANGELOG.md +++ b/rabbitmq/name_entity_recognition/CHANGELOG.md @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +### Changed +- Remove Clowder related code [#75](https://github.com/ncsa/standalone-smm-analytics/issues/75) + + ## [0.1.2] - 09-14-2023 ### Added diff --git a/rabbitmq/network_analysis/CHANGELOG.md b/rabbitmq/network_analysis/CHANGELOG.md index 1a8e825..2dde568 100644 --- a/rabbitmq/network_analysis/CHANGELOG.md +++ b/rabbitmq/network_analysis/CHANGELOG.md @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+ +## [Unreleased] + +### Changed +- Remove Clowder related code [#75](https://github.com/ncsa/standalone-smm-analytics/issues/75) + + ## [0.1.3] - 09-14-2023 ### Added diff --git a/rabbitmq/preprocessing/CHANGELOG.md b/rabbitmq/preprocessing/CHANGELOG.md index 1a8e825..2dde568 100644 --- a/rabbitmq/preprocessing/CHANGELOG.md +++ b/rabbitmq/preprocessing/CHANGELOG.md @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +### Changed +- Remove Clowder related code [#75](https://github.com/ncsa/standalone-smm-analytics/issues/75) + + ## [0.1.3] - 09-14-2023 ### Added diff --git a/rabbitmq/sentiment_analysis/CHANGELOG.md b/rabbitmq/sentiment_analysis/CHANGELOG.md index ededd01..67e3b70 100644 --- a/rabbitmq/sentiment_analysis/CHANGELOG.md +++ b/rabbitmq/sentiment_analysis/CHANGELOG.md @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +### Changed +- Remove Clowder related code [#75](https://github.com/ncsa/standalone-smm-analytics/issues/75) + + ## [0.1.5] - 09-14-2023 ### Added diff --git a/rabbitmq/topic_modeling/CHANGELOG.md b/rabbitmq/topic_modeling/CHANGELOG.md index 1a8e825..2dde568 100644 --- a/rabbitmq/topic_modeling/CHANGELOG.md +++ b/rabbitmq/topic_modeling/CHANGELOG.md @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +### Changed +- Remove Clowder related code [#75](https://github.com/ncsa/standalone-smm-analytics/issues/75) + + ## [0.1.3] - 09-14-2023 ### Added From 257ddfadad0a0a82d5c9efbb7e1160354951a37e Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Fri, 6 Oct 2023 14:47:24 -0500 Subject: [PATCH 07/19] update commands --- rabbitmq/name_entity_recognition/command.txt | 3 +-- rabbitmq/network_analysis/command.txt | 3 +-- rabbitmq/preprocessing/command.txt | 3 +-- rabbitmq/sentiment_analysis/command.txt | 5 ----- rabbitmq/topic_modeling/command.txt | 3 +-- 5 files changed, 4 insertions(+), 13 deletions(-) diff --git a/rabbitmq/name_entity_recognition/command.txt b/rabbitmq/name_entity_recognition/command.txt index 1e07de4..e87b897 100644 --- a/rabbitmq/name_entity_recognition/command.txt +++ b/rabbitmq/name_entity_recognition/command.txt @@ -1,3 +1,2 @@ docker build -t socialmediamacroscope/name_entity_recognition:latest . -docker build -f extractor.dockerfile -t socialmediamacroscope/name_entity_recognition_extractor:latest . -docker push socialmediamacroscope/name_entity_recognition_extractor:latest +docker push socialmediamacroscope/name_entity_recognition:latest diff --git a/rabbitmq/network_analysis/command.txt b/rabbitmq/network_analysis/command.txt index e037d8b..8aac44e 100644 --- a/rabbitmq/network_analysis/command.txt +++ b/rabbitmq/network_analysis/command.txt @@ -1,4 +1,3 @@ # command to build and run this container docker build -t socialmediamacroscope/network_analysis:latest . -docker build -f extractor.dockerfile -t socialmediamacroscope/network_analysis_extractor:latest . 
-docker push socialmediamacroscope/network_analysis_extractor:latest +docker push socialmediamacroscope/network_analysis:latest diff --git a/rabbitmq/preprocessing/command.txt b/rabbitmq/preprocessing/command.txt index 7220eb1..f021c37 100644 --- a/rabbitmq/preprocessing/command.txt +++ b/rabbitmq/preprocessing/command.txt @@ -1,4 +1,3 @@ # command to build and run this container docker build -t socialmediamacroscope/preprocessing:latest . -docker build -f extractor.dockerfile -t socialmediamacroscope/preprocessing_extractor:latest . -docker push socialmediamacroscope/preprocessing_extractor:latest +docker push socialmediamacroscope/preprocessing:latest diff --git a/rabbitmq/sentiment_analysis/command.txt b/rabbitmq/sentiment_analysis/command.txt index bc2f67a..ff57c80 100644 --- a/rabbitmq/sentiment_analysis/command.txt +++ b/rabbitmq/sentiment_analysis/command.txt @@ -1,19 +1,14 @@ # command to build and run this container docker build -t socialmediamacroscope/sentiment_analysis:latest . -docker build -f extractor.dockerfile -t socialmediamacroscope/sentiment_analysis_extractor:latest . # command to build and run this container docker build -t socialmediamacroscope/sentiment_analysis:{version_number} . -docker build -f extractor.dockerfile -t socialmediamacroscope/sentiment_analysis_extractor:{version_number} . # tag as latest docker tag socialmediamacroscope/sentiment_analysis:{version_number} socialmediamacroscope/sentiment_analysis:latest -docker tag socialmediamacroscope/sentiment_analysis_extractor:{version_number} socialmediamacroscope/sentiment_analysis_extractor:latest # push docker push socialmediamacroscope/sentiment_analysis:{version_number} -docker push socialmediamacroscope/sentiment_analysis_extractor:{version_number} docker push socialmediamacroscope/sentiment_analysis:latest -docker push socialmediamacroscope/sentiment_analysis_extractor:latest diff --git a/rabbitmq/topic_modeling/command.txt b/rabbitmq/topic_modeling/command.txt index 5fdc30e..5ef2fd8 100644 --- a/rabbitmq/topic_modeling/command.txt +++ b/rabbitmq/topic_modeling/command.txt @@ -1,3 +1,2 @@ docker build -t socialmediamacroscope/topic_modeling:latest . -docker build -f extractor.dockerfile -t socialmediamacroscope/topic_modeling_extractor:latest . 
-docker push socialmediamacroscope/topic_modeling_extractor:latest +docker push socialmediamacroscope/topic_modeling:latest From 9689b86d9535d85e21a661a2b5755714fe1d27f6 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Fri, 13 Oct 2023 13:46:18 -0500 Subject: [PATCH 08/19] removed email password from docker compose run script --- rabbitmq/autophrase/test.py | 49 ++++++++++++++++++++++++ rabbitmq/autophrase/test2.py | 0 rabbitmq/docker-command-smile-traefik.sh | 1 - 3 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 rabbitmq/autophrase/test.py create mode 100644 rabbitmq/autophrase/test2.py diff --git a/rabbitmq/autophrase/test.py b/rabbitmq/autophrase/test.py new file mode 100644 index 0000000..18ecc48 --- /dev/null +++ b/rabbitmq/autophrase/test.py @@ -0,0 +1,49 @@ +import html + +links = { +'single-word': 'http://141.142.218.143:9000/macroscope-smile/ywkim%40illinois.edu/NLP/autophrase/b5dbd0de-09d3-4d9a-8629-905b82ed957e/AutoPhrase_single-word.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIFVGPPZEGB5JG3UQ%2F20231012%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231012T174952Z&X-Amz-Expires=604800&X-Amz-SignedHeaders=host&X-Amz-Signature=36dd36243b2986a89521413d1b56fd9f7533a0b861e760a10b8b481d24018167', +'model': 'http://141.142.218.143:9000/macroscope-smile/ywkim%40illinois.edu/NLP/autophrase/b5dbd0de-09d3-4d9a-8629-905b82ed957e/segmentation.model?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIFVGPPZEGB5JG3UQ%2F20231012%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231012T174952Z&X-Amz-Expires=604800&X-Amz-SignedHeaders=host&X-Amz-Signature=17804b5e3766e1ea762660b1ada9beb9ff3bbe10f207987a78d7df38c2f4653c', +'autophrase': 'http://141.142.218.143:9000/macroscope-smile/ywkim%40illinois.edu/NLP/autophrase/b5dbd0de-09d3-4d9a-8629-905b82ed957e/AutoPhrase.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIFVGPPZEGB5JG3UQ%2F20231012%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231012T174952Z&X-Amz-Expires=604800&X-Amz-SignedHeaders=host&X-Amz-Signature=7454ed576ba427329faf631876beb5312e26a20f2c2dd6f26579dacd836b9f3d', +'visualization': 'http://141.142.218.143:9000/macroscope-smile/ywkim%40illinois.edu/NLP/autophrase/b5dbd0de-09d3-4d9a-8629-905b82ed957e/div.html?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIFVGPPZEGB5JG3UQ%2F20231012%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231012T174952Z&X-Amz-Expires=604800&X-Amz-SignedHeaders=host&X-Amz-Signature=e5813e686b08a879defc22a7a4c0675caadb736dab05319e48c5b2dde4260fdc', +'multi-words': 'http://141.142.218.143:9000/macroscope-smile/ywkim%40illinois.edu/NLP/autophrase/b5dbd0de-09d3-4d9a-8629-905b82ed957e/AutoPhrase_multi-words.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIFVGPPZEGB5JG3UQ%2F20231012%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231012T174952Z&X-Amz-Expires=604800&X-Amz-SignedHeaders=host&X-Amz-Signature=38fac737f0f84d8e02a4e00374738fad678140d008c6acf8503c38b2ace3bdc3', +'token-mapping': 'http://141.142.218.143:9000/macroscope-smile/ywkim%40illinois.edu/NLP/autophrase/b5dbd0de-09d3-4d9a-8629-905b82ed957e/token_mapping.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIFVGPPZEGB5JG3UQ%2F20231012%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231012T174952Z&X-Amz-Expires=604800&X-Amz-SignedHeaders=host&X-Amz-Signature=f8db002ab6f07ae1c84ea4c49741c2147c6926a8758739aedf8df801038cdb2f', +'config': 
'http://141.142.218.143:9000/macroscope-smile/ywkim%40illinois.edu/NLP/autophrase/b5dbd0de-09d3-4d9a-8629-905b82ed957e/config.json?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIFVGPPZEGB5JG3UQ%2F20231012%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231012T174952Z&X-Amz-Expires=604800&X-Amz-SignedHeaders=host&X-Amz-Signature=6629785ba5ad3e92f82bf38f73a1f6c8181a3e8917328642e23753ea98b8fec1' +} +fpath = ["a", "b", "c", "d"] +test_url = ["http://test.com"] + +list_html = '' +for key in links.keys(): + list_html += '
  • ' + key + '
  • ' + +print(list_html) + +html = f""" + + +
    +

    Dear user (session ID: {fpath[0]}),

    +

    Your {fpath[2]} results are ready for you! (job ID: {fpath[3]})

    +
      +
    • You can view the visualization and download the results at Past Results page in SMILE. + Go to your session. +
        +
      • Go to Past Results
      • +
      • --> under {fpath[1]} tab
      • +
      • --> click {fpath[2]}
      • +
      • --> then find {fpath[3]}
      • +
      • --> click view
      • +
      +
      +
    • You can also click the link below to download part of the results: +
        {list_html}
      +
    • +
    +
    +

    Best Regards,

    +

    Social Media Macroscope - SMILE

    +
    + +""" + +print(html) \ No newline at end of file diff --git a/rabbitmq/autophrase/test2.py b/rabbitmq/autophrase/test2.py new file mode 100644 index 0000000..e69de29 diff --git a/rabbitmq/docker-command-smile-traefik.sh b/rabbitmq/docker-command-smile-traefik.sh index c326db2..e42e139 100644 --- a/rabbitmq/docker-command-smile-traefik.sh +++ b/rabbitmq/docker-command-smile-traefik.sh @@ -43,7 +43,6 @@ export REDIS_URL=redis://redis export EMAIL_HOST=<> export EMAIL_PORT=465 export EMAIL_FROM_ADDRESS=<> -export EMAIL_PASSWORD=<> # align with AWS export AWS_ACCESSKEY=<> From 41f0edce424e208c16dfd9481395924ef3430f6a Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Fri, 13 Oct 2023 13:47:28 -0500 Subject: [PATCH 09/19] commented out email password with comment --- rabbitmq/docker-command-smile-traefik.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rabbitmq/docker-command-smile-traefik.sh b/rabbitmq/docker-command-smile-traefik.sh index e42e139..0c0e311 100644 --- a/rabbitmq/docker-command-smile-traefik.sh +++ b/rabbitmq/docker-command-smile-traefik.sh @@ -43,6 +43,8 @@ export REDIS_URL=redis://redis export EMAIL_HOST=<> export EMAIL_PORT=465 export EMAIL_FROM_ADDRESS=<> +# the password is not needed +#export EMAIL_PASSWORD=<> # align with AWS export AWS_ACCESSKEY=<> From 33e20e3d91798038b6d1fea80c5fc1459d36017f Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Fri, 13 Oct 2023 14:35:28 -0500 Subject: [PATCH 10/19] removed test files --- rabbitmq/autophrase/test.py | 49 ------------------------------------ rabbitmq/autophrase/test2.py | 0 2 files changed, 49 deletions(-) delete mode 100644 rabbitmq/autophrase/test.py delete mode 100644 rabbitmq/autophrase/test2.py diff --git a/rabbitmq/autophrase/test.py b/rabbitmq/autophrase/test.py deleted file mode 100644 index 18ecc48..0000000 --- a/rabbitmq/autophrase/test.py +++ /dev/null @@ -1,49 +0,0 @@ -import html - -links = { -'single-word': 'http://141.142.218.143:9000/macroscope-smile/ywkim%40illinois.edu/NLP/autophrase/b5dbd0de-09d3-4d9a-8629-905b82ed957e/AutoPhrase_single-word.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIFVGPPZEGB5JG3UQ%2F20231012%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231012T174952Z&X-Amz-Expires=604800&X-Amz-SignedHeaders=host&X-Amz-Signature=36dd36243b2986a89521413d1b56fd9f7533a0b861e760a10b8b481d24018167', -'model': 'http://141.142.218.143:9000/macroscope-smile/ywkim%40illinois.edu/NLP/autophrase/b5dbd0de-09d3-4d9a-8629-905b82ed957e/segmentation.model?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIFVGPPZEGB5JG3UQ%2F20231012%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231012T174952Z&X-Amz-Expires=604800&X-Amz-SignedHeaders=host&X-Amz-Signature=17804b5e3766e1ea762660b1ada9beb9ff3bbe10f207987a78d7df38c2f4653c', -'autophrase': 'http://141.142.218.143:9000/macroscope-smile/ywkim%40illinois.edu/NLP/autophrase/b5dbd0de-09d3-4d9a-8629-905b82ed957e/AutoPhrase.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIFVGPPZEGB5JG3UQ%2F20231012%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231012T174952Z&X-Amz-Expires=604800&X-Amz-SignedHeaders=host&X-Amz-Signature=7454ed576ba427329faf631876beb5312e26a20f2c2dd6f26579dacd836b9f3d', -'visualization': 
'http://141.142.218.143:9000/macroscope-smile/ywkim%40illinois.edu/NLP/autophrase/b5dbd0de-09d3-4d9a-8629-905b82ed957e/div.html?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIFVGPPZEGB5JG3UQ%2F20231012%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231012T174952Z&X-Amz-Expires=604800&X-Amz-SignedHeaders=host&X-Amz-Signature=e5813e686b08a879defc22a7a4c0675caadb736dab05319e48c5b2dde4260fdc', -'multi-words': 'http://141.142.218.143:9000/macroscope-smile/ywkim%40illinois.edu/NLP/autophrase/b5dbd0de-09d3-4d9a-8629-905b82ed957e/AutoPhrase_multi-words.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIFVGPPZEGB5JG3UQ%2F20231012%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231012T174952Z&X-Amz-Expires=604800&X-Amz-SignedHeaders=host&X-Amz-Signature=38fac737f0f84d8e02a4e00374738fad678140d008c6acf8503c38b2ace3bdc3', -'token-mapping': 'http://141.142.218.143:9000/macroscope-smile/ywkim%40illinois.edu/NLP/autophrase/b5dbd0de-09d3-4d9a-8629-905b82ed957e/token_mapping.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIFVGPPZEGB5JG3UQ%2F20231012%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231012T174952Z&X-Amz-Expires=604800&X-Amz-SignedHeaders=host&X-Amz-Signature=f8db002ab6f07ae1c84ea4c49741c2147c6926a8758739aedf8df801038cdb2f', -'config': 'http://141.142.218.143:9000/macroscope-smile/ywkim%40illinois.edu/NLP/autophrase/b5dbd0de-09d3-4d9a-8629-905b82ed957e/config.json?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIFVGPPZEGB5JG3UQ%2F20231012%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231012T174952Z&X-Amz-Expires=604800&X-Amz-SignedHeaders=host&X-Amz-Signature=6629785ba5ad3e92f82bf38f73a1f6c8181a3e8917328642e23753ea98b8fec1' -} -fpath = ["a", "b", "c", "d"] -test_url = ["http://test.com"] - -list_html = '' -for key in links.keys(): - list_html += '
  • ' + key + '
  • ' - -print(list_html) - -html = f""" - - -
    -

    Dear user (session ID: {fpath[0]}),

    -

    Your {fpath[2]} results are ready for you! (job ID: {fpath[3]})

    -
      -
    • You can view the visualization and download the results at Past Results page in SMILE. - Go to your session. -
        -
      • Go to Past Results
      • -
      • --> under {fpath[1]} tab
      • -
      • --> click {fpath[2]}
      • -
      • --> then find {fpath[3]}
      • -
      • --> click view
      • -
      -
      -
    • You can also click the link below to download part of the results: -
        {list_html}
      -
    • -
    -
    -

    Best Regards,

    -

    Social Media Macroscope - SMILE

    -
    - -""" - -print(html) \ No newline at end of file diff --git a/rabbitmq/autophrase/test2.py b/rabbitmq/autophrase/test2.py deleted file mode 100644 index e69de29..0000000 From 2d5671156f714a6a2352d65974f3da3a78f8a16f Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Fri, 13 Oct 2023 14:37:25 -0500 Subject: [PATCH 11/19] commented out email password variable from regular docker compose command script --- rabbitmq/docker-command-smile.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rabbitmq/docker-command-smile.sh b/rabbitmq/docker-command-smile.sh index fcb184f..36044f1 100644 --- a/rabbitmq/docker-command-smile.sh +++ b/rabbitmq/docker-command-smile.sh @@ -38,7 +38,8 @@ export REDIS_URL=redis://redis:6379 export EMAIL_HOST=<> export EMAIL_PORT=465 export EMAIL_FROM_ADDRESS=<> -export EMAIL_PASSWORD=<> +# email password is not needed +#export EMAIL_PASSWORD=<> # align with AWS export AWS_ACCESSKEY=<> From c30135444e39e2bd042c1c6ff7813cb12a6667de Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Fri, 13 Oct 2023 14:52:31 -0500 Subject: [PATCH 12/19] commented out whole email variable block with comment --- rabbitmq/docker-command-smile-traefik.sh | 12 +++++++----- rabbitmq/docker-command-smile.sh | 12 +++++++----- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/rabbitmq/docker-command-smile-traefik.sh b/rabbitmq/docker-command-smile-traefik.sh index 0c0e311..7d7fe98 100644 --- a/rabbitmq/docker-command-smile-traefik.sh +++ b/rabbitmq/docker-command-smile-traefik.sh @@ -39,11 +39,13 @@ export RABBITMQ_HOST=${SERVER} export RABBITMQ_URL=amqp://${SERVER} export REDIS_URL=redis://redis -# email notification -export EMAIL_HOST=<> -export EMAIL_PORT=465 -export EMAIL_FROM_ADDRESS=<> -# the password is not needed +# the frontend will not ask the prompt window for asking the email +# to send when some process is done and ready +# if the following email related variables are setting +# email notification prompt will show up if the following variables are set +#export EMAIL_HOST=<> +#export EMAIL_PORT=465 +#export EMAIL_FROM_ADDRESS=<> #export EMAIL_PASSWORD=<> # align with AWS diff --git a/rabbitmq/docker-command-smile.sh b/rabbitmq/docker-command-smile.sh index 36044f1..420b45c 100644 --- a/rabbitmq/docker-command-smile.sh +++ b/rabbitmq/docker-command-smile.sh @@ -34,11 +34,13 @@ export RABBITMQ_URL=amqp://${SERVER}:5672 export RABBITMQ_HOST=${SERVER} export REDIS_URL=redis://redis:6379 -# email notification -export EMAIL_HOST=<> -export EMAIL_PORT=465 -export EMAIL_FROM_ADDRESS=<> -# email password is not needed +# the frontend will not ask the prompt window for asking the email +# to send when some process is done and ready +# if the following email related variables are setting +# email notification prompt will show up if the following variables are set +#export EMAIL_HOST=<> +#export EMAIL_PORT=465 +#export EMAIL_FROM_ADDRESS=<> #export EMAIL_PASSWORD=<> # align with AWS From 107f9d233f44f8b59e39269e74f20f5a7a17e782 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Mon, 23 Oct 2023 10:56:46 -0500 Subject: [PATCH 13/19] added google analytics key env variable --- CHANGELOG.md | 2 +- rabbitmq/docker-command-smile-traefik.sh | 3 +++ rabbitmq/docker-compose-smile-traefik.yml | 1 + rabbitmq/docker-compose-smile.yml | 1 + 4 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 320a2ce..fed1e4c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ and this project adheres to [Semantic 
Versioning](https://semver.org/spec/v2.0.0 - Docker compose launch script [45](https://github.com/ncsa/standalone-smm-analytics/issues/45) - Docker compose file using traefik [46](https://github.com/ncsa/standalone-smm-analytics/issues/46) - Environment variables for turn on off twitter and reddit [73](https://github.com/ncsa/standalone-smm-analytics/issues/73) - +- Environment variable for Google Analytics 4 [81](https://github.com/ncsa/standalone-smm-analytics/issues/81) ### Changed - Hard coded rabbimq url changed to env variable [18](https://github.com/ncsa/standalone-smm-analytics/issues/18) - Modified S3 url to env variable [21](https://github.com/ncsa/standalone-smm-analytics/issues/21) diff --git a/rabbitmq/docker-command-smile-traefik.sh b/rabbitmq/docker-command-smile-traefik.sh index 7d7fe98..af23c8a 100644 --- a/rabbitmq/docker-command-smile-traefik.sh +++ b/rabbitmq/docker-command-smile-traefik.sh @@ -18,6 +18,9 @@ export CILOGON_CLIENT_ID=<> export CILOGON_CLIENT_SECRET=<> export CILOGON_CALLBACK_URL=<> +# setting for google analytics 4 key +export GA_KEY= + # if the minio-api.server doesn't work, it should be ip address and port # export MINIO_URL=http://xxx.xxx.xxx.xxx:9000 # export MINIO_PUBLIC_ACCESS_URL=http://xxx.xxx.xxx.xxx:9000 diff --git a/rabbitmq/docker-compose-smile-traefik.yml b/rabbitmq/docker-compose-smile-traefik.yml index 2dd95c7..a94617f 100644 --- a/rabbitmq/docker-compose-smile-traefik.yml +++ b/rabbitmq/docker-compose-smile-traefik.yml @@ -351,6 +351,7 @@ services: - HOME=${HOME} - DOCKERIZED=${DOCKERIZED} - LOCAL_ALGORITHM=${LOCAL_ALGORITHM} + - GA_KEY=${GA_KEY} - MINIO_URL=${MINIO_URL} - MINIO_PUBLIC_ACCESS_URL=${MINIO_URL} - REDIS_URL=${REDIS_URL} diff --git a/rabbitmq/docker-compose-smile.yml b/rabbitmq/docker-compose-smile.yml index 4344e03..c9a6d93 100644 --- a/rabbitmq/docker-compose-smile.yml +++ b/rabbitmq/docker-compose-smile.yml @@ -320,6 +320,7 @@ services: - HOME=${HOME} - DOCKERIZED=${DOCKERIZED} - LOCAL_ALGORITHM=${LOCAL_ALGORITHM} + - GA_KEY=${GA_KEY} - MINIO_URL=${MINIO_URL} - MINIO_PUBLIC_ACCESS_URL=${MINIO_PUBLIC_ACCESS_URL} - REDIS_URL=${REDIS_URL} From 4ca78c60ff84a9a0e5329af01d07a85c5e1541c0 Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Mon, 23 Oct 2023 15:01:45 -0500 Subject: [PATCH 14/19] 61 visualization in the email link doesnt work (#70) * add url encoding * update all the notifications * update the email module * notification update * >=3 instead of ==3 --- rabbitmq/autophrase/notification.py | 278 ++++++++++-------- rabbitmq/image_crawler/notification.py | 278 ++++++++++-------- .../name_entity_recognition/notification.py | 278 ++++++++++-------- rabbitmq/network_analysis/notification.py | 278 ++++++++++-------- rabbitmq/preprocessing/notification.py | 278 ++++++++++-------- rabbitmq/sentiment_analysis/notification.py | 278 ++++++++++-------- rabbitmq/topic_modeling/notification.py | 278 ++++++++++-------- 7 files changed, 1106 insertions(+), 840 deletions(-) diff --git a/rabbitmq/autophrase/notification.py b/rabbitmq/autophrase/notification.py index 12fa793..b7c647a 100644 --- a/rabbitmq/autophrase/notification.py +++ b/rabbitmq/autophrase/notification.py @@ -2,6 +2,21 @@ from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText import os +import urllib.parse + +import contextlib + +try: + from urllib.parse import urlencode + +except ImportError: + from urllib import urlencode + +try: + from urllib.request import urlopen + +except ImportError: + from urllib2 import urlopen def 
reformat_sessionURL(sessionURL): @@ -20,6 +35,16 @@ def reformat_sessionURL(sessionURL): return new_sessionURL +def make_tiny(url): + try: + request_url = 'http://tinyurl.com/api-create.php?' + urlencode({'url': url}) + with contextlib.closing(urlopen(request_url)) as response: + return response.read().decode('utf-8') + except Exception as e: + # If there is an error (e.g., network issue or TinyURL service is down), return the original URL + return url + + def notification(toaddr, case, filename, links, sessionURL): # toaddr -- email address to send to # text content to send @@ -62,132 +87,145 @@ def notification(toaddr, case, filename, links, sessionURL): fpath[2] = 'Python NetworkX' elif fpath[2] == 'classification': fpath[2] = 'Text Classification' + else: + fpath = [] - if case == 0 or case == 'comment-fail': - html = """ - - - -
    -

    Dear user (session ID: """ + fpath[0] + """),

    -

    Your Reddit Comment collection has been terminated.

    -

    We are using the id and permalink from your Reddit Submission dataset - to collect comments and replies. It is most likely you have provide an incomplete Reddit Submission dataset missing these two fields.

    -

    Please try to reproduce the Reddit Submission with id and permalink, or switch to another dataset.

    - Go to your session... -
    -

    Best Regards,

    -

    Social Media Macroscope - SMILE

    -
    - - - """ - subject = 'Your Reddit Comment collection has failed...' - elif case == 1 or case == 'comment-terminate': - html = """ - - -
    -

    Dear user (session ID: """ + fpath[0] + """),

    -

    Your Reddit Comment collection is exceeding 400 Megabyte, and is terminated due to lack of disk space.

    -
      -
    • You have requested comments and replies for the Reddit Submission (Post):""" + \ - fpath[ - 3] + """. The partial comments we manage to collect and save will be compressed for you in an .zip file named """ + \ - fpath[3] + """-comments.zip (click)
    • -
    • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. - Go to your session. + if len(fpath) >= 3: + if case == 0 or case == 'comment-fail': + html = f""" + + + +
      +

      Dear user (session ID: {fpath[0]}),

      +

      Your Reddit Comment collection has been terminated.

      +

      We are using the id and permalink from your Reddit Submission dataset
+            to collect comments and replies. It is most likely you have provided an incomplete Reddit Submission dataset missing these two fields.

      +

      Please try to reproduce the Reddit Submission with id and permalink, or switch to another dataset.

      + Go to your session... +
      +

      Best Regards,

      +

      Social Media Macroscope - SMILE

      +
      + + + """ + subject = 'Your Reddit Comment collection has failed...' + elif case == 1 or case == 'comment-terminate': + html = f""" + + +
      +

      Dear user (session ID: {fpath[0]}),

      +

      Your Reddit Comment collection exceeds 400 Megabytes and has been terminated due to lack of disk space.

        -
      • Go to Past Results
      • -
      • --> under """ + fpath[1] + """
      • -
      • --> click """ + fpath[2] + """
      • -
      • --> then find """ + fpath[3] + """
      • -
      • --> click VIEW
      • -
      • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
      • -
      -
        -
        -

        Best Regards,

        -

        Social Media Macroscope - SMILE

        -
      - - """ - subject = 'Your Reddit Comment collection has been terminated...' - elif case == 2 or case == 'comment-success': - html = """ - - -
      -

      Dear user (session ID: """ + fpath[0] + """),

      -

      Your Reddit Comment collection is ready for you!

      -
        -
      • You have requested comments and replies for the Reddit Submission (Post):""" + \ - fpath[ - 3] + """. It will be compressed for you in an .zip file named """ + \ - fpath[3] + """-comments.zip
      • -
      • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. - Go to your session. +
      • You have requested comments and replies for the Reddit Submission ( + Post):{fpath[3]}. The partial comments we manage to collect + and save will be compressed for you in an .zip file named + {fpath[3]}-comments.zip (click)
      • +
      • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. + Go to your session. +
          +
        • Go to Past Results
        • +
        • --> under {fpath[1]}
        • +
        • --> click {fpath[2]}
        • +
        • --> then find {fpath[3]}
        • +
        • --> click VIEW
        • +
        • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
        • +
          -
        • Go to Past Results
        • -
        • --> under """ + fpath[1] + """
        • -
        • --> click """ + fpath[2] + """
        • -
        • --> then find """ + fpath[3] + """
        • -
        • --> click VIEW
        • -
        • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
        • +
          +

          Best Regards,

          +

          Social Media Macroscope - SMILE

          +
      + + """ + subject = 'Your Reddit Comment collection has been terminated...' + elif case == 2 or case == 'comment-success': + html = f""" + + +
      +

      Dear user (session ID: {fpath[0]}),

      +

      Your Reddit Comment collection is ready for you!

      +
        +
      • You have requested comments and replies for the Reddit Submission ( + Post):{fpath[3]}. It will be compressed for you in an .zip file named + {fpath[3]}-comments.zip
      • +
      • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. + Go to your session. +
          +
        • Go to Past Results
        • +
        • --> under {fpath[1]}
        • +
        • --> click {fpath[2]}
        • +
        • --> then find {fpath[3]}
        • +
        • --> click VIEW
        • +
        • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
        • +
      -
    -
    -

    Best Regards,

    -

    Social Media Macroscope - SMILE

    -
    - - """ - subject = 'Your Reddit Comment collection is completed!' - elif case == 3 or case == 'analytics-success': - list_html = '' - for key in links.keys(): - list_html += '
  • ' + key + '
  • ' - - html = """ - - -
    -

    Dear user (session ID: """ + fpath[0] + """),

    -

    Your """ + fpath[2] + """ results are ready for you! (job ID: """ + fpath[3] + """)

    -
      -
    • You can view the visualization and download the results at Past Results page in SMILE. - Go to your session. +
      +

      Best Regards,

      +

      Social Media Macroscope - SMILE

      +
    + + """ + subject = 'Your Reddit Comment collection is completed!' + elif case == 3 or case == 'analytics-success': + list_html = '' + for key in links.keys(): + list_html += f'
  • {key}
  • ' + + html = f""" + + +
    +

    Dear user (session ID: {fpath[0]}),

    +

    Your {fpath[2]} results are ready for you! (job ID: {fpath[3]})

      -
    • Go to Past Results
    • -
    • --> under """ + fpath[1] + """ tab
    • -
    • --> click """ + fpath[2] + """
    • -
    • --> then find """ + fpath[3] + """
    • -
    • --> click view
    • +
    • You can view the visualization and download the results at Past Results page in SMILE. + Go to your session. +
        +
      • Go to Past Results
      • +
      • --> under {fpath[1]} tab
      • +
      • --> click {fpath[2]}
      • +
      • --> then find {fpath[3]}
      • +
      • --> click view
      • +
      +
      +
    • You can also click the link below to download part of the results: +
        {list_html}
      +

    -
  • You can also click the link below to download part of the results: -
      """ + list_html + """
    -
  • - -
    -

    Best Regards,

    -

    Social Media Macroscope - SMILE

    -
    - - """ - subject = 'Your ' + fpath[2] + ' computation is completed!' - - msg = MIMEMultipart('alternative') - msg['Subject'] = subject - msg['From'] = fromaddr - msg['To'] = toaddr - msg.attach(MIMEText(html, 'html')) - - server = smtplib.SMTP(host, int(port)) - server.starttls() - if password is not None and password != "": - server.login(fromaddr, password) - server.sendmail(fromaddr, toaddr, msg.as_string()) - server.quit() +

    Best Regards,

    +

    Social Media Macroscope - SMILE

    + + + """ + subject = f'Your{fpath[2]}computation is completed!' + else: + html = None + subject = None + print("Invalid case! Skip notification.") + + if html is not None and subject is not None: + msg = MIMEMultipart('alternative') + msg['Subject'] = subject + msg['From'] = fromaddr + msg['To'] = toaddr + msg.attach(MIMEText(html, 'html')) + + server = smtplib.SMTP(host, int(port)) + server.starttls() + if password is not None and password != "": + server.login(fromaddr, password) + server.sendmail(fromaddr, toaddr, msg.as_string()) + server.quit() + else: + print("Invalid html content! Skip notification.") + else: + print("Invalid filepath! Skip notification.") + else: print("Invalid Email host setting! Skip notification.") + diff --git a/rabbitmq/image_crawler/notification.py b/rabbitmq/image_crawler/notification.py index 12fa793..b7c647a 100644 --- a/rabbitmq/image_crawler/notification.py +++ b/rabbitmq/image_crawler/notification.py @@ -2,6 +2,21 @@ from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText import os +import urllib.parse + +import contextlib + +try: + from urllib.parse import urlencode + +except ImportError: + from urllib import urlencode + +try: + from urllib.request import urlopen + +except ImportError: + from urllib2 import urlopen def reformat_sessionURL(sessionURL): @@ -20,6 +35,16 @@ def reformat_sessionURL(sessionURL): return new_sessionURL +def make_tiny(url): + try: + request_url = 'http://tinyurl.com/api-create.php?' + urlencode({'url': url}) + with contextlib.closing(urlopen(request_url)) as response: + return response.read().decode('utf-8') + except Exception as e: + # If there is an error (e.g., network issue or TinyURL service is down), return the original URL + return url + + def notification(toaddr, case, filename, links, sessionURL): # toaddr -- email address to send to # text content to send @@ -62,132 +87,145 @@ def notification(toaddr, case, filename, links, sessionURL): fpath[2] = 'Python NetworkX' elif fpath[2] == 'classification': fpath[2] = 'Text Classification' + else: + fpath = [] - if case == 0 or case == 'comment-fail': - html = """ - - - -
    -

    Dear user (session ID: """ + fpath[0] + """),

    -

    Your Reddit Comment collection has been terminated.

    -

    We are using the id and permalink from your Reddit Submission dataset - to collect comments and replies. It is most likely you have provide an incomplete Reddit Submission dataset missing these two fields.

    -

    Please try to reproduce the Reddit Submission with id and permalink, or switch to another dataset.

    - Go to your session... -
    -

    Best Regards,

    -

    Social Media Macroscope - SMILE

    -
    - - - """ - subject = 'Your Reddit Comment collection has failed...' - elif case == 1 or case == 'comment-terminate': - html = """ - - -
    -

    Dear user (session ID: """ + fpath[0] + """),

    -

    Your Reddit Comment collection is exceeding 400 Megabyte, and is terminated due to lack of disk space.

    -
      -
    • You have requested comments and replies for the Reddit Submission (Post):""" + \ - fpath[ - 3] + """. The partial comments we manage to collect and save will be compressed for you in an .zip file named """ + \ - fpath[3] + """-comments.zip (click)
    • -
    • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. - Go to your session. + if len(fpath) >= 3: + if case == 0 or case == 'comment-fail': + html = f""" + + + +
      +

      Dear user (session ID: {fpath[0]}),

      +

      Your Reddit Comment collection has been terminated.

      +

      We are using the id and permalink from your Reddit Submission dataset
+            to collect comments and replies. It is most likely you have provided an incomplete Reddit Submission dataset missing these two fields.

      +

      Please try to reproduce the Reddit Submission with id and permalink, or switch to another dataset.

      + Go to your session... +
      +

      Best Regards,

      +

      Social Media Macroscope - SMILE

      +
      + + + """ + subject = 'Your Reddit Comment collection has failed...' + elif case == 1 or case == 'comment-terminate': + html = f""" + + +
      +

      Dear user (session ID: {fpath[0]}),

      +

      Your Reddit Comment collection exceeds 400 Megabytes and has been terminated due to lack of disk space.

        -
      • Go to Past Results
      • -
      • --> under """ + fpath[1] + """
      • -
      • --> click """ + fpath[2] + """
      • -
      • --> then find """ + fpath[3] + """
      • -
      • --> click VIEW
      • -
      • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
      • -
      -
        -
        -

        Best Regards,

        -

        Social Media Macroscope - SMILE

        -
      - - """ - subject = 'Your Reddit Comment collection has been terminated...' - elif case == 2 or case == 'comment-success': - html = """ - - -
      -

      Dear user (session ID: """ + fpath[0] + """),

      -

      Your Reddit Comment collection is ready for you!

      -
        -
      • You have requested comments and replies for the Reddit Submission (Post):""" + \ - fpath[ - 3] + """. It will be compressed for you in an .zip file named """ + \ - fpath[3] + """-comments.zip
      • -
      • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. - Go to your session. +
      • You have requested comments and replies for the Reddit Submission ( + Post):{fpath[3]}. The partial comments we manage to collect + and save will be compressed for you in an .zip file named + {fpath[3]}-comments.zip (click)
      • +
      • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. + Go to your session. +
          +
        • Go to Past Results
        • +
        • --> under {fpath[1]}
        • +
        • --> click {fpath[2]}
        • +
        • --> then find {fpath[3]}
        • +
        • --> click VIEW
        • +
        • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
        • +
          -
        • Go to Past Results
        • -
        • --> under """ + fpath[1] + """
        • -
        • --> click """ + fpath[2] + """
        • -
        • --> then find """ + fpath[3] + """
        • -
        • --> click VIEW
        • -
        • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
        • +
          +

          Best Regards,

          +

          Social Media Macroscope - SMILE

          +
      + + """ + subject = 'Your Reddit Comment collection has been terminated...' + elif case == 2 or case == 'comment-success': + html = f""" + + +
      +

      Dear user (session ID: {fpath[0]}),

      +

      Your Reddit Comment collection is ready for you!

      +
        +
      • You have requested comments and replies for the Reddit Submission ( + Post):{fpath[3]}. It will be compressed for you in an .zip file named + {fpath[3]}-comments.zip
      • +
      • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. + Go to your session. +
          +
        • Go to Past Results
        • +
        • --> under {fpath[1]}
        • +
        • --> click {fpath[2]}
        • +
        • --> then find {fpath[3]}
        • +
        • --> click VIEW
        • +
        • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
        • +
      -
    -
    -

    Best Regards,

    -

    Social Media Macroscope - SMILE

    -
    - - """ - subject = 'Your Reddit Comment collection is completed!' - elif case == 3 or case == 'analytics-success': - list_html = '' - for key in links.keys(): - list_html += '
  • ' + key + '
  • ' - - html = """ - - -
    -

    Dear user (session ID: """ + fpath[0] + """),

    -

    Your """ + fpath[2] + """ results are ready for you! (job ID: """ + fpath[3] + """)

    -
      -
    • You can view the visualization and download the results at Past Results page in SMILE. - Go to your session. +
      +

      Best Regards,

      +

      Social Media Macroscope - SMILE

      +
    + + """ + subject = 'Your Reddit Comment collection is completed!' + elif case == 3 or case == 'analytics-success': + list_html = '' + for key in links.keys(): + list_html += f'
  • {key}
  • ' + + html = f""" + + +
    +

    Dear user (session ID: {fpath[0]}),

    +

    Your {fpath[2]} results are ready for you! (job ID: {fpath[3]})

      -
    • Go to Past Results
    • -
    • --> under """ + fpath[1] + """ tab
    • -
    • --> click """ + fpath[2] + """
    • -
    • --> then find """ + fpath[3] + """
    • -
    • --> click view
    • +
    • You can view the visualization and download the results at Past Results page in SMILE. + Go to your session. +
        +
      • Go to Past Results
      • +
      • --> under {fpath[1]} tab
      • +
      • --> click {fpath[2]}
      • +
      • --> then find {fpath[3]}
      • +
      • --> click view
      • +
      +
      +
    • You can also click the link below to download part of the results: +
        {list_html}
      +

    -
  • You can also click the link below to download part of the results: -
      """ + list_html + """
    -
  • - -
    -

    Best Regards,

    -

    Social Media Macroscope - SMILE

    -
    - - """ - subject = 'Your ' + fpath[2] + ' computation is completed!' - - msg = MIMEMultipart('alternative') - msg['Subject'] = subject - msg['From'] = fromaddr - msg['To'] = toaddr - msg.attach(MIMEText(html, 'html')) - - server = smtplib.SMTP(host, int(port)) - server.starttls() - if password is not None and password != "": - server.login(fromaddr, password) - server.sendmail(fromaddr, toaddr, msg.as_string()) - server.quit() +

    Best Regards,

    +

    Social Media Macroscope - SMILE

    + + + """ + subject = f'Your{fpath[2]}computation is completed!' + else: + html = None + subject = None + print("Invalid case! Skip notification.") + + if html is not None and subject is not None: + msg = MIMEMultipart('alternative') + msg['Subject'] = subject + msg['From'] = fromaddr + msg['To'] = toaddr + msg.attach(MIMEText(html, 'html')) + + server = smtplib.SMTP(host, int(port)) + server.starttls() + if password is not None and password != "": + server.login(fromaddr, password) + server.sendmail(fromaddr, toaddr, msg.as_string()) + server.quit() + else: + print("Invalid html content! Skip notification.") + else: + print("Invalid filepath! Skip notification.") + else: print("Invalid Email host setting! Skip notification.") + diff --git a/rabbitmq/name_entity_recognition/notification.py b/rabbitmq/name_entity_recognition/notification.py index 12fa793..b7c647a 100644 --- a/rabbitmq/name_entity_recognition/notification.py +++ b/rabbitmq/name_entity_recognition/notification.py @@ -2,6 +2,21 @@ from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText import os +import urllib.parse + +import contextlib + +try: + from urllib.parse import urlencode + +except ImportError: + from urllib import urlencode + +try: + from urllib.request import urlopen + +except ImportError: + from urllib2 import urlopen def reformat_sessionURL(sessionURL): @@ -20,6 +35,16 @@ def reformat_sessionURL(sessionURL): return new_sessionURL +def make_tiny(url): + try: + request_url = 'http://tinyurl.com/api-create.php?' + urlencode({'url': url}) + with contextlib.closing(urlopen(request_url)) as response: + return response.read().decode('utf-8') + except Exception as e: + # If there is an error (e.g., network issue or TinyURL service is down), return the original URL + return url + + def notification(toaddr, case, filename, links, sessionURL): # toaddr -- email address to send to # text content to send @@ -62,132 +87,145 @@ def notification(toaddr, case, filename, links, sessionURL): fpath[2] = 'Python NetworkX' elif fpath[2] == 'classification': fpath[2] = 'Text Classification' + else: + fpath = [] - if case == 0 or case == 'comment-fail': - html = """ - - - -
    -

    Dear user (session ID: """ + fpath[0] + """),

    -

    Your Reddit Comment collection has been terminated.

    -

    We are using the id and permalink from your Reddit Submission dataset - to collect comments and replies. It is most likely you have provide an incomplete Reddit Submission dataset missing these two fields.

    -

    Please try to reproduce the Reddit Submission with id and permalink, or switch to another dataset.

    - Go to your session... -
    -

    Best Regards,

    -

    Social Media Macroscope - SMILE

    -
    - - - """ - subject = 'Your Reddit Comment collection has failed...' - elif case == 1 or case == 'comment-terminate': - html = """ - - -
    -

    Dear user (session ID: """ + fpath[0] + """),

    -

    Your Reddit Comment collection is exceeding 400 Megabyte, and is terminated due to lack of disk space.

    -
      -
    • You have requested comments and replies for the Reddit Submission (Post):""" + \ - fpath[ - 3] + """. The partial comments we manage to collect and save will be compressed for you in an .zip file named """ + \ - fpath[3] + """-comments.zip (click)
    • -
    • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. - Go to your session. + if len(fpath) >= 3: + if case == 0 or case == 'comment-fail': + html = f""" + + + +
      +

      Dear user (session ID: {fpath[0]}),

      +

      Your Reddit Comment collection has been terminated.

      +

      We are using the id and permalink from your Reddit Submission dataset
+            to collect comments and replies. It is most likely you have provided an incomplete Reddit Submission dataset missing these two fields.

      +

      Please try to reproduce the Reddit Submission with id and permalink, or switch to another dataset.

      + Go to your session... +
      +

      Best Regards,

      +

      Social Media Macroscope - SMILE

      +
      + + + """ + subject = 'Your Reddit Comment collection has failed...' + elif case == 1 or case == 'comment-terminate': + html = f""" + + +
      +

      Dear user (session ID: {fpath[0]}),

      +

      Your Reddit Comment collection exceeds 400 Megabytes and has been terminated due to lack of disk space.

        -
      • Go to Past Results
      • -
      • --> under """ + fpath[1] + """
      • -
      • --> click """ + fpath[2] + """
      • -
      • --> then find """ + fpath[3] + """
      • -
      • --> click VIEW
      • -
      • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
      • -
      -
        -
        -

        Best Regards,

        -

        Social Media Macroscope - SMILE

        -
      - - """ - subject = 'Your Reddit Comment collection has been terminated...' - elif case == 2 or case == 'comment-success': - html = """ - - -
      -

      Dear user (session ID: """ + fpath[0] + """),

      -

      Your Reddit Comment collection is ready for you!

      -
        -
      • You have requested comments and replies for the Reddit Submission (Post):""" + \ - fpath[ - 3] + """. It will be compressed for you in an .zip file named """ + \ - fpath[3] + """-comments.zip
      • -
      • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. - Go to your session. +
      • You have requested comments and replies for the Reddit Submission ( + Post):{fpath[3]}. The partial comments we manage to collect + and save will be compressed for you in an .zip file named + {fpath[3]}-comments.zip (click)
      • +
      • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. + Go to your session. +
          +
        • Go to Past Results
        • +
        • --> under {fpath[1]}
        • +
        • --> click {fpath[2]}
        • +
        • --> then find {fpath[3]}
        • +
        • --> click VIEW
        • +
        • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
        • +
          -
        • Go to Past Results
        • -
        • --> under """ + fpath[1] + """
        • -
        • --> click """ + fpath[2] + """
        • -
        • --> then find """ + fpath[3] + """
        • -
        • --> click VIEW
        • -
        • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
        • +
          +

          Best Regards,

          +

          Social Media Macroscope - SMILE

          +
      + + """ + subject = 'Your Reddit Comment collection has been terminated...' + elif case == 2 or case == 'comment-success': + html = f""" + + +
      +

      Dear user (session ID: {fpath[0]}),

      +

      Your Reddit Comment collection is ready for you!

      +
        +
      • You have requested comments and replies for the Reddit Submission ( + Post):{fpath[3]}. It will be compressed for you in an .zip file named + {fpath[3]}-comments.zip
      • +
      • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. + Go to your session. +
          +
        • Go to Past Results
        • +
        • --> under {fpath[1]}
        • +
        • --> click {fpath[2]}
        • +
        • --> then find {fpath[3]}
        • +
        • --> click VIEW
        • +
        • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
        • +
      -
    -
    -

    Best Regards,

    -

    Social Media Macroscope - SMILE

    -
    - - """ - subject = 'Your Reddit Comment collection is completed!' - elif case == 3 or case == 'analytics-success': - list_html = '' - for key in links.keys(): - list_html += '
  • ' + key + '
  • ' - - html = """ - - -
    -

    Dear user (session ID: """ + fpath[0] + """),

    -

    Your """ + fpath[2] + """ results are ready for you! (job ID: """ + fpath[3] + """)

    -
      -
    • You can view the visualization and download the results at Past Results page in SMILE. - Go to your session. +
      +

      Best Regards,

      +

      Social Media Macroscope - SMILE

      +
    + + """ + subject = 'Your Reddit Comment collection is completed!' + elif case == 3 or case == 'analytics-success': + list_html = '' + for key in links.keys(): + list_html += f'
  • {key}
  • ' + + html = f""" + + +
    +

    Dear user (session ID: {fpath[0]}),

    +

    Your {fpath[2]} results are ready for you! (job ID: {fpath[3]})

      -
    • Go to Past Results
    • -
    • --> under """ + fpath[1] + """ tab
    • -
    • --> click """ + fpath[2] + """
    • -
    • --> then find """ + fpath[3] + """
    • -
    • --> click view
    • +
    • You can view the visualization and download the results at Past Results page in SMILE. + Go to your session. +
        +
      • Go to Past Results
      • +
      • --> under {fpath[1]} tab
      • +
      • --> click {fpath[2]}
      • +
      • --> then find {fpath[3]}
      • +
      • --> click view
      • +
      +
      +
    • You can also click the link below to download part of the results: +
        {list_html}
      +

    -
  • You can also click the link below to download part of the results: -
      """ + list_html + """
    -
  • - -
    -

    Best Regards,

    -

    Social Media Macroscope - SMILE

    -
    - - """ - subject = 'Your ' + fpath[2] + ' computation is completed!' - - msg = MIMEMultipart('alternative') - msg['Subject'] = subject - msg['From'] = fromaddr - msg['To'] = toaddr - msg.attach(MIMEText(html, 'html')) - - server = smtplib.SMTP(host, int(port)) - server.starttls() - if password is not None and password != "": - server.login(fromaddr, password) - server.sendmail(fromaddr, toaddr, msg.as_string()) - server.quit() +

    Best Regards,

    +

    Social Media Macroscope - SMILE

    + + + """ + subject = f'Your{fpath[2]}computation is completed!' + else: + html = None + subject = None + print("Invalid case! Skip notification.") + + if html is not None and subject is not None: + msg = MIMEMultipart('alternative') + msg['Subject'] = subject + msg['From'] = fromaddr + msg['To'] = toaddr + msg.attach(MIMEText(html, 'html')) + + server = smtplib.SMTP(host, int(port)) + server.starttls() + if password is not None and password != "": + server.login(fromaddr, password) + server.sendmail(fromaddr, toaddr, msg.as_string()) + server.quit() + else: + print("Invalid html content! Skip notification.") + else: + print("Invalid filepath! Skip notification.") + else: print("Invalid Email host setting! Skip notification.") + diff --git a/rabbitmq/network_analysis/notification.py b/rabbitmq/network_analysis/notification.py index 12fa793..b7c647a 100644 --- a/rabbitmq/network_analysis/notification.py +++ b/rabbitmq/network_analysis/notification.py @@ -2,6 +2,21 @@ from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText import os +import urllib.parse + +import contextlib + +try: + from urllib.parse import urlencode + +except ImportError: + from urllib import urlencode + +try: + from urllib.request import urlopen + +except ImportError: + from urllib2 import urlopen def reformat_sessionURL(sessionURL): @@ -20,6 +35,16 @@ def reformat_sessionURL(sessionURL): return new_sessionURL +def make_tiny(url): + try: + request_url = 'http://tinyurl.com/api-create.php?' + urlencode({'url': url}) + with contextlib.closing(urlopen(request_url)) as response: + return response.read().decode('utf-8') + except Exception as e: + # If there is an error (e.g., network issue or TinyURL service is down), return the original URL + return url + + def notification(toaddr, case, filename, links, sessionURL): # toaddr -- email address to send to # text content to send @@ -62,132 +87,145 @@ def notification(toaddr, case, filename, links, sessionURL): fpath[2] = 'Python NetworkX' elif fpath[2] == 'classification': fpath[2] = 'Text Classification' + else: + fpath = [] - if case == 0 or case == 'comment-fail': - html = """ - - - -
    -

    Dear user (session ID: """ + fpath[0] + """),

    -

    Your Reddit Comment collection has been terminated.

    -

    We are using the id and permalink from your Reddit Submission dataset - to collect comments and replies. It is most likely you have provide an incomplete Reddit Submission dataset missing these two fields.

    -

    Please try to reproduce the Reddit Submission with id and permalink, or switch to another dataset.

    - Go to your session... -
    -

    Best Regards,

    -

    Social Media Macroscope - SMILE

    -
    - - - """ - subject = 'Your Reddit Comment collection has failed...' - elif case == 1 or case == 'comment-terminate': - html = """ - - -
    -

    Dear user (session ID: """ + fpath[0] + """),

    -

    Your Reddit Comment collection is exceeding 400 Megabyte, and is terminated due to lack of disk space.

    -
      -
    • You have requested comments and replies for the Reddit Submission (Post):""" + \ - fpath[ - 3] + """. The partial comments we manage to collect and save will be compressed for you in an .zip file named """ + \ - fpath[3] + """-comments.zip (click)
    • -
    • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. - Go to your session. + if len(fpath) >= 3: + if case == 0 or case == 'comment-fail': + html = f""" + + + +
      +

      Dear user (session ID: {fpath[0]}),

      +

      Your Reddit Comment collection has been terminated.

      +

      We are using the id and permalink from your Reddit Submission dataset
+            to collect comments and replies. It is most likely you have provided an incomplete Reddit Submission dataset missing these two fields.

      +

      Please try to reproduce the Reddit Submission with id and permalink, or switch to another dataset.

      + Go to your session... +
      +

      Best Regards,

      +

      Social Media Macroscope - SMILE

      +
      + + + """ + subject = 'Your Reddit Comment collection has failed...' + elif case == 1 or case == 'comment-terminate': + html = f""" + + +
      +

      Dear user (session ID: {fpath[0]}),

      +

      Your Reddit Comment collection exceeds 400 Megabytes and has been terminated due to lack of disk space.

        -
      • Go to Past Results
      • -
      • --> under """ + fpath[1] + """
      • -
      • --> click """ + fpath[2] + """
      • -
      • --> then find """ + fpath[3] + """
      • -
      • --> click VIEW
      • -
      • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
      • -
      -
        -
        -

        Best Regards,

        -

        Social Media Macroscope - SMILE

        -
      - - """ - subject = 'Your Reddit Comment collection has been terminated...' - elif case == 2 or case == 'comment-success': - html = """ - - -
      -

      Dear user (session ID: """ + fpath[0] + """),

      -

      Your Reddit Comment collection is ready for you!

      -
        -
      • You have requested comments and replies for the Reddit Submission (Post):""" + \ - fpath[ - 3] + """. It will be compressed for you in an .zip file named """ + \ - fpath[3] + """-comments.zip
      • -
      • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. - Go to your session. +
      • You have requested comments and replies for the Reddit Submission ( + Post):{fpath[3]}. The partial comments we manage to collect + and save will be compressed for you in an .zip file named + {fpath[3]}-comments.zip (click)
      • +
      • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. + Go to your session. +
          +
        • Go to Past Results
        • +
        • --> under {fpath[1]}
        • +
        • --> click {fpath[2]}
        • +
        • --> then find {fpath[3]}
        • +
        • --> click VIEW
        • +
        • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
        • +
          -
        • Go to Past Results
        • -
        • --> under """ + fpath[1] + """
        • -
        • --> click """ + fpath[2] + """
        • -
        • --> then find """ + fpath[3] + """
        • -
        • --> click VIEW
        • -
        • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
        • +
          +

          Best Regards,

          +

          Social Media Macroscope - SMILE

          +
      + + """ + subject = 'Your Reddit Comment collection has been terminated...' + elif case == 2 or case == 'comment-success': + html = f""" + + +
      +

      Dear user (session ID: {fpath[0]}),

      +

      Your Reddit Comment collection is ready for you!

      +
        +
      • You have requested comments and replies for the Reddit Submission ( + Post):{fpath[3]}. It will be compressed for you in an .zip file named + {fpath[3]}-comments.zip
      • +
      • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. + Go to your session. +
          +
        • Go to Past Results
        • +
        • --> under {fpath[1]}
        • +
        • --> click {fpath[2]}
        • +
        • --> then find {fpath[3]}
        • +
        • --> click VIEW
        • +
        • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
        • +
      -
    -
    -

    Best Regards,

    -

    Social Media Macroscope - SMILE

    -
    - - """ - subject = 'Your Reddit Comment collection is completed!' - elif case == 3 or case == 'analytics-success': - list_html = '' - for key in links.keys(): - list_html += '
  • ' + key + '
  • ' - - html = """ - - -
    -

    Dear user (session ID: """ + fpath[0] + """),

    -

    Your """ + fpath[2] + """ results are ready for you! (job ID: """ + fpath[3] + """)

    -
      -
    • You can view the visualization and download the results at Past Results page in SMILE. - Go to your session. +
      +

      Best Regards,

      +

      Social Media Macroscope - SMILE

      +
    + + """ + subject = 'Your Reddit Comment collection is completed!' + elif case == 3 or case == 'analytics-success': + list_html = '' + for key in links.keys(): + list_html += f'
  • {key}
  • ' + + html = f""" + + +
    +

    Dear user (session ID: {fpath[0]}),

    +

    Your {fpath[2]} results are ready for you! (job ID: {fpath[3]})

      -
    • Go to Past Results
    • -
    • --> under """ + fpath[1] + """ tab
    • -
    • --> click """ + fpath[2] + """
    • -
    • --> then find """ + fpath[3] + """
    • -
    • --> click view
    • +
    • You can view the visualization and download the results at Past Results page in SMILE. + Go to your session. +
        +
      • Go to Past Results
      • +
      • --> under {fpath[1]} tab
      • +
      • --> click {fpath[2]}
      • +
      • --> then find {fpath[3]}
      • +
      • --> click view
      • +
      +
      +
    • You can also click the link below to download part of the results: +
        {list_html}
      +

    -
  • You can also click the link below to download part of the results: -
      """ + list_html + """
    -
  • - -
    -

    Best Regards,

    -

    Social Media Macroscope - SMILE

    -
    - - """ - subject = 'Your ' + fpath[2] + ' computation is completed!' - - msg = MIMEMultipart('alternative') - msg['Subject'] = subject - msg['From'] = fromaddr - msg['To'] = toaddr - msg.attach(MIMEText(html, 'html')) - - server = smtplib.SMTP(host, int(port)) - server.starttls() - if password is not None and password != "": - server.login(fromaddr, password) - server.sendmail(fromaddr, toaddr, msg.as_string()) - server.quit() +

    Best Regards,

    +

    Social Media Macroscope - SMILE

    + + + """ + subject = f'Your{fpath[2]}computation is completed!' + else: + html = None + subject = None + print("Invalid case! Skip notification.") + + if html is not None and subject is not None: + msg = MIMEMultipart('alternative') + msg['Subject'] = subject + msg['From'] = fromaddr + msg['To'] = toaddr + msg.attach(MIMEText(html, 'html')) + + server = smtplib.SMTP(host, int(port)) + server.starttls() + if password is not None and password != "": + server.login(fromaddr, password) + server.sendmail(fromaddr, toaddr, msg.as_string()) + server.quit() + else: + print("Invalid html content! Skip notification.") + else: + print("Invalid filepath! Skip notification.") + else: print("Invalid Email host setting! Skip notification.") + diff --git a/rabbitmq/preprocessing/notification.py b/rabbitmq/preprocessing/notification.py index 12fa793..b7c647a 100644 --- a/rabbitmq/preprocessing/notification.py +++ b/rabbitmq/preprocessing/notification.py @@ -2,6 +2,21 @@ from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText import os +import urllib.parse + +import contextlib + +try: + from urllib.parse import urlencode + +except ImportError: + from urllib import urlencode + +try: + from urllib.request import urlopen + +except ImportError: + from urllib2 import urlopen def reformat_sessionURL(sessionURL): @@ -20,6 +35,16 @@ def reformat_sessionURL(sessionURL): return new_sessionURL +def make_tiny(url): + try: + request_url = 'http://tinyurl.com/api-create.php?' + urlencode({'url': url}) + with contextlib.closing(urlopen(request_url)) as response: + return response.read().decode('utf-8') + except Exception as e: + # If there is an error (e.g., network issue or TinyURL service is down), return the original URL + return url + + def notification(toaddr, case, filename, links, sessionURL): # toaddr -- email address to send to # text content to send @@ -62,132 +87,145 @@ def notification(toaddr, case, filename, links, sessionURL): fpath[2] = 'Python NetworkX' elif fpath[2] == 'classification': fpath[2] = 'Text Classification' + else: + fpath = [] - if case == 0 or case == 'comment-fail': - html = """ - - - -
    -

    Dear user (session ID: """ + fpath[0] + """),

    -

    Your Reddit Comment collection has been terminated.

    -

    We are using the id and permalink from your Reddit Submission dataset - to collect comments and replies. It is most likely you have provide an incomplete Reddit Submission dataset missing these two fields.

    -

    Please try to reproduce the Reddit Submission with id and permalink, or switch to another dataset.

    - Go to your session... -
    -

    Best Regards,

    -

    Social Media Macroscope - SMILE

    -
    - - - """ - subject = 'Your Reddit Comment collection has failed...' - elif case == 1 or case == 'comment-terminate': - html = """ - - -
    -

    Dear user (session ID: """ + fpath[0] + """),

    -

    Your Reddit Comment collection is exceeding 400 Megabyte, and is terminated due to lack of disk space.

    -
      -
    • You have requested comments and replies for the Reddit Submission (Post):""" + \ - fpath[ - 3] + """. The partial comments we manage to collect and save will be compressed for you in an .zip file named """ + \ - fpath[3] + """-comments.zip (click)
    • -
    • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. - Go to your session. + if len(fpath) >= 3: + if case == 0 or case == 'comment-fail': + html = f""" + + + +
      +

      Dear user (session ID: {fpath[0]}),

      +

      Your Reddit Comment collection has been terminated.

      +

      We are using the id and permalink from your Reddit Submission dataset + to collect comments and replies. It is most likely you have provided an incomplete Reddit Submission dataset missing these two fields.

      +

      Please try to reproduce the Reddit Submission with id and permalink, or switch to another dataset.

      + Go to your session... +
      +

      Best Regards,

      +

      Social Media Macroscope - SMILE

      +
      + + + """ + subject = 'Your Reddit Comment collection has failed...' + elif case == 1 or case == 'comment-terminate': + html = f""" + + +
      +

      Dear user (session ID: {fpath[0]}),

      +

      Your Reddit Comment collection is exceeding 400 Megabyte, and is terminated due to lack of disk space.

        -
      • Go to Past Results
      • -
      • --> under """ + fpath[1] + """
      • -
      • --> click """ + fpath[2] + """
      • -
      • --> then find """ + fpath[3] + """
      • -
      • --> click VIEW
      • -
      • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
      • -
      -
        -
        -

        Best Regards,

        -

        Social Media Macroscope - SMILE

        -
      - - """ - subject = 'Your Reddit Comment collection has been terminated...' - elif case == 2 or case == 'comment-success': - html = """ - - -
      -

      Dear user (session ID: """ + fpath[0] + """),

      -

      Your Reddit Comment collection is ready for you!

      -
        -
      • You have requested comments and replies for the Reddit Submission (Post):""" + \ - fpath[ - 3] + """. It will be compressed for you in an .zip file named """ + \ - fpath[3] + """-comments.zip
      • -
      • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. - Go to your session. +
      • You have requested comments and replies for the Reddit Submission ( + Post):{fpath[3]}. The partial comments we manage to collect + and save will be compressed for you in an .zip file named + {fpath[3]}-comments.zip (click)
      • +
      • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. + Go to your session. +
          +
        • Go to Past Results
        • +
        • --> under {fpath[1]}
        • +
        • --> click {fpath[2]}
        • +
        • --> then find {fpath[3]}
        • +
        • --> click VIEW
        • +
        • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
        • +
          -
        • Go to Past Results
        • -
        • --> under """ + fpath[1] + """
        • -
        • --> click """ + fpath[2] + """
        • -
        • --> then find """ + fpath[3] + """
        • -
        • --> click VIEW
        • -
        • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
        • +
          +

          Best Regards,

          +

          Social Media Macroscope - SMILE

          +
      + + """ + subject = 'Your Reddit Comment collection has been terminated...' + elif case == 2 or case == 'comment-success': + html = f""" + + +
      +

      Dear user (session ID: {fpath[0]}),

      +

      Your Reddit Comment collection is ready for you!

      +
        +
      • You have requested comments and replies for the Reddit Submission ( + Post):{fpath[3]}. It will be compressed for you in an .zip file named + {fpath[3]}-comments.zip
      • +
      • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. + Go to your session. +
          +
        • Go to Past Results
        • +
        • --> under {fpath[1]}
        • +
        • --> click {fpath[2]}
        • +
        • --> then find {fpath[3]}
        • +
        • --> click VIEW
        • +
        • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
        • +
      -
    -
    -

    Best Regards,

    -

    Social Media Macroscope - SMILE

    -
    - - """ - subject = 'Your Reddit Comment collection is completed!' - elif case == 3 or case == 'analytics-success': - list_html = '' - for key in links.keys(): - list_html += '
  • ' + key + '
  • ' - - html = """ - - -
    -

    Dear user (session ID: """ + fpath[0] + """),

    -

    Your """ + fpath[2] + """ results are ready for you! (job ID: """ + fpath[3] + """)

    -
      -
    • You can view the visualization and download the results at Past Results page in SMILE. - Go to your session. +
      +

      Best Regards,

      +

      Social Media Macroscope - SMILE

      +
    + + """ + subject = 'Your Reddit Comment collection is completed!' + elif case == 3 or case == 'analytics-success': + list_html = '' + for key in links.keys(): + list_html += f'
  • {key}
  • ' + + html = f""" + + +
    +

    Dear user (session ID: {fpath[0]}),

    +

    Your {fpath[2]} results are ready for you! (job ID: {fpath[3]})

      -
    • Go to Past Results
    • -
    • --> under """ + fpath[1] + """ tab
    • -
    • --> click """ + fpath[2] + """
    • -
    • --> then find """ + fpath[3] + """
    • -
    • --> click view
    • +
    • You can view the visualization and download the results at Past Results page in SMILE. + Go to your session. +
        +
      • Go to Past Results
      • +
      • --> under {fpath[1]} tab
      • +
      • --> click {fpath[2]}
      • +
      • --> then find {fpath[3]}
      • +
      • --> click view
      • +
      +
      +
    • You can also click the link below to download part of the results: +
        {list_html}
      +

    -
  • You can also click the link below to download part of the results: -
      """ + list_html + """
    -
  • - -
    -

    Best Regards,

    -

    Social Media Macroscope - SMILE

    -
    - - """ - subject = 'Your ' + fpath[2] + ' computation is completed!' - - msg = MIMEMultipart('alternative') - msg['Subject'] = subject - msg['From'] = fromaddr - msg['To'] = toaddr - msg.attach(MIMEText(html, 'html')) - - server = smtplib.SMTP(host, int(port)) - server.starttls() - if password is not None and password != "": - server.login(fromaddr, password) - server.sendmail(fromaddr, toaddr, msg.as_string()) - server.quit() +

    Best Regards,

    +

    Social Media Macroscope - SMILE

    + + + """ + subject = f'Your{fpath[2]}computation is completed!' + else: + html = None + subject = None + print("Invalid case! Skip notification.") + + if html is not None and subject is not None: + msg = MIMEMultipart('alternative') + msg['Subject'] = subject + msg['From'] = fromaddr + msg['To'] = toaddr + msg.attach(MIMEText(html, 'html')) + + server = smtplib.SMTP(host, int(port)) + server.starttls() + if password is not None and password != "": + server.login(fromaddr, password) + server.sendmail(fromaddr, toaddr, msg.as_string()) + server.quit() + else: + print("Invalid html content! Skip notification.") + else: + print("Invalid filepath! Skip notification.") + else: print("Invalid Email host setting! Skip notification.") + diff --git a/rabbitmq/sentiment_analysis/notification.py b/rabbitmq/sentiment_analysis/notification.py index 12fa793..b7c647a 100644 --- a/rabbitmq/sentiment_analysis/notification.py +++ b/rabbitmq/sentiment_analysis/notification.py @@ -2,6 +2,21 @@ from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText import os +import urllib.parse + +import contextlib + +try: + from urllib.parse import urlencode + +except ImportError: + from urllib import urlencode + +try: + from urllib.request import urlopen + +except ImportError: + from urllib2 import urlopen def reformat_sessionURL(sessionURL): @@ -20,6 +35,16 @@ def reformat_sessionURL(sessionURL): return new_sessionURL +def make_tiny(url): + try: + request_url = 'http://tinyurl.com/api-create.php?' + urlencode({'url': url}) + with contextlib.closing(urlopen(request_url)) as response: + return response.read().decode('utf-8') + except Exception as e: + # If there is an error (e.g., network issue or TinyURL service is down), return the original URL + return url + + def notification(toaddr, case, filename, links, sessionURL): # toaddr -- email address to send to # text content to send @@ -62,132 +87,145 @@ def notification(toaddr, case, filename, links, sessionURL): fpath[2] = 'Python NetworkX' elif fpath[2] == 'classification': fpath[2] = 'Text Classification' + else: + fpath = [] - if case == 0 or case == 'comment-fail': - html = """ - - - -
    -

    Dear user (session ID: """ + fpath[0] + """),

    -

    Your Reddit Comment collection has been terminated.

    -

    We are using the id and permalink from your Reddit Submission dataset - to collect comments and replies. It is most likely you have provide an incomplete Reddit Submission dataset missing these two fields.

    -

    Please try to reproduce the Reddit Submission with id and permalink, or switch to another dataset.

    - Go to your session... -
    -

    Best Regards,

    -

    Social Media Macroscope - SMILE

    -
    - - - """ - subject = 'Your Reddit Comment collection has failed...' - elif case == 1 or case == 'comment-terminate': - html = """ - - -
    -

    Dear user (session ID: """ + fpath[0] + """),

    -

    Your Reddit Comment collection is exceeding 400 Megabyte, and is terminated due to lack of disk space.

    -
      -
    • You have requested comments and replies for the Reddit Submission (Post):""" + \ - fpath[ - 3] + """. The partial comments we manage to collect and save will be compressed for you in an .zip file named """ + \ - fpath[3] + """-comments.zip (click)
    • -
    • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. - Go to your session. + if len(fpath) >= 3: + if case == 0 or case == 'comment-fail': + html = f""" + + + +
      +

      Dear user (session ID: {fpath[0]}),

      +

      Your Reddit Comment collection has been terminated.

      +

      We are using the id and permalink from your Reddit Submission dataset + to collect comments and replies. It is most likely you have provided an incomplete Reddit Submission dataset missing these two fields.

      +

      Please try to reproduce the Reddit Submission with id and permalink, or switch to another dataset.

      + Go to your session... +
      +

      Best Regards,

      +

      Social Media Macroscope - SMILE

      +
      + + + """ + subject = 'Your Reddit Comment collection has failed...' + elif case == 1 or case == 'comment-terminate': + html = f""" + + +
      +

      Dear user (session ID: {fpath[0]}),

      +

      Your Reddit Comment collection is exceeding 400 Megabyte, and is terminated due to lack of disk space.

        -
      • Go to Past Results
      • -
      • --> under """ + fpath[1] + """
      • -
      • --> click """ + fpath[2] + """
      • -
      • --> then find """ + fpath[3] + """
      • -
      • --> click VIEW
      • -
      • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
      • -
      -
        -
        -

        Best Regards,

        -

        Social Media Macroscope - SMILE

        -
      - - """ - subject = 'Your Reddit Comment collection has been terminated...' - elif case == 2 or case == 'comment-success': - html = """ - - -
      -

      Dear user (session ID: """ + fpath[0] + """),

      -

      Your Reddit Comment collection is ready for you!

      -
        -
      • You have requested comments and replies for the Reddit Submission (Post):""" + \ - fpath[ - 3] + """. It will be compressed for you in an .zip file named """ + \ - fpath[3] + """-comments.zip
      • -
      • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. - Go to your session. +
      • You have requested comments and replies for the Reddit Submission ( + Post):{fpath[3]}. The partial comments we manage to collect + and save will be compressed for you in an .zip file named + {fpath[3]}-comments.zip (click)
      • +
      • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. + Go to your session. +
          +
        • Go to Past Results
        • +
        • --> under {fpath[1]}
        • +
        • --> click {fpath[2]}
        • +
        • --> then find {fpath[3]}
        • +
        • --> click VIEW
        • +
        • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
        • +
          -
        • Go to Past Results
        • -
        • --> under """ + fpath[1] + """
        • -
        • --> click """ + fpath[2] + """
        • -
        • --> then find """ + fpath[3] + """
        • -
        • --> click VIEW
        • -
        • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
        • +
          +

          Best Regards,

          +

          Social Media Macroscope - SMILE

          +
      + + """ + subject = 'Your Reddit Comment collection has been terminated...' + elif case == 2 or case == 'comment-success': + html = f""" + + +
      +

      Dear user (session ID: {fpath[0]}),

      +

      Your Reddit Comment collection is ready for you!

      +
        +
      • You have requested comments and replies for the Reddit Submission ( + Post):{fpath[3]}. It will be compressed for you in an .zip file named + {fpath[3]}-comments.zip
      • +
      • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. + Go to your session. +
          +
        • Go to Past Results
        • +
        • --> under {fpath[1]}
        • +
        • --> click {fpath[2]}
        • +
        • --> then find {fpath[3]}
        • +
        • --> click VIEW
        • +
        • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
        • +
      -
    -
    -

    Best Regards,

    -

    Social Media Macroscope - SMILE

    -
    - - """ - subject = 'Your Reddit Comment collection is completed!' - elif case == 3 or case == 'analytics-success': - list_html = '' - for key in links.keys(): - list_html += '
  • ' + key + '
  • ' - - html = """ - - -
    -

    Dear user (session ID: """ + fpath[0] + """),

    -

    Your """ + fpath[2] + """ results are ready for you! (job ID: """ + fpath[3] + """)

    -
      -
    • You can view the visualization and download the results at Past Results page in SMILE. - Go to your session. +
      +

      Best Regards,

      +

      Social Media Macroscope - SMILE

      +
    + + """ + subject = 'Your Reddit Comment collection is completed!' + elif case == 3 or case == 'analytics-success': + list_html = '' + for key in links.keys(): + list_html += f'
  • {key}
  • ' + + html = f""" + + +
    +

    Dear user (session ID: {fpath[0]}),

    +

    Your {fpath[2]} results are ready for you! (job ID: {fpath[3]})

      -
    • Go to Past Results
    • -
    • --> under """ + fpath[1] + """ tab
    • -
    • --> click """ + fpath[2] + """
    • -
    • --> then find """ + fpath[3] + """
    • -
    • --> click view
    • +
    • You can view the visualization and download the results at Past Results page in SMILE. + Go to your session. +
        +
      • Go to Past Results
      • +
      • --> under {fpath[1]} tab
      • +
      • --> click {fpath[2]}
      • +
      • --> then find {fpath[3]}
      • +
      • --> click view
      • +
      +
      +
    • You can also click the link below to download part of the results: +
        {list_html}
      +

    -
  • You can also click the link below to download part of the results: -
      """ + list_html + """
    -
  • - -
    -

    Best Regards,

    -

    Social Media Macroscope - SMILE

    -
    - - """ - subject = 'Your ' + fpath[2] + ' computation is completed!' - - msg = MIMEMultipart('alternative') - msg['Subject'] = subject - msg['From'] = fromaddr - msg['To'] = toaddr - msg.attach(MIMEText(html, 'html')) - - server = smtplib.SMTP(host, int(port)) - server.starttls() - if password is not None and password != "": - server.login(fromaddr, password) - server.sendmail(fromaddr, toaddr, msg.as_string()) - server.quit() +

    Best Regards,

    +

    Social Media Macroscope - SMILE

    + + + """ + subject = f'Your{fpath[2]}computation is completed!' + else: + html = None + subject = None + print("Invalid case! Skip notification.") + + if html is not None and subject is not None: + msg = MIMEMultipart('alternative') + msg['Subject'] = subject + msg['From'] = fromaddr + msg['To'] = toaddr + msg.attach(MIMEText(html, 'html')) + + server = smtplib.SMTP(host, int(port)) + server.starttls() + if password is not None and password != "": + server.login(fromaddr, password) + server.sendmail(fromaddr, toaddr, msg.as_string()) + server.quit() + else: + print("Invalid html content! Skip notification.") + else: + print("Invalid filepath! Skip notification.") + else: print("Invalid Email host setting! Skip notification.") + diff --git a/rabbitmq/topic_modeling/notification.py b/rabbitmq/topic_modeling/notification.py index 12fa793..b7c647a 100644 --- a/rabbitmq/topic_modeling/notification.py +++ b/rabbitmq/topic_modeling/notification.py @@ -2,6 +2,21 @@ from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText import os +import urllib.parse + +import contextlib + +try: + from urllib.parse import urlencode + +except ImportError: + from urllib import urlencode + +try: + from urllib.request import urlopen + +except ImportError: + from urllib2 import urlopen def reformat_sessionURL(sessionURL): @@ -20,6 +35,16 @@ def reformat_sessionURL(sessionURL): return new_sessionURL +def make_tiny(url): + try: + request_url = 'http://tinyurl.com/api-create.php?' + urlencode({'url': url}) + with contextlib.closing(urlopen(request_url)) as response: + return response.read().decode('utf-8') + except Exception as e: + # If there is an error (e.g., network issue or TinyURL service is down), return the original URL + return url + + def notification(toaddr, case, filename, links, sessionURL): # toaddr -- email address to send to # text content to send @@ -62,132 +87,145 @@ def notification(toaddr, case, filename, links, sessionURL): fpath[2] = 'Python NetworkX' elif fpath[2] == 'classification': fpath[2] = 'Text Classification' + else: + fpath = [] - if case == 0 or case == 'comment-fail': - html = """ - - - -
    -

    Dear user (session ID: """ + fpath[0] + """),

    -

    Your Reddit Comment collection has been terminated.

    -

    We are using the id and permalink from your Reddit Submission dataset - to collect comments and replies. It is most likely you have provide an incomplete Reddit Submission dataset missing these two fields.

    -

    Please try to reproduce the Reddit Submission with id and permalink, or switch to another dataset.

    - Go to your session... -
    -

    Best Regards,

    -

    Social Media Macroscope - SMILE

    -
    - - - """ - subject = 'Your Reddit Comment collection has failed...' - elif case == 1 or case == 'comment-terminate': - html = """ - - -
    -

    Dear user (session ID: """ + fpath[0] + """),

    -

    Your Reddit Comment collection is exceeding 400 Megabyte, and is terminated due to lack of disk space.

    -
      -
    • You have requested comments and replies for the Reddit Submission (Post):""" + \ - fpath[ - 3] + """. The partial comments we manage to collect and save will be compressed for you in an .zip file named """ + \ - fpath[3] + """-comments.zip (click)
    • -
    • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. - Go to your session. + if len(fpath) >= 3: + if case == 0 or case == 'comment-fail': + html = f""" + + + +
      +

      Dear user (session ID: {fpath[0]}),

      +

      Your Reddit Comment collection has been terminated.

      +

      We are using the id and permalink from your Reddit Submission dataset + to collect comments and replies. It is most likely you have provided an incomplete Reddit Submission dataset missing these two fields.

      +

      Please try to reproduce the Reddit Submission with id and permalink, or switch to another dataset.

      + Go to your session... +
      +

      Best Regards,

      +

      Social Media Macroscope - SMILE

      +
      + + + """ + subject = 'Your Reddit Comment collection has failed...' + elif case == 1 or case == 'comment-terminate': + html = f""" + + +
      +

      Dear user (session ID: {fpath[0]}),

      +

      Your Reddit Comment collection is exceeding 400 Megabyte, and is terminated due to lack of disk space.

        -
      • Go to Past Results
      • -
      • --> under """ + fpath[1] + """
      • -
      • --> click """ + fpath[2] + """
      • -
      • --> then find """ + fpath[3] + """
      • -
      • --> click VIEW
      • -
      • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
      • -
      -
        -
        -

        Best Regards,

        -

        Social Media Macroscope - SMILE

        -
      - - """ - subject = 'Your Reddit Comment collection has been terminated...' - elif case == 2 or case == 'comment-success': - html = """ - - -
      -

      Dear user (session ID: """ + fpath[0] + """),

      -

      Your Reddit Comment collection is ready for you!

      -
        -
      • You have requested comments and replies for the Reddit Submission (Post):""" + \ - fpath[ - 3] + """. It will be compressed for you in an .zip file named """ + \ - fpath[3] + """-comments.zip
      • -
      • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. - Go to your session. +
      • You have requested comments and replies for the Reddit Submission ( + Post):{fpath[3]}. The partial comments we manage to collect + and save will be compressed for you in an .zip file named + {fpath[3]}-comments.zip (click)
      • +
      • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. + Go to your session. +
          +
        • Go to Past Results
        • +
        • --> under {fpath[1]}
        • +
        • --> click {fpath[2]}
        • +
        • --> then find {fpath[3]}
        • +
        • --> click VIEW
        • +
        • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
        • +
          -
        • Go to Past Results
        • -
        • --> under """ + fpath[1] + """
        • -
        • --> click """ + fpath[2] + """
        • -
        • --> then find """ + fpath[3] + """
        • -
        • --> click VIEW
        • -
        • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
        • +
          +

          Best Regards,

          +

          Social Media Macroscope - SMILE

          +
      + + """ + subject = 'Your Reddit Comment collection has been terminated...' + elif case == 2 or case == 'comment-success': + html = f""" + + +
      +

      Dear user (session ID: {fpath[0]}),

      +

      Your Reddit Comment collection is ready for you!

      +
        +
      • You have requested comments and replies for the Reddit Submission ( + Post):{fpath[3]}. It will be compressed for you in an .zip file named + {fpath[3]}-comments.zip
      • +
      • In order to download this file, you need to first locate the original submission in the Past Results page in SMILE. + Go to your session. +
          +
        • Go to Past Results
        • +
        • --> under {fpath[1]}
        • +
        • --> click {fpath[2]}
        • +
        • --> then find {fpath[3]}
        • +
        • --> click VIEW
        • +
        • --> in the Overview table under the downloadables column, you will find these comments in a zip file.
        • +
      -
    -
    -

    Best Regards,

    -

    Social Media Macroscope - SMILE

    -
    - - """ - subject = 'Your Reddit Comment collection is completed!' - elif case == 3 or case == 'analytics-success': - list_html = '' - for key in links.keys(): - list_html += '
  • ' + key + '
  • ' - - html = """ - - -
    -

    Dear user (session ID: """ + fpath[0] + """),

    -

    Your """ + fpath[2] + """ results are ready for you! (job ID: """ + fpath[3] + """)

    -
      -
    • You can view the visualization and download the results at Past Results page in SMILE. - Go to your session. +
      +

      Best Regards,

      +

      Social Media Macroscope - SMILE

      +
    + + """ + subject = 'Your Reddit Comment collection is completed!' + elif case == 3 or case == 'analytics-success': + list_html = '' + for key in links.keys(): + list_html += f'
  • {key}
  • ' + + html = f""" + + +
    +

    Dear user (session ID: {fpath[0]}),

    +

    Your {fpath[2]} results are ready for you! (job ID: {fpath[3]})

      -
    • Go to Past Results
    • -
    • --> under """ + fpath[1] + """ tab
    • -
    • --> click """ + fpath[2] + """
    • -
    • --> then find """ + fpath[3] + """
    • -
    • --> click view
    • +
    • You can view the visualization and download the results at Past Results page in SMILE. + Go to your session. +
        +
      • Go to Past Results
      • +
      • --> under {fpath[1]} tab
      • +
      • --> click {fpath[2]}
      • +
      • --> then find {fpath[3]}
      • +
      • --> click view
      • +
      +
      +
    • You can also click the link below to download part of the results: +
        {list_html}
      +

    -
  • You can also click the link below to download part of the results: -
      """ + list_html + """
    -
  • - -
    -

    Best Regards,

    -

    Social Media Macroscope - SMILE

    -
    - - """ - subject = 'Your ' + fpath[2] + ' computation is completed!' - - msg = MIMEMultipart('alternative') - msg['Subject'] = subject - msg['From'] = fromaddr - msg['To'] = toaddr - msg.attach(MIMEText(html, 'html')) - - server = smtplib.SMTP(host, int(port)) - server.starttls() - if password is not None and password != "": - server.login(fromaddr, password) - server.sendmail(fromaddr, toaddr, msg.as_string()) - server.quit() +

    Best Regards,

    +

    Social Media Macroscope - SMILE

    + + + """ + subject = f'Your{fpath[2]}computation is completed!' + else: + html = None + subject = None + print("Invalid case! Skip notification.") + + if html is not None and subject is not None: + msg = MIMEMultipart('alternative') + msg['Subject'] = subject + msg['From'] = fromaddr + msg['To'] = toaddr + msg.attach(MIMEText(html, 'html')) + + server = smtplib.SMTP(host, int(port)) + server.starttls() + if password is not None and password != "": + server.login(fromaddr, password) + server.sendmail(fromaddr, toaddr, msg.as_string()) + server.quit() + else: + print("Invalid html content! Skip notification.") + else: + print("Invalid filepath! Skip notification.") + else: print("Invalid Email host setting! Skip notification.") + From a86d0e70700fbada8a60234205266aecc3d22c47 Mon Sep 17 00:00:00 2001 From: YONG WOOK KIM Date: Thu, 26 Oct 2023 13:26:48 -0500 Subject: [PATCH 15/19] added empty line in changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fed1e4c..4d851b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,12 +5,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] + ### Added - Docker building script for whole components [23](https://github.com/ncsa/standalone-smm-analytics/issues/23) - Docker compose launch script [45](https://github.com/ncsa/standalone-smm-analytics/issues/45) - Docker compose file using traefik [46](https://github.com/ncsa/standalone-smm-analytics/issues/46) - Environment variables for turn on off twitter and reddit [73](https://github.com/ncsa/standalone-smm-analytics/issues/73) - Environment variable for Google Analytics 4 [81](https://github.com/ncsa/standalone-smm-analytics/issues/81) + ### Changed - Hard coded rabbimq url changed to env variable [18](https://github.com/ncsa/standalone-smm-analytics/issues/18) - Modified S3 url to env variable [21](https://github.com/ncsa/standalone-smm-analytics/issues/21) From 4f9b90f5fef589dba388965c31760a8eb35bf205 Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Thu, 26 Oct 2023 17:19:05 -0500 Subject: [PATCH 16/19] add missing autophrase changelogs --- CHANGELOG.md | 6 +----- rabbitmq/autophrase/CHANGELOG.md | 13 +++++++++++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d851b5..dc6c147 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## [Unreleased] +## [Beta] - 10-26-2023 ### Added - Docker building script for whole components [23](https://github.com/ncsa/standalone-smm-analytics/issues/23) @@ -23,7 +23,3 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Created base image for sentiment analysis with model [55](https://github.com/ncsa/standalone-smm-analytics/issues/55) - Created base image for name entity recognition with model [56](https://github.com/ncsa/standalone-smm-analytics/issues/56) - Docker compose file updated to fix minio default bucket making [63](https://github.com/ncsa/standalone-smm-analytics/issues/63) -- Autophrase uses base docker image [67](https://github.com/ncsa/standalone-smm-analytics/issues/67) - -### Fixed -- Autophrase error due to the code updates [65](https://github.com/ncsa/standalone-smm-analytics/issues/65) diff --git a/rabbitmq/autophrase/CHANGELOG.md b/rabbitmq/autophrase/CHANGELOG.md index f773975..a4fcc4e 100644 --- a/rabbitmq/autophrase/CHANGELOG.md +++ b/rabbitmq/autophrase/CHANGELOG.md @@ -4,6 +4,19 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.1.6] - 10-26-2023 + +### Fixed +- Visualization in the email link doesn't work [#61](https://github.com/ncsa/standalone-smm-analytics/issues/61) + +## [0.1.5] - 09-21-2023 + +### Fixed +- Autophrase error due to the code updates [#65](https://github.com/ncsa/standalone-smm-analytics/issues/65) + +### Changed +- Autophrase uses base docker image [#67](https://github.com/ncsa/standalone-smm-analytics/issues/67) + ## [0.1.4] - 09-14-2023 ### Added From ea810152bcd1c12567713403b20e59c86d2b72a6 Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Thu, 26 Oct 2023 17:24:35 -0500 Subject: [PATCH 17/19] update version --- CHANGELOG.md | 2 +- rabbitmq/name_entity_recognition/CHANGELOG.md | 4 +++- rabbitmq/network_analysis/CHANGELOG.md | 4 +++- rabbitmq/preprocessing/CHANGELOG.md | 4 +++- rabbitmq/sentiment_analysis/CHANGELOG.md | 4 +++- rabbitmq/topic_modeling/CHANGELOG.md | 4 +++- 6 files changed, 16 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dc6c147..0e0c299 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Environment variable for Google Analytics 4 [81](https://github.com/ncsa/standalone-smm-analytics/issues/81) ### Changed -- Hard coded rabbimq url changed to env variable [18](https://github.com/ncsa/standalone-smm-analytics/issues/18) +- Hard coded rabbitmq url changed to env variable [18](https://github.com/ncsa/standalone-smm-analytics/issues/18) - Modified S3 url to env variable [21](https://github.com/ncsa/standalone-smm-analytics/issues/21) - Renamed Minio related environment variables [31](https://github.com/ncsa/standalone-smm-analytics/issues/31) - Rabbitmq handler's connection with dynamic credentials [41](https://github.com/ncsa/standalone-smm-analytics/issues/41) diff --git a/rabbitmq/name_entity_recognition/CHANGELOG.md b/rabbitmq/name_entity_recognition/CHANGELOG.md index 3726dc3..c03a607 100644 --- a/rabbitmq/name_entity_recognition/CHANGELOG.md +++ b/rabbitmq/name_entity_recognition/CHANGELOG.md @@ -5,11 +5,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic 
Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [0.1.3] - 10-26-2023 ### Changed - Remove Clowder related code [#75](https://github.com/ncsa/standalone-smm-analytics/issues/75) +### Fixed +- Visualization in the email link doesn't work [#61](https://github.com/ncsa/standalone-smm-analytics/issues/61) ## [0.1.2] - 09-14-2023 diff --git a/rabbitmq/network_analysis/CHANGELOG.md b/rabbitmq/network_analysis/CHANGELOG.md index 2dde568..5bb53c0 100644 --- a/rabbitmq/network_analysis/CHANGELOG.md +++ b/rabbitmq/network_analysis/CHANGELOG.md @@ -5,11 +5,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [0.1.4] - 10-26-2023 ### Changed - Remove Clowder related code [#75](https://github.com/ncsa/standalone-smm-analytics/issues/75) +### Fixed +- Visualization in the email link doesn't work [#61](https://github.com/ncsa/standalone-smm-analytics/issues/61) ## [0.1.3] - 09-14-2023 diff --git a/rabbitmq/preprocessing/CHANGELOG.md b/rabbitmq/preprocessing/CHANGELOG.md index 2dde568..5bb53c0 100644 --- a/rabbitmq/preprocessing/CHANGELOG.md +++ b/rabbitmq/preprocessing/CHANGELOG.md @@ -5,11 +5,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [0.1.4] - 10-26-2023 ### Changed - Remove Clowder related code [#75](https://github.com/ncsa/standalone-smm-analytics/issues/75) +### Fixed +- Visualization in the email link doesn't work [#61](https://github.com/ncsa/standalone-smm-analytics/issues/61) ## [0.1.3] - 09-14-2023 diff --git a/rabbitmq/sentiment_analysis/CHANGELOG.md b/rabbitmq/sentiment_analysis/CHANGELOG.md index 67e3b70..8349e2a 100644 --- a/rabbitmq/sentiment_analysis/CHANGELOG.md +++ b/rabbitmq/sentiment_analysis/CHANGELOG.md @@ -5,11 +5,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [0.1.6] - 10-26-2023 ### Changed - Remove Clowder related code [#75](https://github.com/ncsa/standalone-smm-analytics/issues/75) +### Fixed +- Visualization in the email link doesn't work [#61](https://github.com/ncsa/standalone-smm-analytics/issues/61) ## [0.1.5] - 09-14-2023 diff --git a/rabbitmq/topic_modeling/CHANGELOG.md b/rabbitmq/topic_modeling/CHANGELOG.md index 2dde568..5bb53c0 100644 --- a/rabbitmq/topic_modeling/CHANGELOG.md +++ b/rabbitmq/topic_modeling/CHANGELOG.md @@ -5,11 +5,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## [Unreleased] +## [0.1.4] - 10-26-2023 ### Changed - Remove Clowder related code [#75](https://github.com/ncsa/standalone-smm-analytics/issues/75) +### Fixed +- Visualization in the email link doesn't work [#61](https://github.com/ncsa/standalone-smm-analytics/issues/61) ## [0.1.3] - 09-14-2023 From 4e4e4bb96392aa714a3ce596ba931509e453bc02 Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Thu, 26 Oct 2023 17:27:40 -0500 Subject: [PATCH 18/19] write changelog issue correctly --- CHANGELOG.md | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e0c299..c4e0a82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,19 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Beta] - 10-26-2023 ### Added -- Docker building script for whole components [23](https://github.com/ncsa/standalone-smm-analytics/issues/23) -- Docker compose launch script [45](https://github.com/ncsa/standalone-smm-analytics/issues/45) -- Docker compose file using traefik [46](https://github.com/ncsa/standalone-smm-analytics/issues/46) -- Environment variables for turn on off twitter and reddit [73](https://github.com/ncsa/standalone-smm-analytics/issues/73) -- Environment variable for Google Analytics 4 [81](https://github.com/ncsa/standalone-smm-analytics/issues/81) +- Docker building script for whole components [#23](https://github.com/ncsa/standalone-smm-analytics/issues/23) +- Docker compose launch script [#45](https://github.com/ncsa/standalone-smm-analytics/issues/45) +- Docker compose file using traefik [#46](https://github.com/ncsa/standalone-smm-analytics/issues/46) +- Environment variables for turn on off twitter and reddit [#73](https://github.com/ncsa/standalone-smm-analytics/issues/73) +- Environment variable for Google Analytics 4 [#81](https://github.com/ncsa/standalone-smm-analytics/issues/81) ### Changed -- Hard coded rabbitmq url changed to env variable [18](https://github.com/ncsa/standalone-smm-analytics/issues/18) -- Modified S3 url to env variable [21](https://github.com/ncsa/standalone-smm-analytics/issues/21) -- Renamed Minio related environment variables [31](https://github.com/ncsa/standalone-smm-analytics/issues/31) -- Rabbitmq handler's connection with dynamic credentials [41](https://github.com/ncsa/standalone-smm-analytics/issues/41) -- Docker compose file to work with new settings [42](https://github.com/ncsa/standalone-smm-analytics/issues/42) -- Updated README with docker compose information [50](https://github.com/ncsa/standalone-smm-analytics/issues/50) -- Created base image for sentiment analysis with model [55](https://github.com/ncsa/standalone-smm-analytics/issues/55) -- Created base image for name entity recognition with model [56](https://github.com/ncsa/standalone-smm-analytics/issues/56) -- Docker compose file updated to fix minio default bucket making [63](https://github.com/ncsa/standalone-smm-analytics/issues/63) +- Hard coded rabbitmq url changed to env variable [#18](https://github.com/ncsa/standalone-smm-analytics/issues/18) +- Modified S3 url to env variable [#21](https://github.com/ncsa/standalone-smm-analytics/issues/21) +- Renamed Minio related environment variables [#31](https://github.com/ncsa/standalone-smm-analytics/issues/31) +- Rabbitmq handler's connection with dynamic credentials [#41](https://github.com/ncsa/standalone-smm-analytics/issues/41) +- Docker compose file to work with new settings 
[#42](https://github.com/ncsa/standalone-smm-analytics/issues/42) +- Updated README with docker compose information [#50](https://github.com/ncsa/standalone-smm-analytics/issues/50) +- Created base image for sentiment analysis with model [#55](https://github.com/ncsa/standalone-smm-analytics/issues/55) +- Created base image for name entity recognition with model [#56](https://github.com/ncsa/standalone-smm-analytics/issues/56) +- Docker compose file updated to fix minio default bucket making [#63](https://github.com/ncsa/standalone-smm-analytics/issues/63) From 3084a0fbd4620e920227dd8937b8fd7ed61b5fd1 Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Thu, 26 Oct 2023 18:24:17 -0500 Subject: [PATCH 19/19] add image crawler changelog --- rabbitmq/image_crawler/CHANGELOG.md | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 rabbitmq/image_crawler/CHANGELOG.md diff --git a/rabbitmq/image_crawler/CHANGELOG.md b/rabbitmq/image_crawler/CHANGELOG.md new file mode 100644 index 0000000..d715361 --- /dev/null +++ b/rabbitmq/image_crawler/CHANGELOG.md @@ -0,0 +1,11 @@ +# Changelog +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + + +## [0.1.3] - 10-26-2023 + +### Fixed +- Visualization in the email link doesn't work [#61](https://github.com/ncsa/standalone-smm-analytics/issues/61)
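
For reference, the notification.py changes in this series converge on the same guarded email flow across the services touched above (preprocessing, sentiment_analysis, topic_modeling, and the rest). The sketch below is a minimal, self-contained approximation of that flow, not code copied from the repository: the helper name send_notification and its argument list are illustrative assumptions, while the smtplib/email calls are the same standard-library calls that appear in the diffs.

import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText


def send_notification(host, port, fromaddr, toaddr, subject, html, password=None):
    # Mirror the guard added by these patches: if no template could be built
    # for the given case, skip the notification instead of mailing an empty body.
    if html is None or subject is None:
        print("Invalid html content! Skip notification.")
        return

    msg = MIMEMultipart('alternative')
    msg['Subject'] = subject
    msg['From'] = fromaddr
    msg['To'] = toaddr
    msg.attach(MIMEText(html, 'html'))

    server = smtplib.SMTP(host, int(port))
    server.starttls()
    # Authenticate only when an SMTP password is configured, as in the diffs.
    if password is not None and password != "":
        server.login(fromaddr, password)
    server.sendmail(fromaddr, toaddr, msg.as_string())
    server.quit()

In the patches themselves this logic stays inline in notification(), wrapped in the additional checks that skip sending when the filepath cannot be parsed or the case value is unknown, and preceded by the new make_tiny() helper that falls back to the original URL when the TinyURL request fails.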