Merge branch 'main' of github.com:amosproj/amos2023ws06-sales-lead-qu…

…alifier into dev Signed-off-by: Felix Zailskas <[email protected]>
amosproj · Feb 5, 2024 · d2fb7ec · d2fb7ec
2 parents d842143 + 48b1a1f
commit d2fb7ec
Show file tree

Hide file tree

Showing 12 changed files with 82 additions and 0 deletions.
diff --git a/Deliverables/sprint-12/demo-day-slide.pdf b/Deliverables/sprint-12/demo-day-slide.pdf
diff --git a/Deliverables/sprint-12/demo-day-video.mp4 b/Deliverables/sprint-12/demo-day-video.mp4
diff --git a/deprecated/steps/social_media_api.py b/deprecated/steps/social_media_api.py
@@ -23,6 +23,10 @@ class FacebookGraphAPI(Step):
         name: Name of this step, used for logging
         added_cols: List of fields that will be added to the main dataframe by executing this step
         required_cols: List of fields that are required to be existent in the input dataframe before performing this step
+
+    Added Columns:
+        email (str): The email of the company from facebook graph
+        category (str): The category of the company from facebook graph
     """
 
     name = "Facebook_Graph"

diff --git a/src/bdc/steps/analyze_emails.py b/src/bdc/steps/analyze_emails.py
@@ -50,6 +50,12 @@ class AnalyzeEmails(Step):
         name: Name of this step, used for logging
         added_cols: List of fields that will be added to the main dataframe by executing this step
         required_cols: List of fields that are required to be existent in the input dataframe before performing this step
+
+    Added Columns:
+        domain (str): The custom domain name/website if any
+        email_valid (bool): Boolean result of email check
+        first_name_in_account (bool): Boolean, True if the given first name is part of the email account name
+        last_name_in_account (bool): Boolean, True if the given last name is part of the email account name
     """
 
     name = "Analyze-Emails"

diff --git a/src/bdc/steps/analyze_reviews.py b/src/bdc/steps/analyze_reviews.py
@@ -92,6 +92,9 @@ class GPTReviewSentimentAnalyzer(Step):
         extract_text_from_reviews(reviews_list): Extracts text from reviews and removes line characters.
         num_tokens_from_string(text): Returns the number of tokens in a text string.
         batch_reviews(reviews, max_tokens): Batches reviews into smaller batches based on token limit.
+
+    Added Columns:
+        reviews_sentiment_score (float): The sentiment score of the reviews.
     """
 
     name = "GPT-Review-Sentiment-Analyzer"
@@ -377,6 +380,13 @@ class SmartReviewInsightsEnhancer(Step):
         _calculate_score(review): Calculates the score for a review.
         _grammatical_errors(text, lang): Calculates the number of grammatical errors in a text.
 
+    Added Columns:
+        review_avg_grammatical_score (float): The average grammatical score of the reviews.
+        review_polarization_type (str): The type of polarization in the reviews.
+        review_polarization_score (float): The score of polarization in the reviews.
+        review_highest_rating_ratio (float): The ratio of highest ratings in the reviews.
+        review_lowest_rating_ratio (float): The ratio of lowest ratings in the reviews.
+        review_rating_trend (float): The trend of ratings over time.
     """
 
     name = "Smart-Review-Insights-Enhancer"

diff --git a/src/bdc/steps/google_places.py b/src/bdc/steps/google_places.py
@@ -34,6 +34,19 @@ class GooglePlaces(Step):
         name: Name of this step, used for logging and as a column prefix
         added_cols: List of fields that will be added to the main dataframe by executing this step
         required_cols: List of fields that are required to be existent in the input dataframe before performing this step
+
+    Added Columns:
+        google_places_place_id (str): The place id of the business
+        google_places_business_status (str): The business status of the business
+        google_places_formatted_address (str): The formatted address of the business
+        google_places_name (str): The name of the business
+        google_places_user_ratings_total (int): The number of user ratings of the business
+        google_places_rating (float): The rating of the business
+        google_places_price_level (int): The price level of the business
+        google_places_candidate_count_mail (int): The number of candidates found by mail search
+        google_places_candidate_count_phone (int): The number of candidates found by phone search
+        google_places_place_id_matches_phone_search (bool): Whether the place id found by mail search matches the one found by phone search
+        google_places_confidence (float): A confidence score for the results
     """
 
     name = "Google_Places"

diff --git a/src/bdc/steps/google_places_detailed.py b/src/bdc/steps/google_places_detailed.py
@@ -34,6 +34,10 @@ class GooglePlacesDetailed(Step):
         name: Name of this step, used for logging
         added_cols: List of fields that will be added to the main dataframe by executing this step
         required_cols: List of fields that are required to be existent in the input dataframe before performing this step
+
+    Added Columns:
+        google_places_detailed_website (str): The website of the company from google places
+        google_places_detailed_type (str): The type of the company from google places
     """
 
     name = "Google_Places_Detailed"

diff --git a/src/bdc/steps/gpt_summarizer.py b/src/bdc/steps/gpt_summarizer.py
@@ -34,6 +34,9 @@ class GPTSummarizer(Step):
         added_cols: List of fields that will be added to the main dataframe by executing this step
         required_cols: List of fields that are required to be existent in the input dataframe before performing this
             step
+
+    Added Columns:
+        sales_person_summary (str): The summary of the company website for the salesperson using GPT
     """
 
     name = "GPT-Summarizer"

diff --git a/src/bdc/steps/preprocess_phonenumbers.py b/src/bdc/steps/preprocess_phonenumbers.py
@@ -25,6 +25,13 @@ class PreprocessPhonenumbers(Step):
         added_cols: List of fields that will be added to the main dataframe by executing this step
         required_cols: List of fields that are required to be existent in the input dataframe before performing this
             step
+
+    Added Columns:
+        number_formatted (str): The formatted phone number, e.g. +49 123 456789
+        number_country (str): The country of the phone number, e.g. Germany
+        number_area (str): The area of the phone number, e.g. Berlin
+        number_valid (bool): Whether the phone number is valid
+        number_possible (bool): Whether the phone number is possible
     """
 
     name = "Preprocess-Phonenumbers"

diff --git a/src/bdc/steps/regionalatlas.py b/src/bdc/steps/regionalatlas.py
@@ -33,6 +33,32 @@ class RegionalAtlas(Step):
         regions_gdfs: dataframe that includes all keys/values from the merged.geojson
         empty_result: empty result that will be used in case there are problems with the data
         epsg_code_etrs: 25832 is the standard used by RegionAtlas
+
+    Added Columns:
+        pop_density (float): Population density of the searched city
+        pop_development (float): Population development of the searched city
+        age_0 (float): Population age group 0-18 of the searched city
+        age_1 (float): Population age group 18-30 of the searched city
+        age_2 (float): Population age group 30-45 of the searched city
+        age_3 (float): Population age group 45-60 of the searched city
+        age_4 (float): Population age group 60+ of the searched city
+        pop_avg_age (float): Average age of the searched city
+        per_service_sector (float): Percentage of the service sector of the searched city
+        per_trade (float): Percentage of the trade sector of the searched city
+        employment_rate (float): Employment rate of the searched city
+        unemployment_rate (float): Unemployment rate of the searched city
+        per_long_term_unemployment (float): Percentage of long term unemployment of the searched city
+        investments_p_employee (float): Investments per employee of the searched city
+        gross_salary_p_employee (float): Gross salary per employee of the searched city
+        disp_income_p_inhabitant (float): Disposable income per inhabitant of the searched city
+        tot_income_p_taxpayer (float): Total income per taxpayer of the searched city
+        gdp_p_employee (float): GDP per employee of the searched city
+        gdp_development (float): GDP development of the searched city
+        gdp_p_inhabitant (float): GDP per inhabitant of the searched city
+        gdp_p_workhours (float): GDP per workhour of the searched city
+        pop_avg_age_zensus (float): Average age of the searched city (zensus)
+        unemployment_rate_zensus (float): Unemployment rate of the searched city (zensus)
+        regional_score (float): Regional score of the searched city
     """
 
     name: str = "Regional_Atlas"

diff --git a/src/bdc/steps/scrape_address.py b/src/bdc/steps/scrape_address.py
@@ -26,6 +26,9 @@ class ScrapeAddress(Step):
         name: Name of this step, used for logging and as a column prefix
         added_cols: List of fields that will be added to the main dataframe by executing this step
         required_cols: List of fields that are required to be existent in the input dataframe before performing this step
+
+    Added Columns:
+        address_ver_1 (str): The scraped address of the company
     """
 
     name = "Scrape-Address"

diff --git a/src/bdc/steps/search_offeneregister.py b/src/bdc/steps/search_offeneregister.py
@@ -30,6 +30,12 @@ class SearchOffeneRegister(Step):
         run(): Executes the step and returns the modified DataFrame.
         _extract_company_related_data(lead): Extracts company-related data for a given lead.
 
+    Added Columns:
+        company_name (str): The name of the company from offeneregister.de
+        company_objective (str): The objective of the company from offeneregister.de
+        company_capital (float): The capital of the company from offeneregister.de
+        company_capital_currency (str): The currency of the company capital from offeneregister.de
+        company_address (str): The address of the company from offeneregister.de
     """
 
     name = "OffeneRegister"