Skip to content

Commit

Permalink
Merge branch 'main' of github.com:amosproj/amos2023ws06-sales-lead-qu…
Browse files Browse the repository at this point in the history
…alifier into dev

Signed-off-by: Felix Zailskas <[email protected]>
  • Loading branch information
felix-zailskas committed Feb 5, 2024
2 parents d842143 + 48b1a1f commit d2fb7ec
Show file tree
Hide file tree
Showing 12 changed files with 82 additions and 0 deletions.
Binary file modified Deliverables/sprint-12/demo-day-slide.pdf
Binary file not shown.
Binary file modified Deliverables/sprint-12/demo-day-video.mp4
Binary file not shown.
4 changes: 4 additions & 0 deletions deprecated/steps/social_media_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ class FacebookGraphAPI(Step):
name: Name of this step, used for logging
added_cols: List of fields that will be added to the main dataframe by executing this step
required_cols: List of fields that are required to be existent in the input dataframe before performing this step
Added Columns:
email (str): The email of the company from the Facebook Graph API
category (str): The category of the company from the Facebook Graph API
"""

name = "Facebook_Graph"
Expand Down
6 changes: 6 additions & 0 deletions src/bdc/steps/analyze_emails.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ class AnalyzeEmails(Step):
name: Name of this step, used for logging
added_cols: List of fields that will be added to the main dataframe by executing this step
required_cols: List of fields that are required to be existent in the input dataframe before performing this step
Added Columns:
domain (str): The custom domain name/website if any
email_valid (bool): Boolean result of email check
first_name_in_account (bool): Boolean, True if the given first name is part of the email account name
last_name_in_account (bool): Boolean, True if the given last name is part of the email account name
"""

name = "Analyze-Emails"
Expand Down
10 changes: 10 additions & 0 deletions src/bdc/steps/analyze_reviews.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ class GPTReviewSentimentAnalyzer(Step):
extract_text_from_reviews(reviews_list): Extracts text from reviews and removes line characters.
num_tokens_from_string(text): Returns the number of tokens in a text string.
batch_reviews(reviews, max_tokens): Batches reviews into smaller batches based on token limit.
Added Columns:
reviews_sentiment_score (float): The sentiment score of the reviews.
"""

name = "GPT-Review-Sentiment-Analyzer"
Expand Down Expand Up @@ -377,6 +380,13 @@ class SmartReviewInsightsEnhancer(Step):
_calculate_score(review): Calculates the score for a review.
_grammatical_errors(text, lang): Calculates the number of grammatical errors in a text.
Added Columns:
review_avg_grammatical_score (float): The average grammatical score of the reviews.
review_polarization_type (str): The type of polarization in the reviews.
review_polarization_score (float): The score of polarization in the reviews.
review_highest_rating_ratio (float): The ratio of highest ratings in the reviews.
review_lowest_rating_ratio (float): The ratio of lowest ratings in the reviews.
review_rating_trend (float): The trend of ratings over time.
"""

name = "Smart-Review-Insights-Enhancer"
Expand Down
13 changes: 13 additions & 0 deletions src/bdc/steps/google_places.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,19 @@ class GooglePlaces(Step):
name: Name of this step, used for logging and as a column prefix
added_cols: List of fields that will be added to the main dataframe by executing this step
required_cols: List of fields that are required to be existent in the input dataframe before performing this step
Added Columns:
google_places_place_id (str): The place id of the business
google_places_business_status (str): The business status of the business
google_places_formatted_address (str): The formatted address of the business
google_places_name (str): The name of the business
google_places_user_ratings_total (int): The number of user ratings of the business
google_places_rating (float): The rating of the business
google_places_price_level (int): The price level of the business
google_places_candidate_count_mail (int): The number of candidates found by mail search
google_places_candidate_count_phone (int): The number of candidates found by phone search
google_places_place_id_matches_phone_search (bool): Whether the place id found by mail search matches the one found by phone search
google_places_confidence (float): A confidence score for the results
"""

name = "Google_Places"
Expand Down
4 changes: 4 additions & 0 deletions src/bdc/steps/google_places_detailed.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ class GooglePlacesDetailed(Step):
name: Name of this step, used for logging
added_cols: List of fields that will be added to the main dataframe by executing this step
required_cols: List of fields that are required to be existent in the input dataframe before performing this step
Added Columns:
google_places_detailed_website (str): The website of the company from Google Places
google_places_detailed_type (str): The type of the company from Google Places
"""

name = "Google_Places_Detailed"
Expand Down
3 changes: 3 additions & 0 deletions src/bdc/steps/gpt_summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ class GPTSummarizer(Step):
added_cols: List of fields that will be added to the main dataframe by executing this step
required_cols: List of fields that are required to be existent in the input dataframe before performing this
step
Added Columns:
sales_person_summary (str): The summary of the company website for the salesperson using GPT
"""

name = "GPT-Summarizer"
Expand Down
7 changes: 7 additions & 0 deletions src/bdc/steps/preprocess_phonenumbers.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@ class PreprocessPhonenumbers(Step):
added_cols: List of fields that will be added to the main dataframe by executing this step
required_cols: List of fields that are required to be existent in the input dataframe before performing this
step
Added Columns:
number_formatted (str): The formatted phone number, e.g. +49 123 456789
number_country (str): The country of the phone number, e.g. Germany
number_area (str): The area of the phone number, e.g. Berlin
number_valid (bool): Whether the phone number is valid
number_possible (bool): Whether the phone number is possible
"""

name = "Preprocess-Phonenumbers"
Expand Down
26 changes: 26 additions & 0 deletions src/bdc/steps/regionalatlas.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,32 @@ class RegionalAtlas(Step):
regions_gdfs: dataframe that includes all keys/values from the merged.geojson
empty_result: empty result that will be used in case there are problems with the data
epsg_code_etrs: 25832 is the standard used by RegionalAtlas
Added Columns:
pop_density (float): Population density of the searched city
pop_development (float): Population development of the searched city
age_0 (float): Population age group 0-18 of the searched city
age_1 (float): Population age group 18-30 of the searched city
age_2 (float): Population age group 30-45 of the searched city
age_3 (float): Population age group 45-60 of the searched city
age_4 (float): Population age group 60+ of the searched city
pop_avg_age (float): Average age of the searched city
per_service_sector (float): Percentage of the service sector of the searched city
per_trade (float): Percentage of the trade sector of the searched city
employment_rate (float): Employment rate of the searched city
unemployment_rate (float): Unemployment rate of the searched city
per_long_term_unemployment (float): Percentage of long term unemployment of the searched city
investments_p_employee (float): Investments per employee of the searched city
gross_salary_p_employee (float): Gross salary per employee of the searched city
disp_income_p_inhabitant (float): Disposable income per inhabitant of the searched city
tot_income_p_taxpayer (float): Total income per taxpayer of the searched city
gdp_p_employee (float): GDP per employee of the searched city
gdp_development (float): GDP development of the searched city
gdp_p_inhabitant (float): GDP per inhabitant of the searched city
gdp_p_workhours (float): GDP per workhour of the searched city
pop_avg_age_zensus (float): Average age of the searched city (zensus)
unemployment_rate (float): Unemployment rate of the searched city (zensus)
regional_score (float): Regional score of the searched city
"""

name: str = "Regional_Atlas"
Expand Down
3 changes: 3 additions & 0 deletions src/bdc/steps/scrape_address.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ class ScrapeAddress(Step):
name: Name of this step, used for logging and as a column prefix
added_cols: List of fields that will be added to the main dataframe by executing this step
required_cols: List of fields that are required to be existent in the input dataframe before performing this step
Added Columns:
address_ver_1 (str): The scraped address of the company
"""

name = "Scrape-Address"
Expand Down
6 changes: 6 additions & 0 deletions src/bdc/steps/search_offeneregister.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ class SearchOffeneRegister(Step):
run(): Executes the step and returns the modified DataFrame.
_extract_company_related_data(lead): Extracts company-related data for a given lead.
Added Columns:
company_name (str): The name of the company from offeneregister.de
company_objective (str): The objective of the company from offeneregister.de
company_capital (float): The capital of the company from offeneregister.de
company_capital_currency (str): The currency of the company capital from offeneregister.de
company_address (str): The address of the company from offeneregister.de
"""

name = "OffeneRegister"
Expand Down

0 comments on commit d2fb7ec

Please sign in to comment.