Skip to content

Commit

Permalink
fix minor error in regionalatlas step + moved scrape address step to …
Browse files Browse the repository at this point in the history
…deprecated + added new bdc config

Signed-off-by: Lucca Baumgärtner <[email protected]>
  • Loading branch information
luccalb committed Feb 5, 2024
1 parent 524b72e commit a20c71d
Show file tree
Hide file tree
Showing 12 changed files with 87 additions and 31 deletions.
File renamed without changes.
1 change: 0 additions & 1 deletion src/bdc/steps/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,5 @@
from .hash_generator import *
from .preprocess_phonenumbers import *
from .regionalatlas import *
from .scrape_address import *
from .search_offeneregister import *
from .step import *
5 changes: 2 additions & 3 deletions src/bdc/steps/regionalatlas.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import geopandas as gpd
import osmnx
import pandas as pd
from geopandas.tools import sjoin
from pandas import DataFrame
from tqdm import tqdm

Expand Down Expand Up @@ -118,13 +117,13 @@ def run(self) -> DataFrame:

tqdm.pandas(desc="Computing Regional Score")

self.df[self.added_cols[:-1]] = self.df.progress_apply(
self.df[self.added_cols[-1:]] = self.df.progress_apply(
lambda lead: pd.Series(
get_lead_hash_generator().hash_check(
lead,
self.calculate_regional_score,
self.name + "_Regional-Score",
self.added_cols[:-1],
self.added_cols[-1:],
lead,
)
),
Expand Down
3 changes: 1 addition & 2 deletions src/database/leads/local_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import csv
import json
import os
from datetime import datetime
from pathlib import Path

import joblib
Expand All @@ -20,7 +19,7 @@
class LocalRepository(Repository):
BASE_PATH = os.path.dirname(__file__)
DF_INPUT = os.path.abspath(
os.path.join(BASE_PATH, "../../data/sumup_leads_email.csv")
os.path.join(BASE_PATH, "../../data/demo_leads_email.csv")
)
DF_OUTPUT = os.path.abspath(
os.path.join(BASE_PATH, "../../data/leads_enriched.csv")
Expand Down
4 changes: 0 additions & 4 deletions src/demo/pipeline_configs/config_sprint09_release.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@
"name": "AnalyzeEmails",
"force_refresh": true
},
{
"name": "ScrapeAddress",
"force_refresh": true
},
{
"name": "PreprocessPhonenumbers",
"force_refresh": true
Expand Down
4 changes: 0 additions & 4 deletions src/demo/pipeline_configs/config_template
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,6 @@
"name": "AnalyzeEmails",
"force_refresh": true
},
{
"name": "ScrapeAddress",
"force_refresh": true
},
{
"name": "PreprocessPhonenumbers",
"force_refresh": true
Expand Down
43 changes: 43 additions & 0 deletions src/demo/pipeline_configs/force_refresh_all_steps.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"description": "This config runs all steps with force_refresh set to true.",
"config": {
"steps": [
{
"name": "HashGenerator",
"force_refresh": true
},
{
"name": "AnalyzeEmails",
"force_refresh": true
},
{
"name": "PreprocessPhonenumbers",
"force_refresh": true
},
{
"name": "GooglePlaces",
"force_refresh": true
},
{
"name": "GooglePlacesDetailed",
"force_refresh": true
},
{
"name": "GPTReviewSentimentAnalyzer",
"force_refresh": true
},
{
"name": "GPTSummarizer",
"force_refresh": true
},
{
"name": "SmartReviewInsightsEnhancer",
"force_refresh": true
},
{
"name": "RegionalAtlas",
"force_refresh": true
}
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
SPDX-License-Identifier: MIT
SPDX-FileCopyrightText: 2023 Berkay Bozkurt <[email protected]>
27 changes: 27 additions & 0 deletions src/demo/pipeline_configs/regionalatlas_only.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
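{
"description": "This config runs only the steps required for the RegionalAtlas step (HashGenerator, AnalyzeEmails, PreprocessPhonenumbers, GooglePlaces, RegionalAtlas) with force_refresh set to true.",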
"config": {
"steps": [
{
"name": "HashGenerator",
"force_refresh": true
},
{
"name": "AnalyzeEmails",
"force_refresh": true
},
{
"name": "PreprocessPhonenumbers",
"force_refresh": true
},
{
"name": "GooglePlaces",
"force_refresh": true
},
{
"name": "RegionalAtlas",
"force_refresh": true
}
]
}
}
2 changes: 2 additions & 0 deletions src/demo/pipeline_configs/regionalatlas_only.json.license
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
SPDX-License-Identifier: MIT
SPDX-FileCopyrightText: 2023 Lucca Baumgärtner <[email protected]>
24 changes: 10 additions & 14 deletions src/demo/pipeline_configs/run_all_steps.json
Original file line number Diff line number Diff line change
@@ -1,46 +1,42 @@
{
"description": "This config runs all steps with force_refresh set to true.",
"description": "This config runs all steps with force_refresh set to false.",
"config": {
"steps": [
{
"name": "HashGenerator",
"force_refresh": true
"force_refresh": false
},
{
"name": "AnalyzeEmails",
"force_refresh": true
},
{
"name": "ScrapeAddress",
"force_refresh": true
"force_refresh": false
},
{
"name": "PreprocessPhonenumbers",
"force_refresh": true
"force_refresh": false
},
{
"name": "GooglePlaces",
"force_refresh": true
"force_refresh": false
},
{
"name": "GooglePlacesDetailed",
"force_refresh": true
"force_refresh": false
},
{
"name": "GPTReviewSentimentAnalyzer",
"force_refresh": true
"force_refresh": false
},
{
"name": "GPTSummarizer",
"force_refresh": true
"force_refresh": false
},
{
"name": "SmartReviewInsightsEnhancer",
"force_refresh": true
"force_refresh": false
},
{
"name": "RegionalAtlas",
"force_refresh": true
"force_refresh": false
}
]
}
Expand Down
3 changes: 0 additions & 3 deletions src/demo/pipeline_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
HashGenerator,
PreprocessPhonenumbers,
RegionalAtlas,
ScrapeAddress,
SearchOffeneRegister,
SmartReviewInsightsEnhancer,
)
Expand All @@ -33,14 +32,12 @@
"GPTSummarizer": GPTSummarizer,
"PreprocessPhonenumbers": PreprocessPhonenumbers,
"RegionalAtlas": RegionalAtlas,
"ScrapeAddress": ScrapeAddress,
"SearchOffeneRegister": SearchOffeneRegister,
"SmartReviewInsightsEnhancer": SmartReviewInsightsEnhancer,
}

# Please do not write following lists! Use the functions below instead.
_additional_pipeline_steps = [
(ScrapeAddress, "Scrape Address", "(will take a long time)"),
(SearchOffeneRegister, "Search OffeneRegister", "(will take a long time)"),
(PreprocessPhonenumbers, "Phone Number Validation", ""),
(
Expand Down

0 comments on commit a20c71d

Please sign in to comment.