Skip to content

Commit

Permalink
Adjusted lead dummies with new data field definitions. Added field va…
Browse files Browse the repository at this point in the history
…lidation to Lead object

Signed-off-by: Felix Zailskas <[email protected]>
  • Loading branch information
felix-zailskas committed Nov 2, 2023
1 parent 4467b00 commit 6b97817
Show file tree
Hide file tree
Showing 8 changed files with 165 additions and 66 deletions.
2 changes: 2 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ pytest = "==7.4.3"
[packages]
numpy = "==1.26.1"
scikit-learn = "==1.3.2"
pydantic = "==2.4.2"
email-validator = "==2.1.0"

[requires]
python_version = "3.10"
19 changes: 15 additions & 4 deletions src/database/database_dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>

import json
from typing import List

from database.models import Lead
from database.parsers import LeadParser


class DatabaseDummy:
Expand All @@ -10,8 +14,15 @@ def __init__(self) -> None:
json_data = json.load(f)["training_leads"]
self.data = {d["lead_id"]: d for d in json_data}

def get_entry_by_id(self, id_: int) -> dict:
return self.data[id_]
def get_lead_by_id(self, id_: int) -> Lead:
    """Return the stored entry with key ``id_`` parsed into a ``Lead``.

    Raises:
        KeyError: If no entry is stored under ``id_``.
    """
    raw_entry = self.data[id_]
    return LeadParser.parse_lead_from_dict(raw_entry)

def get_all_leads(self) -> List[Lead]:
    """Parse every stored entry and return the resulting leads as a list."""
    return [
        LeadParser.parse_lead_from_dict(raw_entry)
        for raw_entry in self.data.values()
    ]

def get_all_entries(self):
return self.data
def update_lead(self, lead: Lead):
    """Report the update of ``lead`` on stdout.

    Only prints the lead id and the lead itself; nothing is written to
    ``self.data``.
    """
    headline = f"Updating database entry for lead#{lead.lead_id}"
    print(headline)
    details = f"Update values: {lead}"
    print(details)
66 changes: 33 additions & 33 deletions src/database/dummy_leads.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,58 +2,58 @@
"training_leads": [
{
"lead_id": 0,
"company_name": "test_company",
"first_name": "test_first",
"last_name": "test_last",
"country_code": "DE",
"phone_number": 176123123,
"annual_income": 25000,
"product_of_interest": "Terminals",
"first_name": "Anton",
"last_name": "Kerner",
"phone_number": "49176123123",
"email_address": "[email protected]",
"customer_probability": 0.1,
"life_time_value": 400000
},
{
"lead_id": 1,
"company_name": "test_company",
"first_name": "test_first",
"last_name": "test_last",
"country_code": "DE",
"phone_number": 176123123,
"annual_income": 70000,
"product_of_interest": "Terminals",
"first_name": "Anton",
"last_name": "Kerner",
"phone_number": "49176123123",
"email_address": "[email protected]",
"customer_probability": 0.9,
"life_time_value": 1000
"customer_probability": 0.4,
"life_time_value": 40000
},
{
"lead_id": 2,
"company_name": "test_company",
"first_name": "test_first",
"last_name": "test_last",
"country_code": "DE",
"phone_number": 176123123,
"annual_income": 15000,
"product_of_interest": "Terminals",
"first_name": "Anton",
"last_name": "Kerner",
"phone_number": "49176123123",
"email_address": "[email protected]",
"customer_probability": 0.7,
"life_time_value": 3500
"customer_probability": 0.8,
"life_time_value": 40000
},
{
"lead_id": 3,
"company_name": "test_company",
"first_name": "test_first",
"last_name": "test_last",
"country_code": "DE",
"phone_number": 176123123,
"annual_income": 2500000,
"product_of_interest": "Terminals",
"first_name": "Anton",
"last_name": "Kerner",
"phone_number": "49176123123",
"email_address": "[email protected]",
"customer_probability": 0.4,
"life_time_value": 10000
"customer_probability": 0.08,
"life_time_value": 400000
},
{
"lead_id": 4,
"company_name": "test_company",
"first_name": "test_first",
"last_name": "test_last",
"country_code": "DE",
"phone_number": 176123123,
"annual_income": 1200,
"product_of_interest": "Terminals",
"first_name": "Anton",
"last_name": "Kerner",
"phone_number": "49176123123",
"email_address": "[email protected]",
"customer_probability": 0.32,
"life_time_value": 20000
"customer_probability": 0.9,
"life_time_value": 3400.23
}
]
}
49 changes: 49 additions & 0 deletions src/database/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>

from enum import Enum, IntEnum
from typing import List, Optional

from pydantic import BaseModel, EmailStr, Field


class AnnualIncome(IntEnum):
    """Annual income classes of a lead.

    Each member's integer value is the smallest whole-euro amount belonging
    to that class; the comment next to a member gives the full interval.
    Members are declared in ascending order of their value.
    """

    Nothing = 0  # 0€
    Class1 = 1  # (0€, 35000€]
    Class2 = 35001  # (35000€, 60000€]
    Class3 = 60001  # (60000€, 100000€]
    Class4 = 100001  # (100000€, 200000€]
    Class5 = 200001  # (200000€, 400000€]
    Class6 = 400001  # (400000€, 600000€]
    Class7 = 600001  # (600000€, 1000000€]
    Class8 = 1000001  # (1000000€, 2000000€]
    Class9 = 2000001  # (2000000€, 5000000€]
    Class10 = 5000001  # (5000000€, inf€)


class ProductOfInterest(str, Enum):
    """Product categories a lead can be interested in.

    Members also subclass ``str``; a member can be looked up from its
    string value, e.g. ``ProductOfInterest("Terminals")``.
    """

    Nothing = "Nothing"
    Terminals = "Terminals"
    CashRegisterSystem = "Cash Register System"
    BusinessAccount = "Business Account"
    All = "All"
    Other = "Other"


class LeadValue(BaseModel):
    """Value estimate of a lead.

    ``customer_probability`` is constrained to the closed interval [0, 1]
    through the ``ge``/``le`` bounds on its field definition.
    """

    life_time_value: float
    customer_probability: float = Field(..., ge=0, le=1)

    def get_lead_value(self) -> float:
        """Return the life time value multiplied by the customer probability."""
        value = self.life_time_value
        probability = self.customer_probability
        return value * probability


class Lead(BaseModel):
    """A sales lead: contact data, income class, product interest and value."""

    lead_id: int  # could be expanded to a UUID later
    first_name: str
    last_name: str
    email_address: EmailStr
    phone_number: str
    annual_income: AnnualIncome
    product_of_interest: ProductOfInterest
    # Explicit default: in pydantic v2 a bare ``Optional[...]`` annotation is
    # still a required field, so without ``= None`` a lead that has no value
    # estimate yet could not be created unless ``lead_value=None`` was passed.
    lead_value: Optional[LeadValue] = None
43 changes: 43 additions & 0 deletions src/database/parsers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>

from typing import Dict

from database.models import AnnualIncome, Lead, LeadValue, ProductOfInterest


class LeadParser:
    """Converts raw dictionary entries into ``Lead`` models."""

    @staticmethod
    def _classify_annual_income(amount) -> AnnualIncome:
        """Return the income class whose range contains ``amount``."""
        # AnnualIncome values are the lower bounds of the classes, declared in
        # ascending order. Compare BEFORE assigning so that the result is the
        # last class whose lower bound was reached; assigning first would
        # return the next higher class (e.g. 25000 -> Class2 instead of Class1).
        income_class = AnnualIncome.Nothing
        for candidate in AnnualIncome:
            if amount < candidate:
                break
            income_class = candidate
        return income_class

    @staticmethod
    def parse_lead_from_dict(data: Dict) -> Lead:
        """Build a ``Lead`` from a raw dictionary entry.

        Args:
            data: Mapping with the keys ``lead_id``, ``first_name``,
                ``last_name``, ``email_address``, ``phone_number``,
                ``annual_income`` and ``product_of_interest``. The keys
                ``customer_probability`` and ``life_time_value`` are optional.

        Returns:
            The parsed lead. Its ``lead_value`` is set only when both optional
            keys are present and not ``None``; otherwise it is ``None``.

        Raises:
            KeyError: If one of the mandatory keys is missing.
            ValueError: If ``product_of_interest`` does not match any
                ``ProductOfInterest`` value.
        """
        customer_probability = data.get("customer_probability")
        life_time_value = data.get("life_time_value")

        lead_value = None
        if customer_probability is not None and life_time_value is not None:
            lead_value = LeadValue(
                life_time_value=life_time_value,
                customer_probability=customer_probability,
            )

        return Lead(
            lead_id=data["lead_id"],
            first_name=data["first_name"],
            last_name=data["last_name"],
            email_address=data["email_address"],
            phone_number=data["phone_number"],
            annual_income=LeadParser._classify_annual_income(data["annual_income"]),
            product_of_interest=ProductOfInterest(data["product_of_interest"]),
            lead_value=lead_value,
        )
36 changes: 15 additions & 21 deletions src/evp/evp.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,43 +5,31 @@
from sklearn.linear_model import LinearRegression

from database import get_database


class LeadValue:
def __init__(
self, lifetime_value: float = 0, customer_probability: float = 0
) -> None:
assert (
0.0 <= customer_probability <= 1.0
), "Probability of becoming a customer must be between 0.0 and 1.0"
self.life_time_value = lifetime_value
self.customer_probability = customer_probability

def get_lead_value(self) -> float:
return self.life_time_value * self.customer_probability
from database.models import LeadValue


class EstimatedValuePredictor:
def __init__(self) -> None:
self.probability_predictor = LinearRegression()
self.life_time_value_predictor = LinearRegression()

data = get_database().get_all_entries()
X = np.random.random((len(data), len(data)))
all_leads = get_database().get_all_leads()
X = np.random.random((len(all_leads), len(all_leads)))
y_probability = np.array(
[item["customer_probability"] for item in data.values()]
[lead.lead_value.customer_probability for lead in all_leads]
)
y_value = np.array([item["customer_probability"] for item in data.values()])
y_value = np.array([lead.lead_value.life_time_value for lead in all_leads])

self.probability_predictor.fit(X, y_probability)
self.life_time_value_predictor.fit(X, y_value)

def estimate_value(self, lead_id) -> LeadValue:
# make call to database to retrieve relevant fields for this lead
lead_data = get_database().get_entry_by_id(lead_id)
lead = get_database().get_lead_by_id(lead_id)

# preprocess lead_data to get feature vector for our ML model
feature_vector = np.random.random((1, 5))
feature_vector = np.zeros((1, 5))
feature_vector[0][lead.lead_id] = 1.0

# use the models to predict required values
lead_value_pred = self.life_time_value_predictor.predict(feature_vector)
Expand All @@ -50,4 +38,10 @@ def estimate_value(self, lead_id) -> LeadValue:
1 + np.exp(-self.probability_predictor.predict(feature_vector))
)

return LeadValue(lead_value_pred, cust_prob_pred)
lead.lead_value = LeadValue(
life_time_value=lead_value_pred, customer_probability=cust_prob_pred
)
get_database().update_lead(lead)

# might not need to return here if the database is updated by this function
return lead.lead_value
16 changes: 8 additions & 8 deletions src/evp_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,23 @@
from database import get_database
from evp.evp import EstimatedValuePredictor

lead_id = 0
lead_id = 1

lead_data = get_database().get_entry_by_id(lead_id)
lead = get_database().get_lead_by_id(lead_id)

evp = EstimatedValuePredictor()
lead_value = evp.estimate_value(lead_id)

print(
f"""
Dummy prediction for {lead_id=}:
Dummy prediction for lead#{lead.lead_id}:
Data:
{lead_data}
Lead:
{lead}
This lead has a predicted probability of {lead_value.customer_probability} to become a customer.
This lead has a predicted life time value of {lead_value.life_time_value}.
This lead has a predicted probability of {lead_value.customer_probability:.2f} to become a customer.
This lead has a predicted life time value of {lead_value.life_time_value:.2f}.
This results in a total lead value of {lead_value.get_lead_value()}.
This results in a total lead value of {lead_value.get_lead_value():.2f}.
"""
)
File renamed without changes.

0 comments on commit 6b97817

Please sign in to comment.