Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implemented the EVP skeleton class. Issue #22 #39

Merged
merged 7 commits into from
Nov 3, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 156 additions & 0 deletions LICENSES/CC-BY-4.0.txt

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ pytest = "==7.4.3"

[packages]
numpy = "==1.26.1"
scikit-learn = "==1.3.2"
pydantic = "==2.4.2"
email-validator = "==2.1.0"

[requires]
python_version = "3.10"
13 changes: 13 additions & 0 deletions src/database/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>

from .database_dummy import DatabaseDummy

_database = None


def get_database() -> DatabaseDummy:
    """Return the shared database instance, creating it on first use.

    The instance is cached in the module-level ``_database`` variable, so
    every call after the first returns the same object.
    """
    global _database
    if _database is not None:
        return _database
    _database = DatabaseDummy()
    return _database
28 changes: 28 additions & 0 deletions src/database/database_dummy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>

import json
from typing import List

from database.models import Lead
from database.parsers import LeadParser


class DatabaseDummy:
    """In-memory stand-in for the lead database, backed by a JSON file.

    The records listed under the ``"training_leads"`` key of
    ``dummy_leads.json`` are loaded once at construction time and indexed by
    their ``"lead_id"`` in ``self.data``.
    """

    def __init__(self) -> None:
        """Load the dummy leads from the JSON file next to this module."""
        # Imported locally so the class does not depend on an extra
        # module-level import.
        from pathlib import Path

        # Resolve the file relative to this module instead of the current
        # working directory, so the class works no matter where the process
        # was started from.
        data_file = Path(__file__).with_name("dummy_leads.json")
        # JSON is UTF-8 by specification; do not rely on the locale default.
        with open(data_file, encoding="utf-8") as f:
            json_data = json.load(f)["training_leads"]
        self.data = {d["lead_id"]: d for d in json_data}

    def get_lead_by_id(self, id_: int) -> Lead:
        """Return the lead stored under ``id_`` parsed into a ``Lead``.

        Raises:
            KeyError: if no record with this id was loaded.
        """
        return LeadParser.parse_lead_from_dict(self.data[id_])

    def get_all_leads(self) -> List[Lead]:
        """Return every loaded record parsed into a ``Lead``."""
        return [
            LeadParser.parse_lead_from_dict(entry) for entry in self.data.values()
        ]

    def update_lead(self, lead: Lead) -> None:
        """Report the update on stdout; the dummy does not persist anything."""
        print(f"Updating database entry for lead#{lead.lead_id}")
        print(f"Update values: {lead}")
59 changes: 59 additions & 0 deletions src/database/dummy_leads.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
{
"training_leads": [
{
"lead_id": 0,
"annual_income": 25000,
"product_of_interest": "Terminals",
"first_name": "Anton",
"last_name": "Kerner",
"phone_number": "49176123123",
"email_address": "[email protected]",
"customer_probability": 0.1,
"life_time_value": 400000
},
{
"lead_id": 1,
"annual_income": 70000,
"product_of_interest": "Terminals",
"first_name": "Anton",
"last_name": "Kerner",
"phone_number": "49176123123",
"email_address": "[email protected]",
"customer_probability": 0.4,
"life_time_value": 40000
},
{
"lead_id": 2,
"annual_income": 15000,
"product_of_interest": "Terminals",
"first_name": "Anton",
"last_name": "Kerner",
"phone_number": "49176123123",
"email_address": "[email protected]",
"customer_probability": 0.8,
"life_time_value": 40000
},
{
"lead_id": 3,
"annual_income": 2500000,
"product_of_interest": "Terminals",
"first_name": "Anton",
"last_name": "Kerner",
"phone_number": "49176123123",
"email_address": "[email protected]",
"customer_probability": 0.08,
"life_time_value": 400000
},
{
"lead_id": 4,
"annual_income": 1200,
"product_of_interest": "Terminals",
"first_name": "Anton",
"last_name": "Kerner",
"phone_number": "49176123123",
"email_address": "[email protected]",
"customer_probability": 0.9,
"life_time_value": 3400.23
}
]
}
2 changes: 2 additions & 0 deletions src/database/dummy_leads.json.license
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
SPDX-License-Identifier: CC-BY-4.0
SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>
49 changes: 49 additions & 0 deletions src/database/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>

from enum import Enum, IntEnum
from typing import List, Optional

from pydantic import BaseModel, EmailStr, Field


class AnnualIncome(IntEnum):
    """Annual income brackets in euros.

    Each member's value is the smallest whole-euro amount that falls into
    its bracket; the trailing comment gives the bracket as an interval.
    """

    Nothing = 0  # 0€
    Class1 = 1  # (0€, 35000€]
    Class2 = 35_001  # (35000€, 60000€]
    Class3 = 60_001  # (60000€, 100000€]
    Class4 = 100_001  # (100000€, 200000€]
    Class5 = 200_001  # (200000€, 400000€]
    Class6 = 400_001  # (400000€, 600000€]
    Class7 = 600_001  # (600000€, 1000000€]
    Class8 = 1_000_001  # (1000000€, 2000000€]
    Class9 = 2_000_001  # (2000000€, 5000000€]
    Class10 = 5_000_001  # (5000000€, inf€]


class ProductOfInterest(str, Enum):
    """Product a lead has expressed interest in.

    Members are also ``str`` instances, so they compare equal to their
    plain-text value.
    """

    Nothing = "Nothing"
    Terminals = "Terminals"
    CashRegisterSystem = "Cash Register System"
    BusinessAccount = "Business Account"
    All = "All"
    Other = "Other"


class LeadValue(BaseModel):
    """Value estimate for a lead.

    ``customer_probability`` is validated to lie in the closed range [0, 1].
    """

    life_time_value: float
    customer_probability: float = Field(..., ge=0, le=1)

    def get_lead_value(self) -> float:
        """Return the life time value weighted by the customer probability."""
        weighted_value = self.life_time_value * self.customer_probability
        return weighted_value


class Lead(BaseModel):
    """A sales lead: contact data, income bracket, product interest and an
    optional value estimate."""

    lead_id: int  # could be expanded to a UUID later
    first_name: str
    last_name: str
    email_address: EmailStr
    phone_number: str
    annual_income: AnnualIncome
    product_of_interest: ProductOfInterest
    # Explicit default: in Pydantic v2 an ``Optional[...]`` annotation alone
    # still makes the field required, so a lead without an estimate could not
    # be constructed without passing ``lead_value=None``.
    lead_value: Optional[LeadValue] = None
43 changes: 43 additions & 0 deletions src/database/parsers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>

from typing import Dict

from database.models import AnnualIncome, Lead, LeadValue, ProductOfInterest


class LeadParser:
    """Builds ``Lead`` models from raw dictionary records."""

    @staticmethod
    def parse_lead_from_dict(data: Dict) -> Lead:
        """Convert one raw lead record into a ``Lead``.

        ``data`` must contain ``lead_id``, ``first_name``, ``last_name``,
        ``email_address``, ``phone_number``, ``annual_income`` and
        ``product_of_interest``. A ``LeadValue`` is attached only when both
        ``customer_probability`` and ``life_time_value`` are present and not
        ``None``; otherwise ``lead_value`` is ``None``.

        Raises:
            KeyError: if one of the required keys is missing.
            ValueError: if ``product_of_interest`` is not a known product.
        """
        customer_probability = data.get("customer_probability")
        life_time_value = data.get("life_time_value")

        lead_value = None
        if customer_probability is not None and life_time_value is not None:
            lead_value = LeadValue(
                life_time_value=life_time_value,
                customer_probability=customer_probability,
            )

        # Pick the highest bracket whose range the income reaches. Each
        # member's value is one euro above the exclusive lower bound of its
        # bracket, so the bound has to be tested BEFORE the bracket is
        # accepted; accepting first would place every income one bracket too
        # high.
        income = data["annual_income"]
        annual_income = AnnualIncome.Nothing
        for income_class in AnnualIncome:
            if income <= income_class - 1:
                break
            annual_income = income_class

        return Lead(
            lead_id=data["lead_id"],
            first_name=data["first_name"],
            last_name=data["last_name"],
            email_address=data["email_address"],
            phone_number=data["phone_number"],
            annual_income=annual_income,
            product_of_interest=ProductOfInterest(data["product_of_interest"]),
            lead_value=lead_value,
        )
2 changes: 2 additions & 0 deletions src/evp/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>
47 changes: 47 additions & 0 deletions src/evp/evp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>

import numpy as np
from sklearn.linear_model import LinearRegression

from database import get_database
from database.models import LeadValue


class EstimatedValuePredictor:
    """Skeleton predictor for a lead's customer probability and life time value.

    Two linear regressions are fitted on random placeholder features, one
    target each. Predictions use a one-hot vector indexed by ``lead_id`` as
    the feature vector.
    """

    def __init__(self) -> None:
        """Fit both regressors on the leads currently in the database."""
        self.probability_predictor = LinearRegression()
        self.life_time_value_predictor = LinearRegression()

        all_leads = get_database().get_all_leads()
        # Remember the feature width so estimate_value() builds vectors of
        # the same size the models were trained with.
        self._n_features = len(all_leads)

        # Only leads that already carry a value can serve as training targets.
        training_leads = [lead for lead in all_leads if lead.lead_value is not None]

        # Placeholder features: random numbers, one row per training lead.
        X = np.random.random((len(training_leads), self._n_features))
        y_probability = np.array(
            [lead.lead_value.customer_probability for lead in training_leads]
        )
        y_value = np.array(
            [lead.lead_value.life_time_value for lead in training_leads]
        )

        self.probability_predictor.fit(X, y_probability)
        self.life_time_value_predictor.fit(X, y_value)

    def estimate_value(self, lead_id) -> LeadValue:
        """Predict the value of the lead ``lead_id`` and hand it to the database.

        The prediction is stored on the fetched lead, passed to the
        database's ``update_lead`` and also returned.

        Raises:
            IndexError: if ``lead_id`` is not a valid index into the feature
                vector (it is used directly as the one-hot position).
        """
        # make call to data base to retrieve relevant fields for this lead
        lead = get_database().get_lead_by_id(lead_id)

        # preprocess lead_data to get feature vector for our ML model
        feature_vector = np.zeros((1, self._n_features))
        feature_vector[0, lead.lead_id] = 1.0

        # predict() returns an array of shape (1,); take the single element
        # explicitly instead of relying on implicit array-to-scalar
        # conversion, which NumPy has deprecated.
        life_time_value_pred = float(
            self.life_time_value_predictor.predict(feature_vector)[0]
        )
        raw_probability = float(self.probability_predictor.predict(feature_vector)[0])
        # manually applying sigmoid to ensure value in range 0, 1
        cust_prob_pred = float(1 / (1 + np.exp(-raw_probability)))

        lead.lead_value = LeadValue(
            life_time_value=life_time_value_pred,
            customer_probability=cust_prob_pred,
        )
        get_database().update_lead(lead)

        # might not need to return here if the database is updated by this function
        return lead.lead_value
26 changes: 26 additions & 0 deletions src/evp_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>

from database import get_database
from evp.evp import EstimatedValuePredictor

# Demo: run the estimated value predictor for one lead and print the result.

# Id of the lead to run the demo for.
lead_id = 1

# Fetch the lead so its data can be shown in the report below.
lead = get_database().get_lead_by_id(lead_id)

# Constructing the predictor fits its models; estimate_value() then returns
# the predicted LeadValue for the chosen lead.
evp = EstimatedValuePredictor()
lead_value = evp.estimate_value(lead_id)

# Report the lead, both predicted quantities and the combined lead value.
print(
f"""
Dummy prediction for lead#{lead.lead_id}:

Lead:
{lead}

This lead has a predicted probability of {lead_value.customer_probability:.2f} to become a customer.
This lead has a predicted life time value of {lead_value.life_time_value:.2f}.

This results in a total lead value of {lead_value.get_lead_value():.2f}.
"""
)
File renamed without changes.