Skip to content

Commit

Permalink
Created a small skeleton on how the EVP might be interacted with. Add…
Browse files Browse the repository at this point in the history
…ed some dummy data for example purposes. #22

Signed-off-by: Felix Zailskas <[email protected]>
  • Loading branch information
felix-zailskas authored and rbbozkurt committed Nov 5, 2023
1 parent a0039d9 commit a5d2345
Show file tree
Hide file tree
Showing 9 changed files with 330 additions and 0 deletions.
156 changes: 156 additions & 0 deletions LICENSES/CC-BY-4.0.txt

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ name = "pypi"
[dev-packages]

[packages]
numpy = "==1.26.1"
scikit-learn = "==1.3.2"

[requires]
python_version = "3.10"
13 changes: 13 additions & 0 deletions src/database/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>

from .database_dummy import DatabaseDummy

_database = None


def get_database() -> DatabaseDummy:
    """Return the shared ``DatabaseDummy`` instance.

    The instance is kept in the module-level ``_database`` variable and is
    constructed only on the first call; every later call returns that same
    object.
    """
    global _database
    if _database is not None:
        return _database
    _database = DatabaseDummy()
    return _database
17 changes: 17 additions & 0 deletions src/database/database_dummy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>

import json


class DatabaseDummy:
def __init__(self) -> None:
with open("src/database/dummy_leads.json") as f:
json_data = json.load(f)["training_leads"]
self.data = {d["lead_id"]: d for d in json_data}

def get_entry_by_id(self, id_: int) -> dict:
return self.data[id_]

def get_all_entries(self):
return self.data
59 changes: 59 additions & 0 deletions src/database/dummy_leads.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
{
"training_leads": [
{
"lead_id": 0,
"company_name": "test_company",
"first_name": "test_first",
"last_name": "test_last",
"country_code": "DE",
"phone_number": 176123123,
"email_address": "[email protected]",
"customer_probability": 0.1,
"life_time_value": 400000
},
{
"lead_id": 1,
"company_name": "test_company",
"first_name": "test_first",
"last_name": "test_last",
"country_code": "DE",
"phone_number": 176123123,
"email_address": "[email protected]",
"customer_probability": 0.9,
"life_time_value": 1000
},
{
"lead_id": 2,
"company_name": "test_company",
"first_name": "test_first",
"last_name": "test_last",
"country_code": "DE",
"phone_number": 176123123,
"email_address": "[email protected]",
"customer_probability": 0.7,
"life_time_value": 3500
},
{
"lead_id": 3,
"company_name": "test_company",
"first_name": "test_first",
"last_name": "test_last",
"country_code": "DE",
"phone_number": 176123123,
"email_address": "[email protected]",
"customer_probability": 0.4,
"life_time_value": 10000
},
{
"lead_id": 4,
"company_name": "test_company",
"first_name": "test_first",
"last_name": "test_last",
"country_code": "DE",
"phone_number": 176123123,
"email_address": "[email protected]",
"customer_probability": 0.32,
"life_time_value": 20000
}
]
}
2 changes: 2 additions & 0 deletions src/database/dummy_leads.json.license
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
SPDX-License-Identifier: CC-BY-4.0
SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>
2 changes: 2 additions & 0 deletions src/evp/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>
53 changes: 53 additions & 0 deletions src/evp/evp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>

import numpy as np
from sklearn.linear_model import LinearRegression

from database import get_database


class LeadValue:
    """Pair of a lead's life time value and its probability of converting."""

    def __init__(
        self, lifetime_value: float = 0, customer_probability: float = 0
    ) -> None:
        """Store the two components of a lead's value.

        Args:
            lifetime_value: Value of the lead if it becomes a customer.
            customer_probability: Probability of the lead becoming a
                customer; must lie in the closed interval [0.0, 1.0].

        Raises:
            AssertionError: If ``customer_probability`` is outside [0.0, 1.0].
        """
        # Raised explicitly instead of via an `assert` statement so the check
        # is not stripped when Python runs with -O. The exception type and
        # message are kept so existing callers observe the same failure.
        if not (0.0 <= customer_probability <= 1.0):
            raise AssertionError(
                "Probability of becoming a customer must be between 0.0 and 1.0"
            )
        self.life_time_value = lifetime_value
        self.customer_probability = customer_probability

    def get_lead_value(self) -> float:
        """Return the life time value multiplied by the customer probability."""
        return self.life_time_value * self.customer_probability


class EstimatedValuePredictor:
    """Skeleton predictor for the value of a lead.

    Two linear regression models are fitted on the entries returned by the
    database: one for the probability of becoming a customer and one for the
    life time value. The feature matrix is random placeholder data, so the
    predictions are dummy values.
    """

    def __init__(self) -> None:
        """Fit both models on all entries currently in the database."""
        self.probability_predictor = LinearRegression()
        self.life_time_value_predictor = LinearRegression()

        data = get_database().get_all_entries()
        # Placeholder features: one random row per lead. The width is
        # remembered so that prediction uses a matching feature vector.
        self._n_features = len(data)
        X = np.random.random((len(data), self._n_features))
        y_probability = np.array(
            [item["customer_probability"] for item in data.values()]
        )
        # Target for the value model is the life time value, not the
        # probability.
        y_value = np.array([item["life_time_value"] for item in data.values()])

        self.probability_predictor.fit(X, y_probability)
        self.life_time_value_predictor.fit(X, y_value)

    def estimate_value(self, lead_id) -> LeadValue:
        """Predict the life time value and customer probability of a lead.

        Args:
            lead_id: Id of the lead as stored in the database.

        Returns:
            A ``LeadValue`` holding both predictions as plain floats.

        Raises:
            KeyError: If the database has no entry for ``lead_id``.
        """
        # Look the lead up in the database. The entry is not used for the
        # features yet, but an unknown id still fails here.
        get_database().get_entry_by_id(lead_id)

        # Placeholder for preprocessing the lead data into a feature vector;
        # its width must match the one the models were fitted with.
        feature_vector = np.random.random((1, self._n_features))

        # predict() returns one value per input row; take the single result.
        lead_value_pred = float(
            self.life_time_value_predictor.predict(feature_vector)[0]
        )
        raw_probability = self.probability_predictor.predict(feature_vector)[0]
        # manually applying sigmoid to ensure value in range 0, 1
        cust_prob_pred = float(1 / (1 + np.exp(-raw_probability)))

        return LeadValue(lead_value_pred, cust_prob_pred)
26 changes: 26 additions & 0 deletions src/evp_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>

from database import get_database
from evp.evp import EstimatedValuePredictor

# Demo script: look up one dummy lead and print the predictor's estimate.
lead_id = 0

# Raw database entry for the lead, printed alongside the prediction below.
lead_data = get_database().get_entry_by_id(lead_id)

# Create the predictor and ask it for this lead's estimated value.
evp = EstimatedValuePredictor()
lead_value = evp.estimate_value(lead_id)

# Report the stored data, both predicted components and their product.
print(
f"""
Dummy prediction for {lead_id=}:
Data:
{lead_data}
This lead has a predicted probability of {lead_value.customer_probability} to become a customer.
This lead has a predicted life time value of {lead_value.life_time_value}.
This results in a total lead value of {lead_value.get_lead_value()}.
"""
)

0 comments on commit a5d2345

Please sign in to comment.