amosproj · felix-zailskas · Nov 3, 2023 · Oct 26, 2023 · Oct 30, 2023 · Nov 2, 2023
diff --git a/LICENSES/CC-BY-4.0.txt b/LICENSES/CC-BY-4.0.txt
diff --git a/Pipfile b/Pipfile
@@ -11,6 +11,9 @@ pytest = "==7.4.3"
 
 [packages]
 numpy = "==1.26.1"
+scikit-learn = "==1.3.2"
+pydantic = "==2.4.2"
+email-validator = "==2.1.0"
 
 [requires]
 python_version = "3.10"
diff --git a/src/database/__init__.py b/src/database/__init__.py
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: MIT
+# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>
+
+from .database_dummy import DatabaseDummy
+
+_database = None
+
+
+def get_database() -> DatabaseDummy:
+    """Return the shared ``DatabaseDummy``, creating it on the first call."""
+    global _database
+    if _database is not None:
+        return _database
+    _database = DatabaseDummy()
+    return _database
diff --git a/src/database/database_dummy.py b/src/database/database_dummy.py
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: MIT
+# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>
+
+import json
+from pathlib import Path
+from typing import List
+
+from database.models import Lead
+from database.parsers import LeadParser
+
+
+class DatabaseDummy:
+    """In-memory stand-in for the lead database, backed by a JSON fixture.
+
+    The records under the ``"training_leads"`` key of ``dummy_leads.json`` are
+    loaded once at construction time and indexed by their ``lead_id``.
+    """
+
+    def __init__(self) -> None:
+        # Resolve the fixture relative to this module instead of the current
+        # working directory, so loading works no matter where the interpreter
+        # was started from.
+        data_file = Path(__file__).resolve().parent / "dummy_leads.json"
+        with data_file.open(encoding="utf-8") as f:
+            json_data = json.load(f)["training_leads"]
+        self.data = {d["lead_id"]: d for d in json_data}
+
+    def get_lead_by_id(self, id_: int) -> Lead:
+        """Return the lead stored under ``id_``.
+
+        Raises:
+            KeyError: If no lead with this id was loaded.
+        """
+        return LeadParser.parse_lead_from_dict(self.data[id_])
+
+    def get_all_leads(self) -> List[Lead]:
+        """Return every stored record parsed into a ``Lead``."""
+        return [
+            LeadParser.parse_lead_from_dict(entry) for entry in self.data.values()
+        ]
+
+    def update_lead(self, lead: Lead):
+        """Print the requested update to stdout; nothing is persisted."""
+        print(f"Updating database entry for lead#{lead.lead_id}")
+        print(f"Update values: {lead}")
diff --git a/src/database/dummy_leads.json b/src/database/dummy_leads.json
@@ -0,0 +1,59 @@
+{
+  "training_leads": [
+    {
+      "lead_id": 0,
+      "annual_income": 25000,
+      "product_of_interest": "Terminals",
+      "first_name": "Anton",
+      "last_name": "Kerner",
+      "phone_number": "49176123123",
+      "email_address": "[email protected]",
+      "customer_probability": 0.1,
+      "life_time_value": 400000
+    },
+    {
+      "lead_id": 1,
+      "annual_income": 70000,
+      "product_of_interest": "Terminals",
+      "first_name": "Anton",
+      "last_name": "Kerner",
+      "phone_number": "49176123123",
+      "email_address": "[email protected]",
+      "customer_probability": 0.4,
+      "life_time_value": 40000
+    },
+    {
+      "lead_id": 2,
+      "annual_income": 15000,
+      "product_of_interest": "Terminals",
+      "first_name": "Anton",
+      "last_name": "Kerner",
+      "phone_number": "49176123123",
+      "email_address": "[email protected]",
+      "customer_probability": 0.8,
+      "life_time_value": 40000
+    },
+    {
+      "lead_id": 3,
+      "annual_income": 2500000,
+      "product_of_interest": "Terminals",
+      "first_name": "Anton",
+      "last_name": "Kerner",
+      "phone_number": "49176123123",
+      "email_address": "[email protected]",
+      "customer_probability": 0.08,
+      "life_time_value": 400000
+    },
+    {
+      "lead_id": 4,
+      "annual_income": 1200,
+      "product_of_interest": "Terminals",
+      "first_name": "Anton",
+      "last_name": "Kerner",
+      "phone_number": "49176123123",
+      "email_address": "[email protected]",
+      "customer_probability": 0.9,
+      "life_time_value": 3400.23
+    }
+  ]
+}
diff --git a/src/database/dummy_leads.json.license b/src/database/dummy_leads.json.license
@@ -0,0 +1,2 @@
+SPDX-License-Identifier: CC-BY-4.0
+SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>
diff --git a/src/database/models.py b/src/database/models.py
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: MIT
+# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>
+
+from enum import Enum, IntEnum
+from typing import List, Optional
+
+from pydantic import BaseModel, EmailStr, Field
+
+
+class AnnualIncome(IntEnum):
+    """Annual income brackets of a lead.
+
+    Each member's value is the smallest whole-euro amount that belongs to the
+    bracket; the trailing comment on each member gives the full interval.
+    """
+
+    Nothing = 0  # 0€
+    Class1 = 1  # (0€, 35000€]
+    Class2 = 35001  # (35000€, 60000€]
+    Class3 = 60001  # (60000€, 100000€]
+    Class4 = 100001  # (100000€, 200000€]
+    Class5 = 200001  # (200000€, 400000€]
+    Class6 = 400001  # (400000€, 600000€]
+    Class7 = 600001  # (600000€, 1000000€]
+    Class8 = 1000001  # (1000000€, 2000000€]
+    Class9 = 2000001  # (2000000€, 5000000€]
+    Class10 = 5000001  # (5000000€, inf€]
+
+
+class ProductOfInterest(str, Enum):
+    """Product categories a lead can be interested in.
+
+    The ``str`` mix-in makes every member compare equal to its string value.
+    """
+
+    Nothing = "Nothing"
+    Terminals = "Terminals"
+    CashRegisterSystem = "Cash Register System"
+    BusinessAccount = "Business Account"
+    All = "All"
+    Other = "Other"
+
+
+class LeadValue(BaseModel):
+    """Value estimate of a lead.
+
+    ``customer_probability`` is validated to lie in the closed range [0, 1].
+    """
+
+    life_time_value: float
+    customer_probability: float = Field(..., ge=0, le=1)
+
+    def get_lead_value(self) -> float:
+        """Return the life time value weighted by the customer probability."""
+        weighted_value = self.life_time_value * self.customer_probability
+        return weighted_value
+
+
+class Lead(BaseModel):
+    """A sales lead together with its optional value estimate."""
+
+    lead_id: int  # could be expanded to a UUID later
+    first_name: str
+    last_name: str
+    email_address: EmailStr
+    phone_number: str
+    annual_income: AnnualIncome
+    product_of_interest: ProductOfInterest
+    # Defaults to None so a lead that has not been valued yet can be built
+    # without passing the field explicitly; pydantic v2 treats an Optional
+    # annotation without a default as a required field.
+    lead_value: Optional[LeadValue] = None
diff --git a/src/database/parsers.py b/src/database/parsers.py
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: MIT
+# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>
+
+from typing import Dict
+
+from database.models import AnnualIncome, Lead, LeadValue, ProductOfInterest
+
+
+class LeadParser:
+    """Builds ``Lead`` models from raw dictionary records."""
+
+    @staticmethod
+    def parse_lead_from_dict(data: Dict) -> Lead:
+        """Convert one raw lead record into a ``Lead``.
+
+        Args:
+            data: Record with the keys ``lead_id``, ``first_name``,
+                ``last_name``, ``email_address``, ``phone_number``,
+                ``annual_income`` and ``product_of_interest``. The keys
+                ``customer_probability`` and ``life_time_value`` are optional.
+
+        Returns:
+            The parsed lead. ``lead_value`` is only set when both optional
+            keys are present and not ``None``.
+
+        Raises:
+            KeyError: If a mandatory key is missing.
+            ValueError: If ``product_of_interest`` is not a known product or
+                a field fails model validation.
+        """
+        customer_probability = data.get("customer_probability")
+        life_time_value = data.get("life_time_value")
+
+        lead_value = None
+        if customer_probability is not None and life_time_value is not None:
+            lead_value = LeadValue(
+                life_time_value=life_time_value,
+                customer_probability=customer_probability,
+            )
+
+        # Each member's value is the lower bound of its bracket, so the
+        # matching bracket is the last one whose bound does not exceed the
+        # income. The bound must be tested before the member is adopted,
+        # otherwise the income ends up one bracket too high.
+        annual_income = AnnualIncome.Nothing
+        for income_value in AnnualIncome:
+            if data["annual_income"] < income_value:
+                break
+            annual_income = income_value
+
+        return Lead(
+            lead_id=data["lead_id"],
+            first_name=data["first_name"],
+            last_name=data["last_name"],
+            email_address=data["email_address"],
+            phone_number=data["phone_number"],
+            annual_income=annual_income,
+            product_of_interest=ProductOfInterest(data["product_of_interest"]),
+            lead_value=lead_value,
+        )
diff --git a/src/evp/__init__.py b/src/evp/__init__.py
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: MIT
+# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>
diff --git a/src/evp/evp.py b/src/evp/evp.py
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: MIT
+# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>
+
+import numpy as np
+from sklearn.linear_model import LinearRegression
+
+from database import get_database
+from database.models import LeadValue
+
+
+class EstimatedValuePredictor:
+    """Dummy predictor for a lead's customer probability and life time value.
+
+    Two linear regressions are fitted on random feature rows (one row per
+    lead in the database) against the stored lead values. Predictions use a
+    one-hot feature vector indexed by the lead id.
+    """
+
+    def __init__(self) -> None:
+        self.probability_predictor = LinearRegression()
+        self.life_time_value_predictor = LinearRegression()
+
+        all_leads = get_database().get_all_leads()
+        # Remembered so that prediction builds vectors of exactly the width
+        # the models were fitted with.
+        self._n_features = len(all_leads)
+        X = np.random.random((len(all_leads), self._n_features))
+        y_probability = np.array(
+            [lead.lead_value.customer_probability for lead in all_leads]
+        )
+        y_value = np.array([lead.lead_value.life_time_value for lead in all_leads])
+
+        self.probability_predictor.fit(X, y_probability)
+        self.life_time_value_predictor.fit(X, y_value)
+
+    def estimate_value(self, lead_id) -> LeadValue:
+        """Predict the value of a lead and hand the result to the database.
+
+        Args:
+            lead_id: Id of the lead to look up in the database.
+
+        Returns:
+            The predicted ``LeadValue``, which is also set on the lead that
+            is passed to the database's ``update_lead``.
+
+        Raises:
+            IndexError: If ``lead_id`` does not fit into the feature vector.
+        """
+        # make call to data base to retrieve relevant fields for this lead
+        lead = get_database().get_lead_by_id(lead_id)
+
+        # preprocess lead_data to get feature vector for our ML model
+        feature_vector = np.zeros((1, self._n_features))
+        feature_vector[0, lead.lead_id] = 1.0
+
+        # use the models to predict required values; predict() returns a
+        # one-element array here, so unwrap it into a plain float
+        lead_value_pred = float(
+            self.life_time_value_predictor.predict(feature_vector)[0]
+        )
+        raw_probability = self.probability_predictor.predict(feature_vector)[0]
+        # manually applying sigmoid to ensure value in range 0, 1
+        cust_prob_pred = float(1 / (1 + np.exp(-raw_probability)))
+
+        lead.lead_value = LeadValue(
+            life_time_value=lead_value_pred, customer_probability=cust_prob_pred
+        )
+        get_database().update_lead(lead)
+
+        # might not need to return here if the database is updated by this function
+        return lead.lead_value
diff --git a/src/evp_demo.py b/src/evp_demo.py
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: MIT
+# SPDX-FileCopyrightText: 2023 Felix Zailskas <[email protected]>
+
+from database import get_database
+from evp.evp import EstimatedValuePredictor
+
+# Id of the dummy lead the demo runs on.
+lead_id = 1
+
+# Fetch the lead before predicting so the report shows the stored record.
+db = get_database()
+lead = db.get_lead_by_id(lead_id)
+
+evp = EstimatedValuePredictor()
+lead_value = evp.estimate_value(lead_id)
+
+report = f"""
+    Dummy prediction for lead#{lead.lead_id}:
+
+    Lead:
+    {lead}
+
+    This lead has a predicted probability of {lead_value.customer_probability:.2f} to become a customer.
+    This lead has a predicted life time value of {lead_value.life_time_value:.2f}.
+
+    This results in a total lead value of {lead_value.get_lead_value():.2f}.
+"""
+print(report)
diff --git a/src/test_dummy.py → tests/test_dummy.py b/src/test_dummy.py → tests/test_dummy.py