wikimedia · mercelisvaughan · Mar 25, 2024 · Mar 20, 2024 · Mar 20, 2024 · Mar 20, 2024
diff --git a/.DS_Store b/.DS_Store
diff --git a/.gitignore b/.gitignore
@@ -48,7 +48,7 @@ coverage.xml
 *.cover
 *.py,cover
 .hypothesis/
-.pytest_cache/
+.pytest_cache
 cover/
 
 # Translations
@@ -158,3 +158,9 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+# macOS
+.DS_Store
+
+# Visual Studio Code
+.vscode/
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,5 @@
+{
+    "python.analysis.extraPaths": [
+        "./examples"
+    ]
+}
diff --git a/README.md b/README.md
@@ -1,12 +1,46 @@
 # liftwing-python
+<h1>LiftWing-python</h1>
 
-This project involves building a python package that can act as a model registry for the machine learning models deployed on Lift Wing. A model registry acts as the source of truth for the deployed models and their versions offering two main benefits:
-* Versioning and tracking of models: this allows an easier access to model version and tracking
-* Collaboration and reproducibility: in order to download a model the user only needs to interact with the registry.
-Implementation Proposal A python package that allows that has different install options according to the model as each model server has different package requirements. The user, after installing the package, will be able to load a Lift Wing model and make predictions. Taking into consideration the short duration of the internship as well as the fact that we want the person to get to know the Wikimedia community, our way of working as well as get the chance to study and dive into technical topics, the package will first deal with 1-2 models in order to create a complete proof of concept for this work. Also, to avoid blocking this work by other systems/factors or permissions it will be based on our public interfaces: The python package will have a repository on GitHub with CI/CD setup using Github Actions that will automatically upload the python package to the PyPI repository. Models for the packages will be fetched by the public analytics repository https://analytics.wikimedia.org/published/wmf-ml-models/
-There will be two modes of operation for each model:
-* Offline: the user can download and load the model and start making predictions with it. This is particularly useful for experimentation or in the case when someone wants to make a big number of batch requests that would otherwise fail due to rate limiting.
-* Online: The user can make requests to the public APIs (Lift Wing API Gateway) using the package as a client.
-Notes/Considerations:
-* We would have to figure out a (nice) way to integrate this with the deployment charts repo in order to get the model version we need to deploy.
-* Model’s python dependencies: Each model has been developed separately and may require different python libraries and versions. This means that the python package should have different installation options which will reflect the dependencies of a specific model.
+This is a Python package that acts as a client for the Lift Wing API.
+Its purpose is to let users interact with the API by writing Python code instead of building HTTP requests by hand.
+
+Below is an example of how to make a request. This specific request is made to the RevertRisk API:
+
+```python
+import json
+import requests
+
+
+def revert_risk_api_request(language: str, revision_id: int, *, access_token=None, user_agent=None):
+    """
+    Make a request to the RevertRisk API and return the decoded JSON response.
+
+    language is the wiki the revision belongs to, in the form "enwiki", "arwiki", "viwiki", ...
+    revision_id is the specific version of the article.
+    access_token and user_agent are optional; when given they are sent as the
+    Authorization and User-Agent headers. Without them the request is anonymous.
+
+    Raises ValueError when a required parameter is missing or when the API
+    does not answer with HTTP 200.
+    """
+    # both parameters are needed to build the request, so fail early if one is missing
+    if language is None or revision_id is None:
+        raise ValueError("Both 'language' and 'revision_id' parameters are required.")
+
+    # this is the API endpoint
+    inference_url = f"https://api.wikimedia.org/service/lw/inference/v1/models/{language}-reverted:predict"
+
+    # headers is a dictionary sent along with the HTTP request; it stays empty for anonymous requests
+    headers = {}
+    if access_token is not None:
+        headers['Authorization'] = f'Bearer {access_token}'
+        headers['Content-type'] = 'application/json'
+    if user_agent is not None:
+        headers['User-Agent'] = user_agent
+
+    data = {"rev_id": revision_id}
+    response = requests.post(inference_url, headers=headers, data=json.dumps(data))  # POST request is being made
+    if response.status_code != 200:
+        # anything other than 200 is reported to the caller together with the status code
+        raise ValueError(f"Unexpected error occurred: {response.status_code}")
+    return response.json()  # request was successful so return the response
+
+
+language = "viwiki"  # language has to be in this format, enwiki, arwiki etc...
+revision_id = 12345  # rev_id has to be a valid integer, different rev_id gives different response
+
+response = revert_risk_api_request(language, revision_id)
+
+print(response)
+```
diff --git a/examples/__init__.py b/examples/__init__.py
diff --git a/examples/revertrisk_examples.py b/examples/revertrisk_examples.py
@@ -0,0 +1,30 @@
+import json
+import requests
+
+def revert_risk_api_request(language: str, revision_id: int, *, access_token=None, user_agent=None):
+    """
+    Make a POST request to the RevertRisk API and return the decoded JSON response.
+
+    language is the wiki the revision belongs to (e.g. "enwiki", "viwiki").
+    revision_id is the revision to score.
+    access_token and user_agent are optional keyword-only arguments; when given
+    they are sent as the Authorization and User-Agent headers. Without them the
+    request is sent with no extra headers.
+
+    Raises ValueError when a required parameter is None or when the response
+    status code is not 200.
+    """
+    if language is None or revision_id is None:
+        raise ValueError("Both 'language' and 'revision_id' parameters are required.")
+
+    inference_url = f"https://api.wikimedia.org/service/lw/inference/v1/models/{language}-reverted:predict"
+
+    # Only add headers the caller actually supplied.
+    headers = {}
+    if access_token is not None:
+        headers['Authorization'] = f'Bearer {access_token}'
+        headers['Content-type'] = 'application/json'
+    if user_agent is not None:
+        headers['User-Agent'] = user_agent
+
+    data = {"rev_id": revision_id}
+    response = requests.post(inference_url, headers=headers, data=json.dumps(data))
+    if response.status_code != 200:
+        raise ValueError(f"Unexpected error occurred: {response.status_code}")
+    return response.json()
+
+
+if __name__ == "__main__":
+    # Run the demo only when this file is executed as a script, so that
+    # importing the module does not send a real HTTP request.
+    language = "viwiki"
+    revision_id = 12345
+
+    response = revert_risk_api_request(language, revision_id)
+
+    print(response)
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,12 @@
+[tool.poetry]
+name = "liftwing-python"
+version = "0.1.0"
+description = "Users will be able to retrieve JSON responses from their respective wiki APIs"
+
+[tool.poetry.dependencies]
+python = "^3.10 || ^3.11 || ^3.12"
+requests = "^2.0"
+
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
diff --git a/src/liftwing_api/__init__.py b/src/liftwing_api/__init__.py
diff --git a/src/liftwing_api/models/__init__.py b/src/liftwing_api/models/__init__.py
diff --git a/src/liftwing_api/models/langid.py b/src/liftwing_api/models/langid.py
@@ -0,0 +1,64 @@
+# Python 3
+
+import json
+import requests
+
+# Set use_auth to True (and fill in the placeholders below) to send an authenticated request.
+use_auth = False
+inference_url = 'https://api.wikimedia.org/service/lw/inference/v1/models/langid:predict'
+
+if use_auth:
+    # Must be an assignment: "headers: {...}" is only an annotation and leaves headers undefined.
+    headers = {
+        'Authorization': 'Bearer YOUR_ACCESS_TOKEN',
+        'User-Agent': 'YOUR_APP_NAME (YOUR_EMAIL_OR_CONTACT_PAGE)',
+        'Content-type': 'application/json'
+    }
+else:
+    headers = {}
+# The langid model receives the text to identify under the "text" key.
+data = {"text": "Some sample text in any language that we want to identify"}
+response = requests.post(inference_url, headers=headers, data=json.dumps(data))
+print(response.json())
+
+import json
+import requests
+from liftwing_model import LiftwingModel
+
+class Revscoring(LiftwingModel):
+    """Client for the per-wiki revscoring "goodfaith" models served by Lift Wing."""
+
+    def __init__(
+        self,
+        base_url="https://api.wikimedia.org/service/lw/inference/v1/models/{language}-goodfaith:predict",
+        access_token=None,
+        user_agent=None,
+    ):
+        """
+        base_url is the endpoint template; "{language}" is replaced with the wiki on every request.
+        access_token and user_agent are optional and, when given, are sent as the
+        Authorization and User-Agent headers.
+        """
+        # the base class stores base_url, which every model client shares
+        super().__init__(base_url)
+        self.access_token = access_token
+        self.user_agent = user_agent
+
+    def request_to_revScoringAPI(self, language: str, revision_id: int):
+        """
+        Make a POST request to the goodfaith model of the given wiki and return the decoded JSON.
+
+        With the default base_url the request goes to
+        https://api.wikimedia.org/service/lw/inference/v1/models/{language}-goodfaith:predict
+
+        Raises ValueError when a parameter is None or the response status code is not 200.
+        """
+        if language is None or revision_id is None:
+            raise ValueError("Both 'language' and 'revision_id' parameters are required.")
+
+        # a custom base_url without a {language} placeholder is used unchanged
+        inference_url = self.base_url.format(language=language)
+
+        # only add headers that were configured on the instance
+        headers = {}
+        if self.access_token is not None:
+            headers['Authorization'] = f'Bearer {self.access_token}'
+            headers['Content-type'] = 'application/json'
+        if self.user_agent is not None:
+            headers['User-Agent'] = self.user_agent
+
+        data = {"rev_id": revision_id}
+        response = requests.post(inference_url, headers=headers, data=json.dumps(data))
+
+        if response.status_code != 200:
+            raise ValueError(f"Unexpected error occurred: {response.status_code}")
+        return response.json()
+
+if __name__ == "__main__":
+    # Run the demo only when this file is executed as a script, so that
+    # importing the module does not send a real HTTP request.
+    rev = Revscoring()
+
+    jsonresponse = rev.request_to_revScoringAPI(language="arwiki", revision_id=12345)
+
+    print(jsonresponse)
diff --git a/src/liftwing_api/models/liftwing_model.py b/src/liftwing_api/models/liftwing_model.py
@@ -0,0 +1,12 @@
+import requests
+
+class LiftwingModel:
+    """Base class for the Lift Wing model clients; it stores the endpoint URL."""
+    def __init__(self, base_url):
+        """Remember base_url, the endpoint URL passed in by the subclass."""
+        self.base_url = base_url
+        # this base url will be used across every model
+
+    def request(self, endpoint, method="POST"):
+        """Placeholder: ignores both arguments and returns an empty string for now."""
+        # this method will make a request and return a json response
+        #endpoint = base_url
+        #return response.json()
+        return ""
diff --git a/src/liftwing_api/models/readability.py b/src/liftwing_api/models/readability.py
@@ -0,0 +1,43 @@
+import requests
+import json
+from liftwing_model import LiftwingModel
+
+class ReadabilityModel(LiftwingModel):
+    """Client for the readability model served by Lift Wing."""
+
+    def __init__(
+        self,
+        base_url="https://api.wikimedia.org/service/lw/inference/v1/models/readability:predict",
+        access_token=None,
+        user_agent=None,
+    ):
+        """
+        base_url is the endpoint the requests are sent to.
+        access_token and user_agent are optional and, when given, are sent as the
+        Authorization and User-Agent headers.
+        """
+        # the base class stores base_url, which every model client shares
+        super().__init__(base_url)
+        self.access_token = access_token
+        self.user_agent = user_agent
+
+    def request_to_readabilityAPI(self, revision_id: int, language: str):
+        """
+        Make a POST request to the readability endpoint and return the decoded JSON.
+
+        With the default base_url the request goes to
+        https://api.wikimedia.org/service/lw/inference/v1/models/readability:predict
+        The revision id and the language are sent as "rev_id" and "lang".
+
+        Raises ValueError when a parameter is None or the response status code is not 200.
+        """
+        if language is None or revision_id is None:
+            raise ValueError("Both 'language' and 'revision_id' parameters are required.")
+
+        inference_url = self.base_url
+
+        # only add headers that were configured on the instance
+        headers = {}
+        if self.access_token is not None:
+            headers['Authorization'] = f'Bearer {self.access_token}'
+            headers['Content-type'] = 'application/json'
+        if self.user_agent is not None:
+            headers['User-Agent'] = self.user_agent
+
+        # the endpoint is not per-wiki, so the language has to travel in the payload
+        data = {"rev_id": revision_id, "lang": language}
+        response = requests.post(inference_url, headers=headers, data=json.dumps(data))
+
+        if response.status_code != 200:
+            raise ValueError(f"Unexpected error occurred: {response.status_code}")
+        return response.json()
+
+if __name__ == "__main__":
+    # Run the demo only when this file is executed as a script, so that
+    # importing the module does not send a real HTTP request.
+    readability = ReadabilityModel()
+
+    # keyword arguments must use the method's parameter names, not the payload keys
+    jsonresponse = readability.request_to_readabilityAPI(revision_id=123456, language="en")
+
+    print(jsonresponse)
diff --git a/src/liftwing_api/models/revertrisk.py b/src/liftwing_api/models/revertrisk.py
@@ -0,0 +1,43 @@
+import requests
+import json
+from liftwing_model import LiftwingModel
+
+class RevertRiskAPIModel(LiftwingModel):
+    """Client for the per-wiki "reverted" models served by Lift Wing."""
+
+    def __init__(
+        self,
+        base_url="https://api.wikimedia.org/service/lw/inference/v1/models/{language}-reverted:predict",
+        access_token=None,
+        user_agent=None,
+    ):
+        """
+        base_url is the endpoint template; "{language}" is replaced with the wiki on every request.
+        access_token and user_agent are optional and, when given, are sent as the
+        Authorization and User-Agent headers.
+        """
+        # the base class stores base_url, which every model client shares
+        super().__init__(base_url)
+        self.access_token = access_token
+        self.user_agent = user_agent
+
+    def request_to_revertRiskAPI(self, language: str, revision_id: int):
+        """
+        Make a POST request to the reverted model of the given wiki and return the decoded JSON.
+
+        With the default base_url the request goes to
+        https://api.wikimedia.org/service/lw/inference/v1/models/{language}-reverted:predict
+
+        Raises ValueError when a parameter is None or the response status code is not 200.
+        """
+        if language is None or revision_id is None:
+            raise ValueError("Both 'language' and 'revision_id' parameters are required.")
+
+        # a custom base_url without a {language} placeholder is used unchanged
+        inference_url = self.base_url.format(language=language)
+
+        # only add headers that were configured on the instance
+        headers = {}
+        if self.access_token is not None:
+            headers['Authorization'] = f'Bearer {self.access_token}'
+            headers['Content-type'] = 'application/json'
+        if self.user_agent is not None:
+            headers['User-Agent'] = self.user_agent
+
+        data = {"rev_id": revision_id}
+        response = requests.post(inference_url, headers=headers, data=json.dumps(data))
+
+        if response.status_code != 200:
+            raise ValueError(f"Unexpected error occurred: {response.status_code}")
+        return response.json()
+
+if __name__ == "__main__":
+    # Run the demo only when this file is executed as a script, so that
+    # importing the module does not send a real HTTP request.
+    revertRisk = RevertRiskAPIModel()
+
+    jsonresponse = revertRisk.request_to_revertRiskAPI(language="viwiki", revision_id=12345)
+
+    print(jsonresponse)
diff --git a/src/liftwing_api/models/revscoring.py b/src/liftwing_api/models/revscoring.py
@@ -0,0 +1,44 @@
+import json
+import requests
+from liftwing_model import LiftwingModel
+
+class Revscoring(LiftwingModel):
+    """Client for the per-wiki revscoring "goodfaith" models served by Lift Wing."""
+
+    def __init__(
+        self,
+        base_url="https://api.wikimedia.org/service/lw/inference/v1/models/{language}-goodfaith:predict",
+        access_token=None,
+        user_agent=None,
+    ):
+        """
+        base_url is the endpoint template; "{language}" is replaced with the wiki on every request.
+        access_token and user_agent are optional and, when given, are sent as the
+        Authorization and User-Agent headers.
+        """
+        # the base class stores base_url, which every model client shares
+        super().__init__(base_url)
+        self.access_token = access_token
+        self.user_agent = user_agent
+
+    def request_to_revScoringAPI(self, language: str, revision_id: int):
+        """
+        Make a POST request to the goodfaith model of the given wiki and return the decoded JSON.
+
+        With the default base_url the request goes to
+        https://api.wikimedia.org/service/lw/inference/v1/models/{language}-goodfaith:predict
+
+        Raises ValueError when a parameter is None or the response status code is not 200.
+        """
+        if language is None or revision_id is None:
+            raise ValueError("Both 'language' and 'revision_id' parameters are required.")
+
+        # a custom base_url without a {language} placeholder is used unchanged
+        inference_url = self.base_url.format(language=language)
+
+        # only add headers that were configured on the instance
+        headers = {}
+        if self.access_token is not None:
+            headers['Authorization'] = f'Bearer {self.access_token}'
+            headers['Content-type'] = 'application/json'
+        if self.user_agent is not None:
+            headers['User-Agent'] = self.user_agent
+
+        data = {"rev_id": revision_id}
+        response = requests.post(inference_url, headers=headers, data=json.dumps(data))
+
+        if response.status_code != 200:
+            raise ValueError(f"Unexpected error occurred: {response.status_code}")
+        return response.json()
+
+if __name__ == "__main__":
+    # Run the demo only when this file is executed as a script, so that
+    # importing the module does not send a real HTTP request.
+    rev = Revscoring()
+
+    jsonresponse = rev.request_to_revScoringAPI(language="arwiki", revision_id=12345)
+
+    print(jsonresponse)
diff --git a/tests/__init__.py b/tests/__init__.py
diff --git a/tests/models/revertriskTest.py b/tests/models/revertriskTest.py
@@ -0,0 +1,12 @@
+import unittest
+from unittest.mock import patch
+from src.liftwing_api.models import revertrisk
+
+class RevertRiskAPIModelTest(unittest.TestCase):
+    """Tests for RevertRiskAPIModel with the HTTP call mocked out."""
+
+    # Patch the requests module that revertrisk itself uses, so no real request is sent.
+    @patch.object(revertrisk.requests, 'post')
+    def test_revert_risk_example_test_200(self, mock_post):
+        """A 200 response is returned to the caller as the decoded JSON body."""
+        mock_post.return_value.status_code = 200
+        mock_post.return_value.json.return_value = {'key': 'value'}
+
+        model = revertrisk.RevertRiskAPIModel()
+        result = model.request_to_revertRiskAPI("enwiki", 12345)
+
+        self.assertEqual(result, {'key': 'value'})
diff --git a/tests/revertrisk_examplestest.py b/tests/revertrisk_examplestest.py
@@ -0,0 +1,36 @@
+import unittest
+from unittest.mock import patch
+from examples.revertrisk_examples import revert_risk_api_request
+
+
+class RevertRisk_ExamplesTest(unittest.TestCase):
+    """Tests for revert_risk_api_request with requests.post replaced by a mock."""
+
+    def test_revert_risk_example_test_200(self):
+        """A 200 response yields the JSON body of the response."""
+        with patch('examples.revertrisk_examples.requests.post') as fake_post:
+            fake_response = fake_post.return_value
+            fake_response.status_code = 200
+            fake_response.json.return_value = {'key': 'value'}
+
+            actual = revert_risk_api_request("en", 12345)
+
+        self.assertEqual(actual, {'key': 'value'})
+
+    def test_revert_risk_api_request_failure(self):
+        """A 400 response is turned into a ValueError."""
+        with patch('examples.revertrisk_examples.requests.post') as fake_post:
+            fake_post.return_value.status_code = 400
+
+            self.assertRaises(ValueError, revert_risk_api_request, "en", 12345)
+
+    def test_revert_risk_api_request_empty_response(self):
+        """A 200 response with an empty JSON object is passed through unchanged."""
+        with patch('examples.revertrisk_examples.requests.post') as fake_post:
+            fake_response = fake_post.return_value
+            # successful status code, but nothing in the body
+            fake_response.status_code = 200
+            fake_response.json.return_value = {}
+
+            actual = revert_risk_api_request("en", 12345)
+
+        # the empty body must come back as an empty dict
+        self.assertEqual(actual, {})
+
+if __name__ == '__main__':
+    unittest.main()