Merge branch 'develop'

sbettid committed Jul 28, 2023
2 parents 5df961a + b61212c commit ac9ddbd
Showing 19 changed files with 612 additions and 88 deletions.
29 changes: 29 additions & 0 deletions .github/workflows/main.yml
@@ -0,0 +1,29 @@
# This workflow will install Python dependencies and run tests

name: GPSClean test runner

on:
  push:
    branches: [ "develop", "master" ]
  pull_request:
    branches: [ "develop", "master" ]

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v3
    - name: Set up Python 3.9
      uses: actions/setup-python@v3
      with:
        python-version: "3.9"
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        python -m pip install flake8 pytest
        if [ -f requirements.txt ]; then pip install --extra-index-url https://google-coral.github.io/py-repo/ -r requirements.txt; fi
    - name: Test with pytest
      run: |
        pytest
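
The new workflow runs on every push and pull request to the develop and master branches: it checks out the repository, sets up Python 3.9, installs the pinned requirements (adding the Google Coral extra index so the tflite_runtime wheel can be resolved) and then runs pytest. As a rough illustration of what that pytest step collects, a minimal test module might look like the sketch below; the GPX sample and test name are invented for illustration (they are not taken from the repository's src/tests modules), and the snippet only exercises gpxpy, one of the pinned dependencies.

# Hypothetical test module of the kind the workflow's pytest step would collect.
# The GPX sample and assertions are illustrative only.
import gpxpy

GPX_SAMPLE = """<?xml version="1.0" encoding="UTF-8"?>
<gpx version="1.1" creator="test">
  <trk><trkseg>
    <trkpt lat="46.4983" lon="11.3548"><ele>262.0</ele></trkpt>
    <trkpt lat="46.4984" lon="11.3549"><ele>262.5</ele></trkpt>
  </trkseg></trk>
</gpx>"""


def test_gpx_sample_parses_two_points():
    gpx = gpxpy.parse(GPX_SAMPLE)
    points = [p for track in gpx.tracks for segment in track.segments for p in segment.points]
    assert len(points) == 2
    assert all(p.elevation is not None for p in points)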
2 changes: 1 addition & 1 deletion README.md
@@ -30,7 +30,7 @@ The second pre-processing step is the calculation of the deltas between two cons

### Error detection

The error detection step is based on a previously trained machine learning model. The model, based on a neural network composed of Bidirectional LSTM (Long Short-Term Memory) cells, was trained on a set of annotated GPS traces, namely traces on which the errors were explicitly marked using a web-based tool developed ad hoc: [Track annotation](https://api.dawnets.unibz.it/).
The error detection step is based on a previously trained machine learning model. The model, based on a neural network composed of Bidirectional LSTM (Long Short-Term Memory) cells, was trained on a set of annotated GPS traces, namely traces on which the errors were explicitly marked using a web-based tool developed ad hoc.

The considered errors are the following:

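
The README edit above only drops the link to the annotation tool; the detection approach itself is unchanged. Since requirements.txt pins tflite_runtime rather than full TensorFlow, the trained Bidirectional LSTM is presumably shipped as the bundled src/gpsclean/data/model.tflite and evaluated with the TFLite interpreter at runtime. A hedged sketch of that inference pattern is below; the input shape, dtype handling and the meaning of the output classes are assumptions, not read from the actual model.

# Hedged sketch of running the bundled .tflite model with tflite_runtime.
import numpy as np
from tflite_runtime.interpreter import Interpreter

interpreter = Interpreter(model_path="src/gpsclean/data/model.tflite")
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()[0]
output_details = interpreter.get_output_details()[0]

# one padded sequence of per-point features, shaped to whatever the model expects
sequence = np.zeros(input_details["shape"], dtype=input_details["dtype"])

interpreter.set_tensor(input_details["index"], sequence)
interpreter.invoke()
scores = interpreter.get_tensor(output_details["index"])
per_point_classes = scores.argmax(axis=-1)  # assumed: one error class per point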
5 changes: 2 additions & 3 deletions src/gpsclean/requirements.txt → requirements.txt
@@ -3,7 +3,6 @@ filterpy==1.4.5
geojson==2.5.0
gpxpy==1.4.2
numpy==1.22.0
pandas==0.25.3
pyproj==3.1.0
scipy==1.6.1
tflite_runtime==2.5.0.post1
scipy==1.10.0
tflite_runtime==2.5.0.post1
14 changes: 8 additions & 6 deletions setup.py
@@ -1,13 +1,16 @@
import setuptools
import sys
import platform

pkg_vars = {}

with open("README.md", "r", encoding="utf-8") as fh:
long_description = fh.read()

with open("src/gpsclean/_version.py") as fp:
exec(fp.read(), pkg_vars)

setuptools.setup(
name="gpsclean",
version="1.0.1",
version=pkg_vars['__version__'],
author="Davide Sbetti",
author_email="[email protected]",
description="An application to correct a GPS trace using machine learning techniques",
@@ -31,8 +34,7 @@
"geojson==2.5.0",
"gpxpy==1.4.2",
"tflite-runtime>=2.5.0.post1",
"pandas>=0.25.3",
"scipy>=1.6.1",
"scipy>=1.10.0",
"pyproj>=3.0.0",
"numpy>=1.20.0",
"matplotlib>=3.0.0",
@@ -42,7 +44,7 @@
},
entry_points={
'console_scripts': [
'gpsclean = gpsclean.gpsclean:main',
'gpsclean = gpsclean.main:main',
],
},
)
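
With this change the version string is single-sourced: setup.py no longer hard-codes it but exec()s src/gpsclean/_version.py into pkg_vars, and the new __init__.py (further down in this commit) re-exports the same value, so the installed package exposes it directly:

# The version now comes from a single place, src/gpsclean/_version.py.
import gpsclean

print(gpsclean.__version__)  # "1.0.2" for this commit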
6 changes: 4 additions & 2 deletions src/gpsclean.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gpsclean
Version: 1.0.1
Version: 1.0.2
Summary: An application to correct a GPS trace using machine learning techniques
Home-page: https://github.com/sbettid/GPSClean
Author: Davide Sbetti
@@ -21,6 +21,8 @@ License-File: LICENSE

GPSClean is an Open Source application developed with the goal of automatically detecting and correcting errors in GPS traces, exported in the widely adopted GPX format, using machine learning techniques and without prior geographical knowledge. It is the result of the research undertaken for my thesis project during the Master in Computational Data Science (University of Bolzano).

Would you like to know more about the underlying research? Well, then just read the [associated paper](https://www.thinkmind.org/index.php?view=article&articleid=signal_2022_1_10_60003)!

Feel free to try the application and report any feedback on the project's [Github page](https://github.com/sbettid/GPSClean).

### How does it work?
@@ -43,7 +45,7 @@ The second pre-processing step is the calculation of the deltas between two cons

### Error detection

The error detection step is based on a previously trained machine learning model. The model, based on a neural network composed of Bidirectional LSTM (Long Short-Term Memory) cells, was trained on a set of annotated GPS traces, namely traces on which the errors were explicitly marked using a web-based tool developed ad hoc: [Track annotation](https://api.dawnets.unibz.it/).
The error detection step is based on a previously trained machine learning model. The model, based on a neural network composed of Bidirectional LSTM (Long Short-Term Memory) cells, was trained on a set of annotated GPS traces, namely traces on which the errors were explicitly marked using a web-based tool developed ad hoc.

The considered errors are the following:

8 changes: 6 additions & 2 deletions src/gpsclean.egg-info/SOURCES.txt
@@ -5,12 +5,16 @@ setup.py
src/gpsclean/Correction.py
src/gpsclean/FullTraining.py
src/gpsclean/__init__.py
src/gpsclean/gpsclean.py
src/gpsclean/_version.py
src/gpsclean/gpsclean_transform.py
src/gpsclean/main.py
src/gpsclean.egg-info/PKG-INFO
src/gpsclean.egg-info/SOURCES.txt
src/gpsclean.egg-info/dependency_links.txt
src/gpsclean.egg-info/entry_points.txt
src/gpsclean.egg-info/requires.txt
src/gpsclean.egg-info/top_level.txt
src/gpsclean/data/model.tflite
src/gpsclean/data/model.tflite
src/tests/__init__.py
src/tests/integration/__init__.py
src/tests/integration/trace_cleaning_test.py
2 changes: 1 addition & 1 deletion src/gpsclean.egg-info/entry_points.txt
@@ -1,2 +1,2 @@
[console_scripts]
gpsclean = gpsclean.gpsclean:main
gpsclean = gpsclean.main:main
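
The entry-point change mirrors the module rename visible in SOURCES.txt (src/gpsclean/gpsclean.py becomes src/gpsclean/main.py), so the installed gpsclean command now resolves to gpsclean.main:main. Roughly, the generated console script behaves like the wrapper below; how main() parses its arguments is not shown in this diff and is assumed.

# Rough equivalent of the console script setuptools generates for the new entry point.
# Assumes gpsclean.main.main() handles sys.argv itself, which this diff does not show.
import sys

from gpsclean.main import main

if __name__ == "__main__":
    sys.exit(main())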
3 changes: 1 addition & 2 deletions src/gpsclean.egg-info/requires.txt
@@ -3,8 +3,7 @@ filterpy==1.4.5
geojson==2.5.0
gpxpy==1.4.2
tflite-runtime>=2.5.0.post1
pandas>=0.25.3
scipy>=1.6.1
scipy>=1.10.0
pyproj>=3.0.0
numpy>=1.20.0
matplotlib>=3.0.0
1 change: 1 addition & 0 deletions src/gpsclean.egg-info/top_level.txt
@@ -1 +1,2 @@
gpsclean
tests
69 changes: 19 additions & 50 deletions src/gpsclean/Correction.py
@@ -1,16 +1,18 @@
#this file contains the functions used to correct a trace based on the generated predictions
import numpy as np
import pyproj
from pyproj import Transformer
from filterpy.kalman import KalmanFilter
from filterpy.common import Q_discrete_white_noise
from scipy.linalg import block_diag
import pandas as pd
from datetime import datetime


#defining conversions between the two coordinate systems used
ecef = pyproj.Proj(proj='geocent', ellps='WGS84', datum='WGS84')
lla = pyproj.Proj(proj='latlong', ellps='WGS84', datum='WGS84')
ecef = {"proj": 'geocent', "ellps": 'WGS84', "datum": 'WGS84'}
lla = {"proj": 'latlong', "ellps": 'WGS84', "datum": 'WGS84'}

lla_to_ecef_transformer = Transformer.from_crs(lla, ecef)
ecef_to_lla_transform = Transformer.from_crs(ecef, lla)

#value used for clipping the deltas correction of each epoch applied to outliers
EPS = 0.1
@@ -29,9 +31,7 @@ def remove_pauses(all_coords, all_coordtimes, predictions, deltas):
original_times = []

cur_point = 0

#print("Coords: ", all_coords.shape[0], ", Preds: ", predictions.shape)


#if we have subtraces
if len(np.array(all_coords[0]).shape) > 1:
#loop over them and over their points
@@ -71,9 +71,6 @@ def remove_pauses(all_coords, all_coordtimes, predictions, deltas):

if deltas is not None:
reduced_deltas.append(deltas[cur_point])

#else:
#print("Deleting pause point at index: ", cur_point, " with prediction: ", predictions[cur_point])

cur_point += 1

@@ -94,7 +91,7 @@ def full_kalman_smoothing(points, times):
point = points[i]
#convert to ECEF
lon, lat, alt = point[0], point[1], point[2]
x, y, z = pyproj.transform(lla, ecef, lon, lat, alt, radians=False)
x, y, z = lla_to_ecef_transformer.transform(lon, lat, alt, radians=False)
#append to data
data.append(np.array([x,y,z]))

@@ -164,7 +161,6 @@ def full_kalman_smoothing(points, times):
#update prediction based on measured value
f1.update(np.array([long, lat, alt]).T)

#print(f1.x)
#add updated version to array
filterpy_data.append(np.array([f1.x[0][0], f1.x[2][0], f1.x[4][0]]))

@@ -179,21 +175,17 @@ def full_kalman_smoothing(points, times):
y = filterpy_data[i][1]
z = filterpy_data[i][2]
#convert to lat long and append
lon, lat, alt = pyproj.transform(ecef, lla, x, y, z, radians=False)
lon, lat, alt = ecef_to_lla_transform.transform(x, y, z, radians=False)
corrected_points.append(np.array([lon, lat, alt]))

corrected_points = np.array(corrected_points)

#print("Filtered points: ", corrected_points.shape)

return corrected_points, times


#correct the trace by applying a Kalman Filter on outliers only
def kalman_smoothing(points, times, predictions):

#print("Points shape: ", points.shape)


#convert everything to ecef
data = []

@@ -203,7 +195,7 @@ def kalman_smoothing(points, times, predictions):

#convert values to ECEF
lon, lat, alt = point[0], point[1], point[2]
x, y, z = pyproj.transform(lla, ecef, lon, lat, alt, radians=False)
x, y, z = lla_to_ecef_transformer.transform(lon, lat, alt, radians=False)

data.append(np.array([x,y,z]))

@@ -273,7 +265,6 @@ def kalman_smoothing(points, times, predictions):
#if it is an outlier
if predictions[i] >= 2:
#append the corrected version
#print(f1.x)
filterpy_data.append(np.array([f1.x[0][0], f1.x[2][0], f1.x[4][0]]))
else:
#otherwise keep the original one
@@ -292,22 +283,18 @@ def kalman_smoothing(points, times, predictions):
y = filterpy_data[i][1]
#z = filtered_state_means[i][2]
z = filterpy_data[i][2]
lon, lat, alt = pyproj.transform(ecef, lla, x, y, z, radians=False)
lon, lat, alt = ecef_to_lla_transform.transform(x, y, z, radians=False)
corrected_points.append(np.array([lon, lat, alt]))

corrected_points = np.array(corrected_points)

#print("Filtered points: ", corrected_points.shape)

return corrected_points, times



#correct the trace by applying a separate Kalman Filter on each subtrace containing only outliers
def separate_kalman_smoothing(points, times, predictions):

print("Points shape: ", points.shape)


#convert everything to ecef
data = []

@@ -317,7 +304,7 @@ def separate_kalman_smoothing(points, times, predictions):

#convert values to ECEF
lon, lat, alt = point[0], point[1], point[2]
x, y, z = pyproj.transform(lla, ecef, lon, lat, alt, radians=False)
x, y, z = lla_to_ecef_transformer.transform(lon, lat, alt, radians=False)

data.append(np.array([x,y,z]))

@@ -425,21 +412,17 @@ def separate_kalman_smoothing(points, times, predictions):
y = filterpy_data[i][1]
#z = filtered_state_means[i][2]
z = filterpy_data[i][2]
lon, lat, alt = pyproj.transform(ecef, lla, x, y, z, radians=False)
lon, lat, alt = ecef_to_lla_transform.transform(x, y, z, radians=False)
corrected_points.append(np.array([lon, lat, alt]))

corrected_points = np.array(corrected_points)

#print("Filtered points: ", corrected_points.shape)

return corrected_points, times


#correct the trace by applying a separate bidirectional Kalman Filter on each subtrace containing only outliers
def separate_bidirectional_kalman_smoothing(points, times, predictions, R = 4.9):

#print("Points shape: ", points.shape)


#convert everything to ecef
data = []

@@ -449,7 +432,7 @@ def separate_bidirectional_kalman_smoothing(points, times, predictions, R = 4.9):

#convert values to ECEF
lon, lat, alt = point[0], point[1], point[2]
x, y, z = pyproj.transform(lla, ecef, lon, lat, alt, radians=False)
x, y, z = lla_to_ecef_transformer.transform(lon, lat, alt, radians=False)

data.append(np.array([x,y,z]))

@@ -467,8 +450,6 @@ def separate_bidirectional_kalman_smoothing(points, times, predictions, R = 4.9):
for i in range(1, len(datetimes)):
time_deltas.append((datetimes[i] - datetimes[i-1]).total_seconds())




#for each point but the first one
outliers = []
@@ -504,15 +485,11 @@ def separate_bidirectional_kalman_smoothing(points, times, predictions, R = 4.9):
#check we do not have outlying areas at the end
if isOutlier: #append current outlying area
outliers.append({'start' : cur_start, 'end' : cur_end})

#print("Number of outliers area: ", len(outliers))


#for each outlying area
corrected_areas = []
for cur_outlier in outliers:


#print("Cur outlier area: ", cur_outlier['start'], " - ", cur_outlier['end'])
cur_outlier_corrected = []

#create Kalman filter and go over trace from at most 5 points before
@@ -620,21 +597,15 @@ def separate_bidirectional_kalman_smoothing(points, times, predictions, R = 4.9):

#append to list if point is incorrect
if predictions[i] >= 2:
#print("\tCorrecting point: ", i)
#print("\t\tBefore sum: ", cur_outlier_corrected[cur_item])
#print("\t\tSumming: ", np.array([f1.x[0][0], f1.x[2][0], f1.x[4][0]]))
cur_outlier_corrected[cur_item] += np.array([f1.x[0][0], f1.x[2][0], f1.x[4][0]])
#print("\t\tAfter sum: ", cur_outlier_corrected[cur_item])
cur_outlier_corrected[cur_item] /= 2
#print("\t\tMean: ", cur_outlier_corrected[cur_item])

cur_item -= 1
else:
f1.x = np.array([[data[i][0], 0, data[i][1], 0, data[i][2], 0]], dtype=float).T

#now apply correction
cur_outlier_corrected = np.array(cur_outlier_corrected)
#print("\tCur outlier correction shape: ", cur_outlier_corrected.shape)
data[cur_outlier['start'] : cur_outlier['end'] + 1][:] = cur_outlier_corrected

#now convert back to lat lang
@@ -646,11 +617,9 @@ def separate_bidirectional_kalman_smoothing(points, times, predictions, R = 4.9):
y = data[i][1]
#z = filtered_state_means[i][2]
z = data[i][2]
lon, lat, alt = pyproj.transform(ecef, lla, x, y, z, radians=False)
lon, lat, alt = ecef_to_lla_transform.transform(x, y, z, radians=False)
corrected_points.append(np.array([lon, lat, alt]))

corrected_points = np.array(corrected_points)

#print("Filtered points: ", corrected_points.shape)

return corrected_points, times
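
Most of the Correction.py diff is mechanical: pandas and the leftover debug prints are dropped, and every deprecated pyproj.transform(lla, ecef, ...) call is replaced by Transformer objects that are built once from the two CRS definitions and reused for every point, the API pyproj recommends in place of the module-level transform function. A condensed sketch of the new pattern, with purely illustrative coordinates, is below.

# Condensed illustration of the new pyproj usage in Correction.py.
# The sample longitude/latitude/altitude values are illustrative only.
from pyproj import Transformer

ecef = {"proj": "geocent", "ellps": "WGS84", "datum": "WGS84"}
lla = {"proj": "latlong", "ellps": "WGS84", "datum": "WGS84"}

# build the transformers once, then reuse them for every point
lla_to_ecef = Transformer.from_crs(lla, ecef)
ecef_to_lla = Transformer.from_crs(ecef, lla)

x, y, z = lla_to_ecef.transform(11.3548, 46.4983, 262.0, radians=False)  # lon, lat, alt -> ECEF
lon, lat, alt = ecef_to_lla.transform(x, y, z, radians=False)            # and back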
1 change: 0 additions & 1 deletion src/gpsclean/FullTraining.py
@@ -3,7 +3,6 @@
import numpy as np
import pyproj
import sys
import pandas as pd

#setting max int used for masking
max_int = sys.maxsize
1 change: 1 addition & 0 deletions src/gpsclean/__init__.py
@@ -0,0 +1 @@
from ._version import __version__
1 change: 1 addition & 0 deletions src/gpsclean/_version.py
@@ -0,0 +1 @@
__version__ = "1.0.2"
