diff --git a/.DS_Store b/.DS_Store index 146f0b7..9dba8f7 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/fastgwr/__main__.py b/fastgwr/__main__.py index f52959b..4e5840a 100644 --- a/fastgwr/__main__.py +++ b/fastgwr/__main__.py @@ -88,8 +88,6 @@ def testmgwr(): """ print("Testing MGWR with zillow data:") - mpi_path = os.path.dirname(fastgwr.__file__) + '/fastgwr_mpi.py' - print(mpi_path) command = "mpiexec -np 2 python " + mpi_path + " -data https://raw.github.com/Ziqi-Li/FastGWR/master/Zillow-test-dataset/zillow_1k.csv -mgwr -c" os.system(command) pass diff --git a/paper/paper.md b/paper/paper.md index 0671b67..805b7cf 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -33,7 +33,7 @@ As geospatial data are increasingly available from different sources such as rem There are currently existing packages that allow users to fit GWR and MGWR models. The two most popular open-source options are `mgwr` in Python [@oshan2019mgwr] and `GWmodel` in R [@gollini2013gwmodel], both of which provide friendly APIs and are actively maintained. `GWmodel` supports a wide array of geographically weighted models and analysis tools; however, the performance of `GWmodel` lags behind and is not suitable for large datasets. A comprehensive performance comparison between `GWmodel` and `fastgwr` can be found in @li2019fast and @li2020computational. As for `mgwr`, the parallelism of `fastgwr` has been built into `mgwr` by leveraging the `multiprocessing` package. For small and moderate-sized problems, the performance of `mgwr` and `fastgwr` is comparable. Nevertheless, the major advantage of `fastgwr` is that the use of MPI-based parallelism allows the program to run in parallel across multiple computer nodes. In this way, `fastgwr` is the only option if the analyst wants to run the GWR program on a high performance computing cluster, which empowers larger-scale analysis that is impossible for a single workstation. 
To demonstrate this, `fastgwr` has been tested on the University of Arizona's [Ocelote](https://public.confluence.arizona.edu/display/UAHPC/Ocelote+Quick+Start) cluster using the [Zillow datasets](https://github.com/Ziqi-Li/FastGWR/tree/master/Zillow-test-dataset), and the scalability can be seen in \autoref{fig:example}. It is expected that the scalability will further increase with larger datasets as the ratio between computation and data transfer will become larger. Additionally, the model fitting results of `fastgwr` have been validated against `mgwr`; the validation can be found in the [notebooks](https://github.com/Ziqi-Li/FastGWR/tree/master/validation%20notebook) in the attached [GitHub repository](https://github.com/Ziqi-Li/FastGWR). -![Scalability of `fastgwr`. GWR model is fitted with 20,000 observations and MGWR model is fitted with 5,000 observations. \label{fig:example}](scalability.png){ width=50%} +![Scalability of `fastgwr`. GWR model is fitted with 20,000 observations and MGWR model is fitted with 10,000 observations. \label{fig:example}](scalability.png){ width=50%} # Installation diff --git a/requirements.txt b/requirements.txt index a95e1cf..ff540fa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ -mpi4py==3.0.3 -numpy==1.19.2 +mpi4py +scipy +numpy click twine