Merge branch 'main' into dev_classify

maps-as-data · Oct 27, 2023 · 36512a9 · 36512a9
2 parents e526e11 + 8fdb38d
commit 36512a9
Show file tree

Hide file tree

Showing 67 changed files with 1,657 additions and 619 deletions.
diff --git a/.github/workflows/mr_ci.yml b/.github/workflows/mr_ci.yml
@@ -1,11 +1,11 @@
 ---
-name: Integration Tests
+name: Unit Tests
 
 on: [push]
 
 jobs:
 
-  run-tests:
+  unit-tests:
     strategy:
       matrix:
         os: [ubuntu-latest, windows-latest]
@@ -30,11 +30,7 @@ jobs:
 
       - name: Install dependencies
         run: |
-          python -m pip install . 
-
-      - name: Install Tools
-        run: |
-          python -m pip install black flake8 pytest
+          python -m pip install ".[dev]"
 
       - name: Quality Assurance
         run: |

diff --git a/.github/workflows/mr_pip_ci.yml b/.github/workflows/mr_pip_ci.yml
@@ -33,11 +33,7 @@ jobs:
 
       - name: Install dependencies
         run: |
-          python -m pip install mapreader
-
-      - name: Install Tools
-        run: |
-          python -m pip install black flake8 pytest
+          python -m pip install mapreader[dev]
 
       - name: Quality Assurance
         run: |

diff --git a/.github/workflows/publish-to-conda-forge.yml b/.github/workflows/publish-to-conda-forge.yml
@@ -2,10 +2,11 @@
 name: Publish to Conda Forge
 
 on:
+  workflow_dispatch:
   push:
-    branches:
-      - main
-
+    tags:
+      - v*
+      
 jobs:
 
   build_conda:

diff --git a/.github/workflows/publish-to-test-pypi.yml b/.github/workflows/publish-to-test-pypi.yml
@@ -3,6 +3,7 @@ name: Publish all Python 🐍 distributions 📦 to Test PyPI
 
 on:
   pull_request:
+    types: [review_requested]
     branches:
       - main
   push:
@@ -36,4 +37,4 @@ jobs:
         user: __token__
         password: ${{ secrets.TEST_PYPI_API_TOKEN }}
         repository_url: https://test.pypi.org/legacy/
-        verbose: true
+        verbose: true
diff --git a/.gitignore b/.gitignore
@@ -8,4 +8,6 @@ mapreader.egg-info
 conda_debug/
 docs/build/
 docs/source/api/mapreader/
-dist/
+dist/
+.coverage
+.coverage.*
diff --git a/CITATION.cff b/CITATION.cff
@@ -0,0 +1,69 @@
+# This CITATION.cff file was generated with cffinit.
+# Visit https://bit.ly/cffinit to generate yours today!
+
+cff-version: 1.2.0
+title: MapReader
+message: >-
+  If you use this software, please cite it using the
+  metadata from this file.
+type: software
+authors:
+  - given-names: Kaspar
+    family-names: Beelen
+    email: [email protected]
+    affiliation: 'School of Advanced Study, University of London'
+    orcid: 'https://orcid.org/0000-0001-7331-1174'
+  - given-names: Kasra
+    family-names: Hosseini
+    email: [email protected]
+    orcid: 'https://orcid.org/0000-0003-4396-6019'
+  - given-names: Katherine
+    family-names: McDonough
+    email: [email protected]
+    affiliation: Lancaster University
+    orcid: 'https://orcid.org/0000-0001-7506-1025'
+  - given-names: Andrew
+    family-names: Smith
+    orcid: 'https://orcid.org/0000-0002-4465-2284'
+    affiliation: The Alan Turing Institute
+    email: [email protected]
+  - given-names: Daniel C. S.
+    family-names: Wilson
+    email: [email protected]
+    affiliation: The Alan Turing Institute
+    orcid: 'https://orcid.org/0000-0001-6886-775X'
+  - given-names: Rosie
+    family-names: Wood
+    affiliation: The Alan Turing Institute
+    email: [email protected]
+    orcid: 'https://orcid.org/0000-0003-1623-1949'
+  - given-names: Kalle
+    family-names: Westerling
+    email: [email protected]
+    affiliation: The Alan Turing Institute
+    orcid: 'https://orcid.org/0000-0002-2014-332X'
+identifiers:
+  - type: url
+    value: >-
+      https://github.com/Living-with-machines/MapReader/releases/tag/v1.0.1
+    description: The GitHub release URL of tag 1.0.1
+  - type: url
+    value: 'https://arxiv.org/abs/2111.15592'
+    description: MapReader paper on arxiv
+  - type: doi
+    value: 10.1145/3557919.3565812
+    description: ACM SIGSPATIAL Geohumanities 2022 Proceedings DOI
+repository-code: 'https://github.com/Living-with-machines/MapReader'
+url: 'https://mapreader.readthedocs.io/en/latest/'
+abstract: >-
+  MapReader was developed in the Living with Machines
+  project to analyze large collections of historical maps
+  but is a generalizable computer vision pipeline which can
+  be applied to any images in a wide variety of domains.
+keywords:
+  - maps
+  - computational humanities
+  - Living with Machines
+  - cultural heritage
+  - computer vision
+license: MIT
diff --git a/Contributors.md b/Contributors.md
@@ -0,0 +1,28 @@
+# MapReader Project Members
+
+This document provides details about the project members and contributors working (or having previously worked) on _MapReader_ in a paid capacity or through an organizational agreement, in-kind contribution, or grant proposal-based collaboration.
+
+## Current Project Members
+
+| Name | Affiliation | Community Role | Start date | Previous roles |
+|---|---|---|---|---|
+| Katherine McDonough<br>([@kmcdono2](https://github.com/kmcdono2)) | The Alan Turing Institute & Lancaster University| Project Lead | 2019 - Present | - |
+| Daniel Wilson<br>([@dcsw2](https://github.com/dcsw2)) | The Alan Turing Institute | Historian/Researcher | 2019 - Present | - |
+| Kalle Westerling<br>([@kallewesterling](https://github.com/kallewesterling)) | The Alan Turing Institute | Research Application Manager | 2022 - Present | Research Software Engineer (British Library) |
+| Rosie Wood<br>([@rwood-97](https://github.com/rwood-97)) | The Alan Turing Institute | Research Data Scientist | 2023 - Present | - |
+| Kaspar Beelen<br>([@kasparvonbeelen](https://github.com/kasparvonbeelen)) | School of Advanced Study, University of London | Historian/Researcher | 2019 - Present | Digital Humanities Researcher (The Alan Turing Institute) |
+
+## Previous Project Members
+
+The following people are no longer paid to work on the project (although they remain very valuable members of the community!)
+You can request contact information through the project members above, or tag them in the GitHub issues and Pull Requests so others can answer if the folks you're looking for are not around.
+
+| Name | Community Role | Dates |
+|---|---|---|
+| Kasra Hosseini<br>([@kasra-hosseini](https://github.com/kasra-hosseini)) | Research Data Scientist (Turing) | 2019 - 2022 |
+| Jon Lawrence<br>([@jl106jml](https://github.com/jl106jml)) | Historian/Advisor (Exeter) | 2019 - 2023 |
+| Ruth Ahnert<br>([@ruthahnert](https://github.com/ruthahnert)) | Living with Machines PI (QMUL) | 2019 - 2023 |
+| Andy Smith<br>([@andrewphilipsmith](https://github.com/andrewphilipsmith)) | Research Data Scientist (Turing) | 2022 - 2023 |
+| Daniel van Strien<br>([@davanstrien](https://github.com/davanstrien)) | Data Librarian (British Library) | 2019 - 2021 |
+| Olivia Vane<br>([@ov212](https://github.com/ov212)) | Research Software Engineer (British Library) | 2019 - 2021 |
+
diff --git a/LICENSE b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2022 The Alan Turing Institute, British Library Board, Queen Mary University of London, 
+Copyright (c) 2022 The Alan Turing Institute, British Library Board, Queen Mary University of London,
 University of Exeter, University of East Anglia and University of Cambridge
 
 Permission is hereby granted, free of charge, to any person obtaining a copy

diff --git a/README.md b/README.md
@@ -5,7 +5,7 @@
     <h2>A computer vision pipeline for exploring and analyzing images at scale</h2>
     </p>
 </div>
- 
+
 <p align="center">
     <a href="https://pypi.org/project/mapreader/">
         <img alt="PyPI" src="https://img.shields.io/pypi/v/MapReader">
@@ -16,10 +16,10 @@
     <a href="https://github.com/Living-with-machines/MapReader/actions/workflows/mr_ci.yml/badge.svg">
         <img alt="Integration Tests badge" src="https://github.com/Living-with-machines/MapReader/actions/workflows/mr_ci.yml/badge.svg">
     </a>
+    <a href="https://zenodo.org/badge/latestdoi/430661738"><img src="https://zenodo.org/badge/430661738.svg" alt="DOI"></a>
     <br/>
 </p>
 
-
 ## What is MapReader?
 
 <div align="center">
@@ -30,16 +30,15 @@
     </figure>
 </div>
 
-MapReader is an end-to-end computer vision (CV) pipeline for exploring and analyzing images at scale. 
-
-MapReader was developed in the [Living with Machines](https://livingwithmachines.ac.uk/) project to analyze large collections of historical maps but is a _**generalizable**_ computer vision pipeline which can be applied to _**any images**_ in a wide variety of domains. 
+MapReader is an end-to-end computer vision (CV) pipeline for exploring and analyzing images at scale.
 
+MapReader was developed in the [Living with Machines](https://livingwithmachines.ac.uk/) project to analyze large collections of historical maps but is a _**generalizable**_ computer vision pipeline which can be applied to _**any images**_ in a wide variety of domains.
 
 ## Overview
 
 MapReader is a groundbreaking interdisciplinary tool that emerged from a specific set of geospatial historical research questions. It was inspired by methods in biomedical imaging and geographic information science, which were adapted for use by historians, for example in our [Journal of Victorian Culture](https://doi.org/10.1093/jvcult/vcab009) and [Geospatial Humanities 2022 SIGSPATIAL workshop](https://arxiv.org/abs/2111.15592) papers. The success of the tool subsequently generated interest from plant phenotype researchers working with large image datasets, and so MapReader is an example of cross-pollination between the humanities and the sciences made possible by reproducible data science.
 
-### MapReader pipeline 
+### MapReader pipeline
 
 <div align="center">
   <figure>
@@ -49,51 +48,53 @@ MapReader is a groundbreaking interdisciplinary tool that emerged from a specifi
   </figure>
 </div>
 
-The MapReader pipeline consists of a linear sequence of tasks which, together, can be used to train a computer vision (CV) classifier to recognise visual features within maps and identify patches containing these features across entire map collections.
+The MapReader pipeline consists of a linear sequence of tasks which, together, can be used to train a computer vision (CV) classifier to recognize visual features within maps and identify patches containing these features across entire map collections.
 
 See our [About MapReader](https://mapreader.readthedocs.io/en/latest/About.html) page to learn more.
 
-
-## Documentation 
+## Documentation
 
 The MapReader documentation can be found at https://mapreader.readthedocs.io/en/latest/index.html.
 
 **New users** should refer to the [Installation instructions](https://mapreader.readthedocs.io/en/latest/Install.html) and [Input guidance](https://mapreader.readthedocs.io/en/latest/Input-guidance.html) for help with the initial set up of MapReader.
 
-**All users** should refer to our [User Guide](https://mapreader.readthedocs.io/en/latest/User-guide/User-guide.html) for guidance on how to use MapReader. This contains end-to-end instructions on how to use the MapReader pipeline, plus a number of worked examples illustratng use cases such as:
+**All users** should refer to our [User Guide](https://mapreader.readthedocs.io/en/latest/User-guide/User-guide.html) for guidance on how to use MapReader. This contains end-to-end instructions on how to use the MapReader pipeline, plus a number of worked examples illustrating use cases such as:
+
 - Geospatial images (i.e. maps)
-- Non-geospatial images 
+- Non-geospatial images
 
  **Developers and contributors** may also want to refer to the [API documentation](https://mapreader.readthedocs.io/en/latest/api/index.html) and [Contribution guide](https://mapreader.readthedocs.io/en/latest/Contribution-guide.html) for guidance on how to contribute to the MapReader package.
-
-
+
+**Join our Slack workspace!**
+Please fill out [this form](https://forms.gle/dXjECHZQkwrZ3Xpt9) to receive an invitation to the Slack workspace.
+
 ## What is included in this repo?
 
 The MapReader package provides a set of tools to:
 
-- **Download** images/maps and metadata stored on web-servers (e.g. tileserves which can be used to retrieve maps from OpenStreetMap (OSM), the National Library of Scotland (NLS), or elsewhere).
+- **Download** images/maps and metadata stored on web-servers (e.g. tileservers which can be used to retrieve maps from OpenStreetMap (OSM), the National Library of Scotland (NLS), or elsewhere).
 - **Load** images/maps and metadata stored locally.
-- **Preprocess** images/maps:
-  - patchify (create patches from a parent image), 
+- **Pre-process** images/maps:
+  - patchify (create patches from a parent image),
   - resample (use image transformations to alter pixel-dimensions/resolution/orientation/etc.),
   - remove borders outside the neatline,
   - reproject between coordinate reference systems (CRS).
 - **Annotate** images/maps (or their patches) using an interactive annotation tool.
 - **Train or fine-tune** Computer Vision (CV) models and use these to **predict** labels (i.e. model inference) on large sets of images/maps.
 
-Various **plotting and analysis** functionalities are also included (based on packages such as *matplotlib*, *cartopy*, *Google Earth*, and [kepler.gl](https://kepler.gl/)).
-
+Various **plotting and analysis** functionalities are also included (based on packages such as _matplotlib_, _cartopy_, _Google Earth_, and _[kepler.gl](https://kepler.gl/)_).
 
 ## How to cite MapReader
 
-If you use MapReader in your work, please consider acknowledging us by citing [our SIGSPATIAL paper](https://dl.acm.org/doi/10.1145/3557919.3565812):
+If you use MapReader in your work, please cite both the MapReader repo and [our SIGSPATIAL paper](https://dl.acm.org/doi/10.1145/3557919.3565812):
 
 - Kasra Hosseini, Daniel C. S. Wilson, Kaspar Beelen, and Katherine McDonough. 2022. MapReader: a computer vision pipeline for the semantic exploration of maps at scale. In Proceedings of the 6th ACM SIGSPATIAL International Workshop on Geospatial Humanities (GeoHumanities '22). Association for Computing Machinery, New York, NY, USA, 8–19. https://doi.org/10.1145/3557919.3565812
+- Kasra Hosseini, Rosie Wood, Andy Smith, Katie McDonough, Daniel C.S. Wilson, Christina Last, Kalle Westerling, and Evangeline Mae Corcoran. “Living-with-machines/mapreader: End of Lwm”. Zenodo, July 27, 2023. https://doi.org/10.5281/zenodo.8189653.
 
 
 ## Acknowledgements
 
-This work was supported by Living with Machines (AHRC grant AH/S01179X/1) and The Alan Turing Institute (EPSRC grant EP/N510129/1). 
+This work was supported by Living with Machines (AHRC grant AH/S01179X/1) and The Alan Turing Institute (EPSRC grant EP/N510129/1).
 
 Living with Machines, funded by the UK Research and Innovation (UKRI) Strategic Priority Fund, is a multidisciplinary collaboration delivered by the Arts and Humanities Research Council (AHRC), with The Alan Turing Institute, the British Library and the Universities of Cambridge, East Anglia, Exeter, and Queen Mary University of London.
 

diff --git a/docs/source/About.rst b/docs/source/About.rst
@@ -22,25 +22,25 @@ The MapReader pipeline consists of a linear sequence of tasks:
 
 .. image:: figures/pipeline_explained.png
 
-Together, these tasks can be used to train a computer vision (CV) classifier to recognise visual features within maps and identify patches containing these features across entire map collections.
+Together, these tasks can be used to train a computer vision (CV) classifier to recognize visual features within maps and identify patches containing these features across entire map collections.
 
 What kind of visual features can MapReader help me identify?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-In order to train a CV classifier to recognise visual features within your maps, your features must have a homogeneous visual signal across your map collection (i.e. always be represented in the same way).
+In order to train a CV classifier to recognize visual features within your maps, your features must have a homogeneous visual signal across your map collection (i.e. always be represented in the same way).
 
 Why use MapReader?
 -------------------
 
-MapReader becomes useful when the number of maps you wish to analyse exceeds the number which you (or your team) are willing/capable of annotating manually.
+MapReader becomes useful when the number of maps you wish to analyze exceeds the number which you (or your team) are willing to/capable of annotating manually.
 
 This exact number will vary depending on:
 
 - the size of your maps, 
 - the features you want to find,
 - the skills you (or your team) have,
 - the amount of time at your disposal.
- 
+
 Deciding to use MapReader, which uses deep learning computer vision (CV) models to predict the class of content on patches across many sheets, means weighing the pros and cons of working with the data output that is inferred by the model. 
 Inferred data can be evaluated against expert-annotated data to understand its general quality (are all instances of a feature of interest identified by the model? does the model apply the correct label to that feature?), but in the full dataset there *will necessarily be* some percentage of error. 
 
@@ -61,4 +61,4 @@ What skills/knowledge will I need to use MapReader?
 * Understanding of your map collection and knowledge of visual features you would like to identify within your maps
 * Basic understanding of how to use your terminal
 * Basic python
-* Basic understanding of machine learning and computer vision (CV) methodology
+* Basic understanding of machine learning and computer vision (CV) methodology
diff --git a/docs/source/Beginners-info.rst b/docs/source/Beginners-info.rst
@@ -40,7 +40,7 @@ If you're new to virtual environments in Python, this tutorial provides a good i
 Jupyter notebooks
 ------------------
 
-A Jupyter notebook is an interactive computational environment that allows you to write and run code, visualise data, and write narrative text all in the same place. 
+A Jupyter notebook is an interactive computational environment that allows you to write and run code, visualize data, and write narrative text all in the same place. 
 It's a popular tool among data scientists and is commonly used for data analysis, machine learning, and scientific computing. 
 
 If you're new to Jupyter notebooks, here's a great place to start: 

diff --git a/docs/source/Coc.rst b/docs/source/Coc.rst
@@ -0,0 +1,15 @@
+Code of Conduct and Inclusivity
+================================
+
+We are currently in the process of developing a Code of Conduct. 
+In the meantime, we look to the `Code of Conduct <https://github.com/alan-turing-institute/the-turing-way/blob/main/CODE_OF_CONDUCT.md>`_ from The Turing Way as a model.
+
+We aim to be inclusive of people from all walks of life and all research fields. 
+These intentions must be reflected in the contributions that we make.
+
+We therefore encourage intentional, inclusive actions from contributors to MapReader. 
+Here are a few examples of such actions:
+
+- Use respectful, gender-neutral and inclusive language.
+- Aim to include perspectives of researchers from different research backgrounds such as science, humanities and social sciences by not limiting the scope to only scientific domains.
+- Make sure that color palettes used throughout figures are accessible to color-blind readers and contributors.