Skip to content

Commit

Permalink
Merge branch 'paper'
Browse files Browse the repository at this point in the history
  • Loading branch information
fbunt committed Jan 10, 2025
2 parents eceb0a7 + 3402574 commit e8fe089
Show file tree
Hide file tree
Showing 3 changed files with 417 additions and 0 deletions.
24 changes: 24 additions & 0 deletions .github/workflows/paper-draft-pdf.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Builds a draft PDF of the JOSS paper on every push and stores it as a
# workflow artifact.
name: Paper Draft PDF
on:
  - push

jobs:
  paper:
    name: Paper Draft
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Build draft PDF
        uses: openjournals/openjournals-draft-action@master
        with:
          journal: joss
          # Location of the paper source, relative to the repository root.
          paper-path: paper/paper.md
      - name: Upload
        uses: actions/upload-artifact@v4
        with:
          name: paper
          # Pandoc writes the compiled PDF into the same directory as the
          # input paper.md, so this path must sit next to paper-path.
          path: paper/paper.pdf
299 changes: 299 additions & 0 deletions paper/paper.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,299 @@
% "The Quant Crunch" (Miller and Hughes, 2017).
@article{qcrunch,
  author  = {Miller, Steven and Hughes, Debbie},
  title   = {The Quant Crunch: How the demand for data science skills is
             disrupting the job market},
  journal = {Business Higher Education Forum},
  year    = {2017},
}

% Land editorial on remote sensing and geospatial big data (See et al., 2024).
% The journal identifies papers by article number instead of a page range;
% it is repeated in `pages` because bibliography styles ignore the
% non-standard `article-number` field.
@article{see2024,
  author         = {See, Linda and Lesiv, Myroslava and Schepaschenko, Dmitry},
  title          = {Integrating Remote Sensing and Geospatial Big Data for
                    Land Cover and Land Use Mapping and Monitoring},
  journal        = {Land},
  volume         = {13},
  number         = {6},
  pages          = {769},
  article-number = {769},
  year           = {2024},
  issn           = {2073-445X},
  doi            = {10.3390/land13060769},
  url            = {https://www.mdpi.com/2073-445X/13/6/769},
  abstract       = {The last few decades have seen an explosion in the
                    availability of remotely sensed and geospatial big data,
                    which are defined by the 3 Vs: a large volume of data; a
                    variety of different forms of data; and the rapid
                    velocity of data arrival [...]},
}

% ERA5 global reanalysis description paper (Hersbach et al., 2020).
% The DOI is stored bare (no resolver prefix) and the page range uses a
% double hyphen so that styles can format both themselves.
@article{era5,
  author   = {Hersbach, Hans and Bell, Bill and Berrisford, Paul
              and Hirahara, Shoji and Horányi, András
              and Muñoz-Sabater, Joaquín and Nicolas, Julien
              and Peubey, Carole and Radu, Raluca and Schepers, Dinand
              and Simmons, Adrian and Soci, Cornel and Abdalla, Saleh
              and Abellan, Xavier and Balsamo, Gianpaolo
              and Bechtold, Peter and Biavati, Gionata and Bidlot, Jean
              and Bonavita, Massimo and De Chiara, Giovanna
              and Dahlgren, Per and Dee, Dick and Diamantakis, Michail
              and Dragani, Rossana and Flemming, Johannes
              and Forbes, Richard and Fuentes, Manuel and Geer, Alan
              and Haimberger, Leo and Healy, Sean and Hogan, Robin J.
              and Hólm, Elías and Janisková, Marta and Keeley, Sarah
              and Laloyaux, Patrick and Lopez, Philippe
              and Lupu, Cristina and Radnoti, Gabor
              and de Rosnay, Patricia and Rozum, Iryna
              and Vamborg, Freja and Villaume, Sebastien
              and Thépaut, Jean-Noël},
  title    = {The {ERA5} global reanalysis},
  journal  = {Quarterly Journal of the Royal Meteorological Society},
  volume   = {146},
  number   = {730},
  pages    = {1999--2049},
  year     = {2020},
  doi      = {10.1002/qj.3803},
  url      = {https://rmets.onlinelibrary.wiley.com/doi/abs/10.1002/qj.3803},
  eprint   = {https://rmets.onlinelibrary.wiley.com/doi/pdf/10.1002/qj.3803},
  keywords = {climate reanalysis, Copernicus Climate Change Service, data
              assimilation, ERA5, historical observations},
  abstract = {Within the Copernicus Climate Change Service (C3S), ECMWF is
              producing the ERA5 reanalysis which, once completed, will
              embody a detailed record of the global atmosphere, land
              surface and ocean waves from 1950 onwards. This new reanalysis
              replaces the ERA-Interim reanalysis (spanning 1979 onwards)
              which was started in 2006. ERA5 is based on the Integrated
              Forecasting System (IFS) Cy41r2 which was operational in 2016.
              ERA5 thus benefits from a decade of developments in model
              physics, core dynamics and data assimilation. In addition to a
              significantly enhanced horizontal resolution of 31 km,
              compared to 80 km for ERA-Interim, ERA5 has hourly output
              throughout, and an uncertainty estimate from an ensemble
              (3-hourly at half the horizontal resolution). This paper
              describes the general set-up of ERA5, as well as a basic
              evaluation of characteristics and performance, with a focus on
              the dataset from 1979 onwards which is currently publicly
              available. Re-forecasts from ERA5 analyses show a gain of up
              to one day in skill with respect to ERA-Interim. Comparison
              with radiosonde and PILOT data prior to assimilation shows an
              improved fit for temperature, wind and humidity in the
              troposphere, but not the stratosphere. A comparison with
              independent buoy data shows a much improved fit for ocean wave
              height. The uncertainty estimate reflects the evolution of the
              observing systems used in ERA5. The enhanced temporal and
              spatial resolution allows for a detailed evolution of weather
              systems. For precipitation, global-mean correlation with
              monthly-mean GPCP data is increased from 67\% to 77\%. In
              general, low-frequency variability is found to be well
              represented and from 10 hPa downwards general patterns of
              anomalies in temperature match those from the ERA-Interim,
              MERRA-2 and JRA-55 reanalyses.},
}

% Overview of MODIS Land data processing (Justice et al., 2002).
% Authors are given as "Last, Initials" with spaced initials so that name
% parsing and abbreviation are unambiguous; the DOI is stored bare.
@article{modis,
  author   = {Justice, C. O. and Townshend, J. R. G. and Vermote, E. F.
              and Masuoka, E. and Wolfe, R. E. and Saleous, N.
              and Roy, D. P. and Morisette, J. T.},
  title    = {An overview of {MODIS} Land data processing and product status},
  journal  = {Remote Sensing of Environment},
  volume   = {83},
  number   = {1},
  pages    = {3--15},
  year     = {2002},
  note     = {The Moderate Resolution Imaging Spectroradiometer (MODIS): a
              new generation of Land Surface Monitoring},
  issn     = {0034-4257},
  doi      = {10.1016/S0034-4257(02)00084-6},
  url      = {https://www.sciencedirect.com/science/article/pii/S0034425702000846},
  abstract = {Data from the first Moderate Resolution Imaging
              Spectroradiometer (MODIS) instrument on the NASA Terra
              Platform are being used to provide a new generation of land
              data products in support of the National Aeronautics and Space
              Administration (NASA)'s Earth Science Enterprise, global
              change research and natural resource management. The MODIS
              products include global data sets heretofore unavailable,
              derived from new moderate resolution spectral bands with
              spatial resolutions of 250 m to 1 km. A partnership between
              Science Team members and the MODIS Science Data Support Team
              is producing data sets of unprecedented volume and number for
              the land research and applications. This overview paper
              provides a summary of the instrument performance and status,
              the data production system, the products, their status and
              availability for land studies.},
}

% Sentinel-2 mission overview, IGARSS 2012 (Spoto et al.).
% The exported empty `volume` and `number` fields are dropped because they
% only produce empty-field warnings.
@inproceedings{sentinel2,
  author    = {Spoto, Francois and Sy, Omar and Laberinti, Paolo
               and Martimort, Philippe and Fernandez, Valerie
               and Colin, Olivier and Hoersch, Bianca and Meygret, Aime},
  title     = {Overview Of {Sentinel-2}},
  booktitle = {2012 {IEEE} International Geoscience and Remote Sensing
               Symposium},
  year      = {2012},
  pages     = {1707--1710},
  doi       = {10.1109/IGARSS.2012.6351195},
  keywords  = {Satellites;Europe;Spatial
               resolution;Monitoring;Earth;Clouds;Satellite broadcasting;Earth
               Observation;Land Monitoring;Multispectral Imaging},
}

% NumPy reference paper in Nature (Harris et al., 2020).
@article{numpy,
  author    = {Charles R. Harris and K. Jarrod Millman
               and St{\'{e}}fan J. van der Walt and Ralf Gommers
               and Pauli Virtanen and David Cournapeau and Eric Wieser
               and Julian Taylor and Sebastian Berg and Nathaniel J. Smith
               and Robert Kern and Matti Picus and Stephan Hoyer
               and Marten H. van Kerkwijk and Matthew Brett
               and Allan Haldane and Jaime Fern{\'{a}}ndez del R{\'{i}}o
               and Mark Wiebe and Pearu Peterson
               and Pierre G{\'{e}}rard-Marchant and Kevin Sheppard
               and Tyler Reddy and Warren Weckesser and Hameer Abbasi
               and Christoph Gohlke and Travis E. Oliphant},
  title     = {Array programming with {NumPy}},
  journal   = {Nature},
  publisher = {Springer Science and Business Media {LLC}},
  volume    = {585},
  number    = {7825},
  pages     = {357--362},
  month     = sep,
  year      = {2020},
  doi       = {10.1038/s41586-020-2649-2},
  url       = {https://doi.org/10.1038/s41586-020-2649-2},
}

% SciPy 1.0 reference paper in Nature Methods (Virtanen et al., 2020).
% Only the words that must keep their capitalisation are brace-protected in
% the title; bracing the whole title would block style-driven casing.
@article{scipy,
  author  = {Virtanen, Pauli and Gommers, Ralf and Oliphant, Travis E. and
             Haberland, Matt and Reddy, Tyler and Cournapeau, David and
             Burovski, Evgeni and Peterson, Pearu and Weckesser, Warren and
             Bright, Jonathan and {van der Walt}, St{\'e}fan J. and
             Brett, Matthew and Wilson, Joshua and Millman, K. Jarrod and
             Mayorov, Nikolay and Nelson, Andrew R. J. and Jones, Eric and
             Kern, Robert and Larson, Eric and Carey, C J and
             Polat, {\.I}lhan and Feng, Yu and Moore, Eric W. and
             {VanderPlas}, Jake and Laxalde, Denis and Perktold, Josef and
             Cimrman, Robert and Henriksen, Ian and Quintero, E. A. and
             Harris, Charles R. and Archibald, Anne M. and
             Ribeiro, Ant{\^o}nio H. and Pedregosa, Fabian and
             {van Mulbregt}, Paul and {SciPy 1.0 Contributors}},
  title   = {{SciPy} 1.0: Fundamental Algorithms for Scientific Computing
             in {Python}},
  journal = {Nature Methods},
  year    = {2020},
  volume  = {17},
  pages   = {261--272},
  adsurl  = {https://rdcu.be/b08Wh},
  doi     = {10.1038/s41592-019-0686-2},
}

% xarray software paper in the Journal of Open Research Software (2017).
@article{xarray,
  author    = {Hoyer, S. and Hamman, J.},
  title     = {xarray: {N-D} labeled arrays and datasets in {Python}},
  journal   = {Journal of Open Research Software},
  publisher = {Ubiquity Press},
  volume    = {5},
  number    = {1},
  year      = {2017},
  doi       = {10.5334/jors.148},
  url       = {https://doi.org/10.5334/jors.148},
}

% Dask software citation; the team name is double-braced so it is treated
% as a single corporate author.
@manual{dask,
  author = {{Dask Development Team}},
  title  = {Dask: Library for dynamic task scheduling},
  year   = {2016},
  url    = {http://dask.pydata.org},
}

% GDAL/OGR software citation; the contributor group is double-braced so it
% is treated as a single corporate author.
@manual{gdal,
  author       = {{GDAL/OGR contributors}},
  title        = {{GDAL/OGR} Geospatial Data Abstraction software Library},
  organization = {Open Source Geospatial Foundation},
  year         = {2024},
  doi          = {10.5281/zenodo.5884351},
  url          = {https://gdal.org},
}

% Rasterio software citation.
% `year` holds a plain four-digit year so styles can sort and label the
% entry; the open-ended "2013 onwards" qualifier is kept in `note`.
@manual{rasterio,
  author       = {Sean Gillies and others},
  title        = {Rasterio: geospatial raster I/O for {Python} programmers},
  organization = {Mapbox},
  year         = {2013},
  note         = {2013 onwards},
  url          = {https://github.com/rasterio/rasterio},
}

% rioxarray software citation; the team name is double-braced so it is
% treated as a single corporate author.
@manual{rioxarray,
  author       = {{rioxarray Development Team}},
  title        = {rioxarray: Geospatial xarray extension powered by rasterio},
  organization = {Corteva, Inc.},
  year         = {2019},
  url          = {https://github.com/corteva/rioxarray},
}

% scikit-learn reference paper in JMLR (Pedregosa et al., 2011).
% "Python" is protected as a whole word; a single-letter brace inside a word
% can interfere with kerning and hyphenation.
@article{sklearn,
  author  = {Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
             and Thirion, B. and Grisel, O. and Blondel, M.
             and Prettenhofer, P. and Weiss, R. and Dubourg, V.
             and Vanderplas, J. and Passos, A. and Cournapeau, D.
             and Brucher, M. and Perrot, M. and Duchesnay, E.},
  title   = {Scikit-learn: Machine Learning in {Python}},
  journal = {Journal of Machine Learning Research},
  volume  = {12},
  pages   = {2825--2830},
  year    = {2011},
}

% XGBoost paper, KDD 2016 (Chen and Guestrin).
% The page range uses an ASCII double hyphen instead of a Unicode en dash so
% that classic BibTeX styles parse it as a range.
@inproceedings{xgboost,
  author    = {Chen, Tianqi and Guestrin, Carlos},
  title     = {{XGBoost}: A Scalable Tree Boosting System},
  booktitle = {Proceedings of the 22nd {ACM} {SIGKDD} International
               Conference on Knowledge Discovery and Data Mining},
  series    = {KDD '16},
  year      = {2016},
  pages     = {785--794},
  numpages  = {10},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {San Francisco, California, USA},
  isbn      = {9781450342322},
  doi       = {10.1145/2939672.2939785},
  url       = {https://doi.org/10.1145/2939672.2939785},
  keywords  = {large-scale machine learning},
  abstract  = {Tree boosting is a highly effective and widely used machine
               learning method. In this paper, we describe a scalable
               end-to-end tree boosting system called XGBoost, which is used
               widely by data scientists to achieve state-of-the-art results
               on many machine learning challenges. We propose a novel
               sparsity-aware algorithm for sparse data and weighted
               quantile sketch for approximate tree learning. More
               importantly, we provide insights on cache access patterns,
               data compression and sharding to build a scalable tree
               boosting system. By combining these insights, XGBoost scales
               beyond billions of examples using far fewer resources than
               existing systems.},
}

% PyTorch 2 paper, ASPLOS 2024 (Ansel et al.).
% The page range uses an ASCII double hyphen instead of a Unicode en dash so
% that classic BibTeX styles parse it as a range.
@inproceedings{pytorch,
  author    = {Ansel, Jason and Yang, Edward and He, Horace
               and Gimelshein, Natalia and Jain, Animesh
               and Voznesensky, Michael and Bao, Bin and Bell, Peter
               and Berard, David and Burovski, Evgeni and Chauhan, Geeta
               and Chourdia, Anjali and Constable, Will
               and Desmaison, Alban and DeVito, Zachary and Ellison, Elias
               and Feng, Will and Gong, Jiong and Gschwind, Michael
               and Hirsh, Brian and Huang, Sherlock
               and Kalambarkar, Kshiteej and Kirsch, Laurent
               and Lazos, Michael and Lezcano, Mario and Liang, Yanbo
               and Liang, Jason and Lu, Yinghai and Luk, C. K.
               and Maher, Bert and Pan, Yunjie and Puhrsch, Christian
               and Reso, Matthias and Saroufim, Mark
               and Siraichi, Marcos Yukio and Suk, Helen
               and Zhang, Shunting and Suo, Michael and Tillet, Phil
               and Zhao, Xu and Wang, Eikan and Zhou, Keren
               and Zou, Richard and Wang, Xiaodong and Mathews, Ajit
               and Wen, William and Chanan, Gregory and Wu, Peng
               and Chintala, Soumith},
  title     = {{PyTorch} 2: Faster Machine Learning Through Dynamic {Python}
               Bytecode Transformation and Graph Compilation},
  booktitle = {Proceedings of the 29th {ACM} International Conference on
               Architectural Support for Programming Languages and Operating
               Systems, Volume 2},
  series    = {ASPLOS '24},
  year      = {2024},
  pages     = {929--947},
  numpages  = {19},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {La Jolla, CA, USA},
  isbn      = {9798400703850},
  doi       = {10.1145/3620665.3640366},
  url       = {https://doi.org/10.1145/3620665.3640366},
  abstract  = {This paper introduces two extensions to the popular PyTorch
               machine learning framework, TorchDynamo and TorchInductor,
               which implement the torch.compile feature released in PyTorch
               2. TorchDynamo is a Python-level just-in-time (JIT) compiler
               that enables graph compilation in PyTorch programs without
               sacrificing the flexibility of Python. It achieves this by
               dynamically modifying Python bytecode before execution and
               extracting sequences of PyTorch operations into an FX graph,
               which is then JIT compiled using one of many extensible
               backends. TorchInductor is the default compiler backend for
               TorchDynamo, which translates PyTorch programs into OpenAI's
               Triton for GPUs and C++ for CPUs. Results show that
               TorchDynamo is able to capture graphs more robustly than
               prior approaches while adding minimal overhead, and
               TorchInductor is able to provide a 2.27\texttimes{} inference
               and 1.41\texttimes{} training geometric mean speedup on an
               NVIDIA A100 GPU across 180+ real-world models, which
               outperforms six other compilers. These extensions provide a
               new way to apply optimizations through compilers in eager
               mode frameworks like PyTorch.},
}
Loading

0 comments on commit e8fe089

Please sign in to comment.