diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 3e8469c8..520892df 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -1,24 +1,19 @@ # This workflow will install Python dependencies, run tests and lint with a single version of Python # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: build +name: Assignment Validation on: push: - - pull_request: - - create: branches: - 'main' - tags: - - '**' -jobs: - build: + pull_request: +jobs: + test: + name: Test Code runs-on: ubuntu-latest - steps: - uses: actions/checkout@v2 - name: Set up Python 3.8 @@ -28,15 +23,28 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install flake8 pytest pydocstyle + pip install pytest if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Test with pytest + run: pytest -v + lint: + name: Check code style + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Install dependencies + run: | + pip install flake8 pydocstyle - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=80 --statistics - pydocstyle - - name: Test with pytest - run: | - pytest + flake8 . 
--count --max-complexity=10 --max-line-length=80 --statistics + - name: Check doc style with pydocstyle + run: pydocstyle + diff --git a/README.md b/README.md index c5e20191..823291dc 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Assignment 1 for the DataCamp course X-DataScience Master +# Assignment 1 for the DataCamp course X-DataScience Master - numpy ## What we want you to learn by doing this assignment: @@ -10,11 +10,16 @@ ## How? - - For the repository by clicking on the `Fork` button on the upper right corner - - Clone the repository of your fork with: `git clone https://github.com/MYLOGIN/datacamp-assignment1` (replace MYLOGIN with your GitHub login) - - Create a branch called `myassignment` using `git checkout -b myassignment` + - Fork the repository by clicking on the `Fork` button on the upper right corner + - Clone the repository of your fork with: `git clone https://github.com/MYLOGIN/datacamp-assignment-numpy` (replace MYLOGIN with your GitHub login) + - Create a branch called `my_user_name` using `git checkout -b my_user_name` - Make the changes to complete the assignment. You have to modify the files that contain `questions` in their name. Do not modify the files that start with `test_`. - - Open the pull request on GitHub + - Check locally that your solution meets the tests by running `pytest` from the root of the repo. You may need to install `pytest` using `pip` or `conda`. + - Check the code formatting for your solution using `flake8`. You may need to install `flake8` using `pip` or `conda`. + - Open the pull request on GitHub: + - Create a commit with `git add -u` and `git commit -m "UP my solution"` + - Push your branch to your fork: `git push -u origin my_user_name` + - Go to your repo in your browser and click the `Open a PR` button. - Keep pushing to your branch until the continuous integration system is green. - When it is green notify the professors on Slack that your done.
diff --git a/numpy_questions.py b/numpy_questions.py index 8fb399b6..56f62ccb 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -1,3 +1,20 @@ +"""Assignment - using numpy and making a PR. + +The goals of this assignment are: + * Use numpy in practice with two easy exercises. + * Use automated tools to validate the code (`pytest` and `flake8`) + * Submit a Pull-Request on github to practice `git`. + +The two functions below are skeleton functions. The docstrings explain what +are the inputs, the outputs and the expected error. Fill the function to +complete the assignment. The code should be able to pass the test that we +wrote. To run the tests, use `pytest test_numpy_questions.py` at the root of +the repo. It should say that 2 tests ran with success. + +We also ask to respect the pep8 convention: https://pep8.org. +This will be enforced with `flake8`. You can check that there are no flake8 +errors by calling `flake8` at the root of the repo. +""" import numpy as np @@ -11,22 +28,28 @@ def max_index(X): Returns ------- - i : int - The row index of the maximum. - - j : int - The column index of the maximum. + (i, j) : tuple(int) + The row and column index of the maximum. Raises ------ ValueError - If the input is not a numpy error or + If the input is not a numpy array or if the shape is not 2D. """ i = 0 j = 0 - # TODO + if type(X) is not np.ndarray: + raise ValueError("Input is not a numpy array") + if len(np.shape(X)) != 2: + raise ValueError("Shape of the input array is not 2D") + + n_samples, n_features = np.shape(X) + for u in range(n_samples): + for v in range(n_features): + if X[u, v] > X[i, j]: + i, j = u, v return i, j @@ -37,9 +60,20 @@ def wallis_product(n_terms): See: https://en.wikipedia.org/wiki/Wallis_product - XXX : write Parameters and Returns sections as above. + Parameters + ---------- + n_terms : int + Number of steps in the Wallis product. Note that `n_terms=0` will + consider the product to be `1`.
+ Returns + ------- + pi : float + The approximation of order `n_terms` of pi using the Wallis product. """ # XXX : The n_terms is an int that corresponds to the number of # terms in the product. For example 10000. - return 0. + prod = 2. + for n in range(1, n_terms + 1): + prod *= 4 * n ** 2 / ((2 * n - 1) * (2 * n + 1)) + return prod diff --git a/sklearn_questions.py b/sklearn_questions.py deleted file mode 100644 index 3867d48d..00000000 --- a/sklearn_questions.py +++ /dev/null @@ -1,35 +0,0 @@ -import numpy as np -from sklearn.base import BaseEstimator, ClassifierMixin -from sklearn.utils.validation import check_X_y, check_is_fitted -from sklearn.utils.validation import check_array - - -class OneNearestNeighbor(BaseEstimator, ClassifierMixin): - """Write docstring - """ - def __init__(self): - pass - - def fit(self, X, y): - """Write docstring - """ - X, y = check_X_y(X, y) - self.classes_ = np.unique(y) - # XXX fix - return self - - def predict(self, X): - """Write docstring - """ - check_is_fitted(self) - X = check_array(X) - y_pred = np.full(shape=len(X), fill_value=self.classes_[0]) - # XXX fix - return y_pred - - def score(self, X, y): - """Write docstring - """ - X, y = check_X_y(X, y) - y_pred = self.predict(X) - return np.mean(y_pred == y) diff --git a/test_numpy_questions.py b/test_numpy_questions.py index db1dff9f..8be9fabf 100644 --- a/test_numpy_questions.py +++ b/test_numpy_questions.py @@ -16,7 +16,7 @@ def test_max_index(): X = np.random.randn(100, 100) i, j = max_index(X) - assert np.all(X[i, j] <= X) + assert np.all(X[i, j] >= X) with pytest.raises(ValueError): max_index(None) @@ -29,5 +29,11 @@ def test_max_index(): def test_wallis_product(): + pi_approx = wallis_product(0) + assert pi_approx == 2. 
+ + pi_approx = wallis_product(1) + assert pi_approx == 8 / 3 + pi_approx = wallis_product(100000) assert abs(pi_approx - m.pi) < 1e-4 diff --git a/test_sklearn_questions.py b/test_sklearn_questions.py deleted file mode 100644 index 9fab779c..00000000 --- a/test_sklearn_questions.py +++ /dev/null @@ -1,31 +0,0 @@ -# ################################################## -# YOU SHOULD NOT TOUCH THIS FILE ! -# ################################################## - -from sklearn.utils.estimator_checks import check_estimator -from sklearn.model_selection import train_test_split -from sklearn.datasets import make_classification -from sklearn.neighbors import KNeighborsClassifier - -from sklearn_questions import OneNearestNeighbor - -from numpy.testing import assert_array_equal - - -def test_one_nearest_neighbor_check_estimator(): - check_estimator(OneNearestNeighbor()) - - -def test_one_nearest_neighbor_match_sklearn(): - X, y = make_classification(n_samples=200, n_features=20, - random_state=42) - X_train, X_test, y_train, y_test = \ - train_test_split(X, y, random_state=42) - knn = KNeighborsClassifier(n_neighbors=1) - y_pred_sk = knn.fit(X_train, y_train).predict(X_test) - - onn = OneNearestNeighbor() - y_pred_me = onn.fit(X_train, y_train).predict(X_test) - assert_array_equal(y_pred_me, y_pred_sk) - - assert onn.score(X_test, y_test) == knn.score(X_test, y_test)