From afc0555be1c7fa8a02301cca7a43a767b1b37e45 Mon Sep 17 00:00:00 2001 From: eyrei123 Date: Sat, 21 Jun 2025 10:30:26 +0100 Subject: [PATCH 1/6] Pre-TR-Upload 1 --- how-to-drop-null-values-in-pandas/README.md | 16 +++++++++++++ .../drop_a_subset.py | 16 +++++++++++++ .../drop_null_columns.py | 9 ++++++++ .../drop_null_rows.py | 17 ++++++++++++++ .../exercise solutions.py | 23 +++++++++++++++++++ how-to-drop-null-values-in-pandas/grades.csv | 8 +++++++ .../sales_data_with_missing_values.csv | 12 ++++++++++ 7 files changed, 101 insertions(+) create mode 100644 how-to-drop-null-values-in-pandas/README.md create mode 100644 how-to-drop-null-values-in-pandas/drop_a_subset.py create mode 100644 how-to-drop-null-values-in-pandas/drop_null_columns.py create mode 100644 how-to-drop-null-values-in-pandas/drop_null_rows.py create mode 100644 how-to-drop-null-values-in-pandas/exercise solutions.py create mode 100644 how-to-drop-null-values-in-pandas/grades.csv create mode 100644 how-to-drop-null-values-in-pandas/sales_data_with_missing_values.csv diff --git a/how-to-drop-null-values-in-pandas/README.md b/how-to-drop-null-values-in-pandas/README.md new file mode 100644 index 0000000000..253f1dd88d --- /dev/null +++ b/how-to-drop-null-values-in-pandas/README.md @@ -0,0 +1,16 @@ +The materials contained in this download are designed to complement the RealPython tutorial [How to Drop Null Values in pandas](https://realpython.com/how-to-drop-null-values-in-pandas/). + +You should create a new folder named pandas_nulls on your computer and place each file inside it. You may also consider creating a [Python virtual environment](https://realpython.com/python-virtual-environments-a-primer/) within this folder. + +Your download bundle contains the following four files. The first three files contain the code from different tutorial sections, while the fourth contains the solutions to the exercise. 
+ +`drop_null_rows.py` +`drop_null_columns.py` +`drop_a_subset.py` +`exercise_solutions.py` + +There are also two data files containing the data used throughout the tutorial: + +`sales_data_with_missing_values.csv` +`grades.csv` + diff --git a/how-to-drop-null-values-in-pandas/drop_a_subset.py b/how-to-drop-null-values-in-pandas/drop_a_subset.py new file mode 100644 index 0000000000..f6e647e9c5 --- /dev/null +++ b/how-to-drop-null-values-in-pandas/drop_a_subset.py @@ -0,0 +1,16 @@ +import pandas as pd + +sales_data = pd.read_csv( + "sales_data_with_missing_values.csv", + parse_dates=["order_date"], + date_format="%d/%m/%Y", +).convert_dtypes(dtype_backend="pyarrow") + + +sales_data.dropna(axis=0, subset=(["discount", "sale_price"])) + +sales_data.dropna(how="all") + +sales_data.dropna(thresh=5) + +sales_data.dropna(thresh=5, ignore_index=True) diff --git a/how-to-drop-null-values-in-pandas/drop_null_columns.py b/how-to-drop-null-values-in-pandas/drop_null_columns.py new file mode 100644 index 0000000000..5982903b54 --- /dev/null +++ b/how-to-drop-null-values-in-pandas/drop_null_columns.py @@ -0,0 +1,9 @@ +import pandas as pd + +sales_data = pd.read_csv( + "sales_data_with_missing_values.csv", + parse_dates=["order_date"], + date_format="%d/%m/%Y", +).convert_dtypes(dtype_backend="pyarrow") + +sales_data.dropna(axis="columns") diff --git a/how-to-drop-null-values-in-pandas/drop_null_rows.py b/how-to-drop-null-values-in-pandas/drop_null_rows.py new file mode 100644 index 0000000000..2c438c7610 --- /dev/null +++ b/how-to-drop-null-values-in-pandas/drop_null_rows.py @@ -0,0 +1,17 @@ +import pandas as pd + +sales_data = pd.read_csv( + "sales_data_with_missing_values.csv", + parse_dates=["order_date"], + date_format="%d/%m/%Y", +).convert_dtypes(dtype_backend="pyarrow") + +sales_data + +sales_data.isna().sum() + +sales_data.dropna() + +clean_sales_data = sales_data.dropna() + +clean_sales_data = sales_data.dropna(inplace=True) diff --git 
a/how-to-drop-null-values-in-pandas/exercise solutions.py b/how-to-drop-null-values-in-pandas/exercise solutions.py new file mode 100644 index 0000000000..c6701340d3 --- /dev/null +++ b/how-to-drop-null-values-in-pandas/exercise solutions.py @@ -0,0 +1,23 @@ +grades = pd.read_csv( + "grades.csv", +).convert_dtypes(dtype_backend="pyarrow") + +# 1. Permanently drop the last row of the dataframe. + +grades.dropna(how="all", inplace=True) + +# 2. Display the rows for the exams that all students have completed. + +grades.dropna() + +# 3. Display any columns with no missing data. + +grades.dropna(axis=1) + +# 4. Display the exams students have sat five or more times. + +grades.dropna(axis=0, thresh=6) # Remember there are seven columns. + +# 5. Who else would be in the exam hall when both `S2` and `S4` were there? + +grades.dropna(subset=["S2", "S4"]).dropna(axis=1, ignore_index=True) diff --git a/how-to-drop-null-values-in-pandas/grades.csv b/how-to-drop-null-values-in-pandas/grades.csv new file mode 100644 index 0000000000..b716989147 --- /dev/null +++ b/how-to-drop-null-values-in-pandas/grades.csv @@ -0,0 +1,8 @@ +Subject,S1,S2,S3,S4,S5,S6 +math,18,,15,20,17,18 +science,26,35,19,,33, +art,15,,9,17,18,14 +music,14,20,12,20,13,18 +history,18,19,,17,,18 +sport,20,17,20,17,18 +,,,,, \ No newline at end of file diff --git a/how-to-drop-null-values-in-pandas/sales_data_with_missing_values.csv b/how-to-drop-null-values-in-pandas/sales_data_with_missing_values.csv new file mode 100644 index 0000000000..04036fafc2 --- /dev/null +++ b/how-to-drop-null-values-in-pandas/sales_data_with_missing_values.csv @@ -0,0 +1,12 @@ +order_number,order_date,customer_name,product_purchased,discount,sale_price +,09/02/2025,Skipton Fealty,Chili Extra Virgin Olive Oil,TRUE,135.00 +70041,,Carmine Priestnall,,,150.00 +70042,09/02/2025,,Rosemary Olive Oil Candle,FALSE,78.00 +70043,10/02/2025,Lanni D'Ambrogi,,TRUE,19.50 +70044,10/02/2025,Tann Angear,Vanilla and Olive Oil Candle,,13.98 
+70045,10/02/2025,Skipton Fealty,Basil Extra Virgin Olive Oil,TRUE, +70046,11/02/2025,Far Pow,Chili Extra Virgin Olive Oil,FALSE,150.00 +70047,11/02/2025,Hill Group,Chili Extra Virgin Olive Oil,TRUE,135.00 +70048,11/02/2025,Devlin Nock,Lavender and Olive Oil Lotion,FALSE,39.96 +,,,,, +70049,12/02/2025,Swift Inc,Garlic Extra Virgin Olive Oil,TRUE,936.00 From 20a4c377f208c024805b1f18892c096516c11c3e Mon Sep 17 00:00:00 2001 From: eyrei123 Date: Sat, 21 Jun 2025 10:33:41 +0100 Subject: [PATCH 2/6] Pre-TR-Upload 2 - missing import --- how-to-drop-null-values-in-pandas/exercise solutions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/how-to-drop-null-values-in-pandas/exercise solutions.py b/how-to-drop-null-values-in-pandas/exercise solutions.py index c6701340d3..239366d025 100644 --- a/how-to-drop-null-values-in-pandas/exercise solutions.py +++ b/how-to-drop-null-values-in-pandas/exercise solutions.py @@ -1,3 +1,5 @@ +import pandas as pd + grades = pd.read_csv( "grades.csv", ).convert_dtypes(dtype_backend="pyarrow") From 8e3dc01aab7d99128d14848a7183828f79d3540b Mon Sep 17 00:00:00 2001 From: eyrei123 Date: Mon, 7 Jul 2025 20:31:04 +0100 Subject: [PATCH 3/6] post-TR1 Update attempt 1 --- how-to-drop-null-values-in-pandas/drop_a_subset.py | 3 +++ how-to-drop-null-values-in-pandas/drop_null_columns.py | 1 + how-to-drop-null-values-in-pandas/drop_null_rows.py | 2 ++ .../{exercise solutions.py => exercise_solutions.py} | 10 ++++------ 4 files changed, 10 insertions(+), 6 deletions(-) rename how-to-drop-null-values-in-pandas/{exercise solutions.py => exercise_solutions.py} (54%) diff --git a/how-to-drop-null-values-in-pandas/drop_a_subset.py b/how-to-drop-null-values-in-pandas/drop_a_subset.py index f6e647e9c5..a8e79aa0a6 100644 --- a/how-to-drop-null-values-in-pandas/drop_a_subset.py +++ b/how-to-drop-null-values-in-pandas/drop_a_subset.py @@ -1,5 +1,8 @@ import pandas as pd + +pd.set_option("display.max_columns", None) + sales_data = pd.read_csv( 
"sales_data_with_missing_values.csv", parse_dates=["order_date"], diff --git a/how-to-drop-null-values-in-pandas/drop_null_columns.py b/how-to-drop-null-values-in-pandas/drop_null_columns.py index 5982903b54..080458132c 100644 --- a/how-to-drop-null-values-in-pandas/drop_null_columns.py +++ b/how-to-drop-null-values-in-pandas/drop_null_columns.py @@ -1,5 +1,6 @@ import pandas as pd + sales_data = pd.read_csv( "sales_data_with_missing_values.csv", parse_dates=["order_date"], diff --git a/how-to-drop-null-values-in-pandas/drop_null_rows.py b/how-to-drop-null-values-in-pandas/drop_null_rows.py index 2c438c7610..7f68d3f425 100644 --- a/how-to-drop-null-values-in-pandas/drop_null_rows.py +++ b/how-to-drop-null-values-in-pandas/drop_null_rows.py @@ -1,5 +1,7 @@ import pandas as pd +pd.set_option("display.max_columns", None) + sales_data = pd.read_csv( "sales_data_with_missing_values.csv", parse_dates=["order_date"], diff --git a/how-to-drop-null-values-in-pandas/exercise solutions.py b/how-to-drop-null-values-in-pandas/exercise_solutions.py similarity index 54% rename from how-to-drop-null-values-in-pandas/exercise solutions.py rename to how-to-drop-null-values-in-pandas/exercise_solutions.py index 239366d025..21ecd1c410 100644 --- a/how-to-drop-null-values-in-pandas/exercise solutions.py +++ b/how-to-drop-null-values-in-pandas/exercise_solutions.py @@ -1,10 +1,8 @@ -import pandas as pd - grades = pd.read_csv( "grades.csv", ).convert_dtypes(dtype_backend="pyarrow") -# 1. Permanently drop the last row of the dataframe. +# 1. Use `.dropna()` in such a way that it permanently drops the row in the dataframe containing only null values. grades.dropna(how="all", inplace=True) @@ -16,10 +14,10 @@ grades.dropna(axis=1) -# 4. Display the exams students have sat five or more times. +# 4. Display the exams sat by at least five students. -grades.dropna(axis=0, thresh=6) # Remember there are seven columns. +grades.dropna(axis=0, thresh=6) # Remember there are seven columns. -# 5. 
Who else would be in the exam hall when both `S2` and `S4` were there? +# 5. Who else was in every exam that both S2 and S4 sat? grades.dropna(subset=["S2", "S4"]).dropna(axis=1, ignore_index=True) From 914ac7d6a74aa60868e294510e624b6e99019798 Mon Sep 17 00:00:00 2001 From: eyrei123 Date: Mon, 7 Jul 2025 20:35:17 +0100 Subject: [PATCH 4/6] post-TR1 Update attempt 2 --- how-to-drop-null-values-in-pandas/drop_a_subset.py | 1 - how-to-drop-null-values-in-pandas/drop_null_columns.py | 1 - how-to-drop-null-values-in-pandas/exercise_solutions.py | 2 ++ 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/how-to-drop-null-values-in-pandas/drop_a_subset.py b/how-to-drop-null-values-in-pandas/drop_a_subset.py index a8e79aa0a6..c6d7094877 100644 --- a/how-to-drop-null-values-in-pandas/drop_a_subset.py +++ b/how-to-drop-null-values-in-pandas/drop_a_subset.py @@ -1,6 +1,5 @@ import pandas as pd - pd.set_option("display.max_columns", None) sales_data = pd.read_csv( diff --git a/how-to-drop-null-values-in-pandas/drop_null_columns.py b/how-to-drop-null-values-in-pandas/drop_null_columns.py index 080458132c..5982903b54 100644 --- a/how-to-drop-null-values-in-pandas/drop_null_columns.py +++ b/how-to-drop-null-values-in-pandas/drop_null_columns.py @@ -1,6 +1,5 @@ import pandas as pd - sales_data = pd.read_csv( "sales_data_with_missing_values.csv", parse_dates=["order_date"], diff --git a/how-to-drop-null-values-in-pandas/exercise_solutions.py b/how-to-drop-null-values-in-pandas/exercise_solutions.py index 21ecd1c410..99fe600208 100644 --- a/how-to-drop-null-values-in-pandas/exercise_solutions.py +++ b/how-to-drop-null-values-in-pandas/exercise_solutions.py @@ -1,3 +1,5 @@ +import pandas as pd + grades = pd.read_csv( "grades.csv", ).convert_dtypes(dtype_backend="pyarrow") From 43ad39e199b2b9cddc55ba2c27ac1e62062c4d55 Mon Sep 17 00:00:00 2001 From: eyrei123 Date: Mon, 7 Jul 2025 20:39:06 +0100 Subject: [PATCH 5/6] post-TR1 Update attempt 3 --- 
how-to-drop-null-values-in-pandas/exercise_solutions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/how-to-drop-null-values-in-pandas/exercise_solutions.py b/how-to-drop-null-values-in-pandas/exercise_solutions.py index 99fe600208..a1060cb9cc 100644 --- a/how-to-drop-null-values-in-pandas/exercise_solutions.py +++ b/how-to-drop-null-values-in-pandas/exercise_solutions.py @@ -18,7 +18,7 @@ # 4. Display the exams sat by at least five students. -grades.dropna(axis=0, thresh=6) # Remember there are seven columns. +grades.dropna(axis=0, thresh=6) # Remember there are seven columns. # 5. Who else was in every exam that both S2 and S4 sat? From 3485cb71250c0ca5211ac1296e92d9128df9a53c Mon Sep 17 00:00:00 2001 From: stephengruppetta <51741022+stephengruppetta@users.noreply.github.com> Date: Fri, 1 Aug 2025 11:02:16 +0100 Subject: [PATCH 6/6] Update README.md --- how-to-drop-null-values-in-pandas/README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/how-to-drop-null-values-in-pandas/README.md b/how-to-drop-null-values-in-pandas/README.md index 253f1dd88d..a56987f824 100644 --- a/how-to-drop-null-values-in-pandas/README.md +++ b/how-to-drop-null-values-in-pandas/README.md @@ -1,6 +1,6 @@ -The materials contained in this download are designed to complement the RealPython tutorial [How to Drop Null Values in pandas](https://realpython.com/how-to-drop-null-values-in-pandas/). +The materials contained in this download are designed to complement the Real Python tutorial [How to Drop Null Values in pandas](https://realpython.com/how-to-drop-null-values-in-pandas/). -You should create a new folder named pandas_nulls on your computer and place each file inside it. You may also consider creating a [Python virtual environment](https://realpython.com/python-virtual-environments-a-primer/) within this folder. +You should create a new folder named `pandas_nulls` on your computer and place each file inside it. 
You may also consider creating a [Python virtual environment](https://realpython.com/python-virtual-environments-a-primer/) within this folder. Your download bundle contains the following four files. The first three files contain the code from different tutorial sections, while the fourth contains the solutions to the exercise. @@ -13,4 +13,3 @@ There are also two data files containing the data used throughout the tutorial: `sales_data_with_missing_values.csv` `grades.csv` -