From afc0555be1c7fa8a02301cca7a43a767b1b37e45 Mon Sep 17 00:00:00 2001 From: eyrei123 Date: Sat, 21 Jun 2025 10:30:26 +0100 Subject: [PATCH 1/5] Pre-TR-Upload 1 --- how-to-drop-null-values-in-pandas/README.md | 16 +++++++++++++ .../drop_a_subset.py | 16 +++++++++++++ .../drop_null_columns.py | 9 ++++++++ .../drop_null_rows.py | 17 ++++++++++++++ .../exercise solutions.py | 23 +++++++++++++++++++ how-to-drop-null-values-in-pandas/grades.csv | 8 +++++++ .../sales_data_with_missing_values.csv | 12 ++++++++++ 7 files changed, 101 insertions(+) create mode 100644 how-to-drop-null-values-in-pandas/README.md create mode 100644 how-to-drop-null-values-in-pandas/drop_a_subset.py create mode 100644 how-to-drop-null-values-in-pandas/drop_null_columns.py create mode 100644 how-to-drop-null-values-in-pandas/drop_null_rows.py create mode 100644 how-to-drop-null-values-in-pandas/exercise solutions.py create mode 100644 how-to-drop-null-values-in-pandas/grades.csv create mode 100644 how-to-drop-null-values-in-pandas/sales_data_with_missing_values.csv diff --git a/how-to-drop-null-values-in-pandas/README.md b/how-to-drop-null-values-in-pandas/README.md new file mode 100644 index 0000000000..253f1dd88d --- /dev/null +++ b/how-to-drop-null-values-in-pandas/README.md @@ -0,0 +1,16 @@ +The materials contained in this download are designed to complement the RealPython tutorial [How to Drop Null Values in pandas](https://realpython.com/how-to-drop-null-values-in-pandas/). + +You should create a new folder named pandas_nulls on your computer and place each file inside it. You may also consider creating a [Python virtual environment](https://realpython.com/python-virtual-environments-a-primer/) within this folder. + +Your download bundle contains the following four files. The first three files contain the code from different tutorial sections, while the fourth contains the solutions to the exercise. 
+ +`drop_null_rows.py` +`drop_null_columns.py` +`drop_a_subset.py` +`exercise_solutions.py` + +There are also two data files containing the data used throughout the tutorial: + +`sales_data_with_missing_values.csv` +`grades.csv` + diff --git a/how-to-drop-null-values-in-pandas/drop_a_subset.py b/how-to-drop-null-values-in-pandas/drop_a_subset.py new file mode 100644 index 0000000000..f6e647e9c5 --- /dev/null +++ b/how-to-drop-null-values-in-pandas/drop_a_subset.py @@ -0,0 +1,16 @@ +import pandas as pd + +sales_data = pd.read_csv( + "sales_data_with_missing_values.csv", + parse_dates=["order_date"], + date_format="%d/%m/%Y", +).convert_dtypes(dtype_backend="pyarrow") + + +sales_data.dropna(axis=0, subset=(["discount", "sale_price"])) + +sales_data.dropna(how="all") + +sales_data.dropna(thresh=5) + +sales_data.dropna(thresh=5, ignore_index=True) diff --git a/how-to-drop-null-values-in-pandas/drop_null_columns.py b/how-to-drop-null-values-in-pandas/drop_null_columns.py new file mode 100644 index 0000000000..5982903b54 --- /dev/null +++ b/how-to-drop-null-values-in-pandas/drop_null_columns.py @@ -0,0 +1,9 @@ +import pandas as pd + +sales_data = pd.read_csv( + "sales_data_with_missing_values.csv", + parse_dates=["order_date"], + date_format="%d/%m/%Y", +).convert_dtypes(dtype_backend="pyarrow") + +sales_data.dropna(axis="columns") diff --git a/how-to-drop-null-values-in-pandas/drop_null_rows.py b/how-to-drop-null-values-in-pandas/drop_null_rows.py new file mode 100644 index 0000000000..2c438c7610 --- /dev/null +++ b/how-to-drop-null-values-in-pandas/drop_null_rows.py @@ -0,0 +1,17 @@ +import pandas as pd + +sales_data = pd.read_csv( + "sales_data_with_missing_values.csv", + parse_dates=["order_date"], + date_format="%d/%m/%Y", +).convert_dtypes(dtype_backend="pyarrow") + +sales_data + +sales_data.isna().sum() + +sales_data.dropna() + +clean_sales_data = sales_data.dropna() + +clean_sales_data = sales_data.dropna(inplace=True) diff --git 
a/how-to-drop-null-values-in-pandas/exercise solutions.py b/how-to-drop-null-values-in-pandas/exercise solutions.py new file mode 100644 index 0000000000..c6701340d3 --- /dev/null +++ b/how-to-drop-null-values-in-pandas/exercise solutions.py @@ -0,0 +1,23 @@ +grades = pd.read_csv( + "grades.csv", +).convert_dtypes(dtype_backend="pyarrow") + +# 1. Permanently drop the last row of the dataframe. + +grades.dropna(how="all", inplace=True) + +# 2. Display the rows for the exams that all students have completed. + +grades.dropna() + +# 3. Display any columns with no missing data. + +grades.dropna(axis=1) + +# 4. Display the exams students have sat five or more times. + +grades.dropna(axis=0, thresh=6) # Remember there are seven columns. + +# 5. Who else would be in the exam hall when both `S2` and `S4` were there? + +grades.dropna(subset=["S2", "S4"]).dropna(axis=1, ignore_index=True) diff --git a/how-to-drop-null-values-in-pandas/grades.csv b/how-to-drop-null-values-in-pandas/grades.csv new file mode 100644 index 0000000000..b716989147 --- /dev/null +++ b/how-to-drop-null-values-in-pandas/grades.csv @@ -0,0 +1,8 @@ +Subject,S1,S2,S3,S4,S5,S6 +math,18,,15,20,17,18 +science,26,35,19,,33, +art,15,,9,17,18,14 +music,14,20,12,20,13,18 +history,18,19,,17,,18 +sport,20,17,20,17,18 +,,,,, \ No newline at end of file diff --git a/how-to-drop-null-values-in-pandas/sales_data_with_missing_values.csv b/how-to-drop-null-values-in-pandas/sales_data_with_missing_values.csv new file mode 100644 index 0000000000..04036fafc2 --- /dev/null +++ b/how-to-drop-null-values-in-pandas/sales_data_with_missing_values.csv @@ -0,0 +1,12 @@ +order_number,order_date,customer_name,product_purchased,discount,sale_price +,09/02/2025,Skipton Fealty,Chili Extra Virgin Olive Oil,TRUE,135.00 +70041,,Carmine Priestnall,,,150.00 +70042,09/02/2025,,Rosemary Olive Oil Candle,FALSE,78.00 +70043,10/02/2025,Lanni D'Ambrogi,,TRUE,19.50 +70044,10/02/2025,Tann Angear,Vanilla and Olive Oil Candle,,13.98 
+70045,10/02/2025,Skipton Fealty,Basil Extra Virgin Olive Oil,TRUE, +70046,11/02/2025,Far Pow,Chili Extra Virgin Olive Oil,FALSE,150.00 +70047,11/02/2025,Hill Group,Chili Extra Virgin Olive Oil,TRUE,135.00 +70048,11/02/2025,Devlin Nock,Lavender and Olive Oil Lotion,FALSE,39.96 +,,,,, +70049,12/02/2025,Swift Inc,Garlic Extra Virgin Olive Oil,TRUE,936.00 From 20a4c377f208c024805b1f18892c096516c11c3e Mon Sep 17 00:00:00 2001 From: eyrei123 Date: Sat, 21 Jun 2025 10:33:41 +0100 Subject: [PATCH 2/5] Pre-TR-Upload 2 - missing import --- how-to-drop-null-values-in-pandas/exercise solutions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/how-to-drop-null-values-in-pandas/exercise solutions.py b/how-to-drop-null-values-in-pandas/exercise solutions.py index c6701340d3..239366d025 100644 --- a/how-to-drop-null-values-in-pandas/exercise solutions.py +++ b/how-to-drop-null-values-in-pandas/exercise solutions.py @@ -1,3 +1,5 @@ +import pandas as pd + grades = pd.read_csv( "grades.csv", ).convert_dtypes(dtype_backend="pyarrow") From 8e3dc01aab7d99128d14848a7183828f79d3540b Mon Sep 17 00:00:00 2001 From: eyrei123 Date: Mon, 7 Jul 2025 20:31:04 +0100 Subject: [PATCH 3/5] post-TR1 Update attempt 1 --- how-to-drop-null-values-in-pandas/drop_a_subset.py | 3 +++ how-to-drop-null-values-in-pandas/drop_null_columns.py | 1 + how-to-drop-null-values-in-pandas/drop_null_rows.py | 2 ++ .../{exercise solutions.py => exercise_solutions.py} | 10 ++++------ 4 files changed, 10 insertions(+), 6 deletions(-) rename how-to-drop-null-values-in-pandas/{exercise solutions.py => exercise_solutions.py} (54%) diff --git a/how-to-drop-null-values-in-pandas/drop_a_subset.py b/how-to-drop-null-values-in-pandas/drop_a_subset.py index f6e647e9c5..a8e79aa0a6 100644 --- a/how-to-drop-null-values-in-pandas/drop_a_subset.py +++ b/how-to-drop-null-values-in-pandas/drop_a_subset.py @@ -1,5 +1,8 @@ import pandas as pd + +pd.set_option("display.max_columns", None) + sales_data = pd.read_csv( 
"sales_data_with_missing_values.csv", parse_dates=["order_date"], diff --git a/how-to-drop-null-values-in-pandas/drop_null_columns.py b/how-to-drop-null-values-in-pandas/drop_null_columns.py index 5982903b54..080458132c 100644 --- a/how-to-drop-null-values-in-pandas/drop_null_columns.py +++ b/how-to-drop-null-values-in-pandas/drop_null_columns.py @@ -1,5 +1,6 @@ import pandas as pd + sales_data = pd.read_csv( "sales_data_with_missing_values.csv", parse_dates=["order_date"], diff --git a/how-to-drop-null-values-in-pandas/drop_null_rows.py b/how-to-drop-null-values-in-pandas/drop_null_rows.py index 2c438c7610..7f68d3f425 100644 --- a/how-to-drop-null-values-in-pandas/drop_null_rows.py +++ b/how-to-drop-null-values-in-pandas/drop_null_rows.py @@ -1,5 +1,7 @@ import pandas as pd +pd.set_option("display.max_columns", None) + sales_data = pd.read_csv( "sales_data_with_missing_values.csv", parse_dates=["order_date"], diff --git a/how-to-drop-null-values-in-pandas/exercise solutions.py b/how-to-drop-null-values-in-pandas/exercise_solutions.py similarity index 54% rename from how-to-drop-null-values-in-pandas/exercise solutions.py rename to how-to-drop-null-values-in-pandas/exercise_solutions.py index 239366d025..21ecd1c410 100644 --- a/how-to-drop-null-values-in-pandas/exercise solutions.py +++ b/how-to-drop-null-values-in-pandas/exercise_solutions.py @@ -1,10 +1,8 @@ -import pandas as pd - grades = pd.read_csv( "grades.csv", ).convert_dtypes(dtype_backend="pyarrow") -# 1. Permanently drop the last row of the dataframe. +# 1. Use `.dropna()` in such a way that it permanently drops the row in the dataframe containing only null values. grades.dropna(how="all", inplace=True) @@ -16,10 +14,10 @@ grades.dropna(axis=1) -# 4. Display the exams students have sat five or more times. +# 4. Display the exams sat by at least five students. -grades.dropna(axis=0, thresh=6) # Remember there are seven columns. +grades.dropna(axis=0, thresh=6) # Remember there are seven columns. -# 5. 
Who else would be in the exam hall when both `S2` and `S4` were there? +# 5. Who else was in every exam that both S2 and S4 sat? grades.dropna(subset=["S2", "S4"]).dropna(axis=1, ignore_index=True) From 914ac7d6a74aa60868e294510e624b6e99019798 Mon Sep 17 00:00:00 2001 From: eyrei123 Date: Mon, 7 Jul 2025 20:35:17 +0100 Subject: [PATCH 4/5] post-TR1 Update attempt 2 --- how-to-drop-null-values-in-pandas/drop_a_subset.py | 1 - how-to-drop-null-values-in-pandas/drop_null_columns.py | 1 - how-to-drop-null-values-in-pandas/exercise_solutions.py | 2 ++ 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/how-to-drop-null-values-in-pandas/drop_a_subset.py b/how-to-drop-null-values-in-pandas/drop_a_subset.py index a8e79aa0a6..c6d7094877 100644 --- a/how-to-drop-null-values-in-pandas/drop_a_subset.py +++ b/how-to-drop-null-values-in-pandas/drop_a_subset.py @@ -1,6 +1,5 @@ import pandas as pd - pd.set_option("display.max_columns", None) sales_data = pd.read_csv( diff --git a/how-to-drop-null-values-in-pandas/drop_null_columns.py b/how-to-drop-null-values-in-pandas/drop_null_columns.py index 080458132c..5982903b54 100644 --- a/how-to-drop-null-values-in-pandas/drop_null_columns.py +++ b/how-to-drop-null-values-in-pandas/drop_null_columns.py @@ -1,6 +1,5 @@ import pandas as pd - sales_data = pd.read_csv( "sales_data_with_missing_values.csv", parse_dates=["order_date"], diff --git a/how-to-drop-null-values-in-pandas/exercise_solutions.py b/how-to-drop-null-values-in-pandas/exercise_solutions.py index 21ecd1c410..99fe600208 100644 --- a/how-to-drop-null-values-in-pandas/exercise_solutions.py +++ b/how-to-drop-null-values-in-pandas/exercise_solutions.py @@ -1,3 +1,5 @@ +import pandas as pd + grades = pd.read_csv( "grades.csv", ).convert_dtypes(dtype_backend="pyarrow") From 43ad39e199b2b9cddc55ba2c27ac1e62062c4d55 Mon Sep 17 00:00:00 2001 From: eyrei123 Date: Mon, 7 Jul 2025 20:39:06 +0100 Subject: [PATCH 5/5] post-TR1 Update attempt 3 --- 
how-to-drop-null-values-in-pandas/exercise_solutions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/how-to-drop-null-values-in-pandas/exercise_solutions.py b/how-to-drop-null-values-in-pandas/exercise_solutions.py index 99fe600208..a1060cb9cc 100644 --- a/how-to-drop-null-values-in-pandas/exercise_solutions.py +++ b/how-to-drop-null-values-in-pandas/exercise_solutions.py @@ -18,7 +18,7 @@ # 4. Display the exams sat by at least five students. -grades.dropna(axis=0, thresh=6) # Remember there are seven columns. +grades.dropna(axis=0, thresh=6) # Remember there are seven columns. # 5. Who else was in every exam that both S2 and S4 sat?