diff --git a/how-to-drop-null-values-in-pandas/README.md b/how-to-drop-null-values-in-pandas/README.md new file mode 100644 index 0000000000..253f1dd88d --- /dev/null +++ b/how-to-drop-null-values-in-pandas/README.md @@ -0,0 +1,16 @@ +The materials contained in this download are designed to complement the Real Python tutorial [How to Drop Null Values in pandas](https://realpython.com/how-to-drop-null-values-in-pandas/). + +You should create a new folder named pandas_nulls on your computer and place each file inside it. You may also consider creating a [Python virtual environment](https://realpython.com/python-virtual-environments-a-primer/) within this folder. + +Your download bundle contains the following four files. The first three files contain the code from different tutorial sections, while the fourth contains the solutions to the exercise. + +`drop_null_rows.py` +`drop_null_columns.py` +`drop_a_subset.py` +`exercise_solutions.py` + +There are also two data files containing the data used throughout the tutorial: + +`sales_data_with_missing_values.csv` +`grades.csv` + diff --git a/how-to-drop-null-values-in-pandas/drop_a_subset.py b/how-to-drop-null-values-in-pandas/drop_a_subset.py new file mode 100644 index 0000000000..c6d7094877 --- /dev/null +++ b/how-to-drop-null-values-in-pandas/drop_a_subset.py @@ -0,0 +1,18 @@ +import pandas as pd + +pd.set_option("display.max_columns", None) + +sales_data = pd.read_csv( + "sales_data_with_missing_values.csv", + parse_dates=["order_date"], + date_format="%d/%m/%Y", +).convert_dtypes(dtype_backend="pyarrow") + + +sales_data.dropna(axis=0, subset=(["discount", "sale_price"])) + +sales_data.dropna(how="all") + +sales_data.dropna(thresh=5) + +sales_data.dropna(thresh=5, ignore_index=True) diff --git a/how-to-drop-null-values-in-pandas/drop_null_columns.py b/how-to-drop-null-values-in-pandas/drop_null_columns.py new file mode 100644 index 0000000000..5982903b54 --- /dev/null +++ 
b/how-to-drop-null-values-in-pandas/drop_null_columns.py @@ -0,0 +1,9 @@ +import pandas as pd + +sales_data = pd.read_csv( + "sales_data_with_missing_values.csv", + parse_dates=["order_date"], + date_format="%d/%m/%Y", +).convert_dtypes(dtype_backend="pyarrow") + +sales_data.dropna(axis="columns") diff --git a/how-to-drop-null-values-in-pandas/drop_null_rows.py b/how-to-drop-null-values-in-pandas/drop_null_rows.py new file mode 100644 index 0000000000..7f68d3f425 --- /dev/null +++ b/how-to-drop-null-values-in-pandas/drop_null_rows.py @@ -0,0 +1,19 @@ +import pandas as pd + +pd.set_option("display.max_columns", None) + +sales_data = pd.read_csv( + "sales_data_with_missing_values.csv", + parse_dates=["order_date"], + date_format="%d/%m/%Y", +).convert_dtypes(dtype_backend="pyarrow") + +sales_data + +sales_data.isna().sum() + +sales_data.dropna() + +clean_sales_data = sales_data.dropna() + +clean_sales_data = sales_data.dropna(inplace=True) diff --git a/how-to-drop-null-values-in-pandas/exercise_solutions.py b/how-to-drop-null-values-in-pandas/exercise_solutions.py new file mode 100644 index 0000000000..a1060cb9cc --- /dev/null +++ b/how-to-drop-null-values-in-pandas/exercise_solutions.py @@ -0,0 +1,25 @@ +import pandas as pd + +grades = pd.read_csv( + "grades.csv", +).convert_dtypes(dtype_backend="pyarrow") + +# 1. Use `.dropna()` in such a way that it permanently drops the row in the DataFrame containing only null values. + +grades.dropna(how="all", inplace=True) + +# 2. Display the rows for the exams that all students have completed. + +grades.dropna() + +# 3. Display any columns with no missing data. + +grades.dropna(axis=1) + +# 4. Display the exams sat by at least five students. + +grades.dropna(axis=0, thresh=6) # Remember there are seven columns. + +# 5. Who else was in every exam that both S2 and S4 sat? 
+ +grades.dropna(subset=["S2", "S4"]).dropna(axis=1, ignore_index=True) diff --git a/how-to-drop-null-values-in-pandas/grades.csv b/how-to-drop-null-values-in-pandas/grades.csv new file mode 100644 index 0000000000..b716989147 --- /dev/null +++ b/how-to-drop-null-values-in-pandas/grades.csv @@ -0,0 +1,8 @@ +Subject,S1,S2,S3,S4,S5,S6 +math,18,,15,20,17,18 +science,26,35,19,,33, +art,15,,9,17,18,14 +music,14,20,12,20,13,18 +history,18,19,,17,,18 +sport,20,17,20,17,18 +,,,,, \ No newline at end of file diff --git a/how-to-drop-null-values-in-pandas/sales_data_with_missing_values.csv b/how-to-drop-null-values-in-pandas/sales_data_with_missing_values.csv new file mode 100644 index 0000000000..04036fafc2 --- /dev/null +++ b/how-to-drop-null-values-in-pandas/sales_data_with_missing_values.csv @@ -0,0 +1,12 @@ +order_number,order_date,customer_name,product_purchased,discount,sale_price +,09/02/2025,Skipton Fealty,Chili Extra Virgin Olive Oil,TRUE,135.00 +70041,,Carmine Priestnall,,,150.00 +70042,09/02/2025,,Rosemary Olive Oil Candle,FALSE,78.00 +70043,10/02/2025,Lanni D'Ambrogi,,TRUE,19.50 +70044,10/02/2025,Tann Angear,Vanilla and Olive Oil Candle,,13.98 +70045,10/02/2025,Skipton Fealty,Basil Extra Virgin Olive Oil,TRUE, +70046,11/02/2025,Far Pow,Chili Extra Virgin Olive Oil,FALSE,150.00 +70047,11/02/2025,Hill Group,Chili Extra Virgin Olive Oil,TRUE,135.00 +70048,11/02/2025,Devlin Nock,Lavender and Olive Oil Lotion,FALSE,39.96 +,,,,, +70049,12/02/2025,Swift Inc,Garlic Extra Virgin Olive Oil,TRUE,936.00