From 233f93f09cee686a30cef2e346ac01c8da7d7c64 Mon Sep 17 00:00:00 2001 From: thwait Date: Thu, 31 Jul 2025 22:38:34 -0700 Subject: [PATCH] DOC: Add example for multi-column joins using `merge` --- .../comparison/comparison_with_sql.rst | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/doc/source/getting_started/comparison/comparison_with_sql.rst b/doc/source/getting_started/comparison/comparison_with_sql.rst index dc0590f18751a..d317b920e7372 100644 --- a/doc/source/getting_started/comparison/comparison_with_sql.rst +++ b/doc/source/getting_started/comparison/comparison_with_sql.rst @@ -270,6 +270,42 @@ column with another DataFrame's index. indexed_df2 = df2.set_index("key") pd.merge(df1, indexed_df2, left_on="key", right_index=True) +:meth:`~pandas.merge` also supports joining on multiple columns by passing a list of column names. + +.. code-block:: sql + + SELECT * + FROM df1_multi + INNER JOIN df2_multi + ON df1_multi.key1 = df2_multi.key1 + AND df1_multi.key2 = df2_multi.key2; + +.. ipython:: python + + df1_multi = pd.DataFrame({ + "key1": ["A", "B", "C", "D"], + "key2": [1, 2, 3, 4], + "value": np.random.randn(4) + }) + df2_multi = pd.DataFrame({ + "key1": ["B", "D", "D", "E"], + "key2": [2, 4, 4, 5], + "value": np.random.randn(4) + }) + pd.merge(df1_multi, df2_multi, on=["key1", "key2"]) + +If the columns have different names between DataFrames, ``on`` can be replaced with ``left_on`` and +``right_on``. + +.. ipython:: python + + df2_multi = pd.DataFrame({ + "key_1": ["B", "D", "D", "E"], + "key_2": [2, 4, 4, 5], + "value": np.random.randn(4) + }) + pd.merge(df1_multi, df2_multi, left_on=["key1", "key2"], right_on=["key_1", "key_2"]) + LEFT OUTER JOIN ~~~~~~~~~~~~~~~