go-gota · julienrbrt · Nov 24, 2020
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,32 +1,48 @@
 # Change Log
+
 All notable changes to this project will be documented in this file.
 This project adheres to [Semantic Versioning](http://semver.org/).
 
+## To Release
+
+### Changed
+
+- [BREAKING] Changed the API for joining DataFrames. DataFrames can now be joined on key columns that have different names in each DataFrame.
+
 ## [0.10.1] - 2019-11-08
+
 ### Fixed
+
 - LoadRecords printing type debug information
 - Missing closing brackets in series.go
 - Fix gonum import path in dataframe_test
 
 ## [0.10.0] - 2019-11-08
+
 ### Changed
+
 - Merged dev branch changes from multiple collaborators (Sam Zaydel, Kyle
   Ellrott, Daniela Petruzalek, Christoph Laaber).
 
 ## [0.9.0] - 2016-10-03
+
 ### Added
+
 - Additional method to load arbitrary struct slices to DataFrames (Juan Álvarez)
 - New LoadOption Names to set initial column names (Sander van Harmelen).
 - Parser option for csv delimiter (Kyle Ellrott)
 - New Describe method for reporting summary statistics (Daniela Petruzalek)
 
 ### Changed
+
 - Improve the performance of multiple operations.
 - Code cleanup for better consistency (Sander van Harmelen)
 - Renamed 'Deselect' function to 'Drop' (Ben Marshall)
 
 ## [0.8.0] - 2016-12-12
+
 ### Added
+
 - Series.Order method and tests.
 - Series.IsNaN method and tests.
 - DataFrame.Arrange method and tests.
@@ -42,6 +58,7 @@ This project adheres to [Semantic Versioning](http://semver.org/).
   `dataframe.LoadMatrix()`.
 
 ### Changed
+
 - elementInterface is now exported as Element.
 - Split element.go into separate files for the implementations of the
   Element interface.
@@ -57,9 +74,11 @@ This project adheres to [Semantic Versioning](http://semver.org/).
   the number of characters that can be shown by line
 
 ### Removed
+
 - Some unused functions from the helpers.go file.
 
 ### Fix
+
 - Linter errors.
 - stringElement.Float now returns NaN instead of 0 when applicable.
 - Autorenaming column names when `hasHeaders == false` now is
@@ -68,7 +87,9 @@ This project adheres to [Semantic Versioning](http://semver.org/).
   suffix numbers if the number of duplicates was greater than two.
 
 ## [0.7.0] - 2016-11-27
+
 ### Added
+
 - Many more table tests for both `series` and `dataframe`
 - Set method for `Series` and `DataFrame`
 - When loading data from CSV, JSON, or Records, different
@@ -77,6 +98,7 @@ This project adheres to [Semantic Versioning](http://semver.org/).
 - More documentation for previously undocumented functions.
 
 ### Changed
+
 - The project has been restructured on separated `dataframe` and
   `series` packages.
 - Reviewed entire `Series` codebase for better style and
@@ -90,22 +112,29 @@ This project adheres to [Semantic Versioning](http://semver.org/).
 - Updated README with the new changes.
 
 ### Removed
+
 - Removed unnecessary abstraction layer on `Series.elements`
 
 ## [0.6.0] - 2016-10-29
+
 ### Added
+
 - InnerJoin, CrossJoin, RightJoin, LeftJoin, OuterJoin functions
 
 ### Changed
+
 - More code refactoring for easier maintenance and management
 - Add more documentation to the exported functions
 - Remove unnecessary methods and structures from the exported API
 
 ### Removed
+
 - colnames and coltypes from the DataFrame structure
 
 ## [0.5.0] - 2016-08-09
+
 ### Added
+
 - Read and write DataFrames from CSV, JSON, []map[string]interface{},
   [][]string.
 - New constructor for DataFrame accept Series and NamedSeries as
@@ -114,17 +143,21 @@ This project adheres to [Semantic Versioning](http://semver.org/).
 - Much Better error handling
 
 ### Changed
+
 - Almost complete rewrite of DataFrame code.
 - Now using Series as first class citizens and building blocks for
   DataFrames.
 
 ### Removed
+
 - Merge/Join functions have been temporarily removed to be adapted to
   the new architecture.
 - Cell interface for allowing custom types into the system.
 
 ## [0.4.0] - 2016-02-18
+
 ### Added
+
 - Getter methods for nrows and ncols.
 - An InnerJoin function that performs an Inner Merge/Join of two
   DataFrames by the given keys.
@@ -137,37 +170,43 @@ This project adheres to [Semantic Versioning](http://semver.org/).
 - Cell interface now have to implement a Copy method.
 
 ### Changed
+
 - The `cell` interface is now exported: `Cell`.
 - Cell method NA() is now IsNA().
 - The function parseColumn is now a method.
-- A number of fields and methods are now expoted.
+- A number of fields and methods are now exported.
 
 ### Fixed
+
 - Now ensuring that generated subsets are in fact new copies entirely,
   not copying pointers to the same memory address.
 
 ## [0.3.0] - 2016-02-18
+
 ### Added
+
 - Getter and setter methods for the column names of a DataFrame
 - Bool column type has been made available
 - New Bool() interface
-- A `column` now can now if any of it's elements is NA and a list of
+- A `column` can now know if any of its elements is NA and a list of
   said NA elements ([]bool).
 
 ### Changed
+
 - Renamed `cell` interface elements to be more idiomatic:
-    - ToInteger() is now Int()
-    - ToFloat() is now Float()
+  - ToInteger() is now Int()
+  - ToFloat() is now Float()
 - The `cell` interface has changed. Int() and Float() now
   return pointers instead of values to prevent future conflicts when
-  returning an error. 
+  returning an error.
 - The `cell` interface has changed. Checksum() [16]byte added.
 - Using cell.Checksum() for identification of unique elements instead
   of raw strings.
 - The `cell` interface has changed, now also requires ToBool() method.
 - String type now does not contain a string, but a pointer to a string.
 
 ### Fixed
+
 - Bool type constructor function Bools now parses `bool` and `[]bool`
   elements correctly.
 - Int type constructor function Ints now parses `bool` and `[]bool`
@@ -178,39 +217,45 @@ This project adheres to [Semantic Versioning](http://semver.org/).
   elements correctly.
 
 ## [0.2.1] - 2016-02-14
+
 ### Fixed
+
 - Fixed a bug when the maximum number of characters on a column was
   not being updated properly when subsetting.
 
 ## [0.2.0] - 2016-02-13
+
 ### Added
+
 - Added a lot of unit tests
 
 ### Changed
+
 - The base types are now `df.String`, `df.Int`, and `df.Float`.
 - Restructured the project in different files.
 - Refactored the project so that it will allow columns to be of any
   type as long as it complies with the necessary interfaces.
 
-
 ## [0.1.0] - 2016-02-06
+
 ### Added
+
 - Load csv data to DataFrame.
 - Parse data to four supported types: `int`, `float64`, `date`
   & `string`.
 - Row/Column subsetting (Indexing, column names, row numbers, range).
 - Unique/Duplicated row subsetting.
 - DataFrame combinations by rows and columns (cbind/rbind).
 
-[0.1.0]:https://github.com/go-gota/gota/compare/v0.1.0...v0.1.0
-[0.2.0]:https://github.com/go-gota/gota/compare/v0.1.0...v0.2.0
-[0.2.1]:https://github.com/go-gota/gota/compare/v0.2.0...v0.2.1
-[0.3.0]:https://github.com/go-gota/gota/compare/v0.2.1...v0.3.0
-[0.4.0]:https://github.com/go-gota/gota/compare/v0.3.0...v0.4.0
-[0.5.0]:https://github.com/go-gota/gota/compare/v0.4.0...v0.5.0
-[0.6.0]:https://github.com/go-gota/gota/compare/v0.5.0...v0.6.0
-[0.7.0]:https://github.com/go-gota/gota/compare/v0.6.0...v0.7.0
-[0.8.0]:https://github.com/go-gota/gota/compare/v0.7.0...v0.8.0
-[0.9.0]:https://github.com/go-gota/gota/compare/v0.8.0...v0.9.0
-[0.10.0]:https://github.com/go-gota/gota/compare/v0.9.0...v0.10.0
-[0.10.1]:https://github.com/go-gota/gota/compare/v0.10.0...v0.10.1
+[0.1.0]: https://github.com/go-gota/gota/compare/v0.1.0...v0.1.0
+[0.2.0]: https://github.com/go-gota/gota/compare/v0.1.0...v0.2.0
+[0.2.1]: https://github.com/go-gota/gota/compare/v0.2.0...v0.2.1
+[0.3.0]: https://github.com/go-gota/gota/compare/v0.2.1...v0.3.0
+[0.4.0]: https://github.com/go-gota/gota/compare/v0.3.0...v0.4.0
+[0.5.0]: https://github.com/go-gota/gota/compare/v0.4.0...v0.5.0
+[0.6.0]: https://github.com/go-gota/gota/compare/v0.5.0...v0.6.0
+[0.7.0]: https://github.com/go-gota/gota/compare/v0.6.0...v0.7.0
+[0.8.0]: https://github.com/go-gota/gota/compare/v0.7.0...v0.8.0
+[0.9.0]: https://github.com/go-gota/gota/compare/v0.8.0...v0.9.0
+[0.10.0]: https://github.com/go-gota/gota/compare/v0.9.0...v0.10.0
+[0.10.1]: https://github.com/go-gota/gota/compare/v0.10.0...v0.10.1
diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go
@@ -1198,7 +1198,7 @@ func ReadCSV(r io.Reader, options ...LoadOption) DataFrame {
 // resulting records.
 func ReadJSON(r io.Reader, options ...LoadOption) DataFrame {
 	var m []map[string]interface{}
-	d:=json.NewDecoder(r)
+	d := json.NewDecoder(r)
 	d.UseNumber()
 	err := d.Decode(&m)
 	if err != nil {
@@ -1443,8 +1443,21 @@ func (df DataFrame) Col(colname string) series.Series {
 	return df.columns[idx].Copy()
 }
 
+// MergeBy names one pair of join key columns: Left is looked up on the left DataFrame and Right on the right DataFrame.
+type MergeBy struct {
+	Left, Right string
+}
+
+// JoinColumn returns a MergeBy whose Left and Right are both key, for joining on a column with the same name on both sides.
+func JoinColumn(key string) MergeBy {
+	return MergeBy{
+		Left:  key,
+		Right: key,
+	}
+}
+
 // InnerJoin returns a DataFrame containing the inner join of two DataFrames.
-func (df DataFrame) InnerJoin(b DataFrame, keys ...string) DataFrame {
+func (df DataFrame) InnerJoin(b DataFrame, keys ...MergeBy) DataFrame {
 	if len(keys) == 0 {
 		return DataFrame{Err: fmt.Errorf("join keys not specified")}
 	}
@@ -1453,12 +1466,12 @@ func (df DataFrame) InnerJoin(b DataFrame, keys ...string) DataFrame {
 	var iKeysB []int
 	var errorArr []string
 	for _, key := range keys {
-		i := df.colIndex(key)
+		i := df.colIndex(key.Left)
 		if i < 0 {
-			errorArr = append(errorArr, fmt.Sprintf("can't find key %q on left DataFrame", key))
+			errorArr = append(errorArr, fmt.Sprintf("can't find key %q on left DataFrame", key.Left))
 		}
 		iKeysA = append(iKeysA, i)
-		j := b.colIndex(key)
+		j := b.colIndex(key.Right)
 		if j < 0 {
-			errorArr = append(errorArr, fmt.Sprintf("can't find key %q on right DataFrame", key))
+			errorArr = append(errorArr, fmt.Sprintf("can't find key %q on right DataFrame", key.Right))
 		}
@@ -1523,7 +1536,7 @@ func (df DataFrame) InnerJoin(b DataFrame, keys ...string) DataFrame {
 }
 
 // LeftJoin returns a DataFrame containing the left join of two DataFrames.
-func (df DataFrame) LeftJoin(b DataFrame, keys ...string) DataFrame {
+func (df DataFrame) LeftJoin(b DataFrame, keys ...MergeBy) DataFrame {
 	if len(keys) == 0 {
 		return DataFrame{Err: fmt.Errorf("join keys not specified")}
 	}
@@ -1532,12 +1545,12 @@ func (df DataFrame) LeftJoin(b DataFrame, keys ...string) DataFrame {
 	var iKeysB []int
 	var errorArr []string
 	for _, key := range keys {
-		i := df.colIndex(key)
+		i := df.colIndex(key.Left)
 		if i < 0 {
-			errorArr = append(errorArr, fmt.Sprintf("can't find key %q on left DataFrame", key))
+			errorArr = append(errorArr, fmt.Sprintf("can't find key %q on left DataFrame", key.Left))
 		}
 		iKeysA = append(iKeysA, i)
-		j := b.colIndex(key)
+		j := b.colIndex(key.Right)
 		if j < 0 {
-			errorArr = append(errorArr, fmt.Sprintf("can't find key %q on right DataFrame", key))
+			errorArr = append(errorArr, fmt.Sprintf("can't find key %q on right DataFrame", key.Right))
 		}
@@ -1621,7 +1634,7 @@ func (df DataFrame) LeftJoin(b DataFrame, keys ...string) DataFrame {
 }
 
 // RightJoin returns a DataFrame containing the right join of two DataFrames.
-func (df DataFrame) RightJoin(b DataFrame, keys ...string) DataFrame {
+func (df DataFrame) RightJoin(b DataFrame, keys ...MergeBy) DataFrame {
 	if len(keys) == 0 {
 		return DataFrame{Err: fmt.Errorf("join keys not specified")}
 	}
@@ -1630,12 +1643,12 @@ func (df DataFrame) RightJoin(b DataFrame, keys ...string) DataFrame {
 	var iKeysB []int
 	var errorArr []string
 	for _, key := range keys {
-		i := df.colIndex(key)
+		i := df.colIndex(key.Left)
 		if i < 0 {
-			errorArr = append(errorArr, fmt.Sprintf("can't find key %q on left DataFrame", key))
+			errorArr = append(errorArr, fmt.Sprintf("can't find key %q on left DataFrame", key.Left))
 		}
 		iKeysA = append(iKeysA, i)
-		j := b.colIndex(key)
+		j := b.colIndex(key.Right)
 		if j < 0 {
-			errorArr = append(errorArr, fmt.Sprintf("can't find key %q on right DataFrame", key))
+			errorArr = append(errorArr, fmt.Sprintf("can't find key %q on right DataFrame", key.Right))
 		}
@@ -1729,7 +1742,7 @@ func (df DataFrame) RightJoin(b DataFrame, keys ...string) DataFrame {
 }
 
 // OuterJoin returns a DataFrame containing the outer join of two DataFrames.
-func (df DataFrame) OuterJoin(b DataFrame, keys ...string) DataFrame {
+func (df DataFrame) OuterJoin(b DataFrame, keys ...MergeBy) DataFrame {
 	if len(keys) == 0 {
 		return DataFrame{Err: fmt.Errorf("join keys not specified")}
 	}
@@ -1738,12 +1751,12 @@ func (df DataFrame) OuterJoin(b DataFrame, keys ...string) DataFrame {
 	var iKeysB []int
 	var errorArr []string
 	for _, key := range keys {
-		i := df.colIndex(key)
+		i := df.colIndex(key.Left)
 		if i < 0 {
-			errorArr = append(errorArr, fmt.Sprintf("can't find key %q on left DataFrame", key))
+			errorArr = append(errorArr, fmt.Sprintf("can't find key %q on left DataFrame", key.Left))
 		}
 		iKeysA = append(iKeysA, i)
-		j := b.colIndex(key)
+		j := b.colIndex(key.Right)
 		if j < 0 {
-			errorArr = append(errorArr, fmt.Sprintf("can't find key %q on right DataFrame", key))
+			errorArr = append(errorArr, fmt.Sprintf("can't find key %q on right DataFrame", key.Right))
 		}