bigcode-project · huhanGitHub · Apr 25, 2024 · Apr 25, 2024 · Apr 25, 2024
diff --git a/data/raw/f_428_ming.py b/data/raw/f_428_ming.py
@@ -26,10 +26,12 @@ def f_428():
 
     return encoded_str.decode()
 
+
 import string
 import unittest
 import binascii
 
+
 def run_tests():
     suite = unittest.TestSuite()
     suite.addTest(unittest.makeSuite(TestCases))

diff --git a/data/raw/f_431_ming.py b/data/raw/f_431_ming.py
@@ -2,6 +2,7 @@
 import os
 import base64
 
+
 def f_431(password: str, salt_length: int = 8) -> str:
     """
     Encrypt a password using Salt and SHA-256, then encode the result in base64.

diff --git a/data/raw/f_434_ming.py b/data/raw/f_434_ming.py
@@ -1,6 +1,7 @@
 from collections import Counter
 import pandas as pd
 
+
 def f_434(list_of_menuitems):
     """
     Given a nested list of menu items, this function flattens the list and returns a Pandas DataFrame

diff --git a/data/raw/f_436_ming.py b/data/raw/f_436_ming.py
@@ -1,6 +1,5 @@
 import collections
 import itertools
-
 import matplotlib.pyplot as plt
 
 # Constants
@@ -22,7 +21,7 @@ def f_436(a, b):
     Requirements:
     - collections
     - itertools
-    - matplotlib
+    - matplotlib.pyplot
 
     Example:
     >>> ax = f_436(['apple', 'banana', 'cherry'], ['date', 'elderberry', 'apple', 'banana', 'cherry'])

diff --git a/data/raw/f_438_ming.py b/data/raw/f_438_ming.py
@@ -15,8 +15,14 @@ def f_438(a, b):
     Requirements:
     - numpy
     - pandas
-    - scipy.stats
-    - matplotlib
+    - scipy
+    - matplotlib.pyplot
+
+    Returns:
+    - tuple: Contains two elements:
+        - float: The Pearson correlation coefficient.
+        - matplotlib.axes.Axes: The Axes object of the plotted scatter plot with a regression line.
+
 
     Example:
     >>> correlation, ax = f_438([1, 2, 3, 4, 5], [2, 4, 6, 8, 10])
@@ -33,6 +39,7 @@ def f_438(a, b):
     plt.show()
     return correlation, plt.gca()
 
+
 import unittest
 import math
 import matplotlib

diff --git a/data/raw/f_439_ming.py b/data/raw/f_439_ming.py
@@ -21,7 +21,7 @@ def f_439(a, b, columns=['A', 'B']):
         - numpy
         - pandas
         - sklearn.preprocessing
-        - matplotlib
+        - matplotlib.pyplot
 
     Example:
         >>> df, ax = f_439([1, 2, 3, 4, 5], [2, 4, 6, 8, 10])

diff --git a/data/raw/f_440_ming.py b/data/raw/f_440_ming.py
@@ -19,8 +19,8 @@ def f_440(a, b):
 
     Requirements:
     - pandas
-    - scipy.spatial.distance
-    - matplotlib
+    - scipy.spatial
+    - matplotlib.pyplot
 
     Example:
     >>> euclidean_distance, df, ax = f_440([1, 2, 3], [2, 3, 4])

diff --git a/data/raw/f_445_ming.py b/data/raw/f_445_ming.py
@@ -1,4 +1,3 @@
-import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 
@@ -18,7 +17,6 @@ def f_445(array_length=100):
     Requirements:
     - numpy
     - pandas
-    - matplotlib.pyplot
 
     Example:
     >>> df, ax = f_445(50)
@@ -39,6 +37,7 @@ def f_445(array_length=100):
 
 import unittest
 import matplotlib
+import matplotlib.pyplot as plt
 
 matplotlib.use('Agg')  # Set to 'Agg' to avoid GUI-related issues
 

diff --git a/data/raw/f_447_ming.py b/data/raw/f_447_ming.py
@@ -6,6 +6,7 @@
 ELEMENTS = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
 N_GROUPS = 5
 
+
 def f_447(l):
     """
     Given a list `l`, this function shuffles the list, constructs a dataframe using the shuffled list,

diff --git a/data/raw/f_453_ming.py b/data/raw/f_453_ming.py
@@ -18,9 +18,9 @@ def f_453():
     highlighted.
 
     Requirements:
-        - numpy for array operations.
-        - sklearn.cluster for applying KMeans clustering.
-        - matplotlib.pyplot for plotting the clustered points and centroids.
+        - numpy
+        - sklearn.cluster
+        - matplotlib.pyplot
 
     Returns:
         A tuple containing the numpy array of data points and the fitted KMeans model.

diff --git a/data/raw/f_454_ming.py b/data/raw/f_454_ming.py
@@ -2,7 +2,7 @@
 from random import randint
 import matplotlib.pyplot as plt
 import pandas as pd
-import os
+
 
 TEMP_CATEGORIES = ['Cold', 'Normal', 'Hot']
 FILE_PATH = 'custom_data.csv'
@@ -59,8 +59,10 @@ def f_454(hours, file_path=FILE_PATH):
 
     return file_path, ax
 
+
 import unittest
 import matplotlib
+import os
 # Check and set the backend
 print("Current backend:", matplotlib.get_backend())  # Optional: Check the current backend
 matplotlib.use('Agg')  # Set to 'Agg' to avoid GUI-related issues

diff --git a/data/raw/f_455_ming.py b/data/raw/f_455_ming.py
@@ -1,16 +1,13 @@
 import csv
 import os
-import shutil
 from datetime import datetime
 from random import randint
 
 # Constants
-current_directory_path = os.path.join(os.getcwd(), os.path.splitext(os.path.basename(__file__))[0])
-FILE_PATH = os.path.join(current_directory_path, 'sensor_data.csv')
 SENSORS = ['Temperature', 'Humidity', 'Pressure']
 
 
-def f_455(hours):
+def f_455(hours, current_directory_path = os.path.join(os.getcwd(), os.path.splitext(os.path.basename(__file__))[0])):
     """
     Create sensor data for the specified number of hours and save it in a CSV file.
 
@@ -35,7 +32,7 @@ def f_455(hours):
     >>> 'sensor_data.csv' in file_path  # Ensure the filename is correct
     True
     """
-
+    FILE_PATH = os.path.join(current_directory_path, 'sensor_data.csv')
     directory = os.path.dirname(FILE_PATH)
     if not os.path.exists(directory):
         os.makedirs(directory)
@@ -54,6 +51,10 @@ def f_455(hours):
 
 import unittest
 import os
+import shutil
+current_directory_path = os.path.join(os.getcwd(), os.path.splitext(os.path.basename(__file__))[0])
+FILE_PATH = os.path.join(current_directory_path, 'sensor_data.csv')
+
 
 class TestCases(unittest.TestCase):
 

diff --git a/data/raw/f_456_ming.py b/data/raw/f_456_ming.py
@@ -1,21 +1,15 @@
 import csv
 import os
-import shutil
 from datetime import datetime
 from random import randint
-
-import matplotlib
 import matplotlib.pyplot as plt
 import pandas as pd
 
 # Constants
-
-current_directory_path = os.path.join(os.getcwd(), os.path.splitext(os.path.basename(__file__))[0])
-FILE_PATH = os.path.join(current_directory_path, 'traffic_data.csv')
 VEHICLE_TYPES = ['Car', 'Bus', 'Truck', 'Bike']
 
 
-def f_456(hours):
+def f_456(hours, current_directory_path = os.path.join(os.getcwd(), os.path.splitext(os.path.basename(__file__))[0])):
     """
     Generates traffic data for different vehicle types over a specified number of hours,
     saves the data to a CSV file, and plots the data in a line chart.
@@ -31,6 +25,8 @@ def f_456(hours):
     - os
     - csv
     - matplotlib.pyplot
+    - random
+    - datetime
 
     Example:
     >>> file_path, ax = f_456(2)  # Generate data for 2 hours
@@ -44,7 +40,7 @@ def f_456(hours):
 
     if not os.path.exists(current_directory_path):
         os.makedirs(current_directory_path)
-
+    FILE_PATH = os.path.join(current_directory_path, 'traffic_data.csv')
     data = [['Time'] + VEHICLE_TYPES]
     for i in range(hours):
         row = [datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')] + [randint(0, 50) for _ in VEHICLE_TYPES]
@@ -70,11 +66,13 @@ def f_456(hours):
 
 import unittest
 from unittest.mock import patch
-
+import shutil
+import matplotlib
 # Check and set the backend
 print("Current backend:", matplotlib.get_backend())  # Optional: Check the current backend
 matplotlib.use('Agg')  # Set to 'Agg' to avoid GUI-related issues
-
+current_directory_path = os.path.join(os.getcwd(), os.path.splitext(os.path.basename(__file__))[0])
+FILE_PATH = os.path.join(current_directory_path, 'traffic_data.csv')
 
 class TestCases(unittest.TestCase):
 

diff --git a/data/raw/f_457_ming.py b/data/raw/f_457_ming.py
@@ -5,14 +5,10 @@
 from random import randint
 
 # Constants
-current_directory_path = os.getcwd()
-# print(current_directory_path)
-FILE_PATH = os.path.join(current_directory_path, 'weather_data.csv')
 WEATHER_CONDITIONS = ['Sunny', 'Cloudy', 'Rainy', 'Snowy', 'Stormy']
-BACKUP_PATH = os.path.join(current_directory_path, 'backup/')
 
 
-def f_457(hours):
+def f_457(hours, current_directory_path = os.getcwd()):
     """
     Generate weather data for the specified number of hours, save it in a CSV file and back up the file to a backup directory.
 
@@ -35,6 +31,8 @@ def f_457(hours):
     >>> 'weather_data.csv' in f_457(10)
     True
     """
+    FILE_PATH = os.path.join(current_directory_path, 'weather_data.csv')
+    BACKUP_PATH = os.path.join(current_directory_path, 'backup/')
     data = [['Time', 'Condition']]
     for i in range(hours):
         row = [datetime.now().strftime('%H:%M:%S.%f'), WEATHER_CONDITIONS[randint(0, len(WEATHER_CONDITIONS)-1)]]
@@ -53,6 +51,10 @@ def f_457(hours):
 
 import unittest
 from unittest.mock import patch, mock_open
+current_directory_path = os.getcwd()
+# print(current_directory_path)
+FILE_PATH = os.path.join(current_directory_path, 'weather_data.csv')
+BACKUP_PATH = os.path.join(current_directory_path, 'backup/')
 
 
 class TestCases(unittest.TestCase):

diff --git a/data/raw/f_458_ming.py b/data/raw/f_458_ming.py
@@ -1,10 +1,6 @@
 import time
-import unittest
 from datetime import datetime
 from random import randint
-from unittest.mock import patch
-
-import matplotlib
 import matplotlib.pyplot as plt
 
 
@@ -25,6 +21,10 @@ def f_458(duration):
     - time
     - random
     - matplotlib.pyplot
+
+    Example:
+    >>> type(f_458(1))
+    <class 'tuple'>
     """
     # Constants
     VALUES_RANGE = (0, 100)
@@ -52,6 +52,9 @@ def f_458(duration):
 
 ### Unit Tests
 # Check and set the backend
+import unittest
+from unittest.mock import patch
+import matplotlib
 print("Current backend:", matplotlib.get_backend())  # Optional: Check the current backend
 matplotlib.use('Agg')  # Set to 'Agg' to avoid GUI-related issues
 

diff --git a/data/raw/f_459_ming.py b/data/raw/f_459_ming.py
@@ -1,8 +1,5 @@
-import unittest
-from random import choice, randint
-
 import pandas as pd
-
+import time
 # Constants
 LETTERS = list('abcdefghijklmnopqrstuvwxyz')
 
@@ -20,22 +17,30 @@ def f_459(df, letter):
 
     Requirements:
     - pandas
+    - time
 
     Example:
+    >>> import pandas as pd
     >>> df = pd.DataFrame({'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eva', 'Fiona']})
     >>> filtered_names = f_459(df, 'a')
     >>> filtered_names.index[0].startswith('A')
     True
     >>> len(filtered_names)
     1
     """
+    start_time = time.time()
     regex = f'^{letter}'
     filtered_df = df[df['Name'].str.contains(regex, case=False, regex=True)]
     # Note: The plotting line is removed to simplify testing and focus on data processing.
+    end_time = time.time()  # End timing
+    cost = f"Operation completed in {end_time - start_time} seconds."
     return filtered_df['Name'].value_counts()
 
 
 ### Unit Tests
+from random import choice, randint
+import unittest
+
 
 class TestCases(unittest.TestCase):
     def setUp(self):

diff --git a/data/raw/f_460_ming.py b/data/raw/f_460_ming.py
@@ -1,4 +1,5 @@
 import pandas as pd
+import time
 
 
 def f_460(df, letter):
@@ -16,18 +17,21 @@ def f_460(df, letter):
 
     Requirements:
     - pandas
-    - re
+    - time
 
     Example:
     >>> df = {'Word': ['apple', 'banana', 'cherry', 'date', 'fig', 'grape', 'kiwi']}
     >>> f_460(df, 'a')
     {5: 1}
     """
+    start_time = time.time()
     df = pd.DataFrame(df)
     regex = '^' + letter
     filtered_df = df[df['Word'].str.contains(regex, regex=True)]
     word_lengths = filtered_df['Word'].str.len()
     count_dict = word_lengths.value_counts().to_dict()
+    end_time = time.time()  # End timing
+    cost = f"Operation completed in {end_time - start_time} seconds."
 
     return count_dict