-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsorghum_analysis.py
38 lines (27 loc) · 1.15 KB
/
sorghum_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import pandas as pd
import glob
def combine_excels():
    """
    Combine every workbook in ./trait_results into one Excel sheet.

    Reads each file matched by ``./trait_results/*.xlsx`` (in sorted path
    order), stacks their rows, removes the ``Unnamed: 0`` column when it is
    present, writes the result to ``combined_trait_results.xlsx`` in the
    current working directory, prints a confirmation and returns the frame.

    Returns:
        pandas.DataFrame: the rows of all workbooks stacked on top of each
        other, with a fresh 0..n-1 row index.

    Raises:
        ValueError: if no ``.xlsx`` file is found in ``./trait_results``.
    """
    # glob yields paths in arbitrary order; sorting keeps the row order of
    # the combined sheet reproducible between runs.
    paths = sorted(glob.glob("./trait_results/*.xlsx"))
    if not paths:
        # pd.concat([]) would otherwise fail with the much less helpful
        # "No objects to concatenate".
        raise ValueError("No .xlsx files found in ./trait_results")

    frames = [pd.read_excel(path) for path in paths]
    # ignore_index=True: do not repeat each workbook's own 0..k row labels.
    df = pd.concat(frames, axis=0, ignore_index=True)
    # 'Unnamed: 0' is presumably the index column left behind by an earlier
    # to_excel() call (TODO confirm); only drop it when it actually exists
    # instead of raising KeyError for workbooks saved without it.
    df = df.drop(columns='Unnamed: 0', errors='ignore')

    file_name = "combined_trait_results.xlsx"
    df.to_excel(file_name)
    print(f"Traits loaded into {file_name} successfully")
    return df
def parse_genotype_and_treatments(df):
    """
    Extract plot numbers from file names and attach the sorghum treatments.

    The plot number is the first run of digits that is directly followed by
    an underscore in ``file_name``. The treatment sheet
    ``./genotype_plot_number/sorghum2024_sample_info_wide.xlsx`` is reshaped
    from one column per replicate (``Rep1``..``Rep3``) to one row per plot
    and left-joined onto ``df`` by ``Plot_Number``. The merged table is
    written to ``traits_and_sorghums.xlsx`` in the current working directory.

    Args:
        df: DataFrame with a ``file_name`` column. A ``Plot_Number`` column
            is added to it in place (unchanged from the original behaviour).

    Returns:
        pandas.DataFrame: ``df`` with the ``Treat``, ``Genotype`` and
        ``Replicate`` columns merged in (rows without a matching plot keep
        missing values in those columns because of the left join).

    Raises:
        ValueError: if any ``file_name`` contains no ``<digits>_`` part.
    """
    # expand=False returns a Series rather than a one-column DataFrame.
    plot_numbers = df['file_name'].str.extract(r"(\d+)_", expand=False)

    # Validate before casting or touching df: a bare astype(int) on missing
    # values fails without saying which file names are at fault.
    unmatched = plot_numbers.isna().to_numpy()
    if unmatched.any():
        bad_names = df.loc[unmatched, 'file_name'].unique().tolist()
        raise ValueError(
            f"Could not extract a plot number from file name(s): {bad_names}"
        )
    df['Plot_Number'] = plot_numbers.astype(int)

    sorghum_treatments = pd.read_excel('./genotype_plot_number/sorghum2024_sample_info_wide.xlsx')
    # Wide -> long: one row per (Treat, Genotype, Replicate) with its plot.
    sorghum_treatments = pd.melt(
        frame=sorghum_treatments,
        id_vars=['Treat', 'Genotype'],
        value_vars=['Rep1', 'Rep2', 'Rep3'],
        var_name='Replicate',
        value_name='Plot_Number',
    )
    merged = df.merge(sorghum_treatments, on="Plot_Number", how="left")

    file_name = "traits_and_sorghums.xlsx"
    merged.to_excel(file_name)
    return merged
def main():
    """
    Run the pipeline: combine the trait workbooks, then merge the result
    with the sorghum genotype/treatment information.
    """
    combined_traits = combine_excels()
    parse_genotype_and_treatments(combined_traits)
# Run the pipeline only when this file is executed as a script, so that
# importing the module does not read and write Excel files as a side effect.
if __name__ == "__main__":
    main()