# numpy_pandas_ex.py
# example from https://sparkbyexamples.com/pandas/pandas-dataframe-tutorial-beginners-guide/
import numpy as np
import pandas as pd
# --- Build a DataFrame from a plain list of rows ---------------------------
# Each inner list is one record: course name, fee, duration.
technologies = [
    ["Spark", 20000, "30days"],
    ["pandas", 20000, "40days"],
]
# No labels are supplied here, so pandas assigns default integer labels.
df = pd.DataFrame(technologies)
print(df)

# --- Same rows, now with named columns and a custom row index --------------
column_names = ["Courses", "Fee", "Duration"]
row_label = ["a", "b"]
df = pd.DataFrame(data=technologies, index=row_label, columns=column_names)
print(df)

# --- Cast each column to an explicit dtype ---------------------------------
# Fee is written as ints in the rows above; this cast makes it float.
types = dict(Courses=str, Fee=float, Duration=str)
df = df.astype(types)
# --- Build a DataFrame with missing values for the later examples ----------
# The gaps are deliberate: 'Courses' contains a None, 'Fee' contains a NaN
# and 'Duration' contains an empty string.
technologies = {
    "Courses": [
        "Spark", "PySpark", "Hadoop", "Python", "Pandas", None, "Spark", "Python",
    ],
    "Fee": [22000, 25000, 23000, 24000, np.nan, 25000, 25000, 22000],
    "Duration": [
        "30day", "50days", "55days", "40days", "60days", "35day", "", "50days",
    ],
    "Discount": [1000, 2300, 1000, 1200, 2500, 1300, 1400, 1600],
}
row_labels = ["r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7"]
df = pd.DataFrame(technologies, index=row_labels)
print(df)

# describe() returns a new DataFrame of summary statistics. A bare call only
# displays something in a notebook/REPL, so when this file runs as a script
# the result must be kept and printed explicitly.
summary = df.describe()
print(summary)