-
Notifications
You must be signed in to change notification settings - Fork 0
/
pandasbasics.py
246 lines (206 loc) · 6.88 KB
/
pandasbasics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
import streamlit as st
from pathlib import Path
import base64
# Initial page config
# st.set_page_config(
# page_title='Datascience cheat sheet',
# layout="wide",
# initial_sidebar_state="expanded",
# )
def main():
cs_sidebar()
cs_body()
return None
# Thanks to streamlitopedia for the following code snippet
def img_to_bytes(img_path):
img_bytes = Path(img_path).read_bytes()
encoded = base64.b64encode(img_bytes).decode()
return encoded
# sidebar
def cs_sidebar():
#
#
# st.sidebar.markdown('Import convention')
# st.sidebar.code('>>> import streamlit as st')
#
# st.sidebar.markdown('__Add widgets to sidebar__')
# st.sidebar.code('''
# st.sidebar.<widget>
# >>> a = st.sidebar.radio(\'R:\',[1,2])
# ''')
#
# st.sidebar.markdown('__Command line__')
# st.sidebar.code('''
# $ streamlit --help
# $ streamlit run your_script.py
# $ streamlit hello
# $ streamlit config show
# $ streamlit cache clear
# $ streamlit docs
# $ streamlit --version
# ''')
#
# st.sidebar.markdown('__Pre-release features__')
# st.sidebar.markdown('[Beta and experimental features](https://docs.streamlit.io/en/0.70.0/api.html#beta-and-experimental-features)')
# st.sidebar.code('''
# pip uninstall streamlit
# pip install streamlit-nightly --upgrade
# ''')
# st.sidebar.markdown('''[<img src='data:image/png;base64,{}' class='img-fluid' width=32 height=32>](https://github.com/daniellewisDL/streamlit-cheat-sheet) <small>st.cheat_sheet v0.71.0 | Nov 2020</small>'''.format(img_to_bytes("brain.png")), unsafe_allow_html=True)
return None
##########################
# Main body of cheat sheet
##########################
#
def cs_body():
# Magic commands
st.markdown("""
<style>
body {
color: #00628B;
background-color: #F5B7B1;
}
</style>
""", unsafe_allow_html=True)
# st.title("Data Science Cheat App")
# type = ["Python Basics", "Pandas Basics", "Numpy Basics", "ML Algorithms"]
# st.sidebar.markdown('Choose the cheat sheet to learn')
# activity = st.sidebar.radio("Choose one from down", type)
# if "Python Basics" in activity:
st.subheader("Pandas Basics")
col1, col2, col3 = st.beta_columns(3)
col1.subheader('Pandas Data Structures')
col1.write('Series')
col1.code('''
A one-dimensional labeled array capable of holding any data type
>>> s = pd.Series([3, -5, 7, 4], index=['a', 'b', 'c', 'd'])
''')
# Display text
col1.write('DataFrame')
col1.code('''
>> data = {'Country': ['Belgium', 'India', 'Brazil'],
'Capital': ['Brussels', 'New Delhi', 'Brasília'],
'Population': [11190846, 1303171035, 207847528]}
>>> df = pd.DataFrame(data,
columns=['Country', 'Capital', 'Population'])
''')
# Display data
col1.subheader('I/O')
col1.write('Read and Write to CSV')
col1.code('''
>>> pd.read_csv('file.csv', header=None, nrows=5)
>>> df.to_csv('myDataFrame.csv')
''')
# Display media
col1.write('Read and Write to Excel')
col1.code('''
>>> pd.read_excel('file.xlsx')
>>> pd.to_excel('dir/myDataFrame.xlsx', sheet_name='Sheet1')
///Read multiple sheets from the same file///
>>> xlsx = pd.ExcelFile('file.xls')
>>> df = pd.read_excel(xlsx, 'Sheet1')
''')
# Display interactive widgets
col1.subheader('Selection')
col1.write('Getting')
col1.code('''
>>> s['b'] -5 ------ Get one element
>>> df[1:] ------ Get subset of a Dataframe
Country Capital Population
1 India New Delhi 1303171035
2 Brazil Brasília 207847528
''')
col2.write('By Position')
col2.code('''
>>> df.iloc([0],[0]) 'Belgium' ------- Select single value by row & column
>>> df.iat([0],[0]) 'Belgium'
''')
# Libraries
col2.write('By Label')
col2.code('''
>>> df.loc([0], ['Country']) 'Belgium' ------------- Select single value by row & column labels
>>> df.at([0], ['Country']) 'Belgium'
''')
col2.write('By Label/Position')
col2.code('''
>>> df.ix[2] Country Brazil ------- Select single row of subset of rows
Capital Brasília
Population 207847
>>> df.ix[:,'Capital'] ------- Select a single column of subset of columns
0 Brussels
1 New Delhi
2 Brasília
>>> df.ix[1,'Capital'] ------- Select rows and columns
'New Delhi'
''')
# Placeholders, help, and options
col2.write('Boolean Indexing')
col2.code('''
>>> s[~(s > 1)] ------- Series s where value is not >1
s where value is <-1 or >2
>>> s[(s < -1) | (s > 2)]
>>> df[df['Population']>1200000000] ------- Set index a of Series s to 6
''')
# Mutate data
col2.write('Dropping')
col2.code('''
>>> s.drop(['a', 'c']) ------- Drop values from rows (axis=0)
>>> df.drop('Country', axis=1) ------- Drop values from columns(axis=1)
''')
col2.write('Sort & Rank')
col2.code('''
>>> df.sort_index() ------- Sort by labels along an axis
>>> df.sort_values(by='Country') ------- Sort by the values along an axis
>>> df.rank() ------- Assign ranks to entries
''')
col3.subheader('Retrieving Series/DataFrame Information')
col3.write('Basic Information')
col2.code('''
>>> df.shape ------- (rows,columns)
>>> df.index ------- Describe index
>>> df.columns ------- Describe DataFrame columns
>>> df.info() ------- Info on DataFrame
>>> df.count() ------- Number of non-NA values
''')
col3.write('Summary')
col3.code('''
>>> df.sum() ------- Sum of values
>>> df.cumsum() ------- Cummulative sum of values
>>> df.min()/df.max() ------- Minimum/maximum values
>>> df.idxmin()/df.idxmax() ------- Minimum/Maximum index value
>>> df.describe() ------- Summary statistics
>>> df.mean() ------- Mean of values
>>> df.median() ------- Median of values
''')
# Control flow
col3.subheader('Applying Functions')
col3.code('''
>>> f = lambda x: x*2
>>> df.apply(f) ------- Apply function
>>> df.applymap(f) ------- Apply function element-wise
''')
# Lay out your app
col3.subheader('Data Alignment')
col3.write('Internal Data Alignment')
col3.code('''
>>> s3 = pd.Series([7, -2, 3], index=['a', 'c', 'd'])
>>> s + s3
a 10.0
b NaN
c 5.0
d 7.0
''')
col3.write('Arithmetic Operations with Fill Methods')
col3.code('''
>>> s.add(s3, fill_value=0) a 10.0
b -5.0
c 5.0
d 7.0
>>> s.sub(s3, fill_value=2)
>>> s.div(s3, fill_value=4)
>>> s.mul(s3, fill_value=3)
''')
return None
# Run main()
if __name__ == '__main__':
main()