diff --git a/assets/test-notebook.json b/assets/test-notebook.json index e5fa45e..49717a1 100644 --- a/assets/test-notebook.json +++ b/assets/test-notebook.json @@ -1 +1 @@ -{"cell-2":{"out":"\n
\n

[2]

\n
\n\n
\n
\n \n
\n \n \n
\n\n
\n \n \n
\n\n \n
\n\n \n
\n
\n
\n

A Quick Visualization

\n
\n
\n\n ","md":"## A Quick Visualization"},"cell-1":{"in":"var df2\nload_csv(\"https://raw.githubusercontent.com/risenW/medium_tutorial_notebooks/master/train.csv\").then((df)=>{\n df2 = df\n})","out":"ReferenceError: load_data is not defined"},"cell-4":{"out":"\n
\n

[4]

\n
\n\n
\n
\n \n
\n \n \n
\n\n
\n \n \n
\n\n \n
\n\n \n
\n
\n
\n

view the dataframe

\n
\n
\n\n ","md":"view the dataframe "},"cell-3":{"in":"table(df2.head())","out":"\n
\n \n \n \n \n \n \n \n \n \n
Product_IdentifierSupermarket_IdentifierProduct_Supermarket_IdentifierProduct_WeightProduct_Fat_ContentProduct_Shelf_VisibilityProduct_TypeProduct_PriceSupermarket_Opening_YearSupermarket _SizeSupermarket_Location_TypeSupermarket_TypeProduct_Supermarket_Sales
0DRA12CHUKWUDI010DRA12_CHUKWUDI01011.6Low Fat0.068535039Soft Drinks357.542005NaNCluster 3Grocery Store709.08
1DRA12CHUKWUDI013DRA12_CHUKWUDI01311.6Low Fat0.040911824Soft Drinks355.791994HighCluster 3Supermarket Type16381.69
2DRA12CHUKWUDI017DRA12_CHUKWUDI01711.6Low Fat0.041177505Soft Drinks350.792014NaNCluster 2Supermarket Type16381.69
3DRA12CHUKWUDI018DRA12_CHUKWUDI01811.6Low Fat0.041112694Soft Drinks355.042016MediumCluster 3Supermarket Type22127.23
4DRA12CHUKWUDI035DRA12_CHUKWUDI03511.6Ultra Low fat0Soft Drinks354.792011SmallCluster 2Supermarket Type12481.77
\n
\n "},"cell-6":{"out":"\n
\n

[6]

\n
\n\n
\n
\n \n
\n \n \n
\n\n
\n \n \n
\n\n \n
\n\n \n
\n
\n
\n

Understanding the numerical columns in the dataset.

\n
\n
\n\n ","md":"Understanding the numerical columns in the dataset."},"cell-5":{"in":"table(df2.describe())","out":"\n
\n \n \n \n \n \n \n \n \n \n
Product_Shelf_VisibilityProduct_PriceSupermarket_Opening_YearProduct_Supermarket_Sales
count4990499049904990
mean0.066916391.8037722004.7834476103.52002
std0.053058119.3782598.2831514447.333835
min078.730003199283.230003
median0.053564393.8620065374.675
max0.328391667.219971201632717.410156
variance0.00281514251.16876368.61059419778778.23941
\n
\n "},"cell-7":{"in":"//Display the number of rows and columns\ndf2.shape","out":"[4990,13,]"},"cell-8":{"in":"df2.dtypes","out":"[string,string,string,string,string,float32,string,float32,int32,string,....3 more]"},"cell-10":{"out":"\n
\n

[10]

\n
\n\n
\n
\n \n
\n \n \n
\n\n
\n \n \n
\n\n \n
\n\n \n
\n
\n
\n

in order to see the columns and their types together. Let create a Series and then set the index as the column names and the values to be the tyoe

\n
\n
\n\n ","md":"in order to see the columns and their types together. Let create a Series and then set the index as the column names and the values to be the tyoe"},"cell-9":{"in":"let dtypes = new dfd.Series(df2.dtypes,{index: df2.columns})\ntable(dtypes)","out":"\n
\n \n \n \n \n \n \n \n \n \n
0
Product_Identifierstring
Supermarket_Identifierstring
Product_Supermarket_Identifierstring
Product_Weightstring
Product_Fat_Contentstring
Product_Shelf_Visibilityfloat32
Product_Typestring
Product_Pricefloat32
Supermarket_Opening_Yearint32
Supermarket _Sizestring
Supermarket_Location_Typestring
Supermarket_Typestring
Product_Supermarket_Salesfloat32
\n
\n "},"cell-11":{"in":"//Remove Id columns\nlet cols_2_remove = ['Product_Identifier', 'Supermarket_Identifier', 'Product_Supermarket_Identifier']\nvar new_df = df2.drop({columns:cols_2_remove,axis:1}) // drop the columns","out":""},"cell-12":{"in":"table(new_df.head())","out":"\n
\n \n \n \n \n \n \n \n \n \n
Product_WeightProduct_Fat_ContentProduct_Shelf_VisibilityProduct_TypeProduct_PriceSupermarket_Opening_YearSupermarket _SizeSupermarket_Location_TypeSupermarket_TypeProduct_Supermarket_Sales
011.6Low Fat0.068535039Soft Drinks357.542005NaNCluster 3Grocery Store709.08
111.6Low Fat0.040911824Soft Drinks355.791994HighCluster 3Supermarket Type16381.69
211.6Low Fat0.041177505Soft Drinks350.792014NaNCluster 2Supermarket Type16381.69
311.6Low Fat0.041112694Soft Drinks355.042016MediumCluster 3Supermarket Type22127.23
411.6Ultra Low fat0Soft Drinks354.792011SmallCluster 2Supermarket Type12481.77
\n
\n "},"cell-13":{"in":"//print the shape\nnew_df.shape","out":"[4990,10,]"},"cell-14":{"in":"//list out the categorical and numerical variable\nvar cat_cols = ['Product_Fat_Content','Product_Type',\n 'Supermarket _Size', 'Supermarket_Location_Type',\n 'Supermarket_Type' ]\n\nvar num_cols = ['Product_Weight', 'Product_Shelf_Visibility',\n 'Product_Price', 'Supermarket_Opening_Year', 'Product_Supermarket_Sales']\n","out":""},"cell-15":{"in":"for(let i in cat_cols){\n\n let col = cat_cols[i]\n\n let counts = new_df[col].value_counts()\n\n var layout = {\n title: `Bar Plot for ${col}`\n }\n\n viz(`cat_viz${i}`, x =>{\n counts.plot(x).bar({layout:layout})\n })\n \n}","out":"
−0.500.511.522.5050010001500200025003000
Bar Plot for Product_Fat_Content
−0.500.511.522.533.5050010001500
Bar Plot for Supermarket _Size
−0.500.511.522.50500100015002000
Bar Plot for Supermarket_Location_Type
00.511.522.533.5−0.5150020002500300010005000
Bar Plot for Supermarket_Type
"},"cell-16":{"in":"for(let i in num_cols){\n\n let col = num_cols[i]\n\n let layout ={\n xaxis:{\n title: col\n },\n yaxis: {\n title:'Product_Supermarket_Sales'\n }\n\n }\n \n viz(`num_col${i}`, x=>{\n \t\tnew_df.plot(x).scatter({x:col,y:'Product_Supermarket_Sales'})\n })\n}","out":"[Product_Fat_Content,Product_Type,Supermarket _Size,Supermarket_Location_Type,Supermarket_Type,]
4681012141618202205k10k15k20k25k30k35k
Product_WeightProduct_Supermarket_Sales
00.050.10.150.20.250.305k10k15k20k25k30k35k
Product_Shelf_VisibilityProduct_Supermarket_Sales
10020030040050060070005k10k15k20k25k30k35k
Product_PriceProduct_Supermarket_Sales
1995200020052010201505k10k15k20k25k30k35k
Supermarket_Opening_YearProduct_Supermarket_Sales
05k10k15k20k25k30k05k10k15k20k25k30k35k
Product_Supermarket_SalesProduct_Supermarket_Sales
"},"cell-17":{"in":"for(let i in cat_cols){\n\n let col = cat_cols[i]\n\n let layout ={\n xaxis:{\n title: col\n },\n yaxis: {\n title:'Product_Supermarket_Sales'\n }\n\n }\n viz(`cat_cols${i}`, x=>{\n new_df.plot(x).box({x:col, y:'Product_Supermarket_Sales'})\n })\n}","out":"\n\n
Low FatUltra Low fatNormal Fat05k10k15k20k25k30k
Product_Fat_ContentProduct_Supermarket_Sales
Soft DrinksDairyHard DrinksCannedFrozen FoodsFruits and VegetablesSnack FoodsBaking GoodsStarchy FoodsMeatSeafoodBreakfastBreadsHealth and HygieneHouseholdOthers05k10k15k20k25k30k
Product_TypeProduct_Supermarket_Sales
NaNHighMediumSmall05k10k15k20k25k30k
Supermarket _SizeProduct_Supermarket_Sales
Cluster 3Cluster 2Cluster 105k10k15k20k25k30k
Supermarket_Location_TypeProduct_Supermarket_Sales
Grocery StoreSupermarket Type1Supermarket Type2Supermarket Type305k10k15k20k25k30k
Supermarket_TypeProduct_Supermarket_Sales
"}} \ No newline at end of file +{"cell-2":{"out":"\n
\n

[2]

\n
\n\n
\n
\n \n
\n \n \n
\n\n
\n \n \n
\n\n \n
\n\n \n
\n
\n
\n

A Quick Visualization

\n
\n
\n\n ","md":"## A Quick Visualization"},"cell-1":{"in":"var df2\nload_csv(\"https://raw.githubusercontent.com/risenW/medium_tutorial_notebooks/master/train.csv\").then((df)=>{\n df2 = df\n})","out":""},"cell-4":{"out":"\n
\n

[4]

\n
\n\n
\n
\n \n
\n \n \n
\n\n
\n \n \n
\n\n \n
\n\n \n
\n
\n
\n

view the dataframe

\n
\n
\n\n ","md":"view the dataframe "},"cell-3":{"in":"table(df2.head(20))","out":"\n
\n \n \n \n \n \n \n \n \n \n
Product_IdentifierSupermarket_IdentifierProduct_Supermarket_IdentifierProduct_WeightProduct_Fat_ContentProduct_Shelf_VisibilityProduct_TypeProduct_PriceSupermarket_Opening_YearSupermarket _SizeSupermarket_Location_TypeSupermarket_TypeProduct_Supermarket_Sales
0DRA12CHUKWUDI010DRA12_CHUKWUDI01011.6Low Fat0.068535039Soft Drinks357.542005NaNCluster 3Grocery Store709.08
1DRA12CHUKWUDI013DRA12_CHUKWUDI01311.6Low Fat0.040911824Soft Drinks355.791994HighCluster 3Supermarket Type16381.69
2DRA12CHUKWUDI017DRA12_CHUKWUDI01711.6Low Fat0.041177505Soft Drinks350.792014NaNCluster 2Supermarket Type16381.69
3DRA12CHUKWUDI018DRA12_CHUKWUDI01811.6Low Fat0.041112694Soft Drinks355.042016MediumCluster 3Supermarket Type22127.23
4DRA12CHUKWUDI035DRA12_CHUKWUDI03511.6Ultra Low fat0Soft Drinks354.792011SmallCluster 2Supermarket Type12481.77
5DRA12CHUKWUDI045DRA12_CHUKWUDI04511.6Low Fat0Soft Drinks354.042009NaNCluster 2Supermarket Type19572.54
6DRA24CHUKWUDI010DRA24_CHUKWUDI01019.35Normal Fat0.066831682Soft Drinks409.722005NaNCluster 3Grocery Store818.93
7DRA24CHUKWUDI013DRA24_CHUKWUDI01319.35Normal Fat0.039895009Soft Drinks406.221994HighCluster 3Supermarket Type111055.61
8DRA24CHUKWUDI017DRA24_CHUKWUDI01719.35Normal Fat0.040154087Soft Drinks411.722014NaNCluster 2Supermarket Type12866.27
9DRA24CHUKWUDI019DRA24_CHUKWUDI019NaNNormal Fat0.069909188Soft Drinks408.221992SmallCluster 1Grocery Store1228.4
10DRA24CHUKWUDI027DRA24_CHUKWUDI027NaNNormal Fat0.039734882Soft Drinks414.471992MediumCluster 3Supermarket Type312284.01
11DRA24CHUKWUDI035DRA24_CHUKWUDI03519.35Normal Fat0.039920687Soft Drinks408.472011SmallCluster 2Supermarket Type18598.81
12DRA24CHUKWUDI049DRA24_CHUKWUDI04919.35Normal Fat0.039990314Soft Drinks412.722006MediumCluster 1Supermarket Type12456.8
13DRA59CHUKWUDI017DRA59_CHUKWUDI0178.27Normal Fat0Soft Drinks458.232014NaNCluster 2Supermarket Type16015.5
14DRA59CHUKWUDI018DRA59_CHUKWUDI0188.27Normal Fat0.128449055Soft Drinks466.482016MediumCluster 3Supermarket Type211105.54
15DRA59CHUKWUDI019DRA59_CHUKWUDI019NaNNormal Fat0.223985293Soft Drinks465.731992SmallCluster 1Grocery Store1388.19
16DRA59CHUKWUDI027DRA59_CHUKWUDI027NaNNormal Fat0.127308434Soft Drinks466.731992MediumCluster 3Supermarket Type317583.78
17DRA59CHUKWUDI046DRA59_CHUKWUDI0468.27Normal Fat0.127927931Soft Drinks462.232004SmallCluster 1Supermarket Type111105.54
18DRA59CHUKWUDI049DRA59_CHUKWUDI0498.27Normal Fat0.128126825Soft Drinks459.232006MediumCluster 1Supermarket Type13239.12
19DRB01CHUKWUDI027DRB01_CHUKWUDI027NaNLow Fat0.081841136Soft Drinks475.131992MediumCluster 3Supermarket Type31423.15
\n
\n "},"cell-6":{"out":"\n
\n

[6]

\n
\n\n
\n
\n \n
\n \n \n
\n\n
\n \n \n
\n\n \n
\n\n \n
\n
\n
\n

Understanding the numerical columns in the dataset.

\n
\n
\n\n ","md":"Understanding the numerical columns in the dataset."},"cell-5":{"in":"table(df2.describe())","out":"\n
\n \n \n \n \n \n \n \n \n \n
Product_Shelf_VisibilityProduct_PriceSupermarket_Opening_YearProduct_Supermarket_Sales
count4990499049904990
mean0.066916391.8037722004.7834476103.52002
std0.053058119.3782598.2831514447.333835
min078.730003199283.230003
median0.053564393.8620065374.675
max0.328391667.219971201632717.410156
variance0.00281514251.16876368.61059419778778.23941
\n
\n "},"cell-7":{"in":"//Display the number of rows and columns\nconsole.log(df2.shape)","out":"[4990,13]"},"cell-8":{"in":"console.log(df2.dtypes)","out":"[string,string,string,string,string,float32,string,float32,int32,string....3 more]"},"cell-10":{"out":"\n
\n

[10]

\n
\n\n
\n
\n \n
\n \n \n
\n\n
\n \n \n
\n\n \n
\n\n \n
\n
\n
\n

in order to see the columns and their types together. Let create a Series and then set the index as the column names and the values to be the tyoe

\n
\n
\n\n ","md":"in order to see the columns and their types together. Let create a Series and then set the index as the column names and the values to be the tyoe"},"cell-9":{"in":"let dtypes = new dfd.Series(df2.dtypes,{index: df2.columns})\ntable(dtypes)","out":"\n
\n \n \n \n \n \n \n \n \n \n
0
Product_Identifierstring
Supermarket_Identifierstring
Product_Supermarket_Identifierstring
Product_Weightstring
Product_Fat_Contentstring
Product_Shelf_Visibilityfloat32
Product_Typestring
Product_Pricefloat32
Supermarket_Opening_Yearint32
Supermarket _Sizestring
Supermarket_Location_Typestring
Supermarket_Typestring
Product_Supermarket_Salesfloat32
\n
\n "},"cell-11":{"in":"//Remove Id columns\nlet cols_2_remove = ['Product_Identifier', 'Supermarket_Identifier', 'Product_Supermarket_Identifier']\nvar new_df = df2.drop({columns:cols_2_remove,axis:1}) // drop the columns","out":""},"cell-12":{"in":"table(new_df.head())","out":"\n
\n \n \n \n \n \n \n \n \n \n
Product_WeightProduct_Fat_ContentProduct_Shelf_VisibilityProduct_TypeProduct_PriceSupermarket_Opening_YearSupermarket _SizeSupermarket_Location_TypeSupermarket_TypeProduct_Supermarket_Sales
011.6Low Fat0.068535039Soft Drinks357.542005NaNCluster 3Grocery Store709.08
111.6Low Fat0.040911824Soft Drinks355.791994HighCluster 3Supermarket Type16381.69
211.6Low Fat0.041177505Soft Drinks350.792014NaNCluster 2Supermarket Type16381.69
311.6Low Fat0.041112694Soft Drinks355.042016MediumCluster 3Supermarket Type22127.23
411.6Ultra Low fat0Soft Drinks354.792011SmallCluster 2Supermarket Type12481.77
\n
\n "},"cell-13":{"in":"//print the shape\nconsole.log(new_df.shape)","out":"[4990,10]"},"cell-14":{"in":"//list out the categorical and numerical variable\nvar cat_cols = ['Product_Fat_Content','Product_Type',\n 'Supermarket _Size', 'Supermarket_Location_Type',\n 'Supermarket_Type' ]\n\nvar num_cols = ['Product_Weight', 'Product_Shelf_Visibility',\n 'Product_Price', 'Supermarket_Opening_Year', 'Product_Supermarket_Sales']\n","out":""},"cell-15":{"in":"for(let i in cat_cols){\n\n let col = cat_cols[i]\n\n let counts = new_df[col].value_counts()\n\n var layout = {\n title: `Bar Plot for ${col}`\n }\n\n viz(`cat_viz${i}`, x =>{\n counts.plot(x).bar({layout:layout})\n })\n \n}","out":"
−0.500.511.522.5050010001500200025003000
Bar Plot for Product_Fat_Content
−0.500.511.522.533.5050010001500
Bar Plot for Supermarket _Size
−0.500.511.522.50500100015002000
Bar Plot for Supermarket_Location_Type
−0.500.511.522.533.5050010001500200025003000
Bar Plot for Supermarket_Type
"},"cell-16":{"in":"for(let i in num_cols){\n\n let col = num_cols[i]\n\n let layout ={\n xaxis:{\n title: col\n },\n yaxis: {\n title:'Product_Supermarket_Sales'\n }\n\n }\n \n viz(`num_col${i}`, x=>{\n \t\tnew_df.plot(x).scatter({x:col,y:'Product_Supermarket_Sales'})\n })\n}","out":"
4681012141618202205k10k15k20k25k30k35k
Product_WeightProduct_Supermarket_Sales
00.050.10.150.20.250.305k10k15k20k25k30k35k
Product_Shelf_VisibilityProduct_Supermarket_Sales
10020030040050060070005k10k15k20k25k30k35k
Product_PriceProduct_Supermarket_Sales
1995200020052010201505k10k15k20k25k30k35k
Supermarket_Opening_YearProduct_Supermarket_Sales
05k10k15k20k25k30k05k10k15k20k25k30k35k
Product_Supermarket_SalesProduct_Supermarket_Sales
"},"cell-17":{"in":"for(let i in cat_cols){\n\n let col = cat_cols[i]\n\n let layout ={\n xaxis:{\n title: col\n },\n yaxis: {\n title:'Product_Supermarket_Sales'\n }\n\n }\n viz(`cat_cols${i}`, x=>{\n new_df.plot(x).box({x:col, y:'Product_Supermarket_Sales'})\n })\n}","out":"
Low FatUltra Low fatNormal Fat05k10k15k20k25k30k
Product_Fat_ContentProduct_Supermarket_Sales
Soft DrinksDairyHard DrinksCannedFrozen FoodsFruits and VegetablesSnack FoodsBaking GoodsStarchy FoodsMeatSeafoodBreakfastBreadsHealth and HygieneHouseholdOthers05k10k15k20k25k30k
Product_TypeProduct_Supermarket_Sales
NaNHighMediumSmall05k10k15k20k25k30k
Supermarket _SizeProduct_Supermarket_Sales
Cluster 3Cluster 2Cluster 105k10k15k20k25k30k
Supermarket_Location_TypeProduct_Supermarket_Sales
Grocery StoreSupermarket Type1Supermarket Type2Supermarket Type305k10k15k20k25k30k
Supermarket_TypeProduct_Supermarket_Sales
"}} \ No newline at end of file