Skip to content

Commit 7762ca3

Browse files
committed
Updated Clustering_Iris F# project to v0.7 and new project structure.
1 parent a175583 commit 7762ca3

File tree

12 files changed

+656
-150
lines changed

12 files changed

+656
-150
lines changed
Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
namespace Common
2+
3+
module ConsoleHelper =
4+
open System
5+
open Microsoft.ML
6+
open Microsoft.ML.Runtime.Data
7+
open Microsoft.ML.Data
8+
open Microsoft.ML.Core.Data
9+
open Microsoft.ML.Runtime.Api
10+
open System.Reflection
11+
12+
/// Prints a predicted value to the console, framed above and below by a
/// line of asterisks. `prediction` is formatted with `%s`.
let printPrediction prediction =
    let border = "*************************************************"
    [ border
      sprintf "Predicted : %s" prediction
      border ]
    |> List.iter (printfn "%s")
16+
17+
/// Prints a predicted count (formatted with `%d`) next to the observed value
/// (formatted with `%s`), framed by two dashed separator lines.
let printRegressionPredictionVersusObserved predictionCount observedCount =
    let separator = "-------------------------------------------------"
    [ separator
      sprintf "Predicted : %d" predictionCount
      sprintf "Actual: %s" observedCount
      separator ]
    |> List.iter (printfn "%s")
22+
23+
/// Prints the regression metrics (LossFn, RSquared, L1, L2, Rms) of the model
/// called `name`, each with two decimals, inside an asterisk frame.
let printRegressionMetrics name (metrics : RegressionEvaluator.Result) =
    let border = "*************************************************"
    // A lazy sequence: each line is formatted right before it is printed,
    // so metric properties are read in the same order as the output.
    seq {
        yield border
        yield sprintf "* Metrics for %s regression model " name
        yield "*------------------------------------------------"
        yield sprintf "* LossFn: %.2f" metrics.LossFn
        yield sprintf "* R2 Score: %.2f" metrics.RSquared
        yield sprintf "* Absolute loss: %.2f" metrics.L1
        yield sprintf "* Squared loss: %.2f" metrics.L2
        yield sprintf "* RMS loss: %.2f" metrics.Rms
        yield border
    }
    |> Seq.iter (printfn "%s")
33+
34+
/// Prints Accuracy, Auc and F1Score of the binary classification model called
/// `name` as percentages (value * 100, two decimals) inside an asterisk frame.
let printBinaryClassificationMetrics name (metrics : BinaryClassifierEvaluator.Result) =
    let border = "************************************************************"
    let asPercent value = value * 100.
    // Lazy sequence so each metric is read right before its line is printed.
    seq {
        yield border
        yield sprintf "* Metrics for %s binary classification model " name
        yield "*-----------------------------------------------------------"
        yield sprintf "* Accuracy: %.2f%%" (asPercent metrics.Accuracy)
        yield sprintf "* Auc: %.2f%%" (asPercent metrics.Auc)
        yield sprintf "* F1Score: %.2f%%" (asPercent metrics.F1Score)
        yield border
    }
    |> Seq.iter (printfn "%s")
42+
43+
/// Prints the multi-class classification metrics of the model called `name`:
/// macro/micro accuracy, overall log-loss and the log-loss of every class.
///
/// The per-class lines are produced by iterating over `PerClassLogLoss`
/// (classes are numbered from 1), so the function works for any number of
/// classes instead of assuming exactly three.
let printMultiClassClassificationMetrics name (metrics : MultiClassClassifierEvaluator.Result) =
    printfn "************************************************************"
    printfn "* Metrics for %s multi-class classification model " name
    printfn "*-----------------------------------------------------------"
    printfn " AccuracyMacro = %.4f, a value between 0 and 1, the closer to 1, the better" metrics.AccuracyMacro
    printfn " AccuracyMicro = %.4f, a value between 0 and 1, the closer to 1, the better" metrics.AccuracyMicro
    printfn " LogLoss = %.4f, the closer to 0, the better" metrics.LogLoss
    metrics.PerClassLogLoss
    |> Seq.iteri (fun index classLogLoss ->
        printfn " LogLoss for class %d = %.4f, the closer to 0, the better" (index + 1) classLogLoss)
    printfn "************************************************************"
54+
55+
56+
/// Sample standard deviation of `values` (sum of squared deviations from the
/// mean divided by n - 1, then square-rooted).
///
/// Returns 0.0 when fewer than two values are supplied: with one value the
/// formula would evaluate 0/0 (NaN), and with none `Array.average` would throw.
let private calculateStandardDeviation (values : float array) =
    if values.Length < 2 then
        0.0
    else
        let mean = Array.average values
        let sumOfSquaredDeviations =
            values |> Array.sumBy (fun v -> (v - mean) * (v - mean))
        Math.Sqrt(sumOfSquaredDeviations / float (values.Length - 1))
61+
62+
/// Half-width of the 95% confidence interval of the mean of `values`,
/// computed as 1.96 * s / sqrt(n), where s is the sample standard deviation
/// returned by `calculateStandardDeviation` and n is the number of values.
///
/// The divisor is sqrt(n), not sqrt(n - 1): the n - 1 correction is already
/// applied inside the standard deviation. Returns 0.0 for fewer than two
/// values, where no spread can be estimated.
let calculateConfidenceInterval95 (values : float array) =
    if values.Length < 2 then
        0.0
    else
        1.96 * calculateStandardDeviation values / Math.Sqrt(float values.Length)
65+
66+
/// Prints, for the cross-validated model called `algorithmName`, the average,
/// standard deviation and 95% confidence interval over all folds of
/// AccuracyMicro, AccuracyMacro, LogLoss and LogLossReduction.
/// Only the metrics element of each fold's tuple is used.
let printMulticlassClassificationFoldsAverageMetrics algorithmName (crossValResults : (MultiClassClassifierEvaluator.Result * ITransformer * IDataView) array) =

    // Collapses one metric across all folds into
    // (average, standard deviation, 95% confidence interval).
    let summarize (selectMetric : MultiClassClassifierEvaluator.Result -> float) =
        let perFold =
            crossValResults
            |> Array.map (fun (foldMetrics, _, _) -> selectMetric foldMetrics)
        let average = Array.average perFold
        let stdDeviation = calculateStandardDeviation perFold
        let confidenceInterval95 = calculateConfidenceInterval95 perFold
        average, stdDeviation, confidenceInterval95

    // All statistics are computed before anything is printed.
    let microAvg, microStd, microCi = summarize (fun m -> m.AccuracyMicro)
    let macroAvg, macroStd, macroCi = summarize (fun m -> m.AccuracyMacro)
    let lossAvg, lossStd, lossCi = summarize (fun m -> m.LogLoss)
    let reductionAvg, reductionStd, reductionCi = summarize (fun m -> m.LogLossReduction)

    printfn "*************************************************************************************************************"
    printfn "* Metrics for %s Multi-class Classification model " algorithmName
    printfn "*------------------------------------------------------------------------------------------------------------"
    printfn "* Average MicroAccuracy: %.3f - Standard deviation: (%.3f) - Confidence Interval 95%%: (%.3f)" microAvg microStd microCi
    printfn "* Average MacroAccuracy: %.3f - Standard deviation: (%.3f) - Confidence Interval 95%%: (%.3f)" macroAvg macroStd macroCi
    printfn "* Average LogLoss: %.3f - Standard deviation: (%.3f) - Confidence Interval 95%%: (%.3f)" lossAvg lossStd lossCi
    printfn "* Average LogLossReduction: %.3f - Standard deviation: (%.3f) - Confidence Interval 95%%: (%.3f)" reductionAvg reductionStd reductionCi
    printfn "*************************************************************************************************************"
98+
99+
/// Prints AvgMinScore and Dbi of the clustering model called `name`,
/// each with four decimals, inside an asterisk frame.
let printClusteringMetrics name (metrics : ClusteringEvaluator.Result) =
    let border = "*************************************************"
    // Lazy sequence so each metric is read right before its line is printed.
    seq {
        yield border
        yield sprintf "* Metrics for %s clustering model " name
        yield "*------------------------------------------------"
        yield sprintf "* AvgMinScore: %.4f" metrics.AvgMinScore
        yield sprintf "* DBI is: %.4f" metrics.Dbi
        yield border
    }
    |> Seq.iter (printfn "%s")
106+
107+
/// Writes `lines` to the console in yellow, preceded by a line holding a
/// single space and followed by a '#' underline as long as the longest line.
///
/// An empty `lines` array produces an empty underline instead of an
/// exception, and the previous foreground colour is restored even if
/// printing fails.
let consoleWriteHeader (lines : string array) =
    let defaultColor = Console.ForegroundColor
    Console.ForegroundColor <- ConsoleColor.Yellow
    try
        printfn " "
        for line in lines do
            printfn "%s" line
        // Folding from 0 (rather than Array.max) keeps an empty array valid.
        let maxLength = lines |> Array.fold (fun longest line -> max longest line.Length) 0
        printfn "%s" (new string('#', maxLength))
    finally
        Console.ForegroundColor <- defaultColor
116+
117+
/// Fits `pipeline` on `dataView`, transforms the same data, and prints up to
/// `numberOfRows` rows of the result, one line per row, listing every field
/// of the `'TObservation` instance as "| name: value".
///
/// Returns the rows that were read as a list. If the data holds fewer rows
/// than requested, all available rows are shown (no exception is raised).
let peekDataViewInConsole<'TObservation when 'TObservation : (new : unit -> 'TObservation) and 'TObservation : not struct> (mlContext : MLContext) (dataView : IDataView) (pipeline : IEstimator<ITransformer>) numberOfRows =

    let msg = sprintf "Peek data in DataView: Showing %d rows with the columns specified by TObservation class" numberOfRows
    consoleWriteHeader [| msg |]

    //https://github.com/dotnet/machinelearning/blob/master/docs/code/MlNetCookBook.md#how-do-i-look-at-the-intermediate-data
    let transformer = pipeline.Fit dataView
    let transformedData = transformer.Transform dataView

    // 'transformedData' is a 'promise' of data, lazy-loading. Let's actually read it.
    // Convert to an enumerable of user-defined type.
    let someRows =
        transformedData.AsEnumerable<'TObservation>(mlContext, reuseRowObject = false)
        // Seq.truncate (unlike Seq.take) tolerates a source shorter than numberOfRows.
        |> Seq.truncate numberOfRows
        |> Seq.toList

    // Instance and static fields, public or not, are all reported.
    let fieldFlags =
        BindingFlags.Instance ||| BindingFlags.Static ||| BindingFlags.NonPublic ||| BindingFlags.Public

    for row in someRows do
        row.GetType().GetFields(fieldFlags)
        |> Array.map (fun field -> sprintf "| %s: %O" field.Name (field.GetValue(row)))
        |> String.concat ""
        |> printfn "Row--> %s"

    someRows
147+
148+
/// Fits `pipeline` on `dataView`, transforms the same data, and prints up to
/// `numberOfRows` values of the `float32[]` column called `columnName`,
/// one line per row with the vector elements separated by spaces.
///
/// Returns the vectors that were read as a list. If the data holds fewer
/// rows than requested, all available rows are shown (no exception is raised).
let peekVectorColumnDataInConsole (mlContext : MLContext) columnName (dataView : IDataView) (pipeline : IEstimator<ITransformer>) numberOfRows =
    let msg = sprintf "Peek data in DataView: : Show %d rows with just the '%s' column" numberOfRows columnName
    consoleWriteHeader [| msg |]

    let transformer = pipeline.Fit dataView
    let transformedData = transformer.Transform dataView

    // Extract the requested vector column.
    let someColumnData =
        transformedData.GetColumn<float32[]>(mlContext, columnName)
        // Seq.truncate (unlike Seq.take) tolerates a source shorter than numberOfRows.
        |> Seq.truncate numberOfRows
        |> Seq.toList

    // Print the peeked rows; elements are space-separated so that adjacent
    // numbers do not run together into an unreadable string.
    for row in someColumnData do
        row
        |> Array.map string
        |> String.concat " "
        |> printfn " %s"

    someColumnData
172+
173+
/// Writes `lines` to the console in blue, preceded by a line holding a single
/// space and followed by a '-' underline as long as the longest line.
///
/// An empty `lines` array produces an empty underline instead of an
/// exception, and the previous foreground colour is restored even if
/// printing fails.
let consoleWriterSection (lines : string array) =
    let defaultColor = Console.ForegroundColor
    Console.ForegroundColor <- ConsoleColor.Blue
    try
        printfn " "
        lines
        |> Array.iter (printfn "%s")

        // Folding from 0 (rather than Array.max) keeps an empty array valid.
        let maxLength = lines |> Array.fold (fun longest line -> max longest line.Length) 0
        printfn "%s" (new string('-', maxLength))
    finally
        Console.ForegroundColor <- defaultColor
183+
184+
/// Prints a green "Press any key to finish." prompt (after a line holding a
/// single space), restores the previous console colour, then blocks until a
/// key is read; the key itself is discarded.
let consolePressAnyKey () =
    let originalColor = Console.ForegroundColor
    Console.ForegroundColor <- ConsoleColor.Green
    [ " "; "Press any key to finish." ]
    |> List.iter (printfn "%s")
    Console.ForegroundColor <- originalColor
    ignore (Console.ReadKey())
191+
192+
/// Prints an "EXCEPTION" title in red, underlined with '#' characters, then
/// restores the previous console colour and prints each of `lines` in it.
let consoleWriteException (lines : string array) =
    let exceptionTitle = "EXCEPTION"
    let originalColor = Console.ForegroundColor
    Console.ForegroundColor <- ConsoleColor.Red
    [ " "
      exceptionTitle
      String.replicate exceptionTitle.Length "#" ]
    |> List.iter (printfn "%s")
    // The detail lines are deliberately written after the colour is reset.
    Console.ForegroundColor <- originalColor
    Array.iter (fun line -> printfn "%s" line) lines
202+
203+
/// Prints a "WARNING" title in dark magenta, underlined with '#' characters,
/// then restores the previous console colour and prints each of `lines` in it.
let consoleWriteWarning (lines : string array) =
    let warningTitle = "WARNING"
    let originalColor = Console.ForegroundColor
    Console.ForegroundColor <- ConsoleColor.DarkMagenta
    [ " "
      warningTitle
      String.replicate warningTitle.Length "#" ]
    |> List.iter (printfn "%s")
    // The detail lines are deliberately written after the colour is reset.
    Console.ForegroundColor <- originalColor
    Array.iter (fun line -> printfn "%s" line) lines
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
namespace Common
2+
3+
open System.IO
4+
open Microsoft.ML
5+
open Microsoft.ML.Core.Data
6+
open Microsoft.ML.Transforms
7+
open Microsoft.ML.Runtime.Data
8+
9+
module ModelBuilder =
10+
11+
/// Pairs an ML context with an initial pipeline. The resulting tuple is the
/// builder state taken as the last argument by `addTrainer`, `train` and
/// `evaluateClusteringModel`.
let create (mlContext : MLContext) (pipeline : IEstimator<ITransformer>) =
    mlContext, pipeline
13+
14+
/// Appends `estimator` to `pipeline` and returns the combined estimator.
/// Fails with a descriptive message when `pipeline` is not an
/// `IEstimator<ITransformer>` at runtime.
let append (estimator : IEstimator<'a>) (pipeline : IEstimator<'b>) =
    match pipeline with
    | :? IEstimator<ITransformer> as chainable -> chainable.Append estimator
    | _ ->
        failwith "The pipeline has to be an instance of IEstimator<ITransformer>."
19+
20+
21+
/// Returns the builder state with `trainer` appended to its pipeline
/// (via `append`); the ML context is passed through unchanged.
let addTrainer (trainer : IEstimator<'b>) (mlContext : MLContext, pipeline : IEstimator<'a>) =
    mlContext, append trainer pipeline
26+
27+
/// Fits the builder's pipeline on `trainingData` and returns the fitted
/// model upcast to `ITransformer`. The ML context in the tuple is not used.
let train (trainingData : IDataView) (mlContext : MLContext, pipeline : IEstimator<'a>) =
    let fittedModel = pipeline.Fit trainingData
    fittedModel :> ITransformer
29+
30+
31+
/// Guard used before a trained model is consumed: raises a failure when
/// `trainedModel` is null, otherwise does nothing.
let private checkTrained (trainedModel : ITransformer) =
    match trainedModel = null with
    | true -> failwith "Cannot test before training. Call Train() first."
    | false -> ()
34+
35+
/// Applies `trainedModel` to `dataView` and evaluates the result with the
/// context's clustering evaluator, reading the "Score" and "Features"
/// columns. Fails if `trainedModel` is null. The pipeline in the tuple is
/// not used.
let evaluateClusteringModel (dataView : IDataView) (trainedModel : ITransformer) (mlContext : MLContext, pipeline : IEstimator<'a>) =
    checkTrained trainedModel
    let scoredData = dataView |> trainedModel.Transform
    mlContext.Clustering.Evaluate(scoredData, score = "Score", features = "Features")
39+
40+
/// Saves `trainedModel` to the file at `persistedModelPath` (created or
/// overwritten) through the ML context, then prints a confirmation line.
/// Fails if `trainedModel` is null. Takes the model together with the
/// builder state as a single tuple argument.
let saveModelAsFile persistedModelPath (trainedModel : ITransformer, (mlContext : MLContext, _)) =
    checkTrained trainedModel

    // `use` disposes the stream when the function returns.
    use modelStream =
        new FileStream(persistedModelPath, FileMode.Create, FileAccess.Write, FileShare.Write)
    mlContext.Model.Save(trainedModel, modelStream)
    printfn "The model is saved to %s" persistedModelPath
46+
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
namespace Common
2+
3+
open System.IO
4+
open Microsoft.ML
5+
open Microsoft.ML.Core.Data
6+
open Microsoft.ML.Transforms
7+
open Microsoft.ML.Runtime.Data
8+
9+
module ModelScorer =
10+
11+
/// Entry point of the scorer workflow: returns the given ML context
/// unchanged so it can be piped into `loadModelFromZipFile`.
let create (mlContext : MLContext) = mlContext
13+
14+
/// Opens the file at `modelPath` for reading, loads a transformer chain
/// from it and creates a prediction function from `'TObservation` to
/// `'TPrediction`. Returns the triple (context, loaded model, prediction
/// function) consumed by `predictSingle`.
let loadModelFromZipFile<'TObservation, 'TPrediction when 'TPrediction : (new : unit -> 'TPrediction) and 'TPrediction : not struct and 'TObservation : not struct> modelPath (mlContext : MLContext) =
    // `use` disposes the file stream when the function returns.
    use modelStream = new FileStream(modelPath, FileMode.Open, FileAccess.Read, FileShare.Read)
    let loadedModel = TransformerChain.LoadFrom(mlContext, modelStream)
    let predictor = loadedModel.MakePredictionFunction<'TObservation, 'TPrediction>(mlContext)

    mlContext, loadedModel, predictor
20+
21+
/// Guard used before scoring: raises a failure when `trainedModel` is null,
/// otherwise does nothing.
let private checkTrainedModelIsLoaded (trainedModel) =
    match trainedModel = null with
    | true ->
        failwith "Need to have a model before scoring. Call LoadModelFromZipFile(modelPath) first or provided a model through the constructor."
    | false -> ()
24+
25+
/// Scores one observation with the prediction function from the scorer
/// triple and returns the prediction. Fails if the model in the triple is
/// null. The ML context in the triple is not used.
let predictSingle (input : 'TObservation) (mlContext : MLContext, trainedModel, predictionFunction : PredictionFunction<'TObservation, 'TPrediction>) =
    checkTrainedModelIsLoaded trainedModel
    let prediction = predictionFunction.Predict input
    prediction
28+

samples/fsharp/common/Pipeline.fs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
namespace Common
2+
3+
module Pipeline =
4+
5+
open Microsoft.ML.Core.Data
6+
open Microsoft.ML.Transforms
7+
open Microsoft.ML.Runtime.Data
8+
9+
//let textTransform (inputColumn : string) outputColumn env =
10+
// Microsoft.ML.Transforms.Text.TextTransform(env, inputColumn, outputColumn)
11+
12+
/// Builds a `CopyColumnsEstimator` from `env`, `input` and `output`.
/// `env` comes last so the function can be partially applied and the
/// environment supplied at the end.
let copyColumnsEstimator input output env =
    new CopyColumnsEstimator(env, input, output)
14+
15+
//let concatEstimator input output env =
16+
// ConcatEstimator(env, input, output)
17+
18+
//let append' (estimator : IEstimator<'b>) (pipeline : IEstimator<ITransformer>) =
19+
// pipeline.Append estimator
20+
21+
/// Appends `estimator` to `pipeline` and returns the combined estimator.
/// Fails with a descriptive message when `pipeline` is not an
/// `IEstimator<ITransformer>` at runtime.
let append (estimator : IEstimator<'a>) (pipeline : IEstimator<'b>) =
    match pipeline with
    | :? IEstimator<ITransformer> as chainable -> chainable.Append estimator
    | _ ->
        failwith "The pipeline has to be an instance of IEstimator<ITransformer>."
26+
27+
28+
/// Fits the estimator chain on `dataView` and returns the result of `Fit`.
/// The pipeline is the last argument so the function can end a `|>` chain.
let fit (dataView : IDataView) (pipeline : EstimatorChain<'a>) =
    dataView |> pipeline.Fit
30+
31+
32+
33+
/// Casts `b` to `IEstimator<ITransformer>`.
/// Fails with a descriptive message (same wording style as `append`) when
/// `b` is not an `IEstimator<ITransformer>` at runtime.
let downcast' (b : IEstimator<'a>) =
    match b with
    | :? IEstimator<ITransformer> as estimator -> estimator
    | _ -> failwith "The estimator has to be an instance of IEstimator<ITransformer>."

samples/fsharp/getting-started/Clustering_Iris/Clustering_Iris.fsproj

Lines changed: 0 additions & 19 deletions
This file was deleted.
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
2+
Microsoft Visual Studio Solution File, Format Version 12.00
3+
# Visual Studio 15
4+
VisualStudioVersion = 15.0.28010.2050
5+
MinimumVisualStudioVersion = 10.0.40219.1
6+
Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "Clustering_Iris", "IrisClustering\IrisClusteringConsoleApp\Clustering_Iris.fsproj", "{20A1A0DB-A515-4EF9-891B-C57A4D66AC49}"
7+
EndProject
8+
Global
9+
GlobalSection(SolutionConfigurationPlatforms) = preSolution
10+
Debug|Any CPU = Debug|Any CPU
11+
Release|Any CPU = Release|Any CPU
12+
EndGlobalSection
13+
GlobalSection(ProjectConfigurationPlatforms) = postSolution
14+
{20A1A0DB-A515-4EF9-891B-C57A4D66AC49}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
15+
{20A1A0DB-A515-4EF9-891B-C57A4D66AC49}.Debug|Any CPU.Build.0 = Debug|Any CPU
16+
{20A1A0DB-A515-4EF9-891B-C57A4D66AC49}.Release|Any CPU.ActiveCfg = Release|Any CPU
17+
{20A1A0DB-A515-4EF9-891B-C57A4D66AC49}.Release|Any CPU.Build.0 = Release|Any CPU
18+
EndGlobalSection
19+
GlobalSection(SolutionProperties) = preSolution
20+
HideSolutionNode = FALSE
21+
EndGlobalSection
22+
GlobalSection(ExtensibilityGlobals) = postSolution
23+
SolutionGuid = {08E1E2D6-0946-4172-A8D4-1C8B3F691DCD}
24+
EndGlobalSection
25+
EndGlobal

0 commit comments

Comments
 (0)