diff --git a/examples/pole/common.go b/examples/pole/common.go deleted file mode 100644 index e5a3602..0000000 --- a/examples/pole/common.go +++ /dev/null @@ -1,15 +0,0 @@ -// Package pole provides definition of the pole balancing experiments is classic Reinforced Learning task proposed by -// Richard Sutton and Charles Anderson. -// In this experiment we will try to teach RF model of balancing pole placed on the moving cart. -package pole - -// ActionType The type of action to be applied to environment -type ActionType byte - -// The supported action types -const ( - // ContinuousAction The continuous action type meaning continuous values to be applied to environment - ContinuousAction ActionType = iota - // DiscreteAction The discrete action assumes that there are only discrete values of action (e.g. 0, 1) - DiscreteAction -) diff --git a/examples/pole2/cart2pole.go b/examples/pole2/cart2pole.go new file mode 100644 index 0000000..5bf3d80 --- /dev/null +++ b/examples/pole2/cart2pole.go @@ -0,0 +1,109 @@ +package pole2 + +import ( + "context" + "fmt" + "github.com/yaricom/goNEAT/v4/experiment" + "github.com/yaricom/goNEAT/v4/experiment/utils" + "github.com/yaricom/goNEAT/v4/neat" + "github.com/yaricom/goNEAT/v4/neat/genetics" +) + +type cartDoublePoleGenerationEvaluator struct { + // The output path to store execution results + OutputPath string + // The flag to indicate whether to apply Markov evaluation variant + Markov bool + + // The flag to indicate whether to use continuous activation or discrete + ActionType ActionType +} + +// NewCartDoublePoleGenerationEvaluator is the generations evaluator for double-pole balancing experiment: both Markov and non-Markov versions +func NewCartDoublePoleGenerationEvaluator(outDir string, markov bool, actionType ActionType) experiment.GenerationEvaluator { + return &cartDoublePoleGenerationEvaluator{ + OutputPath: outDir, + Markov: markov, + ActionType: actionType, + } +} + +// GenerationEvaluate Perform evaluation of one epoch on double pole balancing +func (e *cartDoublePoleGenerationEvaluator) GenerationEvaluate(ctx context.Context, pop *genetics.Population, epoch *experiment.Generation) error { + options, ok := neat.FromContext(ctx) + if !ok { + return neat.ErrNEATOptionsNotFound + } + cartPole := NewCartPole(e.Markov) + + cartPole.nonMarkovLong = false + cartPole.generalizationTest = false + + // Evaluate each organism on a test + for _, org := range pop.Organisms { + winner, err := OrganismEvaluate(org, cartPole, e.ActionType) + if err != nil { + return err + } + + if winner && (epoch.Champion == nil || org.Fitness > epoch.Champion.Fitness) { + // This will be winner in Markov case + epoch.Solved = true + epoch.WinnerNodes = len(org.Genotype.Nodes) + epoch.WinnerGenes = org.Genotype.Extrons() + epoch.WinnerEvals = options.PopSize*epoch.Id + org.Genotype.Id + epoch.Champion = org + org.IsWinner = true + } + } + + // Check for winner in Non-Markov case + if !e.Markov { + epoch.Solved = false + // evaluate generalization tests + if champion, err := EvaluateOrganismGeneralization(pop.Species, cartPole, e.ActionType); err != nil { + return err + } else if champion.IsWinner { + epoch.Solved = true + epoch.WinnerNodes = len(champion.Genotype.Nodes) + epoch.WinnerGenes = champion.Genotype.Extrons() + epoch.WinnerEvals = options.PopSize*epoch.Id + champion.Genotype.Id + epoch.Champion = champion + } + } + + // Fill statistics about current epoch + epoch.FillPopulationStatistics(pop) + + // Only print to file every print_every generation + if epoch.Solved || epoch.Id%options.PrintEvery == 0 { + if _, err := utils.WritePopulationPlain(e.OutputPath, pop, epoch); err != nil { + neat.ErrorLog(fmt.Sprintf("Failed to dump population, reason: %s\n", err)) + return err + } + } + + if epoch.Solved { + // print winner organism's statistics + org := epoch.Champion + utils.PrintActivationDepth(org, true) + + genomeFile := "pole2_winner_genome" + // Prints the winner organism to file! + if orgPath, err := utils.WriteGenomePlain(genomeFile, e.OutputPath, org, epoch); err != nil { + neat.ErrorLog(fmt.Sprintf("Failed to dump winner organism's genome, reason: %s\n", err)) + } else { + neat.InfoLog(fmt.Sprintf("Generation #%d winner's genome dumped to: %s\n", epoch.Id, orgPath)) + } + + // Prints the winner organism's phenotype to the Cytoscape JSON file! + if orgPath, err := utils.WriteGenomeCytoscapeJSON(genomeFile, e.OutputPath, org, epoch); err != nil { + neat.ErrorLog(fmt.Sprintf("Failed to dump winner organism's phenome Cytoscape JSON graph, reason: %s\n", err)) + } else { + neat.InfoLog(fmt.Sprintf("Generation #%d winner's phenome Cytoscape JSON graph dumped to: %s\n", + epoch.Id, orgPath)) + } + } + + return nil +} diff --git a/examples/pole2/cart2pole_parallel.go b/examples/pole2/cart2pole_parallel.go new file mode 100644 index 0000000..3bbf316 --- /dev/null +++ b/examples/pole2/cart2pole_parallel.go @@ -0,0 +1,138 @@ +package pole2 + +import ( + "context" + "fmt" + "github.com/yaricom/goNEAT/v4/experiment" + "github.com/yaricom/goNEAT/v4/experiment/utils" + "github.com/yaricom/goNEAT/v4/neat" + "github.com/yaricom/goNEAT/v4/neat/genetics" + "sync" +) + +type cartDoublePoleParallelGenerationEvaluator struct { + cartDoublePoleGenerationEvaluator +} + +type parallelEvaluationResult struct { + genomeId int + fitness float64 + error float64 + winner bool + err error +} + +// NewCartDoublePoleParallelGenerationEvaluator is the generations evaluator for double-pole balancing experiment: both Markov and non-Markov versions +func NewCartDoublePoleParallelGenerationEvaluator(outDir string, markov bool, actionType ActionType) experiment.GenerationEvaluator { + return &cartDoublePoleParallelGenerationEvaluator{ + cartDoublePoleGenerationEvaluator{ + OutputPath: outDir, + Markov: markov, + ActionType: actionType, + }, + } +} + +func (e *cartDoublePoleParallelGenerationEvaluator) GenerationEvaluate(ctx context.Context, pop *genetics.Population, epoch *experiment.Generation) error { + options, ok := neat.FromContext(ctx) + if !ok { + return neat.ErrNEATOptionsNotFound + } + + organismMapping := make(map[int]*genetics.Organism) + + popSize := len(pop.Organisms) + resChan := make(chan parallelEvaluationResult, popSize) + // The wait group to wait for all GO routines + var wg sync.WaitGroup + + // Evaluate each organism in generation + for _, org := range pop.Organisms { + if _, ok = organismMapping[org.Genotype.Id]; ok { + return fmt.Errorf("organism with %d already exists in mapping", org.Genotype.Id) + } + organismMapping[org.Genotype.Id] = org + wg.Add(1) + + // run in separate GO thread + go func(organism *genetics.Organism, actionType ActionType, resChan chan<- parallelEvaluationResult, wg *sync.WaitGroup) { + defer wg.Done() + + // create simulator and evaluate + cartPole := NewCartPole(e.Markov) + cartPole.nonMarkovLong = false + cartPole.generalizationTest = false + + winner, err := OrganismEvaluate(organism, cartPole, actionType) + if err != nil { + resChan <- parallelEvaluationResult{err: err} + return + } + + // create result + result := parallelEvaluationResult{ + genomeId: organism.Genotype.Id, + fitness: organism.Fitness, + error: organism.Error, + winner: winner, + } + resChan <- result + + }(org, e.ActionType, resChan, &wg) + } + + // wait for evaluation results + wg.Wait() + close(resChan) + + for result := range resChan { + if result.err != nil { + return result.err + } + // find and update original organism + org, ok := organismMapping[result.genomeId] + if ok { + org.Fitness = result.fitness + org.Error = result.error + } else { + return fmt.Errorf("organism not found in mapping for id: %d", result.genomeId) + } + + if result.winner && (epoch.Champion == nil || org.Fitness > epoch.Champion.Fitness) { + // This will be winner in Markov case + epoch.Solved = true + epoch.WinnerNodes = len(org.Genotype.Nodes) + epoch.WinnerGenes = org.Genotype.Extrons() + epoch.WinnerEvals = options.PopSize*epoch.Id + org.Genotype.Id + epoch.Champion = org + org.IsWinner = true + } + } + + // Fill statistics about current epoch + epoch.FillPopulationStatistics(pop) + + if epoch.Solved { + // print winner organism's statistics + org := epoch.Champion + utils.PrintActivationDepth(org, true) + + genomeFile := "pole2_parallel_winner_genome" + // Prints the winner organism to file! + if orgPath, err := utils.WriteGenomePlain(genomeFile, e.OutputPath, org, epoch); err != nil { + neat.ErrorLog(fmt.Sprintf("Failed to dump winner organism's genome, reason: %s\n", err)) + } else { + neat.InfoLog(fmt.Sprintf("Generation #%d winner's genome dumped to: %s\n", epoch.Id, orgPath)) + } + + // Prints the winner organism's phenotype to the Cytoscape JSON file! + if orgPath, err := utils.WriteGenomeCytoscapeJSON(genomeFile, e.OutputPath, org, epoch); err != nil { + neat.ErrorLog(fmt.Sprintf("Failed to dump winner organism's phenome Cytoscape JSON graph, reason: %s\n", err)) + } else { + neat.InfoLog(fmt.Sprintf("Generation #%d winner's phenome Cytoscape JSON graph dumped to: %s\n", + epoch.Id, orgPath)) + } + } + + return nil +} diff --git a/examples/pole/cart2pole_test.go b/examples/pole2/cart2pole_test.go similarity index 99% rename from examples/pole/cart2pole_test.go rename to examples/pole2/cart2pole_test.go index 0752992..595d042 100644 --- a/examples/pole/cart2pole_test.go +++ b/examples/pole2/cart2pole_test.go @@ -1,4 +1,4 @@ -package pole +package pole2 import ( "fmt" diff --git a/examples/pole/cart2pole.go b/examples/pole2/common.go similarity index 54% rename from examples/pole/cart2pole.go rename to examples/pole2/common.go index 1457423..3bb5217 100644 --- a/examples/pole/cart2pole.go +++ b/examples/pole2/common.go @@ -1,15 +1,13 @@ -package pole +// Package pole2 provides definition of the two pole balancing experiment. +// In this experiment we will try to teach RF model of balancing of two poles placed on the moving cart. +package pole2 import ( - "context" "fmt" - "github.com/yaricom/goNEAT/v4/experiment" - "github.com/yaricom/goNEAT/v4/experiment/utils" "github.com/yaricom/goNEAT/v4/neat" "github.com/yaricom/goNEAT/v4/neat/genetics" "github.com/yaricom/goNEAT/v4/neat/network" "math" - "sort" ) const thirtySixDegrees = 36 * math.Pi / 180.0 @@ -23,24 +21,16 @@ const nonMarkovLongMaxSteps = 100000 // The maximal number of time steps for Non-Markov generalization run const nonMarkovGeneralizationMaxSteps = 1000 -type cartDoublePoleGenerationEvaluator struct { - // The output path to store execution results - OutputPath string - // The flag to indicate whether to apply Markov evaluation variant - Markov bool +// ActionType The type of action to be applied to environment +type ActionType byte - // The flag to indicate whether to use continuous activation or discrete - ActionType ActionType -} - -// NewCartDoublePoleGenerationEvaluator is the generations evaluator for double-pole balancing experiment: both Markov and non-Markov versions -func NewCartDoublePoleGenerationEvaluator(outDir string, markov bool, actionType ActionType) experiment.GenerationEvaluator { - return &cartDoublePoleGenerationEvaluator{ - OutputPath: outDir, - Markov: markov, - ActionType: actionType, - } -} +// The supported action types +const ( + // ContinuousAction The continuous action type meaning continuous values to be applied to environment + ContinuousAction ActionType = iota + // DiscreteAction The discrete action assumes that there are only discrete values of action (e.g. 0, 1) + DiscreteAction +) // CartPole The structure to describe cart pole emulation type CartPole struct { @@ -66,248 +56,8 @@ type CartPole struct { poleVelocitySum float64 } -// GenerationEvaluate Perform evaluation of one epoch on double pole balancing -func (e *cartDoublePoleGenerationEvaluator) GenerationEvaluate(ctx context.Context, pop *genetics.Population, epoch *experiment.Generation) error { - options, ok := neat.FromContext(ctx) - if !ok { - return neat.ErrNEATOptionsNotFound - } - cartPole := newCartPole(e.Markov) - - cartPole.nonMarkovLong = false - cartPole.generalizationTest = false - - // Evaluate each organism on a test - for _, org := range pop.Organisms { - winner, err := e.orgEvaluate(org, cartPole) - if err != nil { - return err - } - - if winner && (epoch.Champion == nil || org.Fitness > epoch.Champion.Fitness) { - // This will be winner in Markov case - epoch.Solved = true - epoch.WinnerNodes = len(org.Genotype.Nodes) - epoch.WinnerGenes = org.Genotype.Extrons() - epoch.WinnerEvals = options.PopSize*epoch.Id + org.Genotype.Id - epoch.Champion = org - org.IsWinner = true - } - } - - // Check for winner in Non-Markov case - if !e.Markov { - // The best individual (i.e. the one with the highest fitness value) of every generation is tested for - // its ability to balance the system for a longer time period. If a potential solution passes this test - // by keeping the system balanced for 100’000 time steps, the so called generalization score(GS) of this - // particular individual is calculated. This score measures the potential of a controller to balance the - // system starting from different initial conditions. It's calculated with a series of experiments, running - // over 1000 time steps, starting from 625 different initial conditions. - // The initial conditions are chosen by assigning each value of the set Ω = [0.05 0.25 0.5 0.75 0.95] to - // each of the states x, ∆x/∆t, θ1 and ∆θ1/∆t, scaled to the range of the variables.The short pole angle θ2 - // and its angular velocity ∆θ2/∆t are set to zero. The GS is then defined as the number of successful runs - // from the 625 initial conditions and an individual is defined as a solution if it reaches a generalization - // score of 200 or more. - - // Sort the species by max organism fitness in descending order - the highest fitness first - sortedSpecies := make([]*genetics.Species, len(pop.Species)) - copy(sortedSpecies, pop.Species) - sort.Sort(sort.Reverse(genetics.ByOrganismFitness(sortedSpecies))) - - // First update what is checked and unchecked - var currSpecies *genetics.Species - for _, currSpecies = range sortedSpecies { - max, _ := currSpecies.ComputeMaxAndAvgFitness() - if max > currSpecies.MaxFitnessEver { - currSpecies.IsChecked = false - } - } - - // Now find first (most fit) species that is unchecked - currSpecies = nil - for _, currSpecies = range sortedSpecies { - if !currSpecies.IsChecked { - break - } - } - if currSpecies == nil { - currSpecies = sortedSpecies[0] - } - - // Remember it was checked - currSpecies.IsChecked = true - - // the organism champion - champion := currSpecies.FindChampion() - championFitness := champion.Fitness - championPhenotype, err := champion.Phenotype() - if err != nil { - return err - } - - // Now check to make sure the champion can do 100'000 evaluations - cartPole.nonMarkovLong = true - cartPole.generalizationTest = false - - longRunPassed, err := e.orgEvaluate(champion, cartPole) - if err != nil { - return err - } - if longRunPassed { - - // the champion passed non-Markov long test, start generalization - cartPole.nonMarkovLong = false - cartPole.generalizationTest = true - - // Given that the champion passed long run test, now run it on generalization tests running - // over 1'000 time steps, starting from 625 different initial conditions - stateVals := [5]float64{0.05, 0.25, 0.5, 0.75, 0.95} - generalizationScore := 0 - for s0c := 0; s0c < 5; s0c++ { - for s1c := 0; s1c < 5; s1c++ { - for s2c := 0; s2c < 5; s2c++ { - for s3c := 0; s3c < 5; s3c++ { - cartPole.state[0] = stateVals[s0c]*4.32 - 2.16 - cartPole.state[1] = stateVals[s1c]*2.70 - 1.35 - cartPole.state[2] = stateVals[s2c]*0.12566304 - 0.06283152 // 0.06283152 = 3.6 degrees - cartPole.state[3] = stateVals[s3c]*0.30019504 - 0.15009752 // 0.15009752 = 8.6 degrees - // The short pole angle and its angular velocity are set to zero. - cartPole.state[4] = 0.0 - cartPole.state[5] = 0.0 - - // The champion needs to be flushed here because it may have - // leftover activation from its last test run that could affect - // its recurrent memory - if _, err = championPhenotype.Flush(); err != nil { - return err - } - - if generalized, err := e.orgEvaluate(champion, cartPole); generalized { - generalizationScore++ - - if neat.LogLevel == neat.LogLevelDebug { - neat.DebugLog( - fmt.Sprintf("x: %f, xv: %f, t1: %f, t2: %f, angle: %f\n", - cartPole.state[0], cartPole.state[1], - cartPole.state[2], cartPole.state[4], thirtySixDegrees)) - } - } else if err != nil { - return err - } - } - } - } - } - - if generalizationScore >= 200 { - // The generalization test winner - neat.InfoLog( - fmt.Sprintf("The non-Markov champion found! (Generalization Score = %d)", - generalizationScore)) - champion.Fitness = float64(generalizationScore) - champion.IsWinner = true - epoch.Solved = true - epoch.WinnerNodes = len(champion.Genotype.Nodes) - epoch.WinnerGenes = champion.Genotype.Extrons() - epoch.WinnerEvals = options.PopSize*epoch.Id + champion.Genotype.Id - epoch.Champion = champion - } else { - neat.InfoLog("The non-Markov champion unable to generalize") - champion.Fitness = championFitness // Restore the champ's fitness - champion.IsWinner = false - } - } else { - neat.InfoLog("The non-Markov champion missed the 100'000 run test") - champion.Fitness = championFitness // Restore the champ's fitness - champion.IsWinner = false - } - } - - // Fill statistics about current epoch - epoch.FillPopulationStatistics(pop) - - // Only print to file every print_every generation - if epoch.Solved || epoch.Id%options.PrintEvery == 0 { - if _, err := utils.WritePopulationPlain(e.OutputPath, pop, epoch); err != nil { - neat.ErrorLog(fmt.Sprintf("Failed to dump population, reason: %s\n", err)) - return err - } - } - - if epoch.Solved { - // print winner organism's statistics - org := epoch.Champion - utils.PrintActivationDepth(org, true) - - genomeFile := "pole2_winner_genome" - // Prints the winner organism to file! - if orgPath, err := utils.WriteGenomePlain(genomeFile, e.OutputPath, org, epoch); err != nil { - neat.ErrorLog(fmt.Sprintf("Failed to dump winner organism's genome, reason: %s\n", err)) - } else { - neat.InfoLog(fmt.Sprintf("Generation #%d winner's genome dumped to: %s\n", epoch.Id, orgPath)) - } - - // Prints the winner organism's phenotype to the Cytoscape JSON file! - if orgPath, err := utils.WriteGenomeCytoscapeJSON(genomeFile, e.OutputPath, org, epoch); err != nil { - neat.ErrorLog(fmt.Sprintf("Failed to dump winner organism's phenome Cytoscape JSON graph, reason: %s\n", err)) - } else { - neat.InfoLog(fmt.Sprintf("Generation #%d winner's phenome Cytoscape JSON graph dumped to: %s\n", - epoch.Id, orgPath)) - } - } - - return nil -} - -// orgEvaluate method evaluates fitness of the organism for cart double pole-balancing task -func (e *cartDoublePoleGenerationEvaluator) orgEvaluate(organism *genetics.Organism, cartPole *CartPole) (winner bool, err error) { - // Try to balance a pole now - phenotype, err := organism.Phenotype() - if err != nil { - return false, err - } - organism.Fitness, err = cartPole.evalNet(phenotype, e.ActionType) - if err != nil { - return false, err - } - - if neat.LogLevel == neat.LogLevelDebug { - neat.DebugLog(fmt.Sprintf("Organism #%3d\tfitness: %f", organism.Genotype.Id, organism.Fitness)) - } - - // DEBUG CHECK if organism is damaged - if !(cartPole.nonMarkovLong && cartPole.generalizationTest) && organism.CheckChampionChildDamaged() { - neat.WarnLog(fmt.Sprintf("ORGANISM DEGRADED:\n%s", organism.Genotype)) - } - - // Decide if it's a winner, in Markov Case - if cartPole.isMarkov { - if organism.Fitness >= markovMaxSteps { - winner = true - organism.Fitness = 1.0 - organism.Error = 0.0 - } else { - // we use linear scale - organism.Error = (markovMaxSteps - organism.Fitness) / markovMaxSteps - organism.Fitness = 1.0 - organism.Error - } - } else if cartPole.nonMarkovLong { - // if doing the long test non-markov - if organism.Fitness >= nonMarkovLongMaxSteps { - winner = true - } - } else if cartPole.generalizationTest { - if organism.Fitness >= nonMarkovGeneralizationMaxSteps { - winner = true - } - } else { - winner = false - } - return winner, err -} - -// If markov is false, then velocity information will be withheld from the network population (non-Markov) -func newCartPole(markov bool) *CartPole { +// NewCartPole If markov is false, then velocity information will be withheld from the network population (non-Markov) +func NewCartPole(markov bool) *CartPole { return &CartPole{ isMarkov: markov, } @@ -578,3 +328,50 @@ func (p *CartPole) resetState() { } p.balancedTimeSteps = 0 // Always count # of balanced time steps } + +// OrganismEvaluate method evaluates fitness of the organism for cart double pole-balancing task +func OrganismEvaluate(organism *genetics.Organism, cartPole *CartPole, actionType ActionType) (winner bool, err error) { + // Try to balance a pole now + phenotype, err := organism.Phenotype() + if err != nil { + return false, err + } + organism.Fitness, err = cartPole.evalNet(phenotype, actionType) + if err != nil { + return false, err + } + + if neat.LogLevel == neat.LogLevelDebug { + neat.DebugLog(fmt.Sprintf("Organism #%3d\tfitness: %f", organism.Genotype.Id, organism.Fitness)) + } + + // DEBUG CHECK if organism is damaged + if !(cartPole.nonMarkovLong && cartPole.generalizationTest) && organism.CheckChampionChildDamaged() { + neat.WarnLog(fmt.Sprintf("ORGANISM DEGRADED:\n%s", organism.Genotype)) + } + + // Decide if it's a winner, in Markov Case + if cartPole.isMarkov { + if organism.Fitness >= markovMaxSteps { + winner = true + organism.Fitness = 1.0 + organism.Error = 0.0 + } else { + // we use linear scale + organism.Error = (markovMaxSteps - organism.Fitness) / markovMaxSteps + organism.Fitness = 1.0 - organism.Error + } + } else if cartPole.nonMarkovLong { + // if doing the long test non-markov + if organism.Fitness >= nonMarkovLongMaxSteps { + winner = true + } + } else if cartPole.generalizationTest { + if organism.Fitness >= nonMarkovGeneralizationMaxSteps { + winner = true + } + } else { + winner = false + } + return winner, err +} diff --git a/examples/pole2/generalization.go b/examples/pole2/generalization.go new file mode 100644 index 0000000..907df03 --- /dev/null +++ b/examples/pole2/generalization.go @@ -0,0 +1,134 @@ +package pole2 + +import ( + "fmt" + "github.com/yaricom/goNEAT/v4/neat" + "github.com/yaricom/goNEAT/v4/neat/genetics" + "sort" +) + +// EvaluateOrganismGeneralization +// The best individual (i.e. the one with the highest fitness value) of every generation is tested for +// its ability to balance the system for a longer time period. If a potential solution passes this test +// by keeping the system balanced for 100’000 time steps, the so-called generalization score(GS) of this +// particular individual is calculated. This score measures the potential of a controller to balance the +// system starting from different initial conditions. It's calculated with a series of experiments, running +// over 1000 time steps, starting from 625 different initial conditions. +// The initial conditions are chosen by assigning each value of the set Ω = [0.05 0.25 0.5 0.75 0.95] to +// each of the states x, ∆x/∆t, θ1 and ∆θ1/∆t, scaled to the range of the variables.The short pole angle θ2 +// and its angular velocity ∆θ2/∆t are set to zero. The GS is then defined as the number of successful runs +// from the 625 initial conditions and an individual is defined as a solution if it reaches a generalization +// score of 200 or more. +func EvaluateOrganismGeneralization(species []*genetics.Species, cartPole *CartPole, actionType ActionType) (*genetics.Organism, error) { + // Sort the species by max organism fitness in descending order - the highest fitness first + sortedSpecies := make([]*genetics.Species, len(species)) + copy(sortedSpecies, species) + sort.Sort(sort.Reverse(genetics.ByOrganismFitness(sortedSpecies))) + + // First update what is checked and unchecked + var currSpecies *genetics.Species + for _, currSpecies = range sortedSpecies { + maxFitness, _ := currSpecies.ComputeMaxAndAvgFitness() + if maxFitness > currSpecies.MaxFitnessEver { + currSpecies.IsChecked = false + } else { + currSpecies.IsChecked = true + } + } + + // Now find first (most fit) species that is unchecked + currSpecies = nil + for _, currSpecies = range sortedSpecies { + if !currSpecies.IsChecked { + break + } + } + if currSpecies == nil { + currSpecies = sortedSpecies[0] + } + + // Remember it was checked + currSpecies.IsChecked = true + + // the organism champion + champion := currSpecies.FindChampion() + championFitness := champion.Fitness + championPhenotype, err := champion.Phenotype() + if err != nil { + return nil, err + } + + // Now check to make sure the champion can do 100'000 evaluations + cartPole.nonMarkovLong = true + cartPole.generalizationTest = false + + longRunPassed, err := OrganismEvaluate(champion, cartPole, actionType) + if err != nil { + return nil, err + } + if longRunPassed { + + // the champion passed non-Markov long test, start generalization + cartPole.nonMarkovLong = false + cartPole.generalizationTest = true + + // Given that the champion passed long run test, now run it on generalization tests running + // over 1'000 time steps, starting from 625 different initial conditions + stateVals := [5]float64{0.05, 0.25, 0.5, 0.75, 0.95} + generalizationScore := 0 + for s0c := 0; s0c < 5; s0c++ { + for s1c := 0; s1c < 5; s1c++ { + for s2c := 0; s2c < 5; s2c++ { + for s3c := 0; s3c < 5; s3c++ { + cartPole.state[0] = stateVals[s0c]*4.32 - 2.16 + cartPole.state[1] = stateVals[s1c]*2.70 - 1.35 + cartPole.state[2] = stateVals[s2c]*0.12566304 - 0.06283152 // 0.06283152 = 3.6 degrees + cartPole.state[3] = stateVals[s3c]*0.30019504 - 0.15009752 // 0.15009752 = 8.6 degrees + // The short pole angle and its angular velocity are set to zero. + cartPole.state[4] = 0.0 + cartPole.state[5] = 0.0 + + // The champion needs to be flushed here because it may have + // leftover activation from its last test run that could affect + // its recurrent memory + if _, err = championPhenotype.Flush(); err != nil { + return nil, err + } + + if generalized, err := OrganismEvaluate(champion, cartPole, actionType); generalized { + generalizationScore++ + + if neat.LogLevel == neat.LogLevelDebug { + neat.DebugLog( + fmt.Sprintf("x: %f, xv: %f, t1: %f, t2: %f, angle: %f\n", + cartPole.state[0], cartPole.state[1], + cartPole.state[2], cartPole.state[4], thirtySixDegrees)) + } + } else if err != nil { + return nil, err + } + } + } + } + } + + if generalizationScore >= 200 { + // The generalization test winner + neat.InfoLog( + fmt.Sprintf("The non-Markov champion found! (Generalization Score = %d)", + generalizationScore)) + champion.Fitness = float64(generalizationScore) + champion.IsWinner = true + } else { + neat.InfoLog("The non-Markov champion unable to generalize") + champion.Fitness = championFitness // Restore the champ's fitness + champion.IsWinner = false + } + } else { + neat.InfoLog("The non-Markov champion missed the 100'000 run test") + champion.Fitness = championFitness // Restore the champ's fitness + champion.IsWinner = false + } + + return champion, nil +} diff --git a/executor.go b/executor.go index 20f0603..cf342d3 100644 --- a/executor.go +++ b/executor.go @@ -5,6 +5,7 @@ import ( "flag" "fmt" "github.com/yaricom/goNEAT/v4/examples/pole" + "github.com/yaricom/goNEAT/v4/examples/pole2" "github.com/yaricom/goNEAT/v4/examples/xor" "github.com/yaricom/goNEAT/v4/experiment" "github.com/yaricom/goNEAT/v4/neat" @@ -98,9 +99,12 @@ func main() { generationEvaluator = pole.NewCartPoleGenerationEvaluator(outDir, true, 500000) case "cart_2pole_markov": expt.MaxFitnessScore = 1.0 // as given by fitness function definition - generationEvaluator = pole.NewCartDoublePoleGenerationEvaluator(outDir, true, pole.ContinuousAction) + generationEvaluator = pole2.NewCartDoublePoleGenerationEvaluator(outDir, true, pole2.ContinuousAction) case "cart_2pole_non-markov": - generationEvaluator = pole.NewCartDoublePoleGenerationEvaluator(outDir, false, pole.ContinuousAction) + generationEvaluator = pole2.NewCartDoublePoleGenerationEvaluator(outDir, false, pole2.ContinuousAction) + case "cart_2pole_markov_parallel": + expt.MaxFitnessScore = 1.0 // as given by fitness function definition + generationEvaluator = pole2.NewCartDoublePoleParallelGenerationEvaluator(outDir, true, pole2.ContinuousAction) default: log.Fatalf("Unsupported experiment: %s", *experimentName) } diff --git a/neat/genetics/species.go b/neat/genetics/species.go index 310d93c..308994b 100644 --- a/neat/genetics/species.go +++ b/neat/genetics/species.go @@ -474,7 +474,7 @@ func (s *Species) reproduce(ctx context.Context, generation int, pop *Population if rand.Float64() > opts.MateOnlyProb || dad.Genotype.Id == mom.Genotype.Id || dad.Genotype.compatibility(mom.Genotype, opts) == 0.0 { - neat.DebugLog("SPECIES: ------> Mutatte baby genome:") + neat.DebugLog("SPECIES: ------> Mutate baby genome:") // Do the mutation depending on probabilities of various mutations if rand.Float64() < opts.MutateAddNodeProb { @@ -542,9 +542,9 @@ func createFirstSpecies(pop *Population, baby *Organism) { } func (s *Species) String() string { - max, avg := s.ComputeMaxAndAvgFitness() + maxFitness, avgFitness := s.ComputeMaxAndAvgFitness() str := fmt.Sprintf("Species #%d, age=%d, avg_fitness=%.3f, max_fitness=%.3f, max_fitness_ever=%.3f, expected_offspring=%d, age_of_last_improvement=%d\n", - s.Id, s.Age, avg, max, s.MaxFitnessEver, s.ExpectedOffspring, s.AgeOfLastImprovement) + s.Id, s.Age, avgFitness, maxFitness, s.MaxFitnessEver, s.ExpectedOffspring, s.AgeOfLastImprovement) str += fmt.Sprintf("Has %d Organisms:\n", len(s.Organisms)) for _, o := range s.Organisms { str += fmt.Sprintf("\t%s\n", o)