-
Notifications
You must be signed in to change notification settings - Fork 0
/
article.html
1316 lines (1076 loc) · 60.9 KB
/
article.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<dt-article>
<header class="site-header px2 px-responsive l-middle">
<div class="mt2 wrap">
<div class="measure">
<a href="https://developmentalsystems.org" class="site-title" style="border-bottom:none;">
<img src="https://developmentalsystems.org/flowers-logo.png">
</a>
<nav class="site-nav" style="line-height:2;">
<a class="nav-link" href="https://flowers.inria.fr/" target="_blank">Flowers Lab</a>
<a class="nav-link" href="https://developmentalsystems.org/publications/">Publications</a>
<a class="nav-link" href="https://developmentalsystems.org/about">About</a>
</nav>
<div class="clearfix"></div>
<div class="social-icons" style="line-height:2;float:right;border-bottom:none;">
<div class="social-icons-right">
<a class="fa fa-github" href="https://github.com/flowersteam"></a>
<a class="fa fa-rss" href="/feed.xml"></a>
<a class="fa fa-twitter" href="https://twitter.com/FlowersINRIA"></a>
<a class="fa fa-envelope" href="mailto:[email protected]"></a>
</div>
<div class="right">
</div>
</div>
<div class="clearfix"></div>
</div>
</div>
</header>
<h1>An ecological approach to Artificial Intelligence </h1>
<h2 id='subtitle'> How the field of ecology helped us conceptualize skill acquisition and design studies in AI</h2>
<div align="center" style="margin-bottom:40px">
<img src="public/introblog_ecoAI.png" alt="scheme" style="width:120%">
</div>
<p> 'Can machines think?' When Turing posed this question in his seminal paper
<a href="https://psycnet.apa.org/record/1951-02887-001"> Computing machinery and intelligence </a>
<dt-cite key="turing_computing_1950"> </dt-cite> in the '50s, he inadvertently laid the ground for what was to
become the field of Artificial Intelligence (AI). The ground was set firmly: generations of researchers have been
searching for the cognitive architectures and optimization objectives that, when evaluated on carefully selected benchmarks, will lead to agents
whose behavior is similar to, or even improves upon, human behavior. Chess, video games and Go are the carrots on a stick that
have been driving the AI community forward.
</p>
<p> We call this the <em> cognition-centric </em> approach and want to contrast it to the <em> ecological </em>
approach. Under this alternative attitude towards creating AI, intelligence is viewed as an emergent product
of adaptive systems interacting with their environments. While cognition-centric approaches attempt to reverse-engineer
intelligent behavior by searching in the space of cognitive functions, ecological approaches search in the space of
environmental properties to reverse-engineer the conditions that drive intelligent behavior.
</p>
<p> Ecological perspectives abound in the study of biological organisms.
From the emergence of vision systems <dt-cite key="gibson__2014"> </dt-cite>
to that of religion <dt-cite key="botero_ecology_2014"> </dt-cite>, environments encountered in the
evolutionary trajectory of a species can help us understand why certain functions persist over others.
If we look at the evolution of our own species, we will find that many behaviors that we consider simple,
like using language and tools, took the longest time to evolve.
Once a new skill was acquired, it also acted as a driver and enabler for new skills.
Our ability to continuously acquire new skills by learning through others led to an explosively large set of behaviors: our cultural repertoire.
It is this cultural repertoire that makes human societies seemingly open-ended and qualitatively different from the societies of other species, albeit complex <dt-cite key="boyd_why_1996"> </dt-cite>.
</p>
<p> The cognition-centric approach has recently given us many successful algorithms and applications.
But it may have kept us busy on a research agenda that does not lead to artificial intelligence with the distinctive
characteristics of a natural one.
Machine learning algorithms are today criticised for being data-hungry, brittle when encountering new problems
and morally disconnected from the human society that gave birth to them.
</p>
<p> In this blog post, we will discuss how we could pursue an ecological approach to AI.
Our first objective is to discover the similarities between studies of natural and artificial intelligence.
Our second objective is to see how leveraging these similarities can be useful in practice for AI studies.
In particular we: </p>
<ul>
<li > <b> present a conceptual framework for grounding an ecological approach to AI in Human
Behavioral Ecology (HBE), a research field studying the evolution of humans in interaction with their environments</b> </li>
<li > <b> illustrate how an ecological perspective on AI looks in practice through two of our recent
computational studies:
<ul>
<li > the study of the emergence of adaptability in environments with temporal and spatial variability using a simple eco-evolutionary model
</li>
<li > the study of the effect of social connectivity on collective innovation with distributed groups of reinforcement learning agents
</li>
</ul>
</b> </li>
<li> <b> Discuss promising perspectives on how these two apparently disconnected contributions can shed light on the complex interactions between ecological and socio-cultural dynamics </b></li>
<!-- <table cellpadding="10" cellspacing="10">-->
<!--<tbody>-->
<!--<td style="text-align: center;padding:10px"> <h4> How does environmental variability affect the evolution of adaptability? </h4>-->
<!-- <p style="text-align: justify;padding:10px" > In a world with temporal and spatial variability, a population of agents evolves its adaptability-->
<!-- in order to survive. Agents can adapt using evolvability and phenotypic plasticity and the fittest ones in a-->
<!-- niche leave offspring. Can these agents survive in the face of extreme climatic variability? In this work,-->
<!-- we will see how populations adapt, diversify and disperse under different climatic conditions. </p></td>-->
<!--<td style="text-align: center;padding:10px"> <h4> How does social connectivity affect collective innovation? </h4>-->
<!--<p style="text-align: justify;padding:10px"> From drug discovery to music composition, our human cultural repertoire relies on innovations produced-->
<!-- by collectives that jointly explore, communicate and recombine solutions. Can artificial agents also innovate in-->
<!-- collectives? In the learning paradigm of distributed RL, groups of agents solve tasks in parallel and share their-->
<!-- experiences with observed benefits to performance. But, while biological organisms exhibit a variety of social-->
<!-- network structures,, distributed RL agents always share with everyone. Here, we study the effect of social network-->
<!-- structure in a group of DQN agents playing the <a href='https://littlealchemy2.com/'> Little Alchemy game </a>.-->
<!--</p></td>-->
<!--</tbody>-->
<!--</table>-->
</ul>
<h2 id="framework"> A conceptual framework for linking human and artificial ecologies </h2>
<p> We recently proposed <dt-cite key="nisioti_grounding_2021"> </dt-cite> <dt-cite key="clement_thesis"> </dt-cite> that a possible pathway
to getting artificial agents that exhibit the ability to continuously acquire skills in an open-ended way is to find inspiration in a biological
species with an impressively open-ended behavioral repertoire: our own.
</p>
<p> Our methodology was three-step: </p>
<ol type="1">
<li style="..."> skim the Human Behavioral Ecology (HBE) literature to identify the key factors that this
community proposes as conducive for the evolution of the uniquely diverse human cultural repertoire
</li>
<li style="..."> identify similar research questions and hypotheses in AI, in particular the sub-fields of
meta and multi-agent reinforcement learning
</li>
<li style="..."> abstract away the particularities of the two fields to arrive at a conceptual framework that
unifies their terminologies and research agendas.
</li>
</ol>
<p> Here is our conceptual framework, <em> ORIGINS </em>, that our work led to: </p>
<div align="center" style="margin-bottom:40px">
<img class="80" src="public/framework.png" height="465" width="665" alt="ORIGINS" />
<div>
<sub style="display: block; line-height: 1.5em">
<i> <em> ORIGINS </em> is a conceptual framework for studying skill acquisition in both human and artificial ecologies </i></sub>
</div>
</div>
<p> The story that the framework tells, put succinctly, is:
</p>
<p> <em> Environmental complexity, and its evolution, has a key role in skill acquisition.
It bootstraps the emergence of skills at both an individual and a collective level.
At an individual level, it modulates reproduction pressures and creates the need for cognitive mechanisms.
At a collective level, cooperation and competition pressures drive the need for social skills.
The skills themselves then act as drivers for further skill acquisition in two ways:
a) they interact with each other to give rise to the uniquely complex human cultural repertoire b) they modulate
environmental complexity through the process of niche construction.
It is this latter process that makes skill acquisition open-ended, as it creates a positive feedback loop that
continuously complexifies the environment.</em> </p>
<p> <em> ORIGINS </em> can be a useful tool for both the AI and HBE communities: AI researchers can borrow existing hypotheses from
HBE about which environmental conditions affect which skills to appropriately shape their environments. At the same time, HBE researchers can use AI as a computational tool
for studying their hypotheses.</p>
<p> In the rest of the post we describe two of our projects that can be seen as following the ecological approach we are proposing.
We will zoom in on different parts of the conceptual framework, discuss the ecological hypotheses they were inspired by,
and explain the computational models we designed to study them.
</p>
<h2 id="Study-A"> Evolution of adaptability in complex environments </h2>
<div style="clear: both;float: right; margin-top: 10px; margin-left: 10px; margin-right: calc((100vw - 1500px) / 2 + 168px);
width: calc((1500px - 648px) / 2 - 24px);">
<img src="public/part1.png" alt="scheme" style="width:90%">
<sub style="display: block; line-height: 1.5em">
<i> In this study we focus on the effect of environmental complexity on individual adaptation
</i> </sub>
</div>
<div align="center" style="margin-bottom:40px">
<img class="80" src="public/earth_species.gif" height="465" width="665" alt="ORIGINS" />
<div>
<sub style="display: block; line-height: 1.5em">
<i> Earth: the apparent diversity of species on our planet could not have existed without an equally impressive diversity in environmental conditions.</i></sub>
</div>
</div>
<p> What makes us human? For HBE researchers, this is not a philosophical inquiry, but a rather pressing scientific
question: why did the first hominin species appear about 5 million years ago <dt-cite key="maslin_synthesis_2015"> </dt-cite> and which skills did they evolve
that allowed them to expand into a population that today holds a powerful position in the Earth's ecosystem?</p>
<p> When we trace back the history of our planet, we find climatic records of very "busy" periods: many rapid and
large-amplitude climatic cycles have taken place in the last few million years, leading to high levels of climate
change that must have significantly shaped populations at a global and local scale. Our own species appeared
and dispersed during such periods
<dt-cite key="maslin_synthesis_2015"> </dt-cite> <dt-cite key="sanchez_goni_regional_2020"> </dt-cite>
<dt-cite key="potts_hominin_2013"> </dt-cite>.
</p>
<p> Could there be a link between this climatic complexity and the birth of one of the most generalist species?
Understanding the mechanisms with which temporal and spatial diversity in an environment influences individuals and populations
is a crucial step towards answering this question.
</p>
<p> A similar story has been unfolding in AI, where the first impressive agents were extremely specialized, solving a single task like Chess.
But recently, the focus is on generalist agents that can adapt to changes in tasks that they have never encountered during their design.
</p>
<p> The consensus of the community is that, when it comes to creating generalist agents,
the environments used for training play a big part.
In meta-learning <dt-cite key="vanschoren_meta-learning_2018"> </dt-cite>, the focus is on ensuring a wide diversity of environments.
In curriculum learning <dt-cite key="curriculumNarvekar2020"> </dt-cite> the focus is on the order in which environments are encountered,
as the training procedure needs to ensure that agents are continuously challenged but are able to improve.
</p>
<div align="center" style="margin-bottom:40px">
<img class="80" src="public/benchmark_evo.png" height="465" width="665" alt="ORIGINS" />
<div>
<sub style="display: block; line-height: 1.5em">
<i> Benchmarks used to evaluate agents are becoming increasingly complex and diverse: chess and Go <dt-cite key="silver2017MasteringChessShogi"> </dt-cite>
require strategic reasoning in a large-dimensional space, Atari games <dt-cite key="mnihPlayingAtariDeep"> </dt-cite>
pose diverse challenges such as partial observability, sparse rewards and were one of the first benchmarks where agents
operated solely based on pixel values.
Multi-agent environments initially tested for a single skill, such as cooperation in foraging tasks <dt-cite key="perolatMultiagentReinforcementLearning2017b"> </dt-cite>.
XLand <dt-cite key="openendedlearningteamOpenEndedLearningLeads2021"> </dt-cite> is a vast world of single-agent and multi-agent tasks that require navigation and object manipulation in 3D space.
Minecraft <dt-cite key="guss2019MineRLLargeScaleDataset"> </dt-cite> comes very close to our need for open-ended environments, as the agent can craft items and
continuously complexify the environment.
</i></sub>
</div>
</div>
<p>
These are the ideas that motivated the computational study we describe next <dt-cite key="nisioti_plasticity_2022"> </dt-cite>,
where we attempt to understand how the complexity of environments,
both in terms of their temporal dynamics and task diversity, acts as a driver for generalism.
</p>
<h3> An eco-evo-devo study of the emergence of generalist agents</h3>
<div style="clear: both;float: right; margin-top: 10px; margin-left: 10px; margin-right: calc((100vw - 1500px) / 2 + 168px);
width: calc((1500px - 648px) / 2 - 24px);">
<img src="public/tolerance_curves.png" alt="scheme" style="width:90%">
<sub style="display: block; line-height: 1.5em">
<i> An agent with low plasticity (on the left) has small σ and a high peak at their preferred niche, while a
plastic individual (on the right) has large σ and a lower peak at their preferred niche. Fitness in a
certain environmental state is computed as the value of the probability density function of the distribution at that point.
If the environmental state matches the preferred niche, the plastic individual has lower fitness (cost of plasticity). If the actual environmental state differs
significantly from the preferred one, the plastic individual has higher fitness.
</i> </sub>
</div>
<h4> The model </h4>
<p> In this project we designed an environment that exhibits both temporal and spatial diversity in terms of resource availability.
The amount of resources depends on the latitude of a niche and a climate function that changes with time.
Agents have three ways to adapt to their environment, all encoded in their genome: a preferred environmental state,
phenotypic plasticity and a mutation rate.
Thus, agents can adapt at two time-scales: phenotypic plasticity is a developmental mechanism that enables survival
in diverse environments within
a single lifetime, while mutations enable adaptation at a slower, evolutionary time-scale.
</p>
<p> To model phenotypic plasticity we have adopted tolerance curves, a tool originally developed in ecology <dt-cite key="grove_evolution_2014"> </dt-cite>.
Tolerance curves have the form of a Gaussian whose mean corresponds to the preferred environmental state of an individual and
variance to its plasticity, i.e., its ability to survive under different environmental conditions. </p>
<p> Tolerance curves elegantly capture the cost and benefit of plasticity: if a plastic and non-plastic agent compete
in an environment that is identical to their preferred niche, the plastic one will lose as its peak is lower. But
if, for some reason, the environment changes in the next generation so that it differs significantly from the preferred niche
of the two individuals, the plastic one will be at an advantage.
</p>
<!--<p> The world consists of a number of niches N and a climate function that shows how the state of all niches changes with time.-->
<!--The state of a niche at a given generation depends on the value of the climate function and the latitude of the niches,-->
<!--with southern niches having better states.</p>-->
<p> At each generation, an agent surviving in a niche can reproduce until the capacity of the niche is filled.
Agents are chosen based on their fitness, which depends on their preferred state, their plasticity and the state of the niche.
If an agent can survive in multiple niches it will be eligible for reproduction in all of them, and thus, will have higher chances of reproducing.
We refer to this selection mechanism as <i> niche-limited competition </i>.
We also compared against a condition where selection does not happen independently in each niche, but agents compete against everyone based on their average
fitness across all niches.
This condition corresponds to survival-of-the-fittest, the most widely used evolutionary algorithm.
</p>
<h4> Results </h4>
<p> Here is an illustration of how the population behaves in a world with 100 niches and a climate function that has the
form of a sinusoid when the selection mechanism is niche-limited competition:</p>
<div align="center" style="margin-bottom:40px">
<video style="width:120%" controls>
<source src="public/Gecco.mp4" type=video/mp4>
</video>
<!--<img class="80" src="public/Gecco.gif" width="120%" alt="toleranceCurve" />-->
<div>
<sub style="display: block; line-height: 1.5em">
<i> Illustration of a simulation in our environment. The population evolves under niche-limited competition in an environment where southern niches have higher
quality than northern niches.
</i>
</sub>
</div>
</div>
<div align="center" style="clear: both;float: right; margin-top: 10px; margin-left: 10px; margin-right: calc((100vw - 1500px) / 2 + 168px);
width: calc((1500px - 648px) / 2 - 24px);">
<!--<video style="width:120%" controls>-->
<!-- <source src="public/original.mp4" type=video/mp4>-->
<!--</video>-->
<img src="public/ecoevojax.gif" alt="scheme" style="width:90%">
<sub style="display: block; line-height: 1.5em">
<i> Visualization of the environment we used to study foraging behaviors at scale <dt-cite key="gautier2023EcoevolutionaryDynamicsNonepisodic"> </dt-cite>.
Resources are in green and their regeneration frequency is higher in the south, while agents are in black. (See <a href="https://www.youtube.com/watch?v=LiTdUp8rOic&ab_channel=AnoNymous"> this video</a> for a visualization
of the whole duration of training.)
</i>
</sub>
</div>
<p> In this study (that you can read more about <a href="https://dl.acm.org/doi/abs/10.1145/3512290.3528826"> in our paper </a>) we saw that adaptability emerges when the number of niches is large or when there is temporal
variability.
We also saw that some environmental conditions and selection pressures may favor plasticity, while others favor
evolvability.
We also saw that limiting competition within niches is an important driver of generalism, at a time when most meta RL works use survival-of-the-fittest.
</p>
<p> If there is one take-away message that we would like to distill from this study for the AI community, it is <a href="https://twitter.com/pyoudeyer/status/1664240739000414213?ref_src=twsrc%5Etfw%7Ctwcamp%5Etweetembed%7Ctwterm%5E1664240739000414213%7Ctwgr%5E3109f952a8b279eb218b53ff4621fc40a230c5ab%7Ctwcon%5Es1_&ref_url=https%3A%2F%2Froamresearch.com%2F%2Fapp%2Feleni%2Fpage%2FbxjHrKM0F">that there is no such thing as a generalist agent</a>.
Discussions of the generality of an agent need to consider its ecological niche.
This niche is necessarily bounded because, to occupy it, an agent needs to spend time and energy.
What is more, an agent will be only as generalist as its personal history and environment require.
Even if we design an agent with powerful cognitive mechanisms, such as a large artificial neural network, generalisation will not emerge
unless the spatiotemporal dynamics of the environment require it.
</p>
<!--<h4> Next steps </h4>-->
<p>
Do our conclusions generalize to meta-RL agents?
Transferring our computational study to a grounded environment, where plasticity
is not hard-coded but emerges out of a behavioral policy is an important next step towards showcasing the importance of
ecological dynamics.
For this reason we implemented a large-scale environment, where populations of thousands of agents can forage resources
in a grid-world with resource density that varies in space and time <dt-cite key="gautier2023EcoevolutionaryDynamicsNonepisodic"> </dt-cite>.
</p>
<p> We now zoom out from the link between environmental dynamics and adaptability in our conceptual framework and move our gaze to the right of the framework schematic above.
There we meet the cultural repertoire, a set of rather advanced skills that presuppose the existence of both individual cognitive mechanisms and social dynamics.
Next, we will describe a study of how multi-agent dynamics influence the formation of a cultural repertoire.
</p>
<h2 id="sapiens"> The effect of social connectivity on collective innovation </h2>
<div style="clear: both;float: right; margin-top: 10px; margin-left: 10px; margin-right: calc((100vw - 1500px) / 2 + 168px);
width: calc((1500px - 648px) / 2 - 24px);">
<img src="public/part2.png" alt="scheme" style="width:90%">
<sub style="display: block; line-height: 1.5em">
<i> In this study we focus on the effect of social dynamics on the collective innovation of learning agents
</i> </sub>
</div>
<div align="center" style="margin-bottom:40px">
<img class="80" src="public/culture.gif" alt="ORIGINS" style="width:90%" />
<div>
<sub style="display: block; line-height: 1.5em">
<i> Learning through and alongside others is a behavior encountered in many individuals, including artificial ones.
What role do social dynamics like group connectivity play and can we find similarities between biological and artificial social learning?
</i></sub>
</div>
</div>
<p> Culture, the ability to create and spread traditions through social learning, is often seen as a monopoly of our own species.
But there are many species that learn through others <dt-cite key="whiten1999CulturesChimpanzees"> </dt-cite>.
A few of them can change their cultural skills with time <dt-cite key="tyarks2022ChangesHumpbackWhale"> </dt-cite>.
Some species can even accumulate changes with time, which leads to a continuous “complexification” of their skills <dt-cite key="sasaki2017CumulativeCultureCan"> </dt-cite>.
What is unique in humans, however, is the intensity of accumulation.
From programming languages to musical instruments <dt-cite key="solee2013EvolutionaryEcologyTechnological"> </dt-cite>, the fossil record of human innovations has a rather intricate tree structure, with new innovations arising out
of recombination of existing ones.
</p>
<div align="center" style="clear: both;float: right; margin-top: 10px; margin-left: 10px; margin-right: calc((100vw - 1500px) / 2 + 168px);
width: calc((1500px - 648px) / 2 - 24px);">
<img src="public/innovation_networks.png" alt="scheme" style="width:90%">
<sub style="display: block; line-height: 1.5em">
<i> Social networks characterizing human collective innovation often have a clustered structure.
On the top, reconstructed regional networks of cultural artifacts in Africa about 350 thousand years ago <dt-cite key="migliano_origins_2022"> </dt-cite>.
On the bottom, the citation network of a recent research paper (Image credit <a href="https://www.connectedpapers.com/"> connected papers </a>).
Does this pattern suggest a link between social connectivity and the ability to innovate collectively?
</i>
</sub>
</div>
<p> Why do humans innovate to such an unprecedented degree?
Some theories point to our increased cognitive capacity or sociality <dt-cite key="klinePopulationSizePredicts2010"> </dt-cite>.
But others point to the benefits of our social connectivity: human societies often self-organize into small-world, hierarchical or dynamic
topologies. The common feature underlying those is their ability to protect information within sub-parts of the group.
In this way, the group maintains its cultural diversity, and it is this diversity that may be driving the continuous appearance of innovations.
</p>
<p> Reinforcement learning, the sub-field of AI concerned with learning by interacting with an environment,
is also interested in the benefits of collective exploration.
In distributed RL, multiple agents solve a task in parallel and exchange information on the way <dt-cite key="mnihAsynchronousMethodsDeep2016a"></dt-cite>
<dt-cite key="horganDistributedPrioritizedExperience2018"> </dt-cite>
<dt-cite key="espeholt_impala_2018"> </dt-cite>.
This has shown benefits both in terms of the quality of the final solution and the speed at which it is discovered.
</p>
<p> Yet this community has considered a single type of group connectivity, the star topology:
all agents interact with the environment to collect information and then share it with a single central node responsible for
processing the new information and updating the behavior of the agents.
Thus, artificial groups look very different from the partially-connected human groups.
If this structure reduces their collective diversity, it may negatively impact the ability of these groups to solve innovation challenges.
</p>
<p>
This is the idea that motivated the computational study we describe next <dt-cite key="nisioti2022SocialNetworkStructure"> </dt-cite>,
where groups of RL agents solve innovation tasks under different topologies.
</p>
<h3> SAPIENS: a distributed RL framework where social connectivity matters </h3>
<!-- <div style="clear: both;float: right; margin-top: 10px; margin-left: 10px; margin-right: calc((100vw - 1500px) / 2 + 168px);-->
<!--width: calc((1500px - 648px) / 2 - 24px);">-->
<!-- <img src="public/sharing_DQN.svg" alt="scheme" style="width:90%">-->
<!--<p> Schematic of a SAPIENS algorithm with 2 DQN learners exchanging experiences from their replay buffer.-->
<!--</p>-->
<!--</div>-->
<h4> The model </h4>
<p> To study innovation we employed Wordcraft <dt-cite key="jiangWordCraftEnvironmentBenchmarking2020"> </dt-cite>,
an RL text-world inspired by the Little Alchemy 2 game.
The player starts with a set of elements, for example "fire", "water" and "earth" and can combine them in pairs to craft new elements.
Not all combinations give rise to new elements; the description of a task includes a list of the possible combinations.
</p>
<div align="center" style="margin-bottom:40px">
<img class="80" src="public/littlealchemy.gif" alt="ORIGINS" style="width:100%" />
<div>
<sub style="display: block; line-height: 1.5em">
<i> Our environment is a text-based version of the Little Alchemy 2 game, where a player combines elements to form new elements.
</i></sub>
</div>
</div>
<div align="center" style="clear: both;float: right; margin-top: 10px; margin-left: 10px; margin-right: calc((100vw - 1500px) / 2 + 168px);
width: calc((1500px - 648px) / 2 - 24px);">
<img src="public/task_illustration.png" alt="scheme" style="width:100%">
<sub style="display: block; line-height: 1.5em">
<i> Visualization of our innovation tasks
</i>
</sub>
</div>
<p> We were interested in studying a variety of challenges a group can encounter when solving innovation tasks, so we designed
three different tasks:
<ul>
<li> the single-path task, where new innovations arise as modifications of the most recent past innovations.
Search is easy in this type of task.
The challenge lies in solving it as quickly as possible.
We are therefore interested in the speed at which groups innovate.
This task can be used to model arms races, such as the invention of aircraft and their continuous improvement to machine gun-mounted planes like
<a href="https://en.wikipedia.org/wiki/Fokker_D.VII">the Fokker during the First World War</a>.
</li>
<li> the merging-paths task, where two identical paths merge into a more rewarding path.
This task contains a strong local optimum: to find the more rewarding path one needs to start exploring both other paths instead of reaching the end of one of them.
It can be useful to model the recombination of innovations that often happens in technological progress, as for example in the evolution of gasoline.
The distillation of petroleum can be seen as one innovation path, which gave us the inventions of kerosine and gasoline.
Kerosine attracted a lot of attention but gasoline was ignored as a volatile by-product.
This was until another innovation path gave us the internal combustion engine.
In itself this engine was not very popular, but when combined with gasoline, it changed the world.
</li>
<li> the best-of-ten paths task, with nine identical paths and one path that is the most rewarding.
Here, the challenge lies in exploring a large search space.
Large search spaces are inherent in many fields of innovation, such as biology and medicine.
</li>
</ul>
</p>
<div align="center" style="clear: both;float: right; margin-top: 10px; margin-left: 10px; margin-right: calc((100vw - 1500px) / 2 + 168px);
width: calc((1500px - 648px) / 2 - 24px);">
<img src="public/agents.png" alt="scheme" style="width:100%">
<sub style="display: block; line-height: 1.5em">
<i> Visualization of two RL agents interacting with the environment and sharing experiences
</i>
</sub>
</div>
<p> In our experiments, a game is played by a group of RL agents.
An RL agent embodies the paradigm of learning through trial and error.
It interacts with an environment by executing <i> actions </i> and receiving <i> observations</i> and <i> rewards</i>.
For example, in a Little Alchemy 2 task, an agent defines which elements it will combine and the environment returns the newly created elements and the points added to the player's score.
The main idea in RL is that an agent can learn a policy that solves a task optimally if it is exposed to multiple trials of it and uses this experience to maximize
the rewards it receives.
</p>
<p> The RL algorithm that we employed was DQN <dt-cite key="mnihPlayingAtariDeep"> </dt-cite>, a deep RL algorithm where
an agent has an explicit memory of past experiences that is periodically sampled
to update its policy.
These experiences have the form <i> [observation, action, reward] </i> and are also employed for sharing information with others.
When we say that an agent shares an experience with another agent we mean that it samples a random experience from its memory
and directly inserts it into the other's memory.
</p>
<p> An agent shares experiences only with its neighbors, which are determined by the social connectivity of the group.
In our study, we considered the following social connectivities:
</p>
<div align="center" style="margin-bottom:40px">
<img class="80" src="public/Sapiens.gif" style="width:100%" alt="Animation of the social connectivities studied" />
<div>
<sub style="display: block; line-height: 1.5em">
<i> The social connectivities we studied empirically. The dynamic connectivity is inspired by a previous study in human
ecology <dt-cite key="derexPartialConnectivityIncreases2016"> </dt-cite></i></sub>
</div>
</div>
<h4> Results </h4>
<p>
Our empirical evaluation of the four types of social connectivities showed that fully-connected groups perform worse in all three tasks.
In the single-path task, they are the slowest to find the optimal solution.
In the merging-paths task, they converge to the local optimum.
In the best-of-ten paths task, they explore too slowly to discover the optimal path.
</p>
<p>
To explain these behaviors we measured the diversity of memories that agents have.
We observed that agents in fully-connected groups had high diversity as individuals, but the group as a whole had the lowest diversity.
Low diversity indicates that the group is collectively exploring a small part of the search space and can thus explain the failure of fully-connected groups.
</p>
<p> The animation below illustrates how groups with partial connectivity and full connectivity behave in the merging-paths task.
As we see, agents in the fully-connected group all end up exploring the same branch and fail to discover the global optimum:
</p>
<div align="center" style="margin-bottom:40px">
<!--<img class="80" src="public/Recipe.g" style="width:150%" alt="ORIGINS" />-->
<video style="width:100%" controls>
<source src="public/Recipe.mp4" type=video/mp4>
</video>
<div>
<sub style="display: block; line-height: 1.5em">
<i> Illustration of how a dynamic group is better at avoiding local optima compared to a fully-connected group.
</i></sub>
</div>
</div>
<p> For more detailed results, including performances of the different baselines on all tasks, take a look <a href="https://arxiv.org/abs/2206.05060">at our paper</a>, written
in collaboration with Ida Momennejad from Microsoft Research.</p>
<p>
Do our empirical conclusions generalize to more complex environments, such as grid-worlds and Atari games?
And how can we use our new understanding to guide the design of social connectivity for improving performance in certain tasks?
These are examples of the research questions we plan to explore in future work.
</p>
<h2> Connecting the dots </h2>
<p> With these two works we showed how our research methodology can benefit two very different fields: we started from
hypotheses in ecology, designed computational studies inspired by them and derived empirical conclusions that can feed
future studies both in ecology and AI.
</p>
<p> At first glance our two studies may seem disconnected.
One is concerned with the effect of ecological dynamics on a population's dispersal and adaptability and the other with
the effect of social connectivity on collective innovation.
But isn't the social connectivity of a population related to its dispersal?
This observation allows us to draw a link between ecological dynamics and collective innovation.
Can certain ecological conditions favor certain social connectivities and what would this mean for the evolution of our species?
</p>
<p> This connection may not be far-fetched. Ecological studies posit that elements in our environment such as the Sahara desert act as
ecological barriers that prohibit immigration but can disappear during periods of extreme climatic variability
(<a href="https://www.youtube.com/watch?v=ZQP-7BPvvq0">did you know the Sahara was green a few tens of thousands
of years ago?</a>) <dt-cite key="larrasoana_dynamics_2013"> </dt-cite>.
And studies of human cultural innovation posit that such barriers played an important part
in the spread of cultural artifacts <dt-cite key="derricoIdentifyingEarlyModern2017"> </dt-cite>.
</p>
<div align="center" style="margin-bottom:40px">
<img src="public/ending.png" alt="scheme" style="width:100%">
<sub style="display: block; line-height: 1.5em">
<i> Would simulation environments with RL agents navigating a world where diverse continents are separated
by oceans lead to artificial societies that remind us of our own?
</i></sub>
</div>
</dt-article>
<dt-appendix>
</dt-appendix>
<script type="text/bibliography">
@article{curriculumNarvekar2020,
author = {Sanmit Narvekar and
Bei Peng and
Matteo Leonetti and
Jivko Sinapov and
Matthew E. Taylor and
Peter Stone},
title = {Curriculum Learning for Reinforcement Learning Domains: A Framework
and Survey},
journal = {CoRR},
volume = {abs/2003.04960},
year = {2020},
url = {https://arxiv.org/abs/2003.04960},
eprinttype = {arXiv},
eprint = {2003.04960},
timestamp = {Sun, 18 Dec 2022 19:02:47 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-2003-04960.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{derricoIdentifyingEarlyModern2017,
title = {Identifying Early Modern Human Ecological Niche Expansions and Associated Cultural Dynamics in the South African Middle Stone Age},
author = {d'Errico, Francesco and Banks, William E. and Warren, Dan L. and Sgubin, Giovanni and van Niekerk, Karen and Henshilwood, Christopher and Daniau, Anne-Laure and Sanchez Goni, Maria Fernanda},
year = {2017},
month = jul,
journal = {Proceedings of the National Academy of Sciences},
volume = {114},
number = {30},
pages = {7869--7876},
issn = {0027-8424, 1091-6490},
doi = {10.1073/pnas.1620752114},
urldate = {2022-11-30},
}
@article{larrasoana_dynamics_2013,
title = {Dynamics of Green Sahara Periods and Their Role in Hominin Evolution},
volume = {8},
issn = {1932-6203},
url = {https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0076514},
doi = {10.1371/journal.pone.0076514},
language = {en},
number = {10},
urldate = {2022-12-14},
journal = {PLOS ONE},
author = {Larrasoaña, Juan C. and Roberts, Andrew P. and Rohling, Eelco J.},
month = oct,
year = {2013},
note = {Publisher: Public Library of Science},
pages = {e76514},
}
@article{derexPartialConnectivityIncreases2016,
title = {Partial connectivity increases cultural accumulation within groups},
volume = {113},
issn = {0027-8424, 1091-6490},
url = {http://www.pnas.org/lookup/doi/10.1073/pnas.1518798113},
doi = {10.1073/pnas.1518798113},
language = {en},
number = {11},
urldate = {2021-02-07},
journal = {Proceedings of the National Academy of Sciences},
author = {Derex, Maxime and Boyd, Robert},
month = mar,
year = {2016},
pages = {2982--2987},
}
@article{jiangWordCraftEnvironmentBenchmarking2020,
title = {WordCraft: An Environment for Benchmarking Commonsense Agents},
shorttitle = {WordCraft},
url = {http://arxiv.org/abs/2007.09185},
language = {en},
urldate = {2021-02-10},
journal = {arXiv:2007.09185 [cs]},
author = {Jiang, Minqi and Luketina, Jelena and Nardelli, Nantas and Minervini, Pasquale and Torr, Philip H. S. and Whiteson, Shimon and Rocktäschel, Tim},
month = jul,
year = {2020},
note = {arXiv: 2007.09185},
}
@article{mnihAsynchronousMethodsDeep2016a,
title = {Asynchronous Methods for Deep Reinforcement Learning},
url = {http://arxiv.org/abs/1602.01783},
language = {en},
urldate = {2022-02-09},
journal = {arXiv:1602.01783 [cs]},
author = {Mnih, Volodymyr and Badia, Adrià Puigdomènech and Mirza, Mehdi and Graves, Alex and Lillicrap, Timothy P. and Harley, Tim and Silver, David and Kavukcuoglu, Koray},
month = jun,
year = {2016},
note = {arXiv: 1602.01783},
keywords = {Computer Science - Machine Learning},
}
@misc{nisioti2022SocialNetworkStructure,
title = {Social Network Structure Shapes Innovation: Experience-sharing in RL with SAPIENS},
author = {Nisioti, Eleni and Mahaut, Mateo and Oudeyer, Pierre-Yves and Momennejad, Ida and Moulin-Frier, Clement},
year = {2022},
month = nov,
number = {arXiv:2206.05060},
eprint = {2206.05060},
primaryclass = {cs},
publisher = {arXiv},
doi = {10.48550/arXiv.2206.05060},
urldate = {2023-03-13},
archiveprefix = {arxiv},
}
@techreport{espeholt_impala_2018,
title = {IMPALA: Scalable Distributed Deep-RL with Importance Weighted Actor-Learner Architectures},
shorttitle = {IMPALA},
url = {http://arxiv.org/abs/1802.01561},
language = {en},
number = {arXiv:1802.01561},
urldate = {2022-05-15},
institution = {arXiv},
author = {Espeholt, Lasse and Soyer, Hubert and Munos, Remi and Simonyan, Karen and Mnih, Volodymir and Ward, Tom and Doron, Yotam and Firoiu, Vlad and Harley, Tim and Dunning, Iain and Legg, Shane and Kavukcuoglu, Koray},
month = jun,
year = {2018},
note = {arXiv:1802.01561 [cs]},
keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning},
}
@article{horganDistributedPrioritizedExperience2018,
title = {Distributed Prioritized Experience Replay},
url = {http://arxiv.org/abs/1803.00933},
language = {en},
urldate = {2021-04-26},
journal = {arXiv:1803.00933 [cs]},
author = {Horgan, Dan and Quan, John and Budden, David and Barth-Maron, Gabriel and Hessel, Matteo and van Hasselt, Hado and Silver, David},
month = mar,
year = {2018},
note = {arXiv: 1803.00933},
keywords = {Computer Science - Machine Learning},
}
@article{klinePopulationSizePredicts2010,
title = {Population Size Predicts Technological Complexity in Oceania},
author = {Kline, Michelle A. and Boyd, Robert},
year = {2010},
month = aug,
journal = {Proceedings of the Royal Society B: Biological Sciences},
volume = {277},
number = {1693},
pages = {2559--2564},
issn = {0962-8452, 1471-2954},
doi = {10.1098/rspb.2010.0452},
urldate = {2021-03-01},
}
@article{solee2013EvolutionaryEcologyTechnological,
title = {The Evolutionary Ecology of Technological Innovations},
author = {Solee, Ricard V. and Valverde, Sergi and Casals, Marti Rosas and Kauffman, Stuart A. and Farmer, Doyne and Eldredge, Niles},
year = {2013},
journal = {Complexity},
volume = {18},
number = {4},
pages = {15--27},
issn = {1099-0526},
doi = {10.1002/cplx.21436},
urldate = {2023-02-18},
}
@article{sasaki2017CumulativeCultureCan,
title = {Cumulative Culture Can Emerge from Collective Intelligence in Animal Groups},
author = {Sasaki, Takao and Biro, Dora},
year = {2017},
month = apr,
journal = {Nature Communications},
volume = {8},
number = {1},
pages = {15049},
publisher = {Nature Publishing Group},
issn = {2041-1723},
doi = {10.1038/ncomms15049},
urldate = {2023-05-15},
copyright = {2017 The Author(s)},
}
@article{tyarks2022ChangesHumpbackWhale,
title = {Changes in Humpback Whale Song Structure and Complexity Reveal a Rapid Evolution on a Feeding Ground in Northern Norway},
author = {Tyarks, Saskia C. and Aniceto, Ana S. and Ahonen, Heidi and Pedersen, Geir and Lindstrom, Ulf},
year = {2022},
journal = {Frontiers in Marine Science},
volume = {9},
issn = {2296-7745},
urldate = {2023-05-15},
}
@article{whiten1999CulturesChimpanzees,
title = {Cultures in Chimpanzees},
author = {Whiten, A. and Goodall, J. and McGrew, W. C. and Nishida, T. and Reynolds, V. and Sugiyama, Y. and Tutin, C. E. G. and Wrangham, R. W. and Boesch, C.},
year = {1999},
month = jun,
journal = {Nature},
volume = {399},
number = {6737},
pages = {682--685},
publisher = {Nature Publishing Group},
issn = {1476-4687},
doi = {10.1038/21415},
urldate = {2023-02-18},
copyright = {1999 Macmillan Magazines Ltd.},
}
@article{migliano_origins_2022,
title = {The origins of human cumulative culture: from the foraging niche to collective intelligence},
volume = {377},
shorttitle = {The origins of human cumulative culture},
url = {https://royalsocietypublishing.org/doi/10.1098/rstb.2020.0317},
doi = {10.1098/rstb.2020.0317},
number = {1843},
urldate = {2022-05-18},
journal = {Philosophical Transactions of the Royal Society B: Biological Sciences},
author = {Migliano, Andrea Bamberg and Vinicius, Lucio},
month = jan,
year = {2022},
note = {Publisher: Royal Society},
keywords = {cultural evolution, cumulative culture, hominins, Homo sapiens, hunter–gatherers},
pages = {20200317},
}
@misc{gautier2023EcoevolutionaryDynamicsNonepisodic,
title = {Eco-Evolutionary Dynamics of Non-episodic Neuroevolution in Large Multi-agent Environments},
author = {Hamon, Gautier and Nisioti, Eleni and Moulin-Frier, Clement},
year = {2023},
month = feb,
number = {arXiv:2302.09334},
eprint = {2302.09334},
primaryclass = {cs},
publisher = {arXiv},
doi = {10.48550/arXiv.2302.09334},
urldate = {2023-03-12},
}
@article{grove_evolution_2014,
title = {Evolution and dispersal under climatic instability: a simple evolutionary algorithm},
volume = {22},
issn = {1059-7123, 1741-2633},
shorttitle = {Evolution and dispersal under climatic instability},
url = {http://journals.sagepub.com/doi/10.1177/1059712314533573},
doi = {10.1177/1059712314533573},
language = {en},
number = {4},
urldate = {2021-05-10},
journal = {Adaptive Behavior},
author = {Grove, Matt},
month = aug,
year = {2014},
pages = {235--254},
}
@inproceedings{nisioti_plasticity_2022,
address = {Boston / Hybrid, United States},
title = {Plasticity and evolvability under environmental variability: the joint role of fitness-based selection and niche-limited competition},
shorttitle = {Plasticity and evolvability under environmental variability},
url = {https://hal.archives-ouvertes.fr/hal-03715928},
abstract = {The diversity and quality of natural systems have been a puzzle and inspiration for communities studying artificial life. It is now widely admitted that the adaptation mechanisms enabling these properties are largely influenced by the environments they inhabit. Organisms facing environmental variability have two alternative adaptation mechanisms operating at different timescales: {\textbackslash}textit\{plasticity\}, the ability of a phenotype to survive in diverse environments and {\textbackslash}textit\{evolvability\}, the ability to adapt through mutations. Although vital under environmental variability, both mechanisms are associated with fitness costs hypothesized to render them unnecessary in stable environments. In this work, we study the interplay between environmental dynamics and adaptation in a minimal model of the evolution of plasticity and evolvability. We experiment with different types of environments characterized by the presence of niches and a climate function that determines the fitness landscape. We empirically show that environmental dynamics affect plasticity and evolvability differently and that the presence of diverse ecological niches favors adaptability even in stable environments. We perform ablation studies of the selection mechanisms to separate the role of fitness-based selection and niche-limited competition. Results obtained from our minimal model allow us to propose promising research directions in the study of open-endedness in biological and artificial systems.},
urldate = {2022-12-14},
booktitle = {GECCO 2022 - The Genetic and Evolutionary Computation Conference},
author = {Nisioti, Eleni and Moulin-Frier, Clément},
month = jul,
year = {2022},
}
@misc{guss2019MineRLLargeScaleDataset,
title = {MineRL: A Large-Scale Dataset of Minecraft Demonstrations},
author = {Guss, William H. and Houghton, Brandon and Topin, Nicholay and Wang, Phillip and Codel, Cayden and Veloso, Manuela and Salakhutdinov, Ruslan},
year = {2019},
month = jul,
number = {arXiv:1907.13440},
eprint = {1907.13440},
primaryclass = {cs, stat},
publisher = {arXiv},
doi = {10.48550/arXiv.1907.13440},
urldate = {2023-05-15},
}
@inproceedings{perolatMultiagentReinforcementLearning2017b,
title = {A Multi-Agent Reinforcement Learning Model of Common-Pool Resource Appropriation},
booktitle = {Advances in Neural Information Processing Systems 30 (NIPS 2017)},
author = {Perolat, Julien and Leibo, Joel Z. and Zambaldi, Vinicius and Beattie, Charles and Tuyls, Karl and Graepel, Thore},
year = {2017},
pages = {3643--3652},
urldate = {2018-12-10},
}
@misc{silver2017MasteringChessShogi,
title = {Mastering Chess and Shogi by Self-Play with a General Reinforcement Learning Algorithm},
author = {Silver, David and Hubert, Thomas and Schrittwieser, Julian and Antonoglou, Ioannis and Lai, Matthew and Guez, Arthur and Lanctot, Marc and Sifre, Laurent and Kumaran, Dharshan and Graepel, Thore and Lillicrap, Timothy and Simonyan, Karen and Hassabis, Demis},
year = {2017},
month = dec,
number = {arXiv:1712.01815},
eprint = {1712.01815},
primaryclass = {cs},
publisher = {arXiv},
}
@techreport{openendedlearningteamOpenEndedLearningLeads2021,
title = {Open-Ended Learning Leads to Generally Capable Agents},
author = {Open Ended Learning Team and Stooke, Adam and Mahajan, Anuj and Barros, Catarina and Deck, Charlie and Bauer, Jakob and Sygnowski, Jakub and Trebacz, Maja and Jaderberg, Max and Mathieu, Michael and McAleese, Nat and Bradley-Schmieg, Nathalie and Wong, Nathaniel and Porcel, Nicolas and Raileanu, Roberta and Hughes-Fitt, Steph and Dalibard, Valentin and Czarnecki, Wojciech Marian},
year = {2021},
month = jul,
number = {arXiv:2107.12808},
eprint = {2107.12808},
primaryclass = {cs},
institution = {arXiv},
doi = {10.48550/arXiv.2107.12808},
urldate = {2022-06-01},
}
@article{mnihPlayingAtariDeep,
title = {Playing Atari with Deep Reinforcement Learning},
author = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Graves, Alex and Antonoglou, Ioannis and Wierstra, Daan and Riedmiller, Martin},
year={2013}
}
@misc{vanschoren_meta-learning_2018,
title = {Meta-{Learning}: {A} {Survey}},
shorttitle = {Meta-{Learning}},
url = {http://arxiv.org/abs/1810.03548},
doi = {10.48550/arXiv.1810.03548},
abstract = {Meta-learning, or learning to learn, is the science of systematically observing how different machine learning approaches perform on a wide range of learning tasks, and then learning from this experience, or meta-data, to learn new tasks much faster than otherwise possible. Not only does this dramatically speed up and improve the design of machine learning pipelines or neural architectures, it also allows us to replace hand-engineered algorithms with novel approaches learned in a data-driven way. In this chapter, we provide an overview of the state of the art in this fascinating and continuously evolving field.},
urldate = {2022-12-14},