diff --git a/experiments/mpsc/config_overrides/cartpole/nl_mpsc_cartpole.yaml b/experiments/mpsc/config_overrides/cartpole/nl_mpsc_cartpole.yaml index d21564c1b..2da499fa9 100644 --- a/experiments/mpsc/config_overrides/cartpole/nl_mpsc_cartpole.yaml +++ b/experiments/mpsc/config_overrides/cartpole/nl_mpsc_cartpole.yaml @@ -10,11 +10,11 @@ sf_config: - 0.5 # MPC Parameters - use_acados: False + use_acados: True horizon: 20 warmstart: True integration_algo: rk4 - use_terminal_set: True + use_terminal_set: False # Prior info prior_info: diff --git a/experiments/mpsc/config_overrides/quadrotor_2D/nl_mpsc_quadrotor_2D.yaml b/experiments/mpsc/config_overrides/quadrotor_2D/nl_mpsc_quadrotor_2D.yaml index 03ca717ac..c63f3bacf 100644 --- a/experiments/mpsc/config_overrides/quadrotor_2D/nl_mpsc_quadrotor_2D.yaml +++ b/experiments/mpsc/config_overrides/quadrotor_2D/nl_mpsc_quadrotor_2D.yaml @@ -12,11 +12,11 @@ sf_config: - 0.5 # MPC Parameters - use_acados: False + use_acados: True horizon: 20 warmstart: True integration_algo: rk4 - use_terminal_set: True + use_terminal_set: False # Prior info prior_info: diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/loss/approx_kl.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/loss/approx_kl.log index 535b6e82a..410acf535 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/loss/approx_kl.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/loss/approx_kl.log @@ -1,101 +1,101 @@ step,loss/approx_kl -10000,0.019267342627669375 -20000,0.024363837763667108 -30000,0.014531421413024265 -40000,0.015404991634810966 -50000,0.01967445264259974 -60000,0.011831035542612275 -70000,0.015947554850329955 -80000,0.012423110799863935 -90000,0.015280927252024411 -100000,0.02220213850960135 -110000,0.012349804987510047 -120000,0.01828191550448537 -130000,0.028774391937380035 -140000,0.015258365931610266 -150000,0.02345449778561791 -160000,0.027410029154270894 -170000,0.019542859680950642 -180000,0.019825634950151046 -190000,0.03221142959470551 -200000,0.034784734916562844 -210000,0.030562105650703116 -220000,0.03874263330362737 -230000,0.021048107215513784 -240000,0.023490131444608174 -250000,0.021312880609184508 -260000,0.016016186385725936 -270000,0.02354920314004024 -280000,0.02117116949521005 -290000,0.02161381165497005 -300000,0.019996917573735118 -310000,0.025390521561106043 -320000,0.019103998783975836 -330000,0.023890269749487443 -340000,0.02149774165203174 -350000,0.027560550440102815 -360000,0.019251088829090195 -370000,0.018506101751700043 -380000,0.028167877004792292 -390000,0.03479318170187374 -400000,0.0266167871033152 -410000,0.015584384184330701 -420000,0.023963576337943476 -430000,0.02370765237137675 -440000,0.0196848229970783 -450000,0.01707787923514843 -460000,0.022351718166222174 -470000,0.029980707820504902 -480000,0.01593708001698057 -490000,0.0208636624738574 -500000,0.01895671679327885 -510000,0.02578317878457407 -520000,0.023863097761447228 -530000,0.024787811158845822 -540000,0.031156336826582746 -550000,0.022468352271243934 -560000,0.026645291860525812 -570000,0.024720640030379094 -580000,0.01759668216109276 -590000,0.02706180753496786 -600000,0.0339090638483564 -610000,0.02853210787288844 -620000,0.025294201184685033 -630000,0.02775376898547014 -640000,0.025818154526253544 -650000,0.029325115183989205 -660000,0.030363137849296125 -670000,0.033694293132672706 -680000,0.030928333460663752 -690000,0.01576555427163839 -700000,0.017671081035708386 -710000,0.026511379843577744 -720000,0.00761902214338382 -730000,0.022826692027350264 -740000,0.049169556067014744 -750000,0.031370429896439116 -760000,0.02555970596149563 -770000,0.02349021608630816 -780000,0.02183050780246655 -790000,0.02328371241067847 -800000,0.02474643061868846 -810000,0.031218589407702287 -820000,0.031579947207743916 -830000,0.023295751493424176 -840000,0.02685841120158633 -850000,0.023855931901683412 -860000,0.022539945039898156 -870000,0.024766629251341023 -880000,0.020198249692718186 -890000,0.026560715958476068 -900000,0.016740009219696123 -910000,0.021413230057805772 -920000,0.026581356239815557 -930000,0.0316499317996204 -940000,0.02264405793199937 -950000,0.025666885481526458 -960000,0.01787282700339953 -970000,0.02585337059572339 -980000,0.026152768296500047 -990000,0.01878221851463119 -1000000,0.02222854793071747 +10000,0.018229736884435018 +20000,0.02127453736805668 +30000,0.02370014371505628 +40000,0.02065292401239276 +50000,0.025316688713307185 +60000,0.01826790439275404 +70000,0.025006062878916657 +80000,0.018720928160473705 +90000,0.019625773886218664 +100000,0.013361939182505009 +110000,0.03154762430737416 +120000,0.031806574435904615 +130000,0.02165382777651151 +140000,0.015200028227021298 +150000,0.020803385553881525 +160000,0.014369704714044928 +170000,0.02885335066045324 +180000,0.020664520297820364 +190000,0.02774980144264797 +200000,0.014654031302779914 +210000,0.028576524602249263 +220000,0.03024926499153177 +230000,0.02645964035764336 +240000,0.04079947238788009 +250000,0.017594279488548636 +260000,0.032765278577183685 +270000,0.029457671707496042 +280000,0.015534943140422305 +290000,0.030238619043181342 +300000,0.030985550116747612 +310000,0.033439734640220806 +320000,0.010407944504792491 +330000,0.035565994260832665 +340000,0.01913086099860569 +350000,0.018777612193177147 +360000,0.02521338181880613 +370000,0.026439335926746334 +380000,0.022027451839918895 +390000,0.028303294302895667 +400000,0.017139489576220517 +410000,0.04025533202414711 +420000,0.031722171915074183 +430000,0.014261707291007042 +440000,0.02689499178280433 +450000,0.02678934750147164 +460000,0.03554095750053724 +470000,0.026956252986565225 +480000,0.02715459933194021 +490000,0.029161349792654313 +500000,0.020614128513261675 +510000,0.03876745159116884 +520000,0.0294973047139744 +530000,0.02062256399852534 +540000,0.02528983733306328 +550000,0.019311279663816096 +560000,0.025200433749705552 +570000,0.029168432097261155 +580000,0.025244836416095495 +590000,0.018281504170348246 +600000,0.025952478436132277 +610000,0.027890596538782113 +620000,0.026617188627521198 +630000,0.026978990544254584 +640000,0.023182477817560238 +650000,0.026558673909554888 +660000,0.026973483727003138 +670000,0.038929785757015146 +680000,0.03427885382746656 +690000,0.02805567579343915 +700000,0.030999637239923078 +710000,0.021579861144224805 +720000,0.02489494209488233 +730000,0.02404243256896734 +740000,0.022397541472067434 +750000,0.024323036956290397 +760000,0.019284146217008434 +770000,0.014312991220504043 +780000,0.026891583390533918 +790000,0.024835351140548783 +800000,0.024412206746637823 +810000,0.021250387715796633 +820000,0.025366627704352142 +830000,0.02246172120794654 +840000,0.026730975322425365 +850000,0.03421310999741157 +860000,0.01983120329678059 +870000,0.015500476863235236 +880000,0.013675535159806412 +890000,0.026640326157212256 +900000,0.028744155572106446 +910000,0.025880434301992255 +920000,0.02237929714222749 +930000,0.03249762508397301 +940000,0.024294908437877894 +950000,0.026201037224382162 +960000,0.013028102368116379 +970000,0.032526198371003075 +980000,0.01626911209896207 +990000,0.023152368733038504 +1000000,0.023702411105235414 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/loss/entropy_loss.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/loss/entropy_loss.log index e971c6d69..fefef6eaf 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/loss/entropy_loss.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/loss/entropy_loss.log @@ -1,101 +1,101 @@ step,loss/entropy_loss -10000,-3.783592291673025 -20000,-3.749724864959717 -30000,-3.825554072856903 -40000,-3.9300530910491944 -50000,-4.038460834821066 -60000,-4.087599031130472 -70000,-4.087605730692546 -80000,-4.156779797871907 -90000,-4.1111091613769535 -100000,-4.1141591548919685 -110000,-4.174881807963054 -120000,-4.0822129805882765 -130000,-4.006744766235352 -140000,-4.050142375628154 -150000,-4.050960580507914 -160000,-4.06744339466095 -170000,-4.1185931205749515 -180000,-4.192100222905477 -190000,-4.216858816146851 -200000,-4.275761588414509 -210000,-4.279669944445292 -220000,-4.236900194485982 -230000,-4.22452552318573 -240000,-4.266336520512899 -250000,-4.308406949043274 -260000,-4.3612007459004705 -270000,-4.362849728266398 -280000,-4.307822179794312 -290000,-4.315399026870727 -300000,-4.30090720653534 -310000,-4.3443690776824955 -320000,-4.403918107350668 -330000,-4.424695340792338 -340000,-4.328205291430156 -350000,-4.432089654604594 -360000,-4.5287307341893515 -370000,-4.594285527865091 -380000,-4.678087385495504 -390000,-4.647881857554117 -400000,-4.617446271578471 -410000,-4.652551317214966 -420000,-4.731773583094279 -430000,-4.807478260993958 -440000,-4.736821778615315 -450000,-4.708827241261799 -460000,-4.68938467502594 -470000,-4.691419164339702 -480000,-4.65495731830597 -490000,-4.80546731154124 -500000,-4.856191102663677 -510000,-4.870964407920837 -520000,-4.869671718279521 -530000,-4.862848424911499 -540000,-4.89074026743571 -550000,-4.930573503176371 -560000,-4.932835801442464 -570000,-4.999426364898682 -580000,-4.994794257481894 -590000,-4.996972195307413 -600000,-4.995818090438843 -610000,-5.0700911521911625 -620000,-5.1466048717498785 -630000,-5.192474897702534 -640000,-5.188129957516988 -650000,-5.261360168457031 -660000,-5.309441860516866 -670000,-5.383105508486429 -680000,-5.405433448155721 -690000,-5.43002036412557 -700000,-5.43468603293101 -710000,-5.5271518468856815 -720000,-5.4993197600046795 -730000,-5.439562273025513 -740000,-5.475778444608052 -750000,-5.5216336409250895 -760000,-5.5857444842656445 -770000,-5.609491904576619 -780000,-5.6593504985173535 -790000,-5.724252875645956 -800000,-5.782556207974752 -810000,-5.905972337722778 -820000,-5.947991180419922 -830000,-6.000066884358724 -840000,-6.004981350898742 -850000,-6.0744964202245075 -860000,-6.193298411369324 -870000,-6.217937270800272 -880000,-6.282960240046184 -890000,-6.311434594790141 -900000,-6.409352262814839 -910000,-6.410375483830769 -920000,-6.515099986394246 -930000,-6.463827530543009 -940000,-6.416235287984213 -950000,-6.481866653760275 -960000,-6.519961396853129 -970000,-6.5977703332901 -980000,-6.6386641502380375 -990000,-6.663167524337768 -1000000,-6.695708362261454 +10000,-3.7525172630945844 +20000,-3.7450521349906927 +30000,-3.9033399899800623 +40000,-3.9637731115023294 +50000,-3.9524892012278245 +60000,-3.9905235926310225 +70000,-3.939301574230194 +80000,-4.065708629290262 +90000,-4.097379501660665 +100000,-4.149353249867757 +110000,-4.186742361386616 +120000,-4.208763988812764 +130000,-4.228386282920837 +140000,-4.246629985173543 +150000,-4.228652246793111 +160000,-4.253186154365539 +170000,-4.341217470169068 +180000,-4.304578232765198 +190000,-4.350293405850729 +200000,-4.456865247090658 +210000,-4.5162286837895715 +220000,-4.605994526545207 +230000,-4.591232546170552 +240000,-4.6380350192387905 +250000,-4.58505392074585 +260000,-4.608377265930176 +270000,-4.614946405092876 +280000,-4.7119491020838415 +290000,-4.77703898747762 +300000,-4.810743355751038 +310000,-4.878026254971822 +320000,-4.9163307428359975 +330000,-4.877486340204874 +340000,-4.902164570490519 +350000,-4.955760558446249 +360000,-4.965298636754353 +370000,-5.004945572217305 +380000,-5.002308336893718 +390000,-4.999552098910014 +400000,-5.033946959177653 +410000,-5.166763122876486 +420000,-5.1751739104588825 +430000,-5.241968059539796 +440000,-5.258599837621054 +450000,-5.262419621149698 +460000,-5.312570865948994 +470000,-5.385249241193135 +480000,-5.4606070915857945 +490000,-5.4202710628509525 +500000,-5.448024845123291 +510000,-5.471761655807495 +520000,-5.545347245534261 +530000,-5.616912817955017 +540000,-5.661895338694254 +550000,-5.586144391695659 +560000,-5.619239743550619 +570000,-5.636061882972717 +580000,-5.652188944816589 +590000,-5.702798016866048 +600000,-5.724225854873657 +610000,-5.775075872739156 +620000,-5.789408254623412 +630000,-5.79253789583842 +640000,-5.818220440546671 +650000,-5.857601126035054 +660000,-5.910353390375773 +670000,-5.980622967084249 +680000,-5.99431943098704 +690000,-5.992568882306417 +700000,-6.098023303349813 +710000,-6.139954328536987 +720000,-6.22035961151123 +730000,-6.205275662740072 +740000,-6.328862277666728 +750000,-6.458466315269471 +760000,-6.577422094345093 +770000,-6.63744081656138 +780000,-6.662033836046855 +790000,-6.677245728174846 +800000,-6.749131162961324 +810000,-6.811589956283569 +820000,-6.987385853131613 +830000,-7.025915288925171 +840000,-7.0096252759297695 +850000,-7.0447643280029295 +860000,-7.044905511538187 +870000,-7.088141576449077 +880000,-7.1306557496388745 +890000,-7.208000508944193 +900000,-7.238578621546428 +910000,-7.345711628595988 +920000,-7.444418684641519 +930000,-7.43301142056783 +940000,-7.5822496334711715 +950000,-7.626623765627544 +960000,-7.700907468795778 +970000,-7.661727062861123 +980000,-7.68259261449178 +990000,-7.6578941583633435 +1000000,-7.592430408795674 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/loss/policy_loss.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/loss/policy_loss.log index 179ab2cba..c7a4b18e3 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/loss/policy_loss.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/loss/policy_loss.log @@ -1,101 +1,101 @@ step,loss/policy_loss -10000,-0.019336815562360454 -20000,-0.012958686189612508 -30000,-0.010106932388859879 -40000,-0.016559636873563127 -50000,-0.01085027394491524 -60000,-0.011096255348449423 -70000,-0.015601859652333466 -80000,-0.019114009293171023 -90000,-0.013711898021893276 -100000,-0.011062247384896421 -110000,-0.011730799119164598 -120000,-0.02144345477679043 -130000,-0.009183625764727983 -140000,-0.01880956703201349 -150000,-0.02474470306748968 -160000,-0.001934095567410056 -170000,-0.011427797900979482 -180000,-0.013506900932670384 -190000,-0.015213663196205474 -200000,-0.01382831642800246 -210000,-0.007302870840828103 -220000,-0.011634113379802273 -230000,-0.007532063740354372 -240000,-0.018059479775531687 -250000,-0.014410319706226823 -260000,-0.01273618016551161 -270000,-0.01000545361981952 -280000,-0.02026479623632303 -290000,-0.010291549272278606 -300000,-0.006618779262244098 -310000,-0.00724927382606517 -320000,-0.011923203246648183 -330000,-0.026615493039198822 -340000,-0.020184271761847207 -350000,-0.01378585606201107 -360000,-0.013371805605852563 -370000,-0.016632226221165274 -380000,-0.007140230822554795 -390000,-0.006152938826680402 -400000,-0.016027484729120238 -410000,-0.022825987180729736 -420000,-0.003210778134537181 -430000,-0.007980712363551929 -440000,-0.014115524697759698 -450000,-0.012690971625935257 -460000,-0.02421523234011442 -470000,-0.017469853778111787 -480000,-0.007640380434570807 -490000,-0.0046946447440035614 -500000,-0.02147959824026512 -510000,-0.02227242984991285 -520000,-0.016354066453565463 -530000,-0.014820598937815496 -540000,-0.010212995458023334 -550000,-0.014852728658692749 -560000,-0.016150271302526047 -570000,-0.0017183784049467688 -580000,-0.01575329770987851 -590000,-0.014733128934575266 -600000,0.009521835093905213 -610000,-0.010498603086834798 -620000,-0.004123636100829056 -630000,-0.010944569810384532 -640000,-0.01136277455869475 -650000,-0.011559879567482598 -660000,-0.007638807674891501 -670000,-0.005259559607049018 -680000,-0.013919827375851268 -690000,-0.01383272777228676 -700000,-0.016972390932601562 -710000,-0.00435304737818163 -720000,-0.016022835289078764 -730000,-0.007387064288013783 -740000,0.0013632416776744657 -750000,-0.004534730799930059 -760000,-0.009914961489279776 -770000,-0.009556550384292107 -780000,-0.004945612553970227 -790000,-0.013490797173849146 -800000,-0.020149796320216344 -810000,-0.009038492534349662 -820000,-0.006014647240893678 -830000,-0.00593038238596743 -840000,-0.00944730317323566 -850000,-0.0007116231976476595 -860000,-0.006802469550821988 -870000,-0.009312556901687556 -880000,-0.0111578860992118 -890000,-0.010514505388747894 -900000,-0.015553128885154486 -910000,-0.006763150003316539 -920000,-0.010709816383139273 -930000,-0.011689047803428257 -940000,-0.0063937295474439685 -950000,-0.0070217438048268185 -960000,-0.00885149778266448 -970000,-0.019100065569471348 -980000,-0.013104447261642898 -990000,-0.017098136467862256 -1000000,-0.011613826243901993 +10000,-0.02057395789167387 +20000,-0.019295056765323927 +30000,-0.011586949402009659 +40000,-0.021343990335687946 +50000,0.006954515544980515 +60000,-0.005789192843174825 +70000,-0.015408742981646706 +80000,-0.012670328514927665 +90000,-0.012715807164640094 +100000,-0.010531519793425518 +110000,-0.006031421602342437 +120000,-0.004291891476807785 +130000,-0.01523618353732851 +140000,-0.017162670028186652 +150000,-0.009667088507150704 +160000,-0.013759078817429748 +170000,-0.0045129524832332735 +180000,-0.003483454846412864 +190000,-0.012995430923867391 +200000,-0.01163826419623559 +210000,-0.014737549312607132 +220000,-0.011719635652041363 +230000,0.002327234451188079 +240000,-0.004759997098631637 +250000,-0.007177062772582251 +260000,-0.020567657454006995 +270000,-0.008962662752258129 +280000,-0.008666776691037697 +290000,-0.007835869796729823 +300000,-0.021764890545731462 +310000,-0.011008064201809647 +320000,-0.014450165826561598 +330000,-0.018124619929919206 +340000,-0.010662423450099406 +350000,-0.00873508764275617 +360000,-0.014133331135482447 +370000,-0.010080531915900203 +380000,-0.012819286533735855 +390000,0.001079161587902101 +400000,-0.008573963923996548 +410000,-0.01619930189986524 +420000,-0.01007861078693752 +430000,-0.0052865299166900605 +440000,-0.0022887731037667673 +450000,-0.01452757855508845 +460000,-0.011182555216783824 +470000,-0.015998578004833026 +480000,-0.007604967278925677 +490000,-0.013195820971166217 +500000,-0.017022545698519072 +510000,-0.0038243962095354605 +520000,0.00229913041681955 +530000,-0.014531220741355424 +540000,-0.012768526705048485 +550000,-0.017881035973459948 +560000,-0.004317313977803999 +570000,-0.005280051965334843 +580000,-0.01970120175432865 +590000,-0.017209304672785447 +600000,-0.014839289280792528 +610000,-0.0037749955300591617 +620000,-0.005980555669163457 +630000,-0.008093560382639597 +640000,-0.01566826198194112 +650000,-0.0231773661060739 +660000,-0.010859798176639606 +670000,-0.01061019352252064 +680000,0.0015211279605480755 +690000,-0.015144113552499191 +700000,-0.014751480562135908 +710000,-0.021911181391510957 +720000,-0.012260897268030969 +730000,-0.010371117696749293 +740000,-0.007832098066563422 +750000,-0.01791073376127162 +760000,-0.006786362405830272 +770000,-0.028143636753258296 +780000,-0.008722948668275166 +790000,-0.011850146411267554 +800000,-0.01857690664391505 +810000,-0.019146110619190814 +820000,-0.009007253470800211 +830000,-0.003839069307386515 +840000,-0.008794299396483669 +850000,-0.005835862294012094 +860000,-0.020765318223475444 +870000,-0.015400306596849905 +880000,-0.01809265152275541 +890000,-0.0070629899754021605 +900000,-0.018214235076531114 +910000,-0.011393205260447587 +920000,-0.019267902930048694 +930000,-0.005139616634585583 +940000,-0.021503466932913214 +950000,-0.006415611151813541 +960000,-0.031578658354302944 +970000,-0.01559181524442379 +980000,-0.020899240494592533 +990000,-0.013088021011234154 +1000000,-0.01390190139847183 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/loss/value_loss.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/loss/value_loss.log index e6caf7938..698678c33 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/loss/value_loss.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/loss/value_loss.log @@ -1,101 +1,101 @@ step,loss/value_loss -10000,91.6126268694071 -20000,35.83116148390352 -30000,8.316448385428767 -40000,4.240038797597162 -50000,3.632991376650552 -60000,2.037894320264046 -70000,2.8723418521507718 -80000,3.4150528952473906 -90000,1.7841556824838427 -100000,2.8891663328565387 -110000,1.4109985175558206 -120000,1.832707940540472 -130000,0.9000422186842204 -140000,1.8003901820799455 -150000,0.8978411716497637 -160000,1.323641499345366 -170000,0.9210137623076335 -180000,1.1130017345253074 -190000,0.5965903070297068 -200000,1.0280276543449633 -210000,1.0302690533677823 -220000,1.140841746568571 -230000,1.6968069818475873 -240000,0.5198777633130547 -250000,0.5651352885992162 -260000,0.6906983423335322 -270000,0.7796730036281704 -280000,1.1784501853242308 -290000,0.9838969425234956 -300000,0.6523892488818818 -310000,0.5188761445117397 -320000,4.178676790850284 -330000,1.1086482668485096 -340000,1.2936223678469314 -350000,3.3126493978835456 -360000,1.1461696796400938 -370000,0.871758495048585 -380000,0.5531769847934318 -390000,0.29910888118294704 -400000,0.7746566052039803 -410000,1.5817073370814574 -420000,0.7666706512870031 -430000,0.7867418777089628 -440000,0.3479314210599812 -450000,0.8145764811657058 -460000,2.038459517063714 -470000,1.339272451783868 -480000,0.5753835764601487 -490000,0.7272684931834793 -500000,0.7179720092109851 -510000,0.4289938205991575 -520000,1.048156861024654 -530000,0.5243071940842133 -540000,0.5840439678198944 -550000,0.40384524258598153 -560000,0.2526999588430951 -570000,0.2517364114153906 -580000,0.4836744617576668 -590000,0.6070319760898484 -600000,10.16363600541298 -610000,0.7408815817357749 -620000,0.1996037377539769 -630000,2.563450587139547 -640000,0.33888771024477254 -650000,2.148665490217412 -660000,0.301718403124556 -670000,3.8920105850237663 -680000,0.23829260460175677 -690000,0.809747478189889 -700000,0.8097149469556989 -710000,7.557241535831286 -720000,0.536833570377917 -730000,0.7591896441175844 -740000,1.7953737257169475 -750000,1.11540717464218 -760000,0.9164219419677796 -770000,0.7630080544843617 -780000,0.40093358483786556 -790000,0.971121129787903 -800000,0.3015687688040352 -810000,0.28226661882735926 -820000,1.0011805471231106 -830000,0.5509314574130126 -840000,0.34010881950246236 -850000,0.6064179992789549 -860000,1.1626542514738627 -870000,0.2712611543871003 -880000,0.7265633798098258 -890000,0.5733351812554556 -900000,1.0420008938261722 -910000,0.3487142171813701 -920000,1.1911992745117481 -930000,0.2854785452322896 -940000,0.4889032691846623 -950000,1.5281259254577193 -960000,0.37178661688512793 -970000,0.2182210488280433 -980000,0.8701766789256989 -990000,0.5224187567990768 -1000000,0.2704695065365745 +10000,45.40830130577938 +20000,11.178123195145973 +30000,4.60288164153862 +40000,7.928503845192701 +50000,4.447204940292849 +60000,4.775557300845171 +70000,1.6218161932830497 +80000,1.265516131571631 +90000,2.380259623808361 +100000,1.2226653065902275 +110000,1.485318447649552 +120000,1.801143163554914 +130000,1.99340687149448 +140000,2.206477870801175 +150000,2.4089532258864303 +160000,2.1102522310415064 +170000,3.7943512944688202 +180000,1.4320410382669937 +190000,1.090700166310723 +200000,1.0481352935954504 +210000,1.9592727147691449 +220000,1.1182734354328625 +230000,2.0660463853896163 +240000,0.7560392964100277 +250000,0.7137781202221658 +260000,0.8455487760675082 +270000,0.6380383464215644 +280000,1.0132260855810065 +290000,0.6995390846579406 +300000,0.5599660998167196 +310000,1.458315561950669 +320000,3.8433033908081407 +330000,0.6638066053653472 +340000,0.5528311202199063 +350000,0.6485203439800526 +360000,0.688860080105252 +370000,0.8763127042321452 +380000,0.33193820480554764 +390000,0.9241435313858235 +400000,1.3439759050775173 +410000,2.731504561644489 +420000,1.8725605724035224 +430000,1.5152260566724753 +440000,0.7241438404096987 +450000,0.7321885772665444 +460000,2.0143852431509837 +470000,0.6452592086454805 +480000,0.5644927220913993 +490000,0.8192889093981398 +500000,0.719063517792611 +510000,2.6777905056937756 +520000,0.6214150986173758 +530000,1.1295246160013768 +540000,0.6933184750511334 +550000,0.8429872430181191 +560000,0.7231593278315788 +570000,0.4587941529660834 +580000,0.7919697701605839 +590000,0.35831881130040033 +600000,1.622796514417797 +610000,1.569501258332893 +620000,1.3911637565029162 +630000,1.6740487720831598 +640000,0.6199500886162176 +650000,0.5235104965567672 +660000,0.7369734040583757 +670000,0.3997595844309626 +680000,0.33123791547053183 +690000,0.5650669134853409 +700000,0.5434009293372967 +710000,0.6742830330362267 +720000,0.45707761005886044 +730000,0.4741120615127724 +740000,0.6337096497175543 +750000,0.3043106929930137 +760000,0.5315821002039101 +770000,0.7015208425469535 +780000,1.0314649290998268 +790000,1.2290287268020834 +800000,0.32792519109826757 +810000,1.9872508880348358 +820000,0.9858997014104233 +830000,0.2258979215647444 +840000,0.5607063270621115 +850000,0.4722188389559956 +860000,0.24786848167854408 +870000,0.5036732325170715 +880000,0.3618655298772728 +890000,1.8031003646612103 +900000,0.27430286726485725 +910000,0.7926684100343593 +920000,0.5126200586975106 +930000,0.5226473567607541 +940000,0.42084879435905337 +950000,0.5388643175008236 +960000,0.3352429374228147 +970000,0.48320198711032286 +980000,0.5206450872599141 +990000,0.4514499897492132 +1000000,1.864049879245234 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat/constraint_violation.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat/constraint_violation.log index 7f1952e86..8f841d488 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat/constraint_violation.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat/constraint_violation.log @@ -1,101 +1,101 @@ step,stat/constraint_violation -10000,34 -20000,43 -30000,128 -40000,141 -50000,182 -60000,214 -70000,245 -80000,250 -90000,266 -100000,293 -110000,318 -120000,339 -130000,360 -140000,399 -150000,411 -160000,446 -170000,479 -180000,497 -190000,535 -200000,567 -210000,589 -220000,614 -230000,634 -240000,659 -250000,679 -260000,703 -270000,741 -280000,768 -290000,778 -300000,822 -310000,838 -320000,860 -330000,912 -340000,923 -350000,928 -360000,987 -370000,1025 -380000,1047 -390000,1082 -400000,1110 -410000,1140 -420000,1150 -430000,1165 -440000,1167 -450000,1198 -460000,1228 -470000,1270 -480000,1303 -490000,1327 -500000,1339 -510000,1358 -520000,1399 -530000,1422 -540000,1463 -550000,1500 -560000,1511 -570000,1522 -580000,1559 -590000,1562 -600000,1599 -610000,1627 -620000,1629 -630000,1646 -640000,1661 -650000,1694 -660000,1697 -670000,1729 -680000,1749 -690000,1785 -700000,1807 -710000,1811 -720000,1861 -730000,1870 -740000,1901 -750000,1937 -760000,1957 -770000,1992 -780000,2021 -790000,2044 -800000,2056 -810000,2061 -820000,2098 -830000,2116 -840000,2153 -850000,2195 -860000,2246 -870000,2268 -880000,2274 -890000,2290 -900000,2319 -910000,2346 -920000,2359 -930000,2390 -940000,2404 -950000,2431 -960000,2458 -970000,2466 -980000,2495 -990000,2506 -1000000,2553 +10000,35 +20000,46 +30000,120 +40000,129 +50000,174 +60000,208 +70000,228 +80000,232 +90000,247 +100000,276 +110000,307 +120000,317 +130000,337 +140000,379 +150000,409 +160000,430 +170000,472 +180000,494 +190000,521 +200000,553 +210000,574 +220000,601 +230000,622 +240000,644 +250000,662 +260000,687 +270000,725 +280000,758 +290000,764 +300000,810 +310000,825 +320000,853 +330000,902 +340000,912 +350000,915 +360000,970 +370000,1008 +380000,1026 +390000,1063 +400000,1091 +410000,1116 +420000,1125 +430000,1138 +440000,1140 +450000,1172 +460000,1210 +470000,1249 +480000,1283 +490000,1302 +500000,1312 +510000,1348 +520000,1361 +530000,1384 +540000,1427 +550000,1461 +560000,1470 +570000,1496 +580000,1519 +590000,1519 +600000,1562 +610000,1586 +620000,1593 +630000,1609 +640000,1625 +650000,1660 +660000,1664 +670000,1693 +680000,1713 +690000,1756 +700000,1773 +710000,1777 +720000,1826 +730000,1835 +740000,1868 +750000,1905 +760000,1921 +770000,1987 +780000,1989 +790000,2021 +800000,2033 +810000,2039 +820000,2075 +830000,2099 +840000,2139 +850000,2171 +860000,2221 +870000,2235 +880000,2251 +890000,2268 +900000,2289 +910000,2315 +920000,2336 +930000,2369 +940000,2372 +950000,2405 +960000,2424 +970000,2432 +980000,2458 +990000,2468 +1000000,2522 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat/ep_constraint_violation.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat/ep_constraint_violation.log index 42e80179e..1c9e700b7 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat/ep_constraint_violation.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat/ep_constraint_violation.log @@ -1,101 +1,101 @@ step,stat/ep_constraint_violation -10000,1.1 -20000,0.1 +10000,0.9 +20000,0.2 30000,1.1 40000,0.1 -50000,0.9 -60000,1.2 +50000,1.2 +60000,2.4 70000,0.0 80000,0.2 -90000,0.2 +90000,0.0 100000,0.0 110000,0.8 -120000,0.2 +120000,0.0 130000,0.2 -140000,1.4 -150000,0.2 +140000,0.1 +150000,1.4 160000,0.2 -170000,1.3 -180000,0.2 -190000,0.2 -200000,1.5 -210000,1.7 -220000,0.3 -230000,0.0 +170000,1.9 +180000,1.4 +190000,0.1 +200000,1.4 +210000,0.1 +220000,0.4 +230000,0.1 240000,0.0 250000,0.1 260000,0.0 270000,0.6 -280000,0.0 +280000,0.6 290000,0.0 300000,2.3 -310000,0.0 -320000,1.1 -330000,1.4 +310000,0.1 +320000,1.2 +330000,0.1 340000,0.5 -350000,0.3 -360000,1.0 -370000,0.9 +350000,0.1 +360000,0.1 +370000,0.2 380000,0.0 -390000,1.5 -400000,0.3 -410000,2.1 -420000,0.0 +390000,0.0 +400000,0.4 +410000,0.2 +420000,0.1 430000,0.0 440000,0.0 -450000,2.7 -460000,0.8 +450000,0.2 +460000,0.1 470000,1.4 480000,0.0 490000,0.1 500000,0.0 -510000,1.6 -520000,0.7 +510000,3.3 +520000,0.0 530000,0.9 -540000,0.5 -550000,1.0 -560000,0.9 -570000,0.0 -580000,0.0 +540000,0.0 +550000,0.0 +560000,0.0 +570000,1.4 +580000,0.1 590000,0.0 -600000,2.1 -610000,0.2 -620000,0.0 +600000,0.6 +610000,0.0 +620000,0.4 630000,0.2 640000,0.0 -650000,1.0 -660000,0.0 -670000,2.0 +650000,0.3 +660000,0.2 +670000,0.1 680000,0.0 -690000,0.2 +690000,0.8 700000,0.1 -710000,0.2 +710000,0.0 720000,1.4 -730000,0.2 -740000,1.8 -750000,1.2 -760000,0.2 -770000,1.9 -780000,0.1 -790000,0.1 -800000,0.0 -810000,0.0 -820000,2.0 -830000,0.0 -840000,0.0 -850000,0.2 -860000,1.5 -870000,0.0 -880000,0.1 -890000,0.1 -900000,0.1 -910000,0.1 -920000,0.2 -930000,1.3 +730000,0.0 +740000,0.2 +750000,0.0 +760000,0.1 +770000,2.2 +780000,0.0 +790000,0.2 +800000,0.2 +810000,0.3 +820000,0.1 +830000,0.8 +840000,1.4 +850000,1.0 +860000,0.9 +870000,0.1 +880000,1.3 +890000,1.3 +900000,0.6 +910000,0.2 +920000,0.9 +930000,1.2 940000,0.1 950000,0.8 -960000,1.1 -970000,0.1 -980000,1.1 -990000,0.6 -1000000,0.1 +960000,0.0 +970000,0.0 +980000,0.1 +990000,0.1 +1000000,0.9 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat/ep_length.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat/ep_length.log index e3ec43b99..40bf534e1 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat/ep_length.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat/ep_length.log @@ -1,35 +1,35 @@ step,stat/ep_length 10000,225.9 -20000,225.5 +20000,201.1 30000,250.0 40000,225.2 50000,225.9 60000,227.0 70000,250.0 80000,226.0 -90000,225.8 +90000,250.0 100000,250.0 110000,225.7 -120000,225.5 -130000,201.6 +120000,250.0 +130000,225.1 140000,250.0 -150000,201.8 +150000,226.3 160000,200.3 -170000,200.3 +170000,201.5 180000,250.0 190000,225.4 200000,250.0 -210000,201.8 +210000,250.0 220000,250.0 -230000,250.0 +230000,226.6 240000,250.0 250000,250.0 260000,250.0 270000,250.0 280000,250.0 290000,250.0 -300000,225.9 -310000,250.0 +300000,250.0 +310000,225.6 320000,227.4 330000,250.0 340000,250.0 @@ -37,65 +37,65 @@ step,stat/ep_length 360000,225.9 370000,225.2 380000,250.0 -390000,225.6 -400000,250.0 -410000,250.0 -420000,250.0 +390000,250.0 +400000,225.8 +410000,203.3 +420000,225.7 430000,250.0 440000,250.0 -450000,203.7 -460000,225.3 +450000,227.0 +460000,226.8 470000,200.5 480000,250.0 -490000,227.0 +490000,227.1 500000,250.0 -510000,250.0 -520000,201.8 +510000,225.4 +520000,250.0 530000,250.0 540000,250.0 550000,250.0 -560000,225.2 +560000,250.0 570000,250.0 -580000,250.0 +580000,225.5 590000,250.0 -600000,226.8 -610000,201.1 -620000,250.0 -630000,201.8 +600000,153.2 +610000,250.0 +620000,225.3 +630000,202.6 640000,250.0 -650000,152.6 -660000,250.0 -670000,227.8 +650000,225.2 +660000,225.7 +670000,225.1 680000,250.0 690000,226.4 700000,225.6 -710000,201.0 +710000,250.0 720000,250.0 -730000,200.6 -740000,225.3 -750000,202.4 -760000,200.9 -770000,202.0 -780000,225.7 -790000,225.3 -800000,250.0 -810000,250.0 -820000,177.0 -830000,250.0 +730000,250.0 +740000,225.1 +750000,250.0 +760000,225.5 +770000,227.0 +780000,250.0 +790000,200.5 +800000,200.6 +810000,177.6 +820000,225.1 +830000,225.1 840000,250.0 -850000,200.2 -860000,227.1 -870000,250.0 +850000,250.0 +860000,225.3 +870000,225.5 880000,225.5 -890000,225.5 -900000,225.3 +890000,201.6 +900000,200.5 910000,225.2 -920000,201.8 +920000,225.1 930000,250.0 -940000,225.5 -950000,200.8 -960000,225.3 -970000,225.2 -980000,226.9 -990000,226.3 -1000000,226.2 +940000,225.6 +950000,250.0 +960000,250.0 +970000,250.0 +980000,226.3 +990000,226.4 +1000000,226.6 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat/ep_return.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat/ep_return.log index f92ca9b23..c27e05c50 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat/ep_return.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat/ep_return.log @@ -1,101 +1,101 @@ step,stat/ep_return -10000,78.20988868328206 -20000,75.78017686292269 -30000,108.61310680796785 -40000,82.6195291972392 -50000,96.3703645381922 -60000,101.3689937483201 -70000,131.61428314884333 -80000,106.54924234519585 -90000,106.65278828722057 -100000,124.42035179132799 -110000,121.38799401367498 -120000,120.04915074854428 -130000,88.67438374763081 -140000,119.03793525300254 -150000,97.95206444743224 -160000,117.88020034848753 -170000,116.15766524925068 -180000,135.60143946615932 -190000,115.86316306691549 -200000,132.5638115084816 -210000,111.08155338008119 -220000,143.43938783456332 -230000,132.87554400288116 -240000,141.9064856945224 -250000,138.40642399734125 -260000,157.6286352503434 -270000,146.3788548418314 -280000,128.89243835251136 -290000,141.58595195107264 -300000,124.11054686589254 -310000,139.5349050152831 -320000,128.80602570184112 -330000,138.16332568739924 -340000,139.69245189242298 -350000,121.42792945587294 -360000,129.03842427854178 -370000,131.3754325054536 -380000,143.28744841911765 -390000,132.49730164990632 -400000,153.18374049296153 -410000,150.39646066512498 -420000,142.08992811508736 -430000,162.82827238922079 -440000,149.50408951313807 -450000,130.0440447009584 -460000,137.49938064915432 -470000,126.758017707959 -480000,158.75185713046375 -490000,155.8165769924683 -500000,159.11953431529605 -510000,149.61772422957054 -520000,136.84113652136952 -530000,166.85114156131823 -540000,167.14586485722307 -550000,166.47842088449403 -560000,141.94087874738247 -570000,171.33774297810163 -580000,171.630966994015 -590000,172.02379247163105 -600000,146.63866603972647 -610000,130.13509508458105 -620000,159.9863961785672 -630000,123.13088906305298 -640000,169.23797604237703 -650000,95.35433641046919 -660000,163.44698001744885 -670000,157.4336769449757 -680000,137.1012074888646 -690000,135.82351452507504 -700000,160.41343976943108 -710000,137.3598426654616 -720000,155.98195047926458 -730000,134.7575745948265 -740000,157.39527839463182 -750000,131.0752306359191 -760000,126.11802752573388 -770000,127.19804428573514 -780000,155.97617546835755 -790000,155.2629086864974 -800000,173.79388913377983 -810000,165.28104508729544 -820000,126.80618004439766 -830000,175.744322230485 -840000,154.00714582445363 -850000,127.434671359637 -860000,157.21156560603328 -870000,162.0582131239074 -880000,143.8166474980508 -890000,153.7670156487118 -900000,151.68062482839474 -910000,155.23410040657532 -920000,122.54914827680018 -930000,169.14802898494312 -940000,156.3299891833045 -950000,131.20025083001192 -960000,142.83211598070733 -970000,159.49552460941337 -980000,150.50617223939457 -990000,152.71234673386635 -1000000,156.83173030891092 +10000,55.0782513305122 +20000,57.438576992380504 +30000,58.20186258517041 +40000,82.6839112978184 +50000,93.5613890362972 +60000,99.02998550520101 +70000,140.55301675068475 +80000,103.06653949772974 +90000,115.82991664003495 +100000,130.1893323428725 +110000,123.03852124102036 +120000,140.33787527308675 +130000,110.73792326799214 +140000,129.29467628524517 +150000,109.17886940154384 +160000,125.1990894685016 +170000,106.10302623783184 +180000,146.89281166236384 +190000,110.4536114379162 +200000,138.06639812014424 +210000,145.39335338713727 +220000,140.6376733627122 +230000,126.02146330427658 +240000,145.16140788927066 +250000,154.97270746446907 +260000,151.4417215284064 +270000,136.29034108288266 +280000,144.76991832354747 +290000,142.89175253402254 +300000,140.09423483611062 +310000,127.37340200482144 +320000,134.62220427018673 +330000,143.833350218964 +340000,146.91527913310057 +350000,123.31730310452102 +360000,131.74920996431064 +370000,125.12662696709023 +380000,149.61005637571023 +390000,155.00759590721742 +400000,138.31707509444774 +410000,126.23093427650804 +420000,135.05407273947503 +430000,156.47988405320098 +440000,147.208578771438 +450000,140.4087682663814 +460000,138.83066254858687 +470000,129.1174952320405 +480000,154.97240596763245 +490000,152.56900857952945 +500000,162.42519315193687 +510000,137.6945564882885 +520000,160.417129895531 +530000,139.6229449547288 +540000,159.5053627783055 +550000,169.94529681500052 +560000,154.77103283711014 +570000,170.34878423848403 +580000,133.40819290446728 +590000,161.54619635246738 +600000,102.82239760111845 +610000,153.01672538573288 +620000,143.24649288976286 +630000,136.23716049667627 +640000,151.25293222847245 +650000,147.96560549361146 +660000,145.881191822141 +670000,159.89059744261215 +680000,155.99067630143531 +690000,155.0459593087507 +700000,152.63987809473483 +710000,161.65734089924914 +720000,148.7296348186006 +730000,172.68559609548112 +740000,137.97452537060371 +750000,154.60762461054955 +760000,134.60602941170413 +770000,147.371361042323 +780000,167.55999772846263 +790000,129.05571896623343 +800000,128.58612760200822 +810000,112.25973407880699 +820000,145.84533233862936 +830000,144.03992493237917 +840000,153.63978647273473 +850000,170.44762146155466 +860000,152.64488069303889 +870000,148.794385968909 +880000,139.86270315066815 +890000,115.32338462566489 +900000,126.93872095901153 +910000,137.57971085514592 +920000,150.9386582892732 +930000,163.82360450473925 +940000,152.8570772242469 +950000,168.51924679071004 +960000,175.85868222800627 +970000,171.15317328424257 +980000,158.01482021366704 +990000,157.20994376958544 +1000000,140.98236771947393 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat/ep_reward.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat/ep_reward.log index 4af705a8a..11130c867 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat/ep_reward.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat/ep_reward.log @@ -1,101 +1,101 @@ step,stat/ep_reward -10000,0.3144402510554733 -20000,0.3032090465428744 -30000,0.4344524272318714 -40000,0.3472713614743766 -50000,0.3867826939250387 -60000,0.409074622914339 -70000,0.5264571325953735 -80000,0.4262291165981399 -90000,0.42662512325488794 -100000,0.49768140716531206 -110000,0.4856225297999298 -120000,0.4802172089190685 -130000,0.3580659777211854 -140000,0.4761517410120101 -150000,0.3921202507789416 -160000,0.47235126151823537 -170000,0.4690632046280955 -180000,0.5424057578646373 -190000,0.4671525779979728 -200000,0.5302552460339263 -210000,0.4449751351473844 -220000,0.5737575513382531 -230000,0.5315021760115245 -240000,0.5676259427780895 -250000,0.5536256959893651 -260000,0.6305145410013735 -270000,0.5855154193673255 -280000,0.5155697534100454 -290000,0.5663438078042905 -300000,0.49660907176324265 -310000,0.5581396200611325 -320000,0.5162259284515861 -330000,0.552653302749597 -340000,0.558769807569692 -350000,0.4861596742252516 -360000,0.5197670706719344 -370000,0.5258698620948951 -380000,0.5731497936764706 -390000,0.5315137121467413 -400000,0.6127349619718461 -410000,0.6015858426604999 -420000,0.5683597124603494 -430000,0.651313089556883 -440000,0.5980163580525523 -450000,0.5250145008400587 -460000,0.5546527190607217 -470000,0.5084475027388563 -480000,0.635007428521855 -490000,0.62348284663851 -500000,0.6364781372611843 -510000,0.5984708969182823 -520000,0.5478000110751579 -530000,0.667404566245273 -540000,0.6685834594288922 -550000,0.6659136835379762 -560000,0.5679708951446568 -570000,0.6853509719124066 -580000,0.68652386797606 -590000,0.6880951698865243 -600000,0.587184745136401 -610000,0.5209509395505104 -620000,0.6399455847142688 -630000,0.4981269079042342 -640000,0.676951904169508 -650000,0.3869646762469193 -660000,0.6537879200697955 -670000,0.6320808218993186 -680000,0.5484048299554584 -690000,0.54440662585474 -700000,0.6419519020424185 -710000,0.5510326860430451 -720000,0.6239278019170583 -730000,0.5393222186444773 -740000,0.6304257582588007 -750000,0.5249947918542472 -760000,0.509393811193507 -770000,0.5098012313180582 -780000,0.6240262808155361 -790000,0.621480685663843 -800000,0.6951755565351193 -810000,0.6611241803491816 -820000,0.5147745627513622 -830000,0.70297728892194 -840000,0.6160285832978147 -850000,0.5123126558255983 -860000,0.6355439351566293 -870000,0.6482328524956296 -880000,0.5755756942590283 -890000,0.6150771119627482 -900000,0.6069349270105016 -910000,0.6210636451624929 -920000,0.49528097780515684 -930000,0.6765921159397725 -940000,0.6259564407783775 -950000,0.5253117002793906 -960000,0.5717464699147847 -970000,0.6421833363859435 -980000,0.6023866032382119 -990000,0.6112687821998551 -1000000,0.6298110061603778 +10000,0.22189639290765245 +20000,0.22988438243143014 +30000,0.23280745034068168 +40000,0.34750580992735663 +50000,0.3755262764714134 +60000,0.39963630037332976 +70000,0.562212067002739 +80000,0.4122982730379389 +90000,0.4633196665601398 +100000,0.52075732937149 +110000,0.4940326224130498 +120000,0.5613515010923471 +130000,0.44298164247161465 +140000,0.5171787051409806 +150000,0.43930645174512684 +160000,0.5016265174130934 +170000,0.42556813819001366 +180000,0.5875712466494555 +190000,0.4455203386376258 +200000,0.5522655924805769 +210000,0.5815734135485491 +220000,0.5625506934508488 +230000,0.5041225093617743 +240000,0.5806456315570827 +250000,0.6198908298578762 +260000,0.6057668861136254 +270000,0.5451613643315307 +280000,0.5790796732941899 +290000,0.5715670101360901 +300000,0.5603769393444424 +310000,0.510286689370548 +320000,0.539499363459685 +330000,0.5753334008758559 +340000,0.5876611165324023 +350000,0.49371814939603115 +360000,0.5305928795106414 +370000,0.5008726925829506 +380000,0.5984402255028409 +390000,0.6200303836288699 +400000,0.5549013603922308 +410000,0.5070117366684932 +420000,0.5414913544290413 +430000,0.6259195362128038 +440000,0.588834315085752 +450000,0.5658430732775757 +460000,0.5554404429036985 +470000,0.5178855283492803 +480000,0.6198896238705298 +490000,0.6104860405945909 +500000,0.6497007726077474 +510000,0.5539698163453697 +520000,0.6416685195821239 +530000,0.5584917798189152 +540000,0.6380214511132221 +550000,0.679781187260002 +560000,0.6190841313484406 +570000,0.681395136953936 +580000,0.5339599917613329 +590000,0.6461847854098696 +600000,0.41819996111426017 +610000,0.6120669015429315 +620000,0.5772166315250618 +630000,0.5506505498397338 +640000,0.6050117289138897 +650000,0.5921485102717826 +660000,0.5844143912843217 +670000,0.6396183198927369 +680000,0.6239627052057413 +690000,0.6212954101192449 +700000,0.6136308079426972 +710000,0.6466293635969965 +720000,0.5949185392744025 +730000,0.6907423843819244 +740000,0.5530201348214115 +750000,0.6184304984421983 +760000,0.539031142392915 +770000,0.5900378944550603 +780000,0.6702399909138506 +790000,0.5185604713067148 +800000,0.5166879980845158 +810000,0.46300493438717566 +820000,0.5834698320492923 +830000,0.576169091128009 +840000,0.614559145890939 +850000,0.6817904858462186 +860000,0.6119003087399999 +870000,0.5970570790746276 +880000,0.5597607838296867 +890000,0.4638179477302297 +900000,0.5090402989764437 +910000,0.5518077911551943 +920000,0.6038408571917514 +930000,0.655294418018957 +940000,0.6114494240985084 +950000,0.6740769871628404 +960000,0.7034347289120253 +970000,0.6846126931369703 +980000,0.6332778911153578 +990000,0.6292074286651624 +1000000,0.5648172356266594 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat_eval/constraint_violation.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat_eval/constraint_violation.log index 02e1beaf0..a03f05b4c 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat_eval/constraint_violation.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat_eval/constraint_violation.log @@ -1,57 +1,57 @@ step,stat_eval/constraint_violation -10000,0.2 -20000,0.3 +10000,0.3 +20000,0.4 30000,0.1 -40000,1.1 +40000,0.0 50000,0.8 60000,0.0 -70000,1.3 -80000,1.1 +70000,1.7 +80000,1.0 90000,1.0 -100000,0.0 +100000,0.1 110000,0.2 120000,0.1 -130000,1.4 +130000,1.1 140000,0.0 -150000,1.2 -160000,0.0 -170000,0.2 -180000,0.3 -190000,1.7 -200000,0.0 +150000,1.3 +160000,0.1 +170000,0.3 +180000,0.2 +190000,1.6 +200000,0.1 210000,0.3 -220000,0.2 +220000,0.5 230000,0.0 240000,0.9 250000,0.3 260000,0.0 -270000,3.0 +270000,2.3 280000,0.1 290000,0.3 300000,0.1 310000,0.0 320000,0.0 330000,0.2 -340000,0.0 +340000,0.1 350000,0.0 -360000,1.2 +360000,1.3 370000,0.2 380000,0.2 -390000,1.7 +390000,1.6 400000,0.1 410000,0.0 420000,1.3 430000,2.1 440000,0.2 -450000,0.1 -460000,0.1 +450000,0.0 +460000,0.0 470000,0.2 480000,1.6 -490000,0.3 +490000,0.2 500000,0.0 510000,0.6 -520000,0.5 -530000,0.2 +520000,0.4 +530000,0.0 540000,2.1 550000,0.1 560000,0.2 @@ -61,41 +61,41 @@ step,stat_eval/constraint_violation 600000,0.0 610000,0.0 620000,0.1 -630000,0.0 +630000,0.7 640000,0.0 650000,0.0 -660000,2.3 -670000,0.7 +660000,2.2 +670000,0.8 680000,0.0 -690000,0.8 +690000,0.9 700000,0.0 710000,1.2 -720000,1.4 -730000,0.1 +720000,1.5 +730000,0.2 740000,0.0 750000,0.1 760000,0.1 -770000,0.3 +770000,0.2 780000,1.5 790000,0.1 800000,0.0 810000,0.0 -820000,1.0 +820000,0.7 830000,0.1 840000,0.0 850000,0.4 860000,0.0 870000,1.3 880000,0.2 -890000,0.3 -900000,1.2 +890000,0.2 +900000,1.4 910000,0.1 920000,1.9 930000,0.0 940000,0.0 950000,0.1 -960000,0.2 -970000,1.7 +960000,0.6 +970000,1.8 980000,0.0 990000,0.2 1000000,0.0 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat_eval/ep_length.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat_eval/ep_length.log index 13275b532..9df1b525f 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat_eval/ep_length.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat_eval/ep_length.log @@ -11,7 +11,7 @@ step,stat_eval/ep_length 100000,250.0 110000,201.3 120000,250.0 -130000,177.5 +130000,177.6 140000,250.0 150000,250.0 160000,250.0 @@ -23,7 +23,7 @@ step,stat_eval/ep_length 220000,226.3 230000,250.0 240000,225.1 -250000,201.9 +250000,202.0 260000,250.0 270000,250.0 280000,250.0 @@ -35,17 +35,17 @@ step,stat_eval/ep_length 340000,250.0 350000,250.0 360000,250.0 -370000,202.3 +370000,202.2 380000,225.9 390000,250.0 400000,225.6 410000,250.0 420000,201.4 -430000,201.9 -440000,226.5 +430000,201.8 +440000,226.6 450000,250.0 460000,250.0 -470000,226.3 +470000,226.2 480000,201.2 490000,250.0 500000,250.0 @@ -56,7 +56,7 @@ step,stat_eval/ep_length 550000,225.3 560000,201.6 570000,250.0 -580000,202.6 +580000,202.5 590000,225.3 600000,250.0 610000,250.0 @@ -85,15 +85,15 @@ step,stat_eval/ep_length 840000,250.0 850000,151.6 860000,250.0 -870000,201.6 -880000,201.6 +870000,201.7 +880000,201.5 890000,226.1 900000,226.0 910000,226.6 920000,225.2 930000,250.0 940000,250.0 -950000,227.5 +950000,227.3 960000,250.0 970000,225.2 980000,250.0 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat_eval/ep_return.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat_eval/ep_return.log index c5fc2611a..ae449f3c2 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat_eval/ep_return.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat_eval/ep_return.log @@ -1,101 +1,101 @@ step,stat_eval/ep_return -10000,77.2614088215646 -20000,82.08816492903273 -30000,91.66913224435433 -40000,110.08075840121444 -50000,121.16950344145944 -60000,114.0928179395426 -70000,106.82471739524699 -80000,133.0122294330469 -90000,126.41628142339464 -100000,134.00086936083466 -110000,97.47094599382353 -120000,134.8133843820241 -130000,98.7906515705432 -140000,132.03696954057003 -150000,147.38850630511925 -160000,122.94845798969304 -170000,128.63183450512273 -180000,115.2313954514188 -190000,134.95702688069645 -200000,145.7141986850916 -210000,102.64924228754549 -220000,126.10793692126659 -230000,145.13570608387903 -240000,125.24281462375458 -250000,111.53494122987722 -260000,148.37371592328776 -270000,147.48117336204038 -280000,154.81138824324697 -290000,113.39506771755059 -300000,127.93538493486089 -310000,146.155707747101 -320000,141.69309454172296 -330000,124.41498487558064 -340000,144.9387041767835 -350000,143.52380053378096 -360000,158.9202759913778 -370000,125.63012878204754 -380000,138.93762982472109 -390000,143.8247088533121 -400000,137.7717203891894 -410000,154.56482730248177 -420000,131.3538289714356 -430000,127.05283948583443 -440000,136.31922131439575 -450000,160.36206490526516 -460000,177.98494461453922 -470000,134.6334854247403 -480000,134.44648522756268 -490000,146.20853987683347 -500000,148.64180951629868 -510000,160.78063337368442 -520000,151.65750246136076 -530000,159.20541650929295 -540000,144.4891727555926 -550000,142.033206581607 -560000,155.87864805166618 -570000,159.3755065190731 -580000,119.86584779952106 -590000,150.68522274300267 -600000,159.3747274273264 -610000,172.03499125185422 -620000,149.863047499431 -630000,162.7412667072623 -640000,160.19099601521089 -650000,160.5648136769573 -660000,157.97788044732368 -670000,169.4564243500858 -680000,155.63221523032553 -690000,174.7340515365793 -700000,161.08373469477118 -710000,171.3254644124106 -720000,149.0712022197965 -730000,160.6231836210954 -740000,154.69495554102022 -750000,143.48480528949 -760000,145.85035897336496 -770000,135.53499799916872 -780000,157.498874054124 -790000,144.18245965438706 -800000,155.039153138307 -810000,172.03895010701385 -820000,184.31950516152392 -830000,174.16437699926897 -840000,170.7721417990786 -850000,101.83611730282018 -860000,153.61961628974692 -870000,135.2647980702981 -880000,135.6727596769663 -890000,156.79304783049662 -900000,144.32487549181366 -910000,149.97796768416242 -920000,173.17444692690327 -930000,174.6190677671411 -940000,160.82277348139866 -950000,144.0777875106978 -960000,169.13692615251605 -970000,140.85339354641474 -980000,172.9825789226335 -990000,165.26667187634428 -1000000,156.0146744110092 +10000,52.58037925779316 +20000,70.71989648443534 +30000,61.06606590828509 +40000,98.26029040943634 +50000,115.63532220382278 +60000,117.38230362475296 +70000,103.12915768071659 +80000,127.32495361673804 +90000,132.9799977518705 +100000,138.83723496767578 +110000,93.18504540998201 +120000,137.191890861839 +130000,107.78610358649618 +140000,140.8839048326664 +150000,142.3280653008773 +160000,129.51214723059553 +170000,135.53149080234704 +180000,118.59859786427111 +190000,143.2381644933055 +200000,140.01693883915428 +210000,102.12680547863049 +220000,131.3286701519543 +230000,142.17055016127958 +240000,122.90096121948527 +250000,116.10374196377776 +260000,146.39174425594882 +270000,152.66732456791797 +280000,158.41480619409123 +290000,129.7745924942916 +300000,122.90256306807836 +310000,142.6614257505802 +320000,145.57083575242527 +330000,122.97261740884692 +340000,151.09500557091164 +350000,151.05902156162125 +360000,157.9278660896897 +370000,132.8587286115316 +380000,142.51299697014855 +390000,141.73388086032716 +400000,134.98889026273721 +410000,159.87404797022631 +420000,136.26408348854056 +430000,129.96723572619473 +440000,136.87779195468872 +450000,164.94844777862076 +460000,177.76763401049968 +470000,127.89452149483182 +480000,128.6805294446373 +490000,140.5402733851485 +500000,147.37830191209065 +510000,156.13714468996253 +520000,140.9402260519773 +530000,151.04309017815643 +540000,138.7053446531122 +550000,134.9441339206674 +560000,150.4275131104323 +570000,145.98624602410945 +580000,114.66349872282322 +590000,145.23131640598302 +600000,155.11178380870516 +610000,162.26547059073656 +620000,142.61534989617172 +630000,160.73828696763252 +640000,155.31347373184454 +650000,157.18692456414877 +660000,158.86896183825698 +670000,164.89190069664542 +680000,156.88750379051532 +690000,179.545329022949 +700000,155.95909991405466 +710000,169.59102346799 +720000,152.2999196972088 +730000,161.45152826452514 +740000,152.22352520119244 +750000,141.61570408813037 +760000,144.41745353298364 +770000,133.51639630229718 +780000,156.4804179286578 +790000,145.2804941237527 +800000,155.40624545539703 +810000,167.77234240001667 +820000,181.4531873597596 +830000,171.5369018381219 +840000,171.45216452720038 +850000,99.40623387861896 +860000,154.71796618128457 +870000,134.0882548693167 +880000,134.04075192726236 +890000,151.46699909691745 +900000,144.40638094648745 +910000,149.89497834681092 +920000,165.36425134319683 +930000,167.78506595090488 +940000,154.57729324792697 +950000,139.4875761580764 +960000,167.21570033538964 +970000,143.01097129673713 +980000,174.68163351314507 +990000,170.72414112218507 +1000000,155.74060350429048 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat_eval/ep_reward.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat_eval/ep_reward.log index d463089b5..7ace15d2d 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat_eval/ep_reward.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat_eval/ep_reward.log @@ -1,101 +1,101 @@ step,stat_eval/ep_reward -10000,0.31439236433386164 -20000,0.3290529432463646 -30000,0.36680535554203897 -40000,0.4403230336048578 -50000,0.48467801376583786 -60000,0.4563712717581703 -70000,0.4277936966527324 -80000,0.5320489177321877 -90000,0.5056651256935785 -100000,0.5360034774433385 -110000,0.39109481337226376 -120000,0.5392535375280962 -130000,0.40107163054219075 -140000,0.52814787816228 -150000,0.5895540252204768 -160000,0.49179383195877213 -170000,0.5146728562822745 -180000,0.4613454795712305 -190000,0.539828107522786 -200000,0.5828567947403663 -210000,0.4146348695180933 -220000,0.5048782486689363 -230000,0.5805428243355163 -240000,0.5015903260191069 -250000,0.44660689161208805 -260000,0.5934948636931511 -270000,0.5899246934481615 -280000,0.6192455529729878 -290000,0.455812815528033 -300000,0.5117584110887871 -310000,0.5846228309884041 -320000,0.566772378166892 -330000,0.501343089339191 -340000,0.579754816707134 -350000,0.5740952021351239 -360000,0.635681103965511 -370000,0.5069430672719037 -380000,0.5564975802798274 -390000,0.5752988354132484 -400000,0.5515610572715326 -410000,0.6182593092099271 -420000,0.5259113639456349 -430000,0.5135850838274602 -440000,0.5453882269095224 -450000,0.6414482596210607 -460000,0.7119397784581569 -470000,0.5411422606397329 -480000,0.539695311727481 -490000,0.584834159507334 -500000,0.594567238065195 -510000,0.6474307611813428 -520000,0.6080524574200216 -530000,0.6368216660371718 -540000,0.5779934933794636 -550000,0.5685128337922447 -560000,0.6243442561274405 -570000,0.6375020260762924 -580000,0.47985103118404454 -590000,0.6030433299322709 -600000,0.6374989097093058 -610000,0.6881399650074169 -620000,0.6005196128387135 -630000,0.6509650668290492 -640000,0.6407639840608436 -650000,0.6422592547078292 -660000,0.6319115217892948 -670000,0.6778256974003433 -680000,0.622528860921302 -690000,0.6989362061463171 -700000,0.6443349387790847 -710000,0.6853018576496424 -720000,0.5963117424281215 -730000,0.642829747200841 -740000,0.6187798221640809 -750000,0.5800556325425825 -760000,0.5834189981738553 -770000,0.5464142907339775 -780000,0.6378123070969529 -790000,0.5795222574679461 -800000,0.6201566125532281 -810000,0.6881558004280555 -820000,0.7372780206460956 -830000,0.6966575079970758 -840000,0.6830885671963145 -850000,0.4160121418977988 -860000,0.6144784651589876 -870000,0.5417983382954235 -880000,0.5441000965037658 -890000,0.6271973321710527 -900000,0.5775688291662175 -910000,0.6008677488226954 -920000,0.6933813616840389 -930000,0.6984762710685645 -940000,0.6432910939255946 -950000,0.5765021340594304 -960000,0.6765477046100643 -970000,0.5640620429274027 -980000,0.6919303156905341 -990000,0.6690053707575141 -1000000,0.6240586976440368 +10000,0.21561504552115016 +20000,0.2836027019259546 +30000,0.24439329064512724 +40000,0.39304116163774533 +50000,0.4625412888152912 +60000,0.4695292144990118 +70000,0.4130121131796982 +80000,0.5092998144669523 +90000,0.531919991007482 +100000,0.5553489398707032 +110000,0.3739618599571542 +120000,0.5487675634473559 +130000,0.43709218884924184 +140000,0.5635356193306656 +150000,0.5693122612035093 +160000,0.5180485889223823 +170000,0.542271197304713 +180000,0.47481397006238807 +190000,0.5729526579732219 +200000,0.5600677553566171 +210000,0.41255567247117975 +220000,0.5257610504222352 +230000,0.5686822006451183 +240000,0.49221718493290556 +250000,0.46485902070609597 +260000,0.5855669770237953 +270000,0.6106692982716718 +280000,0.6336592247763649 +290000,0.5213279922219353 +300000,0.49162712900461847 +310000,0.5706457030023208 +320000,0.5822833430097011 +330000,0.49558744748276345 +340000,0.6043800222836466 +350000,0.604236086246485 +360000,0.6317114643587588 +370000,0.5357224402809662 +380000,0.5708030386093722 +390000,0.5669355234413087 +400000,0.5404297372187257 +410000,0.6394961918809053 +420000,0.5455523558023756 +430000,0.5253036738761778 +440000,0.5476185454977989 +450000,0.6597937911144831 +460000,0.7110705360419987 +470000,0.5140870928345519 +480000,0.516633316827491 +490000,0.5621610935405941 +500000,0.5895132076483625 +510000,0.6288622208804766 +520000,0.5651835046973164 +530000,0.6041723607126257 +540000,0.5548582746256943 +550000,0.540158330824376 +560000,0.6025399985479096 +570000,0.5839449840964377 +580000,0.45904455745108236 +590000,0.5812277146763973 +600000,0.6204471352348205 +610000,0.6490618823629462 +620000,0.5715283518365544 +630000,0.6429531478705299 +640000,0.6212538949273783 +650000,0.628747698256595 +660000,0.635475847353028 +670000,0.6595676027865818 +680000,0.6275500151620613 +690000,0.718181316091796 +700000,0.6238363996562185 +710000,0.67836409387196 +720000,0.6092268520878454 +730000,0.6461429152979368 +740000,0.6088941008047698 +750000,0.5725904109875471 +760000,0.5776873449715632 +770000,0.5383477492139895 +780000,0.6337447280239319 +790000,0.5838977354822874 +800000,0.621624981821588 +810000,0.6710893696000667 +820000,0.7258127494390383 +830000,0.6861476073524877 +840000,0.6858086581088016 +850000,0.406294413129591 +860000,0.6188718647251383 +870000,0.5370913938510634 +880000,0.5375908620949639 +890000,0.6058929112605221 +900000,0.5778952902787597 +910000,0.6005368622544373 +920000,0.6621416510255069 +930000,0.6711402638036197 +940000,0.6183091729917078 +950000,0.5581581659650717 +960000,0.6688628013415585 +970000,0.5726913264001988 +980000,0.6987265340525803 +990000,0.6908537973580426 +1000000,0.6229624140171618 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat_eval/mse.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat_eval/mse.log index fcedb137e..771a51653 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat_eval/mse.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/logs/stat_eval/mse.log @@ -1,101 +1,101 @@ step,stat_eval/mse -10000,379.3954158305747 -20000,299.5147015968893 -30000,357.5082174564933 -40000,304.7986142768026 -50000,288.33718849126 -60000,332.5154400672318 -70000,360.9225455188356 -80000,211.07216359816078 -90000,246.13931650314322 -100000,290.9281524120805 -110000,340.36564784877527 -120000,285.4345707487544 -130000,170.399174781908 -140000,283.13075590501705 -150000,231.84179725724658 -160000,316.9058969181189 -170000,236.434652224614 -180000,223.0423265596322 -190000,281.1362233036059 -200000,276.8161033336543 -210000,209.2725748354419 -220000,256.40393161016783 -230000,276.3030201569104 -240000,271.4575843000281 -250000,257.0671731287914 -260000,256.8503924183255 -270000,270.19238778549413 -280000,248.2641340516838 -290000,188.10511779620336 -300000,293.96082965663464 -310000,307.9504286083207 -320000,321.3894153732816 -330000,202.30564661153068 -340000,300.14885941237713 -350000,346.1801339994671 -360000,237.476166351855 -370000,175.02466378776143 -380000,209.68986623727915 -390000,316.96263773471327 -400000,232.0637555484528 -410000,248.2948597610394 -420000,148.87021301926066 -430000,186.5139562888106 -440000,281.2953424805638 -450000,207.6490897860479 -460000,126.32689909411314 -470000,239.46073068909783 -480000,173.09129324972355 -490000,378.0344569069938 -500000,265.74637913188303 -510000,134.24008510019183 -520000,207.95097618715917 -530000,281.8613854965229 -540000,248.2540699721591 -550000,264.426618002476 -560000,103.95723345595954 -570000,272.8681449697424 -580000,294.40375261665423 -590000,251.13736256140834 -600000,274.8750247071642 -610000,234.51965330119884 -620000,208.41131934896507 -630000,230.9395437606363 -640000,235.40244102388402 -650000,305.34416973244856 -660000,260.28896807791176 -670000,232.8474144305927 -680000,301.01682208263173 -690000,158.90450589169276 -700000,225.90227787475274 -710000,237.073627289368 -720000,198.78695377409008 -730000,158.43211303977156 -740000,251.45337549817464 -750000,225.88057503568325 -760000,239.00946736923734 -770000,134.71319286390838 -780000,198.1073377271849 -790000,200.9453422114432 -800000,293.5750078947307 -810000,188.24784929272573 -820000,135.52785645583901 -830000,212.4282972187103 -840000,205.7328375234994 -850000,106.02722387686008 -860000,346.01862455593744 -870000,210.182662222948 -880000,193.82697153078283 -890000,199.38932626627874 -900000,233.16473936758393 -910000,228.43936630176603 -920000,101.39281013819627 -930000,164.17341449678284 -940000,247.18276332438558 -950000,268.91606189570166 -960000,200.18781822426538 -970000,252.00954117790812 -980000,170.42831052455716 -990000,104.21345929535872 -1000000,343.94831952817793 +10000,534.5875686419292 +20000,326.57161068150737 +30000,463.565254086163 +40000,327.12659528516804 +50000,292.8636554454421 +60000,336.30178226609365 +70000,366.96404807696086 +80000,219.56648282867687 +90000,233.46904586972224 +100000,273.94270324565287 +110000,358.20510587572414 +120000,274.7719356705781 +130000,151.2739953920723 +140000,265.53333638391257 +150000,234.19328027195306 +160000,300.44899397880664 +170000,218.72705792077127 +180000,217.28692104763257 +190000,266.28807865467127 +200000,276.8656857287995 +210000,210.39920325487182 +220000,244.1944117066167 +230000,270.5985355654903 +240000,262.17755974464546 +250000,243.06571069337306 +260000,253.85347029054128 +270000,252.0905161798793 +280000,230.2180606780878 +290000,155.7071561316254 +300000,292.8515350443639 +310000,301.19137149747866 +320000,306.8620343689463 +330000,199.71967980413407 +340000,285.0181062298626 +350000,327.5913965150983 +360000,235.04296139581106 +370000,164.7554275521165 +380000,199.81416664217377 +390000,318.5463144853798 +400000,231.74647839202083 +410000,234.06691976942548 +420000,136.8166306645749 +430000,177.64415381508385 +440000,277.39906001453136 +450000,199.95881372165167 +460000,123.05155258220191 +470000,248.0138501847147 +480000,179.41190563417862 +490000,373.64769555516665 +500000,256.6339130652795 +510000,136.77603689863653 +520000,222.0904972601491 +530000,289.87419502267795 +540000,255.21391628655903 +550000,274.1499835948086 +560000,108.68918131453161 +570000,289.5288537582038 +580000,298.58129971083065 +590000,253.19695954866975 +600000,279.37029753233213 +610000,244.68614541002435 +620000,216.26726873301862 +630000,224.9198009047919 +640000,242.20736447742107 +650000,300.29601130278627 +660000,254.67682830357134 +670000,236.68352584699406 +680000,291.1577934458993 +690000,152.19238745053448 +700000,233.94456838057422 +710000,238.290736260523 +720000,193.21300790771556 +730000,157.48276567897764 +740000,251.99600340838637 +750000,228.98506351753036 +760000,237.15004166244051 +770000,141.88609633294445 +780000,197.1206446761577 +790000,198.22053840244484 +800000,294.6231128993558 +810000,195.76561957943 +820000,140.18324658097964 +830000,215.33958690867948 +840000,199.8050075755192 +850000,107.24487664156591 +860000,339.4726509800618 +870000,209.26374867593137 +880000,200.3155875942596 +890000,206.51916178549988 +900000,229.04528269079097 +910000,221.33521246074764 +920000,110.16322518361339 +930000,173.98665741413345 +940000,255.06223829074347 +950000,272.91722689302776 +960000,206.14369237998 +970000,247.04537673853807 +980000,173.94619447963115 +990000,98.06494680799544 +1000000,337.87416171067287 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/model_best.pt b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/model_best.pt index 8c60b1802..a60cf19fd 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/model_best.pt and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/model_best.pt differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/model_latest.pt b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/model_latest.pt index 675bd1523..755b7b5e0 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/model_latest.pt and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/model_latest.pt differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-loss-approx_kl.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-loss-approx_kl.jpg index bd5be2db7..4534cbcc1 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-loss-approx_kl.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-loss-approx_kl.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-loss-entropy_loss.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-loss-entropy_loss.jpg index c6d35b147..38484b1e4 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-loss-entropy_loss.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-loss-entropy_loss.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-loss-policy_loss.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-loss-policy_loss.jpg index 825884da1..b0e2ea0fb 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-loss-policy_loss.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-loss-policy_loss.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-loss-value_loss.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-loss-value_loss.jpg index 56ad847f5..7a27c2cad 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-loss-value_loss.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-loss-value_loss.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat-constraint_violation.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat-constraint_violation.jpg index 614c1baed..36a006b8c 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat-constraint_violation.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat-constraint_violation.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat-ep_constraint_violation.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat-ep_constraint_violation.jpg index 1b55c7359..485eb922c 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat-ep_constraint_violation.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat-ep_constraint_violation.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat-ep_length.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat-ep_length.jpg index 90a928d36..90032bb78 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat-ep_length.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat-ep_length.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat-ep_return.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat-ep_return.jpg index ffd7546a7..afa425852 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat-ep_return.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat-ep_return.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat-ep_reward.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat-ep_reward.jpg index 1dca5ed21..6985f3a08 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat-ep_reward.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat-ep_reward.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat_eval-constraint_violation.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat_eval-constraint_violation.jpg index f9d9fb21b..e237018fc 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat_eval-constraint_violation.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat_eval-constraint_violation.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat_eval-ep_length.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat_eval-ep_length.jpg index 6a82901d7..937b42456 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat_eval-ep_length.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat_eval-ep_length.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat_eval-ep_return.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat_eval-ep_return.jpg index d006e28d0..dc35dcc22 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat_eval-ep_return.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat_eval-ep_return.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat_eval-ep_reward.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat_eval-ep_reward.jpg index fc91c9797..b52038822 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat_eval-ep_reward.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat_eval-ep_reward.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat_eval-mse.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat_eval-mse.jpg index 04b2b1521..a6542bf00 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat_eval-mse.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/plots/-stat_eval-mse.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/std_out.txt b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/std_out.txt index 728e56bd4..da2324361 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/std_out.txt +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf/std_out.txt @@ -1,2601 +1,2601 @@ -2023-10-19 14:51:55,671 : Eval | ep_lengths 225.90 +/- 72.30 | ep_return 77.261 +/- 37.320 -2023-10-19 14:51:55,695 : +2023-10-27 16:42:35,846 : Eval | ep_lengths 225.90 +/- 72.30 | ep_return 52.580 +/- 32.977 +2023-10-27 16:42:35,866 : -------------------------------------- | loss/ | | -| approx_kl | 0.0193 | -| entropy_loss | -3.78 | -| policy_loss | -0.0193 | -| value_loss | 91.6 | +| approx_kl | 0.0182 | +| entropy_loss | -3.75 | +| policy_loss | -0.0206 | +| value_loss | 45.4 | | stat/ | | -| constraint_violation | 34 | -| ep_constraint_vio... | 1.1 | +| constraint_violation | 35 | +| ep_constraint_vio... | 0.9 | | ep_length | 226 | -| ep_return | 78.2 | -| ep_reward | 0.314 | +| ep_return | 55.1 | +| ep_reward | 0.222 | | stat_eval/ | | -| constraint_violation | 0.2 | +| constraint_violation | 0.3 | | ep_length | 226 | -| ep_return | 77.3 | -| ep_reward | 0.314 | -| mse | 379 | +| ep_return | 52.6 | +| ep_reward | 0.216 | +| mse | 535 | | time/ | | | progress | 0.01 | | step | 1e+04 | -| step_time | 11 | +| step_time | 8.13 | -------------------------------------- -2023-10-19 14:54:06,460 : Eval | ep_lengths 201.90 +/- 96.23 | ep_return 82.088 +/- 47.970 -2023-10-19 14:54:06,489 : +2023-10-27 16:44:12,356 : Eval | ep_lengths 201.90 +/- 96.23 | ep_return 70.720 +/- 41.501 +2023-10-27 16:44:12,390 : -------------------------------------- | loss/ | | -| approx_kl | 0.0244 | +| approx_kl | 0.0213 | | entropy_loss | -3.75 | -| policy_loss | -0.013 | -| value_loss | 35.8 | +| policy_loss | -0.0193 | +| value_loss | 11.2 | | stat/ | | -| constraint_violation | 43 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 75.8 | -| ep_reward | 0.303 | +| constraint_violation | 46 | +| ep_constraint_vio... | 0.2 | +| ep_length | 201 | +| ep_return | 57.4 | +| ep_reward | 0.23 | | stat_eval/ | | -| constraint_violation | 0.3 | +| constraint_violation | 0.4 | | ep_length | 202 | -| ep_return | 82.1 | -| ep_reward | 0.329 | -| mse | 300 | +| ep_return | 70.7 | +| ep_reward | 0.284 | +| mse | 327 | | time/ | | | progress | 0.02 | | step | 2e+04 | -| step_time | 11 | +| step_time | 8.21 | -------------------------------------- -2023-10-19 14:56:20,802 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 91.669 +/- 38.435 -2023-10-19 14:56:20,812 : +2023-10-27 16:45:50,280 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 61.066 +/- 34.246 +2023-10-27 16:45:50,281 : -------------------------------------- | loss/ | | -| approx_kl | 0.0145 | -| entropy_loss | -3.83 | -| policy_loss | -0.0101 | -| value_loss | 8.32 | +| approx_kl | 0.0237 | +| entropy_loss | -3.9 | +| policy_loss | -0.0116 | +| value_loss | 4.6 | | stat/ | | -| constraint_violation | 128 | +| constraint_violation | 120 | | ep_constraint_vio... | 1.1 | | ep_length | 250 | -| ep_return | 109 | -| ep_reward | 0.434 | +| ep_return | 58.2 | +| ep_reward | 0.233 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 91.7 | -| ep_reward | 0.367 | -| mse | 358 | +| ep_return | 61.1 | +| ep_reward | 0.244 | +| mse | 464 | | time/ | | | progress | 0.03 | | step | 3e+04 | -| step_time | 11.1 | +| step_time | 8.18 | -------------------------------------- -2023-10-19 14:58:36,945 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 110.081 +/- 20.423 -2023-10-19 14:58:36,955 : +2023-10-27 16:47:30,305 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 98.260 +/- 31.144 +2023-10-27 16:47:30,341 : -------------------------------------- | loss/ | | -| approx_kl | 0.0154 | -| entropy_loss | -3.93 | -| policy_loss | -0.0166 | -| value_loss | 4.24 | +| approx_kl | 0.0207 | +| entropy_loss | -3.96 | +| policy_loss | -0.0213 | +| value_loss | 7.93 | | stat/ | | -| constraint_violation | 141 | +| constraint_violation | 129 | | ep_constraint_vio... | 0.1 | | ep_length | 225 | -| ep_return | 82.6 | -| ep_reward | 0.347 | +| ep_return | 82.7 | +| ep_reward | 0.348 | | stat_eval/ | | -| constraint_violation | 1.1 | +| constraint_violation | 0 | | ep_length | 250 | -| ep_return | 110 | -| ep_reward | 0.44 | -| mse | 305 | +| ep_return | 98.3 | +| ep_reward | 0.393 | +| mse | 327 | | time/ | | | progress | 0.04 | | step | 4e+04 | -| step_time | 11.1 | +| step_time | 8.04 | -------------------------------------- -2023-10-19 15:00:53,467 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 121.170 +/- 14.786 -2023-10-19 15:00:53,477 : +2023-10-27 16:49:11,235 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 115.635 +/- 13.291 +2023-10-27 16:49:11,262 : -------------------------------------- | loss/ | | -| approx_kl | 0.0197 | -| entropy_loss | -4.04 | -| policy_loss | -0.0109 | -| value_loss | 3.63 | +| approx_kl | 0.0253 | +| entropy_loss | -3.95 | +| policy_loss | 0.00695 | +| value_loss | 4.45 | | stat/ | | -| constraint_violation | 182 | -| ep_constraint_vio... | 0.9 | +| constraint_violation | 174 | +| ep_constraint_vio... | 1.2 | | ep_length | 226 | -| ep_return | 96.4 | -| ep_reward | 0.387 | +| ep_return | 93.6 | +| ep_reward | 0.376 | | stat_eval/ | | | constraint_violation | 0.8 | | ep_length | 250 | -| ep_return | 121 | -| ep_reward | 0.485 | -| mse | 288 | +| ep_return | 116 | +| ep_reward | 0.463 | +| mse | 293 | | time/ | | | progress | 0.05 | | step | 5e+04 | -| step_time | 11 | +| step_time | 8.25 | -------------------------------------- -2023-10-19 15:03:11,326 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 114.093 +/- 27.000 -2023-10-19 15:03:11,328 : +2023-10-27 16:50:52,949 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 117.382 +/- 28.692 +2023-10-27 16:50:52,958 : -------------------------------------- | loss/ | | -| approx_kl | 0.0118 | -| entropy_loss | -4.09 | -| policy_loss | -0.0111 | -| value_loss | 2.04 | +| approx_kl | 0.0183 | +| entropy_loss | -3.99 | +| policy_loss | -0.00579 | +| value_loss | 4.78 | | stat/ | | -| constraint_violation | 214 | -| ep_constraint_vio... | 1.2 | +| constraint_violation | 208 | +| ep_constraint_vio... | 2.4 | | ep_length | 227 | -| ep_return | 101 | -| ep_reward | 0.409 | +| ep_return | 99 | +| ep_reward | 0.4 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 114 | -| ep_reward | 0.456 | -| mse | 333 | +| ep_return | 117 | +| ep_reward | 0.47 | +| mse | 336 | | time/ | | | progress | 0.06 | | step | 6e+04 | -| step_time | 11.4 | +| step_time | 8.43 | -------------------------------------- -2023-10-19 15:05:27,739 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 106.825 +/- 41.296 -2023-10-19 15:05:27,740 : +2023-10-27 16:52:33,458 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 103.129 +/- 40.769 +2023-10-27 16:52:33,459 : -------------------------------------- | loss/ | | -| approx_kl | 0.0159 | -| entropy_loss | -4.09 | -| policy_loss | -0.0156 | -| value_loss | 2.87 | +| approx_kl | 0.025 | +| entropy_loss | -3.94 | +| policy_loss | -0.0154 | +| value_loss | 1.62 | | stat/ | | -| constraint_violation | 245 | +| constraint_violation | 228 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 132 | -| ep_reward | 0.526 | +| ep_return | 141 | +| ep_reward | 0.562 | | stat_eval/ | | -| constraint_violation | 1.3 | +| constraint_violation | 1.7 | | ep_length | 225 | -| ep_return | 107 | -| ep_reward | 0.428 | -| mse | 361 | +| ep_return | 103 | +| ep_reward | 0.413 | +| mse | 367 | | time/ | | | progress | 0.07 | | step | 7e+04 | -| step_time | 11.3 | +| step_time | 8.19 | -------------------------------------- -2023-10-19 15:07:46,230 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 133.012 +/- 17.202 -2023-10-19 15:07:46,241 : +2023-10-27 16:54:15,316 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 127.325 +/- 24.331 +2023-10-27 16:54:15,352 : -------------------------------------- | loss/ | | -| approx_kl | 0.0124 | -| entropy_loss | -4.16 | -| policy_loss | -0.0191 | -| value_loss | 3.42 | +| approx_kl | 0.0187 | +| entropy_loss | -4.07 | +| policy_loss | -0.0127 | +| value_loss | 1.27 | | stat/ | | -| constraint_violation | 250 | +| constraint_violation | 232 | | ep_constraint_vio... | 0.2 | | ep_length | 226 | -| ep_return | 107 | -| ep_reward | 0.426 | +| ep_return | 103 | +| ep_reward | 0.412 | | stat_eval/ | | -| constraint_violation | 1.1 | +| constraint_violation | 1 | | ep_length | 250 | -| ep_return | 133 | -| ep_reward | 0.532 | -| mse | 211 | +| ep_return | 127 | +| ep_reward | 0.509 | +| mse | 220 | | time/ | | | progress | 0.08 | | step | 8e+04 | -| step_time | 11.3 | +| step_time | 8.24 | -------------------------------------- -2023-10-19 15:10:05,412 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 126.416 +/- 14.104 -2023-10-19 15:10:05,414 : +2023-10-27 16:55:57,213 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 132.980 +/- 17.472 +2023-10-27 16:55:57,244 : -------------------------------------- | loss/ | | -| approx_kl | 0.0153 | -| entropy_loss | -4.11 | -| policy_loss | -0.0137 | -| value_loss | 1.78 | +| approx_kl | 0.0196 | +| entropy_loss | -4.1 | +| policy_loss | -0.0127 | +| value_loss | 2.38 | | stat/ | | -| constraint_violation | 266 | -| ep_constraint_vio... | 0.2 | -| ep_length | 226 | -| ep_return | 107 | -| ep_reward | 0.427 | +| constraint_violation | 247 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 116 | +| ep_reward | 0.463 | | stat_eval/ | | | constraint_violation | 1 | | ep_length | 250 | -| ep_return | 126 | -| ep_reward | 0.506 | -| mse | 246 | +| ep_return | 133 | +| ep_reward | 0.532 | +| mse | 233 | | time/ | | | progress | 0.09 | | step | 9e+04 | -| step_time | 11.4 | +| step_time | 8.18 | -------------------------------------- -2023-10-19 15:12:23,899 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 134.001 +/- 25.562 -2023-10-19 15:12:23,908 : +2023-10-27 16:57:39,063 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 138.837 +/- 27.194 +2023-10-27 16:57:39,113 : -------------------------------------- | loss/ | | -| approx_kl | 0.0222 | -| entropy_loss | -4.11 | -| policy_loss | -0.0111 | -| value_loss | 2.89 | +| approx_kl | 0.0134 | +| entropy_loss | -4.15 | +| policy_loss | -0.0105 | +| value_loss | 1.22 | | stat/ | | -| constraint_violation | 293 | +| constraint_violation | 276 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 124 | -| ep_reward | 0.498 | +| ep_return | 130 | +| ep_reward | 0.521 | | stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 0.1 | | ep_length | 250 | -| ep_return | 134 | -| ep_reward | 0.536 | -| mse | 291 | +| ep_return | 139 | +| ep_reward | 0.555 | +| mse | 274 | | time/ | | | progress | 0.1 | | step | 1e+05 | -| step_time | 11.1 | +| step_time | 8.28 | -------------------------------------- -2023-10-19 15:14:36,783 : Eval | ep_lengths 201.30 +/- 97.42 | ep_return 97.471 +/- 49.303 -2023-10-19 15:14:36,784 : +2023-10-27 16:59:18,446 : Eval | ep_lengths 201.30 +/- 97.42 | ep_return 93.185 +/- 48.012 +2023-10-27 16:59:18,447 : -------------------------------------- | loss/ | | -| approx_kl | 0.0123 | -| entropy_loss | -4.17 | -| policy_loss | -0.0117 | -| value_loss | 1.41 | +| approx_kl | 0.0315 | +| entropy_loss | -4.19 | +| policy_loss | -0.00603 | +| value_loss | 1.49 | | stat/ | | -| constraint_violation | 318 | +| constraint_violation | 307 | | ep_constraint_vio... | 0.8 | | ep_length | 226 | -| ep_return | 121 | -| ep_reward | 0.486 | +| ep_return | 123 | +| ep_reward | 0.494 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 201 | -| ep_return | 97.5 | -| ep_reward | 0.391 | -| mse | 340 | +| ep_return | 93.2 | +| ep_reward | 0.374 | +| mse | 358 | | time/ | | | progress | 0.11 | | step | 1.1e+05 | -| step_time | 10.9 | +| step_time | 8.23 | -------------------------------------- -2023-10-19 15:16:52,080 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 134.813 +/- 24.581 -2023-10-19 15:16:52,109 : +2023-10-27 17:00:59,831 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 137.192 +/- 33.612 +2023-10-27 17:00:59,832 : -------------------------------------- | loss/ | | -| approx_kl | 0.0183 | -| entropy_loss | -4.08 | -| policy_loss | -0.0214 | -| value_loss | 1.83 | +| approx_kl | 0.0318 | +| entropy_loss | -4.21 | +| policy_loss | -0.00429 | +| value_loss | 1.8 | | stat/ | | -| constraint_violation | 339 | -| ep_constraint_vio... | 0.2 | -| ep_length | 226 | -| ep_return | 120 | -| ep_reward | 0.48 | +| constraint_violation | 317 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 140 | +| ep_reward | 0.561 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 250 | -| ep_return | 135 | -| ep_reward | 0.539 | -| mse | 285 | +| ep_return | 137 | +| ep_reward | 0.549 | +| mse | 275 | | time/ | | | progress | 0.12 | | step | 1.2e+05 | -| step_time | 10.9 | +| step_time | 8.04 | -------------------------------------- -2023-10-19 15:18:57,498 : Eval | ep_lengths 177.50 +/- 110.77 | ep_return 98.791 +/- 66.112 -2023-10-19 15:18:57,499 : +2023-10-27 17:02:35,884 : Eval | ep_lengths 177.60 +/- 110.62 | ep_return 107.786 +/- 72.621 +2023-10-27 17:02:35,910 : -------------------------------------- | loss/ | | -| approx_kl | 0.0288 | -| entropy_loss | -4.01 | -| policy_loss | -0.00918 | -| value_loss | 0.9 | +| approx_kl | 0.0217 | +| entropy_loss | -4.23 | +| policy_loss | -0.0152 | +| value_loss | 1.99 | | stat/ | | -| constraint_violation | 360 | +| constraint_violation | 337 | | ep_constraint_vio... | 0.2 | -| ep_length | 202 | -| ep_return | 88.7 | -| ep_reward | 0.358 | +| ep_length | 225 | +| ep_return | 111 | +| ep_reward | 0.443 | | stat_eval/ | | -| constraint_violation | 1.4 | +| constraint_violation | 1.1 | | ep_length | 178 | -| ep_return | 98.8 | -| ep_reward | 0.401 | -| mse | 170 | +| ep_return | 108 | +| ep_reward | 0.437 | +| mse | 151 | | time/ | | | progress | 0.13 | | step | 1.3e+05 | -| step_time | 10.6 | +| step_time | 8.28 | -------------------------------------- -2023-10-19 15:21:12,771 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 132.037 +/- 16.359 -2023-10-19 15:21:12,773 : +2023-10-27 17:04:17,543 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 140.884 +/- 29.489 +2023-10-27 17:04:17,554 : -------------------------------------- | loss/ | | -| approx_kl | 0.0153 | -| entropy_loss | -4.05 | -| policy_loss | -0.0188 | -| value_loss | 1.8 | +| approx_kl | 0.0152 | +| entropy_loss | -4.25 | +| policy_loss | -0.0172 | +| value_loss | 2.21 | | stat/ | | -| constraint_violation | 399 | -| ep_constraint_vio... | 1.4 | +| constraint_violation | 379 | +| ep_constraint_vio... | 0.1 | | ep_length | 250 | -| ep_return | 119 | -| ep_reward | 0.476 | +| ep_return | 129 | +| ep_reward | 0.517 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 132 | -| ep_reward | 0.528 | -| mse | 283 | +| ep_return | 141 | +| ep_reward | 0.564 | +| mse | 266 | | time/ | | | progress | 0.14 | | step | 1.4e+05 | -| step_time | 11 | +| step_time | 8.26 | -------------------------------------- -2023-10-19 15:23:27,977 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 147.389 +/- 16.112 -2023-10-19 15:23:27,986 : +2023-10-27 17:05:58,247 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 142.328 +/- 21.800 +2023-10-27 17:05:58,257 : -------------------------------------- | loss/ | | -| approx_kl | 0.0235 | -| entropy_loss | -4.05 | -| policy_loss | -0.0247 | -| value_loss | 0.898 | +| approx_kl | 0.0208 | +| entropy_loss | -4.23 | +| policy_loss | -0.00967 | +| value_loss | 2.41 | | stat/ | | -| constraint_violation | 411 | -| ep_constraint_vio... | 0.2 | -| ep_length | 202 | -| ep_return | 98 | -| ep_reward | 0.392 | +| constraint_violation | 409 | +| ep_constraint_vio... | 1.4 | +| ep_length | 226 | +| ep_return | 109 | +| ep_reward | 0.439 | | stat_eval/ | | -| constraint_violation | 1.2 | +| constraint_violation | 1.3 | | ep_length | 250 | -| ep_return | 147 | -| ep_reward | 0.59 | -| mse | 232 | +| ep_return | 142 | +| ep_reward | 0.569 | +| mse | 234 | | time/ | | | progress | 0.15 | | step | 1.5e+05 | -| step_time | 11.1 | +| step_time | 8.23 | -------------------------------------- -2023-10-19 15:25:42,814 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 122.948 +/- 16.481 -2023-10-19 15:25:42,816 : +2023-10-27 17:07:39,199 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 129.512 +/- 20.021 +2023-10-27 17:07:39,200 : -------------------------------------- | loss/ | | -| approx_kl | 0.0274 | -| entropy_loss | -4.07 | -| policy_loss | -0.00193 | -| value_loss | 1.32 | +| approx_kl | 0.0144 | +| entropy_loss | -4.25 | +| policy_loss | -0.0138 | +| value_loss | 2.11 | | stat/ | | -| constraint_violation | 446 | +| constraint_violation | 430 | | ep_constraint_vio... | 0.2 | | ep_length | 200 | -| ep_return | 118 | -| ep_reward | 0.472 | +| ep_return | 125 | +| ep_reward | 0.502 | | stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 0.1 | | ep_length | 250 | -| ep_return | 123 | -| ep_reward | 0.492 | -| mse | 317 | +| ep_return | 130 | +| ep_reward | 0.518 | +| mse | 300 | | time/ | | | progress | 0.16 | | step | 1.6e+05 | -| step_time | 11 | +| step_time | 8.08 | -------------------------------------- -2023-10-19 15:27:51,574 : Eval | ep_lengths 225.80 +/- 72.60 | ep_return 128.632 +/- 49.673 -2023-10-19 15:27:51,576 : +2023-10-27 17:09:19,377 : Eval | ep_lengths 225.80 +/- 72.60 | ep_return 135.531 +/- 57.335 +2023-10-27 17:09:19,378 : -------------------------------------- | loss/ | | -| approx_kl | 0.0195 | -| entropy_loss | -4.12 | -| policy_loss | -0.0114 | -| value_loss | 0.921 | +| approx_kl | 0.0289 | +| entropy_loss | -4.34 | +| policy_loss | -0.00451 | +| value_loss | 3.79 | | stat/ | | -| constraint_violation | 479 | -| ep_constraint_vio... | 1.3 | -| ep_length | 200 | -| ep_return | 116 | -| ep_reward | 0.469 | +| constraint_violation | 472 | +| ep_constraint_vio... | 1.9 | +| ep_length | 202 | +| ep_return | 106 | +| ep_reward | 0.426 | | stat_eval/ | | -| constraint_violation | 0.2 | +| constraint_violation | 0.3 | | ep_length | 226 | -| ep_return | 129 | -| ep_reward | 0.515 | -| mse | 236 | +| ep_return | 136 | +| ep_reward | 0.542 | +| mse | 219 | | time/ | | | progress | 0.17 | | step | 1.7e+05 | -| step_time | 10.7 | +| step_time | 8.36 | -------------------------------------- -2023-10-19 15:29:56,984 : Eval | ep_lengths 200.90 +/- 98.20 | ep_return 115.231 +/- 60.799 -2023-10-19 15:29:57,006 : +2023-10-27 17:10:57,601 : Eval | ep_lengths 200.90 +/- 98.20 | ep_return 118.599 +/- 62.572 +2023-10-27 17:10:57,627 : -------------------------------------- | loss/ | | -| approx_kl | 0.0198 | -| entropy_loss | -4.19 | -| policy_loss | -0.0135 | -| value_loss | 1.11 | +| approx_kl | 0.0207 | +| entropy_loss | -4.3 | +| policy_loss | -0.00348 | +| value_loss | 1.43 | | stat/ | | -| constraint_violation | 497 | -| ep_constraint_vio... | 0.2 | +| constraint_violation | 494 | +| ep_constraint_vio... | 1.4 | | ep_length | 250 | -| ep_return | 136 | -| ep_reward | 0.542 | +| ep_return | 147 | +| ep_reward | 0.588 | | stat_eval/ | | -| constraint_violation | 0.3 | +| constraint_violation | 0.2 | | ep_length | 201 | -| ep_return | 115 | -| ep_reward | 0.461 | -| mse | 223 | +| ep_return | 119 | +| ep_reward | 0.475 | +| mse | 217 | | time/ | | | progress | 0.18 | | step | 1.8e+05 | -| step_time | 10.6 | +| step_time | 8.38 | -------------------------------------- -2023-10-19 15:32:06,561 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 134.957 +/- 23.330 -2023-10-19 15:32:06,563 : +2023-10-27 17:12:40,317 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 143.238 +/- 20.606 +2023-10-27 17:12:40,327 : -------------------------------------- | loss/ | | -| approx_kl | 0.0322 | -| entropy_loss | -4.22 | -| policy_loss | -0.0152 | -| value_loss | 0.597 | +| approx_kl | 0.0277 | +| entropy_loss | -4.35 | +| policy_loss | -0.013 | +| value_loss | 1.09 | | stat/ | | -| constraint_violation | 535 | -| ep_constraint_vio... | 0.2 | +| constraint_violation | 521 | +| ep_constraint_vio... | 0.1 | | ep_length | 225 | -| ep_return | 116 | -| ep_reward | 0.467 | +| ep_return | 110 | +| ep_reward | 0.446 | | stat_eval/ | | -| constraint_violation | 1.7 | +| constraint_violation | 1.6 | | ep_length | 250 | -| ep_return | 135 | -| ep_reward | 0.54 | -| mse | 281 | +| ep_return | 143 | +| ep_reward | 0.573 | +| mse | 266 | | time/ | | | progress | 0.19 | | step | 1.9e+05 | -| step_time | 10.6 | +| step_time | 8.35 | -------------------------------------- -2023-10-19 15:34:12,834 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 145.714 +/- 22.826 -2023-10-19 15:34:12,835 : +2023-10-27 17:14:22,231 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 140.017 +/- 31.941 +2023-10-27 17:14:22,232 : -------------------------------------- | loss/ | | -| approx_kl | 0.0348 | -| entropy_loss | -4.28 | -| policy_loss | -0.0138 | -| value_loss | 1.03 | +| approx_kl | 0.0147 | +| entropy_loss | -4.46 | +| policy_loss | -0.0116 | +| value_loss | 1.05 | | stat/ | | -| constraint_violation | 567 | -| ep_constraint_vio... | 1.5 | +| constraint_violation | 553 | +| ep_constraint_vio... | 1.4 | | ep_length | 250 | -| ep_return | 133 | -| ep_reward | 0.53 | +| ep_return | 138 | +| ep_reward | 0.552 | | stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 0.1 | | ep_length | 250 | -| ep_return | 146 | -| ep_reward | 0.583 | +| ep_return | 140 | +| ep_reward | 0.56 | | mse | 277 | | time/ | | | progress | 0.2 | | step | 2e+05 | -| step_time | 10.2 | +| step_time | 8.18 | -------------------------------------- -2023-10-19 15:36:09,914 : Eval | ep_lengths 176.60 +/- 112.16 | ep_return 102.649 +/- 70.564 -2023-10-19 15:36:09,915 : +2023-10-27 17:15:59,361 : Eval | ep_lengths 176.60 +/- 112.16 | ep_return 102.127 +/- 71.072 +2023-10-27 17:15:59,388 : -------------------------------------- | loss/ | | -| approx_kl | 0.0306 | -| entropy_loss | -4.28 | -| policy_loss | -0.0073 | -| value_loss | 1.03 | +| approx_kl | 0.0286 | +| entropy_loss | -4.52 | +| policy_loss | -0.0147 | +| value_loss | 1.96 | | stat/ | | -| constraint_violation | 589 | -| ep_constraint_vio... | 1.7 | -| ep_length | 202 | -| ep_return | 111 | -| ep_reward | 0.445 | +| constraint_violation | 574 | +| ep_constraint_vio... | 0.1 | +| ep_length | 250 | +| ep_return | 145 | +| ep_reward | 0.582 | | stat_eval/ | | | constraint_violation | 0.3 | | ep_length | 177 | -| ep_return | 103 | -| ep_reward | 0.415 | -| mse | 209 | +| ep_return | 102 | +| ep_reward | 0.413 | +| mse | 210 | | time/ | | | progress | 0.21 | | step | 2.1e+05 | -| step_time | 9.91 | +| step_time | 8.33 | -------------------------------------- -2023-10-19 15:38:12,152 : Eval | ep_lengths 226.30 +/- 71.10 | ep_return 126.108 +/- 46.573 -2023-10-19 15:38:12,172 : +2023-10-27 17:17:41,350 : Eval | ep_lengths 226.30 +/- 71.10 | ep_return 131.329 +/- 47.560 +2023-10-27 17:17:41,351 : -------------------------------------- | loss/ | | -| approx_kl | 0.0387 | -| entropy_loss | -4.24 | -| policy_loss | -0.0116 | -| value_loss | 1.14 | +| approx_kl | 0.0302 | +| entropy_loss | -4.61 | +| policy_loss | -0.0117 | +| value_loss | 1.12 | | stat/ | | -| constraint_violation | 614 | -| ep_constraint_vio... | 0.3 | +| constraint_violation | 601 | +| ep_constraint_vio... | 0.4 | | ep_length | 250 | -| ep_return | 143 | -| ep_reward | 0.574 | +| ep_return | 141 | +| ep_reward | 0.563 | | stat_eval/ | | -| constraint_violation | 0.2 | +| constraint_violation | 0.5 | | ep_length | 226 | -| ep_return | 126 | -| ep_reward | 0.505 | -| mse | 256 | +| ep_return | 131 | +| ep_reward | 0.526 | +| mse | 244 | | time/ | | | progress | 0.22 | | step | 2.2e+05 | -| step_time | 10.1 | +| step_time | 8.31 | -------------------------------------- -2023-10-19 15:40:16,336 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 145.136 +/- 19.832 -2023-10-19 15:40:16,338 : +2023-10-27 17:19:24,233 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 142.171 +/- 25.858 +2023-10-27 17:19:24,234 : -------------------------------------- | loss/ | | -| approx_kl | 0.021 | -| entropy_loss | -4.22 | -| policy_loss | -0.00753 | -| value_loss | 1.7 | +| approx_kl | 0.0265 | +| entropy_loss | -4.59 | +| policy_loss | 0.00233 | +| value_loss | 2.07 | | stat/ | | -| constraint_violation | 634 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 133 | -| ep_reward | 0.532 | +| constraint_violation | 622 | +| ep_constraint_vio... | 0.1 | +| ep_length | 227 | +| ep_return | 126 | +| ep_reward | 0.504 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 145 | -| ep_reward | 0.581 | -| mse | 276 | +| ep_return | 142 | +| ep_reward | 0.569 | +| mse | 271 | | time/ | | | progress | 0.23 | | step | 2.3e+05 | -| step_time | 9.88 | +| step_time | 8.37 | -------------------------------------- -2023-10-19 15:42:18,144 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 125.243 +/- 50.089 -2023-10-19 15:42:18,145 : +2023-10-27 17:21:05,033 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 122.901 +/- 51.611 +2023-10-27 17:21:05,034 : -------------------------------------- | loss/ | | -| approx_kl | 0.0235 | -| entropy_loss | -4.27 | -| policy_loss | -0.0181 | -| value_loss | 0.52 | +| approx_kl | 0.0408 | +| entropy_loss | -4.64 | +| policy_loss | -0.00476 | +| value_loss | 0.756 | | stat/ | | -| constraint_violation | 659 | +| constraint_violation | 644 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 142 | -| ep_reward | 0.568 | +| ep_return | 145 | +| ep_reward | 0.581 | | stat_eval/ | | | constraint_violation | 0.9 | | ep_length | 225 | -| ep_return | 125 | -| ep_reward | 0.502 | -| mse | 271 | +| ep_return | 123 | +| ep_reward | 0.492 | +| mse | 262 | | time/ | | | progress | 0.24 | | step | 2.4e+05 | -| step_time | 10.3 | +| step_time | 8.38 | -------------------------------------- -2023-10-19 15:44:20,839 : Eval | ep_lengths 201.90 +/- 96.26 | ep_return 111.535 +/- 59.146 -2023-10-19 15:44:20,841 : +2023-10-27 17:22:44,925 : Eval | ep_lengths 202.00 +/- 96.07 | ep_return 116.104 +/- 63.694 +2023-10-27 17:22:44,926 : -------------------------------------- | loss/ | | -| approx_kl | 0.0213 | -| entropy_loss | -4.31 | -| policy_loss | -0.0144 | -| value_loss | 0.565 | +| approx_kl | 0.0176 | +| entropy_loss | -4.59 | +| policy_loss | -0.00718 | +| value_loss | 0.714 | | stat/ | | -| constraint_violation | 679 | +| constraint_violation | 662 | | ep_constraint_vio... | 0.1 | | ep_length | 250 | -| ep_return | 138 | -| ep_reward | 0.554 | +| ep_return | 155 | +| ep_reward | 0.62 | | stat_eval/ | | | constraint_violation | 0.3 | | ep_length | 202 | -| ep_return | 112 | -| ep_reward | 0.447 | -| mse | 257 | +| ep_return | 116 | +| ep_reward | 0.465 | +| mse | 243 | | time/ | | | progress | 0.25 | | step | 2.5e+05 | -| step_time | 10.5 | +| step_time | 8.29 | -------------------------------------- -2023-10-19 15:46:36,722 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 148.374 +/- 22.111 -2023-10-19 15:46:36,732 : +2023-10-27 17:24:28,814 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 146.392 +/- 24.925 +2023-10-27 17:24:28,824 : -------------------------------------- | loss/ | | -| approx_kl | 0.016 | -| entropy_loss | -4.36 | -| policy_loss | -0.0127 | -| value_loss | 0.691 | +| approx_kl | 0.0328 | +| entropy_loss | -4.61 | +| policy_loss | -0.0206 | +| value_loss | 0.846 | | stat/ | | -| constraint_violation | 703 | +| constraint_violation | 687 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 158 | -| ep_reward | 0.631 | +| ep_return | 151 | +| ep_reward | 0.606 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 148 | -| ep_reward | 0.593 | -| mse | 257 | +| ep_return | 146 | +| ep_reward | 0.586 | +| mse | 254 | | time/ | | | progress | 0.26 | | step | 2.6e+05 | -| step_time | 10.9 | +| step_time | 8.42 | -------------------------------------- -2023-10-19 15:48:51,128 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 147.481 +/- 27.129 -2023-10-19 15:48:51,129 : +2023-10-27 17:26:12,569 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 152.667 +/- 25.246 +2023-10-27 17:26:12,577 : -------------------------------------- | loss/ | | -| approx_kl | 0.0235 | -| entropy_loss | -4.36 | -| policy_loss | -0.01 | -| value_loss | 0.78 | +| approx_kl | 0.0295 | +| entropy_loss | -4.61 | +| policy_loss | -0.00896 | +| value_loss | 0.638 | | stat/ | | -| constraint_violation | 741 | +| constraint_violation | 725 | | ep_constraint_vio... | 0.6 | | ep_length | 250 | -| ep_return | 146 | -| ep_reward | 0.586 | +| ep_return | 136 | +| ep_reward | 0.545 | | stat_eval/ | | -| constraint_violation | 3 | +| constraint_violation | 2.3 | | ep_length | 250 | -| ep_return | 147 | -| ep_reward | 0.59 | -| mse | 270 | +| ep_return | 153 | +| ep_reward | 0.611 | +| mse | 252 | | time/ | | | progress | 0.27 | | step | 2.7e+05 | -| step_time | 11 | +| step_time | 8.36 | -------------------------------------- -2023-10-19 15:51:04,299 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 154.811 +/- 20.410 -2023-10-19 15:51:04,308 : +2023-10-27 17:27:56,268 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 158.415 +/- 26.178 +2023-10-27 17:27:56,276 : -------------------------------------- | loss/ | | -| approx_kl | 0.0212 | -| entropy_loss | -4.31 | -| policy_loss | -0.0203 | -| value_loss | 1.18 | +| approx_kl | 0.0155 | +| entropy_loss | -4.71 | +| policy_loss | -0.00867 | +| value_loss | 1.01 | | stat/ | | -| constraint_violation | 768 | -| ep_constraint_vio... | 0 | +| constraint_violation | 758 | +| ep_constraint_vio... | 0.6 | | ep_length | 250 | -| ep_return | 129 | -| ep_reward | 0.516 | +| ep_return | 145 | +| ep_reward | 0.579 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 250 | -| ep_return | 155 | -| ep_reward | 0.619 | -| mse | 248 | +| ep_return | 158 | +| ep_reward | 0.634 | +| mse | 230 | | time/ | | | progress | 0.28 | | step | 2.8e+05 | -| step_time | 10.8 | +| step_time | 8.41 | -------------------------------------- -2023-10-19 15:53:09,369 : Eval | ep_lengths 201.40 +/- 97.21 | ep_return 113.395 +/- 62.455 -2023-10-19 15:53:09,371 : +2023-10-27 17:29:35,368 : Eval | ep_lengths 201.40 +/- 97.21 | ep_return 129.775 +/- 69.082 +2023-10-27 17:29:35,369 : -------------------------------------- | loss/ | | -| approx_kl | 0.0216 | -| entropy_loss | -4.32 | -| policy_loss | -0.0103 | -| value_loss | 0.984 | +| approx_kl | 0.0302 | +| entropy_loss | -4.78 | +| policy_loss | -0.00784 | +| value_loss | 0.7 | | stat/ | | -| constraint_violation | 778 | +| constraint_violation | 764 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 142 | -| ep_reward | 0.566 | +| ep_return | 143 | +| ep_reward | 0.572 | | stat_eval/ | | | constraint_violation | 0.3 | | ep_length | 201 | -| ep_return | 113 | -| ep_reward | 0.456 | -| mse | 188 | +| ep_return | 130 | +| ep_reward | 0.521 | +| mse | 156 | | time/ | | | progress | 0.29 | | step | 2.9e+05 | -| step_time | 10.5 | +| step_time | 8.34 | -------------------------------------- -2023-10-19 15:55:15,277 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 127.935 +/- 43.728 -2023-10-19 15:55:15,279 : +2023-10-27 17:31:16,257 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 122.903 +/- 43.591 +2023-10-27 17:31:16,258 : -------------------------------------- | loss/ | | -| approx_kl | 0.02 | -| entropy_loss | -4.3 | -| policy_loss | -0.00662 | -| value_loss | 0.652 | +| approx_kl | 0.031 | +| entropy_loss | -4.81 | +| policy_loss | -0.0218 | +| value_loss | 0.56 | | stat/ | | -| constraint_violation | 822 | +| constraint_violation | 810 | | ep_constraint_vio... | 2.3 | -| ep_length | 226 | -| ep_return | 124 | -| ep_reward | 0.497 | +| ep_length | 250 | +| ep_return | 140 | +| ep_reward | 0.56 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 128 | -| ep_reward | 0.512 | -| mse | 294 | +| ep_return | 123 | +| ep_reward | 0.492 | +| mse | 293 | | time/ | | | progress | 0.3 | | step | 3e+05 | -| step_time | 10.4 | +| step_time | 8.41 | -------------------------------------- -2023-10-19 15:57:22,522 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 146.156 +/- 25.141 -2023-10-19 15:57:22,523 : +2023-10-27 17:32:58,886 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 142.661 +/- 25.632 +2023-10-27 17:32:58,887 : -------------------------------------- | loss/ | | -| approx_kl | 0.0254 | -| entropy_loss | -4.34 | -| policy_loss | -0.00725 | -| value_loss | 0.519 | +| approx_kl | 0.0334 | +| entropy_loss | -4.88 | +| policy_loss | -0.011 | +| value_loss | 1.46 | | stat/ | | -| constraint_violation | 838 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 140 | -| ep_reward | 0.558 | +| constraint_violation | 825 | +| ep_constraint_vio... | 0.1 | +| ep_length | 226 | +| ep_return | 127 | +| ep_reward | 0.51 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 146 | -| ep_reward | 0.585 | -| mse | 308 | +| ep_return | 143 | +| ep_reward | 0.571 | +| mse | 301 | | time/ | | | progress | 0.31 | | step | 3.1e+05 | -| step_time | 10.3 | +| step_time | 8.26 | -------------------------------------- -2023-10-19 15:59:27,621 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 141.693 +/- 19.603 -2023-10-19 15:59:27,622 : +2023-10-27 17:34:41,630 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 145.571 +/- 23.076 +2023-10-27 17:34:41,632 : -------------------------------------- | loss/ | | -| approx_kl | 0.0191 | -| entropy_loss | -4.4 | -| policy_loss | -0.0119 | -| value_loss | 4.18 | +| approx_kl | 0.0104 | +| entropy_loss | -4.92 | +| policy_loss | -0.0145 | +| value_loss | 3.84 | | stat/ | | -| constraint_violation | 860 | -| ep_constraint_vio... | 1.1 | +| constraint_violation | 853 | +| ep_constraint_vio... | 1.2 | | ep_length | 227 | -| ep_return | 129 | -| ep_reward | 0.516 | +| ep_return | 135 | +| ep_reward | 0.539 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 142 | -| ep_reward | 0.567 | -| mse | 321 | +| ep_return | 146 | +| ep_reward | 0.582 | +| mse | 307 | | time/ | | | progress | 0.32 | | step | 3.2e+05 | -| step_time | 10.2 | +| step_time | 8.39 | -------------------------------------- -2023-10-19 16:01:28,622 : Eval | ep_lengths 202.00 +/- 96.08 | ep_return 124.415 +/- 65.648 -2023-10-19 16:01:28,624 : +2023-10-27 17:36:20,328 : Eval | ep_lengths 202.00 +/- 96.08 | ep_return 122.973 +/- 64.454 +2023-10-27 17:36:20,330 : -------------------------------------- | loss/ | | -| approx_kl | 0.0239 | -| entropy_loss | -4.42 | -| policy_loss | -0.0266 | -| value_loss | 1.11 | +| approx_kl | 0.0356 | +| entropy_loss | -4.88 | +| policy_loss | -0.0181 | +| value_loss | 0.664 | | stat/ | | -| constraint_violation | 912 | -| ep_constraint_vio... | 1.4 | +| constraint_violation | 902 | +| ep_constraint_vio... | 0.1 | | ep_length | 250 | -| ep_return | 138 | -| ep_reward | 0.553 | +| ep_return | 144 | +| ep_reward | 0.575 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 202 | -| ep_return | 124 | -| ep_reward | 0.501 | -| mse | 202 | +| ep_return | 123 | +| ep_reward | 0.496 | +| mse | 200 | | time/ | | | progress | 0.33 | | step | 3.3e+05 | -| step_time | 10.2 | +| step_time | 8.25 | -------------------------------------- -2023-10-19 16:03:32,630 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 144.939 +/- 18.197 -2023-10-19 16:03:32,632 : +2023-10-27 17:38:03,521 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 151.095 +/- 28.533 +2023-10-27 17:38:03,522 : -------------------------------------- | loss/ | | -| approx_kl | 0.0215 | -| entropy_loss | -4.33 | -| policy_loss | -0.0202 | -| value_loss | 1.29 | +| approx_kl | 0.0191 | +| entropy_loss | -4.9 | +| policy_loss | -0.0107 | +| value_loss | 0.553 | | stat/ | | -| constraint_violation | 923 | +| constraint_violation | 912 | | ep_constraint_vio... | 0.5 | | ep_length | 250 | -| ep_return | 140 | -| ep_reward | 0.559 | +| ep_return | 147 | +| ep_reward | 0.588 | | stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 0.1 | | ep_length | 250 | -| ep_return | 145 | -| ep_reward | 0.58 | -| mse | 300 | +| ep_return | 151 | +| ep_reward | 0.604 | +| mse | 285 | | time/ | | | progress | 0.34 | | step | 3.4e+05 | -| step_time | 10 | +| step_time | 8.37 | -------------------------------------- -2023-10-19 16:05:37,924 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 143.524 +/- 30.035 -2023-10-19 16:05:37,926 : +2023-10-27 17:39:46,906 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 151.059 +/- 40.976 +2023-10-27 17:39:46,907 : -------------------------------------- | loss/ | | -| approx_kl | 0.0276 | -| entropy_loss | -4.43 | -| policy_loss | -0.0138 | -| value_loss | 3.31 | +| approx_kl | 0.0188 | +| entropy_loss | -4.96 | +| policy_loss | -0.00874 | +| value_loss | 0.649 | | stat/ | | -| constraint_violation | 928 | -| ep_constraint_vio... | 0.3 | +| constraint_violation | 915 | +| ep_constraint_vio... | 0.1 | | ep_length | 226 | -| ep_return | 121 | -| ep_reward | 0.486 | +| ep_return | 123 | +| ep_reward | 0.494 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 144 | -| ep_reward | 0.574 | -| mse | 346 | +| ep_return | 151 | +| ep_reward | 0.604 | +| mse | 328 | | time/ | | | progress | 0.35 | | step | 3.5e+05 | -| step_time | 10.1 | +| step_time | 8.33 | -------------------------------------- -2023-10-19 16:07:40,743 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 158.920 +/- 21.715 -2023-10-19 16:07:40,752 : +2023-10-27 17:41:30,028 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 157.928 +/- 28.262 +2023-10-27 17:41:30,029 : -------------------------------------- | loss/ | | -| approx_kl | 0.0193 | -| entropy_loss | -4.53 | -| policy_loss | -0.0134 | -| value_loss | 1.15 | +| approx_kl | 0.0252 | +| entropy_loss | -4.97 | +| policy_loss | -0.0141 | +| value_loss | 0.689 | | stat/ | | -| constraint_violation | 987 | -| ep_constraint_vio... | 1 | +| constraint_violation | 970 | +| ep_constraint_vio... | 0.1 | | ep_length | 226 | -| ep_return | 129 | -| ep_reward | 0.52 | +| ep_return | 132 | +| ep_reward | 0.531 | | stat_eval/ | | -| constraint_violation | 1.2 | +| constraint_violation | 1.3 | | ep_length | 250 | -| ep_return | 159 | -| ep_reward | 0.636 | -| mse | 237 | +| ep_return | 158 | +| ep_reward | 0.632 | +| mse | 235 | | time/ | | | progress | 0.36 | | step | 3.6e+05 | -| step_time | 9.96 | +| step_time | 8.39 | -------------------------------------- -2023-10-19 16:09:39,574 : Eval | ep_lengths 202.30 +/- 95.41 | ep_return 125.630 +/- 65.463 -2023-10-19 16:09:39,595 : +2023-10-27 17:43:09,714 : Eval | ep_lengths 202.20 +/- 95.60 | ep_return 132.859 +/- 71.342 +2023-10-27 17:43:09,715 : -------------------------------------- | loss/ | | -| approx_kl | 0.0185 | -| entropy_loss | -4.59 | -| policy_loss | -0.0166 | -| value_loss | 0.872 | +| approx_kl | 0.0264 | +| entropy_loss | -5 | +| policy_loss | -0.0101 | +| value_loss | 0.876 | | stat/ | | -| constraint_violation | 1.02e+03 | -| ep_constraint_vio... | 0.9 | +| constraint_violation | 1.01e+03 | +| ep_constraint_vio... | 0.2 | | ep_length | 225 | -| ep_return | 131 | -| ep_reward | 0.526 | +| ep_return | 125 | +| ep_reward | 0.501 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 202 | -| ep_return | 126 | -| ep_reward | 0.507 | -| mse | 175 | +| ep_return | 133 | +| ep_reward | 0.536 | +| mse | 165 | | time/ | | | progress | 0.37 | | step | 3.7e+05 | -| step_time | 10.1 | +| step_time | 8.39 | -------------------------------------- -2023-10-19 16:11:41,228 : Eval | ep_lengths 225.90 +/- 72.30 | ep_return 138.938 +/- 51.339 -2023-10-19 16:11:41,248 : +2023-10-27 17:44:51,419 : Eval | ep_lengths 225.90 +/- 72.30 | ep_return 142.513 +/- 51.908 +2023-10-27 17:44:51,420 : -------------------------------------- | loss/ | | -| approx_kl | 0.0282 | -| entropy_loss | -4.68 | -| policy_loss | -0.00714 | -| value_loss | 0.553 | +| approx_kl | 0.022 | +| entropy_loss | -5 | +| policy_loss | -0.0128 | +| value_loss | 0.332 | | stat/ | | -| constraint_violation | 1.05e+03 | +| constraint_violation | 1.03e+03 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 143 | -| ep_reward | 0.573 | +| ep_return | 150 | +| ep_reward | 0.598 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 226 | -| ep_return | 139 | -| ep_reward | 0.556 | -| mse | 210 | +| ep_return | 143 | +| ep_reward | 0.571 | +| mse | 200 | | time/ | | | progress | 0.38 | | step | 3.8e+05 | -| step_time | 9.99 | +| step_time | 8.34 | -------------------------------------- -2023-10-19 16:13:44,414 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 143.825 +/- 24.129 -2023-10-19 16:13:44,416 : +2023-10-27 17:46:34,549 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 141.734 +/- 26.293 +2023-10-27 17:46:34,550 : -------------------------------------- | loss/ | | -| approx_kl | 0.0348 | -| entropy_loss | -4.65 | -| policy_loss | -0.00615 | -| value_loss | 0.299 | +| approx_kl | 0.0283 | +| entropy_loss | -5 | +| policy_loss | 0.00108 | +| value_loss | 0.924 | | stat/ | | -| constraint_violation | 1.08e+03 | -| ep_constraint_vio... | 1.5 | -| ep_length | 226 | -| ep_return | 132 | -| ep_reward | 0.532 | +| constraint_violation | 1.06e+03 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 155 | +| ep_reward | 0.62 | | stat_eval/ | | -| constraint_violation | 1.7 | +| constraint_violation | 1.6 | | ep_length | 250 | -| ep_return | 144 | -| ep_reward | 0.575 | -| mse | 317 | +| ep_return | 142 | +| ep_reward | 0.567 | +| mse | 319 | | time/ | | | progress | 0.39 | | step | 3.9e+05 | -| step_time | 9.97 | +| step_time | 8.42 | -------------------------------------- -2023-10-19 16:15:45,753 : Eval | ep_lengths 225.60 +/- 73.20 | ep_return 137.772 +/- 50.332 -2023-10-19 16:15:45,755 : +2023-10-27 17:48:15,954 : Eval | ep_lengths 225.60 +/- 73.20 | ep_return 134.989 +/- 50.118 +2023-10-27 17:48:15,955 : -------------------------------------- | loss/ | | -| approx_kl | 0.0266 | -| entropy_loss | -4.62 | -| policy_loss | -0.016 | -| value_loss | 0.775 | +| approx_kl | 0.0171 | +| entropy_loss | -5.03 | +| policy_loss | -0.00857 | +| value_loss | 1.34 | | stat/ | | -| constraint_violation | 1.11e+03 | -| ep_constraint_vio... | 0.3 | -| ep_length | 250 | -| ep_return | 153 | -| ep_reward | 0.613 | +| constraint_violation | 1.09e+03 | +| ep_constraint_vio... | 0.4 | +| ep_length | 226 | +| ep_return | 138 | +| ep_reward | 0.555 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 226 | -| ep_return | 138 | -| ep_reward | 0.552 | +| ep_return | 135 | +| ep_reward | 0.54 | | mse | 232 | | time/ | | | progress | 0.4 | | step | 4e+05 | -| step_time | 10.1 | +| step_time | 8.43 | -------------------------------------- -2023-10-19 16:17:49,116 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 154.565 +/- 26.205 -2023-10-19 16:17:49,118 : +2023-10-27 17:49:59,348 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 159.874 +/- 28.572 +2023-10-27 17:49:59,360 : -------------------------------------- | loss/ | | -| approx_kl | 0.0156 | -| entropy_loss | -4.65 | -| policy_loss | -0.0228 | -| value_loss | 1.58 | +| approx_kl | 0.0403 | +| entropy_loss | -5.17 | +| policy_loss | -0.0162 | +| value_loss | 2.73 | | stat/ | | -| constraint_violation | 1.14e+03 | -| ep_constraint_vio... | 2.1 | -| ep_length | 250 | -| ep_return | 150 | -| ep_reward | 0.602 | +| constraint_violation | 1.12e+03 | +| ep_constraint_vio... | 0.2 | +| ep_length | 203 | +| ep_return | 126 | +| ep_reward | 0.507 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 155 | -| ep_reward | 0.618 | -| mse | 248 | +| ep_return | 160 | +| ep_reward | 0.639 | +| mse | 234 | | time/ | | | progress | 0.41 | | step | 4.1e+05 | -| step_time | 10.2 | +| step_time | 8.38 | -------------------------------------- -2023-10-19 16:19:48,474 : Eval | ep_lengths 201.40 +/- 97.20 | ep_return 131.354 +/- 69.343 -2023-10-19 16:19:48,476 : +2023-10-27 17:51:39,170 : Eval | ep_lengths 201.40 +/- 97.20 | ep_return 136.264 +/- 70.148 +2023-10-27 17:51:39,172 : -------------------------------------- | loss/ | | -| approx_kl | 0.024 | -| entropy_loss | -4.73 | -| policy_loss | -0.00321 | -| value_loss | 0.767 | +| approx_kl | 0.0317 | +| entropy_loss | -5.18 | +| policy_loss | -0.0101 | +| value_loss | 1.87 | | stat/ | | -| constraint_violation | 1.15e+03 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 142 | -| ep_reward | 0.568 | +| constraint_violation | 1.12e+03 | +| ep_constraint_vio... | 0.1 | +| ep_length | 226 | +| ep_return | 135 | +| ep_reward | 0.541 | | stat_eval/ | | | constraint_violation | 1.3 | | ep_length | 201 | -| ep_return | 131 | -| ep_reward | 0.526 | -| mse | 149 | +| ep_return | 136 | +| ep_reward | 0.546 | +| mse | 137 | | time/ | | | progress | 0.42 | | step | 4.2e+05 | -| step_time | 10.1 | +| step_time | 8.39 | -------------------------------------- -2023-10-19 16:21:46,559 : Eval | ep_lengths 201.90 +/- 96.26 | ep_return 127.053 +/- 66.099 -2023-10-19 16:21:46,561 : +2023-10-27 17:53:19,003 : Eval | ep_lengths 201.80 +/- 96.45 | ep_return 129.967 +/- 68.177 +2023-10-27 17:53:19,004 : -------------------------------------- | loss/ | | -| approx_kl | 0.0237 | -| entropy_loss | -4.81 | -| policy_loss | -0.00798 | -| value_loss | 0.787 | +| approx_kl | 0.0143 | +| entropy_loss | -5.24 | +| policy_loss | -0.00529 | +| value_loss | 1.52 | | stat/ | | -| constraint_violation | 1.16e+03 | +| constraint_violation | 1.14e+03 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 163 | -| ep_reward | 0.651 | +| ep_return | 156 | +| ep_reward | 0.626 | | stat_eval/ | | | constraint_violation | 2.1 | | ep_length | 202 | -| ep_return | 127 | -| ep_reward | 0.514 | -| mse | 187 | +| ep_return | 130 | +| ep_reward | 0.525 | +| mse | 178 | | time/ | | | progress | 0.43 | | step | 4.3e+05 | -| step_time | 9.87 | +| step_time | 8.4 | -------------------------------------- -2023-10-19 16:23:46,235 : Eval | ep_lengths 226.50 +/- 70.50 | ep_return 136.319 +/- 51.518 -2023-10-19 16:23:46,262 : +2023-10-27 17:55:01,537 : Eval | ep_lengths 226.60 +/- 70.20 | ep_return 136.878 +/- 52.372 +2023-10-27 17:55:01,538 : -------------------------------------- | loss/ | | -| approx_kl | 0.0197 | -| entropy_loss | -4.74 | -| policy_loss | -0.0141 | -| value_loss | 0.348 | +| approx_kl | 0.0269 | +| entropy_loss | -5.26 | +| policy_loss | -0.00229 | +| value_loss | 0.724 | | stat/ | | -| constraint_violation | 1.17e+03 | +| constraint_violation | 1.14e+03 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 150 | -| ep_reward | 0.598 | +| ep_return | 147 | +| ep_reward | 0.589 | | stat_eval/ | | | constraint_violation | 0.2 | -| ep_length | 226 | -| ep_return | 136 | -| ep_reward | 0.545 | -| mse | 281 | +| ep_length | 227 | +| ep_return | 137 | +| ep_reward | 0.548 | +| mse | 277 | | time/ | | | progress | 0.44 | | step | 4.4e+05 | -| step_time | 9.9 | +| step_time | 8.45 | -------------------------------------- -2023-10-19 16:25:49,351 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 160.362 +/- 28.985 -2023-10-19 16:25:49,362 : +2023-10-27 17:56:45,761 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 164.948 +/- 23.394 +2023-10-27 17:56:45,771 : -------------------------------------- | loss/ | | -| approx_kl | 0.0171 | -| entropy_loss | -4.71 | -| policy_loss | -0.0127 | -| value_loss | 0.815 | +| approx_kl | 0.0268 | +| entropy_loss | -5.26 | +| policy_loss | -0.0145 | +| value_loss | 0.732 | | stat/ | | -| constraint_violation | 1.2e+03 | -| ep_constraint_vio... | 2.7 | -| ep_length | 204 | -| ep_return | 130 | -| ep_reward | 0.525 | +| constraint_violation | 1.17e+03 | +| ep_constraint_vio... | 0.2 | +| ep_length | 227 | +| ep_return | 140 | +| ep_reward | 0.566 | | stat_eval/ | | -| constraint_violation | 0.1 | +| constraint_violation | 0 | | ep_length | 250 | -| ep_return | 160 | -| ep_reward | 0.641 | -| mse | 208 | +| ep_return | 165 | +| ep_reward | 0.66 | +| mse | 200 | | time/ | | | progress | 0.45 | | step | 4.5e+05 | -| step_time | 10 | +| step_time | 8.47 | -------------------------------------- -2023-10-19 16:27:52,923 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 177.985 +/- 17.580 -2023-10-19 16:27:52,931 : +2023-10-27 17:58:30,017 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 177.768 +/- 15.695 +2023-10-27 17:58:30,025 : -------------------------------------- | loss/ | | -| approx_kl | 0.0224 | -| entropy_loss | -4.69 | -| policy_loss | -0.0242 | -| value_loss | 2.04 | +| approx_kl | 0.0355 | +| entropy_loss | -5.31 | +| policy_loss | -0.0112 | +| value_loss | 2.01 | | stat/ | | -| constraint_violation | 1.23e+03 | -| ep_constraint_vio... | 0.8 | -| ep_length | 225 | -| ep_return | 137 | +| constraint_violation | 1.21e+03 | +| ep_constraint_vio... | 0.1 | +| ep_length | 227 | +| ep_return | 139 | | ep_reward | 0.555 | | stat_eval/ | | -| constraint_violation | 0.1 | +| constraint_violation | 0 | | ep_length | 250 | | ep_return | 178 | -| ep_reward | 0.712 | -| mse | 126 | +| ep_reward | 0.711 | +| mse | 123 | | time/ | | | progress | 0.46 | | step | 4.6e+05 | -| step_time | 10.2 | +| step_time | 8.37 | -------------------------------------- -2023-10-19 16:29:54,121 : Eval | ep_lengths 226.30 +/- 71.10 | ep_return 134.633 +/- 47.177 -2023-10-19 16:29:54,123 : +2023-10-27 18:00:11,908 : Eval | ep_lengths 226.20 +/- 71.40 | ep_return 127.895 +/- 44.141 +2023-10-27 18:00:11,909 : -------------------------------------- | loss/ | | -| approx_kl | 0.03 | -| entropy_loss | -4.69 | -| policy_loss | -0.0175 | -| value_loss | 1.34 | +| approx_kl | 0.027 | +| entropy_loss | -5.39 | +| policy_loss | -0.016 | +| value_loss | 0.645 | | stat/ | | -| constraint_violation | 1.27e+03 | +| constraint_violation | 1.25e+03 | | ep_constraint_vio... | 1.4 | | ep_length | 200 | -| ep_return | 127 | -| ep_reward | 0.508 | +| ep_return | 129 | +| ep_reward | 0.518 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 226 | -| ep_return | 135 | -| ep_reward | 0.541 | -| mse | 239 | +| ep_return | 128 | +| ep_reward | 0.514 | +| mse | 248 | | time/ | | | progress | 0.47 | | step | 4.7e+05 | -| step_time | 9.99 | +| step_time | 8.4 | -------------------------------------- -2023-10-19 16:31:52,657 : Eval | ep_lengths 201.20 +/- 97.60 | ep_return 134.446 +/- 71.019 -2023-10-19 16:31:52,658 : +2023-10-27 18:01:51,289 : Eval | ep_lengths 201.20 +/- 97.60 | ep_return 128.681 +/- 68.615 +2023-10-27 18:01:51,290 : -------------------------------------- | loss/ | | -| approx_kl | 0.0159 | -| entropy_loss | -4.65 | -| policy_loss | -0.00764 | -| value_loss | 0.575 | +| approx_kl | 0.0272 | +| entropy_loss | -5.46 | +| policy_loss | -0.0076 | +| value_loss | 0.564 | | stat/ | | -| constraint_violation | 1.3e+03 | +| constraint_violation | 1.28e+03 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 159 | -| ep_reward | 0.635 | +| ep_return | 155 | +| ep_reward | 0.62 | | stat_eval/ | | | constraint_violation | 1.6 | | ep_length | 201 | -| ep_return | 134 | -| ep_reward | 0.54 | -| mse | 173 | +| ep_return | 129 | +| ep_reward | 0.517 | +| mse | 179 | | time/ | | | progress | 0.48 | | step | 4.8e+05 | -| step_time | 9.99 | +| step_time | 8.33 | -------------------------------------- -2023-10-19 16:33:54,786 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 146.209 +/- 36.340 -2023-10-19 16:33:54,787 : +2023-10-27 18:03:34,334 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 140.540 +/- 27.911 +2023-10-27 18:03:34,335 : -------------------------------------- | loss/ | | -| approx_kl | 0.0209 | -| entropy_loss | -4.81 | -| policy_loss | -0.00469 | -| value_loss | 0.727 | +| approx_kl | 0.0292 | +| entropy_loss | -5.42 | +| policy_loss | -0.0132 | +| value_loss | 0.819 | | stat/ | | -| constraint_violation | 1.33e+03 | +| constraint_violation | 1.3e+03 | | ep_constraint_vio... | 0.1 | | ep_length | 227 | -| ep_return | 156 | -| ep_reward | 0.623 | +| ep_return | 153 | +| ep_reward | 0.61 | | stat_eval/ | | -| constraint_violation | 0.3 | +| constraint_violation | 0.2 | | ep_length | 250 | -| ep_return | 146 | -| ep_reward | 0.585 | -| mse | 378 | +| ep_return | 141 | +| ep_reward | 0.562 | +| mse | 374 | | time/ | | | progress | 0.49 | | step | 4.9e+05 | -| step_time | 10 | +| step_time | 8.27 | -------------------------------------- -2023-10-19 16:35:49,840 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 148.642 +/- 19.261 -2023-10-19 16:35:49,842 : +2023-10-27 18:05:17,800 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 147.378 +/- 15.233 +2023-10-27 18:05:17,801 : -------------------------------------- | loss/ | | -| approx_kl | 0.019 | -| entropy_loss | -4.86 | -| policy_loss | -0.0215 | -| value_loss | 0.718 | +| approx_kl | 0.0206 | +| entropy_loss | -5.45 | +| policy_loss | -0.017 | +| value_loss | 0.719 | | stat/ | | -| constraint_violation | 1.34e+03 | +| constraint_violation | 1.31e+03 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 159 | -| ep_reward | 0.636 | +| ep_return | 162 | +| ep_reward | 0.65 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 149 | -| ep_reward | 0.595 | -| mse | 266 | +| ep_return | 147 | +| ep_reward | 0.59 | +| mse | 257 | | time/ | | | progress | 0.5 | | step | 5e+05 | -| step_time | 9.66 | +| step_time | 8.41 | -------------------------------------- -2023-10-19 16:37:42,905 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 160.781 +/- 58.358 -2023-10-19 16:37:42,907 : +2023-10-27 18:06:59,771 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 156.137 +/- 55.679 +2023-10-27 18:06:59,772 : -------------------------------------- | loss/ | | -| approx_kl | 0.0258 | -| entropy_loss | -4.87 | -| policy_loss | -0.0223 | -| value_loss | 0.429 | +| approx_kl | 0.0388 | +| entropy_loss | -5.47 | +| policy_loss | -0.00382 | +| value_loss | 2.68 | | stat/ | | -| constraint_violation | 1.36e+03 | -| ep_constraint_vio... | 1.6 | -| ep_length | 250 | -| ep_return | 150 | -| ep_reward | 0.598 | +| constraint_violation | 1.35e+03 | +| ep_constraint_vio... | 3.3 | +| ep_length | 225 | +| ep_return | 138 | +| ep_reward | 0.554 | | stat_eval/ | | | constraint_violation | 0.6 | | ep_length | 225 | -| ep_return | 161 | -| ep_reward | 0.647 | -| mse | 134 | +| ep_return | 156 | +| ep_reward | 0.629 | +| mse | 137 | | time/ | | | progress | 0.51 | | step | 5.1e+05 | -| step_time | 9.35 | +| step_time | 8.48 | -------------------------------------- -2023-10-19 16:39:36,185 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 151.658 +/- 56.869 -2023-10-19 16:39:36,186 : +2023-10-27 18:08:41,521 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 140.940 +/- 53.172 +2023-10-27 18:08:41,523 : -------------------------------------- | loss/ | | -| approx_kl | 0.0239 | -| entropy_loss | -4.87 | -| policy_loss | -0.0164 | -| value_loss | 1.05 | +| approx_kl | 0.0295 | +| entropy_loss | -5.55 | +| policy_loss | 0.0023 | +| value_loss | 0.621 | | stat/ | | -| constraint_violation | 1.4e+03 | -| ep_constraint_vio... | 0.7 | -| ep_length | 202 | -| ep_return | 137 | -| ep_reward | 0.548 | +| constraint_violation | 1.36e+03 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 160 | +| ep_reward | 0.642 | | stat_eval/ | | -| constraint_violation | 0.5 | +| constraint_violation | 0.4 | | ep_length | 226 | -| ep_return | 152 | -| ep_reward | 0.608 | -| mse | 208 | +| ep_return | 141 | +| ep_reward | 0.565 | +| mse | 222 | | time/ | | | progress | 0.52 | | step | 5.2e+05 | -| step_time | 9.3 | +| step_time | 8.4 | -------------------------------------- -2023-10-19 16:41:28,922 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 159.205 +/- 25.088 -2023-10-19 16:41:28,923 : +2023-10-27 18:10:24,529 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 151.043 +/- 24.760 +2023-10-27 18:10:24,530 : -------------------------------------- | loss/ | | -| approx_kl | 0.0248 | -| entropy_loss | -4.86 | -| policy_loss | -0.0148 | -| value_loss | 0.524 | +| approx_kl | 0.0206 | +| entropy_loss | -5.62 | +| policy_loss | -0.0145 | +| value_loss | 1.13 | | stat/ | | -| constraint_violation | 1.42e+03 | +| constraint_violation | 1.38e+03 | | ep_constraint_vio... | 0.9 | | ep_length | 250 | -| ep_return | 167 | -| ep_reward | 0.667 | +| ep_return | 140 | +| ep_reward | 0.558 | | stat_eval/ | | -| constraint_violation | 0.2 | +| constraint_violation | 0 | | ep_length | 250 | -| ep_return | 159 | -| ep_reward | 0.637 | -| mse | 282 | +| ep_return | 151 | +| ep_reward | 0.604 | +| mse | 290 | | time/ | | | progress | 0.53 | | step | 5.3e+05 | -| step_time | 9.06 | +| step_time | 8.4 | -------------------------------------- -2023-10-19 16:43:18,735 : Eval | ep_lengths 226.30 +/- 71.10 | ep_return 144.489 +/- 54.827 -2023-10-19 16:43:18,736 : +2023-10-27 18:12:05,459 : Eval | ep_lengths 226.30 +/- 71.10 | ep_return 138.705 +/- 55.944 +2023-10-27 18:12:05,460 : -------------------------------------- | loss/ | | -| approx_kl | 0.0312 | -| entropy_loss | -4.89 | -| policy_loss | -0.0102 | -| value_loss | 0.584 | +| approx_kl | 0.0253 | +| entropy_loss | -5.66 | +| policy_loss | -0.0128 | +| value_loss | 0.693 | | stat/ | | -| constraint_violation | 1.46e+03 | -| ep_constraint_vio... | 0.5 | +| constraint_violation | 1.43e+03 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 167 | -| ep_reward | 0.669 | +| ep_return | 160 | +| ep_reward | 0.638 | | stat_eval/ | | | constraint_violation | 2.1 | | ep_length | 226 | -| ep_return | 144 | -| ep_reward | 0.578 | -| mse | 248 | +| ep_return | 139 | +| ep_reward | 0.555 | +| mse | 255 | | time/ | | | progress | 0.54 | | step | 5.4e+05 | -| step_time | 9 | +| step_time | 8.26 | -------------------------------------- -2023-10-19 16:45:08,953 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 142.033 +/- 52.303 -2023-10-19 16:45:08,955 : +2023-10-27 18:13:46,852 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 134.944 +/- 50.904 +2023-10-27 18:13:46,853 : -------------------------------------- | loss/ | | -| approx_kl | 0.0225 | -| entropy_loss | -4.93 | -| policy_loss | -0.0149 | -| value_loss | 0.404 | +| approx_kl | 0.0193 | +| entropy_loss | -5.59 | +| policy_loss | -0.0179 | +| value_loss | 0.843 | | stat/ | | -| constraint_violation | 1.5e+03 | -| ep_constraint_vio... | 1 | +| constraint_violation | 1.46e+03 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 166 | -| ep_reward | 0.666 | +| ep_return | 170 | +| ep_reward | 0.68 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 142 | -| ep_reward | 0.569 | -| mse | 264 | +| ep_return | 135 | +| ep_reward | 0.54 | +| mse | 274 | | time/ | | | progress | 0.55 | | step | 5.5e+05 | -| step_time | 9.16 | +| step_time | 8.46 | -------------------------------------- -2023-10-19 16:46:57,448 : Eval | ep_lengths 201.60 +/- 96.80 | ep_return 155.879 +/- 81.901 -2023-10-19 16:46:57,449 : +2023-10-27 18:15:26,649 : Eval | ep_lengths 201.60 +/- 96.80 | ep_return 150.428 +/- 78.583 +2023-10-27 18:15:26,650 : -------------------------------------- | loss/ | | -| approx_kl | 0.0266 | -| entropy_loss | -4.93 | -| policy_loss | -0.0162 | -| value_loss | 0.253 | +| approx_kl | 0.0252 | +| entropy_loss | -5.62 | +| policy_loss | -0.00432 | +| value_loss | 0.723 | | stat/ | | -| constraint_violation | 1.51e+03 | -| ep_constraint_vio... | 0.9 | -| ep_length | 225 | -| ep_return | 142 | -| ep_reward | 0.568 | +| constraint_violation | 1.47e+03 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 155 | +| ep_reward | 0.619 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 202 | -| ep_return | 156 | -| ep_reward | 0.624 | -| mse | 104 | +| ep_return | 150 | +| ep_reward | 0.603 | +| mse | 109 | | time/ | | | progress | 0.56 | | step | 5.6e+05 | -| step_time | 9.16 | +| step_time | 8.39 | -------------------------------------- -2023-10-19 16:48:50,563 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 159.376 +/- 18.854 -2023-10-19 16:48:50,565 : +2023-10-27 18:17:10,429 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 145.986 +/- 21.548 +2023-10-27 18:17:10,430 : -------------------------------------- | loss/ | | -| approx_kl | 0.0247 | -| entropy_loss | -5 | -| policy_loss | -0.00172 | -| value_loss | 0.252 | +| approx_kl | 0.0292 | +| entropy_loss | -5.64 | +| policy_loss | -0.00528 | +| value_loss | 0.459 | | stat/ | | -| constraint_violation | 1.52e+03 | -| ep_constraint_vio... | 0 | +| constraint_violation | 1.5e+03 | +| ep_constraint_vio... | 1.4 | | ep_length | 250 | -| ep_return | 171 | -| ep_reward | 0.685 | +| ep_return | 170 | +| ep_reward | 0.681 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 159 | -| ep_reward | 0.638 | -| mse | 273 | +| ep_return | 146 | +| ep_reward | 0.584 | +| mse | 290 | | time/ | | | progress | 0.57 | | step | 5.7e+05 | -| step_time | 9.11 | +| step_time | 8.38 | -------------------------------------- -2023-10-19 16:50:40,220 : Eval | ep_lengths 202.60 +/- 94.82 | ep_return 119.866 +/- 63.359 -2023-10-19 16:50:40,222 : +2023-10-27 18:18:51,139 : Eval | ep_lengths 202.50 +/- 95.01 | ep_return 114.663 +/- 62.861 +2023-10-27 18:18:51,140 : -------------------------------------- | loss/ | | -| approx_kl | 0.0176 | -| entropy_loss | -4.99 | -| policy_loss | -0.0158 | -| value_loss | 0.484 | +| approx_kl | 0.0252 | +| entropy_loss | -5.65 | +| policy_loss | -0.0197 | +| value_loss | 0.792 | | stat/ | | -| constraint_violation | 1.56e+03 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 172 | -| ep_reward | 0.687 | +| constraint_violation | 1.52e+03 | +| ep_constraint_vio... | 0.1 | +| ep_length | 226 | +| ep_return | 133 | +| ep_reward | 0.534 | | stat_eval/ | | | constraint_violation | 0.2 | -| ep_length | 203 | -| ep_return | 120 | -| ep_reward | 0.48 | -| mse | 294 | +| ep_length | 202 | +| ep_return | 115 | +| ep_reward | 0.459 | +| mse | 299 | | time/ | | | progress | 0.58 | | step | 5.8e+05 | -| step_time | 9.25 | +| step_time | 8.51 | -------------------------------------- -2023-10-19 16:52:31,404 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 150.685 +/- 57.662 -2023-10-19 16:52:31,405 : +2023-10-27 18:20:33,767 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 145.231 +/- 54.848 +2023-10-27 18:20:33,768 : -------------------------------------- | loss/ | | -| approx_kl | 0.0271 | -| entropy_loss | -5 | -| policy_loss | -0.0147 | -| value_loss | 0.607 | +| approx_kl | 0.0183 | +| entropy_loss | -5.7 | +| policy_loss | -0.0172 | +| value_loss | 0.358 | | stat/ | | -| constraint_violation | 1.56e+03 | +| constraint_violation | 1.52e+03 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 172 | -| ep_reward | 0.688 | +| ep_return | 162 | +| ep_reward | 0.646 | | stat_eval/ | | | constraint_violation | 0.6 | | ep_length | 225 | -| ep_return | 151 | -| ep_reward | 0.603 | -| mse | 251 | +| ep_return | 145 | +| ep_reward | 0.581 | +| mse | 253 | | time/ | | | progress | 0.59 | | step | 5.9e+05 | -| step_time | 9.22 | +| step_time | 8.45 | -------------------------------------- -2023-10-19 16:54:24,328 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 159.375 +/- 29.476 -2023-10-19 16:54:24,330 : +2023-10-27 18:22:18,647 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 155.112 +/- 27.320 +2023-10-27 18:22:18,648 : -------------------------------------- | loss/ | | -| approx_kl | 0.0339 | -| entropy_loss | -5 | -| policy_loss | 0.00952 | -| value_loss | 10.2 | +| approx_kl | 0.026 | +| entropy_loss | -5.72 | +| policy_loss | -0.0148 | +| value_loss | 1.62 | | stat/ | | -| constraint_violation | 1.6e+03 | -| ep_constraint_vio... | 2.1 | -| ep_length | 227 | -| ep_return | 147 | -| ep_reward | 0.587 | +| constraint_violation | 1.56e+03 | +| ep_constraint_vio... | 0.6 | +| ep_length | 153 | +| ep_return | 103 | +| ep_reward | 0.418 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 159 | -| ep_reward | 0.637 | -| mse | 275 | +| ep_return | 155 | +| ep_reward | 0.62 | +| mse | 279 | | time/ | | | progress | 0.6 | | step | 6e+05 | -| step_time | 9.14 | +| step_time | 8.52 | -------------------------------------- -2023-10-19 16:56:18,033 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 172.035 +/- 31.346 -2023-10-19 16:56:18,034 : +2023-10-27 18:24:03,834 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 162.265 +/- 27.935 +2023-10-27 18:24:03,835 : -------------------------------------- | loss/ | | -| approx_kl | 0.0285 | -| entropy_loss | -5.07 | -| policy_loss | -0.0105 | -| value_loss | 0.741 | +| approx_kl | 0.0279 | +| entropy_loss | -5.78 | +| policy_loss | -0.00377 | +| value_loss | 1.57 | | stat/ | | -| constraint_violation | 1.63e+03 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 130 | -| ep_reward | 0.521 | +| constraint_violation | 1.59e+03 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 153 | +| ep_reward | 0.612 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 172 | -| ep_reward | 0.688 | -| mse | 235 | +| ep_return | 162 | +| ep_reward | 0.649 | +| mse | 245 | | time/ | | | progress | 0.61 | | step | 6.1e+05 | -| step_time | 9.22 | +| step_time | 8.51 | -------------------------------------- -2023-10-19 16:58:07,892 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 149.863 +/- 55.865 -2023-10-19 16:58:07,893 : +2023-10-27 18:25:47,435 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 142.615 +/- 52.070 +2023-10-27 18:25:47,436 : -------------------------------------- | loss/ | | -| approx_kl | 0.0253 | -| entropy_loss | -5.15 | -| policy_loss | -0.00412 | -| value_loss | 0.2 | +| approx_kl | 0.0266 | +| entropy_loss | -5.79 | +| policy_loss | -0.00598 | +| value_loss | 1.39 | | stat/ | | -| constraint_violation | 1.63e+03 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 160 | -| ep_reward | 0.64 | +| constraint_violation | 1.59e+03 | +| ep_constraint_vio... | 0.4 | +| ep_length | 225 | +| ep_return | 143 | +| ep_reward | 0.577 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 150 | -| ep_reward | 0.601 | -| mse | 208 | +| ep_return | 143 | +| ep_reward | 0.572 | +| mse | 216 | | time/ | | | progress | 0.62 | | step | 6.2e+05 | -| step_time | 8.97 | +| step_time | 8.63 | -------------------------------------- -2023-10-19 16:59:59,070 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 162.741 +/- 20.940 -2023-10-19 16:59:59,071 : +2023-10-27 18:27:33,435 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 160.738 +/- 20.016 +2023-10-27 18:27:33,436 : -------------------------------------- | loss/ | | -| approx_kl | 0.0278 | -| entropy_loss | -5.19 | -| policy_loss | -0.0109 | -| value_loss | 2.56 | +| approx_kl | 0.027 | +| entropy_loss | -5.79 | +| policy_loss | -0.00809 | +| value_loss | 1.67 | | stat/ | | -| constraint_violation | 1.65e+03 | +| constraint_violation | 1.61e+03 | | ep_constraint_vio... | 0.2 | -| ep_length | 202 | -| ep_return | 123 | -| ep_reward | 0.498 | +| ep_length | 203 | +| ep_return | 136 | +| ep_reward | 0.551 | | stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 0.7 | | ep_length | 250 | -| ep_return | 163 | -| ep_reward | 0.651 | -| mse | 231 | +| ep_return | 161 | +| ep_reward | 0.643 | +| mse | 225 | | time/ | | | progress | 0.63 | | step | 6.3e+05 | -| step_time | 8.91 | +| step_time | 8.54 | -------------------------------------- -2023-10-19 17:01:50,244 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 160.191 +/- 25.908 -2023-10-19 17:01:50,245 : +2023-10-27 18:29:19,199 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 155.313 +/- 23.418 +2023-10-27 18:29:19,200 : -------------------------------------- | loss/ | | -| approx_kl | 0.0258 | -| entropy_loss | -5.19 | -| policy_loss | -0.0114 | -| value_loss | 0.339 | +| approx_kl | 0.0232 | +| entropy_loss | -5.82 | +| policy_loss | -0.0157 | +| value_loss | 0.62 | | stat/ | | -| constraint_violation | 1.66e+03 | +| constraint_violation | 1.62e+03 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 169 | -| ep_reward | 0.677 | +| ep_return | 151 | +| ep_reward | 0.605 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 160 | -| ep_reward | 0.641 | -| mse | 235 | +| ep_return | 155 | +| ep_reward | 0.621 | +| mse | 242 | | time/ | | | progress | 0.64 | | step | 6.4e+05 | -| step_time | 9.19 | +| step_time | 8.54 | -------------------------------------- -2023-10-19 17:03:41,771 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 160.565 +/- 36.733 -2023-10-19 17:03:41,773 : +2023-10-27 18:31:04,652 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 157.187 +/- 27.565 +2023-10-27 18:31:04,654 : -------------------------------------- | loss/ | | -| approx_kl | 0.0293 | -| entropy_loss | -5.26 | -| policy_loss | -0.0116 | -| value_loss | 2.15 | +| approx_kl | 0.0266 | +| entropy_loss | -5.86 | +| policy_loss | -0.0232 | +| value_loss | 0.524 | | stat/ | | -| constraint_violation | 1.69e+03 | -| ep_constraint_vio... | 1 | -| ep_length | 153 | -| ep_return | 95.4 | -| ep_reward | 0.387 | +| constraint_violation | 1.66e+03 | +| ep_constraint_vio... | 0.3 | +| ep_length | 225 | +| ep_return | 148 | +| ep_reward | 0.592 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 161 | -| ep_reward | 0.642 | -| mse | 305 | +| ep_return | 157 | +| ep_reward | 0.629 | +| mse | 300 | | time/ | | | progress | 0.65 | | step | 6.5e+05 | -| step_time | 9.15 | +| step_time | 8.5 | -------------------------------------- -2023-10-19 17:05:33,528 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 157.978 +/- 22.977 -2023-10-19 17:05:33,529 : +2023-10-27 18:32:49,895 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 158.869 +/- 16.825 +2023-10-27 18:32:49,896 : -------------------------------------- | loss/ | | -| approx_kl | 0.0304 | -| entropy_loss | -5.31 | -| policy_loss | -0.00764 | -| value_loss | 0.302 | +| approx_kl | 0.027 | +| entropy_loss | -5.91 | +| policy_loss | -0.0109 | +| value_loss | 0.737 | | stat/ | | -| constraint_violation | 1.7e+03 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 163 | -| ep_reward | 0.654 | +| constraint_violation | 1.66e+03 | +| ep_constraint_vio... | 0.2 | +| ep_length | 226 | +| ep_return | 146 | +| ep_reward | 0.584 | | stat_eval/ | | -| constraint_violation | 2.3 | +| constraint_violation | 2.2 | | ep_length | 250 | -| ep_return | 158 | -| ep_reward | 0.632 | -| mse | 260 | +| ep_return | 159 | +| ep_reward | 0.635 | +| mse | 255 | | time/ | | | progress | 0.66 | | step | 6.6e+05 | -| step_time | 9.1 | +| step_time | 8.47 | -------------------------------------- -2023-10-19 17:07:25,297 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 169.456 +/- 26.648 -2023-10-19 17:07:25,298 : +2023-10-27 18:34:35,611 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 164.892 +/- 25.237 +2023-10-27 18:34:35,612 : -------------------------------------- | loss/ | | -| approx_kl | 0.0337 | -| entropy_loss | -5.38 | -| policy_loss | -0.00526 | -| value_loss | 3.89 | +| approx_kl | 0.0389 | +| entropy_loss | -5.98 | +| policy_loss | -0.0106 | +| value_loss | 0.4 | | stat/ | | -| constraint_violation | 1.73e+03 | -| ep_constraint_vio... | 2 | -| ep_length | 228 | -| ep_return | 157 | -| ep_reward | 0.632 | +| constraint_violation | 1.69e+03 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 160 | +| ep_reward | 0.64 | | stat_eval/ | | -| constraint_violation | 0.7 | +| constraint_violation | 0.8 | | ep_length | 250 | -| ep_return | 169 | -| ep_reward | 0.678 | -| mse | 233 | +| ep_return | 165 | +| ep_reward | 0.66 | +| mse | 237 | | time/ | | | progress | 0.67 | | step | 6.7e+05 | -| step_time | 9.07 | +| step_time | 8.48 | -------------------------------------- -2023-10-19 17:09:17,468 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 155.632 +/- 35.330 -2023-10-19 17:09:17,469 : +2023-10-27 18:36:21,096 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 156.888 +/- 28.506 +2023-10-27 18:36:21,097 : -------------------------------------- | loss/ | | -| approx_kl | 0.0309 | -| entropy_loss | -5.41 | -| policy_loss | -0.0139 | -| value_loss | 0.238 | +| approx_kl | 0.0343 | +| entropy_loss | -5.99 | +| policy_loss | 0.00152 | +| value_loss | 0.331 | | stat/ | | -| constraint_violation | 1.75e+03 | +| constraint_violation | 1.71e+03 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 137 | -| ep_reward | 0.548 | +| ep_return | 156 | +| ep_reward | 0.624 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 156 | -| ep_reward | 0.623 | -| mse | 301 | +| ep_return | 157 | +| ep_reward | 0.628 | +| mse | 291 | | time/ | | | progress | 0.68 | | step | 6.8e+05 | -| step_time | 9.03 | +| step_time | 8.48 | -------------------------------------- -2023-10-19 17:11:08,743 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 174.734 +/- 29.020 -2023-10-19 17:11:08,744 : +2023-10-27 18:38:06,196 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 179.545 +/- 30.291 +2023-10-27 18:38:06,205 : -------------------------------------- | loss/ | | -| approx_kl | 0.0158 | -| entropy_loss | -5.43 | -| policy_loss | -0.0138 | -| value_loss | 0.81 | +| approx_kl | 0.0281 | +| entropy_loss | -5.99 | +| policy_loss | -0.0151 | +| value_loss | 0.565 | | stat/ | | -| constraint_violation | 1.78e+03 | -| ep_constraint_vio... | 0.2 | +| constraint_violation | 1.76e+03 | +| ep_constraint_vio... | 0.8 | | ep_length | 226 | -| ep_return | 136 | -| ep_reward | 0.544 | +| ep_return | 155 | +| ep_reward | 0.621 | | stat_eval/ | | -| constraint_violation | 0.8 | +| constraint_violation | 0.9 | | ep_length | 250 | -| ep_return | 175 | -| ep_reward | 0.699 | -| mse | 159 | +| ep_return | 180 | +| ep_reward | 0.718 | +| mse | 152 | | time/ | | | progress | 0.69 | | step | 6.9e+05 | -| step_time | 9.15 | +| step_time | 8.44 | -------------------------------------- -2023-10-19 17:13:00,033 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 161.084 +/- 19.284 -2023-10-19 17:13:00,034 : +2023-10-27 18:39:50,993 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 155.959 +/- 20.556 +2023-10-27 18:39:50,994 : -------------------------------------- | loss/ | | -| approx_kl | 0.0177 | -| entropy_loss | -5.43 | -| policy_loss | -0.017 | -| value_loss | 0.81 | +| approx_kl | 0.031 | +| entropy_loss | -6.1 | +| policy_loss | -0.0148 | +| value_loss | 0.543 | | stat/ | | -| constraint_violation | 1.81e+03 | +| constraint_violation | 1.77e+03 | | ep_constraint_vio... | 0.1 | | ep_length | 226 | -| ep_return | 160 | -| ep_reward | 0.642 | +| ep_return | 153 | +| ep_reward | 0.614 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 161 | -| ep_reward | 0.644 | -| mse | 226 | +| ep_return | 156 | +| ep_reward | 0.624 | +| mse | 234 | | time/ | | | progress | 0.7 | | step | 7e+05 | -| step_time | 9.09 | +| step_time | 8.52 | -------------------------------------- -2023-10-19 17:14:51,935 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 171.325 +/- 37.520 -2023-10-19 17:14:51,937 : +2023-10-27 18:41:36,173 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 169.591 +/- 33.229 +2023-10-27 18:41:36,174 : -------------------------------------- | loss/ | | -| approx_kl | 0.0265 | -| entropy_loss | -5.53 | -| policy_loss | -0.00435 | -| value_loss | 7.56 | +| approx_kl | 0.0216 | +| entropy_loss | -6.14 | +| policy_loss | -0.0219 | +| value_loss | 0.674 | | stat/ | | -| constraint_violation | 1.81e+03 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 137 | -| ep_reward | 0.551 | +| constraint_violation | 1.78e+03 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 162 | +| ep_reward | 0.647 | | stat_eval/ | | | constraint_violation | 1.2 | | ep_length | 250 | -| ep_return | 171 | -| ep_reward | 0.685 | -| mse | 237 | +| ep_return | 170 | +| ep_reward | 0.678 | +| mse | 238 | | time/ | | | progress | 0.71 | | step | 7.1e+05 | -| step_time | 9.16 | +| step_time | 8.43 | -------------------------------------- -2023-10-19 17:16:41,773 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 149.071 +/- 54.636 -2023-10-19 17:16:41,774 : +2023-10-27 18:43:19,210 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 152.300 +/- 57.665 +2023-10-27 18:43:19,211 : -------------------------------------- | loss/ | | -| approx_kl | 0.00762 | -| entropy_loss | -5.5 | -| policy_loss | -0.016 | -| value_loss | 0.537 | +| approx_kl | 0.0249 | +| entropy_loss | -6.22 | +| policy_loss | -0.0123 | +| value_loss | 0.457 | | stat/ | | -| constraint_violation | 1.86e+03 | +| constraint_violation | 1.83e+03 | | ep_constraint_vio... | 1.4 | | ep_length | 250 | -| ep_return | 156 | -| ep_reward | 0.624 | +| ep_return | 149 | +| ep_reward | 0.595 | | stat_eval/ | | -| constraint_violation | 1.4 | +| constraint_violation | 1.5 | | ep_length | 226 | -| ep_return | 149 | -| ep_reward | 0.596 | -| mse | 199 | +| ep_return | 152 | +| ep_reward | 0.609 | +| mse | 193 | | time/ | | | progress | 0.72 | | step | 7.2e+05 | -| step_time | 9.18 | +| step_time | 8.49 | -------------------------------------- -2023-10-19 17:18:31,381 : Eval | ep_lengths 226.70 +/- 69.90 | ep_return 160.623 +/- 58.979 -2023-10-19 17:18:31,382 : +2023-10-27 18:45:02,584 : Eval | ep_lengths 226.70 +/- 69.90 | ep_return 161.452 +/- 59.951 +2023-10-27 18:45:02,585 : -------------------------------------- | loss/ | | -| approx_kl | 0.0228 | -| entropy_loss | -5.44 | -| policy_loss | -0.00739 | -| value_loss | 0.759 | +| approx_kl | 0.024 | +| entropy_loss | -6.21 | +| policy_loss | -0.0104 | +| value_loss | 0.474 | | stat/ | | -| constraint_violation | 1.87e+03 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 135 | -| ep_reward | 0.539 | +| constraint_violation | 1.84e+03 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 173 | +| ep_reward | 0.691 | | stat_eval/ | | -| constraint_violation | 0.1 | +| constraint_violation | 0.2 | | ep_length | 227 | | ep_return | 161 | -| ep_reward | 0.643 | -| mse | 158 | +| ep_reward | 0.646 | +| mse | 157 | | time/ | | | progress | 0.73 | | step | 7.3e+05 | -| step_time | 9.21 | +| step_time | 8.55 | -------------------------------------- -2023-10-19 17:20:20,959 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 154.695 +/- 23.310 -2023-10-19 17:20:20,960 : +2023-10-27 18:46:47,756 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 152.224 +/- 20.711 +2023-10-27 18:46:47,757 : -------------------------------------- | loss/ | | -| approx_kl | 0.0492 | -| entropy_loss | -5.48 | -| policy_loss | 0.00136 | -| value_loss | 1.8 | +| approx_kl | 0.0224 | +| entropy_loss | -6.33 | +| policy_loss | -0.00783 | +| value_loss | 0.634 | | stat/ | | -| constraint_violation | 1.9e+03 | -| ep_constraint_vio... | 1.8 | +| constraint_violation | 1.87e+03 | +| ep_constraint_vio... | 0.2 | | ep_length | 225 | -| ep_return | 157 | -| ep_reward | 0.63 | +| ep_return | 138 | +| ep_reward | 0.553 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 155 | -| ep_reward | 0.619 | -| mse | 251 | +| ep_return | 152 | +| ep_reward | 0.609 | +| mse | 252 | | time/ | | | progress | 0.74 | | step | 7.4e+05 | -| step_time | 8.91 | +| step_time | 8.48 | -------------------------------------- -2023-10-19 17:22:08,215 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 143.485 +/- 52.369 -2023-10-19 17:22:08,216 : +2023-10-27 18:48:30,868 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 141.616 +/- 50.329 +2023-10-27 18:48:30,869 : -------------------------------------- | loss/ | | -| approx_kl | 0.0314 | -| entropy_loss | -5.52 | -| policy_loss | -0.00453 | -| value_loss | 1.12 | +| approx_kl | 0.0243 | +| entropy_loss | -6.46 | +| policy_loss | -0.0179 | +| value_loss | 0.304 | | stat/ | | -| constraint_violation | 1.94e+03 | -| ep_constraint_vio... | 1.2 | -| ep_length | 202 | -| ep_return | 131 | -| ep_reward | 0.525 | +| constraint_violation | 1.9e+03 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 155 | +| ep_reward | 0.618 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 143 | -| ep_reward | 0.58 | -| mse | 226 | +| ep_return | 142 | +| ep_reward | 0.573 | +| mse | 229 | | time/ | | | progress | 0.75 | | step | 7.5e+05 | -| step_time | 8.92 | +| step_time | 8.47 | -------------------------------------- -2023-10-19 17:24:01,050 : Eval | ep_lengths 225.60 +/- 73.20 | ep_return 145.850 +/- 57.612 -2023-10-19 17:24:01,052 : +2023-10-27 18:50:13,980 : Eval | ep_lengths 225.60 +/- 73.20 | ep_return 144.417 +/- 55.684 +2023-10-27 18:50:13,982 : -------------------------------------- | loss/ | | -| approx_kl | 0.0256 | -| entropy_loss | -5.59 | -| policy_loss | -0.00991 | -| value_loss | 0.916 | +| approx_kl | 0.0193 | +| entropy_loss | -6.58 | +| policy_loss | -0.00679 | +| value_loss | 0.532 | | stat/ | | -| constraint_violation | 1.96e+03 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 126 | -| ep_reward | 0.509 | +| constraint_violation | 1.92e+03 | +| ep_constraint_vio... | 0.1 | +| ep_length | 226 | +| ep_return | 135 | +| ep_reward | 0.539 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 226 | -| ep_return | 146 | -| ep_reward | 0.583 | -| mse | 239 | +| ep_return | 144 | +| ep_reward | 0.578 | +| mse | 237 | | time/ | | | progress | 0.76 | | step | 7.6e+05 | -| step_time | 9.35 | +| step_time | 8.57 | -------------------------------------- -2023-10-19 17:26:02,165 : Eval | ep_lengths 201.30 +/- 97.41 | ep_return 135.535 +/- 68.700 -2023-10-19 17:26:02,166 : +2023-10-27 18:51:55,246 : Eval | ep_lengths 201.30 +/- 97.41 | ep_return 133.516 +/- 67.480 +2023-10-27 18:51:55,247 : -------------------------------------- | loss/ | | -| approx_kl | 0.0235 | -| entropy_loss | -5.61 | -| policy_loss | -0.00956 | -| value_loss | 0.763 | +| approx_kl | 0.0143 | +| entropy_loss | -6.64 | +| policy_loss | -0.0281 | +| value_loss | 0.702 | | stat/ | | | constraint_violation | 1.99e+03 | -| ep_constraint_vio... | 1.9 | -| ep_length | 202 | -| ep_return | 127 | -| ep_reward | 0.51 | +| ep_constraint_vio... | 2.2 | +| ep_length | 227 | +| ep_return | 147 | +| ep_reward | 0.59 | | stat_eval/ | | -| constraint_violation | 0.3 | +| constraint_violation | 0.2 | | ep_length | 201 | -| ep_return | 136 | -| ep_reward | 0.546 | -| mse | 135 | +| ep_return | 134 | +| ep_reward | 0.538 | +| mse | 142 | | time/ | | | progress | 0.77 | | step | 7.7e+05 | -| step_time | 10.6 | +| step_time | 8.52 | -------------------------------------- -2023-10-19 17:28:05,601 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 157.499 +/- 59.679 -2023-10-19 17:28:05,602 : +2023-10-27 18:53:37,803 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 156.480 +/- 58.889 +2023-10-27 18:53:37,804 : -------------------------------------- | loss/ | | -| approx_kl | 0.0218 | -| entropy_loss | -5.66 | -| policy_loss | -0.00495 | -| value_loss | 0.401 | +| approx_kl | 0.0269 | +| entropy_loss | -6.66 | +| policy_loss | -0.00872 | +| value_loss | 1.03 | | stat/ | | -| constraint_violation | 2.02e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 156 | -| ep_reward | 0.624 | +| constraint_violation | 1.99e+03 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 168 | +| ep_reward | 0.67 | | stat_eval/ | | | constraint_violation | 1.5 | | ep_length | 225 | -| ep_return | 157 | -| ep_reward | 0.638 | -| mse | 198 | +| ep_return | 156 | +| ep_reward | 0.634 | +| mse | 197 | | time/ | | | progress | 0.78 | | step | 7.8e+05 | -| step_time | 10.3 | +| step_time | 8.43 | -------------------------------------- -2023-10-19 17:30:07,065 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 144.182 +/- 52.222 -2023-10-19 17:30:07,066 : +2023-10-27 18:55:20,672 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 145.280 +/- 50.161 +2023-10-27 18:55:20,673 : -------------------------------------- | loss/ | | -| approx_kl | 0.0233 | -| entropy_loss | -5.72 | -| policy_loss | -0.0135 | -| value_loss | 0.971 | +| approx_kl | 0.0248 | +| entropy_loss | -6.68 | +| policy_loss | -0.0119 | +| value_loss | 1.23 | | stat/ | | -| constraint_violation | 2.04e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 155 | -| ep_reward | 0.621 | +| constraint_violation | 2.02e+03 | +| ep_constraint_vio... | 0.2 | +| ep_length | 200 | +| ep_return | 129 | +| ep_reward | 0.519 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 226 | -| ep_return | 144 | -| ep_reward | 0.58 | -| mse | 201 | +| ep_return | 145 | +| ep_reward | 0.584 | +| mse | 198 | | time/ | | | progress | 0.79 | | step | 7.9e+05 | -| step_time | 10.1 | +| step_time | 8.53 | -------------------------------------- -2023-10-19 17:32:10,216 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 155.039 +/- 22.818 -2023-10-19 17:32:10,217 : +2023-10-27 18:57:05,213 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 155.406 +/- 19.938 +2023-10-27 18:57:05,215 : -------------------------------------- | loss/ | | -| approx_kl | 0.0247 | -| entropy_loss | -5.78 | -| policy_loss | -0.0201 | -| value_loss | 0.302 | +| approx_kl | 0.0244 | +| entropy_loss | -6.75 | +| policy_loss | -0.0186 | +| value_loss | 0.328 | | stat/ | | -| constraint_violation | 2.06e+03 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 174 | -| ep_reward | 0.695 | +| constraint_violation | 2.03e+03 | +| ep_constraint_vio... | 0.2 | +| ep_length | 201 | +| ep_return | 129 | +| ep_reward | 0.517 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | | ep_return | 155 | -| ep_reward | 0.62 | -| mse | 294 | +| ep_reward | 0.622 | +| mse | 295 | | time/ | | | progress | 0.8 | | step | 8e+05 | -| step_time | 9.53 | +| step_time | 8.42 | -------------------------------------- -2023-10-19 17:34:12,007 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 172.039 +/- 22.278 -2023-10-19 17:34:12,008 : +2023-10-27 18:58:49,745 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 167.772 +/- 20.058 +2023-10-27 18:58:49,746 : -------------------------------------- | loss/ | | -| approx_kl | 0.0312 | -| entropy_loss | -5.91 | -| policy_loss | -0.00904 | -| value_loss | 0.282 | +| approx_kl | 0.0213 | +| entropy_loss | -6.81 | +| policy_loss | -0.0191 | +| value_loss | 1.99 | | stat/ | | -| constraint_violation | 2.06e+03 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 165 | -| ep_reward | 0.661 | +| constraint_violation | 2.04e+03 | +| ep_constraint_vio... | 0.3 | +| ep_length | 178 | +| ep_return | 112 | +| ep_reward | 0.463 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 172 | -| ep_reward | 0.688 | -| mse | 188 | +| ep_return | 168 | +| ep_reward | 0.671 | +| mse | 196 | | time/ | | | progress | 0.81 | | step | 8.1e+05 | -| step_time | 9.64 | +| step_time | 8.54 | -------------------------------------- -2023-10-19 17:36:10,659 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 184.320 +/- 20.274 -2023-10-19 17:36:10,669 : +2023-10-27 19:00:34,160 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 181.453 +/- 18.678 +2023-10-27 19:00:34,189 : -------------------------------------- | loss/ | | -| approx_kl | 0.0316 | -| entropy_loss | -5.95 | -| policy_loss | -0.00601 | -| value_loss | 1 | +| approx_kl | 0.0254 | +| entropy_loss | -6.99 | +| policy_loss | -0.00901 | +| value_loss | 0.986 | | stat/ | | -| constraint_violation | 2.1e+03 | -| ep_constraint_vio... | 2 | -| ep_length | 177 | -| ep_return | 127 | -| ep_reward | 0.515 | +| constraint_violation | 2.08e+03 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 146 | +| ep_reward | 0.583 | | stat_eval/ | | -| constraint_violation | 1 | +| constraint_violation | 0.7 | | ep_length | 250 | -| ep_return | 184 | -| ep_reward | 0.737 | -| mse | 136 | +| ep_return | 181 | +| ep_reward | 0.726 | +| mse | 140 | | time/ | | | progress | 0.82 | | step | 8.2e+05 | -| step_time | 9.84 | +| step_time | 8.43 | -------------------------------------- -2023-10-19 17:38:10,885 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 174.164 +/- 27.292 -2023-10-19 17:38:10,886 : +2023-10-27 19:02:18,233 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 171.537 +/- 24.174 +2023-10-27 19:02:18,234 : -------------------------------------- | loss/ | | -| approx_kl | 0.0233 | -| entropy_loss | -6 | -| policy_loss | -0.00593 | -| value_loss | 0.551 | +| approx_kl | 0.0225 | +| entropy_loss | -7.03 | +| policy_loss | -0.00384 | +| value_loss | 0.226 | | stat/ | | -| constraint_violation | 2.12e+03 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 176 | -| ep_reward | 0.703 | +| constraint_violation | 2.1e+03 | +| ep_constraint_vio... | 0.8 | +| ep_length | 225 | +| ep_return | 144 | +| ep_reward | 0.576 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 250 | -| ep_return | 174 | -| ep_reward | 0.697 | -| mse | 212 | +| ep_return | 172 | +| ep_reward | 0.686 | +| mse | 215 | | time/ | | | progress | 0.83 | | step | 8.3e+05 | -| step_time | 9.2 | +| step_time | 8.36 | -------------------------------------- -2023-10-19 17:40:10,689 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 170.772 +/- 30.526 -2023-10-19 17:40:10,690 : +2023-10-27 19:04:02,072 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 171.452 +/- 27.466 +2023-10-27 19:04:02,073 : -------------------------------------- | loss/ | | -| approx_kl | 0.0269 | -| entropy_loss | -6 | -| policy_loss | -0.00945 | -| value_loss | 0.34 | +| approx_kl | 0.0267 | +| entropy_loss | -7.01 | +| policy_loss | -0.00879 | +| value_loss | 0.561 | | stat/ | | -| constraint_violation | 2.15e+03 | -| ep_constraint_vio... | 0 | +| constraint_violation | 2.14e+03 | +| ep_constraint_vio... | 1.4 | | ep_length | 250 | | ep_return | 154 | -| ep_reward | 0.616 | +| ep_reward | 0.615 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | | ep_return | 171 | -| ep_reward | 0.683 | -| mse | 206 | +| ep_reward | 0.686 | +| mse | 200 | | time/ | | | progress | 0.84 | | step | 8.4e+05 | -| step_time | 9.44 | --------------------------------------- - -2023-10-19 17:42:01,087 : Eval | ep_lengths 151.60 +/- 120.52 | ep_return 101.836 +/- 84.947 -2023-10-19 17:42:01,088 : ---------------------------------------- -| loss/ | | -| approx_kl | 0.0239 | -| entropy_loss | -6.07 | -| policy_loss | -0.000712 | -| value_loss | 0.606 | -| stat/ | | -| constraint_violation | 2.2e+03 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 127 | -| ep_reward | 0.512 | -| stat_eval/ | | -| constraint_violation | 0.4 | -| ep_length | 152 | -| ep_return | 102 | -| ep_reward | 0.416 | -| mse | 106 | -| time/ | | -| progress | 0.85 | -| step | 8.5e+05 | -| step_time | 9.65 | ---------------------------------------- - -2023-10-19 17:44:00,481 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 153.620 +/- 37.813 -2023-10-19 17:44:00,482 : +| step_time | 8.46 | +-------------------------------------- + +2023-10-27 19:05:38,560 : Eval | ep_lengths 151.60 +/- 120.52 | ep_return 99.406 +/- 82.493 +2023-10-27 19:05:38,561 : -------------------------------------- | loss/ | | -| approx_kl | 0.0225 | -| entropy_loss | -6.19 | -| policy_loss | -0.0068 | -| value_loss | 1.16 | +| approx_kl | 0.0342 | +| entropy_loss | -7.04 | +| policy_loss | -0.00584 | +| value_loss | 0.472 | | stat/ | | -| constraint_violation | 2.25e+03 | -| ep_constraint_vio... | 1.5 | -| ep_length | 227 | -| ep_return | 157 | -| ep_reward | 0.636 | +| constraint_violation | 2.17e+03 | +| ep_constraint_vio... | 1 | +| ep_length | 250 | +| ep_return | 170 | +| ep_reward | 0.682 | +| stat_eval/ | | +| constraint_violation | 0.4 | +| ep_length | 152 | +| ep_return | 99.4 | +| ep_reward | 0.406 | +| mse | 107 | +| time/ | | +| progress | 0.85 | +| step | 8.5e+05 | +| step_time | 8.47 | +-------------------------------------- + +2023-10-27 19:07:22,513 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 154.718 +/- 29.642 +2023-10-27 19:07:22,514 : +-------------------------------------- +| loss/ | | +| approx_kl | 0.0198 | +| entropy_loss | -7.04 | +| policy_loss | -0.0208 | +| value_loss | 0.248 | +| stat/ | | +| constraint_violation | 2.22e+03 | +| ep_constraint_vio... | 0.9 | +| ep_length | 225 | +| ep_return | 153 | +| ep_reward | 0.612 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 154 | -| ep_reward | 0.614 | -| mse | 346 | +| ep_return | 155 | +| ep_reward | 0.619 | +| mse | 339 | | time/ | | | progress | 0.86 | | step | 8.6e+05 | -| step_time | 9.72 | +| step_time | 8.38 | -------------------------------------- -2023-10-19 17:45:54,354 : Eval | ep_lengths 201.60 +/- 96.81 | ep_return 135.265 +/- 74.012 -2023-10-19 17:45:54,356 : +2023-10-27 19:09:03,180 : Eval | ep_lengths 201.70 +/- 96.61 | ep_return 134.088 +/- 70.400 +2023-10-27 19:09:03,181 : -------------------------------------- | loss/ | | -| approx_kl | 0.0248 | -| entropy_loss | -6.22 | -| policy_loss | -0.00931 | -| value_loss | 0.271 | +| approx_kl | 0.0155 | +| entropy_loss | -7.09 | +| policy_loss | -0.0154 | +| value_loss | 0.504 | | stat/ | | -| constraint_violation | 2.27e+03 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 162 | -| ep_reward | 0.648 | +| constraint_violation | 2.24e+03 | +| ep_constraint_vio... | 0.1 | +| ep_length | 226 | +| ep_return | 149 | +| ep_reward | 0.597 | | stat_eval/ | | | constraint_violation | 1.3 | | ep_length | 202 | -| ep_return | 135 | -| ep_reward | 0.542 | -| mse | 210 | +| ep_return | 134 | +| ep_reward | 0.537 | +| mse | 209 | | time/ | | | progress | 0.87 | | step | 8.7e+05 | -| step_time | 9.39 | +| step_time | 8.44 | -------------------------------------- -2023-10-19 17:47:49,344 : Eval | ep_lengths 201.60 +/- 96.82 | ep_return 135.673 +/- 72.988 -2023-10-19 17:47:49,345 : +2023-10-27 19:10:44,140 : Eval | ep_lengths 201.50 +/- 97.02 | ep_return 134.041 +/- 71.218 +2023-10-27 19:10:44,141 : -------------------------------------- | loss/ | | -| approx_kl | 0.0202 | -| entropy_loss | -6.28 | -| policy_loss | -0.0112 | -| value_loss | 0.727 | +| approx_kl | 0.0137 | +| entropy_loss | -7.13 | +| policy_loss | -0.0181 | +| value_loss | 0.362 | | stat/ | | -| constraint_violation | 2.27e+03 | -| ep_constraint_vio... | 0.1 | +| constraint_violation | 2.25e+03 | +| ep_constraint_vio... | 1.3 | | ep_length | 226 | -| ep_return | 144 | -| ep_reward | 0.576 | +| ep_return | 140 | +| ep_reward | 0.56 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 202 | -| ep_return | 136 | -| ep_reward | 0.544 | -| mse | 194 | +| ep_return | 134 | +| ep_reward | 0.538 | +| mse | 200 | | time/ | | | progress | 0.88 | | step | 8.8e+05 | -| step_time | 9.39 | +| step_time | 8.43 | -------------------------------------- -2023-10-19 17:49:46,150 : Eval | ep_lengths 226.10 +/- 71.70 | ep_return 156.793 +/- 61.153 -2023-10-19 17:49:46,151 : +2023-10-27 19:12:27,523 : Eval | ep_lengths 226.10 +/- 71.70 | ep_return 151.467 +/- 57.260 +2023-10-27 19:12:27,524 : -------------------------------------- | loss/ | | | approx_kl | 0.0266 | -| entropy_loss | -6.31 | -| policy_loss | -0.0105 | -| value_loss | 0.573 | +| entropy_loss | -7.21 | +| policy_loss | -0.00706 | +| value_loss | 1.8 | | stat/ | | -| constraint_violation | 2.29e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 154 | -| ep_reward | 0.615 | +| constraint_violation | 2.27e+03 | +| ep_constraint_vio... | 1.3 | +| ep_length | 202 | +| ep_return | 115 | +| ep_reward | 0.464 | | stat_eval/ | | -| constraint_violation | 0.3 | +| constraint_violation | 0.2 | | ep_length | 226 | -| ep_return | 157 | -| ep_reward | 0.627 | -| mse | 199 | +| ep_return | 151 | +| ep_reward | 0.606 | +| mse | 207 | | time/ | | | progress | 0.89 | | step | 8.9e+05 | -| step_time | 9.79 | +| step_time | 8.54 | -------------------------------------- -2023-10-19 17:51:43,335 : Eval | ep_lengths 226.00 +/- 72.00 | ep_return 144.325 +/- 56.679 -2023-10-19 17:51:43,336 : +2023-10-27 19:14:08,683 : Eval | ep_lengths 226.00 +/- 72.00 | ep_return 144.406 +/- 53.693 +2023-10-27 19:14:08,684 : -------------------------------------- | loss/ | | -| approx_kl | 0.0167 | -| entropy_loss | -6.41 | -| policy_loss | -0.0156 | -| value_loss | 1.04 | +| approx_kl | 0.0287 | +| entropy_loss | -7.24 | +| policy_loss | -0.0182 | +| value_loss | 0.274 | | stat/ | | -| constraint_violation | 2.32e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 152 | -| ep_reward | 0.607 | +| constraint_violation | 2.29e+03 | +| ep_constraint_vio... | 0.6 | +| ep_length | 200 | +| ep_return | 127 | +| ep_reward | 0.509 | | stat_eval/ | | -| constraint_violation | 1.2 | +| constraint_violation | 1.4 | | ep_length | 226 | | ep_return | 144 | | ep_reward | 0.578 | -| mse | 233 | +| mse | 229 | | time/ | | | progress | 0.9 | | step | 9e+05 | -| step_time | 10.1 | +| step_time | 8.32 | -------------------------------------- -2023-10-19 17:53:40,207 : Eval | ep_lengths 226.60 +/- 70.20 | ep_return 149.978 +/- 56.280 -2023-10-19 17:53:40,209 : +2023-10-27 19:15:49,197 : Eval | ep_lengths 226.60 +/- 70.20 | ep_return 149.895 +/- 54.493 +2023-10-27 19:15:49,198 : -------------------------------------- | loss/ | | -| approx_kl | 0.0214 | -| entropy_loss | -6.41 | -| policy_loss | -0.00676 | -| value_loss | 0.349 | +| approx_kl | 0.0259 | +| entropy_loss | -7.35 | +| policy_loss | -0.0114 | +| value_loss | 0.793 | | stat/ | | -| constraint_violation | 2.35e+03 | -| ep_constraint_vio... | 0.1 | +| constraint_violation | 2.32e+03 | +| ep_constraint_vio... | 0.2 | | ep_length | 225 | -| ep_return | 155 | -| ep_reward | 0.621 | +| ep_return | 138 | +| ep_reward | 0.552 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 227 | | ep_return | 150 | | ep_reward | 0.601 | -| mse | 228 | +| mse | 221 | | time/ | | | progress | 0.91 | | step | 9.1e+05 | -| step_time | 10.1 | +| step_time | 8.15 | -------------------------------------- -2023-10-19 17:55:37,656 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 173.174 +/- 62.142 -2023-10-19 17:55:37,657 : +2023-10-27 19:17:30,177 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 165.364 +/- 58.577 +2023-10-27 19:17:30,178 : -------------------------------------- | loss/ | | -| approx_kl | 0.0266 | -| entropy_loss | -6.52 | -| policy_loss | -0.0107 | -| value_loss | 1.19 | +| approx_kl | 0.0224 | +| entropy_loss | -7.44 | +| policy_loss | -0.0193 | +| value_loss | 0.513 | | stat/ | | -| constraint_violation | 2.36e+03 | -| ep_constraint_vio... | 0.2 | -| ep_length | 202 | -| ep_return | 123 | -| ep_reward | 0.495 | +| constraint_violation | 2.34e+03 | +| ep_constraint_vio... | 0.9 | +| ep_length | 225 | +| ep_return | 151 | +| ep_reward | 0.604 | | stat_eval/ | | | constraint_violation | 1.9 | | ep_length | 225 | -| ep_return | 173 | -| ep_reward | 0.693 | -| mse | 101 | +| ep_return | 165 | +| ep_reward | 0.662 | +| mse | 110 | | time/ | | | progress | 0.92 | | step | 9.2e+05 | -| step_time | 10.2 | +| step_time | 8.24 | -------------------------------------- -2023-10-19 17:57:36,439 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 174.619 +/- 21.200 -2023-10-19 17:57:36,440 : +2023-10-27 19:19:12,617 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 167.785 +/- 21.314 +2023-10-27 19:19:12,619 : -------------------------------------- | loss/ | | -| approx_kl | 0.0316 | -| entropy_loss | -6.46 | -| policy_loss | -0.0117 | -| value_loss | 0.285 | +| approx_kl | 0.0325 | +| entropy_loss | -7.43 | +| policy_loss | -0.00514 | +| value_loss | 0.523 | | stat/ | | -| constraint_violation | 2.39e+03 | -| ep_constraint_vio... | 1.3 | +| constraint_violation | 2.37e+03 | +| ep_constraint_vio... | 1.2 | | ep_length | 250 | -| ep_return | 169 | -| ep_reward | 0.677 | +| ep_return | 164 | +| ep_reward | 0.655 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 175 | -| ep_reward | 0.698 | -| mse | 164 | +| ep_return | 168 | +| ep_reward | 0.671 | +| mse | 174 | | time/ | | | progress | 0.93 | | step | 9.3e+05 | -| step_time | 9.13 | +| step_time | 8.22 | -------------------------------------- -2023-10-19 17:59:36,713 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 160.823 +/- 27.986 -2023-10-19 17:59:36,714 : +2023-10-27 19:20:55,746 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 154.577 +/- 28.343 +2023-10-27 19:20:55,747 : -------------------------------------- | loss/ | | -| approx_kl | 0.0226 | -| entropy_loss | -6.42 | -| policy_loss | -0.00639 | -| value_loss | 0.489 | +| approx_kl | 0.0243 | +| entropy_loss | -7.58 | +| policy_loss | -0.0215 | +| value_loss | 0.421 | | stat/ | | -| constraint_violation | 2.4e+03 | +| constraint_violation | 2.37e+03 | | ep_constraint_vio... | 0.1 | | ep_length | 226 | -| ep_return | 156 | -| ep_reward | 0.626 | +| ep_return | 153 | +| ep_reward | 0.611 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 161 | -| ep_reward | 0.643 | -| mse | 247 | +| ep_return | 155 | +| ep_reward | 0.618 | +| mse | 255 | | time/ | | | progress | 0.94 | | step | 9.4e+05 | -| step_time | 10.2 | +| step_time | 8.42 | -------------------------------------- -2023-10-19 18:01:32,649 : Eval | ep_lengths 227.50 +/- 67.50 | ep_return 144.078 +/- 55.001 -2023-10-19 18:01:32,650 : +2023-10-27 19:22:36,769 : Eval | ep_lengths 227.30 +/- 68.10 | ep_return 139.488 +/- 52.309 +2023-10-27 19:22:36,770 : -------------------------------------- | loss/ | | -| approx_kl | 0.0257 | -| entropy_loss | -6.48 | -| policy_loss | -0.00702 | -| value_loss | 1.53 | +| approx_kl | 0.0262 | +| entropy_loss | -7.63 | +| policy_loss | -0.00642 | +| value_loss | 0.539 | | stat/ | | -| constraint_violation | 2.43e+03 | +| constraint_violation | 2.4e+03 | | ep_constraint_vio... | 0.8 | -| ep_length | 201 | -| ep_return | 131 | -| ep_reward | 0.525 | +| ep_length | 250 | +| ep_return | 169 | +| ep_reward | 0.674 | | stat_eval/ | | | constraint_violation | 0.1 | -| ep_length | 228 | -| ep_return | 144 | -| ep_reward | 0.577 | -| mse | 269 | +| ep_length | 227 | +| ep_return | 139 | +| ep_reward | 0.558 | +| mse | 273 | | time/ | | | progress | 0.95 | | step | 9.5e+05 | -| step_time | 10.2 | +| step_time | 8.27 | -------------------------------------- -2023-10-19 18:03:29,738 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 169.137 +/- 25.285 -2023-10-19 18:03:29,739 : +2023-10-27 19:24:20,434 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 167.216 +/- 24.609 +2023-10-27 19:24:20,435 : -------------------------------------- | loss/ | | -| approx_kl | 0.0179 | -| entropy_loss | -6.52 | -| policy_loss | -0.00885 | -| value_loss | 0.372 | +| approx_kl | 0.013 | +| entropy_loss | -7.7 | +| policy_loss | -0.0316 | +| value_loss | 0.335 | | stat/ | | -| constraint_violation | 2.46e+03 | -| ep_constraint_vio... | 1.1 | -| ep_length | 225 | -| ep_return | 143 | -| ep_reward | 0.572 | +| constraint_violation | 2.42e+03 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 176 | +| ep_reward | 0.703 | | stat_eval/ | | -| constraint_violation | 0.2 | +| constraint_violation | 0.6 | | ep_length | 250 | -| ep_return | 169 | -| ep_reward | 0.677 | -| mse | 200 | +| ep_return | 167 | +| ep_reward | 0.669 | +| mse | 206 | | time/ | | | progress | 0.96 | | step | 9.6e+05 | -| step_time | 9.85 | +| step_time | 8.37 | -------------------------------------- -2023-10-19 18:05:22,055 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 140.853 +/- 50.921 -2023-10-19 18:05:22,056 : +2023-10-27 19:26:01,933 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 143.011 +/- 51.603 +2023-10-27 19:26:01,934 : -------------------------------------- | loss/ | | -| approx_kl | 0.0259 | -| entropy_loss | -6.6 | -| policy_loss | -0.0191 | -| value_loss | 0.218 | +| approx_kl | 0.0325 | +| entropy_loss | -7.66 | +| policy_loss | -0.0156 | +| value_loss | 0.483 | | stat/ | | -| constraint_violation | 2.47e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 159 | -| ep_reward | 0.642 | +| constraint_violation | 2.43e+03 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 171 | +| ep_reward | 0.685 | | stat_eval/ | | -| constraint_violation | 1.7 | +| constraint_violation | 1.8 | | ep_length | 225 | -| ep_return | 141 | -| ep_reward | 0.564 | -| mse | 252 | +| ep_return | 143 | +| ep_reward | 0.573 | +| mse | 247 | | time/ | | | progress | 0.97 | | step | 9.7e+05 | -| step_time | 9.96 | +| step_time | 8.38 | -------------------------------------- -2023-10-19 18:07:18,075 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 172.983 +/- 22.324 -2023-10-19 18:07:18,076 : +2023-10-27 19:27:43,734 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 174.682 +/- 24.061 +2023-10-27 19:27:43,735 : -------------------------------------- | loss/ | | -| approx_kl | 0.0262 | -| entropy_loss | -6.64 | -| policy_loss | -0.0131 | -| value_loss | 0.87 | +| approx_kl | 0.0163 | +| entropy_loss | -7.68 | +| policy_loss | -0.0209 | +| value_loss | 0.521 | | stat/ | | -| constraint_violation | 2.5e+03 | -| ep_constraint_vio... | 1.1 | -| ep_length | 227 | -| ep_return | 151 | -| ep_reward | 0.602 | +| constraint_violation | 2.46e+03 | +| ep_constraint_vio... | 0.1 | +| ep_length | 226 | +| ep_return | 158 | +| ep_reward | 0.633 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 173 | -| ep_reward | 0.692 | -| mse | 170 | +| ep_return | 175 | +| ep_reward | 0.699 | +| mse | 174 | | time/ | | | progress | 0.98 | | step | 9.8e+05 | -| step_time | 9.37 | +| step_time | 8.16 | -------------------------------------- -2023-10-19 18:09:09,720 : Eval | ep_lengths 226.80 +/- 69.60 | ep_return 165.267 +/- 56.622 -2023-10-19 18:09:09,721 : +2023-10-27 19:29:23,926 : Eval | ep_lengths 226.80 +/- 69.60 | ep_return 170.724 +/- 58.728 +2023-10-27 19:29:23,927 : -------------------------------------- | loss/ | | -| approx_kl | 0.0188 | -| entropy_loss | -6.66 | -| policy_loss | -0.0171 | -| value_loss | 0.522 | +| approx_kl | 0.0232 | +| entropy_loss | -7.66 | +| policy_loss | -0.0131 | +| value_loss | 0.451 | | stat/ | | -| constraint_violation | 2.51e+03 | -| ep_constraint_vio... | 0.6 | +| constraint_violation | 2.47e+03 | +| ep_constraint_vio... | 0.1 | | ep_length | 226 | -| ep_return | 153 | -| ep_reward | 0.611 | +| ep_return | 157 | +| ep_reward | 0.629 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 227 | -| ep_return | 165 | -| ep_reward | 0.669 | -| mse | 104 | +| ep_return | 171 | +| ep_reward | 0.691 | +| mse | 98.1 | | time/ | | | progress | 0.99 | | step | 9.9e+05 | -| step_time | 9.35 | +| step_time | 8.3 | -------------------------------------- -2023-10-19 18:10:43,112 : Checkpoint | ./models/rl_models/quadrotor_3D/track/ppo/mpsf/model_latest.pt -2023-10-19 18:11:05,604 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 156.015 +/- 34.030 -2023-10-19 18:11:05,605 : +2023-10-27 19:30:46,603 : Checkpoint | ./models/rl_models/quadrotor_3D/track/ppo/mpsf/model_latest.pt +2023-10-27 19:31:06,103 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 155.741 +/- 35.404 +2023-10-27 19:31:06,104 : -------------------------------------- | loss/ | | -| approx_kl | 0.0222 | -| entropy_loss | -6.7 | -| policy_loss | -0.0116 | -| value_loss | 0.27 | +| approx_kl | 0.0237 | +| entropy_loss | -7.59 | +| policy_loss | -0.0139 | +| value_loss | 1.86 | | stat/ | | -| constraint_violation | 2.55e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 157 | -| ep_reward | 0.63 | +| constraint_violation | 2.52e+03 | +| ep_constraint_vio... | 0.9 | +| ep_length | 227 | +| ep_return | 141 | +| ep_reward | 0.565 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | | ep_return | 156 | -| ep_reward | 0.624 | -| mse | 344 | +| ep_reward | 0.623 | +| mse | 338 | | time/ | | | progress | 1 | | step | 1e+06 | -| step_time | 8.32 | +| step_time | 8.28 | -------------------------------------- diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/loss/approx_kl.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/loss/approx_kl.log index faca0b1ef..719f7525a 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/loss/approx_kl.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/loss/approx_kl.log @@ -1,101 +1,101 @@ step,loss/approx_kl -10000,0.021126577684966227 -20000,0.017644851817749444 -30000,0.012675225858887038 -40000,0.010706881030152242 -50000,0.018675037574333447 -60000,0.017848487927888835 -70000,0.0197184768350174 -80000,0.019011311209760608 -90000,0.012039610456364851 -100000,0.021107027144171297 -110000,0.016650329010250663 -120000,0.01927302705589682 -130000,0.016021958955874042 -140000,0.018265193157518903 -150000,0.025127072446048265 -160000,0.01713958237475405 -170000,0.023242854202787083 -180000,0.028378791144738597 -190000,0.02072594325679044 -200000,0.022870912387346228 -210000,0.03130756144722303 -220000,0.014104977110400799 -230000,0.022540866831938428 -240000,0.024565973489855727 -250000,0.01964573375880718 -260000,0.0228823878026257 -270000,0.010542182127634685 -280000,0.02439388505493601 -290000,0.025107879036416612 -300000,0.01818708176724613 -310000,0.01513851325338085 -320000,0.017085525502140323 -330000,0.01685718574250738 -340000,0.023359127280612783 -350000,0.028345729488258566 -360000,0.01749886601852874 -370000,0.010340712964534761 -380000,0.01861922095219294 -390000,0.03130213829378287 -400000,0.024221228295937182 -410000,0.03119014869444072 -420000,0.02792586605064571 -430000,0.041491690340141454 -440000,0.03176048025488853 -450000,0.024024228317042193 -460000,0.019580421410501002 -470000,0.02672595477973421 -480000,0.014215648717557391 -490000,0.020906206841270134 -500000,0.019368692394345997 -510000,0.027097441845883925 -520000,0.02398263953315715 -530000,0.021601718664169312 -540000,0.028408399891729157 -550000,0.02188740459581216 -560000,0.03041009142373998 -570000,0.01541854793516298 -580000,0.02752657916086415 -590000,0.025313524063676596 -600000,0.02546896881734332 -610000,0.029547313290337718 -620000,0.026911399870490033 -630000,0.020672668625290196 -640000,0.0254563333466649 -650000,0.025662605836987495 -660000,0.019850210814426346 -670000,0.025153688962260883 -680000,0.018516193920125563 -690000,0.037099217918391034 -700000,0.03021468824396531 -710000,0.031126793629179396 -720000,0.014356723955521983 -730000,0.031153097531447804 -740000,0.024710145251204567 -750000,0.03340305257588626 -760000,0.030452593167622888 -770000,0.018998699480046826 -780000,0.012788033578544855 -790000,0.01416510691245397 -800000,0.030763711345692474 -810000,0.025341046104828513 -820000,0.0234948740961651 -830000,0.014271462863932053 -840000,0.018362331421424943 -850000,0.02579103270545602 -860000,0.018417970898250736 -870000,0.011669936372588077 -880000,0.016275342336545387 -890000,0.024729946504036586 -900000,0.0276499609152476 -910000,0.025626790927102166 -920000,0.020558532762030762 -930000,0.025471023097634316 -940000,0.025820466348280512 -950000,0.032154689108332 -960000,0.019024734074870743 -970000,0.034246889781206845 -980000,0.017052652469525732 -990000,0.016991907047728697 -1000000,0.02198956630503138 +10000,0.029863175516948108 +20000,0.01583143201035758 +30000,0.018110800394788386 +40000,0.028889421730612718 +50000,0.014719256882866228 +60000,0.023236529792969425 +70000,0.02248983896958331 +80000,0.01745137305309375 +90000,0.019221076431373753 +100000,0.01058004267203311 +110000,0.02906361669301987 +120000,0.024189919543763 +130000,0.028483634069561958 +140000,0.018095251576354106 +150000,0.02918023293217023 +160000,0.011271631019189954 +170000,0.02110663079656661 +180000,0.026708625443279743 +190000,0.02909677966187398 +200000,0.022478982744117575 +210000,0.019247073555986087 +220000,0.019816831250985465 +230000,0.008338527195155621 +240000,0.03248660567527016 +250000,0.023507432794819275 +260000,0.016566366422921416 +270000,0.011854500866805515 +280000,0.023202411985645693 +290000,0.02337160947111746 +300000,0.013420750293880701 +310000,0.024340336211025715 +320000,0.021132037385056417 +330000,0.024545510672032835 +340000,0.02946794681871931 +350000,0.01809466117992997 +360000,0.03501281168622275 +370000,0.02551541620244583 +380000,0.01876240338509281 +390000,0.02320357862239083 +400000,0.019534078193828464 +410000,0.02873670039698482 +420000,0.03159761720647415 +430000,0.03367063732196887 +440000,0.021781919927646713 +450000,0.021991660725325345 +460000,0.02038949710937838 +470000,0.0244272211411347 +480000,0.024035206902772194 +490000,0.022345224209129814 +500000,0.03292131923759977 +510000,0.025124495926623542 +520000,0.036418066329012316 +530000,0.03056174715360006 +540000,0.028867826346928877 +550000,0.025908987239624065 +560000,0.029495946519697707 +570000,0.025149104806284107 +580000,0.0150318866285185 +590000,0.023973228828981515 +600000,0.02084536040201783 +610000,0.036328385739276804 +620000,0.030739373806864027 +630000,0.028164754000802837 +640000,0.013802068152775368 +650000,0.023630880502363043 +660000,0.019340334522227446 +670000,0.029424931046863396 +680000,0.026138659411420427 +690000,0.024852128916730484 +700000,0.03762132975583275 +710000,0.03324688030406833 +720000,0.02238186175624529 +730000,0.02062430555621783 +740000,0.0250671058272322 +750000,0.02017987659201026 +760000,0.029477630338321126 +770000,0.0175890915406247 +780000,0.003065002833803495 +790000,0.022095450138052305 +800000,0.028554636240005492 +810000,0.023057207403083643 +820000,0.0319789191087087 +830000,0.01746692614008983 +840000,0.024939591872195403 +850000,0.015791176414738102 +860000,0.03321144903699557 +870000,0.025508443483461936 +880000,0.02884951177984476 +890000,0.025013840726266307 +900000,0.027775529958307742 +910000,0.01843212026481827 +920000,0.02151405407736699 +930000,0.017705400381237267 +940000,0.024305684181551136 +950000,0.029185671390344704 +960000,0.026862960432966543 +970000,0.01858222301428517 +980000,0.02546307062730193 +990000,0.01932518696412444 +1000000,0.029406983777880667 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/loss/entropy_loss.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/loss/entropy_loss.log index ffda5d5ce..d2ea85eff 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/loss/entropy_loss.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/loss/entropy_loss.log @@ -1,101 +1,101 @@ step,loss/entropy_loss -10000,-3.7439846436182656 -20000,-3.7555615623792007 -30000,-3.835310590267182 -40000,-3.88610620101293 -50000,-3.880940560499827 -60000,-3.8419632236162817 -70000,-3.8135478138923644 -80000,-3.937078595161438 -90000,-3.935103046894074 -100000,-3.9561249732971193 -110000,-3.928658219178517 -120000,-3.8972739259401954 -130000,-3.977044757207235 -140000,-4.011685593922933 -150000,-3.9696713765462235 -160000,-3.954807273546855 -170000,-4.035748608907063 -180000,-4.070866330464681 -190000,-4.064112210273742 -200000,-4.062342302004497 -210000,-4.1746816237767534 -220000,-4.214873226483663 -230000,-4.2033896764119465 -240000,-4.238874983787537 -250000,-4.392209760348001 -260000,-4.473275502522786 -270000,-4.582501045862833 -280000,-4.649739058812459 -290000,-4.633986838658651 -300000,-4.728703093528749 -310000,-4.729860091209412 -320000,-4.712502153714498 -330000,-4.75728501478831 -340000,-4.8574045419692995 -350000,-4.868029101689656 -360000,-4.924523131052654 -370000,-4.954329737027487 -380000,-4.958245952924093 -390000,-5.042308632532755 -400000,-5.032074157396952 -410000,-5.0492747068405155 -420000,-5.090390563011169 -430000,-5.155517737070719 -440000,-5.208713269233703 -450000,-5.302892732620238 -460000,-5.280759151776631 -470000,-5.305807709693909 -480000,-5.365936136245727 -490000,-5.295128599802653 -500000,-5.278785387674968 -510000,-5.3473347425460815 -520000,-5.333341646194457 -530000,-5.397724167505901 -540000,-5.4241393327713014 -550000,-5.516351779301962 -560000,-5.5616948684056595 -570000,-5.612595589955648 -580000,-5.646896338462829 -590000,-5.703376007080078 -600000,-5.716490602493287 -610000,-5.6911312739054365 -620000,-5.725796055793763 -630000,-5.809796770413717 -640000,-5.894406962394714 -650000,-5.9165442943572994 -660000,-6.00111022790273 -670000,-6.001385990778605 -680000,-5.968782138824461 -690000,-5.932756853103638 -700000,-5.9476848363876345 -710000,-5.971945325533549 -720000,-6.050468492507934 -730000,-6.042882871627808 -740000,-6.124083693822225 -750000,-6.196564896901448 -760000,-6.219127988815307 -770000,-6.240385699272155 -780000,-6.325978406270345 -790000,-6.4239177783330295 -800000,-6.43978533744812 -810000,-6.464726122220357 -820000,-6.435974367459616 -830000,-6.530791203180948 -840000,-6.639711570739746 -850000,-6.747877168655395 -860000,-6.719669334093728 -870000,-6.789119720458983 -880000,-6.803426949183146 -890000,-6.758233499526978 -900000,-6.773803281784057 -910000,-6.876751820246378 -920000,-6.983166631062827 -930000,-7.022795804341635 -940000,-7.034873604774475 -950000,-7.029666113853454 -960000,-7.111058799425761 -970000,-7.142039799690247 -980000,-7.120481689771017 -990000,-7.233380699157715 -1000000,-7.254465794563293 +10000,-3.7333647251129145 +20000,-3.910959339141846 +30000,-4.011837649345398 +40000,-3.972362939516704 +50000,-4.127776193618774 +60000,-4.26914415359497 +70000,-4.322549883524577 +80000,-4.273997545242309 +90000,-4.33320160706838 +100000,-4.447042552630107 +110000,-4.453826832771301 +120000,-4.51828806400299 +130000,-4.642024079958597 +140000,-4.777574173609416 +150000,-4.8476453940073645 +160000,-4.883921519915264 +170000,-4.9995689233144125 +180000,-5.020859432220459 +190000,-4.989502724011739 +200000,-4.997171068191529 +210000,-5.020249478022258 +220000,-5.04681183497111 +230000,-5.079893477757772 +240000,-5.161438767115275 +250000,-5.249421127637227 +260000,-5.235421633720398 +270000,-5.186394158999125 +280000,-5.219669802983602 +290000,-5.228623731931051 +300000,-5.2307358105977375 +310000,-5.38412667910258 +320000,-5.444392681121826 +330000,-5.487814609209695 +340000,-5.46624988714854 +350000,-5.50592257976532 +360000,-5.508975283304851 +370000,-5.553137238820393 +380000,-5.582453354199728 +390000,-5.603512938817342 +400000,-5.577901124954224 +410000,-5.603288865089416 +420000,-5.613901201883952 +430000,-5.618547217051188 +440000,-5.5736452738444004 +450000,-5.583340064684551 +460000,-5.6231076637903845 +470000,-5.6262451887130736 +480000,-5.6542920986811325 +490000,-5.677017172177632 +500000,-5.6414648532867435 +510000,-5.687996617952983 +520000,-5.7470456997553505 +530000,-5.74830694993337 +540000,-5.720073088010151 +550000,-5.80220116774241 +560000,-5.823719437917073 +570000,-5.853754456837972 +580000,-5.857461524009706 +590000,-5.826247946421305 +600000,-5.902747066815694 +610000,-5.917049058278401 +620000,-5.987109955151875 +630000,-6.074909321467082 +640000,-6.140358996391297 +650000,-6.13482784430186 +660000,-6.188188171386718 +670000,-6.238601009051005 +680000,-6.258844431241354 +690000,-6.259089096387227 +700000,-6.343152109781901 +710000,-6.41621758143107 +720000,-6.453715475400289 +730000,-6.513643733660379 +740000,-6.564627639452615 +750000,-6.686325534184773 +760000,-6.710102693239847 +770000,-6.74495833714803 +780000,-6.771806963284812 +790000,-6.818896238009136 +800000,-6.8021790981292725 +810000,-6.871288537979126 +820000,-6.923415493965149 +830000,-6.887011289596559 +840000,-6.9385428826014195 +850000,-6.998370504379271 +860000,-6.986448955535889 +870000,-7.092585039138792 +880000,-7.098632335662842 +890000,-7.052982179323832 +900000,-7.017825849850972 +910000,-6.983316659927368 +920000,-7.04489515622457 +930000,-7.07880833943685 +940000,-7.110083889961243 +950000,-7.208208862940471 +960000,-7.218657128016153 +970000,-7.206284109751384 +980000,-7.2038898944854735 +990000,-7.255405767758688 +1000000,-7.292151888211569 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/loss/policy_loss.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/loss/policy_loss.log index 0900be434..cda269cd3 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/loss/policy_loss.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/loss/policy_loss.log @@ -1,101 +1,101 @@ step,loss/policy_loss -10000,-0.015148067743980097 -20000,-0.010565941411092855 -30000,-0.01799791289107223 -40000,-0.012092239778595467 -50000,-0.014086563370087818 -60000,-0.014519797944843288 -70000,-0.026962285831410783 -80000,-0.0046730654271929135 -90000,-0.012069149248226755 -100000,-0.013449912396194271 -110000,-0.01323942151343203 -120000,0.000770715386442099 -130000,-0.010011288191993092 -140000,-0.0077005399694587 -150000,-0.017280283076223455 -160000,-0.006512581372963448 -170000,-0.013619215284899452 -180000,-0.009952210986492464 -190000,-0.010338659373981918 -200000,-0.020112818077411658 -210000,-0.0025741459549065795 -220000,-0.009722462456494137 -230000,-0.011167482783345456 -240000,-0.007821461277963427 -250000,-0.02036222036573795 -260000,-0.016729674508027526 -270000,-0.01118705002617371 -280000,-0.024030828791360107 -290000,-0.009567076470729718 -300000,-0.01564113019513303 -310000,-0.019210694599591512 -320000,-0.019567894096672 -330000,-0.009506641955434849 -340000,-0.014335815932501935 -350000,-0.016780135661901917 -360000,-0.0022274998094650457 -370000,-0.016491652240317258 -380000,-0.011301815308572046 -390000,-0.005417800365142363 -400000,-0.007870748691276646 -410000,-0.01616644482494652 -420000,-0.011016532798116615 -430000,-0.010628405450698892 -440000,-0.016727007274929456 -450000,-0.01645945078119253 -460000,-0.0010388844846423106 -470000,0.000287457497713042 -480000,-0.01601854692238367 -490000,-0.021736016844293694 -500000,-0.02176655725357179 -510000,-0.012011107896377557 -520000,-0.012374716895291068 -530000,-0.022186348575431555 -540000,-0.005499780381151708 -550000,-0.023407731255388857 -560000,-0.002686961586449884 -570000,-0.0132716525057054 -580000,-0.01144181642574525 -590000,-0.01236731470701855 -600000,-0.009629683311683509 -610000,-0.014844120962863338 -620000,-0.003693603374990547 -630000,-0.018511874259608988 -640000,-0.003914593216439155 -650000,-0.00949577894853665 -660000,-0.022731439776135003 -670000,-0.008722936041004067 -680000,-0.0076983738074226276 -690000,-0.007803950156729497 -700000,-0.011632377829865549 -710000,-0.010767075462587239 -720000,-0.006940346925127057 -730000,-0.01599517416368976 -740000,-0.014726532186659142 -750000,-0.0021940549379645844 -760000,-0.012662602089613725 -770000,-0.009721143220768598 -780000,-0.019758525003874882 -790000,-0.008763525582144403 -800000,-0.009116977914422928 -810000,-0.02002186826092881 -820000,-0.008292404297928654 -830000,-0.01356398202514525 -840000,-0.009489876474855241 -850000,-0.0021059037732946485 -860000,-0.02153356687178896 -870000,-0.015806855107984054 -880000,-0.017439680085678578 -890000,-0.016707356279202126 -900000,-0.01017909592287584 -910000,-0.014795302119758996 -920000,-0.01938891631232257 -930000,-0.010174823760090112 -940000,-0.014433615069308087 -950000,-0.01851575459770922 -960000,-0.013208465719034635 -970000,-0.024764373696166034 -980000,-0.008965826202234973 -990000,-0.01751646747151955 -1000000,-0.012450972429412767 +10000,-0.018119875674761275 +20000,-0.016789268886525398 +30000,-0.014302150562817011 +40000,-0.003965973641474498 +50000,-0.012237087897905966 +60000,-0.015897548778533145 +70000,-0.015431549473422254 +80000,-0.016673335733078495 +90000,-0.017473786890935164 +100000,-0.01328681965648865 +110000,-0.017283274265168694 +120000,-0.010043968445408334 +130000,-0.011548828539281682 +140000,-0.017412710278446143 +150000,-0.007352383425079062 +160000,-0.022959412548575565 +170000,-0.016341396213315117 +180000,-0.007416514793129224 +190000,-0.016293112244448286 +200000,-0.010544927896002457 +210000,-0.00969721854591027 +220000,-0.004939246352241861 +230000,-0.02674796371348771 +240000,-0.020867336713198802 +250000,-0.01791151146485091 +260000,-0.025797767007141433 +270000,-0.022354305395990277 +280000,-0.008816982400454709 +290000,-0.019033685243589414 +300000,-0.024190754029101112 +310000,-0.011575438961749288 +320000,-0.009589355245175698 +330000,-0.015079069091770952 +340000,-0.02341159338834908 +350000,-0.02119488948520809 +360000,-0.007658442008887316 +370000,-0.00327228140197268 +380000,-0.004372147582585717 +390000,-0.01477440942607346 +400000,-0.013227477641179316 +410000,-0.0020749315434308354 +420000,-0.019475228899432647 +430000,-0.006797872764667354 +440000,-0.021521756577543745 +450000,-0.016329199897433748 +460000,-0.004971050418550615 +470000,-0.013599792423082988 +480000,-0.011393103331768567 +490000,-0.003034383455946294 +500000,-0.006916433399715816 +510000,-0.01582242834713368 +520000,-0.01396457914174771 +530000,-0.011722471938348246 +540000,-0.02054313633272361 +550000,-0.0049776271320048195 +560000,-0.012186820356154598 +570000,-0.004943854593935726 +580000,-0.008516628009858508 +590000,-0.021847439255324473 +600000,-0.01405445790598717 +610000,-0.019303771610381625 +620000,0.0004926267776882383 +630000,-0.007623316270246883 +640000,-0.02264043831249301 +650000,-0.012421664396845857 +660000,-0.020955921526714404 +670000,-0.010030523461694196 +680000,-0.010325448166170583 +690000,-0.014158774828910342 +700000,-0.0019559131531654474 +710000,-0.011629511668311693 +720000,-0.011378346610051309 +730000,-0.022775762275749924 +740000,-0.012339416679261635 +750000,-0.010895406017578376 +760000,-0.019087201424702105 +770000,-0.012931644354562673 +780000,-0.02625448379727848 +790000,-0.012666717186471405 +800000,-0.004886239473196987 +810000,-0.016624571338060775 +820000,-0.022515611629632316 +830000,-0.014360166544175343 +840000,-0.02044897834163775 +850000,-0.015735888801097393 +860000,-0.00010285787872377755 +870000,-0.018363199599491446 +880000,-0.01637234872952159 +890000,-0.01613961711315128 +900000,-0.012752769178837036 +910000,-0.021477885117504574 +920000,-0.01691074816751994 +930000,-0.02375084883239518 +940000,-0.009037830267370532 +950000,-0.005022355398237946 +960000,-0.011096543135662832 +970000,-0.012488968566155879 +980000,-0.023245390407266642 +990000,-0.02279352235112162 +1000000,-0.016055868547553774 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/loss/value_loss.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/loss/value_loss.log index 2923cfedc..c5217b9a3 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/loss/value_loss.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/loss/value_loss.log @@ -1,101 +1,101 @@ step,loss/value_loss -10000,69.61669796405651 -20000,9.743458941939922 -30000,13.646692112483162 -40000,3.261265875067481 -50000,3.960716297846806 -60000,4.737179151971235 -70000,1.7533877331921441 -80000,1.5531250511859322 -90000,1.8371544669228008 -100000,1.5293594984149979 -110000,1.6321317178715595 -120000,2.0467776258098818 -130000,1.4239385065609595 -140000,0.5569751085243504 -150000,3.888537963224711 -160000,4.149852311906599 -170000,0.611433895954996 -180000,1.5705241383543367 -190000,0.731151022624797 -200000,2.120841781838693 -210000,0.805637327089765 -220000,1.157749622005246 -230000,0.9576645639320415 -240000,1.402137066187648 -250000,2.6789151257303283 -260000,1.3540271699312467 -270000,2.7556596994246543 -280000,3.409463261387061 -290000,2.052218967088376 -300000,1.2285431192150944 -310000,4.657127193317473 -320000,2.6522384178764007 -330000,1.8730845802948894 -340000,1.7953535532337992 -350000,0.654283501253843 -360000,0.9792409672452342 -370000,0.7866902308220682 -380000,1.3465518838352977 -390000,0.5991979359517388 -400000,0.5605877607942531 -410000,1.2559786565368254 -420000,0.9821805323880737 -430000,1.2620796701537071 -440000,0.8054358933337291 -450000,0.9087702799588362 -460000,1.5111548525175391 -470000,1.2569355899783727 -480000,0.9001740105621023 -490000,1.650962213172074 -500000,0.5520837571383171 -510000,0.7240093941487483 -520000,2.3846333968296496 -530000,0.925568432385796 -540000,2.712167178566072 -550000,0.39311260050522207 -560000,0.2151714721473852 -570000,1.9050490608936612 -580000,1.150740844405353 -590000,1.0828884212500112 -600000,0.5527649865071325 -610000,2.1805729992781346 -620000,0.5390471691435531 -630000,1.6848943995514774 -640000,2.3746657509613396 -650000,0.29792684378866824 -660000,1.8286404386205883 -670000,0.849173839237201 -680000,3.2083876012328574 -690000,5.093803849810024 -700000,3.4258476150396766 -710000,1.5012279002059474 -720000,0.24104391041037046 -730000,0.27478199537176 -740000,0.7664424609011373 -750000,1.48003072312826 -760000,3.2597562963367688 -770000,0.6991714445852262 -780000,2.376198883752172 -790000,3.1193635404793385 -800000,2.0500647299274446 -810000,0.4925945423478673 -820000,6.991873422657401 -830000,0.712579493316194 -840000,0.772368778848808 -850000,1.7540320442055914 -860000,0.957098560462231 -870000,0.541513916644232 -880000,0.9890155795906844 -890000,1.0420118571992196 -900000,6.151837934126123 -910000,2.5881459399386273 -920000,6.5528611544929305 -930000,0.771240889073821 -940000,2.415047606278663 -950000,0.7573949620422612 -960000,0.7626244311066234 -970000,0.43875582931963775 -980000,1.6654580154835248 -990000,4.041583574754592 -1000000,3.165210498417912 +10000,52.526456144422994 +20000,19.819517259361568 +30000,5.873996463166597 +40000,11.325605865206587 +50000,6.475228039124803 +60000,3.1267321229156733 +70000,1.4860961691989254 +80000,3.7693175595080084 +90000,1.7981937722330812 +100000,3.5692478157629024 +110000,2.118895420053426 +120000,1.610588930900081 +130000,2.7005914732978904 +140000,5.275956260905759 +150000,3.347050533566622 +160000,1.9031104725042762 +170000,0.5914325949502867 +180000,1.6782993223080378 +190000,1.1130100220167949 +200000,3.2252616259640696 +210000,2.5645219309351623 +220000,2.912615108144574 +230000,1.8158945314163488 +240000,1.6227914012714053 +250000,0.7877696269276 +260000,0.779313982786536 +270000,1.2031094177227653 +280000,1.582615089690038 +290000,0.6899302559502554 +300000,1.8220944327238662 +310000,4.043776781625828 +320000,1.9280347696329965 +330000,1.0079777652080124 +340000,3.2896779267689604 +350000,1.7437622803511943 +360000,0.9134751914299054 +370000,1.0985097426818866 +380000,0.8883382303152176 +390000,0.809163725013233 +400000,0.8448458347290225 +410000,1.1601723049657273 +420000,4.111140499846489 +430000,1.0063615908108114 +440000,0.4283191897550883 +450000,1.905984655867878 +460000,1.5626805269799966 +470000,1.8219670116847613 +480000,2.060027809786365 +490000,0.6595012519827492 +500000,0.6705640465507157 +510000,0.919721220193682 +520000,0.5848131697354215 +530000,0.9629841094756552 +540000,0.406764541235907 +550000,3.784439822668502 +560000,0.669355573595188 +570000,0.8953186697030766 +580000,1.956484598456602 +590000,0.9731751597115152 +600000,1.9450041606997726 +610000,1.265326954013942 +620000,0.6675225878435976 +630000,0.5592748329705594 +640000,2.308365856525379 +650000,0.7780102264970769 +660000,0.6854896353460458 +670000,2.884636445252777 +680000,0.8941446094416273 +690000,0.7895436716182487 +700000,4.305417199036922 +710000,2.6884683871624517 +720000,1.415814859104842 +730000,0.907057070761933 +740000,0.8862444433097911 +750000,0.6007886748248974 +760000,1.699232897727701 +770000,2.6602567189663753 +780000,1.1189319609777542 +790000,2.3657701776480513 +800000,7.26570047677035 +810000,2.207169518778057 +820000,0.767397904641321 +830000,4.496475306507098 +840000,0.36067030695008284 +850000,1.6133962868065592 +860000,1.510778556661301 +870000,0.7585111255175943 +880000,0.7694656014772022 +890000,0.7081391398706716 +900000,0.5093111167887614 +910000,2.729323682479147 +920000,3.4510018202065096 +930000,1.498879162290091 +940000,1.5150109372153524 +950000,11.012244179989182 +960000,1.4199284235807998 +970000,9.493041228615931 +980000,0.6006152021867939 +990000,1.0917232796807008 +1000000,2.365902629254411 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat/constraint_violation.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat/constraint_violation.log index adceaf1d6..62c30f212 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat/constraint_violation.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat/constraint_violation.log @@ -1,101 +1,101 @@ step,stat/constraint_violation -10000,9 -20000,16 -30000,32 -40000,39 +10000,10 +20000,19 +30000,33 +40000,41 50000,49 -60000,57 -70000,59 -80000,69 -90000,73 -100000,83 -110000,89 -120000,96 -130000,106 -140000,117 -150000,126 -160000,144 -170000,150 -180000,156 -190000,165 -200000,174 -210000,191 -220000,199 -230000,205 -240000,212 -250000,217 -260000,222 -270000,233 -280000,238 -290000,247 -300000,259 -310000,268 -320000,279 -330000,297 -340000,302 -350000,306 -360000,315 -370000,319 -380000,323 -390000,325 -400000,328 -410000,338 -420000,346 -430000,353 -440000,360 -450000,365 -460000,370 -470000,379 -480000,384 -490000,390 -500000,395 -510000,398 -520000,406 -530000,408 -540000,410 -550000,419 -560000,425 -570000,433 -580000,438 -590000,448 -600000,450 -610000,457 -620000,464 -630000,474 -640000,480 -650000,485 -660000,496 -670000,501 -680000,511 -690000,518 -700000,523 -710000,531 -720000,534 -730000,539 -740000,543 -750000,551 -760000,559 -770000,561 -780000,574 -790000,581 -800000,585 -810000,587 -820000,596 -830000,602 -840000,608 -850000,617 -860000,622 -870000,627 -880000,633 -890000,636 -900000,644 -910000,652 -920000,661 -930000,666 -940000,678 -950000,687 -960000,696 -970000,698 -980000,706 -990000,714 -1000000,719 +60000,55 +70000,58 +80000,67 +90000,71 +100000,80 +110000,85 +120000,91 +130000,98 +140000,109 +150000,117 +160000,131 +170000,137 +180000,144 +190000,149 +200000,160 +210000,168 +220000,175 +230000,180 +240000,182 +250000,187 +260000,193 +270000,196 +280000,206 +290000,212 +300000,219 +310000,228 +320000,233 +330000,243 +340000,260 +350000,264 +360000,267 +370000,276 +380000,280 +390000,285 +400000,287 +410000,291 +420000,303 +430000,309 +440000,316 +450000,321 +460000,326 +470000,331 +480000,340 +490000,345 +500000,351 +510000,355 +520000,358 +530000,366 +540000,368 +550000,370 +560000,379 +570000,384 +580000,391 +590000,398 +600000,407 +610000,410 +620000,418 +630000,425 +640000,435 +650000,439 +660000,445 +670000,456 +680000,461 +690000,469 +700000,477 +710000,482 +720000,489 +730000,493 +740000,498 +750000,502 +760000,511 +770000,519 +780000,521 +790000,534 +800000,542 +810000,546 +820000,548 +830000,557 +840000,562 +850000,568 +860000,577 +870000,582 +880000,588 +890000,594 +900000,598 +910000,602 +920000,609 +930000,616 +940000,624 +950000,633 +960000,643 +970000,651 +980000,654 +990000,662 +1000000,670 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat/ep_constraint_violation.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat/ep_constraint_violation.log index 634c01ccc..aec42bc24 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat/ep_constraint_violation.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat/ep_constraint_violation.log @@ -1,101 +1,101 @@ step,stat/ep_constraint_violation -10000,0.1 +10000,0.2 20000,0.3 -30000,0.2 +30000,0.1 40000,0.4 50000,0.2 60000,0.2 -70000,0.1 -80000,0.2 +70000,0.2 +80000,0.3 90000,0.0 100000,0.1 110000,0.1 120000,0.1 -130000,0.2 -140000,0.1 +130000,0.0 +140000,0.3 150000,0.2 -160000,0.1 +160000,0.2 170000,0.1 -180000,0.1 +180000,0.2 190000,0.1 -200000,0.0 -210000,0.3 -220000,0.2 -230000,0.1 -240000,0.3 +200000,0.1 +210000,0.1 +220000,0.1 +230000,0.2 +240000,0.0 250000,0.0 260000,0.1 -270000,0.4 -280000,0.3 -290000,0.2 -300000,0.0 +270000,0.1 +280000,0.0 +290000,0.0 +300000,0.2 310000,0.2 320000,0.2 330000,0.2 -340000,0.2 +340000,0.3 350000,0.1 360000,0.1 -370000,0.0 -380000,0.2 -390000,0.1 -400000,0.2 -410000,0.2 -420000,0.0 -430000,0.1 +370000,0.1 +380000,0.0 +390000,0.2 +400000,0.1 +410000,0.3 +420000,0.4 +430000,0.0 440000,0.1 -450000,0.0 -460000,0.3 -470000,0.0 -480000,0.1 +450000,0.1 +460000,0.0 +470000,0.3 +480000,0.0 490000,0.1 -500000,0.2 -510000,0.1 +500000,0.1 +510000,0.2 520000,0.1 -530000,0.0 -540000,0.2 -550000,0.1 -560000,0.0 -570000,0.2 -580000,0.0 -590000,0.4 -600000,0.0 -610000,0.2 -620000,0.0 -630000,0.2 +530000,0.1 +540000,0.0 +550000,0.2 +560000,0.1 +570000,0.0 +580000,0.2 +590000,0.0 +600000,0.4 +610000,0.0 +620000,0.2 +630000,0.0 640000,0.2 -650000,0.2 -660000,0.4 -670000,0.0 -680000,0.1 -690000,0.2 -700000,0.1 +650000,0.1 +660000,0.2 +670000,0.4 +680000,0.0 +690000,0.1 +700000,0.2 710000,0.1 -720000,0.0 +720000,0.1 730000,0.0 740000,0.0 -750000,0.1 -760000,0.2 -770000,0.0 -780000,0.2 +750000,0.0 +760000,0.1 +770000,0.2 +780000,0.0 790000,0.2 800000,0.3 -810000,0.0 -820000,0.2 -830000,0.1 -840000,0.2 -850000,0.1 -860000,0.0 -870000,0.1 +810000,0.3 +820000,0.0 +830000,0.2 +840000,0.0 +850000,0.2 +860000,0.1 +870000,0.0 880000,0.1 -890000,0.0 -900000,0.2 -910000,0.2 -920000,0.3 -930000,0.3 -940000,0.4 -950000,0.2 -960000,0.2 -970000,0.1 -980000,0.2 +890000,0.1 +900000,0.1 +910000,0.1 +920000,0.2 +930000,0.1 +940000,0.3 +950000,0.4 +960000,0.3 +970000,0.4 +980000,0.1 990000,0.2 -1000000,0.3 +1000000,0.2 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat/ep_length.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat/ep_length.log index cb794dd80..fa6e20cb9 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat/ep_length.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat/ep_length.log @@ -1,101 +1,101 @@ step,stat/ep_length -10000,225.9 -20000,176.4 -30000,200.4 -40000,150.9 -50000,200.3 +10000,201.0 +20000,175.8 +30000,225.3 +40000,150.8 +50000,200.2 60000,200.2 -70000,226.8 -80000,200.9 +70000,202.1 +80000,176.0 90000,250.0 100000,225.1 110000,225.1 120000,225.1 -130000,200.2 -140000,225.1 -150000,200.2 -160000,225.1 -170000,225.4 -180000,225.1 +130000,250.0 +140000,175.3 +150000,200.4 +160000,200.6 +170000,225.1 +180000,201.0 190000,225.1 -200000,250.0 -210000,175.3 -220000,200.3 -230000,225.1 -240000,175.4 +200000,225.2 +210000,225.2 +220000,225.4 +230000,201.0 +240000,250.0 250000,250.0 -260000,225.1 -270000,150.4 -280000,176.3 -290000,200.8 -300000,250.0 -310000,202.1 -320000,201.0 -330000,201.6 -340000,200.2 +260000,225.2 +270000,225.1 +280000,250.0 +290000,250.0 +300000,200.3 +310000,200.4 +320000,200.6 +330000,201.3 +340000,176.7 350000,225.1 -360000,226.3 -370000,250.0 -380000,200.3 -390000,225.1 -400000,200.4 -410000,201.3 -420000,250.0 -430000,225.1 -440000,227.0 -450000,250.0 -460000,175.3 -470000,250.0 -480000,225.1 +360000,225.1 +370000,226.2 +380000,250.0 +390000,200.4 +400000,225.1 +410000,175.4 +420000,152.3 +430000,250.0 +440000,225.1 +450000,227.1 +460000,250.0 +470000,175.4 +480000,250.0 490000,225.1 -500000,201.3 -510000,225.9 -520000,225.3 -530000,250.0 -540000,200.7 -550000,225.1 -560000,250.0 -570000,201.0 -580000,250.0 -590000,151.0 -600000,250.0 -610000,200.7 -620000,250.0 -630000,200.6 -640000,200.8 -650000,200.6 -660000,153.4 -670000,250.0 -680000,225.2 -690000,200.6 -700000,225.4 -710000,225.8 -720000,250.0 +500000,225.2 +510000,201.3 +520000,225.9 +530000,225.3 +540000,250.0 +550000,201.0 +560000,225.2 +570000,250.0 +580000,201.0 +590000,250.0 +600000,151.0 +610000,250.0 +620000,200.3 +630000,250.0 +640000,200.3 +650000,225.1 +660000,200.6 +670000,153.3 +680000,250.0 +690000,226.3 +700000,200.6 +710000,225.4 +720000,225.2 730000,250.0 740000,250.0 -750000,225.1 -760000,200.5 -770000,250.0 -780000,200.2 +750000,250.0 +760000,225.1 +770000,200.2 +780000,250.0 790000,200.2 -800000,175.6 -810000,250.0 -820000,201.2 -830000,227.4 -840000,200.6 -850000,225.1 -860000,250.0 -870000,225.1 -880000,225.2 -890000,250.0 -900000,202.0 -910000,201.5 -920000,177.7 -930000,176.0 -940000,150.6 -950000,200.4 -960000,200.5 -970000,225.7 -980000,200.7 -990000,200.9 -1000000,177.4 +800000,175.9 +810000,175.7 +820000,250.0 +830000,200.7 +840000,250.0 +850000,201.5 +860000,225.3 +870000,250.0 +880000,225.1 +890000,225.2 +900000,225.2 +910000,226.7 +920000,201.7 +930000,226.5 +940000,175.8 +950000,151.7 +960000,175.5 +970000,151.1 +980000,225.7 +990000,201.0 +1000000,200.6 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat/ep_return.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat/ep_return.log index 81f77f2c7..cd6849757 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat/ep_return.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat/ep_return.log @@ -1,101 +1,101 @@ step,stat/ep_return -10000,68.65947776336625 -20000,81.51216264653095 -30000,73.19448096929167 -40000,45.95355334793498 -50000,83.24079096283455 -60000,95.36653233245946 -70000,104.41984915973157 -80000,79.29928612762511 -90000,117.50172492031922 -100000,113.03527706113168 -110000,111.59772171405773 -120000,122.93077770618535 -130000,107.96688147529339 -140000,113.97350230185134 -150000,91.37001905585628 -160000,119.02640827353405 -170000,118.179538557347 -180000,118.53857786479902 -190000,128.6892861147673 -200000,143.65854522765287 -210000,106.95732504387777 -220000,107.46669133456314 -230000,115.31561449710406 -240000,100.95619281686129 -250000,119.04496290133281 -260000,128.069025037091 -270000,82.75452336426481 -280000,90.3182979971498 -290000,120.48092503394369 -300000,140.65838533856692 -310000,127.29900307739233 -320000,106.13956461577818 -330000,112.46682381278053 -340000,114.16714240110153 -350000,119.0049307652691 -360000,138.6218206766283 -370000,133.93230383933974 -380000,109.6465031972211 -390000,132.8607893598223 -400000,125.93148322615075 -410000,136.6511288797982 -420000,145.73631140602487 -430000,134.89446462578024 -440000,150.49698079108265 -450000,164.58135598918858 -460000,111.96735786447596 -470000,161.2513643252433 -480000,129.39711881557454 -490000,137.95423900293403 -500000,108.12149514053961 -510000,151.8590086973378 -520000,131.87335849500545 -530000,150.24138200418741 -540000,122.18686199995025 -550000,129.0045027563413 -560000,147.14518843658766 -570000,122.06215771280424 -580000,168.94543479791625 -590000,91.6272232307987 -600000,157.9637000067895 -610000,140.48634173656907 -620000,152.3026836330027 -630000,134.09871191815674 -640000,119.55001412947738 -650000,124.45833830700806 -660000,102.45210730165329 -670000,172.17012810332324 -680000,157.36876379250157 -690000,140.52384935317713 -700000,152.28396637017732 -710000,161.40468480075987 -720000,169.15020710808045 -730000,189.84535898765213 -740000,163.55904111967703 -750000,162.52008486265828 -760000,143.23925811160035 -770000,157.32407333660908 -780000,140.49238278508355 -790000,136.41409942995944 -800000,115.03945162115276 -810000,163.30136502867316 -820000,142.7303943266068 -830000,153.37020040291435 -840000,144.8715743995836 -850000,144.93061317610042 -860000,158.04993536452065 -870000,152.93948743061125 -880000,148.67157291101324 -890000,175.87180697981756 -900000,127.82691869295653 -910000,122.40974628102572 -920000,115.3177008961128 -930000,128.30608572333705 -940000,93.16675258338834 -950000,124.41344941599246 -960000,127.80195106244496 -970000,155.34583343313278 -980000,144.73650710607615 -990000,143.0097662950713 -1000000,117.85588819852146 +10000,46.947158096833164 +20000,61.205663807240136 +30000,84.27839681345225 +40000,55.12223236316654 +50000,93.62810930261847 +60000,105.2269590117215 +70000,101.6702087399175 +80000,85.48869120479495 +90000,117.83857569411214 +100000,111.6248623717394 +110000,116.63174808747465 +120000,119.13340573474457 +130000,130.5739866678975 +140000,91.26905261271793 +150000,100.03767547581442 +160000,97.85531303836555 +170000,109.96527779391188 +180000,97.98845085293465 +190000,122.85545865291058 +200000,120.78287427742809 +210000,122.35382683908256 +220000,115.57233936970343 +230000,111.09151103178183 +240000,135.98803430158517 +250000,133.8549010791848 +260000,134.63252573345284 +270000,124.59980090462793 +280000,146.87059215531585 +290000,145.1222765304466 +300000,115.77148660490889 +310000,103.93836286523552 +320000,125.15009917149419 +330000,108.8457646648034 +340000,109.57682560891753 +350000,121.94091958033523 +360000,125.87507448406193 +370000,138.94302786961396 +380000,140.16177604869668 +390000,109.86824700006468 +400000,134.3876385214126 +410000,107.84359404093284 +420000,105.06676104417336 +430000,147.50663993778758 +440000,131.80885755197642 +450000,156.80175555538437 +460000,167.38774633972196 +470000,108.08901539835242 +480000,163.0264404512308 +490000,140.2483090942471 +500000,142.70254427211066 +510000,115.79990865682502 +520000,150.71412537298016 +530000,131.3727892336484 +540000,158.39006835753707 +550000,122.94780735024258 +560000,130.95754533749897 +570000,151.39805546427934 +580000,119.53654668545296 +590000,171.28342985577154 +600000,94.51364337718084 +610000,160.97118412935023 +620000,136.45892902270424 +630000,156.29181964544654 +640000,131.00411478550546 +650000,131.16676864943614 +660000,124.31936359528842 +670000,99.41870214567454 +680000,161.0451338875191 +690000,151.2282383342642 +700000,131.26363345799854 +710000,154.65793000079037 +720000,158.7624140220257 +730000,163.25098254394103 +740000,179.12876353640817 +750000,164.27138228813547 +760000,156.43858286993222 +770000,136.67421655094512 +780000,159.47336509036126 +790000,134.0694799331593 +800000,118.69367543031584 +810000,111.37984210901217 +820000,155.56008852793929 +830000,141.83060309826345 +840000,169.10954606727643 +850000,141.55197291445361 +860000,148.27288901513242 +870000,162.58732202405494 +880000,157.88497483251513 +890000,153.44612387296837 +900000,162.92851174446315 +910000,134.68390375182926 +920000,136.7894112540965 +930000,151.09090355253065 +940000,121.66238076372561 +950000,100.76980422520269 +960000,111.87880222544293 +970000,97.13060027425044 +980000,154.44361485650566 +990000,143.21188404077157 +1000000,143.00907415261636 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat/ep_reward.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat/ep_reward.log index 25eeb5d7e..6bc1f2e20 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat/ep_reward.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat/ep_reward.log @@ -1,101 +1,101 @@ step,stat/ep_reward -10000,0.27623602885690673 -20000,0.33494533936116533 -30000,0.3001024935501039 -40000,0.30655177829992425 -50000,0.3862852934904847 -60000,0.38755952701969304 -70000,0.41975474994573136 -80000,0.336782537868258 -90000,0.4700068996812769 -100000,0.45444486655545324 -110000,0.44657108237873383 -120000,0.49261107017661815 -130000,0.44523372117496207 -140000,0.5286788304940769 -150000,0.48026335717123014 -160000,0.5604015257154847 -170000,0.47642629708534595 -180000,0.5070471163505335 -190000,0.5919457829392725 -200000,0.5746341809106114 -210000,0.4996296395766254 -220000,0.4519404564317826 -230000,0.4716868854599883 -240000,0.4281730022041291 -250000,0.4761798516053313 -260000,0.5146057495540484 -270000,0.3463441019415784 -280000,0.43852812519395057 -290000,0.48609350197866624 -300000,0.5626335413542678 -310000,0.5133727124481244 -320000,0.4812096495909137 -330000,0.4563688017418387 -340000,0.4725208317073533 -350000,0.48791261410041475 -360000,0.5551341686984662 -370000,0.5357292153573588 -380000,0.46710217897211886 -390000,0.5314875085151238 -400000,0.505268709684089 -410000,0.5517741336178942 -420000,0.5829452456240994 -430000,0.5442790494175516 -440000,0.6022037369045575 -450000,0.6583254239567542 -460000,0.46234287655980166 -470000,0.6450054573009731 -480000,0.5187783480196568 -490000,0.5532903569754997 -500000,0.43257404552889495 -510000,0.6075093176554762 -520000,0.5283256194244574 -530000,0.6009655280167496 -540000,0.4958700876486041 -550000,0.5552876560030062 -560000,0.5885807537463507 -570000,0.4890322305025835 -580000,0.675781739191665 -590000,0.3709906003620106 -600000,0.631854800027158 -610000,0.5900097116265196 -620000,0.6092107345320107 -630000,0.540159544998623 -640000,0.4815520105590605 -650000,0.4989706746346682 -660000,0.4114898959068312 -670000,0.6886805124132931 -680000,0.6506793324353065 -690000,0.5650600025174954 -700000,0.6091873711239563 -710000,0.6457428295194605 -720000,0.6766008284323217 -730000,0.7593814359506086 -740000,0.6542361644787081 -750000,0.6509300855338273 -760000,0.6021567059725319 -770000,0.6292962933464363 -780000,0.6252367903876269 -790000,0.58708294959756 -800000,0.46676079828044825 -810000,0.6532054601146925 -820000,0.574506025263154 -830000,0.6160579467209493 -840000,0.5813898020729769 -850000,0.5806649963660934 -860000,0.6321997414580828 -870000,0.6118926259186745 -880000,0.5996761577583578 -890000,0.7034872279192703 -900000,0.5131364323241911 -910000,0.5043956494361362 -920000,0.47583771001777286 -930000,0.5221689236429099 -940000,0.4568318424463376 -950000,0.49907519591379906 -960000,0.5797712197889038 -970000,0.6264421029133367 -980000,0.6533053974774179 -990000,0.5847599527010058 -1000000,0.4736835488806193 +10000,0.1901343231889939 +20000,0.25494247541555215 +30000,0.3375723877763945 +40000,0.2652286946729159 +50000,0.3767250806262777 +60000,0.42705013331170383 +70000,0.41200587014306195 +80000,0.3622408717058877 +90000,0.47135430277644846 +100000,0.44881588891281776 +110000,0.467293676752642 +120000,0.4774207371341245 +130000,0.5222959466715901 +140000,0.4434829677484343 +150000,0.46368280853801097 +160000,0.39252028857912935 +170000,0.43990182835591246 +180000,0.39743165690119503 +190000,0.49145016399898866 +200000,0.49401240892982284 +210000,0.48971111779015664 +220000,0.4650903334757513 +230000,0.4451782717530001 +240000,0.5439521372063406 +250000,0.5354196043167392 +260000,0.5414159532436126 +270000,0.5513820220206224 +280000,0.5874823686212635 +290000,0.5804891061217863 +300000,0.46338199641898903 +310000,0.43654977012104546 +320000,0.5025394321621626 +330000,0.49319656556778735 +340000,0.44534577476999637 +350000,0.5002504660511563 +360000,0.5154133211218115 +370000,0.5564158687241811 +380000,0.5606471041947867 +390000,0.46645783101472194 +400000,0.53759508869205 +410000,0.4338329796040294 +420000,0.4285150270740153 +430000,0.5900265597511504 +440000,0.5319317577783559 +450000,0.6274163019564485 +460000,0.6695509853588879 +470000,0.4469148871321676 +480000,0.6521057618049232 +490000,0.5621824631520544 +500000,0.5723621125055102 +510000,0.4632877155804104 +520000,0.6029296209437631 +530000,0.5263248128942459 +540000,0.6335602734301483 +550000,0.5004892254719523 +560000,0.5623875126177605 +570000,0.6055922218571174 +580000,0.478929799772672 +590000,0.6851337194230863 +600000,0.38203568140854693 +610000,0.6438847365174011 +620000,0.5680092405657984 +630000,0.6251672785817861 +640000,0.5278855613553524 +650000,0.5277694175345491 +660000,0.49841590726231466 +670000,0.39939603677212504 +680000,0.6441805355500765 +690000,0.6049386785635573 +700000,0.528024797276655 +710000,0.618683168491881 +720000,0.6390786965828196 +730000,0.6530039301757642 +740000,0.7165150541456329 +750000,0.6570855291525418 +760000,0.626604233871365 +770000,0.5754327689828451 +780000,0.6378934603614449 +790000,0.5995392984264336 +800000,0.5152416938423251 +810000,0.4521745839261335 +820000,0.6222403541117569 +830000,0.5705150775699898 +840000,0.6764381842691056 +850000,0.5683352415314301 +860000,0.5940484858269237 +870000,0.6503492880962198 +880000,0.6316746174146248 +890000,0.6187753540850586 +900000,0.6559361350694866 +910000,0.5391344102569597 +920000,0.5620455618443664 +930000,0.6053043505900111 +940000,0.49549561415296217 +950000,0.5077723449248097 +960000,0.4537034782990844 +970000,0.4717995051096076 +980000,0.6228211206930576 +990000,0.5729327878979307 +1000000,0.5810638641129329 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat_eval/constraint_violation.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat_eval/constraint_violation.log index 05cdb891d..8f3ef25da 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat_eval/constraint_violation.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat_eval/constraint_violation.log @@ -2,7 +2,7 @@ step,stat_eval/constraint_violation 10000,0.3 20000,0.2 30000,0.1 -40000,0.0 +40000,0.1 50000,0.1 60000,0.0 70000,0.3 @@ -10,29 +10,29 @@ step,stat_eval/constraint_violation 90000,0.1 100000,0.0 110000,0.2 -120000,0.1 +120000,0.0 130000,0.3 140000,0.0 150000,0.1 -160000,0.1 +160000,0.0 170000,0.1 180000,0.3 190000,0.1 -200000,0.2 +200000,0.1 210000,0.3 220000,0.3 230000,0.0 -240000,0.3 +240000,0.2 250000,0.2 -260000,0.1 -270000,0.3 +260000,0.0 +270000,0.2 280000,0.1 290000,0.2 300000,0.2 310000,0.0 320000,0.0 330000,0.2 -340000,0.3 +340000,0.1 350000,0.1 360000,0.1 370000,0.2 @@ -43,16 +43,16 @@ step,stat_eval/constraint_violation 420000,0.3 430000,0.3 440000,0.2 -450000,0.2 +450000,0.1 460000,0.1 470000,0.2 480000,0.3 490000,0.1 500000,0.0 510000,0.2 -520000,0.3 +520000,0.2 530000,0.0 -540000,0.3 +540000,0.2 550000,0.1 560000,0.2 570000,0.0 @@ -62,10 +62,10 @@ step,stat_eval/constraint_violation 610000,0.0 620000,0.1 630000,0.0 -640000,0.0 +640000,0.1 650000,0.0 660000,0.2 -670000,0.1 +670000,0.2 680000,0.1 690000,0.1 700000,0.0 @@ -83,14 +83,14 @@ step,stat_eval/constraint_violation 820000,0.1 830000,0.0 840000,0.0 -850000,0.5 +850000,0.4 860000,0.0 870000,0.3 880000,0.2 890000,0.2 900000,0.2 910000,0.1 -920000,0.3 +920000,0.2 930000,0.0 940000,0.0 950000,0.0 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat_eval/ep_length.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat_eval/ep_length.log index f329a940e..456fd785b 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat_eval/ep_length.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat_eval/ep_length.log @@ -1,38 +1,38 @@ step,stat_eval/ep_length 10000,176.1 -20000,200.5 +20000,201.9 30000,225.3 -40000,250.0 -50000,225.1 +40000,225.1 +50000,225.2 60000,250.0 -70000,175.7 +70000,175.6 80000,225.2 90000,225.1 100000,250.0 110000,201.3 -120000,225.1 -130000,176.2 +120000,250.0 +130000,176.5 140000,250.0 -150000,225.1 -160000,225.1 -170000,225.1 +150000,225.2 +160000,250.0 +170000,225.8 180000,176.0 -190000,225.1 -200000,200.2 -210000,176.6 -220000,176.5 +190000,225.2 +200000,225.1 +210000,176.5 +220000,176.7 230000,250.0 -240000,175.3 -250000,201.9 -260000,225.1 -270000,175.3 +240000,200.2 +250000,200.3 +260000,250.0 +270000,200.3 280000,225.1 -290000,200.5 +290000,201.4 300000,200.5 310000,250.0 320000,250.0 330000,202.0 -340000,175.4 +340000,225.1 350000,225.1 360000,225.1 370000,202.2 @@ -41,37 +41,37 @@ step,stat_eval/ep_length 400000,225.6 410000,250.0 420000,176.7 -430000,175.4 +430000,175.7 440000,201.7 -450000,200.2 -460000,225.1 -470000,201.3 +450000,225.1 +460000,225.2 +470000,201.4 480000,176.3 490000,225.8 500000,250.0 -510000,200.4 -520000,175.9 +510000,200.5 +520000,200.8 530000,250.0 -540000,176.6 +540000,201.4 550000,225.3 560000,201.7 570000,250.0 580000,178.5 590000,200.4 -600000,225.5 +600000,225.1 610000,250.0 620000,225.1 630000,250.0 -640000,250.0 +640000,227.6 650000,250.0 -660000,200.5 -670000,225.3 -680000,227.3 -690000,225.3 +660000,200.6 +670000,200.4 +680000,227.2 +690000,225.4 700000,250.0 -710000,225.1 +710000,225.2 720000,200.8 -730000,226.6 +730000,226.7 740000,250.0 750000,225.2 760000,225.6 @@ -79,18 +79,18 @@ step,stat_eval/ep_length 780000,200.2 790000,225.7 800000,250.0 -810000,225.2 -820000,225.1 +810000,225.8 +820000,225.2 830000,250.0 840000,250.0 -850000,126.7 +850000,151.6 860000,250.0 -870000,176.9 -880000,201.4 +870000,177.0 +880000,201.6 890000,201.3 900000,201.0 910000,226.6 -920000,175.6 +920000,200.3 930000,250.0 940000,250.0 950000,250.0 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat_eval/ep_return.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat_eval/ep_return.log index 13dc6515f..1bbb11173 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat_eval/ep_return.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat_eval/ep_return.log @@ -1,101 +1,101 @@ step,stat_eval/ep_return -10000,58.20287851789293 -20000,67.17078826803456 -30000,86.27336340997809 -40000,98.42718367198233 -50000,97.96774922376824 -60000,118.80158698432611 -70000,74.45385143360633 -80000,121.41964271565396 -90000,116.38922606190867 -100000,124.17581580635624 -110000,79.58835984536356 -120000,116.52401598635477 -130000,92.76140403822556 -140000,123.79997686790833 -150000,121.915359806422 -160000,113.4572308286167 -170000,120.77215957741467 -180000,98.25986921193025 -190000,126.85837936390264 -200000,113.27067889035372 -210000,97.32584805573617 -220000,102.62427686579072 -230000,136.36230043752906 -240000,89.82518496202655 -250000,114.66578688731249 -260000,129.8365522538517 -270000,92.35413966475829 -280000,151.23307366257268 -290000,125.05332932653633 -300000,112.67533860038161 -310000,148.59963645815435 -320000,145.52911701306033 -330000,124.4239128320834 -340000,103.38950383228169 -350000,123.90570768147563 -360000,138.32461062120134 -370000,123.28908080979404 -380000,118.94608794251789 -390000,105.43490463069001 -400000,131.4277159793035 -410000,150.19149567893382 -420000,114.95278397177887 -430000,108.71516862740657 -440000,121.90975669589083 -450000,127.22285460214114 -460000,162.15513364074974 -470000,118.35276386752324 -480000,108.5489957949708 -490000,129.77771715654006 -500000,146.466317561641 -510000,132.42274064514365 -520000,118.35532031207131 -530000,150.04918230862626 -540000,111.64096538381193 -550000,134.84812045780433 -560000,151.67250940140482 -570000,147.00639261163684 -580000,90.114859768089 -590000,117.32331285274623 -600000,135.72450888620102 -610000,159.03567093233778 -620000,138.03022085739957 -630000,161.61966173643629 -640000,158.8762915537879 -650000,160.65168288508406 -660000,129.44898770583762 -670000,147.61441407180652 -680000,149.13878232892944 -690000,165.7873140810246 -700000,161.2575615584827 -710000,148.57866209297737 -720000,139.7530306864687 -730000,161.64200391771882 -740000,152.75784475957096 -750000,142.26584751028585 -760000,145.7589810273336 -770000,135.99923698300933 -780000,138.94446676826533 -790000,146.15040731569368 -800000,156.2875868386646 -810000,151.53656465918777 -820000,163.36174209006055 -830000,171.68763912899647 -840000,167.21833495682165 -850000,83.79511621725577 -860000,148.75808841829857 -870000,118.73739585825615 -880000,130.57736035402422 -890000,138.55959347447111 -900000,120.4981142892406 -910000,141.446518372026 -920000,135.3714199551176 -930000,168.0790685156764 -940000,152.98737805262977 -950000,150.72367115294867 -960000,157.1525942676295 -970000,110.75200579349257 -980000,171.49584554487802 -990000,166.07383895124053 -1000000,153.86262431581227 +10000,55.34831182992705 +20000,76.09648044186022 +30000,71.47979538830886 +40000,91.90087312236047 +50000,114.00670898701901 +60000,121.45134866281985 +70000,81.49983883740626 +80000,118.26504469147206 +90000,122.20165647670892 +100000,131.57340361273242 +110000,91.90453140375016 +120000,135.49544381234642 +130000,98.96519461122155 +140000,128.03252101808533 +150000,121.38435111584947 +160000,126.82446174380632 +170000,125.27485654698447 +180000,97.68390905375973 +190000,123.43628784628943 +200000,123.08905758649621 +210000,101.74147174301883 +220000,104.37415333820213 +230000,137.41193171789894 +240000,102.17940308624439 +250000,116.31829445995979 +260000,142.09793285117877 +270000,114.60869205274837 +280000,144.59601418017425 +290000,126.13594271831589 +300000,110.98660417220704 +310000,143.46975872597417 +320000,144.06699460167883 +330000,120.89852087155828 +340000,131.2857422896139 +350000,122.91921588038765 +360000,137.5360241692698 +370000,122.577968386484 +380000,120.38801229862426 +390000,112.40881303800276 +400000,136.41065894173576 +410000,157.770876539222 +420000,116.48276870097638 +430000,107.34754609956408 +440000,128.07162726607862 +450000,144.98144514092664 +460000,157.45301557913214 +470000,120.30194270585723 +480000,115.30928372802607 +490000,135.0611942106401 +500000,153.19760050885037 +510000,137.34599484620225 +520000,123.41598329136025 +530000,152.3260364038793 +540000,126.77854694079278 +550000,136.51311529683622 +560000,146.10613869020492 +570000,152.9813283344439 +580000,96.53215551063572 +590000,120.92838093783482 +600000,139.26993780347166 +610000,158.2695229868248 +620000,141.38211593506819 +630000,160.91145819786112 +640000,145.73835158091805 +650000,152.1293052056197 +660000,125.95319609014359 +670000,123.36667179178457 +680000,142.3851054289411 +690000,161.78481822396878 +700000,159.32904560148 +710000,144.16738906744132 +720000,139.0462931562484 +730000,156.16875781168255 +740000,147.63215781218136 +750000,144.2296507351308 +760000,142.61578106725085 +770000,132.67595510729583 +780000,133.9235570002558 +790000,144.03993233221567 +800000,153.00503358998256 +810000,149.6858417622874 +820000,163.63299057066646 +830000,173.45050978637917 +840000,173.2406554040348 +850000,102.4500978550246 +860000,155.5690217163449 +870000,116.23076979334073 +880000,135.30712040652608 +890000,142.72785852562285 +900000,125.54007908242882 +910000,152.21653286024363 +920000,153.09204733084212 +930000,172.06274256912357 +940000,159.2460932023223 +950000,156.97342662193435 +960000,156.05084954312878 +970000,112.05333305561598 +980000,174.41628101227155 +990000,164.39750499168795 +1000000,152.65809370943938 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat_eval/ep_reward.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat_eval/ep_reward.log index 98e459b39..85f44129b 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat_eval/ep_reward.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat_eval/ep_reward.log @@ -1,101 +1,101 @@ step,stat_eval/ep_reward -10000,0.23935436066944754 -20000,0.26937148976161057 -30000,0.3452224954985412 -40000,0.3937087346879293 -50000,0.4032027521419942 -60000,0.4752063479373042 -70000,0.3042279267673864 -80000,0.4861107304260138 -90000,0.4822986257124202 -100000,0.496703263225425 -110000,0.31956629012907284 -120000,0.4688362563591725 -130000,0.37680057462656835 -140000,0.49519990747163334 -150000,0.49451096410071677 -160000,0.48358963574449076 -170000,0.483305083197006 -180000,0.39607298389899903 -190000,0.5074846668613546 -200000,0.4732738157505138 -210000,0.39333411411847274 -220000,0.4164305796111705 -230000,0.5454492017501162 -240000,0.44822654955227714 -250000,0.4591297590634018 -260000,0.5277452549683619 -270000,0.46381489376918356 -280000,0.6050405468893998 -290000,0.5019985355286107 -300000,0.4681530425071723 -310000,0.5943985458326175 -320000,0.5821164680522413 -330000,0.5013876817343748 -340000,0.45483411801792906 -350000,0.5192885298291781 -360000,0.6064564941683159 -370000,0.497448919113034 -380000,0.47721191332669494 -390000,0.43291372746554535 -400000,0.5261850400676436 -410000,0.6007659827157352 -420000,0.48134070281385755 -430000,0.4610211401250147 -440000,0.4885175838723003 -450000,0.5340013024114736 -460000,0.6607637213855074 -470000,0.47961202236162503 -480000,0.4425977436237174 -490000,0.5193191729376473 -500000,0.585865270246564 -510000,0.5889841861892101 -520000,0.5016269454993963 -530000,0.6001967292345052 -540000,0.46135349404553444 -550000,0.5397746474226872 -560000,0.6075470720547871 -570000,0.5880255704465474 -580000,0.36398815545352514 -590000,0.5357702358127521 -600000,0.5436105582293249 -610000,0.6361426837293511 -620000,0.5531798290187041 -630000,0.6464786469457451 -640000,0.6355051662151516 -650000,0.6426067315403363 -660000,0.5320394744897585 -670000,0.6224668514524626 -680000,0.5975676158838843 -690000,0.6684457843564787 -700000,0.6450302462339309 -710000,0.607998836739486 -720000,0.5751663078352595 -730000,0.6469156880554079 -740000,0.6110313790382838 -750000,0.5751932700403228 -760000,0.5830535617186017 -770000,0.5482810317038148 -780000,0.6205754034334028 -790000,0.5873616684423589 -800000,0.6251503473546586 -810000,0.6204662618783059 -820000,0.6819416540434333 -830000,0.6867505565159859 -840000,0.6688733398272866 -850000,0.39966369638309074 -860000,0.5950323536731944 -870000,0.4770310213588941 -880000,0.5237498953918862 -890000,0.5546446375648015 -900000,0.4876485163300094 -910000,0.5667417174171183 -920000,0.5647155112125878 -930000,0.6723162740627056 -940000,0.6119495122105191 -950000,0.6028946846117946 -960000,0.6302132478938173 -970000,0.4460856996698442 -980000,0.685983382179512 -990000,0.6721892067995462 -1000000,0.6154504972632491 +10000,0.2279237189597146 +20000,0.30510030362415985 +30000,0.2860470640826525 +40000,0.37029292744552667 +50000,0.4675100370243713 +60000,0.48580539465127937 +70000,0.33232923437533884 +80000,0.4734827596481678 +90000,0.5055084591523096 +100000,0.5262936144509297 +110000,0.36884163380163615 +120000,0.5419817752493856 +130000,0.4016634164396263 +140000,0.5121300840723413 +150000,0.49263053922134753 +160000,0.5072978469752253 +170000,0.5012447533496209 +180000,0.3938077307465107 +190000,0.4937973126764167 +200000,0.5082890925528242 +210000,0.4109397472625168 +220000,0.42299821343019406 +230000,0.5496477268715957 +240000,0.48090568240604903 +250000,0.46657980025227114 +260000,0.5683917314047151 +270000,0.5527856759187004 +280000,0.5784934479546533 +290000,0.5067737112472896 +300000,0.4612596378343512 +310000,0.5738790349038967 +320000,0.5762679784067153 +330000,0.48729051333725354 +340000,0.5320819600813644 +350000,0.5153542032825548 +360000,0.6033660267656148 +370000,0.49458180516963635 +380000,0.48298414997266736 +390000,0.46077648297611296 +400000,0.5461119166249625 +410000,0.631083506156888 +420000,0.4875199545296912 +430000,0.45705501209121524 +440000,0.5131665151739464 +450000,0.5984125965064082 +460000,0.641635508841638 +470000,0.48744290665167067 +480000,0.46964848812023696 +490000,0.540453545161854 +500000,0.6127904020354015 +510000,0.6080786229255161 +520000,0.5217229464847402 +530000,0.6093041456155174 +540000,0.5161021811282851 +550000,0.5464349676410754 +560000,0.5852822108247329 +570000,0.6119253133377756 +580000,0.3896608794778447 +590000,0.550192907211167 +600000,0.5577021142658288 +610000,0.6330780919472991 +620000,0.5665955493232345 +630000,0.6436458327914445 +640000,0.5836103680428399 +650000,0.6085172208224787 +660000,0.5178744800357029 +670000,0.5358431100141109 +680000,0.5705961730830666 +690000,0.6525754169233943 +700000,0.6373161824059199 +710000,0.5907149380819892 +720000,0.5722828775356161 +730000,0.6250104901699569 +740000,0.5905286312487255 +750000,0.5830504141838654 +760000,0.5704808088864601 +770000,0.5349839701870992 +780000,0.6006238793088493 +790000,0.5789697552210631 +800000,0.6120201343599302 +810000,0.6152618484716263 +820000,0.6839610826273064 +830000,0.6938020391455166 +840000,0.6929626216161393 +850000,0.4184466154549208 +860000,0.6222760868653798 +870000,0.4670088569542104 +880000,0.5426413457087491 +890000,0.5713179792389436 +900000,0.5078160462364145 +910000,0.6098217453580209 +920000,0.6188848107987223 +930000,0.6882509702764943 +940000,0.6369843728092892 +950000,0.6278937064877375 +960000,0.6258110912798633 +970000,0.4512970900639245 +980000,0.6976651240490861 +990000,0.6654673439908606 +1000000,0.6106323748377576 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat_eval/mse.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat_eval/mse.log index cf63aac8a..2dd7febbe 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat_eval/mse.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/logs/stat_eval/mse.log @@ -1,101 +1,101 @@ step,stat_eval/mse -10000,275.69075918863507 -20000,335.08856435074443 -30000,365.53861081939533 -40000,339.47348142607785 -50000,301.4612197032606 -60000,335.85601900364156 -70000,333.08185505022334 -80000,177.52869979132342 -90000,217.08794615198696 -100000,310.5262102624377 -110000,364.92994412435365 -120000,272.8541786801045 -130000,176.7524869393938 -140000,302.904804946663 -150000,232.67325130603336 -160000,292.1485472072101 -170000,240.81647188242323 -180000,181.60538649710583 -190000,222.26472139483585 -200000,226.8363187238741 -210000,213.31869491934518 -220000,158.9503493584031 -230000,276.3184236382841 -240000,231.45426012115337 -250000,244.2670704166971 -260000,211.01132964265395 -270000,199.7569876133946 -280000,136.38430345861812 -290000,161.26969526639638 -300000,254.33493134896986 -310000,302.48296576957546 -320000,317.0058505356885 -330000,201.64601993238136 -340000,209.72713738139728 -350000,340.0049213268047 -360000,228.85937448293421 -370000,178.43822897070322 -380000,197.8889991274702 -390000,282.5522281445828 -400000,242.37038609583382 -410000,257.68420451008535 -420000,140.32614190302502 -430000,173.691003892096 -440000,219.2651044514515 -450000,180.79864871371745 -460000,101.24801906377778 -470000,218.775234826427 -480000,166.7010612189929 -490000,331.53913526739404 -500000,280.90857742287005 -510000,143.12702086026772 -520000,121.27748866670417 -530000,295.09858697656506 -540000,181.15031282992777 -550000,275.5296536494585 -560000,107.88301442398696 -570000,292.15813220835764 -580000,316.8131629654562 -590000,263.6152763254621 -600000,267.0185981165814 -610000,255.27864484344371 -620000,226.73369746898388 -630000,228.89146334814535 -640000,240.3132693349199 -650000,306.4737984065146 -660000,184.64800471443115 -670000,231.16790837410795 -680000,235.94159547400332 -690000,123.48584629002012 -700000,227.73750748153347 -710000,232.26636371111917 -720000,167.79808835426633 -730000,157.24029551075893 -740000,259.0763758644345 -750000,230.37565766724265 -760000,238.74286018834633 -770000,134.46597888186767 -780000,190.03605932249715 -790000,196.7781899163544 -800000,291.9544030231548 -810000,182.72841989442193 -820000,132.32911084604532 -830000,216.8969898480969 -840000,207.51222548755896 -850000,94.59484703328368 -860000,349.5253343776005 -870000,181.94681268708462 -880000,198.07942546558016 -890000,146.4960308743608 -900000,229.48166073269212 -910000,233.989635410095 -920000,55.84548917833047 -930000,180.63634744945352 -940000,258.14094856032295 -950000,320.0426063726718 -960000,138.57273578850055 -970000,200.45862701877576 -980000,173.43849091002403 -990000,102.45053341781833 -1000000,352.99188302570036 +10000,306.5062024943574 +20000,342.72533549097363 +30000,415.4955109814399 +40000,308.9579244007196 +50000,258.02372037769635 +60000,311.8594676837241 +70000,306.4920567164904 +80000,178.2250735395259 +90000,205.128120048924 +100000,298.380680012436 +110000,355.05775949078395 +120000,284.48674113559855 +130000,171.40728955321353 +140000,304.771897145324 +150000,243.56920907780062 +160000,315.02999840960643 +170000,240.5135061326536 +180000,182.31027724013927 +190000,230.71166929211162 +200000,262.7794022653463 +210000,208.96197631279924 +220000,160.1277697125814 +230000,275.5949442571532 +240000,251.30962984999678 +250000,231.3126602844493 +260000,264.17380280929245 +270000,237.22521781725877 +280000,147.31132722531748 +290000,163.53808837474153 +300000,252.05858659509119 +310000,300.99088419919946 +320000,309.6994731270402 +330000,206.28179731681843 +340000,248.77592636017462 +350000,335.14736847576415 +360000,230.02751959535703 +370000,182.88031859804914 +380000,195.20158890421254 +390000,274.4337064668445 +400000,239.78837366793596 +410000,238.646571756162 +420000,135.55537729471706 +430000,177.4692763956435 +440000,206.9704685893662 +450000,198.8720272667427 +460000,107.61665399525373 +470000,220.50273899043214 +480000,157.94624756984493 +490000,309.8343214312182 +500000,250.66907560855515 +510000,130.80333818921898 +520000,208.84328424295754 +530000,288.124482796111 +540000,201.51998333332185 +550000,268.7502049483868 +560000,116.24390434453173 +570000,280.33472994148605 +580000,293.38756285154193 +590000,254.7418848606565 +600000,263.73718494485956 +610000,250.12007304840571 +620000,219.59672323139836 +630000,229.33559929124027 +640000,186.36617678415962 +650000,312.1771195158198 +660000,189.14058298024815 +670000,227.92623675276636 +680000,241.5103253189669 +690000,127.48933810386993 +700000,225.27293885907193 +710000,237.9800283466481 +720000,169.0119427609805 +730000,163.91365285185947 +740000,260.97196462582446 +750000,225.38093552918363 +760000,240.392328506386 +770000,139.55747081595945 +780000,193.82299020967307 +790000,202.810427786252 +800000,297.37146593587636 +810000,186.4863712773513 +820000,129.53831883685268 +830000,215.12478309576323 +840000,199.68570895960386 +850000,104.70383421381219 +860000,333.3589980353779 +870000,184.03716989484474 +880000,196.00321746961657 +890000,136.62392435118272 +900000,218.3291288001552 +910000,218.97181196031323 +920000,93.28075808107813 +930000,166.14329319450573 +940000,249.4428932606464 +950000,309.1559776949467 +960000,140.88838406399688 +970000,193.13269620456958 +980000,169.93012783430416 +990000,103.40466103931332 +1000000,343.64345350169003 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/model_best.pt b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/model_best.pt index 3afc9bd75..6022f56ec 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/model_best.pt and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/model_best.pt differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/model_latest.pt b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/model_latest.pt index 286958795..b75259691 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/model_latest.pt and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/model_latest.pt differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-loss-approx_kl.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-loss-approx_kl.jpg index d62fad2f6..852eb2a41 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-loss-approx_kl.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-loss-approx_kl.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-loss-entropy_loss.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-loss-entropy_loss.jpg index 97e9374e3..a6a08440f 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-loss-entropy_loss.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-loss-entropy_loss.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-loss-policy_loss.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-loss-policy_loss.jpg index 44788c122..2dea71c53 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-loss-policy_loss.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-loss-policy_loss.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-loss-value_loss.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-loss-value_loss.jpg index 8cff77dce..84d8f86ef 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-loss-value_loss.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-loss-value_loss.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat-constraint_violation.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat-constraint_violation.jpg index a49d76ee4..5c7b7680b 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat-constraint_violation.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat-constraint_violation.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat-ep_constraint_violation.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat-ep_constraint_violation.jpg index 8add7b1db..06dd556e8 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat-ep_constraint_violation.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat-ep_constraint_violation.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat-ep_length.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat-ep_length.jpg index bb0cba321..8d71affa5 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat-ep_length.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat-ep_length.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat-ep_return.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat-ep_return.jpg index 75af20053..d8c56b759 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat-ep_return.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat-ep_return.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat-ep_reward.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat-ep_reward.jpg index 7db407c8a..1fcf9ab3e 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat-ep_reward.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat-ep_reward.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat_eval-constraint_violation.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat_eval-constraint_violation.jpg index 9fb1cc6ce..319e2d589 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat_eval-constraint_violation.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat_eval-constraint_violation.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat_eval-ep_length.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat_eval-ep_length.jpg index 55c954451..6bcfef0e8 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat_eval-ep_length.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat_eval-ep_length.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat_eval-ep_return.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat_eval-ep_return.jpg index dfc41d2b2..7a2a35358 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat_eval-ep_return.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat_eval-ep_return.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat_eval-ep_reward.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat_eval-ep_reward.jpg index a6725a9f1..e2a83e843 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat_eval-ep_reward.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat_eval-ep_reward.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat_eval-mse.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat_eval-mse.jpg index 61d374e63..663e6ed2b 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat_eval-mse.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/plots/-stat_eval-mse.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/std_out.txt b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/std_out.txt index b95f38d74..67f538857 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/std_out.txt +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es/std_out.txt @@ -1,2601 +1,2601 @@ -2023-10-19 14:51:50,794 : Eval | ep_lengths 176.10 +/- 112.90 | ep_return 58.203 +/- 51.079 -2023-10-19 14:51:50,812 : +2023-10-27 16:42:33,658 : Eval | ep_lengths 176.10 +/- 112.90 | ep_return 55.348 +/- 51.552 +2023-10-27 16:42:33,693 : -------------------------------------- | loss/ | | -| approx_kl | 0.0211 | -| entropy_loss | -3.74 | -| policy_loss | -0.0151 | -| value_loss | 69.6 | +| approx_kl | 0.0299 | +| entropy_loss | -3.73 | +| policy_loss | -0.0181 | +| value_loss | 52.5 | | stat/ | | -| constraint_violation | 9 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 68.7 | -| ep_reward | 0.276 | +| constraint_violation | 10 | +| ep_constraint_vio... | 0.2 | +| ep_length | 201 | +| ep_return | 46.9 | +| ep_reward | 0.19 | | stat_eval/ | | | constraint_violation | 0.3 | | ep_length | 176 | -| ep_return | 58.2 | -| ep_reward | 0.239 | -| mse | 276 | +| ep_return | 55.3 | +| ep_reward | 0.228 | +| mse | 307 | | time/ | | | progress | 0.01 | | step | 1e+04 | -| step_time | 11.3 | +| step_time | 8.39 | -------------------------------------- -2023-10-19 14:54:02,981 : Eval | ep_lengths 200.50 +/- 99.00 | ep_return 67.171 +/- 36.780 -2023-10-19 14:54:03,013 : +2023-10-27 16:44:11,737 : Eval | ep_lengths 201.90 +/- 96.23 | ep_return 76.096 +/- 54.071 +2023-10-27 16:44:11,771 : -------------------------------------- | loss/ | | -| approx_kl | 0.0176 | -| entropy_loss | -3.76 | -| policy_loss | -0.0106 | -| value_loss | 9.74 | +| approx_kl | 0.0158 | +| entropy_loss | -3.91 | +| policy_loss | -0.0168 | +| value_loss | 19.8 | | stat/ | | -| constraint_violation | 16 | +| constraint_violation | 19 | | ep_constraint_vio... | 0.3 | | ep_length | 176 | -| ep_return | 81.5 | -| ep_reward | 0.335 | +| ep_return | 61.2 | +| ep_reward | 0.255 | | stat_eval/ | | | constraint_violation | 0.2 | -| ep_length | 200 | -| ep_return | 67.2 | -| ep_reward | 0.269 | -| mse | 335 | +| ep_length | 202 | +| ep_return | 76.1 | +| ep_reward | 0.305 | +| mse | 343 | | time/ | | | progress | 0.02 | | step | 2e+04 | -| step_time | 11.1 | +| step_time | 8.3 | -------------------------------------- -2023-10-19 14:56:18,675 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 86.273 +/- 37.369 -2023-10-19 14:56:18,684 : +2023-10-27 16:45:51,968 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 71.480 +/- 33.167 +2023-10-27 16:45:51,969 : -------------------------------------- | loss/ | | -| approx_kl | 0.0127 | -| entropy_loss | -3.84 | -| policy_loss | -0.018 | -| value_loss | 13.6 | +| approx_kl | 0.0181 | +| entropy_loss | -4.01 | +| policy_loss | -0.0143 | +| value_loss | 5.87 | | stat/ | | -| constraint_violation | 32 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 73.2 | -| ep_reward | 0.3 | +| constraint_violation | 33 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 84.3 | +| ep_reward | 0.338 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 86.3 | -| ep_reward | 0.345 | -| mse | 366 | +| ep_return | 71.5 | +| ep_reward | 0.286 | +| mse | 415 | | time/ | | | progress | 0.03 | | step | 3e+04 | -| step_time | 11.1 | +| step_time | 8.24 | -------------------------------------- -2023-10-19 14:58:35,938 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 98.427 +/- 21.960 -2023-10-19 14:58:35,947 : +2023-10-27 16:47:31,823 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 91.901 +/- 44.633 +2023-10-27 16:47:31,861 : -------------------------------------- | loss/ | | -| approx_kl | 0.0107 | -| entropy_loss | -3.89 | -| policy_loss | -0.0121 | -| value_loss | 3.26 | +| approx_kl | 0.0289 | +| entropy_loss | -3.97 | +| policy_loss | -0.00397 | +| value_loss | 11.3 | | stat/ | | -| constraint_violation | 39 | +| constraint_violation | 41 | | ep_constraint_vio... | 0.4 | | ep_length | 151 | -| ep_return | 46 | -| ep_reward | 0.307 | +| ep_return | 55.1 | +| ep_reward | 0.265 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 98.4 | -| ep_reward | 0.394 | -| mse | 339 | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 91.9 | +| ep_reward | 0.37 | +| mse | 309 | | time/ | | | progress | 0.04 | | step | 4e+04 | -| step_time | 11.5 | +| step_time | 8.26 | -------------------------------------- -2023-10-19 15:00:50,539 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 97.968 +/- 41.815 -2023-10-19 15:00:50,541 : +2023-10-27 16:49:11,754 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 114.007 +/- 43.163 +2023-10-27 16:49:11,762 : -------------------------------------- | loss/ | | -| approx_kl | 0.0187 | -| entropy_loss | -3.88 | -| policy_loss | -0.0141 | -| value_loss | 3.96 | +| approx_kl | 0.0147 | +| entropy_loss | -4.13 | +| policy_loss | -0.0122 | +| value_loss | 6.48 | | stat/ | | | constraint_violation | 49 | | ep_constraint_vio... | 0.2 | | ep_length | 200 | -| ep_return | 83.2 | -| ep_reward | 0.386 | +| ep_return | 93.6 | +| ep_reward | 0.377 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 98 | -| ep_reward | 0.403 | -| mse | 301 | +| ep_return | 114 | +| ep_reward | 0.468 | +| mse | 258 | | time/ | | | progress | 0.05 | | step | 5e+04 | -| step_time | 11.2 | +| step_time | 8.26 | -------------------------------------- -2023-10-19 15:03:08,805 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 118.802 +/- 15.163 -2023-10-19 15:03:08,813 : +2023-10-27 16:50:55,027 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 121.451 +/- 21.099 +2023-10-27 16:50:55,037 : -------------------------------------- | loss/ | | -| approx_kl | 0.0178 | -| entropy_loss | -3.84 | -| policy_loss | -0.0145 | -| value_loss | 4.74 | +| approx_kl | 0.0232 | +| entropy_loss | -4.27 | +| policy_loss | -0.0159 | +| value_loss | 3.13 | | stat/ | | -| constraint_violation | 57 | +| constraint_violation | 55 | | ep_constraint_vio... | 0.2 | | ep_length | 200 | -| ep_return | 95.4 | -| ep_reward | 0.388 | +| ep_return | 105 | +| ep_reward | 0.427 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 119 | -| ep_reward | 0.475 | -| mse | 336 | +| ep_return | 121 | +| ep_reward | 0.486 | +| mse | 312 | | time/ | | | progress | 0.06 | | step | 6e+04 | -| step_time | 11.3 | +| step_time | 8.43 | -------------------------------------- -2023-10-19 15:05:20,248 : Eval | ep_lengths 175.70 +/- 113.50 | ep_return 74.454 +/- 50.875 -2023-10-19 15:05:20,249 : +2023-10-27 16:52:32,559 : Eval | ep_lengths 175.60 +/- 113.65 | ep_return 81.500 +/- 56.089 +2023-10-27 16:52:32,560 : -------------------------------------- | loss/ | | -| approx_kl | 0.0197 | -| entropy_loss | -3.81 | -| policy_loss | -0.027 | -| value_loss | 1.75 | +| approx_kl | 0.0225 | +| entropy_loss | -4.32 | +| policy_loss | -0.0154 | +| value_loss | 1.49 | | stat/ | | -| constraint_violation | 59 | -| ep_constraint_vio... | 0.1 | -| ep_length | 227 | -| ep_return | 104 | -| ep_reward | 0.42 | +| constraint_violation | 58 | +| ep_constraint_vio... | 0.2 | +| ep_length | 202 | +| ep_return | 102 | +| ep_reward | 0.412 | | stat_eval/ | | | constraint_violation | 0.3 | | ep_length | 176 | -| ep_return | 74.5 | -| ep_reward | 0.304 | -| mse | 333 | +| ep_return | 81.5 | +| ep_reward | 0.332 | +| mse | 306 | | time/ | | | progress | 0.07 | | step | 7e+04 | -| step_time | 11.4 | +| step_time | 8.21 | -------------------------------------- -2023-10-19 15:07:37,908 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 121.420 +/- 41.667 -2023-10-19 15:07:37,918 : +2023-10-27 16:54:14,320 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 118.265 +/- 40.409 +2023-10-27 16:54:14,347 : -------------------------------------- | loss/ | | -| approx_kl | 0.019 | -| entropy_loss | -3.94 | -| policy_loss | -0.00467 | -| value_loss | 1.55 | +| approx_kl | 0.0175 | +| entropy_loss | -4.27 | +| policy_loss | -0.0167 | +| value_loss | 3.77 | | stat/ | | -| constraint_violation | 69 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 79.3 | -| ep_reward | 0.337 | +| constraint_violation | 67 | +| ep_constraint_vio... | 0.3 | +| ep_length | 176 | +| ep_return | 85.5 | +| ep_reward | 0.362 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 121 | -| ep_reward | 0.486 | +| ep_return | 118 | +| ep_reward | 0.473 | | mse | 178 | | time/ | | | progress | 0.08 | | step | 8e+04 | -| step_time | 11.4 | +| step_time | 8.45 | -------------------------------------- -2023-10-19 15:09:55,857 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 116.389 +/- 42.012 -2023-10-19 15:09:55,859 : +2023-10-27 16:55:56,021 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 122.202 +/- 43.342 +2023-10-27 16:55:56,057 : -------------------------------------- | loss/ | | -| approx_kl | 0.012 | -| entropy_loss | -3.94 | -| policy_loss | -0.0121 | -| value_loss | 1.84 | +| approx_kl | 0.0192 | +| entropy_loss | -4.33 | +| policy_loss | -0.0175 | +| value_loss | 1.8 | | stat/ | | -| constraint_violation | 73 | +| constraint_violation | 71 | | ep_constraint_vio... | 0 | | ep_length | 250 | | ep_return | 118 | -| ep_reward | 0.47 | +| ep_reward | 0.471 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 116 | -| ep_reward | 0.482 | -| mse | 217 | +| ep_return | 122 | +| ep_reward | 0.506 | +| mse | 205 | | time/ | | | progress | 0.09 | | step | 9e+04 | -| step_time | 11.4 | +| step_time | 8.49 | -------------------------------------- -2023-10-19 15:12:15,483 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 124.176 +/- 24.013 -2023-10-19 15:12:15,492 : +2023-10-27 16:57:39,674 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 131.573 +/- 26.155 +2023-10-27 16:57:39,707 : -------------------------------------- | loss/ | | -| approx_kl | 0.0211 | -| entropy_loss | -3.96 | -| policy_loss | -0.0134 | -| value_loss | 1.53 | +| approx_kl | 0.0106 | +| entropy_loss | -4.45 | +| policy_loss | -0.0133 | +| value_loss | 3.57 | | stat/ | | -| constraint_violation | 83 | +| constraint_violation | 80 | | ep_constraint_vio... | 0.1 | | ep_length | 225 | -| ep_return | 113 | -| ep_reward | 0.454 | +| ep_return | 112 | +| ep_reward | 0.449 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 124 | -| ep_reward | 0.497 | -| mse | 311 | +| ep_return | 132 | +| ep_reward | 0.526 | +| mse | 298 | | time/ | | | progress | 0.1 | | step | 1e+05 | -| step_time | 11.4 | +| step_time | 8.4 | -------------------------------------- -2023-10-19 15:14:30,064 : Eval | ep_lengths 201.30 +/- 97.42 | ep_return 79.588 +/- 40.800 -2023-10-19 15:14:30,065 : +2023-10-27 16:59:20,543 : Eval | ep_lengths 201.30 +/- 97.42 | ep_return 91.905 +/- 46.167 +2023-10-27 16:59:20,544 : -------------------------------------- | loss/ | | -| approx_kl | 0.0167 | -| entropy_loss | -3.93 | -| policy_loss | -0.0132 | -| value_loss | 1.63 | +| approx_kl | 0.0291 | +| entropy_loss | -4.45 | +| policy_loss | -0.0173 | +| value_loss | 2.12 | | stat/ | | -| constraint_violation | 89 | +| constraint_violation | 85 | | ep_constraint_vio... | 0.1 | | ep_length | 225 | -| ep_return | 112 | -| ep_reward | 0.447 | +| ep_return | 117 | +| ep_reward | 0.467 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 201 | -| ep_return | 79.6 | -| ep_reward | 0.32 | -| mse | 365 | +| ep_return | 91.9 | +| ep_reward | 0.369 | +| mse | 355 | | time/ | | | progress | 0.11 | | step | 1.1e+05 | -| step_time | 11.3 | +| step_time | 8.38 | -------------------------------------- -2023-10-19 15:16:45,359 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 116.524 +/- 43.703 -2023-10-19 15:16:45,381 : +2023-10-27 17:01:04,245 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 135.495 +/- 20.732 +2023-10-27 17:01:04,255 : -------------------------------------- | loss/ | | -| approx_kl | 0.0193 | -| entropy_loss | -3.9 | -| policy_loss | 0.000771 | -| value_loss | 2.05 | +| approx_kl | 0.0242 | +| entropy_loss | -4.52 | +| policy_loss | -0.01 | +| value_loss | 1.61 | | stat/ | | -| constraint_violation | 96 | +| constraint_violation | 91 | | ep_constraint_vio... | 0.1 | | ep_length | 225 | -| ep_return | 123 | -| ep_reward | 0.493 | +| ep_return | 119 | +| ep_reward | 0.477 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 117 | -| ep_reward | 0.469 | -| mse | 273 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 135 | +| ep_reward | 0.542 | +| mse | 284 | | time/ | | | progress | 0.12 | | step | 1.2e+05 | -| step_time | 11.2 | +| step_time | 8.32 | -------------------------------------- -2023-10-19 15:18:53,374 : Eval | ep_lengths 176.20 +/- 112.74 | ep_return 92.761 +/- 65.727 -2023-10-19 15:18:53,376 : +2023-10-27 17:02:42,885 : Eval | ep_lengths 176.50 +/- 112.28 | ep_return 98.965 +/- 67.000 +2023-10-27 17:02:42,911 : -------------------------------------- | loss/ | | -| approx_kl | 0.016 | -| entropy_loss | -3.98 | -| policy_loss | -0.01 | -| value_loss | 1.42 | +| approx_kl | 0.0285 | +| entropy_loss | -4.64 | +| policy_loss | -0.0115 | +| value_loss | 2.7 | | stat/ | | -| constraint_violation | 106 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 108 | -| ep_reward | 0.445 | +| constraint_violation | 98 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 131 | +| ep_reward | 0.522 | | stat_eval/ | | | constraint_violation | 0.3 | | ep_length | 176 | -| ep_return | 92.8 | -| ep_reward | 0.377 | -| mse | 177 | +| ep_return | 99 | +| ep_reward | 0.402 | +| mse | 171 | | time/ | | | progress | 0.13 | | step | 1.3e+05 | -| step_time | 10.7 | +| step_time | 8.44 | -------------------------------------- -2023-10-19 15:21:11,811 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 123.800 +/- 19.493 -2023-10-19 15:21:11,813 : +2023-10-27 17:04:27,123 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 128.033 +/- 17.640 +2023-10-27 17:04:27,124 : -------------------------------------- | loss/ | | -| approx_kl | 0.0183 | -| entropy_loss | -4.01 | -| policy_loss | -0.0077 | -| value_loss | 0.557 | +| approx_kl | 0.0181 | +| entropy_loss | -4.78 | +| policy_loss | -0.0174 | +| value_loss | 5.28 | | stat/ | | -| constraint_violation | 117 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 114 | -| ep_reward | 0.529 | +| constraint_violation | 109 | +| ep_constraint_vio... | 0.3 | +| ep_length | 175 | +| ep_return | 91.3 | +| ep_reward | 0.443 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 124 | -| ep_reward | 0.495 | -| mse | 303 | +| ep_return | 128 | +| ep_reward | 0.512 | +| mse | 305 | | time/ | | | progress | 0.14 | | step | 1.4e+05 | -| step_time | 11.3 | +| step_time | 8.51 | -------------------------------------- -2023-10-19 15:23:27,767 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 121.915 +/- 44.982 -2023-10-19 15:23:27,769 : +2023-10-27 17:06:10,021 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 121.384 +/- 44.179 +2023-10-27 17:06:10,022 : -------------------------------------- | loss/ | | -| approx_kl | 0.0251 | -| entropy_loss | -3.97 | -| policy_loss | -0.0173 | -| value_loss | 3.89 | +| approx_kl | 0.0292 | +| entropy_loss | -4.85 | +| policy_loss | -0.00735 | +| value_loss | 3.35 | | stat/ | | -| constraint_violation | 126 | +| constraint_violation | 117 | | ep_constraint_vio... | 0.2 | | ep_length | 200 | -| ep_return | 91.4 | -| ep_reward | 0.48 | +| ep_return | 100 | +| ep_reward | 0.464 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 122 | -| ep_reward | 0.495 | -| mse | 233 | +| ep_return | 121 | +| ep_reward | 0.493 | +| mse | 244 | | time/ | | | progress | 0.15 | | step | 1.5e+05 | -| step_time | 11.2 | +| step_time | 8.53 | -------------------------------------- -2023-10-19 15:25:43,297 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 113.457 +/- 40.389 -2023-10-19 15:25:43,300 : +2023-10-27 17:07:55,088 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 126.824 +/- 9.462 +2023-10-27 17:07:55,089 : -------------------------------------- | loss/ | | -| approx_kl | 0.0171 | -| entropy_loss | -3.95 | -| policy_loss | -0.00651 | -| value_loss | 4.15 | +| approx_kl | 0.0113 | +| entropy_loss | -4.88 | +| policy_loss | -0.023 | +| value_loss | 1.9 | | stat/ | | -| constraint_violation | 144 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 119 | -| ep_reward | 0.56 | +| constraint_violation | 131 | +| ep_constraint_vio... | 0.2 | +| ep_length | 201 | +| ep_return | 97.9 | +| ep_reward | 0.393 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 113 | -| ep_reward | 0.484 | -| mse | 292 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 127 | +| ep_reward | 0.507 | +| mse | 315 | | time/ | | | progress | 0.16 | | step | 1.6e+05 | -| step_time | 11.4 | +| step_time | 8.5 | -------------------------------------- -2023-10-19 15:27:54,223 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 120.772 +/- 53.634 -2023-10-19 15:27:54,224 : +2023-10-27 17:09:37,644 : Eval | ep_lengths 225.80 +/- 72.60 | ep_return 125.275 +/- 46.891 +2023-10-27 17:09:37,645 : -------------------------------------- | loss/ | | -| approx_kl | 0.0232 | -| entropy_loss | -4.04 | -| policy_loss | -0.0136 | -| value_loss | 0.611 | +| approx_kl | 0.0211 | +| entropy_loss | -5 | +| policy_loss | -0.0163 | +| value_loss | 0.591 | | stat/ | | -| constraint_violation | 150 | +| constraint_violation | 137 | | ep_constraint_vio... | 0.1 | | ep_length | 225 | -| ep_return | 118 | -| ep_reward | 0.476 | +| ep_return | 110 | +| ep_reward | 0.44 | | stat_eval/ | | | constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 121 | -| ep_reward | 0.483 | +| ep_length | 226 | +| ep_return | 125 | +| ep_reward | 0.501 | | mse | 241 | | time/ | | | progress | 0.17 | | step | 1.7e+05 | -| step_time | 10.7 | +| step_time | 8.39 | -------------------------------------- -2023-10-19 15:29:59,700 : Eval | ep_lengths 176.00 +/- 113.04 | ep_return 98.260 +/- 66.935 -2023-10-19 15:29:59,721 : +2023-10-27 17:11:16,265 : Eval | ep_lengths 176.00 +/- 113.04 | ep_return 97.684 +/- 64.825 +2023-10-27 17:11:16,292 : -------------------------------------- | loss/ | | -| approx_kl | 0.0284 | -| entropy_loss | -4.07 | -| policy_loss | -0.00995 | -| value_loss | 1.57 | +| approx_kl | 0.0267 | +| entropy_loss | -5.02 | +| policy_loss | -0.00742 | +| value_loss | 1.68 | | stat/ | | -| constraint_violation | 156 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 119 | -| ep_reward | 0.507 | +| constraint_violation | 144 | +| ep_constraint_vio... | 0.2 | +| ep_length | 201 | +| ep_return | 98 | +| ep_reward | 0.397 | | stat_eval/ | | | constraint_violation | 0.3 | | ep_length | 176 | -| ep_return | 98.3 | -| ep_reward | 0.396 | +| ep_return | 97.7 | +| ep_reward | 0.394 | | mse | 182 | | time/ | | | progress | 0.18 | | step | 1.8e+05 | -| step_time | 10.6 | +| step_time | 8.47 | -------------------------------------- -2023-10-19 15:32:09,486 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 126.858 +/- 47.360 -2023-10-19 15:32:09,495 : +2023-10-27 17:12:58,557 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 123.436 +/- 44.160 +2023-10-27 17:12:58,558 : -------------------------------------- | loss/ | | -| approx_kl | 0.0207 | -| entropy_loss | -4.06 | -| policy_loss | -0.0103 | -| value_loss | 0.731 | +| approx_kl | 0.0291 | +| entropy_loss | -4.99 | +| policy_loss | -0.0163 | +| value_loss | 1.11 | | stat/ | | -| constraint_violation | 165 | +| constraint_violation | 149 | | ep_constraint_vio... | 0.1 | | ep_length | 225 | -| ep_return | 129 | -| ep_reward | 0.592 | +| ep_return | 123 | +| ep_reward | 0.491 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 127 | -| ep_reward | 0.507 | -| mse | 222 | +| ep_return | 123 | +| ep_reward | 0.494 | +| mse | 231 | | time/ | | | progress | 0.19 | | step | 1.9e+05 | -| step_time | 10.8 | +| step_time | 8.55 | -------------------------------------- -2023-10-19 15:34:14,631 : Eval | ep_lengths 200.20 +/- 99.60 | ep_return 113.271 +/- 61.057 -2023-10-19 15:34:14,632 : +2023-10-27 17:14:41,751 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 123.089 +/- 45.283 +2023-10-27 17:14:41,778 : -------------------------------------- | loss/ | | -| approx_kl | 0.0229 | -| entropy_loss | -4.06 | -| policy_loss | -0.0201 | -| value_loss | 2.12 | +| approx_kl | 0.0225 | +| entropy_loss | -5 | +| policy_loss | -0.0105 | +| value_loss | 3.23 | | stat/ | | -| constraint_violation | 174 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 144 | -| ep_reward | 0.575 | +| constraint_violation | 160 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 121 | +| ep_reward | 0.494 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 200 | -| ep_return | 113 | -| ep_reward | 0.473 | -| mse | 227 | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 123 | +| ep_reward | 0.508 | +| mse | 263 | | time/ | | | progress | 0.2 | | step | 2e+05 | -| step_time | 10.4 | +| step_time | 8.53 | -------------------------------------- -2023-10-19 15:36:18,602 : Eval | ep_lengths 176.60 +/- 112.16 | ep_return 97.326 +/- 65.640 -2023-10-19 15:36:18,603 : +2023-10-27 17:16:20,383 : Eval | ep_lengths 176.50 +/- 112.31 | ep_return 101.741 +/- 69.865 +2023-10-27 17:16:20,384 : -------------------------------------- | loss/ | | -| approx_kl | 0.0313 | -| entropy_loss | -4.17 | -| policy_loss | -0.00257 | -| value_loss | 0.806 | +| approx_kl | 0.0192 | +| entropy_loss | -5.02 | +| policy_loss | -0.0097 | +| value_loss | 2.56 | | stat/ | | -| constraint_violation | 191 | -| ep_constraint_vio... | 0.3 | -| ep_length | 175 | -| ep_return | 107 | -| ep_reward | 0.5 | +| constraint_violation | 168 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 122 | +| ep_reward | 0.49 | | stat_eval/ | | | constraint_violation | 0.3 | -| ep_length | 177 | -| ep_return | 97.3 | -| ep_reward | 0.393 | -| mse | 213 | +| ep_length | 176 | +| ep_return | 102 | +| ep_reward | 0.411 | +| mse | 209 | | time/ | | | progress | 0.21 | | step | 2.1e+05 | -| step_time | 10.5 | +| step_time | 8.54 | -------------------------------------- -2023-10-19 15:38:21,983 : Eval | ep_lengths 176.50 +/- 112.32 | ep_return 102.624 +/- 68.556 -2023-10-19 15:38:22,005 : +2023-10-27 17:17:59,029 : Eval | ep_lengths 176.70 +/- 112.00 | ep_return 104.374 +/- 69.621 +2023-10-27 17:17:59,030 : -------------------------------------- | loss/ | | -| approx_kl | 0.0141 | -| entropy_loss | -4.21 | -| policy_loss | -0.00972 | -| value_loss | 1.16 | +| approx_kl | 0.0198 | +| entropy_loss | -5.05 | +| policy_loss | -0.00494 | +| value_loss | 2.91 | | stat/ | | -| constraint_violation | 199 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 107 | -| ep_reward | 0.452 | +| constraint_violation | 175 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 116 | +| ep_reward | 0.465 | | stat_eval/ | | | constraint_violation | 0.3 | -| ep_length | 176 | -| ep_return | 103 | -| ep_reward | 0.416 | -| mse | 159 | +| ep_length | 177 | +| ep_return | 104 | +| ep_reward | 0.423 | +| mse | 160 | | time/ | | | progress | 0.22 | | step | 2.2e+05 | -| step_time | 10.5 | +| step_time | 8.52 | -------------------------------------- -2023-10-19 15:40:31,917 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 136.362 +/- 29.575 -2023-10-19 15:40:31,927 : +2023-10-27 17:19:42,836 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 137.412 +/- 22.619 +2023-10-27 17:19:42,850 : -------------------------------------- | loss/ | | -| approx_kl | 0.0225 | -| entropy_loss | -4.2 | -| policy_loss | -0.0112 | -| value_loss | 0.958 | +| approx_kl | 0.00834 | +| entropy_loss | -5.08 | +| policy_loss | -0.0267 | +| value_loss | 1.82 | | stat/ | | -| constraint_violation | 205 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 115 | -| ep_reward | 0.472 | +| constraint_violation | 180 | +| ep_constraint_vio... | 0.2 | +| ep_length | 201 | +| ep_return | 111 | +| ep_reward | 0.445 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 136 | -| ep_reward | 0.545 | +| ep_return | 137 | +| ep_reward | 0.55 | | mse | 276 | | time/ | | | progress | 0.23 | | step | 2.3e+05 | -| step_time | 10.6 | +| step_time | 8.47 | -------------------------------------- -2023-10-19 15:42:34,623 : Eval | ep_lengths 175.30 +/- 114.11 | ep_return 89.825 +/- 63.038 -2023-10-19 15:42:34,624 : +2023-10-27 17:21:22,603 : Eval | ep_lengths 200.20 +/- 99.60 | ep_return 102.179 +/- 57.394 +2023-10-27 17:21:22,604 : -------------------------------------- | loss/ | | -| approx_kl | 0.0246 | -| entropy_loss | -4.24 | -| policy_loss | -0.00782 | -| value_loss | 1.4 | +| approx_kl | 0.0325 | +| entropy_loss | -5.16 | +| policy_loss | -0.0209 | +| value_loss | 1.62 | | stat/ | | -| constraint_violation | 212 | -| ep_constraint_vio... | 0.3 | -| ep_length | 175 | -| ep_return | 101 | -| ep_reward | 0.428 | +| constraint_violation | 182 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 136 | +| ep_reward | 0.544 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 175 | -| ep_return | 89.8 | -| ep_reward | 0.448 | -| mse | 231 | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 102 | +| ep_reward | 0.481 | +| mse | 251 | | time/ | | | progress | 0.24 | | step | 2.4e+05 | -| step_time | 10.5 | +| step_time | 8.33 | -------------------------------------- -2023-10-19 15:44:42,941 : Eval | ep_lengths 201.90 +/- 96.26 | ep_return 114.666 +/- 59.374 -2023-10-19 15:44:42,943 : +2023-10-27 17:23:03,128 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 116.318 +/- 61.356 +2023-10-27 17:23:03,129 : -------------------------------------- | loss/ | | -| approx_kl | 0.0196 | -| entropy_loss | -4.39 | -| policy_loss | -0.0204 | -| value_loss | 2.68 | +| approx_kl | 0.0235 | +| entropy_loss | -5.25 | +| policy_loss | -0.0179 | +| value_loss | 0.788 | | stat/ | | -| constraint_violation | 217 | +| constraint_violation | 187 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 119 | -| ep_reward | 0.476 | +| ep_return | 134 | +| ep_reward | 0.535 | | stat_eval/ | | | constraint_violation | 0.2 | -| ep_length | 202 | -| ep_return | 115 | -| ep_reward | 0.459 | -| mse | 244 | +| ep_length | 200 | +| ep_return | 116 | +| ep_reward | 0.467 | +| mse | 231 | | time/ | | | progress | 0.25 | | step | 2.5e+05 | -| step_time | 11.3 | +| step_time | 8.45 | -------------------------------------- -2023-10-19 15:46:59,486 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 129.837 +/- 48.195 -2023-10-19 15:46:59,488 : +2023-10-27 17:24:48,270 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 142.098 +/- 24.528 +2023-10-27 17:24:48,282 : -------------------------------------- | loss/ | | -| approx_kl | 0.0229 | -| entropy_loss | -4.47 | -| policy_loss | -0.0167 | -| value_loss | 1.35 | +| approx_kl | 0.0166 | +| entropy_loss | -5.24 | +| policy_loss | -0.0258 | +| value_loss | 0.779 | | stat/ | | -| constraint_violation | 222 | +| constraint_violation | 193 | | ep_constraint_vio... | 0.1 | | ep_length | 225 | -| ep_return | 128 | -| ep_reward | 0.515 | +| ep_return | 135 | +| ep_reward | 0.541 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 130 | -| ep_reward | 0.528 | -| mse | 211 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 142 | +| ep_reward | 0.568 | +| mse | 264 | | time/ | | | progress | 0.26 | | step | 2.6e+05 | -| step_time | 11.1 | +| step_time | 8.52 | -------------------------------------- -2023-10-19 15:49:10,643 : Eval | ep_lengths 175.30 +/- 114.11 | ep_return 92.354 +/- 63.544 -2023-10-19 15:49:10,645 : +2023-10-27 17:26:29,605 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 114.609 +/- 60.266 +2023-10-27 17:26:29,606 : -------------------------------------- | loss/ | | -| approx_kl | 0.0105 | -| entropy_loss | -4.58 | -| policy_loss | -0.0112 | -| value_loss | 2.76 | +| approx_kl | 0.0119 | +| entropy_loss | -5.19 | +| policy_loss | -0.0224 | +| value_loss | 1.2 | | stat/ | | -| constraint_violation | 233 | -| ep_constraint_vio... | 0.4 | -| ep_length | 150 | -| ep_return | 82.8 | -| ep_reward | 0.346 | +| constraint_violation | 196 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 125 | +| ep_reward | 0.551 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 175 | -| ep_return | 92.4 | -| ep_reward | 0.464 | -| mse | 200 | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 115 | +| ep_reward | 0.553 | +| mse | 237 | | time/ | | | progress | 0.27 | | step | 2.7e+05 | -| step_time | 11.2 | +| step_time | 8.5 | -------------------------------------- -2023-10-19 15:51:24,869 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 151.233 +/- 53.169 -2023-10-19 15:51:24,877 : +2023-10-27 17:28:13,181 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 144.596 +/- 51.330 +2023-10-27 17:28:13,189 : -------------------------------------- | loss/ | | -| approx_kl | 0.0244 | -| entropy_loss | -4.65 | -| policy_loss | -0.024 | -| value_loss | 3.41 | +| approx_kl | 0.0232 | +| entropy_loss | -5.22 | +| policy_loss | -0.00882 | +| value_loss | 1.58 | | stat/ | | -| constraint_violation | 238 | -| ep_constraint_vio... | 0.3 | -| ep_length | 176 | -| ep_return | 90.3 | -| ep_reward | 0.439 | +| constraint_violation | 206 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 147 | +| ep_reward | 0.587 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 151 | -| ep_reward | 0.605 | -| mse | 136 | +| ep_return | 145 | +| ep_reward | 0.578 | +| mse | 147 | | time/ | | | progress | 0.28 | | step | 2.8e+05 | -| step_time | 11.1 | +| step_time | 8.58 | -------------------------------------- -2023-10-19 15:53:32,634 : Eval | ep_lengths 200.50 +/- 99.00 | ep_return 125.053 +/- 66.915 -2023-10-19 15:53:32,635 : +2023-10-27 17:29:55,371 : Eval | ep_lengths 201.40 +/- 97.21 | ep_return 126.136 +/- 67.393 +2023-10-27 17:29:55,372 : -------------------------------------- | loss/ | | -| approx_kl | 0.0251 | -| entropy_loss | -4.63 | -| policy_loss | -0.00957 | -| value_loss | 2.05 | +| approx_kl | 0.0234 | +| entropy_loss | -5.23 | +| policy_loss | -0.019 | +| value_loss | 0.69 | | stat/ | | -| constraint_violation | 247 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 120 | -| ep_reward | 0.486 | +| constraint_violation | 212 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 145 | +| ep_reward | 0.58 | | stat_eval/ | | | constraint_violation | 0.2 | -| ep_length | 200 | -| ep_return | 125 | -| ep_reward | 0.502 | -| mse | 161 | +| ep_length | 201 | +| ep_return | 126 | +| ep_reward | 0.507 | +| mse | 164 | | time/ | | | progress | 0.29 | | step | 2.9e+05 | -| step_time | 10.8 | +| step_time | 8.5 | -------------------------------------- -2023-10-19 15:55:38,464 : Eval | ep_lengths 200.50 +/- 99.00 | ep_return 112.675 +/- 57.048 -2023-10-19 15:55:38,466 : +2023-10-27 17:31:37,095 : Eval | ep_lengths 200.50 +/- 99.00 | ep_return 110.987 +/- 56.858 +2023-10-27 17:31:37,096 : -------------------------------------- | loss/ | | -| approx_kl | 0.0182 | -| entropy_loss | -4.73 | -| policy_loss | -0.0156 | -| value_loss | 1.23 | +| approx_kl | 0.0134 | +| entropy_loss | -5.23 | +| policy_loss | -0.0242 | +| value_loss | 1.82 | | stat/ | | -| constraint_violation | 259 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 141 | -| ep_reward | 0.563 | +| constraint_violation | 219 | +| ep_constraint_vio... | 0.2 | +| ep_length | 200 | +| ep_return | 116 | +| ep_reward | 0.463 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 200 | -| ep_return | 113 | -| ep_reward | 0.468 | -| mse | 254 | +| ep_return | 111 | +| ep_reward | 0.461 | +| mse | 252 | | time/ | | | progress | 0.3 | | step | 3e+05 | -| step_time | 10.6 | +| step_time | 8.64 | -------------------------------------- -2023-10-19 15:57:47,171 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 148.600 +/- 29.065 -2023-10-19 15:57:47,172 : +2023-10-27 17:33:23,814 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 143.470 +/- 23.367 +2023-10-27 17:33:23,815 : -------------------------------------- | loss/ | | -| approx_kl | 0.0151 | -| entropy_loss | -4.73 | -| policy_loss | -0.0192 | -| value_loss | 4.66 | +| approx_kl | 0.0243 | +| entropy_loss | -5.38 | +| policy_loss | -0.0116 | +| value_loss | 4.04 | | stat/ | | -| constraint_violation | 268 | +| constraint_violation | 228 | | ep_constraint_vio... | 0.2 | -| ep_length | 202 | -| ep_return | 127 | -| ep_reward | 0.513 | +| ep_length | 200 | +| ep_return | 104 | +| ep_reward | 0.437 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 149 | -| ep_reward | 0.594 | -| mse | 302 | +| ep_return | 143 | +| ep_reward | 0.574 | +| mse | 301 | | time/ | | | progress | 0.31 | | step | 3.1e+05 | -| step_time | 10.3 | +| step_time | 8.62 | -------------------------------------- -2023-10-19 15:59:52,683 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 145.529 +/- 29.593 -2023-10-19 15:59:52,684 : +2023-10-27 17:35:09,786 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 144.067 +/- 22.646 +2023-10-27 17:35:09,788 : -------------------------------------- | loss/ | | -| approx_kl | 0.0171 | -| entropy_loss | -4.71 | -| policy_loss | -0.0196 | -| value_loss | 2.65 | +| approx_kl | 0.0211 | +| entropy_loss | -5.44 | +| policy_loss | -0.00959 | +| value_loss | 1.93 | | stat/ | | -| constraint_violation | 279 | +| constraint_violation | 233 | | ep_constraint_vio... | 0.2 | | ep_length | 201 | -| ep_return | 106 | -| ep_reward | 0.481 | +| ep_return | 125 | +| ep_reward | 0.503 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 146 | -| ep_reward | 0.582 | -| mse | 317 | +| ep_return | 144 | +| ep_reward | 0.576 | +| mse | 310 | | time/ | | | progress | 0.32 | | step | 3.2e+05 | -| step_time | 10.3 | +| step_time | 8.4 | -------------------------------------- -2023-10-19 16:01:55,108 : Eval | ep_lengths 202.00 +/- 96.08 | ep_return 124.424 +/- 67.212 -2023-10-19 16:01:55,110 : +2023-10-27 17:36:51,422 : Eval | ep_lengths 202.00 +/- 96.08 | ep_return 120.899 +/- 65.635 +2023-10-27 17:36:51,423 : -------------------------------------- | loss/ | | -| approx_kl | 0.0169 | -| entropy_loss | -4.76 | -| policy_loss | -0.00951 | -| value_loss | 1.87 | +| approx_kl | 0.0245 | +| entropy_loss | -5.49 | +| policy_loss | -0.0151 | +| value_loss | 1.01 | | stat/ | | -| constraint_violation | 297 | +| constraint_violation | 243 | | ep_constraint_vio... | 0.2 | -| ep_length | 202 | -| ep_return | 112 | -| ep_reward | 0.456 | +| ep_length | 201 | +| ep_return | 109 | +| ep_reward | 0.493 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 202 | -| ep_return | 124 | -| ep_reward | 0.501 | -| mse | 202 | +| ep_return | 121 | +| ep_reward | 0.487 | +| mse | 206 | | time/ | | | progress | 0.33 | | step | 3.3e+05 | -| step_time | 10.4 | +| step_time | 8.46 | -------------------------------------- -2023-10-19 16:03:55,125 : Eval | ep_lengths 175.40 +/- 113.95 | ep_return 103.390 +/- 69.776 -2023-10-19 16:03:55,127 : +2023-10-27 17:38:34,633 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 131.286 +/- 51.957 +2023-10-27 17:38:34,634 : -------------------------------------- | loss/ | | -| approx_kl | 0.0234 | -| entropy_loss | -4.86 | -| policy_loss | -0.0143 | -| value_loss | 1.8 | +| approx_kl | 0.0295 | +| entropy_loss | -5.47 | +| policy_loss | -0.0234 | +| value_loss | 3.29 | | stat/ | | -| constraint_violation | 302 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 114 | -| ep_reward | 0.473 | +| constraint_violation | 260 | +| ep_constraint_vio... | 0.3 | +| ep_length | 177 | +| ep_return | 110 | +| ep_reward | 0.445 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 175 | -| ep_return | 103 | -| ep_reward | 0.455 | -| mse | 210 | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 131 | +| ep_reward | 0.532 | +| mse | 249 | | time/ | | | progress | 0.34 | | step | 3.4e+05 | -| step_time | 10.3 | +| step_time | 8.45 | -------------------------------------- -2023-10-19 16:05:58,751 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 123.906 +/- 55.957 -2023-10-19 16:05:58,752 : +2023-10-27 17:40:17,539 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 122.919 +/- 51.657 +2023-10-27 17:40:17,540 : -------------------------------------- | loss/ | | -| approx_kl | 0.0283 | -| entropy_loss | -4.87 | -| policy_loss | -0.0168 | -| value_loss | 0.654 | +| approx_kl | 0.0181 | +| entropy_loss | -5.51 | +| policy_loss | -0.0212 | +| value_loss | 1.74 | | stat/ | | -| constraint_violation | 306 | +| constraint_violation | 264 | | ep_constraint_vio... | 0.1 | | ep_length | 225 | -| ep_return | 119 | -| ep_reward | 0.488 | +| ep_return | 122 | +| ep_reward | 0.5 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 124 | -| ep_reward | 0.519 | -| mse | 340 | +| ep_return | 123 | +| ep_reward | 0.515 | +| mse | 335 | | time/ | | | progress | 0.35 | | step | 3.5e+05 | -| step_time | 10.2 | +| step_time | 8.53 | -------------------------------------- -2023-10-19 16:08:02,402 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 138.325 +/- 51.814 -2023-10-19 16:08:02,403 : +2023-10-27 17:42:00,427 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 137.536 +/- 55.036 +2023-10-27 17:42:00,428 : -------------------------------------- | loss/ | | -| approx_kl | 0.0175 | -| entropy_loss | -4.92 | -| policy_loss | -0.00223 | -| value_loss | 0.979 | +| approx_kl | 0.035 | +| entropy_loss | -5.51 | +| policy_loss | -0.00766 | +| value_loss | 0.913 | | stat/ | | -| constraint_violation | 315 | +| constraint_violation | 267 | | ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 139 | -| ep_reward | 0.555 | +| ep_length | 225 | +| ep_return | 126 | +| ep_reward | 0.515 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | | ep_return | 138 | -| ep_reward | 0.606 | -| mse | 229 | +| ep_reward | 0.603 | +| mse | 230 | | time/ | | | progress | 0.36 | | step | 3.6e+05 | -| step_time | 10.2 | +| step_time | 8.47 | -------------------------------------- -2023-10-19 16:10:02,783 : Eval | ep_lengths 202.20 +/- 95.60 | ep_return 123.289 +/- 64.970 -2023-10-19 16:10:02,802 : +2023-10-27 17:43:41,277 : Eval | ep_lengths 202.20 +/- 95.60 | ep_return 122.578 +/- 66.770 +2023-10-27 17:43:41,278 : -------------------------------------- | loss/ | | -| approx_kl | 0.0103 | -| entropy_loss | -4.95 | -| policy_loss | -0.0165 | -| value_loss | 0.787 | +| approx_kl | 0.0255 | +| entropy_loss | -5.55 | +| policy_loss | -0.00327 | +| value_loss | 1.1 | | stat/ | | -| constraint_violation | 319 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 134 | -| ep_reward | 0.536 | +| constraint_violation | 276 | +| ep_constraint_vio... | 0.1 | +| ep_length | 226 | +| ep_return | 139 | +| ep_reward | 0.556 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 202 | | ep_return | 123 | -| ep_reward | 0.497 | -| mse | 178 | +| ep_reward | 0.495 | +| mse | 183 | | time/ | | | progress | 0.37 | | step | 3.7e+05 | -| step_time | 9.96 | +| step_time | 8.47 | -------------------------------------- -2023-10-19 16:12:03,771 : Eval | ep_lengths 201.00 +/- 98.02 | ep_return 118.946 +/- 66.811 -2023-10-19 16:12:03,789 : +2023-10-27 17:45:20,870 : Eval | ep_lengths 201.00 +/- 98.02 | ep_return 120.388 +/- 64.891 +2023-10-27 17:45:20,871 : -------------------------------------- | loss/ | | -| approx_kl | 0.0186 | -| entropy_loss | -4.96 | -| policy_loss | -0.0113 | -| value_loss | 1.35 | +| approx_kl | 0.0188 | +| entropy_loss | -5.58 | +| policy_loss | -0.00437 | +| value_loss | 0.888 | | stat/ | | -| constraint_violation | 323 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 110 | -| ep_reward | 0.467 | +| constraint_violation | 280 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 140 | +| ep_reward | 0.561 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 201 | -| ep_return | 119 | -| ep_reward | 0.477 | -| mse | 198 | +| ep_return | 120 | +| ep_reward | 0.483 | +| mse | 195 | | time/ | | | progress | 0.38 | | step | 3.8e+05 | -| step_time | 10.1 | +| step_time | 8.32 | -------------------------------------- -2023-10-19 16:14:03,703 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 105.435 +/- 58.173 -2023-10-19 16:14:03,704 : +2023-10-27 17:47:00,555 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 112.409 +/- 59.472 +2023-10-27 17:47:00,556 : -------------------------------------- | loss/ | | -| approx_kl | 0.0313 | -| entropy_loss | -5.04 | -| policy_loss | -0.00542 | -| value_loss | 0.599 | +| approx_kl | 0.0232 | +| entropy_loss | -5.6 | +| policy_loss | -0.0148 | +| value_loss | 0.809 | | stat/ | | -| constraint_violation | 325 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 133 | -| ep_reward | 0.531 | +| constraint_violation | 285 | +| ep_constraint_vio... | 0.2 | +| ep_length | 200 | +| ep_return | 110 | +| ep_reward | 0.466 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 200 | -| ep_return | 105 | -| ep_reward | 0.433 | -| mse | 283 | +| ep_return | 112 | +| ep_reward | 0.461 | +| mse | 274 | | time/ | | | progress | 0.39 | | step | 3.9e+05 | -| step_time | 10.2 | +| step_time | 8.29 | -------------------------------------- -2023-10-19 16:16:06,440 : Eval | ep_lengths 225.60 +/- 73.20 | ep_return 131.428 +/- 51.927 -2023-10-19 16:16:06,441 : +2023-10-27 17:48:41,877 : Eval | ep_lengths 225.60 +/- 73.20 | ep_return 136.411 +/- 50.241 +2023-10-27 17:48:41,878 : -------------------------------------- | loss/ | | -| approx_kl | 0.0242 | -| entropy_loss | -5.03 | -| policy_loss | -0.00787 | -| value_loss | 0.561 | +| approx_kl | 0.0195 | +| entropy_loss | -5.58 | +| policy_loss | -0.0132 | +| value_loss | 0.845 | | stat/ | | -| constraint_violation | 328 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 126 | -| ep_reward | 0.505 | +| constraint_violation | 287 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 134 | +| ep_reward | 0.538 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 226 | -| ep_return | 131 | -| ep_reward | 0.526 | -| mse | 242 | +| ep_return | 136 | +| ep_reward | 0.546 | +| mse | 240 | | time/ | | | progress | 0.4 | | step | 4e+05 | -| step_time | 10.1 | +| step_time | 8.39 | -------------------------------------- -2023-10-19 16:18:11,375 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 150.191 +/- 30.986 -2023-10-19 16:18:11,377 : +2023-10-27 17:50:25,431 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 157.771 +/- 31.728 +2023-10-27 17:50:25,442 : -------------------------------------- | loss/ | | -| approx_kl | 0.0312 | -| entropy_loss | -5.05 | -| policy_loss | -0.0162 | -| value_loss | 1.26 | +| approx_kl | 0.0287 | +| entropy_loss | -5.6 | +| policy_loss | -0.00207 | +| value_loss | 1.16 | | stat/ | | -| constraint_violation | 338 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 137 | -| ep_reward | 0.552 | +| constraint_violation | 291 | +| ep_constraint_vio... | 0.3 | +| ep_length | 175 | +| ep_return | 108 | +| ep_reward | 0.434 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 150 | -| ep_reward | 0.601 | -| mse | 258 | +| ep_return | 158 | +| ep_reward | 0.631 | +| mse | 239 | | time/ | | | progress | 0.41 | | step | 4.1e+05 | -| step_time | 10.4 | +| step_time | 8.29 | -------------------------------------- -2023-10-19 16:20:10,007 : Eval | ep_lengths 176.70 +/- 111.97 | ep_return 114.953 +/- 78.253 -2023-10-19 16:20:10,009 : +2023-10-27 17:52:03,501 : Eval | ep_lengths 176.70 +/- 111.97 | ep_return 116.483 +/- 77.737 +2023-10-27 17:52:03,502 : -------------------------------------- | loss/ | | -| approx_kl | 0.0279 | -| entropy_loss | -5.09 | -| policy_loss | -0.011 | -| value_loss | 0.982 | +| approx_kl | 0.0316 | +| entropy_loss | -5.61 | +| policy_loss | -0.0195 | +| value_loss | 4.11 | | stat/ | | -| constraint_violation | 346 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 146 | -| ep_reward | 0.583 | +| constraint_violation | 303 | +| ep_constraint_vio... | 0.4 | +| ep_length | 152 | +| ep_return | 105 | +| ep_reward | 0.429 | | stat_eval/ | | | constraint_violation | 0.3 | | ep_length | 177 | -| ep_return | 115 | -| ep_reward | 0.481 | -| mse | 140 | +| ep_return | 116 | +| ep_reward | 0.488 | +| mse | 136 | | time/ | | | progress | 0.42 | | step | 4.2e+05 | -| step_time | 10.2 | +| step_time | 8.43 | -------------------------------------- -2023-10-19 16:22:07,756 : Eval | ep_lengths 175.40 +/- 113.95 | ep_return 108.715 +/- 74.127 -2023-10-19 16:22:07,757 : +2023-10-27 17:53:41,623 : Eval | ep_lengths 175.70 +/- 113.50 | ep_return 107.348 +/- 71.202 +2023-10-27 17:53:41,624 : -------------------------------------- | loss/ | | -| approx_kl | 0.0415 | -| entropy_loss | -5.16 | -| policy_loss | -0.0106 | -| value_loss | 1.26 | +| approx_kl | 0.0337 | +| entropy_loss | -5.62 | +| policy_loss | -0.0068 | +| value_loss | 1.01 | | stat/ | | -| constraint_violation | 353 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 135 | -| ep_reward | 0.544 | +| constraint_violation | 309 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 148 | +| ep_reward | 0.59 | | stat_eval/ | | | constraint_violation | 0.3 | -| ep_length | 175 | -| ep_return | 109 | -| ep_reward | 0.461 | -| mse | 174 | +| ep_length | 176 | +| ep_return | 107 | +| ep_reward | 0.457 | +| mse | 177 | | time/ | | | progress | 0.43 | | step | 4.3e+05 | -| step_time | 10 | +| step_time | 8.43 | -------------------------------------- -2023-10-19 16:24:08,446 : Eval | ep_lengths 201.70 +/- 96.64 | ep_return 121.910 +/- 66.871 -2023-10-19 16:24:08,464 : +2023-10-27 17:55:22,197 : Eval | ep_lengths 201.70 +/- 96.64 | ep_return 128.072 +/- 68.102 +2023-10-27 17:55:22,198 : -------------------------------------- | loss/ | | -| approx_kl | 0.0318 | -| entropy_loss | -5.21 | -| policy_loss | -0.0167 | -| value_loss | 0.805 | +| approx_kl | 0.0218 | +| entropy_loss | -5.57 | +| policy_loss | -0.0215 | +| value_loss | 0.428 | | stat/ | | -| constraint_violation | 360 | +| constraint_violation | 316 | | ep_constraint_vio... | 0.1 | -| ep_length | 227 | -| ep_return | 150 | -| ep_reward | 0.602 | +| ep_length | 225 | +| ep_return | 132 | +| ep_reward | 0.532 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 202 | -| ep_return | 122 | -| ep_reward | 0.489 | -| mse | 219 | +| ep_return | 128 | +| ep_reward | 0.513 | +| mse | 207 | | time/ | | | progress | 0.44 | | step | 4.4e+05 | -| step_time | 10.1 | +| step_time | 8.38 | -------------------------------------- -2023-10-19 16:26:08,802 : Eval | ep_lengths 200.20 +/- 99.60 | ep_return 127.223 +/- 69.356 -2023-10-19 16:26:08,804 : +2023-10-27 17:57:04,368 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 144.981 +/- 51.970 +2023-10-27 17:57:04,369 : -------------------------------------- | loss/ | | -| approx_kl | 0.024 | -| entropy_loss | -5.3 | -| policy_loss | -0.0165 | -| value_loss | 0.909 | +| approx_kl | 0.022 | +| entropy_loss | -5.58 | +| policy_loss | -0.0163 | +| value_loss | 1.91 | | stat/ | | -| constraint_violation | 365 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 165 | -| ep_reward | 0.658 | +| constraint_violation | 321 | +| ep_constraint_vio... | 0.1 | +| ep_length | 227 | +| ep_return | 157 | +| ep_reward | 0.627 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 200 | -| ep_return | 127 | -| ep_reward | 0.534 | -| mse | 181 | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 145 | +| ep_reward | 0.598 | +| mse | 199 | | time/ | | | progress | 0.45 | | step | 4.5e+05 | -| step_time | 10.2 | +| step_time | 8.45 | -------------------------------------- -2023-10-19 16:28:12,274 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 162.155 +/- 56.440 -2023-10-19 16:28:12,283 : +2023-10-27 17:58:46,938 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 157.453 +/- 55.306 +2023-10-27 17:58:46,939 : -------------------------------------- | loss/ | | -| approx_kl | 0.0196 | -| entropy_loss | -5.28 | -| policy_loss | -0.00104 | -| value_loss | 1.51 | +| approx_kl | 0.0204 | +| entropy_loss | -5.62 | +| policy_loss | -0.00497 | +| value_loss | 1.56 | | stat/ | | -| constraint_violation | 370 | -| ep_constraint_vio... | 0.3 | -| ep_length | 175 | -| ep_return | 112 | -| ep_reward | 0.462 | +| constraint_violation | 326 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 167 | +| ep_reward | 0.67 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 162 | -| ep_reward | 0.661 | -| mse | 101 | +| ep_return | 157 | +| ep_reward | 0.642 | +| mse | 108 | | time/ | | | progress | 0.46 | | step | 4.6e+05 | -| step_time | 10.4 | +| step_time | 8.36 | -------------------------------------- -2023-10-19 16:30:12,961 : Eval | ep_lengths 201.30 +/- 97.43 | ep_return 118.353 +/- 60.123 -2023-10-19 16:30:12,962 : +2023-10-27 18:00:27,578 : Eval | ep_lengths 201.40 +/- 97.23 | ep_return 120.302 +/- 61.112 +2023-10-27 18:00:27,579 : -------------------------------------- | loss/ | | -| approx_kl | 0.0267 | -| entropy_loss | -5.31 | -| policy_loss | 0.000287 | -| value_loss | 1.26 | +| approx_kl | 0.0244 | +| entropy_loss | -5.63 | +| policy_loss | -0.0136 | +| value_loss | 1.82 | | stat/ | | -| constraint_violation | 379 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 161 | -| ep_reward | 0.645 | +| constraint_violation | 331 | +| ep_constraint_vio... | 0.3 | +| ep_length | 175 | +| ep_return | 108 | +| ep_reward | 0.447 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 201 | -| ep_return | 118 | -| ep_reward | 0.48 | -| mse | 219 | +| ep_return | 120 | +| ep_reward | 0.487 | +| mse | 221 | | time/ | | | progress | 0.47 | | step | 4.7e+05 | -| step_time | 10.1 | +| step_time | 8.54 | -------------------------------------- -2023-10-19 16:32:11,009 : Eval | ep_lengths 176.30 +/- 112.59 | ep_return 108.549 +/- 75.092 -2023-10-19 16:32:11,010 : +2023-10-27 18:02:06,422 : Eval | ep_lengths 176.30 +/- 112.59 | ep_return 115.309 +/- 78.816 +2023-10-27 18:02:06,423 : -------------------------------------- | loss/ | | -| approx_kl | 0.0142 | -| entropy_loss | -5.37 | -| policy_loss | -0.016 | -| value_loss | 0.9 | +| approx_kl | 0.024 | +| entropy_loss | -5.65 | +| policy_loss | -0.0114 | +| value_loss | 2.06 | | stat/ | | -| constraint_violation | 384 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 129 | -| ep_reward | 0.519 | +| constraint_violation | 340 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 163 | +| ep_reward | 0.652 | | stat_eval/ | | | constraint_violation | 0.3 | | ep_length | 176 | -| ep_return | 109 | -| ep_reward | 0.443 | -| mse | 167 | +| ep_return | 115 | +| ep_reward | 0.47 | +| mse | 158 | | time/ | | | progress | 0.48 | | step | 4.8e+05 | -| step_time | 10.2 | +| step_time | 8.45 | -------------------------------------- -2023-10-19 16:34:11,251 : Eval | ep_lengths 225.80 +/- 72.60 | ep_return 129.778 +/- 54.807 -2023-10-19 16:34:11,252 : +2023-10-27 18:03:49,318 : Eval | ep_lengths 225.80 +/- 72.60 | ep_return 135.061 +/- 55.577 +2023-10-27 18:03:49,319 : -------------------------------------- | loss/ | | -| approx_kl | 0.0209 | -| entropy_loss | -5.3 | -| policy_loss | -0.0217 | -| value_loss | 1.65 | +| approx_kl | 0.0223 | +| entropy_loss | -5.68 | +| policy_loss | -0.00303 | +| value_loss | 0.66 | | stat/ | | -| constraint_violation | 390 | +| constraint_violation | 345 | | ep_constraint_vio... | 0.1 | | ep_length | 225 | -| ep_return | 138 | -| ep_reward | 0.553 | +| ep_return | 140 | +| ep_reward | 0.562 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 226 | -| ep_return | 130 | -| ep_reward | 0.519 | -| mse | 332 | +| ep_return | 135 | +| ep_reward | 0.54 | +| mse | 310 | | time/ | | | progress | 0.49 | | step | 4.9e+05 | -| step_time | 9.39 | +| step_time | 8.47 | -------------------------------------- -2023-10-19 16:36:07,842 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 146.466 +/- 25.104 -2023-10-19 16:36:07,844 : +2023-10-27 18:05:34,301 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 153.198 +/- 14.005 +2023-10-27 18:05:34,302 : -------------------------------------- | loss/ | | -| approx_kl | 0.0194 | -| entropy_loss | -5.28 | -| policy_loss | -0.0218 | -| value_loss | 0.552 | +| approx_kl | 0.0329 | +| entropy_loss | -5.64 | +| policy_loss | -0.00692 | +| value_loss | 0.671 | | stat/ | | -| constraint_violation | 395 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 108 | -| ep_reward | 0.433 | +| constraint_violation | 351 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 143 | +| ep_reward | 0.572 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 146 | -| ep_reward | 0.586 | -| mse | 281 | +| ep_return | 153 | +| ep_reward | 0.613 | +| mse | 251 | | time/ | | | progress | 0.5 | | step | 5e+05 | -| step_time | 9.5 | +| step_time | 8.5 | -------------------------------------- -2023-10-19 16:38:00,621 : Eval | ep_lengths 200.40 +/- 99.20 | ep_return 132.423 +/- 71.837 -2023-10-19 16:38:00,622 : +2023-10-27 18:07:15,512 : Eval | ep_lengths 200.50 +/- 99.00 | ep_return 137.346 +/- 71.672 +2023-10-27 18:07:15,513 : -------------------------------------- | loss/ | | -| approx_kl | 0.0271 | -| entropy_loss | -5.35 | -| policy_loss | -0.012 | -| value_loss | 0.724 | +| approx_kl | 0.0251 | +| entropy_loss | -5.69 | +| policy_loss | -0.0158 | +| value_loss | 0.92 | | stat/ | | -| constraint_violation | 398 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 152 | -| ep_reward | 0.608 | +| constraint_violation | 355 | +| ep_constraint_vio... | 0.2 | +| ep_length | 201 | +| ep_return | 116 | +| ep_reward | 0.463 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 200 | -| ep_return | 132 | -| ep_reward | 0.589 | -| mse | 143 | +| ep_return | 137 | +| ep_reward | 0.608 | +| mse | 131 | | time/ | | | progress | 0.51 | | step | 5.1e+05 | -| step_time | 9.48 | +| step_time | 8.43 | -------------------------------------- -2023-10-19 16:39:51,600 : Eval | ep_lengths 175.90 +/- 113.20 | ep_return 118.355 +/- 80.497 -2023-10-19 16:39:51,602 : +2023-10-27 18:08:56,147 : Eval | ep_lengths 200.80 +/- 98.41 | ep_return 123.416 +/- 65.002 +2023-10-27 18:08:56,148 : -------------------------------------- | loss/ | | -| approx_kl | 0.024 | -| entropy_loss | -5.33 | -| policy_loss | -0.0124 | -| value_loss | 2.38 | +| approx_kl | 0.0364 | +| entropy_loss | -5.75 | +| policy_loss | -0.014 | +| value_loss | 0.585 | | stat/ | | -| constraint_violation | 406 | +| constraint_violation | 358 | | ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 132 | -| ep_reward | 0.528 | +| ep_length | 226 | +| ep_return | 151 | +| ep_reward | 0.603 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 176 | -| ep_return | 118 | -| ep_reward | 0.502 | -| mse | 121 | +| constraint_violation | 0.2 | +| ep_length | 201 | +| ep_return | 123 | +| ep_reward | 0.522 | +| mse | 209 | | time/ | | | progress | 0.52 | | step | 5.2e+05 | -| step_time | 9.66 | +| step_time | 8.4 | -------------------------------------- -2023-10-19 16:41:46,164 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 150.049 +/- 28.748 -2023-10-19 16:41:46,165 : +2023-10-27 18:10:41,690 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 152.326 +/- 24.058 +2023-10-27 18:10:41,691 : -------------------------------------- | loss/ | | -| approx_kl | 0.0216 | -| entropy_loss | -5.4 | -| policy_loss | -0.0222 | -| value_loss | 0.926 | +| approx_kl | 0.0306 | +| entropy_loss | -5.75 | +| policy_loss | -0.0117 | +| value_loss | 0.963 | | stat/ | | -| constraint_violation | 408 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 150 | -| ep_reward | 0.601 | +| constraint_violation | 366 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 131 | +| ep_reward | 0.526 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 150 | -| ep_reward | 0.6 | -| mse | 295 | +| ep_return | 152 | +| ep_reward | 0.609 | +| mse | 288 | | time/ | | | progress | 0.53 | | step | 5.3e+05 | -| step_time | 9.26 | +| step_time | 8.6 | -------------------------------------- -2023-10-19 16:43:34,785 : Eval | ep_lengths 176.60 +/- 112.16 | ep_return 111.641 +/- 78.880 -2023-10-19 16:43:34,787 : +2023-10-27 18:12:23,175 : Eval | ep_lengths 201.40 +/- 97.24 | ep_return 126.779 +/- 67.320 +2023-10-27 18:12:23,176 : -------------------------------------- | loss/ | | -| approx_kl | 0.0284 | -| entropy_loss | -5.42 | -| policy_loss | -0.0055 | -| value_loss | 2.71 | +| approx_kl | 0.0289 | +| entropy_loss | -5.72 | +| policy_loss | -0.0205 | +| value_loss | 0.407 | | stat/ | | -| constraint_violation | 410 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 122 | -| ep_reward | 0.496 | +| constraint_violation | 368 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 158 | +| ep_reward | 0.634 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 177 | -| ep_return | 112 | -| ep_reward | 0.461 | -| mse | 181 | +| constraint_violation | 0.2 | +| ep_length | 201 | +| ep_return | 127 | +| ep_reward | 0.516 | +| mse | 202 | | time/ | | | progress | 0.54 | | step | 5.4e+05 | -| step_time | 9.44 | +| step_time | 8.54 | -------------------------------------- -2023-10-19 16:45:26,694 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 134.848 +/- 51.628 -2023-10-19 16:45:26,696 : +2023-10-27 18:14:06,623 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 136.513 +/- 50.182 +2023-10-27 18:14:06,624 : -------------------------------------- | loss/ | | -| approx_kl | 0.0219 | -| entropy_loss | -5.52 | -| policy_loss | -0.0234 | -| value_loss | 0.393 | +| approx_kl | 0.0259 | +| entropy_loss | -5.8 | +| policy_loss | -0.00498 | +| value_loss | 3.78 | | stat/ | | -| constraint_violation | 419 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 129 | -| ep_reward | 0.555 | +| constraint_violation | 370 | +| ep_constraint_vio... | 0.2 | +| ep_length | 201 | +| ep_return | 123 | +| ep_reward | 0.5 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 135 | -| ep_reward | 0.54 | -| mse | 276 | +| ep_return | 137 | +| ep_reward | 0.546 | +| mse | 269 | | time/ | | | progress | 0.55 | | step | 5.5e+05 | -| step_time | 9.16 | +| step_time | 8.62 | -------------------------------------- -2023-10-19 16:47:16,971 : Eval | ep_lengths 201.70 +/- 96.61 | ep_return 151.673 +/- 80.467 -2023-10-19 16:47:16,972 : +2023-10-27 18:15:47,590 : Eval | ep_lengths 201.70 +/- 96.61 | ep_return 146.106 +/- 75.231 +2023-10-27 18:15:47,591 : -------------------------------------- | loss/ | | -| approx_kl | 0.0304 | -| entropy_loss | -5.56 | -| policy_loss | -0.00269 | -| value_loss | 0.215 | +| approx_kl | 0.0295 | +| entropy_loss | -5.82 | +| policy_loss | -0.0122 | +| value_loss | 0.669 | | stat/ | | -| constraint_violation | 425 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 147 | -| ep_reward | 0.589 | +| constraint_violation | 379 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 131 | +| ep_reward | 0.562 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 202 | -| ep_return | 152 | -| ep_reward | 0.608 | -| mse | 108 | +| ep_return | 146 | +| ep_reward | 0.585 | +| mse | 116 | | time/ | | | progress | 0.56 | | step | 5.6e+05 | -| step_time | 9.31 | +| step_time | 8.44 | -------------------------------------- -2023-10-19 16:49:11,536 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 147.006 +/- 25.222 -2023-10-19 16:49:11,537 : +2023-10-27 18:17:32,606 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 152.981 +/- 22.604 +2023-10-27 18:17:32,607 : -------------------------------------- | loss/ | | -| approx_kl | 0.0154 | -| entropy_loss | -5.61 | -| policy_loss | -0.0133 | -| value_loss | 1.91 | +| approx_kl | 0.0251 | +| entropy_loss | -5.85 | +| policy_loss | -0.00494 | +| value_loss | 0.895 | | stat/ | | -| constraint_violation | 433 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 122 | -| ep_reward | 0.489 | +| constraint_violation | 384 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 151 | +| ep_reward | 0.606 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 147 | -| ep_reward | 0.588 | -| mse | 292 | +| ep_return | 153 | +| ep_reward | 0.612 | +| mse | 280 | | time/ | | | progress | 0.57 | | step | 5.7e+05 | -| step_time | 9.51 | +| step_time | 8.53 | -------------------------------------- -2023-10-19 16:51:00,154 : Eval | ep_lengths 178.50 +/- 109.25 | ep_return 90.115 +/- 64.912 -2023-10-19 16:51:00,155 : +2023-10-27 18:19:12,343 : Eval | ep_lengths 178.50 +/- 109.25 | ep_return 96.532 +/- 67.421 +2023-10-27 18:19:12,344 : -------------------------------------- | loss/ | | -| approx_kl | 0.0275 | -| entropy_loss | -5.65 | -| policy_loss | -0.0114 | -| value_loss | 1.15 | +| approx_kl | 0.015 | +| entropy_loss | -5.86 | +| policy_loss | -0.00852 | +| value_loss | 1.96 | | stat/ | | -| constraint_violation | 438 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 169 | -| ep_reward | 0.676 | +| constraint_violation | 391 | +| ep_constraint_vio... | 0.2 | +| ep_length | 201 | +| ep_return | 120 | +| ep_reward | 0.479 | | stat_eval/ | | | constraint_violation | 0.3 | | ep_length | 178 | -| ep_return | 90.1 | -| ep_reward | 0.364 | -| mse | 317 | +| ep_return | 96.5 | +| ep_reward | 0.39 | +| mse | 293 | | time/ | | | progress | 0.58 | | step | 5.8e+05 | -| step_time | 9.26 | +| step_time | 8.64 | -------------------------------------- -2023-10-19 16:52:50,520 : Eval | ep_lengths 200.40 +/- 99.20 | ep_return 117.323 +/- 62.067 -2023-10-19 16:52:50,521 : +2023-10-27 18:20:53,888 : Eval | ep_lengths 200.40 +/- 99.20 | ep_return 120.928 +/- 63.125 +2023-10-27 18:20:53,889 : -------------------------------------- | loss/ | | -| approx_kl | 0.0253 | -| entropy_loss | -5.7 | -| policy_loss | -0.0124 | -| value_loss | 1.08 | +| approx_kl | 0.024 | +| entropy_loss | -5.83 | +| policy_loss | -0.0218 | +| value_loss | 0.973 | | stat/ | | -| constraint_violation | 448 | -| ep_constraint_vio... | 0.4 | -| ep_length | 151 | -| ep_return | 91.6 | -| ep_reward | 0.371 | +| constraint_violation | 398 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 171 | +| ep_reward | 0.685 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 200 | -| ep_return | 117 | -| ep_reward | 0.536 | -| mse | 264 | +| ep_return | 121 | +| ep_reward | 0.55 | +| mse | 255 | | time/ | | | progress | 0.59 | | step | 5.9e+05 | -| step_time | 9.32 | +| step_time | 8.57 | -------------------------------------- -2023-10-19 16:54:41,624 : Eval | ep_lengths 225.50 +/- 73.50 | ep_return 135.725 +/- 53.840 -2023-10-19 16:54:41,625 : +2023-10-27 18:22:37,074 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 139.270 +/- 54.648 +2023-10-27 18:22:37,075 : -------------------------------------- | loss/ | | -| approx_kl | 0.0255 | -| entropy_loss | -5.72 | -| policy_loss | -0.00963 | -| value_loss | 0.553 | +| approx_kl | 0.0208 | +| entropy_loss | -5.9 | +| policy_loss | -0.0141 | +| value_loss | 1.95 | | stat/ | | -| constraint_violation | 450 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 158 | -| ep_reward | 0.632 | +| constraint_violation | 407 | +| ep_constraint_vio... | 0.4 | +| ep_length | 151 | +| ep_return | 94.5 | +| ep_reward | 0.382 | | stat_eval/ | | | constraint_violation | 0.1 | -| ep_length | 226 | -| ep_return | 136 | -| ep_reward | 0.544 | -| mse | 267 | +| ep_length | 225 | +| ep_return | 139 | +| ep_reward | 0.558 | +| mse | 264 | | time/ | | | progress | 0.6 | | step | 6e+05 | -| step_time | 9.27 | +| step_time | 8.51 | -------------------------------------- -2023-10-19 16:56:34,392 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 159.036 +/- 32.703 -2023-10-19 16:56:34,394 : +2023-10-27 18:24:21,907 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 158.270 +/- 22.737 +2023-10-27 18:24:21,922 : -------------------------------------- | loss/ | | -| approx_kl | 0.0295 | -| entropy_loss | -5.69 | -| policy_loss | -0.0148 | -| value_loss | 2.18 | +| approx_kl | 0.0363 | +| entropy_loss | -5.92 | +| policy_loss | -0.0193 | +| value_loss | 1.27 | | stat/ | | -| constraint_violation | 457 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 140 | -| ep_reward | 0.59 | +| constraint_violation | 410 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 161 | +| ep_reward | 0.644 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 159 | -| ep_reward | 0.636 | -| mse | 255 | +| ep_return | 158 | +| ep_reward | 0.633 | +| mse | 250 | | time/ | | | progress | 0.61 | | step | 6.1e+05 | -| step_time | 9.01 | +| step_time | 8.53 | -------------------------------------- -2023-10-19 16:58:24,739 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 138.030 +/- 52.446 -2023-10-19 16:58:24,740 : +2023-10-27 18:26:05,323 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 141.382 +/- 50.457 +2023-10-27 18:26:05,324 : -------------------------------------- | loss/ | | -| approx_kl | 0.0269 | -| entropy_loss | -5.73 | -| policy_loss | -0.00369 | -| value_loss | 0.539 | +| approx_kl | 0.0307 | +| entropy_loss | -5.99 | +| policy_loss | 0.000493 | +| value_loss | 0.668 | | stat/ | | -| constraint_violation | 464 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 152 | -| ep_reward | 0.609 | +| constraint_violation | 418 | +| ep_constraint_vio... | 0.2 | +| ep_length | 200 | +| ep_return | 136 | +| ep_reward | 0.568 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 138 | -| ep_reward | 0.553 | -| mse | 227 | +| ep_return | 141 | +| ep_reward | 0.567 | +| mse | 220 | | time/ | | | progress | 0.62 | | step | 6.2e+05 | -| step_time | 9.07 | +| step_time | 8.59 | -------------------------------------- -2023-10-19 17:00:17,448 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 161.620 +/- 23.082 -2023-10-19 17:00:17,449 : +2023-10-27 18:27:51,024 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 160.911 +/- 17.372 +2023-10-27 18:27:51,032 : -------------------------------------- | loss/ | | -| approx_kl | 0.0207 | -| entropy_loss | -5.81 | -| policy_loss | -0.0185 | -| value_loss | 1.68 | +| approx_kl | 0.0282 | +| entropy_loss | -6.07 | +| policy_loss | -0.00762 | +| value_loss | 0.559 | | stat/ | | -| constraint_violation | 474 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 134 | -| ep_reward | 0.54 | +| constraint_violation | 425 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 156 | +| ep_reward | 0.625 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 162 | -| ep_reward | 0.646 | +| ep_return | 161 | +| ep_reward | 0.644 | | mse | 229 | | time/ | | | progress | 0.63 | | step | 6.3e+05 | -| step_time | 9.18 | +| step_time | 8.44 | -------------------------------------- -2023-10-19 17:02:10,063 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 158.876 +/- 27.499 -2023-10-19 17:02:10,064 : +2023-10-27 18:29:35,269 : Eval | ep_lengths 227.60 +/- 67.20 | ep_return 145.738 +/- 51.148 +2023-10-27 18:29:35,270 : -------------------------------------- | loss/ | | -| approx_kl | 0.0255 | -| entropy_loss | -5.89 | -| policy_loss | -0.00391 | -| value_loss | 2.37 | +| approx_kl | 0.0138 | +| entropy_loss | -6.14 | +| policy_loss | -0.0226 | +| value_loss | 2.31 | | stat/ | | -| constraint_violation | 480 | +| constraint_violation | 435 | | ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 120 | -| ep_reward | 0.482 | +| ep_length | 200 | +| ep_return | 131 | +| ep_reward | 0.528 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 159 | -| ep_reward | 0.636 | -| mse | 240 | +| constraint_violation | 0.1 | +| ep_length | 228 | +| ep_return | 146 | +| ep_reward | 0.584 | +| mse | 186 | | time/ | | | progress | 0.64 | | step | 6.4e+05 | -| step_time | 9.13 | +| step_time | 8.58 | -------------------------------------- -2023-10-19 17:04:02,498 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 160.652 +/- 36.611 -2023-10-19 17:04:02,499 : +2023-10-27 18:31:20,229 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 152.129 +/- 26.962 +2023-10-27 18:31:20,230 : -------------------------------------- | loss/ | | -| approx_kl | 0.0257 | -| entropy_loss | -5.92 | -| policy_loss | -0.0095 | -| value_loss | 0.298 | +| approx_kl | 0.0236 | +| entropy_loss | -6.13 | +| policy_loss | -0.0124 | +| value_loss | 0.778 | | stat/ | | -| constraint_violation | 485 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 124 | -| ep_reward | 0.499 | +| constraint_violation | 439 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 131 | +| ep_reward | 0.528 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 161 | -| ep_reward | 0.643 | -| mse | 306 | +| ep_return | 152 | +| ep_reward | 0.609 | +| mse | 312 | | time/ | | | progress | 0.65 | | step | 6.5e+05 | -| step_time | 9.12 | +| step_time | 8.51 | -------------------------------------- -2023-10-19 17:05:51,018 : Eval | ep_lengths 200.50 +/- 99.00 | ep_return 129.449 +/- 66.843 -2023-10-19 17:05:51,019 : +2023-10-27 18:33:01,709 : Eval | ep_lengths 200.60 +/- 98.80 | ep_return 125.953 +/- 65.085 +2023-10-27 18:33:01,710 : -------------------------------------- | loss/ | | -| approx_kl | 0.0199 | -| entropy_loss | -6 | -| policy_loss | -0.0227 | -| value_loss | 1.83 | +| approx_kl | 0.0193 | +| entropy_loss | -6.19 | +| policy_loss | -0.021 | +| value_loss | 0.685 | | stat/ | | -| constraint_violation | 496 | -| ep_constraint_vio... | 0.4 | -| ep_length | 153 | -| ep_return | 102 | -| ep_reward | 0.411 | +| constraint_violation | 445 | +| ep_constraint_vio... | 0.2 | +| ep_length | 201 | +| ep_return | 124 | +| ep_reward | 0.498 | | stat_eval/ | | | constraint_violation | 0.2 | -| ep_length | 200 | -| ep_return | 129 | -| ep_reward | 0.532 | -| mse | 185 | +| ep_length | 201 | +| ep_return | 126 | +| ep_reward | 0.518 | +| mse | 189 | | time/ | | | progress | 0.66 | | step | 6.6e+05 | -| step_time | 9.25 | +| step_time | 8.57 | -------------------------------------- -2023-10-19 17:07:40,985 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 147.614 +/- 55.153 -2023-10-19 17:07:40,987 : +2023-10-27 18:34:43,110 : Eval | ep_lengths 200.40 +/- 99.20 | ep_return 123.367 +/- 63.011 +2023-10-27 18:34:43,111 : -------------------------------------- | loss/ | | -| approx_kl | 0.0252 | -| entropy_loss | -6 | -| policy_loss | -0.00872 | -| value_loss | 0.849 | +| approx_kl | 0.0294 | +| entropy_loss | -6.24 | +| policy_loss | -0.01 | +| value_loss | 2.88 | | stat/ | | -| constraint_violation | 501 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 172 | -| ep_reward | 0.689 | +| constraint_violation | 456 | +| ep_constraint_vio... | 0.4 | +| ep_length | 153 | +| ep_return | 99.4 | +| ep_reward | 0.399 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 148 | -| ep_reward | 0.622 | -| mse | 231 | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 123 | +| ep_reward | 0.536 | +| mse | 228 | | time/ | | | progress | 0.67 | | step | 6.7e+05 | -| step_time | 9.01 | +| step_time | 8.59 | -------------------------------------- -2023-10-19 17:09:31,897 : Eval | ep_lengths 227.30 +/- 68.10 | ep_return 149.139 +/- 57.757 -2023-10-19 17:09:31,898 : +2023-10-27 18:36:26,241 : Eval | ep_lengths 227.20 +/- 68.40 | ep_return 142.385 +/- 52.983 +2023-10-27 18:36:26,242 : -------------------------------------- | loss/ | | -| approx_kl | 0.0185 | -| entropy_loss | -5.97 | -| policy_loss | -0.0077 | -| value_loss | 3.21 | +| approx_kl | 0.0261 | +| entropy_loss | -6.26 | +| policy_loss | -0.0103 | +| value_loss | 0.894 | | stat/ | | -| constraint_violation | 511 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 157 | -| ep_reward | 0.651 | +| constraint_violation | 461 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 161 | +| ep_reward | 0.644 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 227 | -| ep_return | 149 | -| ep_reward | 0.598 | -| mse | 236 | +| ep_return | 142 | +| ep_reward | 0.571 | +| mse | 242 | | time/ | | | progress | 0.68 | | step | 6.8e+05 | -| step_time | 9.21 | +| step_time | 8.49 | -------------------------------------- -2023-10-19 17:11:22,348 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 165.787 +/- 60.948 -2023-10-19 17:11:22,357 : +2023-10-27 18:38:09,951 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 161.785 +/- 60.229 +2023-10-27 18:38:09,961 : -------------------------------------- | loss/ | | -| approx_kl | 0.0371 | -| entropy_loss | -5.93 | -| policy_loss | -0.0078 | -| value_loss | 5.09 | +| approx_kl | 0.0249 | +| entropy_loss | -6.26 | +| policy_loss | -0.0142 | +| value_loss | 0.79 | | stat/ | | -| constraint_violation | 518 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 141 | -| ep_reward | 0.565 | +| constraint_violation | 469 | +| ep_constraint_vio... | 0.1 | +| ep_length | 226 | +| ep_return | 151 | +| ep_reward | 0.605 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 166 | -| ep_reward | 0.668 | -| mse | 123 | +| ep_return | 162 | +| ep_reward | 0.653 | +| mse | 127 | | time/ | | | progress | 0.69 | | step | 6.9e+05 | -| step_time | 9.01 | +| step_time | 8.52 | -------------------------------------- -2023-10-19 17:13:14,684 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 161.258 +/- 24.628 -2023-10-19 17:13:14,685 : +2023-10-27 18:39:55,739 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 159.329 +/- 21.402 +2023-10-27 18:39:55,740 : -------------------------------------- | loss/ | | -| approx_kl | 0.0302 | -| entropy_loss | -5.95 | -| policy_loss | -0.0116 | -| value_loss | 3.43 | +| approx_kl | 0.0376 | +| entropy_loss | -6.34 | +| policy_loss | -0.00196 | +| value_loss | 4.31 | | stat/ | | -| constraint_violation | 523 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 152 | -| ep_reward | 0.609 | +| constraint_violation | 477 | +| ep_constraint_vio... | 0.2 | +| ep_length | 201 | +| ep_return | 131 | +| ep_reward | 0.528 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 161 | -| ep_reward | 0.645 | -| mse | 228 | +| ep_return | 159 | +| ep_reward | 0.637 | +| mse | 225 | | time/ | | | progress | 0.7 | | step | 7e+05 | -| step_time | 9.11 | +| step_time | 8.53 | -------------------------------------- -2023-10-19 17:15:03,831 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 148.579 +/- 58.287 -2023-10-19 17:15:03,832 : +2023-10-27 18:41:39,010 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 144.167 +/- 58.169 +2023-10-27 18:41:39,012 : -------------------------------------- | loss/ | | -| approx_kl | 0.0311 | -| entropy_loss | -5.97 | -| policy_loss | -0.0108 | -| value_loss | 1.5 | +| approx_kl | 0.0332 | +| entropy_loss | -6.42 | +| policy_loss | -0.0116 | +| value_loss | 2.69 | | stat/ | | -| constraint_violation | 531 | +| constraint_violation | 482 | | ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 161 | -| ep_reward | 0.646 | +| ep_length | 225 | +| ep_return | 155 | +| ep_reward | 0.619 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 149 | -| ep_reward | 0.608 | -| mse | 232 | +| ep_return | 144 | +| ep_reward | 0.591 | +| mse | 238 | | time/ | | | progress | 0.71 | | step | 7.1e+05 | -| step_time | 9 | +| step_time | 8.55 | -------------------------------------- -2023-10-19 17:16:50,621 : Eval | ep_lengths 200.80 +/- 98.41 | ep_return 139.753 +/- 73.996 -2023-10-19 17:16:50,623 : +2023-10-27 18:43:19,882 : Eval | ep_lengths 200.80 +/- 98.41 | ep_return 139.046 +/- 73.978 +2023-10-27 18:43:19,883 : -------------------------------------- | loss/ | | -| approx_kl | 0.0144 | -| entropy_loss | -6.05 | -| policy_loss | -0.00694 | -| value_loss | 0.241 | +| approx_kl | 0.0224 | +| entropy_loss | -6.45 | +| policy_loss | -0.0114 | +| value_loss | 1.42 | | stat/ | | -| constraint_violation | 534 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 169 | -| ep_reward | 0.677 | +| constraint_violation | 489 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 159 | +| ep_reward | 0.639 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 201 | -| ep_return | 140 | -| ep_reward | 0.575 | -| mse | 168 | +| ep_return | 139 | +| ep_reward | 0.572 | +| mse | 169 | | time/ | | | progress | 0.72 | | step | 7.2e+05 | -| step_time | 9.06 | +| step_time | 8.34 | -------------------------------------- -2023-10-19 17:18:39,754 : Eval | ep_lengths 226.60 +/- 70.20 | ep_return 161.642 +/- 60.383 -2023-10-19 17:18:39,755 : +2023-10-27 18:45:02,970 : Eval | ep_lengths 226.70 +/- 69.90 | ep_return 156.169 +/- 57.743 +2023-10-27 18:45:02,972 : -------------------------------------- | loss/ | | -| approx_kl | 0.0312 | -| entropy_loss | -6.04 | -| policy_loss | -0.016 | -| value_loss | 0.275 | +| approx_kl | 0.0206 | +| entropy_loss | -6.51 | +| policy_loss | -0.0228 | +| value_loss | 0.907 | | stat/ | | -| constraint_violation | 539 | +| constraint_violation | 493 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 190 | -| ep_reward | 0.759 | +| ep_return | 163 | +| ep_reward | 0.653 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 227 | -| ep_return | 162 | -| ep_reward | 0.647 | -| mse | 157 | +| ep_return | 156 | +| ep_reward | 0.625 | +| mse | 164 | | time/ | | | progress | 0.73 | | step | 7.3e+05 | -| step_time | 9.05 | +| step_time | 8.52 | -------------------------------------- -2023-10-19 17:20:29,047 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 152.758 +/- 23.874 -2023-10-19 17:20:29,048 : +2023-10-27 18:46:47,847 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 147.632 +/- 21.788 +2023-10-27 18:46:47,848 : -------------------------------------- | loss/ | | -| approx_kl | 0.0247 | -| entropy_loss | -6.12 | -| policy_loss | -0.0147 | -| value_loss | 0.766 | +| approx_kl | 0.0251 | +| entropy_loss | -6.56 | +| policy_loss | -0.0123 | +| value_loss | 0.886 | | stat/ | | -| constraint_violation | 543 | +| constraint_violation | 498 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 164 | -| ep_reward | 0.654 | +| ep_return | 179 | +| ep_reward | 0.717 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 153 | -| ep_reward | 0.611 | -| mse | 259 | +| ep_return | 148 | +| ep_reward | 0.591 | +| mse | 261 | | time/ | | | progress | 0.74 | | step | 7.4e+05 | -| step_time | 9 | +| step_time | 8.45 | -------------------------------------- -2023-10-19 17:22:16,969 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 142.266 +/- 52.518 -2023-10-19 17:22:16,970 : +2023-10-27 18:48:30,959 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 144.230 +/- 52.251 +2023-10-27 18:48:30,960 : -------------------------------------- | loss/ | | -| approx_kl | 0.0334 | -| entropy_loss | -6.2 | -| policy_loss | -0.00219 | -| value_loss | 1.48 | +| approx_kl | 0.0202 | +| entropy_loss | -6.69 | +| policy_loss | -0.0109 | +| value_loss | 0.601 | | stat/ | | -| constraint_violation | 551 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 163 | -| ep_reward | 0.651 | +| constraint_violation | 502 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 164 | +| ep_reward | 0.657 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 142 | -| ep_reward | 0.575 | -| mse | 230 | +| ep_return | 144 | +| ep_reward | 0.583 | +| mse | 225 | | time/ | | | progress | 0.75 | | step | 7.5e+05 | -| step_time | 8.79 | +| step_time | 8.42 | -------------------------------------- -2023-10-19 17:24:11,686 : Eval | ep_lengths 225.60 +/- 73.20 | ep_return 145.759 +/- 57.243 -2023-10-19 17:24:11,687 : +2023-10-27 18:50:14,716 : Eval | ep_lengths 225.60 +/- 73.20 | ep_return 142.616 +/- 54.345 +2023-10-27 18:50:14,717 : -------------------------------------- | loss/ | | -| approx_kl | 0.0305 | -| entropy_loss | -6.22 | -| policy_loss | -0.0127 | -| value_loss | 3.26 | +| approx_kl | 0.0295 | +| entropy_loss | -6.71 | +| policy_loss | -0.0191 | +| value_loss | 1.7 | | stat/ | | -| constraint_violation | 559 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 143 | -| ep_reward | 0.602 | +| constraint_violation | 511 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 156 | +| ep_reward | 0.627 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 226 | -| ep_return | 146 | -| ep_reward | 0.583 | -| mse | 239 | +| ep_return | 143 | +| ep_reward | 0.57 | +| mse | 240 | | time/ | | | progress | 0.76 | | step | 7.6e+05 | -| step_time | 9.41 | +| step_time | 8.44 | -------------------------------------- -2023-10-19 17:26:13,735 : Eval | ep_lengths 201.30 +/- 97.41 | ep_return 135.999 +/- 68.805 -2023-10-19 17:26:13,736 : +2023-10-27 18:51:56,559 : Eval | ep_lengths 201.30 +/- 97.41 | ep_return 132.676 +/- 66.956 +2023-10-27 18:51:56,560 : -------------------------------------- | loss/ | | -| approx_kl | 0.019 | -| entropy_loss | -6.24 | -| policy_loss | -0.00972 | -| value_loss | 0.699 | +| approx_kl | 0.0176 | +| entropy_loss | -6.74 | +| policy_loss | -0.0129 | +| value_loss | 2.66 | | stat/ | | -| constraint_violation | 561 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 157 | -| ep_reward | 0.629 | +| constraint_violation | 519 | +| ep_constraint_vio... | 0.2 | +| ep_length | 200 | +| ep_return | 137 | +| ep_reward | 0.575 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 201 | -| ep_return | 136 | -| ep_reward | 0.548 | -| mse | 134 | +| ep_return | 133 | +| ep_reward | 0.535 | +| mse | 140 | | time/ | | | progress | 0.77 | | step | 7.7e+05 | -| step_time | 10.6 | +| step_time | 8.55 | -------------------------------------- -2023-10-19 17:28:16,515 : Eval | ep_lengths 200.20 +/- 99.60 | ep_return 138.944 +/- 73.971 -2023-10-19 17:28:16,516 : +2023-10-27 18:53:38,033 : Eval | ep_lengths 200.20 +/- 99.60 | ep_return 133.924 +/- 70.364 +2023-10-27 18:53:38,035 : -------------------------------------- | loss/ | | -| approx_kl | 0.0128 | -| entropy_loss | -6.33 | -| policy_loss | -0.0198 | -| value_loss | 2.38 | +| approx_kl | 0.00307 | +| entropy_loss | -6.77 | +| policy_loss | -0.0263 | +| value_loss | 1.12 | | stat/ | | -| constraint_violation | 574 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 140 | -| ep_reward | 0.625 | +| constraint_violation | 521 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 159 | +| ep_reward | 0.638 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 200 | -| ep_return | 139 | -| ep_reward | 0.621 | -| mse | 190 | +| ep_return | 134 | +| ep_reward | 0.601 | +| mse | 194 | | time/ | | | progress | 0.78 | | step | 7.8e+05 | -| step_time | 10.6 | +| step_time | 8.63 | -------------------------------------- -2023-10-19 17:30:21,114 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 146.150 +/- 51.142 -2023-10-19 17:30:21,115 : +2023-10-27 18:55:22,407 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 144.040 +/- 50.879 +2023-10-27 18:55:22,408 : -------------------------------------- | loss/ | | -| approx_kl | 0.0142 | -| entropy_loss | -6.42 | -| policy_loss | -0.00876 | -| value_loss | 3.12 | +| approx_kl | 0.0221 | +| entropy_loss | -6.82 | +| policy_loss | -0.0127 | +| value_loss | 2.37 | | stat/ | | -| constraint_violation | 581 | +| constraint_violation | 534 | | ep_constraint_vio... | 0.2 | | ep_length | 200 | -| ep_return | 136 | -| ep_reward | 0.587 | +| ep_return | 134 | +| ep_reward | 0.6 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 226 | -| ep_return | 146 | -| ep_reward | 0.587 | -| mse | 197 | +| ep_return | 144 | +| ep_reward | 0.579 | +| mse | 203 | | time/ | | | progress | 0.79 | | step | 7.9e+05 | -| step_time | 10.7 | +| step_time | 8.61 | -------------------------------------- -2023-10-19 17:32:27,332 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 156.288 +/- 19.560 -2023-10-19 17:32:27,333 : +2023-10-27 18:57:08,806 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 153.005 +/- 18.577 +2023-10-27 18:57:08,807 : -------------------------------------- | loss/ | | -| approx_kl | 0.0308 | -| entropy_loss | -6.44 | -| policy_loss | -0.00912 | -| value_loss | 2.05 | +| approx_kl | 0.0286 | +| entropy_loss | -6.8 | +| policy_loss | -0.00489 | +| value_loss | 7.27 | | stat/ | | -| constraint_violation | 585 | +| constraint_violation | 542 | | ep_constraint_vio... | 0.3 | | ep_length | 176 | -| ep_return | 115 | -| ep_reward | 0.467 | +| ep_return | 119 | +| ep_reward | 0.515 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 156 | -| ep_reward | 0.625 | -| mse | 292 | +| ep_return | 153 | +| ep_reward | 0.612 | +| mse | 297 | | time/ | | | progress | 0.8 | | step | 8e+05 | -| step_time | 10.3 | +| step_time | 8.64 | -------------------------------------- -2023-10-19 17:34:29,611 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 151.537 +/- 54.060 -2023-10-19 17:34:29,612 : +2023-10-27 18:58:53,263 : Eval | ep_lengths 225.80 +/- 72.60 | ep_return 149.686 +/- 52.829 +2023-10-27 18:58:53,264 : -------------------------------------- | loss/ | | -| approx_kl | 0.0253 | -| entropy_loss | -6.46 | -| policy_loss | -0.02 | -| value_loss | 0.493 | +| approx_kl | 0.0231 | +| entropy_loss | -6.87 | +| policy_loss | -0.0166 | +| value_loss | 2.21 | | stat/ | | -| constraint_violation | 587 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 163 | -| ep_reward | 0.653 | +| constraint_violation | 546 | +| ep_constraint_vio... | 0.3 | +| ep_length | 176 | +| ep_return | 111 | +| ep_reward | 0.452 | | stat_eval/ | | | constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 152 | -| ep_reward | 0.62 | -| mse | 183 | +| ep_length | 226 | +| ep_return | 150 | +| ep_reward | 0.615 | +| mse | 186 | | time/ | | | progress | 0.81 | | step | 8.1e+05 | -| step_time | 9.85 | +| step_time | 8.78 | -------------------------------------- -2023-10-19 17:36:28,508 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 163.362 +/- 58.583 -2023-10-19 17:36:28,509 : +2023-10-27 19:00:38,099 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 163.633 +/- 57.990 +2023-10-27 19:00:38,110 : -------------------------------------- | loss/ | | -| approx_kl | 0.0235 | -| entropy_loss | -6.44 | -| policy_loss | -0.00829 | -| value_loss | 6.99 | +| approx_kl | 0.032 | +| entropy_loss | -6.92 | +| policy_loss | -0.0225 | +| value_loss | 0.767 | | stat/ | | -| constraint_violation | 596 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 143 | -| ep_reward | 0.575 | +| constraint_violation | 548 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 156 | +| ep_reward | 0.622 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 163 | -| ep_reward | 0.682 | -| mse | 132 | +| ep_return | 164 | +| ep_reward | 0.684 | +| mse | 130 | | time/ | | | progress | 0.82 | | step | 8.2e+05 | -| step_time | 9.32 | +| step_time | 8.57 | -------------------------------------- -2023-10-19 17:38:31,036 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 171.688 +/- 28.778 -2023-10-19 17:38:31,047 : +2023-10-27 19:02:25,306 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 173.451 +/- 25.792 +2023-10-27 19:02:25,335 : -------------------------------------- | loss/ | | -| approx_kl | 0.0143 | -| entropy_loss | -6.53 | -| policy_loss | -0.0136 | -| value_loss | 0.713 | +| approx_kl | 0.0175 | +| entropy_loss | -6.89 | +| policy_loss | -0.0144 | +| value_loss | 4.5 | | stat/ | | -| constraint_violation | 602 | -| ep_constraint_vio... | 0.1 | -| ep_length | 227 | -| ep_return | 153 | -| ep_reward | 0.616 | +| constraint_violation | 557 | +| ep_constraint_vio... | 0.2 | +| ep_length | 201 | +| ep_return | 142 | +| ep_reward | 0.571 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 172 | -| ep_reward | 0.687 | -| mse | 217 | +| ep_return | 173 | +| ep_reward | 0.694 | +| mse | 215 | | time/ | | | progress | 0.83 | | step | 8.3e+05 | -| step_time | 10.1 | +| step_time | 8.64 | -------------------------------------- -2023-10-19 17:40:34,548 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 167.218 +/- 29.363 -2023-10-19 17:40:34,549 : +2023-10-27 19:04:12,863 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 173.241 +/- 29.495 +2023-10-27 19:04:12,864 : -------------------------------------- | loss/ | | -| approx_kl | 0.0184 | -| entropy_loss | -6.64 | -| policy_loss | -0.00949 | -| value_loss | 0.772 | +| approx_kl | 0.0249 | +| entropy_loss | -6.94 | +| policy_loss | -0.0204 | +| value_loss | 0.361 | | stat/ | | -| constraint_violation | 608 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 145 | -| ep_reward | 0.581 | +| constraint_violation | 562 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 169 | +| ep_reward | 0.676 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 167 | -| ep_reward | 0.669 | -| mse | 208 | +| ep_return | 173 | +| ep_reward | 0.693 | +| mse | 200 | | time/ | | | progress | 0.84 | | step | 8.4e+05 | -| step_time | 9.81 | +| step_time | 8.7 | -------------------------------------- -2023-10-19 17:42:25,212 : Eval | ep_lengths 126.70 +/- 123.31 | ep_return 83.795 +/- 85.255 -2023-10-19 17:42:25,213 : +2023-10-27 19:05:52,736 : Eval | ep_lengths 151.60 +/- 120.52 | ep_return 102.450 +/- 85.273 +2023-10-27 19:05:52,737 : -------------------------------------- | loss/ | | -| approx_kl | 0.0258 | -| entropy_loss | -6.75 | -| policy_loss | -0.00211 | -| value_loss | 1.75 | +| approx_kl | 0.0158 | +| entropy_loss | -7 | +| policy_loss | -0.0157 | +| value_loss | 1.61 | | stat/ | | -| constraint_violation | 617 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 145 | -| ep_reward | 0.581 | +| constraint_violation | 568 | +| ep_constraint_vio... | 0.2 | +| ep_length | 202 | +| ep_return | 142 | +| ep_reward | 0.568 | | stat_eval/ | | -| constraint_violation | 0.5 | -| ep_length | 127 | -| ep_return | 83.8 | -| ep_reward | 0.4 | -| mse | 94.6 | +| constraint_violation | 0.4 | +| ep_length | 152 | +| ep_return | 102 | +| ep_reward | 0.418 | +| mse | 105 | | time/ | | | progress | 0.85 | | step | 8.5e+05 | -| step_time | 9.24 | +| step_time | 8.72 | -------------------------------------- -2023-10-19 17:44:27,761 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 148.758 +/- 38.524 -2023-10-19 17:44:27,762 : +2023-10-27 19:07:40,298 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 155.569 +/- 31.693 +2023-10-27 19:07:40,299 : +--------------------------------------- +| loss/ | | +| approx_kl | 0.0332 | +| entropy_loss | -6.99 | +| policy_loss | -0.000103 | +| value_loss | 1.51 | +| stat/ | | +| constraint_violation | 577 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 148 | +| ep_reward | 0.594 | +| stat_eval/ | | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 156 | +| ep_reward | 0.622 | +| mse | 333 | +| time/ | | +| progress | 0.86 | +| step | 8.6e+05 | +| step_time | 8.59 | +--------------------------------------- + +2023-10-27 19:09:21,399 : Eval | ep_lengths 177.00 +/- 111.53 | ep_return 116.231 +/- 79.926 +2023-10-27 19:09:21,400 : -------------------------------------- | loss/ | | -| approx_kl | 0.0184 | -| entropy_loss | -6.72 | -| policy_loss | -0.0215 | -| value_loss | 0.957 | +| approx_kl | 0.0255 | +| entropy_loss | -7.09 | +| policy_loss | -0.0184 | +| value_loss | 0.759 | | stat/ | | -| constraint_violation | 622 | +| constraint_violation | 582 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 158 | -| ep_reward | 0.632 | -| stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 149 | -| ep_reward | 0.595 | -| mse | 350 | -| time/ | | -| progress | 0.86 | -| step | 8.6e+05 | -| step_time | 9.77 | --------------------------------------- - -2023-10-19 17:46:24,047 : Eval | ep_lengths 176.90 +/- 111.69 | ep_return 118.737 +/- 82.718 -2023-10-19 17:46:24,049 : --------------------------------------- -| loss/ | | -| approx_kl | 0.0117 | -| entropy_loss | -6.79 | -| policy_loss | -0.0158 | -| value_loss | 0.542 | -| stat/ | | -| constraint_violation | 627 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 153 | -| ep_reward | 0.612 | +| ep_return | 163 | +| ep_reward | 0.65 | | stat_eval/ | | | constraint_violation | 0.3 | | ep_length | 177 | -| ep_return | 119 | -| ep_reward | 0.477 | -| mse | 182 | +| ep_return | 116 | +| ep_reward | 0.467 | +| mse | 184 | | time/ | | | progress | 0.87 | | step | 8.7e+05 | -| step_time | 10.1 | +| step_time | 8.67 | -------------------------------------- -2023-10-19 17:48:22,170 : Eval | ep_lengths 201.40 +/- 97.22 | ep_return 130.577 +/- 70.489 -2023-10-19 17:48:22,171 : +2023-10-27 19:11:04,322 : Eval | ep_lengths 201.60 +/- 96.82 | ep_return 135.307 +/- 71.815 +2023-10-27 19:11:04,323 : -------------------------------------- | loss/ | | -| approx_kl | 0.0163 | -| entropy_loss | -6.8 | -| policy_loss | -0.0174 | -| value_loss | 0.989 | +| approx_kl | 0.0288 | +| entropy_loss | -7.1 | +| policy_loss | -0.0164 | +| value_loss | 0.769 | | stat/ | | -| constraint_violation | 633 | +| constraint_violation | 588 | | ep_constraint_vio... | 0.1 | | ep_length | 225 | -| ep_return | 149 | -| ep_reward | 0.6 | +| ep_return | 158 | +| ep_reward | 0.632 | | stat_eval/ | | | constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 131 | -| ep_reward | 0.524 | -| mse | 198 | +| ep_length | 202 | +| ep_return | 135 | +| ep_reward | 0.543 | +| mse | 196 | | time/ | | | progress | 0.88 | | step | 8.8e+05 | -| step_time | 10.2 | +| step_time | 8.59 | -------------------------------------- -2023-10-19 17:50:20,027 : Eval | ep_lengths 201.30 +/- 97.42 | ep_return 138.560 +/- 74.240 -2023-10-19 17:50:20,029 : +2023-10-27 19:12:47,595 : Eval | ep_lengths 201.30 +/- 97.42 | ep_return 142.728 +/- 75.128 +2023-10-27 19:12:47,596 : -------------------------------------- | loss/ | | -| approx_kl | 0.0247 | -| entropy_loss | -6.76 | -| policy_loss | -0.0167 | -| value_loss | 1.04 | +| approx_kl | 0.025 | +| entropy_loss | -7.05 | +| policy_loss | -0.0161 | +| value_loss | 0.708 | | stat/ | | -| constraint_violation | 636 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 176 | -| ep_reward | 0.703 | +| constraint_violation | 594 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 153 | +| ep_reward | 0.619 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 201 | -| ep_return | 139 | -| ep_reward | 0.555 | -| mse | 146 | +| ep_return | 143 | +| ep_reward | 0.571 | +| mse | 137 | | time/ | | | progress | 0.89 | | step | 8.9e+05 | -| step_time | 10.2 | +| step_time | 8.74 | -------------------------------------- -2023-10-19 17:52:17,264 : Eval | ep_lengths 201.00 +/- 98.02 | ep_return 120.498 +/- 64.627 -2023-10-19 17:52:17,265 : +2023-10-27 19:14:29,258 : Eval | ep_lengths 201.00 +/- 98.02 | ep_return 125.540 +/- 66.691 +2023-10-27 19:14:29,259 : -------------------------------------- | loss/ | | -| approx_kl | 0.0276 | -| entropy_loss | -6.77 | -| policy_loss | -0.0102 | -| value_loss | 6.15 | +| approx_kl | 0.0278 | +| entropy_loss | -7.02 | +| policy_loss | -0.0128 | +| value_loss | 0.509 | | stat/ | | -| constraint_violation | 644 | -| ep_constraint_vio... | 0.2 | -| ep_length | 202 | -| ep_return | 128 | -| ep_reward | 0.513 | +| constraint_violation | 598 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 163 | +| ep_reward | 0.656 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 201 | -| ep_return | 120 | -| ep_reward | 0.488 | -| mse | 229 | +| ep_return | 126 | +| ep_reward | 0.508 | +| mse | 218 | | time/ | | | progress | 0.9 | | step | 9e+05 | -| step_time | 10.3 | +| step_time | 8.62 | -------------------------------------- -2023-10-19 17:54:15,699 : Eval | ep_lengths 226.60 +/- 70.20 | ep_return 141.447 +/- 53.170 -2023-10-19 17:54:15,701 : +2023-10-27 19:16:12,208 : Eval | ep_lengths 226.60 +/- 70.20 | ep_return 152.217 +/- 56.024 +2023-10-27 19:16:12,209 : -------------------------------------- | loss/ | | -| approx_kl | 0.0256 | -| entropy_loss | -6.88 | -| policy_loss | -0.0148 | -| value_loss | 2.59 | +| approx_kl | 0.0184 | +| entropy_loss | -6.98 | +| policy_loss | -0.0215 | +| value_loss | 2.73 | | stat/ | | -| constraint_violation | 652 | -| ep_constraint_vio... | 0.2 | -| ep_length | 202 | -| ep_return | 122 | -| ep_reward | 0.504 | +| constraint_violation | 602 | +| ep_constraint_vio... | 0.1 | +| ep_length | 227 | +| ep_return | 135 | +| ep_reward | 0.539 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 227 | -| ep_return | 141 | -| ep_reward | 0.567 | -| mse | 234 | +| ep_return | 152 | +| ep_reward | 0.61 | +| mse | 219 | | time/ | | | progress | 0.91 | | step | 9.1e+05 | -| step_time | 10 | +| step_time | 8.57 | -------------------------------------- -2023-10-19 17:56:09,024 : Eval | ep_lengths 175.60 +/- 113.65 | ep_return 135.371 +/- 89.714 -2023-10-19 17:56:09,025 : +2023-10-27 19:17:52,229 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 153.092 +/- 80.047 +2023-10-27 19:17:52,230 : -------------------------------------- | loss/ | | -| approx_kl | 0.0206 | -| entropy_loss | -6.98 | -| policy_loss | -0.0194 | -| value_loss | 6.55 | +| approx_kl | 0.0215 | +| entropy_loss | -7.04 | +| policy_loss | -0.0169 | +| value_loss | 3.45 | | stat/ | | -| constraint_violation | 661 | -| ep_constraint_vio... | 0.3 | -| ep_length | 178 | -| ep_return | 115 | -| ep_reward | 0.476 | +| constraint_violation | 609 | +| ep_constraint_vio... | 0.2 | +| ep_length | 202 | +| ep_return | 137 | +| ep_reward | 0.562 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 176 | -| ep_return | 135 | -| ep_reward | 0.565 | -| mse | 55.8 | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 153 | +| ep_reward | 0.619 | +| mse | 93.3 | | time/ | | | progress | 0.92 | | step | 9.2e+05 | -| step_time | 10.2 | +| step_time | 8.53 | -------------------------------------- -2023-10-19 17:58:07,924 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 168.079 +/- 25.070 -2023-10-19 17:58:07,926 : +2023-10-27 19:19:36,602 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 172.063 +/- 22.349 +2023-10-27 19:19:36,603 : -------------------------------------- | loss/ | | -| approx_kl | 0.0255 | -| entropy_loss | -7.02 | -| policy_loss | -0.0102 | -| value_loss | 0.771 | +| approx_kl | 0.0177 | +| entropy_loss | -7.08 | +| policy_loss | -0.0238 | +| value_loss | 1.5 | | stat/ | | -| constraint_violation | 666 | -| ep_constraint_vio... | 0.3 | -| ep_length | 176 | -| ep_return | 128 | -| ep_reward | 0.522 | +| constraint_violation | 616 | +| ep_constraint_vio... | 0.1 | +| ep_length | 226 | +| ep_return | 151 | +| ep_reward | 0.605 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 168 | -| ep_reward | 0.672 | -| mse | 181 | +| ep_return | 172 | +| ep_reward | 0.688 | +| mse | 166 | | time/ | | | progress | 0.93 | | step | 9.3e+05 | -| step_time | 9.42 | +| step_time | 8.57 | -------------------------------------- -2023-10-19 18:00:10,802 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 152.987 +/- 32.023 -2023-10-19 18:00:10,804 : +2023-10-27 19:21:21,676 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 159.246 +/- 30.693 +2023-10-27 19:21:21,677 : -------------------------------------- | loss/ | | -| approx_kl | 0.0258 | -| entropy_loss | -7.03 | -| policy_loss | -0.0144 | -| value_loss | 2.42 | +| approx_kl | 0.0243 | +| entropy_loss | -7.11 | +| policy_loss | -0.00904 | +| value_loss | 1.52 | | stat/ | | -| constraint_violation | 678 | -| ep_constraint_vio... | 0.4 | -| ep_length | 151 | -| ep_return | 93.2 | -| ep_reward | 0.457 | +| constraint_violation | 624 | +| ep_constraint_vio... | 0.3 | +| ep_length | 176 | +| ep_return | 122 | +| ep_reward | 0.495 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 153 | -| ep_reward | 0.612 | -| mse | 258 | +| ep_return | 159 | +| ep_reward | 0.637 | +| mse | 249 | | time/ | | | progress | 0.94 | | step | 9.4e+05 | -| step_time | 10.1 | +| step_time | 8.53 | -------------------------------------- -2023-10-19 18:02:07,716 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 150.724 +/- 33.959 -2023-10-19 18:02:07,717 : +2023-10-27 19:23:06,815 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 156.973 +/- 30.933 +2023-10-27 19:23:06,816 : -------------------------------------- | loss/ | | -| approx_kl | 0.0322 | -| entropy_loss | -7.03 | -| policy_loss | -0.0185 | -| value_loss | 0.757 | +| approx_kl | 0.0292 | +| entropy_loss | -7.21 | +| policy_loss | -0.00502 | +| value_loss | 11 | | stat/ | | -| constraint_violation | 687 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 124 | -| ep_reward | 0.499 | +| constraint_violation | 633 | +| ep_constraint_vio... | 0.4 | +| ep_length | 152 | +| ep_return | 101 | +| ep_reward | 0.508 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 151 | -| ep_reward | 0.603 | -| mse | 320 | +| ep_return | 157 | +| ep_reward | 0.628 | +| mse | 309 | | time/ | | | progress | 0.95 | | step | 9.5e+05 | -| step_time | 9.09 | +| step_time | 8.64 | -------------------------------------- -2023-10-19 18:04:03,763 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 157.153 +/- 55.482 -2023-10-19 18:04:03,764 : +2023-10-27 19:24:50,130 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 156.051 +/- 54.254 +2023-10-27 19:24:50,131 : -------------------------------------- | loss/ | | -| approx_kl | 0.019 | -| entropy_loss | -7.11 | -| policy_loss | -0.0132 | -| value_loss | 0.763 | +| approx_kl | 0.0269 | +| entropy_loss | -7.22 | +| policy_loss | -0.0111 | +| value_loss | 1.42 | | stat/ | | -| constraint_violation | 696 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 128 | -| ep_reward | 0.58 | +| constraint_violation | 643 | +| ep_constraint_vio... | 0.3 | +| ep_length | 176 | +| ep_return | 112 | +| ep_reward | 0.454 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 157 | -| ep_reward | 0.63 | -| mse | 139 | +| ep_return | 156 | +| ep_reward | 0.626 | +| mse | 141 | | time/ | | | progress | 0.96 | | step | 9.6e+05 | -| step_time | 8.72 | +| step_time | 8.48 | -------------------------------------- -2023-10-19 18:05:54,949 : Eval | ep_lengths 175.50 +/- 113.80 | ep_return 110.752 +/- 75.185 -2023-10-19 18:05:54,950 : +2023-10-27 19:26:29,800 : Eval | ep_lengths 175.50 +/- 113.80 | ep_return 112.053 +/- 75.765 +2023-10-27 19:26:29,801 : -------------------------------------- | loss/ | | -| approx_kl | 0.0342 | -| entropy_loss | -7.14 | -| policy_loss | -0.0248 | -| value_loss | 0.439 | +| approx_kl | 0.0186 | +| entropy_loss | -7.21 | +| policy_loss | -0.0125 | +| value_loss | 9.49 | | stat/ | | -| constraint_violation | 698 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 155 | -| ep_reward | 0.626 | +| constraint_violation | 651 | +| ep_constraint_vio... | 0.4 | +| ep_length | 151 | +| ep_return | 97.1 | +| ep_reward | 0.472 | | stat_eval/ | | | constraint_violation | 0.3 | | ep_length | 176 | -| ep_return | 111 | -| ep_reward | 0.446 | -| mse | 200 | +| ep_return | 112 | +| ep_reward | 0.451 | +| mse | 193 | | time/ | | | progress | 0.97 | | step | 9.7e+05 | -| step_time | 8.9 | +| step_time | 8.54 | -------------------------------------- -2023-10-19 18:07:52,835 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 171.496 +/- 22.055 -2023-10-19 18:07:52,836 : +2023-10-27 19:28:14,208 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 174.416 +/- 22.191 +2023-10-27 19:28:14,236 : -------------------------------------- | loss/ | | -| approx_kl | 0.0171 | -| entropy_loss | -7.12 | -| policy_loss | -0.00897 | -| value_loss | 1.67 | +| approx_kl | 0.0255 | +| entropy_loss | -7.2 | +| policy_loss | -0.0232 | +| value_loss | 0.601 | | stat/ | | -| constraint_violation | 706 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 145 | -| ep_reward | 0.653 | +| constraint_violation | 654 | +| ep_constraint_vio... | 0.1 | +| ep_length | 226 | +| ep_return | 154 | +| ep_reward | 0.623 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 171 | -| ep_reward | 0.686 | -| mse | 173 | +| ep_return | 174 | +| ep_reward | 0.698 | +| mse | 170 | | time/ | | | progress | 0.98 | | step | 9.8e+05 | -| step_time | 9.77 | +| step_time | 8.47 | -------------------------------------- -2023-10-19 18:09:48,054 : Eval | ep_lengths 226.80 +/- 69.60 | ep_return 166.074 +/- 57.320 -2023-10-19 18:09:48,055 : +2023-10-27 19:29:57,252 : Eval | ep_lengths 226.80 +/- 69.60 | ep_return 164.398 +/- 55.916 +2023-10-27 19:29:57,253 : -------------------------------------- | loss/ | | -| approx_kl | 0.017 | -| entropy_loss | -7.23 | -| policy_loss | -0.0175 | -| value_loss | 4.04 | +| approx_kl | 0.0193 | +| entropy_loss | -7.26 | +| policy_loss | -0.0228 | +| value_loss | 1.09 | | stat/ | | -| constraint_violation | 714 | +| constraint_violation | 662 | | ep_constraint_vio... | 0.2 | | ep_length | 201 | | ep_return | 143 | -| ep_reward | 0.585 | +| ep_reward | 0.573 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 227 | -| ep_return | 166 | -| ep_reward | 0.672 | -| mse | 102 | +| ep_return | 164 | +| ep_reward | 0.665 | +| mse | 103 | | time/ | | | progress | 0.99 | | step | 9.9e+05 | -| step_time | 10.2 | +| step_time | 8.49 | -------------------------------------- -2023-10-19 18:11:26,028 : Checkpoint | ./models/rl_models/quadrotor_3D/track/ppo/mpsf_es/model_latest.pt -2023-10-19 18:11:47,245 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 153.863 +/- 35.547 -2023-10-19 18:11:47,246 : +2023-10-27 19:31:22,878 : Checkpoint | ./models/rl_models/quadrotor_3D/track/ppo/mpsf_es/model_latest.pt +2023-10-27 19:31:42,948 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 152.658 +/- 33.142 +2023-10-27 19:31:42,949 : -------------------------------------- | loss/ | | -| approx_kl | 0.022 | -| entropy_loss | -7.25 | -| policy_loss | -0.0125 | -| value_loss | 3.17 | +| approx_kl | 0.0294 | +| entropy_loss | -7.29 | +| policy_loss | -0.0161 | +| value_loss | 2.37 | | stat/ | | -| constraint_violation | 719 | -| ep_constraint_vio... | 0.3 | -| ep_length | 177 | -| ep_return | 118 | -| ep_reward | 0.474 | +| constraint_violation | 670 | +| ep_constraint_vio... | 0.2 | +| ep_length | 201 | +| ep_return | 143 | +| ep_reward | 0.581 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 154 | -| ep_reward | 0.615 | -| mse | 353 | +| ep_return | 153 | +| ep_reward | 0.611 | +| mse | 344 | | time/ | | | progress | 1 | | step | 1e+06 | -| step_time | 9.9 | +| step_time | 8.55 | -------------------------------------- diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/loss/approx_kl.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/loss/approx_kl.log index 79f9fafca..278973989 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/loss/approx_kl.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/loss/approx_kl.log @@ -1,101 +1,101 @@ step,loss/approx_kl -10000,0.02874450978512565 -20000,0.026780654451188945 -30000,0.022765095881186425 -40000,0.03003193565333883 -50000,0.02678423233640691 -60000,0.013024521331923703 -70000,0.03235851416053871 -80000,0.023862248946291707 -90000,0.030346490683344506 -100000,0.02490632268600166 -110000,0.015724591336523493 -120000,0.030204566622463368 -130000,0.04031357979401946 -140000,0.0292551217523093 -150000,0.02462770380855848 -160000,0.02739316535492738 -170000,0.02286112491662304 -180000,0.031302459600071116 -190000,0.023700934834778316 -200000,0.024156790273264044 -210000,0.03434017544301847 -220000,0.019579221688521407 -230000,0.028128523216582834 -240000,0.03160478652765354 -250000,0.03928022175872077 -260000,0.033389151616332435 -270000,0.025957842605809366 -280000,0.03333249722296992 -290000,0.017399363398241503 -300000,0.0347040789667517 -310000,0.03477981527491162 -320000,0.045953211529801294 -330000,0.031165482181434827 -340000,0.04429962404537945 -350000,0.027057969849556683 -360000,0.033123961890426774 -370000,0.032940107032967104 -380000,0.03205405252519995 -390000,0.028114102416050935 -400000,0.024030266515910628 -410000,0.02600450983736664 -420000,0.024209127676052353 -430000,0.031093690117510654 -440000,0.03405899223095428 -450000,0.023794358223676683 -460000,0.03099985789352407 -470000,0.03398739801098903 -480000,0.03156182190869004 -490000,0.036766731855459506 -500000,0.032414264340574546 -510000,0.03432757379487157 -520000,0.03246760279871523 -530000,0.037113247935970615 -540000,0.04502515656252702 -550000,0.03450066158547997 -560000,0.035187149140983816 -570000,0.03561198743991553 -580000,0.025073501405616604 -590000,0.03293074038811029 -600000,0.039089687075465916 -610000,0.033398558866853514 -620000,0.03607199171092361 -630000,0.024005940626375375 -640000,0.03319519540915887 -650000,0.0249471220963945 -660000,0.04215250838703165 -670000,0.04232951266846309 -680000,0.03474371493017922 -690000,0.022737429919652635 -700000,0.035302710331355534 -710000,0.029882803466171025 -720000,0.028711367429544533 -730000,0.03597497057635338 -740000,0.025641219069560368 -750000,0.026862678110289074 -760000,0.029228219452003636 -770000,0.03203682734941443 -780000,0.02766041286134472 -790000,0.032703586854040616 -800000,0.028968882483119768 -810000,0.03817038725440701 -820000,0.03228878350152323 -830000,0.03385313892892251 -840000,0.026827858861846226 -850000,0.021791931552191578 -860000,0.03476229829248041 -870000,0.03366087491934498 -880000,0.034660154526742794 -890000,0.037380632331284386 -900000,0.033847111052212615 -910000,0.05952811196912081 -920000,0.027621172756577533 -930000,0.03292679170457026 -940000,0.035938500721628464 -950000,0.0295753551259016 -960000,0.029276891766736902 -970000,0.036200374698576836 -980000,0.029966662102378906 -990000,0.03095310269078861 -1000000,0.030584095938441653 +10000,0.029633267309206228 +20000,0.02266388279385865 +30000,0.016588618971096973 +40000,0.022440078870082893 +50000,0.03439235909997175 +60000,0.016431261063553394 +70000,0.024458146017665664 +80000,0.029663698949540657 +90000,0.022499308766176306 +100000,0.03182710343971848 +110000,0.03495878352162739 +120000,0.02981498461061468 +130000,0.018575245599883293 +140000,0.030545232802008594 +150000,0.03635953502574314 +160000,0.03550435108287881 +170000,0.026502946795274807 +180000,0.032685360739318034 +190000,0.030854443352048587 +200000,0.03939461355718474 +210000,0.033701947230535254 +220000,0.026133738295175134 +230000,0.025986253159741558 +240000,0.028053279073598485 +250000,0.026385399792343372 +260000,0.033704991165238125 +270000,0.02952577592805028 +280000,0.03975467265117914 +290000,0.02157542622492959 +300000,0.03619362219081572 +310000,0.035420374789585665 +320000,0.02083340292641272 +330000,0.03468128707415114 +340000,0.0333476013271138 +350000,0.027469713132207592 +360000,0.03433781150573244 +370000,0.027258278608011704 +380000,0.02661940389468024 +390000,0.028236040131499367 +400000,0.03311489207359651 +410000,0.023878435697406532 +420000,0.030756488683012624 +430000,0.018636462705520294 +440000,0.03415849689239015 +450000,0.02329163851682097 +460000,0.03132354967917005 +470000,0.023886372513758643 +480000,0.016492140207750104 +490000,0.021442569396458565 +500000,0.03030902079772204 +510000,0.02471681145640711 +520000,0.027372560983834164 +530000,0.025594865429836015 +540000,0.024599457811564213 +550000,0.015668900567106903 +560000,0.030910860871275266 +570000,0.03670800349209457 +580000,0.03681187293647478 +590000,0.027852160188679892 +600000,0.019334267335943876 +610000,0.02918752173427492 +620000,0.0205797257910793 +630000,0.01709593305519471 +640000,0.022337404871359465 +650000,0.03173941480927169 +660000,0.028232338551121457 +670000,0.02884605197856823 +680000,0.033174266759306195 +690000,0.025606261969854438 +700000,0.02731837669853121 +710000,0.021580068689460555 +720000,0.024723431103241943 +730000,0.030887579824775458 +740000,0.027943005979371566 +750000,0.022038707540680966 +760000,0.025590634485706688 +770000,0.02520988614608844 +780000,0.02286158218048513 +790000,0.02998485216715683 +800000,0.027959838796717422 +810000,0.027809453856510423 +820000,0.02855235397194822 +830000,0.0338897408451885 +840000,0.02784019419147323 +850000,0.030929399297262232 +860000,0.0311606114031747 +870000,0.03682748722688605 +880000,0.02736035884202769 +890000,0.030169884682012104 +900000,0.038479360821656886 +910000,0.04025383446520815 +920000,0.032256770359041786 +930000,0.030980461083042123 +940000,0.017267570807598533 +950000,0.023618934233672916 +960000,0.035133860555167 +970000,0.04886893681250513 +980000,0.04520976429339498 +990000,0.031271653521495565 +1000000,0.02854376533068716 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/loss/entropy_loss.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/loss/entropy_loss.log index 8fe5f6f29..9099cdb4c 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/loss/entropy_loss.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/loss/entropy_loss.log @@ -1,101 +1,101 @@ step,loss/entropy_loss -10000,-3.7573443611462904 -20000,-3.7340151508649186 -30000,-3.725442385673523 -40000,-3.7098780433336893 -50000,-3.6669336954752603 -60000,-3.692508010069529 -70000,-3.6646942496299744 -80000,-3.6808392524719244 -90000,-3.605351289113363 -100000,-3.622165377934773 -110000,-3.6543871800104784 -120000,-3.6804215391476953 -130000,-3.692097282409668 -140000,-3.668979620933533 -150000,-3.6260230263074242 -160000,-3.618795323371887 -170000,-3.582029231389364 -180000,-3.527496095498403 -190000,-3.509046641985576 -200000,-3.4888561924298607 -210000,-3.5052628556887306 -220000,-3.5083884040514635 -230000,-3.4975698232650756 -240000,-3.484304209550222 -250000,-3.4479555646578475 -260000,-3.458886218070984 -270000,-3.454187846183777 -280000,-3.4592267314593004 -290000,-3.433492692311605 -300000,-3.4509341716766357 -310000,-3.491183678309123 -320000,-3.4992069045702623 -330000,-3.454265125592549 -340000,-3.419578401247661 -350000,-3.399776653448741 -360000,-3.469572492440541 -370000,-3.449491302172343 -380000,-3.4840759197870894 -390000,-3.513916381200154 -400000,-3.5695487737655647 -410000,-3.622512809435527 -420000,-3.6549576322237654 -430000,-3.658440498510997 -440000,-3.6713192860285444 -450000,-3.6570885817209886 -460000,-3.646821562449137 -470000,-3.62384424606959 -480000,-3.700735445817311 -490000,-3.676515909036001 -500000,-3.623811936378479 -510000,-3.5908866961797075 -520000,-3.621074024836222 -530000,-3.6060533086458846 -540000,-3.569109654426575 -550000,-3.55707057317098 -560000,-3.4998646299044287 -570000,-3.4652657508850098 -580000,-3.4680787563323974 -590000,-3.502098925908406 -600000,-3.5400346914927168 -610000,-3.5543637355168656 -620000,-3.5323746720949813 -630000,-3.493886911869049 -640000,-3.4728256702423095 -650000,-3.4889715234438574 -660000,-3.4708867232004805 -670000,-3.4466432094573975 -680000,-3.4394262631734214 -690000,-3.475049432118733 -700000,-3.477719672520955 -710000,-3.4881235917409263 -720000,-3.5308543960253393 -730000,-3.5811563928922014 -740000,-3.6251233220100403 -750000,-3.570723466078441 -760000,-3.5562257488568627 -770000,-3.6068177223205566 -780000,-3.5861178636550903 -790000,-3.6364087065060935 -800000,-3.6940639297167466 -810000,-3.7067343870798743 -820000,-3.7100394129753114 -830000,-3.6683343966801965 -840000,-3.6526807069778444 -850000,-3.6101735115051277 -860000,-3.608029476801555 -870000,-3.5861784696578978 -880000,-3.5102564851442972 -890000,-3.5025401194890344 -900000,-3.4830044905344644 -910000,-3.4781193017959593 -920000,-3.506784641742706 -930000,-3.474042650063833 -940000,-3.4729817509651184 -950000,-3.485863546530406 -960000,-3.523092142740885 -970000,-3.5356125831604004 -980000,-3.5865202029546106 -990000,-3.5829888383547464 -1000000,-3.5879282077153527 +10000,-3.69324959119161 +20000,-3.652541593710582 +30000,-3.6508625308672586 +40000,-3.6113442142804457 +50000,-3.6201024770736696 +60000,-3.6743167877197265 +70000,-3.676709910233815 +80000,-3.5978386799494424 +90000,-3.620584841569264 +100000,-3.5832377672195426 +110000,-3.577643219629924 +120000,-3.588697226842244 +130000,-3.5518103202184035 +140000,-3.5265710989634202 +150000,-3.5377649903297423 +160000,-3.5351189732551576 +170000,-3.5003939827283226 +180000,-3.4841785788536073 +190000,-3.5369034091631577 +200000,-3.563501910368602 +210000,-3.5670030196507776 +220000,-3.566830205917358 +230000,-3.5988956371943153 +240000,-3.6182766119639083 +250000,-3.5683033625284835 +260000,-3.529946279525757 +270000,-3.480259915192922 +280000,-3.501977288722992 +290000,-3.495335857073466 +300000,-3.488387739658356 +310000,-3.492487903436025 +320000,-3.53573853969574 +330000,-3.484663685162862 +340000,-3.4503454685211183 +350000,-3.4248104890187583 +360000,-3.4127522706985474 +370000,-3.4489695390065513 +380000,-3.440312846501668 +390000,-3.4146705269813538 +400000,-3.4113913774490356 +410000,-3.4366871078809096 +420000,-3.404278723398844 +430000,-3.4593818545341493 +440000,-3.482207703590393 +450000,-3.464934515953064 +460000,-3.5029254357020063 +470000,-3.491364026069641 +480000,-3.490274008115132 +490000,-3.487744267781575 +500000,-3.444623557726542 +510000,-3.4359579006830847 +520000,-3.377688841025035 +530000,-3.3591577649116515 +540000,-3.372088332970937 +550000,-3.3930862625439957 +560000,-3.36546505689621 +570000,-3.4122120459874474 +580000,-3.4170920292536424 +590000,-3.3750243902206423 +600000,-3.3503378311793015 +610000,-3.316614679495493 +620000,-3.295833718776703 +630000,-3.2533807039260862 +640000,-3.302095719178517 +650000,-3.2282842914263403 +660000,-3.2679613272349037 +670000,-3.2681239207585655 +680000,-3.300957330067953 +690000,-3.26215850909551 +700000,-3.3422752737998964 +710000,-3.360147976875305 +720000,-3.348189302285512 +730000,-3.3387194871902466 +740000,-3.2880716403325394 +750000,-3.263476240634918 +760000,-3.3016702334086103 +770000,-3.326563207308451 +780000,-3.323869729042053 +790000,-3.3062797625859575 +800000,-3.30342458486557 +810000,-3.2793331583340963 +820000,-3.262773172060649 +830000,-3.258349208037058 +840000,-3.3239718675613403 +850000,-3.351929048697154 +860000,-3.3084825595219924 +870000,-3.3338367660840347 +880000,-3.3905816674232483 +890000,-3.420437093575795 +900000,-3.3283808747927353 +910000,-3.2901434540748595 +920000,-3.3057348489761353 +930000,-3.3061218738555906 +940000,-3.2926884929339097 +950000,-3.2834578116734825 +960000,-3.3120635390281676 +970000,-3.325933039188385 +980000,-3.2741854389508567 +990000,-3.298924148082733 +1000000,-3.263537732760111 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/loss/policy_loss.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/loss/policy_loss.log index 0d1c04d4d..28f4fdb80 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/loss/policy_loss.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/loss/policy_loss.log @@ -1,101 +1,101 @@ step,loss/policy_loss -10000,-0.004935202477693391 -20000,-0.012945637779497454 -30000,-0.018178624682187526 -40000,-0.011901349490880812 -50000,-0.014343916119085389 -60000,-0.01493984413542158 -70000,-0.008304207386438653 -80000,-0.009659426960537638 -90000,0.0007543145764191851 -100000,-0.013526328736089513 -110000,-0.0162503669979764 -120000,-0.01434844812419701 -130000,-0.013270470964506073 -140000,-0.011474027171579831 -150000,-0.00940349017909732 -160000,-0.006157359765481477 -170000,-0.011905198303645944 -180000,-0.006919181544853497 -190000,-0.01313425086642278 -200000,-0.01479922677540926 -210000,-0.012914920966248383 -220000,-0.015512729622731678 -230000,-0.0023904535185338893 -240000,-0.008775329635391247 -250000,-0.009706941774873731 -260000,-0.02369817571714026 -270000,0.002610565516919073 -280000,-0.005902770347394653 -290000,-0.009719360307053861 -300000,-0.004177327833704737 -310000,-0.00891555497173447 -320000,-0.0018520527550268927 -330000,-0.010086238676532296 -340000,-0.004964787095773054 -350000,-0.0027116007632697722 -360000,-0.007592766700866721 -370000,-0.008787016541025972 -380000,8.54032886712704e-05 -390000,-0.008647364520559229 -400000,-0.009856850022174889 -410000,-0.010034856413079203 -420000,-0.010782446303741866 -430000,-0.025880337788171514 -440000,-0.01358136339670189 -450000,-0.004523995165680869 -460000,-0.006320303401833924 -470000,-0.014634048015317083 -480000,-0.008874101986235795 -490000,0.0021186195271834466 -500000,-0.009467041617922427 -510000,0.007854565929229021 -520000,-0.007006101987822781 -530000,0.002612022481009055 -540000,-0.014199224422537671 -550000,-0.016170634090759927 -560000,-0.010204974561274498 -570000,-0.004257062210035096 -580000,-0.011461324532059763 -590000,-0.012925102997980673 -600000,-0.009386372242224689 -610000,-0.012686486313631182 -620000,-0.009715515780852734 -630000,-0.01269665840663855 -640000,-0.002283134368885513 -650000,-0.021907435677352243 -660000,-0.005952236660039981 -670000,-0.002588935571922521 -680000,-0.001417984611198609 -690000,-0.004157629476857529 -700000,-0.006704147642987814 -710000,-0.0070373044815884855 -720000,-0.008028961549726762 -730000,-0.007360651039359613 -740000,-0.016643653057822644 -750000,-0.0034444345474256613 -760000,-0.0023409175045718354 -770000,-0.014293562581924508 -780000,-0.007654161543170261 -790000,-0.01601656883460875 -800000,-0.004900250403930094 -810000,-0.003982525531790402 -820000,-0.012253735551557742 -830000,-0.006740559572645387 -840000,-0.010578115297803326 -850000,-0.0018812901302895535 -860000,-0.010486154303112298 -870000,0.00018229934208592035 -880000,0.0007407208682457769 -890000,-0.013362583694379445 -900000,-0.00708226732870096 -910000,-0.0077066867836647224 -920000,-0.000419283620347999 -930000,-0.01031344844596655 -940000,0.0027816923201259853 -950000,-0.00428372407813288 -960000,-0.015095562953179287 -970000,0.010889100399938192 -980000,-0.01409699293492952 -990000,-0.0060174964771527156 -1000000,-0.01192135526346769 +10000,-4.700913847142836e-05 +20000,-0.015178057465515163 +30000,-0.01930933113316626 +40000,-0.012928896648410617 +50000,-0.014665090473712079 +60000,-0.01916364337114323 +70000,-0.01438766488531747 +80000,-0.00822082837856312 +90000,0.0001409899166699645 +100000,-0.015290074966681166 +110000,-0.013328988960028088 +120000,-0.013011007266168443 +130000,-0.019247730959122605 +140000,-0.01278607214755044 +150000,-0.004472688401845052 +160000,-0.010665859652690684 +170000,0.005709191835540022 +180000,-0.016118792848130028 +190000,-0.017636639558139055 +200000,-0.006811771367072901 +210000,0.004468968034826599 +220000,-0.00039995242286350807 +230000,-0.007794310767178909 +240000,-0.015710955124758622 +250000,-0.01890947933535513 +260000,-0.010043814452735842 +270000,-0.016500216323120133 +280000,-0.0010190523436859607 +290000,-0.009711710908053697 +300000,-0.011273515546922323 +310000,-0.005604630585858638 +320000,-0.016922963275417795 +330000,0.0011737324692677339 +340000,-0.013445041597100676 +350000,-0.010683541851316041 +360000,-0.014102900588547778 +370000,-0.005949963475205534 +380000,-0.011567298716694072 +390000,-0.009221380287316247 +400000,-0.010567101607127607 +410000,-0.01077479575221644 +420000,-0.011184059882690923 +430000,-0.009261973975324213 +440000,-0.01775073375320934 +450000,-0.011217128472809115 +460000,-0.00837772051875339 +470000,-0.009778964875268765 +480000,-0.01847940442190776 +490000,-0.014540191667805702 +500000,-0.0035795901475980203 +510000,-0.015710996551247025 +520000,-0.014959110467773262 +530000,-0.01545664902424287 +540000,-0.024187150662786143 +550000,-0.009038262985553793 +560000,-0.008381937334483995 +570000,-0.001938693864301836 +580000,-0.0007017203383688022 +590000,-0.016602759494578286 +600000,-0.008508919362713339 +610000,-0.006635166232903662 +620000,-0.007959622175923173 +630000,-0.012155834808464742 +640000,-0.0015887851404507536 +650000,0.001606233751278979 +660000,-0.019412789226896918 +670000,-0.004891059881252135 +680000,-0.003237735567630097 +690000,-0.013548495034593253 +700000,-0.0105380733779714 +710000,-0.013377386831870277 +720000,-0.014432823904794242 +730000,-0.01362124245125483 +740000,-0.008887763989969873 +750000,-0.013588248103775036 +760000,-0.010279588730681914 +770000,-0.005822898944536133 +780000,-0.012875770836565062 +790000,-0.008627084131860077 +800000,-0.015320332556637787 +810000,-0.005616158432677119 +820000,-0.011884697301945198 +830000,-0.005580022729275138 +840000,-0.005552818610369961 +850000,-0.0036778073845799703 +860000,-0.0022282687575962755 +870000,-0.0061678531693378615 +880000,-0.011146723628575762 +890000,-0.012328243514317216 +900000,-0.00760285174544984 +910000,-0.009335710658296567 +920000,-0.012040604137638723 +930000,-0.003290462321700307 +940000,-0.007117681688183766 +950000,-0.01881724916674317 +960000,-0.0004166553232212215 +970000,-0.0025675284149267955 +980000,-0.008999644212846712 +990000,0.0025355419301750985 +1000000,-0.002343062657117166 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/loss/value_loss.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/loss/value_loss.log index a3301d1de..3826de082 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/loss/value_loss.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/loss/value_loss.log @@ -1,101 +1,101 @@ step,loss/value_loss -10000,13.13252601193923 -20000,8.793233444828603 -30000,9.13259050437837 -40000,8.389102234413992 -50000,8.813886638215028 -60000,4.9152698278154805 -70000,4.793894129114706 -80000,4.8112748078576075 -90000,2.5872434633212618 -100000,5.287417197493337 -110000,3.3469140625402076 -120000,1.540862067141098 -130000,1.7053852964304124 -140000,1.75472291239863 -150000,1.7131017913117297 -160000,1.3604509681799815 -170000,0.8497567721488772 -180000,1.425049491188353 -190000,1.5714618689221969 -200000,1.6466794893442853 -210000,3.6783268501685447 -220000,1.3521799070657996 -230000,1.2773749833121837 -240000,0.5002426802218513 -250000,0.6242417825366671 -260000,1.7075351099995995 -270000,4.247791063963743 -280000,0.8772254742730385 -290000,2.405165691661267 -300000,0.6002749928691112 -310000,0.6296446914639106 -320000,1.1760414030663466 -330000,1.4101201265798298 -340000,4.493928526698388 -350000,0.8685644992905578 -360000,0.43747978321681513 -370000,1.9427574635148097 -380000,1.0945404705634327 -390000,0.7051760275158382 -400000,1.5754102077695544 -410000,0.39164116399000076 -420000,7.006016986683375 -430000,0.6172286797918586 -440000,0.7504012567399752 -450000,3.166341670479752 -460000,0.5602575354637306 -470000,0.6497560584654332 -480000,3.15537457358296 -490000,0.5692449608241701 -500000,0.8286218584634876 -510000,0.8877301490292627 -520000,0.32678518822941116 -530000,4.670238555000208 -540000,1.1626308373594394 -550000,0.8770425582927054 -560000,0.8715957254693855 -570000,0.38178309332522714 -580000,0.6842948776630936 -590000,1.7675483399124048 -600000,0.45237355866931106 -610000,0.4247688344230075 -620000,0.1820211891244258 -630000,0.4567553100957169 -640000,1.4716602231760956 -650000,0.4592139791201708 -660000,3.602805002921669 -670000,0.8647504921647396 -680000,1.1923064781822144 -690000,0.9263690394114205 -700000,0.7835710354849446 -710000,3.4707191023264996 -720000,0.8151596643623547 -730000,1.5279201489348206 -740000,1.687186779880443 -750000,0.5562437148285551 -760000,2.415422100464133 -770000,0.5964682385499038 -780000,1.5850134969457552 -790000,8.25053774390509 -800000,1.3066957939358623 -810000,0.8332228268308397 -820000,1.101741101954178 -830000,0.9769799437333517 -840000,1.4982517457585218 -850000,0.9043596612738055 -860000,1.0468333164602055 -870000,2.380386618063642 -880000,1.2315594330963833 -890000,3.9172732784551294 -900000,0.3779143968860278 -910000,5.526078091971924 -920000,0.3719663895179309 -930000,2.253413058311892 -940000,0.6484624966424811 -950000,0.5508841348228826 -960000,0.6852473828769962 -970000,1.1500315410593656 -980000,1.1853777167292077 -990000,1.2727347655004988 -1000000,0.5397722423311776 +10000,28.06506556932638 +20000,4.596008623124967 +30000,6.41673896898657 +40000,6.983473293676306 +50000,5.106132508375315 +60000,4.544061995220028 +70000,6.49657311242417 +80000,3.371509770644471 +90000,1.8664312975656934 +100000,2.2307041393430938 +110000,4.966713487573175 +120000,2.963901353498506 +130000,2.99997997844114 +140000,1.972539380582082 +150000,1.5411266190976178 +160000,1.8329968137757155 +170000,2.0275755652354914 +180000,1.1610829467461925 +190000,0.9879162518426574 +200000,1.2372403169388329 +210000,3.1732158858733177 +220000,1.2785590276037342 +230000,1.2510793872915489 +240000,1.5312570322915877 +250000,1.4899782011151061 +260000,1.0084074616012038 +270000,1.2543968347099275 +280000,2.1103413761412186 +290000,1.8673584755356731 +300000,0.5834255209809405 +310000,0.7951435561531515 +320000,1.9867227229940974 +330000,1.1632890149728248 +340000,1.2011778808934077 +350000,0.6389358458480217 +360000,1.956756843856001 +370000,2.2727806471651606 +380000,3.852034151385232 +390000,0.6965964576887579 +400000,0.7481752785023993 +410000,0.5563923750423159 +420000,0.8617749306817959 +430000,1.429603664188155 +440000,0.7541052792156788 +450000,0.4502154741265959 +460000,0.40176801258645406 +470000,0.3261603503285909 +480000,0.4350106050271988 +490000,1.4620536824470491 +500000,1.4805563810444018 +510000,0.7804327490161302 +520000,0.6165989409639553 +530000,0.6453956534541134 +540000,0.5721346456463182 +550000,0.44758490406428136 +560000,1.7244882318540484 +570000,0.5670614999424644 +580000,0.779585420653007 +590000,2.3310993171906786 +600000,0.9962232790132409 +610000,0.7804899024644426 +620000,0.5716399698220438 +630000,0.7267755166779313 +640000,0.552170148819757 +650000,1.1078751074174105 +660000,0.6975480926196386 +670000,1.0047796384543228 +680000,0.7361491699875427 +690000,2.084269161564115 +700000,0.7945934574032474 +710000,0.6386886483356228 +720000,1.4766855354023085 +730000,2.115705344731807 +740000,3.541686409743342 +750000,1.1315577539142971 +760000,0.5234836833357324 +770000,0.5425092119478707 +780000,1.1994781605164204 +790000,6.161588439124519 +800000,0.36909094917745844 +810000,0.6374294762312991 +820000,0.3845061570431642 +830000,1.6896552362284862 +840000,0.8239898603915348 +850000,1.0057949140313078 +860000,1.753988996107736 +870000,2.5245620366013366 +880000,2.2920480042473 +890000,0.6401855920009468 +900000,0.6310334531590627 +910000,4.725869147487765 +920000,0.7523930858369436 +930000,2.7058050614436833 +940000,1.6145459957224886 +950000,0.577692019846224 +960000,0.6477117878464151 +970000,0.7668582069168792 +980000,0.7268528532368623 +990000,0.5359143340741244 +1000000,0.2601068454169709 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat/constraint_violation.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat/constraint_violation.log index 3777f81a8..23dd7a8ed 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat/constraint_violation.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat/constraint_violation.log @@ -1,101 +1,101 @@ step,stat/constraint_violation -10000,12 -20000,22 +10000,11 +20000,20 30000,33 40000,37 50000,45 60000,51 -70000,54 -80000,61 -90000,65 -100000,74 -110000,81 -120000,90 -130000,96 -140000,106 -150000,112 -160000,125 -170000,131 -180000,137 -190000,143 -200000,153 -210000,163 -220000,169 -230000,173 -240000,176 -250000,181 -260000,187 -270000,189 -280000,198 -290000,204 -300000,210 -310000,217 -320000,222 -330000,230 -340000,243 -350000,247 -360000,252 -370000,258 -380000,265 -390000,267 -400000,269 -410000,271 -420000,281 -430000,292 -440000,298 -450000,303 -460000,310 -470000,310 -480000,321 -490000,328 -500000,335 -510000,340 -520000,343 -530000,348 -540000,354 -550000,355 -560000,365 -570000,370 -580000,376 -590000,383 -600000,386 -610000,396 -620000,400 -630000,409 -640000,417 -650000,422 -660000,428 -670000,436 -680000,445 -690000,451 -700000,458 -710000,464 -720000,473 -730000,477 -740000,482 -750000,485 -760000,493 -770000,499 -780000,504 -790000,515 -800000,520 -810000,525 -820000,529 -830000,534 -840000,543 -850000,547 -860000,558 -870000,565 -880000,569 -890000,577 -900000,579 -910000,582 -920000,592 -930000,600 -940000,606 -950000,615 -960000,623 -970000,633 +70000,52 +80000,56 +90000,63 +100000,71 +110000,76 +120000,80 +130000,88 +140000,94 +150000,101 +160000,113 +170000,121 +180000,127 +190000,133 +200000,140 +210000,151 +220000,158 +230000,163 +240000,167 +250000,171 +260000,176 +270000,180 +280000,191 +290000,195 +300000,201 +310000,211 +320000,216 +330000,223 +340000,230 +350000,242 +360000,246 +370000,251 +380000,259 +390000,260 +400000,263 +410000,265 +420000,271 +430000,284 +440000,289 +450000,294 +460000,302 +470000,302 +480000,311 +490000,319 +500000,327 +510000,330 +520000,333 +530000,335 +540000,343 +550000,344 +560000,352 +570000,358 +580000,364 +590000,376 +600000,381 +610000,392 +620000,396 +630000,405 +640000,412 +650000,418 +660000,422 +670000,432 +680000,442 +690000,448 +700000,456 +710000,460 +720000,469 +730000,473 +740000,478 +750000,482 +760000,488 +770000,497 +780000,502 +790000,508 +800000,517 +810000,522 +820000,525 +830000,531 +840000,538 +850000,544 +860000,555 +870000,560 +880000,564 +890000,570 +900000,575 +910000,579 +920000,590 +930000,597 +940000,604 +950000,613 +960000,620 +970000,632 980000,639 -990000,646 -1000000,653 +990000,647 +1000000,654 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat/ep_constraint_violation.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat/ep_constraint_violation.log index f1650923b..4d332494f 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat/ep_constraint_violation.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat/ep_constraint_violation.log @@ -1,101 +1,101 @@ step,stat/ep_constraint_violation 10000,0.1 20000,0.3 -30000,0.1 +30000,0.2 40000,0.2 -50000,0.0 -60000,0.1 -70000,0.2 -80000,0.4 -90000,0.0 -100000,0.2 +50000,0.1 +60000,0.3 +70000,0.0 +80000,0.2 +90000,0.1 +100000,0.1 110000,0.2 120000,0.2 -130000,0.1 -140000,0.3 -150000,0.2 +130000,0.2 +140000,0.2 +150000,0.1 160000,0.2 -170000,0.1 -180000,0.1 -190000,0.0 +170000,0.2 +180000,0.3 +190000,0.2 200000,0.3 -210000,0.2 +210000,0.5 220000,0.1 230000,0.1 240000,0.0 -250000,0.1 -260000,0.2 -270000,0.1 -280000,0.2 -290000,0.3 -300000,0.2 -310000,0.0 -320000,0.1 +250000,0.2 +260000,0.0 +270000,0.0 +280000,0.5 +290000,0.2 +300000,0.1 +310000,0.1 +320000,0.0 330000,0.2 -340000,0.6 +340000,0.4 350000,0.0 360000,0.2 -370000,0.2 -380000,0.2 -390000,0.1 +370000,0.1 +380000,0.3 +390000,0.0 400000,0.0 410000,0.0 -420000,0.4 -430000,0.1 -440000,0.1 -450000,0.2 -460000,0.0 +420000,0.1 +430000,0.2 +440000,0.0 +450000,0.1 +460000,0.2 470000,0.0 480000,0.2 -490000,0.1 -500000,0.1 +490000,0.2 +500000,0.2 510000,0.1 -520000,0.0 -530000,0.3 -540000,0.1 +520000,0.1 +530000,0.0 +540000,0.0 550000,0.0 -560000,0.1 -570000,0.1 -580000,0.1 -590000,0.0 -600000,0.0 +560000,0.5 +570000,0.0 +580000,0.3 +590000,0.4 +600000,0.3 610000,0.1 620000,0.1 630000,0.1 -640000,0.1 +640000,0.2 650000,0.1 -660000,0.3 -670000,0.1 +660000,0.0 +670000,0.0 680000,0.2 690000,0.3 700000,0.0 -710000,0.0 -720000,0.0 +710000,0.1 +720000,0.2 730000,0.2 740000,0.2 -750000,0.0 -760000,0.1 +750000,0.1 +760000,0.2 770000,0.2 -780000,0.1 -790000,0.6 +780000,0.2 +790000,0.5 800000,0.1 810000,0.0 -820000,0.1 +820000,0.0 830000,0.1 -840000,0.3 +840000,0.2 850000,0.1 -860000,0.3 +860000,0.4 870000,0.1 880000,0.2 -890000,0.2 +890000,0.0 900000,0.0 910000,0.1 920000,0.2 -930000,0.2 +930000,0.1 940000,0.2 950000,0.1 -960000,0.2 -970000,0.1 -980000,0.0 +960000,0.1 +970000,0.2 +980000,0.1 990000,0.2 1000000,0.1 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat/ep_length.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat/ep_length.log index 3d9e48b0a..0c473a42f 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat/ep_length.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat/ep_length.log @@ -1,101 +1,101 @@ step,stat/ep_length 10000,225.1 -20000,177.3 -30000,225.3 -40000,200.8 -50000,250.0 -60000,225.1 -70000,202.0 -80000,151.1 -90000,250.0 -100000,200.3 -110000,200.6 -120000,200.2 -130000,225.2 -140000,175.9 -150000,200.3 -160000,201.4 -170000,225.1 -180000,225.2 -190000,250.0 -200000,176.0 -210000,201.7 -220000,225.5 +20000,175.8 +30000,200.4 +40000,200.2 +50000,225.1 +60000,175.4 +70000,250.0 +80000,200.2 +90000,225.1 +100000,225.1 +110000,202.7 +120000,201.6 +130000,200.3 +140000,201.4 +150000,225.2 +160000,200.4 +170000,201.6 +180000,176.9 +190000,202.4 +200000,175.5 +210000,129.9 +220000,225.1 230000,225.2 240000,250.0 -250000,225.7 -260000,200.3 -270000,225.2 -280000,200.3 -290000,177.6 -300000,200.8 -310000,250.0 -320000,226.6 -330000,200.4 -340000,102.3 +250000,200.3 +260000,250.0 +270000,250.0 +280000,125.6 +290000,201.2 +300000,225.2 +310000,225.6 +320000,250.0 +330000,201.4 +340000,150.8 350000,250.0 -360000,200.8 -370000,201.0 -380000,203.4 -390000,225.7 +360000,200.7 +370000,225.2 +380000,178.9 +390000,250.0 400000,250.0 410000,250.0 -420000,152.0 -430000,226.2 -440000,225.9 -450000,200.4 -460000,250.0 +420000,225.1 +430000,203.0 +440000,250.0 +450000,225.2 +460000,201.0 470000,250.0 -480000,201.6 -490000,225.2 -500000,225.1 -510000,225.4 -520000,250.0 -530000,175.4 -540000,225.5 +480000,202.3 +490000,201.0 +500000,200.3 +510000,225.1 +520000,225.2 +530000,250.0 +540000,250.0 550000,250.0 -560000,225.4 -570000,226.4 -580000,225.6 -590000,250.0 -600000,250.0 +560000,128.7 +570000,250.0 +580000,175.5 +590000,153.2 +600000,175.3 610000,225.8 620000,225.7 -630000,225.4 -640000,225.2 +630000,225.1 +640000,200.5 650000,226.6 -660000,176.1 -670000,225.1 +660000,250.0 +670000,250.0 680000,200.6 690000,175.6 700000,250.0 -710000,250.0 -720000,250.0 -730000,200.3 -740000,200.3 -750000,250.0 -760000,227.0 +710000,225.5 +720000,200.2 +730000,200.2 +740000,200.4 +750000,226.7 +760000,201.5 770000,200.3 -780000,225.1 -790000,102.4 +780000,200.2 +790000,126.0 800000,225.2 810000,250.0 -820000,225.5 -830000,225.5 -840000,175.7 +820000,250.0 +830000,225.1 +840000,200.4 850000,225.1 -860000,176.7 -870000,225.4 -880000,200.3 -890000,202.2 +860000,153.0 +870000,225.1 +880000,200.8 +890000,250.0 900000,250.0 910000,225.7 -920000,201.2 -930000,200.7 -940000,200.9 -950000,225.2 -960000,200.2 -970000,225.3 -980000,250.0 +920000,201.1 +930000,225.1 +940000,200.8 +950000,225.1 +960000,225.1 +970000,200.4 +980000,225.1 990000,200.5 1000000,225.4 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat/ep_return.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat/ep_return.log index f29c164ca..55991a655 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat/ep_return.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat/ep_return.log @@ -1,101 +1,101 @@ step,stat/ep_return -10000,59.67477630661536 -20000,72.57288247785716 -30000,96.04172684216954 -40000,99.85396102557672 -50000,95.35261512635114 -60000,105.95586096637085 -70000,98.32548562655987 -80000,78.79723350923146 -90000,118.30732673481558 -100000,92.69835006232728 -110000,106.47079129696525 -120000,101.93708475353769 -130000,123.93539425027366 -140000,103.20097160617836 -150000,103.6814441201645 -160000,108.16292786511538 -170000,132.50691320001968 -180000,134.55744948563947 -190000,159.89515286324394 -200000,99.8432101850556 -210000,115.35270416302141 -220000,142.32610586031035 -230000,130.40107816018133 -240000,161.4458839064662 -250000,130.19643013381233 -260000,120.3583347584924 -270000,130.3913447806639 -280000,120.71682483049776 -290000,110.18665371012678 -300000,139.9260049729274 -310000,157.7618636468541 -320000,136.76860101399637 -330000,121.97372270937964 -340000,51.884358416574955 -350000,140.07875668956908 -360000,120.72398096129436 -370000,116.05396490093673 -380000,130.9821826864034 -390000,140.83759580735187 -400000,150.51440771786196 -410000,152.64736007618507 -420000,90.67110327409009 -430000,137.66593117322847 -440000,143.41057566574506 -450000,125.56431159195722 -460000,150.77519923798891 -470000,160.49000339393447 -480000,126.89188212420433 -490000,146.6294813030559 -500000,148.68900386184808 -510000,152.293598540244 -520000,156.43415120349758 -530000,115.31678481853942 -540000,131.92728116278224 -550000,165.539276899178 -560000,133.7020822850148 -570000,134.6513024909837 -580000,134.91936116812408 -590000,155.22816764496213 -600000,162.4492624393911 -610000,132.33934871047452 -620000,151.20386181728887 -630000,143.4698380278818 -640000,157.55234361455555 -650000,144.7163928038254 -660000,120.2525384716259 -670000,136.4685510963588 -680000,134.93387550640654 -690000,107.00803885139047 -700000,152.79778814258557 -710000,133.42067077586643 -720000,144.50230218593526 -730000,123.00475307310201 -740000,123.87256261366205 -750000,144.85534733487592 -760000,126.06844513426032 -770000,117.79627185717428 -780000,148.23260357477443 -790000,56.67572645656791 -800000,139.5281992849192 -810000,150.83976380718272 -820000,136.26965116142043 -830000,137.06517099504316 -840000,98.13731768319492 -850000,119.8509102455545 -860000,108.31023949765923 -870000,134.77597938085086 -880000,131.05118271143317 -890000,123.17865999435125 -900000,156.7368462669557 -910000,146.6010322975324 -920000,129.72105995298736 -930000,130.11090892590767 -940000,132.238367376408 -950000,146.96123893397902 -960000,138.21833288771307 -970000,135.50821337379062 -980000,139.26082881734524 -990000,133.94730225398604 -1000000,132.05814297202352 +10000,50.85359558813856 +20000,68.82313985747876 +30000,77.31790992014871 +40000,82.93446488834475 +50000,99.21962276699806 +60000,81.41369240561515 +70000,132.79674605015586 +80000,91.67255447208098 +90000,119.71410119787761 +100000,104.9328403398863 +110000,110.22319075040419 +120000,94.76936032360086 +130000,80.39025697288733 +140000,88.86277925909492 +150000,122.25878782782036 +160000,90.19726134261265 +170000,106.84032730859391 +180000,97.97058818835562 +190000,115.21284863413662 +200000,88.81590505763492 +210000,70.43177735973248 +220000,127.22981119889532 +230000,134.73083576869755 +240000,143.50962910728487 +250000,119.21985140343511 +260000,141.08804217056164 +270000,145.91767850811175 +280000,73.77853507562988 +290000,110.95509390081989 +300000,139.367603293561 +310000,149.53937711099576 +320000,142.83635159513486 +330000,113.03770493726697 +340000,97.18577240550209 +350000,147.97417503083778 +360000,126.78821165042757 +370000,133.40090815579683 +380000,108.48037439182485 +390000,151.51278034734207 +400000,157.8223172278994 +410000,146.16544481170416 +420000,144.02844993464655 +430000,128.6183180991472 +440000,157.03778173942095 +450000,126.09241344124982 +460000,135.0474550592226 +470000,147.01295856126325 +480000,118.25389855569351 +490000,131.4317442461376 +500000,121.76864854734102 +510000,128.54015492446547 +520000,149.73457903516916 +530000,150.7665988385865 +540000,160.7463004400828 +550000,148.37173291249016 +560000,77.79364614820182 +570000,146.69152764025472 +580000,96.46585612169412 +590000,89.10427929707805 +600000,104.78402716962663 +610000,126.7962697429247 +620000,142.71814525871247 +630000,154.7211656453204 +640000,125.65042436028837 +650000,125.47326576270345 +660000,155.78996299201526 +670000,146.9543058890028 +680000,121.32270384424149 +690000,108.99923723979104 +700000,145.1542494276621 +710000,125.46408061929296 +720000,122.29085780572443 +730000,117.50139668281831 +740000,114.99393141501034 +750000,126.14691500803755 +760000,113.95386479649687 +770000,109.88923687018132 +780000,132.3208302669147 +790000,82.59489768291068 +800000,147.64651702062244 +810000,161.22190084997823 +820000,141.2904842694984 +830000,136.20488203343376 +840000,115.34538736755933 +850000,129.88055452522323 +860000,92.85788718031645 +870000,131.34251856065106 +880000,123.00528414463813 +890000,155.34067270901554 +900000,156.507922542699 +910000,144.03493530374004 +920000,131.2850815911749 +930000,141.05752094785902 +940000,125.59740370139961 +950000,148.91478746250937 +960000,151.55429595014863 +970000,121.02528672834657 +980000,126.05589750123542 +990000,127.62759521796355 +1000000,139.50512640959042 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat/ep_reward.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat/ep_reward.log index 1722a983d..dac3e2748 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat/ep_reward.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat/ep_reward.log @@ -1,101 +1,101 @@ step,stat/ep_reward -10000,0.23945441932737815 -20000,0.2946495190356303 -30000,0.3846257095464523 -40000,0.4033023336591143 -50000,0.3814104605054046 -60000,0.42487002388738465 -70000,0.3984707629974812 -80000,0.33570828153080196 -90000,0.47322930693926235 -100000,0.4025740551495371 -110000,0.42666636003300873 -120000,0.41216037411057815 -130000,0.506531467385021 -140000,0.4186082117142333 -150000,0.42565810063542353 -160000,0.43358651657824687 -170000,0.5300683598025137 -180000,0.5437713901460821 -190000,0.639580611452976 -200000,0.4040129533115251 -210000,0.47224362807682096 -220000,0.5711093227539519 -230000,0.5223611361627574 -240000,0.6457835356258649 -250000,0.5246240479362516 -260000,0.4910683566188389 -270000,0.5430440023514521 -280000,0.48511707388339165 -290000,0.5179916526555828 -300000,0.5639315968290755 -310000,0.6310474545874164 -320000,0.5509939347928563 -330000,0.4962578416982063 -340000,0.2946711699265194 -350000,0.5603150267582764 -360000,0.4927334442134194 -370000,0.48086309392484566 -380000,0.5260171436073402 -390000,0.564625361208126 -400000,0.6020576308714478 -410000,0.6105894403047403 -420000,0.37576354349029484 -430000,0.550772693284067 -440000,0.5866888406767622 -450000,0.5695979927051955 -460000,0.6031007969519555 -470000,0.6419600135757378 -480000,0.5080318428243619 -490000,0.5868127723411487 -500000,0.5967230190943774 -510000,0.6203032386627815 -520000,0.6257366048139903 -530000,0.6254625068793928 -540000,0.5280364464258878 -550000,0.6621571075967119 -560000,0.5353804179257924 -570000,0.5397282457420104 -580000,0.5398449614562439 -590000,0.6209126705798484 -600000,0.6497970497575645 -610000,0.5293697706635095 -620000,0.6073125102638894 -630000,0.5747889480642464 -640000,0.6306211535804747 -650000,0.5790388986649061 -660000,0.49393841660846727 -670000,0.5540405136317303 -680000,0.5400273689113114 -690000,0.5026966263087631 -700000,0.6111911525703422 -710000,0.5336826831034657 -720000,0.5780092087437411 -730000,0.5043684186755708 -740000,0.5205121884834065 -750000,0.5794213893395036 -760000,0.5094710362695437 -770000,0.4812707763324613 -780000,0.6173220272034636 -790000,0.3168448549025198 -800000,0.5588985491720952 -810000,0.6033590552287308 -820000,0.5453883601611701 -830000,0.5482697262098055 -840000,0.40246115687857864 -850000,0.48091302337722874 -860000,0.4432700705286998 -870000,0.5395263891278606 -880000,0.5313474983882464 -890000,0.4964000225854829 -900000,0.6269473850678228 -910000,0.6269606291927737 -920000,0.5261409509181741 -930000,0.5864181735002274 -940000,0.5649044399963963 -950000,0.5905298536214569 -960000,0.5538529500624427 -970000,0.5421489860899633 -980000,0.5570433152693809 -990000,0.5369780180655118 -1000000,0.5331269610128249 +10000,0.20417334595812098 +20000,0.2853624705799045 +30000,0.3165495118046363 +40000,0.3355782729769441 +50000,0.3980057529887725 +60000,0.34389113990988884 +70000,0.5311869842006236 +80000,0.37620989309409136 +90000,0.5125287202661142 +100000,0.4198076322898402 +110000,0.44093335841385944 +120000,0.38244252101837894 +130000,0.36630278055352516 +140000,0.3596896198709554 +150000,0.4897195618646754 +160000,0.36140612123233695 +170000,0.4308266939784676 +180000,0.3950295950819792 +190000,0.46682810077204157 +200000,0.3826821724438471 +210000,0.30350867859415037 +220000,0.5109317461259333 +230000,0.5398107813476506 +240000,0.5740385164291395 +250000,0.5002240908061357 +260000,0.5643521686822466 +270000,0.5836707140324471 +280000,0.3138220956084594 +290000,0.4446770719181152 +300000,0.5584453008447698 +310000,0.5985953610029997 +320000,0.5713454063805392 +330000,0.45432208663221 +340000,0.43576953121256273 +350000,0.5918967001233513 +360000,0.5095559064034985 +370000,0.5486427837990855 +380000,0.436123126545116 +390000,0.6060511213893685 +400000,0.6312892689115975 +410000,0.5846617792468167 +420000,0.5803871352176919 +430000,0.5147005904484985 +440000,0.6281511269576837 +450000,0.5179998183758358 +460000,0.5406715785165241 +470000,0.5880518342450529 +480000,0.47327844495037363 +490000,0.5592269827176344 +500000,0.4997425903415144 +510000,0.5150695667598224 +520000,0.5991453281290438 +530000,0.603066395354346 +540000,0.6429852017603311 +550000,0.5934869316499607 +560000,0.32154506012469986 +570000,0.5867661105610189 +580000,0.43528417110396 +590000,0.4516037801209136 +600000,0.42364187851448454 +610000,0.507197453373778 +620000,0.5733685033218904 +630000,0.6200452180335949 +640000,0.523664157516407 +650000,0.5020664951065839 +660000,0.6231598519680611 +670000,0.5878172235560113 +680000,0.48558305467862334 +690000,0.5111385591837763 +700000,0.5806169977106483 +710000,0.5024668895075786 +720000,0.49676061500278246 +730000,0.4725289626626502 +740000,0.48647792068326445 +750000,0.5046807331302927 +760000,0.45678409379675167 +770000,0.44964255201039355 +780000,0.557159178124443 +790000,0.3382916400800398 +800000,0.5913719839831811 +810000,0.6448876033999128 +820000,0.5651619370779936 +830000,0.5448347199629137 +840000,0.46267620403047227 +850000,0.5210399333081679 +860000,0.3830407101119127 +870000,0.6009128535544207 +880000,0.496688659330486 +890000,0.6213626908360619 +900000,0.6260316901707961 +910000,0.6167236531167056 +920000,0.5324535656507553 +930000,0.6049253254577414 +940000,0.537896247697131 +950000,0.5985158507701469 +960000,0.6062543181503177 +970000,0.48951967512154565 +980000,0.5118260631230274 +990000,0.5116970288278347 +1000000,0.5629136615436259 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat_eval/constraint_violation.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat_eval/constraint_violation.log index 9121007e0..19b90d387 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat_eval/constraint_violation.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat_eval/constraint_violation.log @@ -2,10 +2,10 @@ step,stat_eval/constraint_violation 10000,0.1 20000,0.2 30000,0.1 -40000,0.1 +40000,0.0 50000,0.1 60000,0.0 -70000,0.4 +70000,0.3 80000,0.1 90000,0.1 100000,0.0 @@ -18,51 +18,51 @@ step,stat_eval/constraint_violation 170000,0.1 180000,0.2 190000,0.1 -200000,0.1 +200000,0.2 210000,0.3 220000,0.2 230000,0.0 240000,0.2 250000,0.2 260000,0.0 -270000,0.3 +270000,0.2 280000,0.0 290000,0.2 300000,0.1 310000,0.0 320000,0.0 330000,0.2 -340000,0.0 +340000,0.2 350000,0.0 360000,0.1 370000,0.2 380000,0.2 390000,0.2 400000,0.1 -410000,0.1 +410000,0.0 420000,0.3 430000,0.3 -440000,0.1 +440000,0.2 450000,0.0 460000,0.1 -470000,0.2 +470000,0.1 480000,0.3 490000,0.1 500000,0.0 510000,0.2 520000,0.3 -530000,0.1 -540000,0.2 +530000,0.0 +540000,0.3 550000,0.1 560000,0.2 570000,0.0 -580000,0.2 +580000,0.3 590000,0.2 -600000,0.0 -610000,0.0 +600000,0.2 +610000,0.1 620000,0.1 630000,0.1 -640000,0.1 +640000,0.0 650000,0.0 660000,0.2 670000,0.2 @@ -83,7 +83,7 @@ step,stat_eval/constraint_violation 820000,0.1 830000,0.0 840000,0.0 -850000,0.4 +850000,0.5 860000,0.0 870000,0.3 880000,0.2 @@ -94,7 +94,7 @@ step,stat_eval/constraint_violation 930000,0.0 940000,0.0 950000,0.1 -960000,0.1 +960000,0.0 970000,0.2 980000,0.0 990000,0.2 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat_eval/ep_length.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat_eval/ep_length.log index ff8d5cfbf..6562db2e5 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat_eval/ep_length.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat_eval/ep_length.log @@ -1,71 +1,71 @@ step,stat_eval/ep_length 10000,225.1 -20000,201.9 +20000,200.5 30000,225.3 -40000,225.1 +40000,250.0 50000,225.3 60000,250.0 -70000,150.7 -80000,225.2 +70000,175.7 +80000,225.1 90000,225.1 100000,250.0 110000,201.3 120000,225.2 -130000,176.5 +130000,176.4 140000,250.0 150000,225.2 160000,250.0 170000,225.8 180000,200.9 190000,225.1 -200000,225.1 +200000,200.2 210000,176.6 -220000,201.4 +220000,201.6 230000,250.0 240000,200.2 250000,201.9 260000,250.0 -270000,175.3 +270000,200.2 280000,250.0 290000,201.4 300000,225.4 310000,250.0 320000,250.0 -330000,202.0 -340000,250.0 +330000,201.9 +340000,200.4 350000,250.0 360000,225.1 370000,202.3 380000,201.0 390000,200.3 400000,225.6 -410000,225.2 +410000,250.0 420000,176.7 -430000,175.7 -440000,226.5 +430000,175.8 +440000,201.7 450000,250.0 -460000,225.4 -470000,201.4 +460000,225.2 +470000,226.2 480000,176.3 -490000,225.8 +490000,225.7 500000,250.0 -510000,200.5 +510000,200.4 520000,175.9 -530000,225.2 -540000,200.4 +530000,250.0 +540000,175.6 550000,225.3 560000,201.6 570000,250.0 -580000,202.6 -590000,200.3 -600000,250.0 -610000,250.0 +580000,178.4 +590000,200.4 +600000,200.2 +610000,225.1 620000,225.1 -630000,225.5 -640000,227.0 +630000,225.6 +640000,250.0 650000,250.0 -660000,200.4 -670000,200.6 +660000,200.2 +670000,200.4 680000,250.0 690000,225.5 700000,250.0 @@ -80,22 +80,22 @@ step,stat_eval/ep_length 790000,225.7 800000,250.0 810000,250.0 -820000,225.4 +820000,225.1 830000,250.0 840000,250.0 -850000,151.6 +850000,126.7 860000,250.0 870000,176.8 880000,201.6 890000,201.3 900000,201.0 910000,226.6 -920000,175.6 +920000,175.4 930000,250.0 940000,250.0 -950000,227.6 -960000,225.5 +950000,227.2 +960000,250.0 970000,200.3 980000,250.0 -990000,201.9 +990000,202.4 1000000,250.0 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat_eval/ep_return.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat_eval/ep_return.log index 7e18c7be6..683dbbe57 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat_eval/ep_return.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat_eval/ep_return.log @@ -1,101 +1,101 @@ step,stat_eval/ep_return -10000,51.04985095866592 -20000,60.941225365008435 -30000,73.28630176839951 -40000,83.63374128363618 -50000,73.50476812981995 -60000,80.12274029097964 -70000,61.15452257667887 -80000,109.70070134578425 -90000,94.14796011976208 -100000,118.2533877588885 -110000,90.65827650756943 -120000,123.72533848009937 -130000,103.8425065908503 -140000,129.8396645278161 -150000,124.0888300465358 -160000,130.0431141708938 -170000,129.75205708762945 -180000,113.31235685552477 -190000,133.56408085027147 -200000,126.02941315742889 -210000,98.96676304284956 -220000,118.0753713415108 -230000,147.89731126922536 -240000,105.99631094504964 -250000,116.61354187517516 -260000,146.27019263879293 -270000,95.68995703899732 -280000,159.80452161414934 -290000,122.3309020310777 -300000,126.59744184075096 -310000,141.14294970739735 -320000,143.49586767857141 -330000,117.82423919843386 -340000,140.31017827179056 -350000,140.65512970852885 -360000,133.55550877898196 -370000,116.84516303828426 -380000,111.68779480791018 -390000,109.14581468786523 -400000,121.53088892936219 -410000,124.33952221538951 -420000,103.32667573074013 -430000,97.20156255114081 -440000,121.41554724037005 -450000,147.9296740370175 -460000,141.02416638098939 -470000,108.54740813615668 -480000,105.40216474748823 -490000,127.88251683253755 -500000,144.6886054589761 -510000,125.05820156568295 -520000,112.0820559327054 -530000,132.30480865141587 -540000,119.08888134134477 -550000,125.41259117340648 -560000,138.00552429860016 -570000,140.44119107837355 -580000,109.34828594682278 -590000,114.28204748509538 -600000,145.54048707511703 -610000,153.26371729769295 -620000,138.021252597042 -630000,135.8929059622254 -640000,141.35532651775128 -650000,153.1351529373031 -660000,116.94865733053996 -670000,115.71001350678102 -680000,149.67775770857207 -690000,152.67276099074812 -700000,144.54287640463966 -710000,121.46403604304537 -720000,118.91044368746523 -730000,132.9828342386492 -740000,138.79238593916818 -750000,131.5634665206548 -760000,129.50029277324663 -770000,121.19795663600523 -780000,120.19857844921667 -790000,129.380355747817 -800000,135.6197216946579 -810000,150.59784042870487 -820000,143.39114778625583 -830000,150.74445580082767 -840000,143.05680765675112 -850000,78.09131725356102 -860000,132.2925530476843 -870000,104.79823491430662 -880000,118.36370280771527 -890000,129.96185279676246 -900000,106.02722467926237 -910000,131.276197554587 -920000,114.72416294557597 -930000,151.1096398407897 -940000,140.14710167867688 -950000,132.60972201787396 -960000,143.7590907315972 -970000,114.81897631495724 -980000,153.9566124491435 -990000,132.95776876555874 -1000000,139.78758521347993 +10000,54.131969458331966 +20000,59.31294081401785 +30000,62.067744110027775 +40000,77.8093927601448 +50000,90.63781913675979 +60000,108.27854312712282 +70000,71.25895295494698 +80000,109.72551055773104 +90000,110.25341220023286 +100000,131.14579863881 +110000,88.10274159662141 +120000,104.28876727082668 +130000,86.26763794177994 +140000,114.33122774908392 +150000,110.8619547442473 +160000,126.95386725137078 +170000,123.66705740333434 +180000,114.54989928188766 +190000,117.92603624152238 +200000,111.9427946560616 +210000,95.56571832250127 +220000,110.34301996611718 +230000,139.0317716304152 +240000,104.46003962205111 +250000,116.57511076148543 +260000,143.54886806694702 +270000,116.30184451594668 +280000,156.6815247675464 +290000,118.22267838636046 +300000,125.05888926948566 +310000,144.8182803021944 +320000,145.54047722298722 +330000,120.38804013104277 +340000,118.12244079050087 +350000,145.06432504585922 +360000,130.82211600337016 +370000,119.18165750657718 +380000,114.87825961200636 +390000,109.2814325520751 +400000,129.97590469950677 +410000,143.04527699035657 +420000,103.44379690061551 +430000,96.70982465970471 +440000,117.21336520551984 +450000,152.3009038322204 +460000,144.5919048880989 +470000,123.91272291776849 +480000,110.34385197330778 +490000,124.08823313691214 +500000,144.4816798329933 +510000,112.47012456876291 +520000,113.25948420470156 +530000,139.20653113159628 +540000,106.05309503523661 +550000,123.60896775645635 +560000,120.95883900830226 +570000,136.0081502390858 +580000,92.1979440186361 +590000,108.17173790160305 +600000,106.54646031411532 +610000,131.94487926915804 +620000,126.3774354301801 +630000,128.11230299722448 +640000,132.81537307591356 +650000,139.82019711287882 +660000,104.4161194808888 +670000,113.01687364720362 +680000,141.5896128309651 +690000,142.00796188290437 +700000,142.54669997740982 +710000,119.34676896742401 +720000,117.61562512939345 +730000,123.22609942663614 +740000,131.8716067726077 +750000,122.0695775760187 +760000,122.94407650868034 +770000,114.73766042299917 +780000,119.39299059119375 +790000,129.98756603308726 +800000,135.00424432432683 +810000,148.37780477132313 +820000,136.92586298950022 +830000,141.93671986162335 +840000,149.99493718743537 +850000,61.18663559654938 +860000,135.429302277309 +870000,106.21353298902287 +880000,118.32387433999187 +890000,126.01429695078396 +900000,107.04204883540584 +910000,130.3815191707833 +920000,121.39315234460814 +930000,155.8705429370229 +940000,146.07642199278544 +950000,133.5629873279963 +960000,155.94699538750044 +970000,117.45998543366109 +980000,156.90456664328298 +990000,129.28720914503467 +1000000,145.62634443479575 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat_eval/ep_reward.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat_eval/ep_reward.log index a56546631..95ad04786 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat_eval/ep_reward.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat_eval/ep_reward.log @@ -1,101 +1,101 @@ step,stat_eval/ep_reward -10000,0.20419941903380953 -20000,0.2438129992596007 -30000,0.29314520707959735 -40000,0.3345349651602997 -50000,0.2940191290705651 -60000,0.3204909611639185 -70000,0.24461809051035757 -80000,0.4388028113756737 -90000,0.3765918565625844 -100000,0.47301355103555387 -110000,0.36263339169873465 -120000,0.4949013539220218 -130000,0.41537007652634167 -140000,0.5193586581112644 -150000,0.49635532102962887 -160000,0.5201724566835753 -170000,0.5190082283730245 -180000,0.45324952454646084 -190000,0.5342563235224027 -200000,0.5041186685051037 -210000,0.3958739270500982 -220000,0.4723014975823081 -230000,0.5915892450769014 -240000,0.4239852468925995 -250000,0.4664541689098208 -260000,0.5850807705551717 -270000,0.3827598281559893 -280000,0.6392180864565975 -290000,0.489323608699065 -300000,0.5063897673633254 -310000,0.5645717988295895 -320000,0.5739834707142857 -330000,0.4712971268771602 -340000,0.5612407130871622 -350000,0.5626205188341153 -360000,0.5342220351159279 -370000,0.4673810059992019 -380000,0.4467512043622218 -390000,0.43659823847376344 -400000,0.48612355571820676 -410000,0.4973580959954589 -420000,0.4133067694182647 -430000,0.3888062622721516 -440000,0.485662190509642 -450000,0.59171869614807 -460000,0.5640970431440664 -470000,0.4341896695413623 -480000,0.42160865899051814 -490000,0.5115300673302563 -500000,0.5787544218359044 -510000,0.5002359713603857 -520000,0.44832834893081264 -530000,0.5292192346165423 -540000,0.4763555253655758 -550000,0.5016503646936676 -560000,0.5520220976918094 -570000,0.5617647643134942 -580000,0.43739408907521815 -590000,0.45712819215031875 -600000,0.5821619483004682 -610000,0.6130548691907716 -620000,0.5520850103881694 -630000,0.543571624849538 -640000,0.5654213112325498 -650000,0.6125406117492123 -660000,0.46779475601431386 -670000,0.46284086989846784 -680000,0.5987110308342882 -690000,0.6106912964222083 -700000,0.5781715056185586 -710000,0.4858567359156411 -720000,0.4756417867891131 -730000,0.5319314747534956 -740000,0.5551695437566727 -750000,0.5262538663381584 -760000,0.5180011797422914 -770000,0.484795489336471 -780000,0.48079434270398647 -790000,0.5175224221692286 -800000,0.5424788867786317 -810000,0.6023913617148194 -820000,0.5735646301833519 -830000,0.6029778232033106 -840000,0.5722272306270045 -850000,0.3123653774533971 -860000,0.5291702121907372 -870000,0.41919300375174046 -880000,0.4734553655551199 -890000,0.5198476191403969 -900000,0.4241088999232067 -910000,0.5251047952797938 -920000,0.45889669083934737 -930000,0.6044385593631587 -940000,0.5605884067147076 -950000,0.5304388888355427 -960000,0.5750363648703176 -970000,0.4592759132483318 -980000,0.6158264497965742 -990000,0.5318312227750153 -1000000,0.5591503408539197 +10000,0.21652791525492843 +20000,0.23725176325638922 +30000,0.24827097678332474 +40000,0.31123757104057925 +50000,0.36255127787384445 +60000,0.43311417250849127 +70000,0.28503581370297526 +80000,0.4389020422309241 +90000,0.4410137599873206 +100000,0.5245831945552399 +110000,0.3524114875276278 +120000,0.4171550690878532 +130000,0.34507321686767123 +140000,0.45732491099633565 +150000,0.44344781981222325 +160000,0.5078154690054831 +170000,0.4946682303525957 +180000,0.458199604380924 +190000,0.4717041449669136 +200000,0.44777590249447596 +210000,0.38226749105721813 +220000,0.4413720841821859 +230000,0.5561270865216608 +240000,0.41784015849017997 +250000,0.46630046302601835 +260000,0.5741954722677881 +270000,0.4652118917289341 +280000,0.6267260990701857 +290000,0.47289071394107784 +300000,0.5002355570795871 +310000,0.5792731212087777 +320000,0.582161908891949 +330000,0.4815524478661735 +340000,0.4724898762426736 +350000,0.580257300183437 +360000,0.5232884640134846 +370000,0.47672781270593295 +380000,0.45951306722521645 +390000,0.43712647417890665 +400000,0.519903620628619 +410000,0.5721811079614261 +420000,0.41377519668644336 +430000,0.386841100519456 +440000,0.46885350985979024 +450000,0.6092036153288816 +460000,0.578367619602161 +470000,0.49565111520607863 +480000,0.44137540790086865 +490000,0.4963529327851123 +500000,0.5779267193319731 +510000,0.44998912779256883 +520000,0.4530391212949971 +530000,0.5568261245263851 +540000,0.4242125128007168 +550000,0.49443587104509507 +560000,0.4838353585768392 +570000,0.5440326009563432 +580000,0.3687922316184538 +590000,0.43270608526815674 +600000,0.4261858412565761 +610000,0.5277795186197961 +620000,0.5055097417837509 +630000,0.5124492239537756 +640000,0.5312614923036542 +650000,0.5592807884515153 +660000,0.4176645106677427 +670000,0.45206752847724496 +680000,0.5663584513238604 +690000,0.5680318484993319 +700000,0.5701867999096393 +710000,0.47738751765073584 +720000,0.4704625007176249 +730000,0.4929045568527391 +740000,0.5274864270904308 +750000,0.48827831036380226 +760000,0.4917763077438355 +770000,0.4589517323408788 +780000,0.4775730378022084 +790000,0.5199747465491267 +800000,0.5400169772973074 +810000,0.5935112190852924 +820000,0.5477034519583437 +830000,0.5677468794464934 +840000,0.5999797487497417 +850000,0.2447475114931351 +860000,0.541717209109236 +870000,0.42485413941237143 +880000,0.47329611938255073 +890000,0.504057209897152 +900000,0.4281681974192666 +910000,0.5215261255619602 +920000,0.48557272776437 +930000,0.6234821717480917 +940000,0.5843056879711417 +950000,0.5342520241924973 +960000,0.6237879815500017 +970000,0.4698399417373569 +980000,0.6276182665731319 +990000,0.5172002463921096 +1000000,0.5825053777391831 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat_eval/mse.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat_eval/mse.log index ebf150fe6..e0d38950b 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat_eval/mse.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/logs/stat_eval/mse.log @@ -1,101 +1,101 @@ step,stat_eval/mse -10000,415.41029529892575 -20000,313.05411884638295 -30000,337.0610460257782 -40000,237.28844801204158 -50000,308.8043178196814 -60000,328.14041762261024 -70000,227.50759850083614 -80000,172.08641978220075 -90000,230.28840135739893 -100000,290.1724119902373 -110000,345.4069116070745 -120000,244.38612697173977 -130000,149.55344470360552 -140000,261.27587845181944 -150000,222.8527908389172 -160000,298.9743469585129 -170000,218.1782913802142 -180000,224.45273399881336 -190000,216.77634395683657 -200000,257.9039867676725 -210000,214.16475838668285 -220000,178.98076304279294 -230000,263.9991202920975 -240000,236.12571683374617 -250000,245.48512362111956 -260000,247.5583744467248 -270000,183.18481622902019 -280000,220.02861312706804 -290000,169.17685462634705 -300000,289.954194572304 -310000,300.264297390096 -320000,310.9926237831924 -330000,204.06035284092576 -340000,297.23716912131687 -350000,339.9731729146743 -360000,227.10011559185597 -370000,184.39569261760658 -380000,193.75815024546864 -390000,272.4720704524531 -400000,253.00219077041135 -410000,245.80793104607915 -420000,146.486780386658 -430000,188.53884255379216 -440000,294.886758595994 -450000,219.46546683256193 -460000,111.38993641493862 -470000,239.66011353427447 -480000,162.00754304863136 -490000,322.4195116391703 -500000,264.94727374215694 -510000,138.4230978719673 -520000,128.8161472944808 -530000,233.39639722775422 -540000,203.81576840495745 -550000,280.1679827613471 -560000,119.41260216782317 -570000,293.42735852635167 -580000,312.0680873623512 -590000,265.5929062721463 -600000,287.90730640195903 -610000,254.82130467133317 -620000,223.53979637592116 -630000,219.91116358094996 -640000,216.7412923461988 -650000,306.2913204611244 -660000,195.96240275747127 -670000,233.7266794078311 -680000,305.1329319223843 -690000,136.53816965782198 -700000,253.02574539899197 -710000,216.3987332595982 -720000,186.48426235963953 -730000,148.66021208518788 -740000,268.4852524355427 -750000,237.08121773708734 -760000,253.0649043828239 -770000,152.16578413729172 -780000,214.6801465758071 -790000,212.70279443796386 -800000,305.09044817472426 -810000,206.3830441388007 -820000,144.02921559916737 -830000,232.88199027716124 -840000,232.41023070516968 -850000,131.1530077263561 -860000,365.1432361906625 -870000,192.10636591277753 -880000,209.6364164224231 -890000,155.93348916425808 -900000,245.6009765995782 -910000,242.70534705322916 -920000,73.44970268735531 -930000,202.82174589631865 -940000,265.43101208031806 -950000,272.6559814695675 -960000,149.72270240931462 -970000,236.41829022839698 -980000,187.686079930314 -990000,107.4131223857685 -1000000,360.18110144837635 +10000,391.2108552834895 +20000,308.6645123893604 +30000,384.15571509125357 +40000,314.59704775819165 +50000,275.45888116829747 +60000,335.2021406185029 +70000,307.645641463113 +80000,150.895187098296 +90000,215.21171765322674 +100000,273.36646127125266 +110000,348.2526880481523 +120000,280.1935418443809 +130000,168.7478405453462 +140000,290.59186455058443 +150000,250.35876902748382 +160000,300.4670141355797 +170000,224.63486165104814 +180000,216.23594218689172 +190000,229.46148591816956 +200000,231.59341959749958 +210000,209.02340500695527 +220000,186.55251019091693 +230000,269.48590140343686 +240000,225.66151220448424 +250000,236.25638129957915 +260000,249.87429990050055 +270000,232.92564249328098 +280000,226.08367442810376 +290000,159.9510215503518 +300000,291.9033634054971 +310000,293.63753429693867 +320000,311.0866301678424 +330000,207.28669312643757 +340000,219.50429970541376 +350000,339.10973603246805 +360000,232.53538360948355 +370000,177.41571137133042 +380000,190.52164335129132 +390000,276.16191262096066 +400000,244.09033346009323 +410000,253.29634514207 +420000,144.51006509049256 +430000,184.49211744959402 +440000,218.73851585373967 +450000,212.40970024876088 +460000,115.63944026472163 +470000,252.3102289765323 +480000,162.66501536955346 +490000,328.1601038945513 +500000,264.2161017116697 +510000,157.75198339765592 +520000,125.87035724057952 +530000,312.89345780854876 +540000,175.58985869057224 +550000,288.40143100505696 +560000,127.14185657675068 +570000,302.01996358716303 +580000,308.1922418402695 +590000,272.8557677530258 +600000,267.9067169328422 +610000,240.69447460682113 +620000,243.05819721105314 +630000,225.25571110274396 +640000,272.4854768455003 +650000,329.7169614404885 +660000,214.98678089403066 +670000,239.1140547433381 +680000,316.8572074820209 +690000,150.1227770602069 +700000,254.29566796382264 +710000,214.50907415925855 +720000,191.37289426810094 +730000,159.0964704430345 +740000,283.8808059133965 +750000,258.9055180799934 +760000,266.90599573304667 +770000,159.34524219982123 +780000,215.952438692865 +790000,214.4017498671861 +800000,322.2220250030241 +810000,209.59480380103724 +820000,154.9704567712309 +830000,243.71939414356729 +840000,221.4243454591611 +850000,117.00987452483864 +860000,366.4111765530357 +870000,192.71906840486554 +880000,210.66518862525328 +890000,157.5502580687592 +900000,247.68321947163273 +910000,252.26862904990827 +920000,65.10624754972847 +930000,187.26033129034306 +940000,267.0787889929178 +950000,279.3780162401444 +960000,222.94407105135923 +970000,239.59638517343745 +980000,194.06280886250448 +990000,115.61430439767294 +1000000,360.86897841243206 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/model_best.pt b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/model_best.pt index 6b4a5d61d..e8e18d589 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/model_best.pt and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/model_best.pt differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/model_latest.pt b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/model_latest.pt index 9cc36447f..4e60336f3 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/model_latest.pt and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/model_latest.pt differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-loss-approx_kl.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-loss-approx_kl.jpg index dec0a19d8..6b6470c92 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-loss-approx_kl.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-loss-approx_kl.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-loss-entropy_loss.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-loss-entropy_loss.jpg index 3f896334b..44988a0c5 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-loss-entropy_loss.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-loss-entropy_loss.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-loss-policy_loss.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-loss-policy_loss.jpg index f002f66f0..037590ea5 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-loss-policy_loss.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-loss-policy_loss.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-loss-value_loss.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-loss-value_loss.jpg index 171bcae54..300b01849 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-loss-value_loss.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-loss-value_loss.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat-constraint_violation.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat-constraint_violation.jpg index fad190d7e..8d10fb55c 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat-constraint_violation.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat-constraint_violation.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat-ep_constraint_violation.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat-ep_constraint_violation.jpg index 52f31365a..641e1497e 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat-ep_constraint_violation.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat-ep_constraint_violation.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat-ep_length.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat-ep_length.jpg index 8b594cf81..028aa5104 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat-ep_length.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat-ep_length.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat-ep_return.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat-ep_return.jpg index 981e8aa2a..e37d5dc0d 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat-ep_return.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat-ep_return.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat-ep_reward.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat-ep_reward.jpg index fa83a6498..13886a71b 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat-ep_reward.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat-ep_reward.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat_eval-constraint_violation.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat_eval-constraint_violation.jpg index 1dd1ec4da..37104e206 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat_eval-constraint_violation.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat_eval-constraint_violation.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat_eval-ep_length.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat_eval-ep_length.jpg index 57df2dcc3..68ad04edd 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat_eval-ep_length.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat_eval-ep_length.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat_eval-ep_return.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat_eval-ep_return.jpg index fcd22acbf..5d38feeb3 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat_eval-ep_return.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat_eval-ep_return.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat_eval-ep_reward.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat_eval-ep_reward.jpg index 1e7a5a76b..117cad812 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat_eval-ep_reward.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat_eval-ep_reward.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat_eval-mse.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat_eval-mse.jpg index ab887d205..616385015 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat_eval-mse.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/plots/-stat_eval-mse.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/std_out.txt b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/std_out.txt index b7780004a..acb44efbd 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/std_out.txt +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/std_out.txt @@ -1,2601 +1,2601 @@ -2023-10-19 14:51:55,843 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 51.050 +/- 26.920 -2023-10-19 14:51:55,860 : +2023-10-27 16:43:25,838 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 54.132 +/- 28.897 +2023-10-27 16:43:25,853 : -------------------------------------- | loss/ | | -| approx_kl | 0.0287 | -| entropy_loss | -3.76 | -| policy_loss | -0.00494 | -| value_loss | 13.1 | +| approx_kl | 0.0296 | +| entropy_loss | -3.69 | +| policy_loss | -4.7e-05 | +| value_loss | 28.1 | | stat/ | | -| constraint_violation | 12 | +| constraint_violation | 11 | | ep_constraint_vio... | 0.1 | | ep_length | 225 | -| ep_return | 59.7 | -| ep_reward | 0.239 | +| ep_return | 50.9 | +| ep_reward | 0.204 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 51 | -| ep_reward | 0.204 | -| mse | 415 | +| ep_return | 54.1 | +| ep_reward | 0.217 | +| mse | 391 | | time/ | | | progress | 0.01 | | step | 1e+04 | -| step_time | 11.1 | +| step_time | 11.8 | -------------------------------------- -2023-10-19 14:54:05,069 : Eval | ep_lengths 201.90 +/- 96.23 | ep_return 60.941 +/- 37.292 -2023-10-19 14:54:05,098 : +2023-10-27 16:45:47,223 : Eval | ep_lengths 200.50 +/- 99.00 | ep_return 59.313 +/- 35.831 +2023-10-27 16:45:47,231 : -------------------------------------- | loss/ | | -| approx_kl | 0.0268 | -| entropy_loss | -3.73 | -| policy_loss | -0.0129 | -| value_loss | 8.79 | +| approx_kl | 0.0227 | +| entropy_loss | -3.65 | +| policy_loss | -0.0152 | +| value_loss | 4.6 | | stat/ | | -| constraint_violation | 22 | +| constraint_violation | 20 | | ep_constraint_vio... | 0.3 | -| ep_length | 177 | -| ep_return | 72.6 | -| ep_reward | 0.295 | +| ep_length | 176 | +| ep_return | 68.8 | +| ep_reward | 0.285 | | stat_eval/ | | | constraint_violation | 0.2 | -| ep_length | 202 | -| ep_return | 60.9 | -| ep_reward | 0.244 | -| mse | 313 | +| ep_length | 200 | +| ep_return | 59.3 | +| ep_reward | 0.237 | +| mse | 309 | | time/ | | | progress | 0.02 | | step | 2e+04 | -| step_time | 11.1 | +| step_time | 12 | -------------------------------------- -2023-10-19 14:56:15,773 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 73.286 +/- 29.888 -2023-10-19 14:56:15,781 : +2023-10-27 16:48:10,134 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 62.068 +/- 31.973 +2023-10-27 16:48:10,155 : -------------------------------------- | loss/ | | -| approx_kl | 0.0228 | -| entropy_loss | -3.73 | -| policy_loss | -0.0182 | -| value_loss | 9.13 | +| approx_kl | 0.0166 | +| entropy_loss | -3.65 | +| policy_loss | -0.0193 | +| value_loss | 6.42 | | stat/ | | | constraint_violation | 33 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 96 | -| ep_reward | 0.385 | +| ep_constraint_vio... | 0.2 | +| ep_length | 200 | +| ep_return | 77.3 | +| ep_reward | 0.317 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 73.3 | -| ep_reward | 0.293 | -| mse | 337 | +| ep_return | 62.1 | +| ep_reward | 0.248 | +| mse | 384 | | time/ | | | progress | 0.03 | | step | 3e+04 | -| step_time | 10.6 | +| step_time | 11.6 | -------------------------------------- -2023-10-19 14:58:23,785 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 83.634 +/- 32.813 -2023-10-19 14:58:23,794 : +2023-10-27 16:50:34,104 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 77.809 +/- 13.228 +2023-10-27 16:50:34,113 : -------------------------------------- | loss/ | | -| approx_kl | 0.03 | -| entropy_loss | -3.71 | -| policy_loss | -0.0119 | -| value_loss | 8.39 | +| approx_kl | 0.0224 | +| entropy_loss | -3.61 | +| policy_loss | -0.0129 | +| value_loss | 6.98 | | stat/ | | | constraint_violation | 37 | | ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 99.9 | -| ep_reward | 0.403 | +| ep_length | 200 | +| ep_return | 82.9 | +| ep_reward | 0.336 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 83.6 | -| ep_reward | 0.335 | -| mse | 237 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 77.8 | +| ep_reward | 0.311 | +| mse | 315 | | time/ | | | progress | 0.04 | | step | 4e+04 | -| step_time | 10.6 | +| step_time | 11.7 | -------------------------------------- -2023-10-19 15:00:30,384 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 73.505 +/- 37.847 -2023-10-19 15:00:30,386 : +2023-10-27 16:52:52,898 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 90.638 +/- 38.018 +2023-10-27 16:52:52,906 : -------------------------------------- | loss/ | | -| approx_kl | 0.0268 | -| entropy_loss | -3.67 | -| policy_loss | -0.0143 | -| value_loss | 8.81 | +| approx_kl | 0.0344 | +| entropy_loss | -3.62 | +| policy_loss | -0.0147 | +| value_loss | 5.11 | | stat/ | | | constraint_violation | 45 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 95.4 | -| ep_reward | 0.381 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 99.2 | +| ep_reward | 0.398 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 73.5 | -| ep_reward | 0.294 | -| mse | 309 | +| ep_return | 90.6 | +| ep_reward | 0.363 | +| mse | 275 | | time/ | | | progress | 0.05 | | step | 5e+04 | -| step_time | 10.2 | +| step_time | 11 | -------------------------------------- -2023-10-19 15:02:41,559 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 80.123 +/- 19.766 -2023-10-19 15:02:41,561 : +2023-10-27 16:55:11,823 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 108.279 +/- 27.548 +2023-10-27 16:55:11,844 : -------------------------------------- | loss/ | | -| approx_kl | 0.013 | -| entropy_loss | -3.69 | -| policy_loss | -0.0149 | -| value_loss | 4.92 | +| approx_kl | 0.0164 | +| entropy_loss | -3.67 | +| policy_loss | -0.0192 | +| value_loss | 4.54 | | stat/ | | | constraint_violation | 51 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 106 | -| ep_reward | 0.425 | +| ep_constraint_vio... | 0.3 | +| ep_length | 175 | +| ep_return | 81.4 | +| ep_reward | 0.344 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 80.1 | -| ep_reward | 0.32 | -| mse | 328 | +| ep_return | 108 | +| ep_reward | 0.433 | +| mse | 335 | | time/ | | | progress | 0.06 | | step | 6e+04 | -| step_time | 10.6 | +| step_time | 11.2 | -------------------------------------- -2023-10-19 15:04:39,675 : Eval | ep_lengths 150.70 +/- 121.62 | ep_return 61.155 +/- 55.022 -2023-10-19 15:04:39,676 : +2023-10-27 16:57:22,705 : Eval | ep_lengths 175.70 +/- 113.50 | ep_return 71.259 +/- 49.911 +2023-10-27 16:57:22,718 : -------------------------------------- | loss/ | | -| approx_kl | 0.0324 | -| entropy_loss | -3.66 | -| policy_loss | -0.0083 | -| value_loss | 4.79 | +| approx_kl | 0.0245 | +| entropy_loss | -3.68 | +| policy_loss | -0.0144 | +| value_loss | 6.5 | | stat/ | | -| constraint_violation | 54 | -| ep_constraint_vio... | 0.2 | -| ep_length | 202 | -| ep_return | 98.3 | -| ep_reward | 0.398 | +| constraint_violation | 52 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 133 | +| ep_reward | 0.531 | | stat_eval/ | | -| constraint_violation | 0.4 | -| ep_length | 151 | -| ep_return | 61.2 | -| ep_reward | 0.245 | -| mse | 228 | +| constraint_violation | 0.3 | +| ep_length | 176 | +| ep_return | 71.3 | +| ep_reward | 0.285 | +| mse | 308 | | time/ | | | progress | 0.07 | | step | 7e+04 | -| step_time | 10.3 | +| step_time | 11.3 | -------------------------------------- -2023-10-19 15:06:44,498 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 109.701 +/- 40.986 -2023-10-19 15:06:44,508 : +2023-10-27 16:59:41,012 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 109.726 +/- 39.504 +2023-10-27 16:59:41,020 : -------------------------------------- | loss/ | | -| approx_kl | 0.0239 | -| entropy_loss | -3.68 | -| policy_loss | -0.00966 | -| value_loss | 4.81 | +| approx_kl | 0.0297 | +| entropy_loss | -3.6 | +| policy_loss | -0.00822 | +| value_loss | 3.37 | | stat/ | | -| constraint_violation | 61 | -| ep_constraint_vio... | 0.4 | -| ep_length | 151 | -| ep_return | 78.8 | -| ep_reward | 0.336 | +| constraint_violation | 56 | +| ep_constraint_vio... | 0.2 | +| ep_length | 200 | +| ep_return | 91.7 | +| ep_reward | 0.376 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | | ep_return | 110 | | ep_reward | 0.439 | -| mse | 172 | +| mse | 151 | | time/ | | | progress | 0.08 | | step | 8e+04 | -| step_time | 10.2 | +| step_time | 11.3 | -------------------------------------- -2023-10-19 15:08:47,831 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 94.148 +/- 34.957 -2023-10-19 15:08:47,833 : +2023-10-27 17:01:55,514 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 110.253 +/- 40.186 +2023-10-27 17:01:55,534 : -------------------------------------- | loss/ | | -| approx_kl | 0.0303 | -| entropy_loss | -3.61 | -| policy_loss | 0.000754 | -| value_loss | 2.59 | +| approx_kl | 0.0225 | +| entropy_loss | -3.62 | +| policy_loss | 0.000141 | +| value_loss | 1.87 | | stat/ | | -| constraint_violation | 65 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 118 | -| ep_reward | 0.473 | +| constraint_violation | 63 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 120 | +| ep_reward | 0.513 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 94.1 | -| ep_reward | 0.377 | -| mse | 230 | +| ep_return | 110 | +| ep_reward | 0.441 | +| mse | 215 | | time/ | | | progress | 0.09 | | step | 9e+04 | -| step_time | 10.5 | +| step_time | 11.4 | -------------------------------------- -2023-10-19 15:10:53,295 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 118.253 +/- 25.515 -2023-10-19 15:10:53,305 : +2023-10-27 17:04:11,565 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 131.146 +/- 29.168 +2023-10-27 17:04:11,573 : -------------------------------------- | loss/ | | -| approx_kl | 0.0249 | -| entropy_loss | -3.62 | -| policy_loss | -0.0135 | -| value_loss | 5.29 | +| approx_kl | 0.0318 | +| entropy_loss | -3.58 | +| policy_loss | -0.0153 | +| value_loss | 2.23 | | stat/ | | -| constraint_violation | 74 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 92.7 | -| ep_reward | 0.403 | +| constraint_violation | 71 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 105 | +| ep_reward | 0.42 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 118 | -| ep_reward | 0.473 | -| mse | 290 | +| ep_return | 131 | +| ep_reward | 0.525 | +| mse | 273 | | time/ | | | progress | 0.1 | | step | 1e+05 | -| step_time | 10.1 | +| step_time | 11 | -------------------------------------- -2023-10-19 15:12:54,216 : Eval | ep_lengths 201.30 +/- 97.42 | ep_return 90.658 +/- 47.574 -2023-10-19 15:12:54,218 : +2023-10-27 17:06:22,486 : Eval | ep_lengths 201.30 +/- 97.42 | ep_return 88.103 +/- 45.500 +2023-10-27 17:06:22,488 : -------------------------------------- | loss/ | | -| approx_kl | 0.0157 | -| entropy_loss | -3.65 | -| policy_loss | -0.0163 | -| value_loss | 3.35 | +| approx_kl | 0.035 | +| entropy_loss | -3.58 | +| policy_loss | -0.0133 | +| value_loss | 4.97 | | stat/ | | -| constraint_violation | 81 | +| constraint_violation | 76 | | ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 106 | -| ep_reward | 0.427 | +| ep_length | 203 | +| ep_return | 110 | +| ep_reward | 0.441 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 201 | -| ep_return | 90.7 | -| ep_reward | 0.363 | -| mse | 345 | +| ep_return | 88.1 | +| ep_reward | 0.352 | +| mse | 348 | | time/ | | | progress | 0.11 | | step | 1.1e+05 | -| step_time | 10.5 | +| step_time | 11 | -------------------------------------- -2023-10-19 15:14:55,581 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 123.725 +/- 49.125 -2023-10-19 15:14:55,591 : +2023-10-27 17:08:36,395 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 104.289 +/- 46.207 +2023-10-27 17:08:36,396 : -------------------------------------- | loss/ | | -| approx_kl | 0.0302 | -| entropy_loss | -3.68 | -| policy_loss | -0.0143 | -| value_loss | 1.54 | +| approx_kl | 0.0298 | +| entropy_loss | -3.59 | +| policy_loss | -0.013 | +| value_loss | 2.96 | | stat/ | | -| constraint_violation | 90 | +| constraint_violation | 80 | | ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 102 | -| ep_reward | 0.412 | +| ep_length | 202 | +| ep_return | 94.8 | +| ep_reward | 0.382 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 124 | -| ep_reward | 0.495 | -| mse | 244 | +| ep_return | 104 | +| ep_reward | 0.417 | +| mse | 280 | | time/ | | | progress | 0.12 | | step | 1.2e+05 | -| step_time | 9.82 | +| step_time | 11.3 | -------------------------------------- -2023-10-19 15:16:51,906 : Eval | ep_lengths 176.50 +/- 112.28 | ep_return 103.843 +/- 69.538 -2023-10-19 15:16:51,930 : +2023-10-27 17:10:45,675 : Eval | ep_lengths 176.40 +/- 112.43 | ep_return 86.268 +/- 59.223 +2023-10-27 17:10:45,689 : -------------------------------------- | loss/ | | -| approx_kl | 0.0403 | -| entropy_loss | -3.69 | -| policy_loss | -0.0133 | -| value_loss | 1.71 | +| approx_kl | 0.0186 | +| entropy_loss | -3.55 | +| policy_loss | -0.0192 | +| value_loss | 3 | | stat/ | | -| constraint_violation | 96 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 124 | -| ep_reward | 0.507 | +| constraint_violation | 88 | +| ep_constraint_vio... | 0.2 | +| ep_length | 200 | +| ep_return | 80.4 | +| ep_reward | 0.366 | | stat_eval/ | | | constraint_violation | 0.3 | | ep_length | 176 | -| ep_return | 104 | -| ep_reward | 0.415 | -| mse | 150 | +| ep_return | 86.3 | +| ep_reward | 0.345 | +| mse | 169 | | time/ | | | progress | 0.13 | | step | 1.3e+05 | -| step_time | 10 | +| step_time | 11.1 | -------------------------------------- -2023-10-19 15:18:51,680 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 129.840 +/- 17.785 -2023-10-19 15:18:51,693 : +2023-10-27 17:13:01,312 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 114.331 +/- 20.134 +2023-10-27 17:13:01,314 : -------------------------------------- | loss/ | | -| approx_kl | 0.0293 | -| entropy_loss | -3.67 | -| policy_loss | -0.0115 | -| value_loss | 1.75 | +| approx_kl | 0.0305 | +| entropy_loss | -3.53 | +| policy_loss | -0.0128 | +| value_loss | 1.97 | | stat/ | | -| constraint_violation | 106 | -| ep_constraint_vio... | 0.3 | -| ep_length | 176 | -| ep_return | 103 | -| ep_reward | 0.419 | +| constraint_violation | 94 | +| ep_constraint_vio... | 0.2 | +| ep_length | 201 | +| ep_return | 88.9 | +| ep_reward | 0.36 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 130 | -| ep_reward | 0.519 | -| mse | 261 | +| ep_return | 114 | +| ep_reward | 0.457 | +| mse | 291 | | time/ | | | progress | 0.14 | | step | 1.4e+05 | -| step_time | 9.94 | +| step_time | 10.9 | -------------------------------------- -2023-10-19 15:20:51,595 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 124.089 +/- 44.123 -2023-10-19 15:20:51,597 : +2023-10-27 17:15:13,064 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 110.862 +/- 43.531 +2023-10-27 17:15:13,076 : -------------------------------------- | loss/ | | -| approx_kl | 0.0246 | -| entropy_loss | -3.63 | -| policy_loss | -0.0094 | -| value_loss | 1.71 | +| approx_kl | 0.0364 | +| entropy_loss | -3.54 | +| policy_loss | -0.00447 | +| value_loss | 1.54 | | stat/ | | -| constraint_violation | 112 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 104 | -| ep_reward | 0.426 | +| constraint_violation | 101 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 122 | +| ep_reward | 0.49 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 124 | -| ep_reward | 0.496 | -| mse | 223 | +| ep_return | 111 | +| ep_reward | 0.443 | +| mse | 250 | | time/ | | | progress | 0.15 | | step | 1.5e+05 | -| step_time | 10.1 | +| step_time | 11 | -------------------------------------- -2023-10-19 15:22:53,972 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 130.043 +/- 6.656 -2023-10-19 15:22:53,981 : +2023-10-27 17:17:28,166 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 126.954 +/- 11.423 +2023-10-27 17:17:28,167 : -------------------------------------- | loss/ | | -| approx_kl | 0.0274 | -| entropy_loss | -3.62 | -| policy_loss | -0.00616 | -| value_loss | 1.36 | +| approx_kl | 0.0355 | +| entropy_loss | -3.54 | +| policy_loss | -0.0107 | +| value_loss | 1.83 | | stat/ | | -| constraint_violation | 125 | +| constraint_violation | 113 | | ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 108 | -| ep_reward | 0.434 | +| ep_length | 200 | +| ep_return | 90.2 | +| ep_reward | 0.361 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 130 | -| ep_reward | 0.52 | -| mse | 299 | +| ep_return | 127 | +| ep_reward | 0.508 | +| mse | 300 | | time/ | | | progress | 0.16 | | step | 1.6e+05 | -| step_time | 9.84 | +| step_time | 10.7 | -------------------------------------- -2023-10-19 15:24:52,233 : Eval | ep_lengths 225.80 +/- 72.60 | ep_return 129.752 +/- 51.394 -2023-10-19 15:24:52,235 : +2023-10-27 17:19:39,582 : Eval | ep_lengths 225.80 +/- 72.60 | ep_return 123.667 +/- 47.910 +2023-10-27 17:19:39,584 : -------------------------------------- | loss/ | | -| approx_kl | 0.0229 | -| entropy_loss | -3.58 | -| policy_loss | -0.0119 | -| value_loss | 0.85 | +| approx_kl | 0.0265 | +| entropy_loss | -3.5 | +| policy_loss | 0.00571 | +| value_loss | 2.03 | | stat/ | | -| constraint_violation | 131 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 133 | -| ep_reward | 0.53 | +| constraint_violation | 121 | +| ep_constraint_vio... | 0.2 | +| ep_length | 202 | +| ep_return | 107 | +| ep_reward | 0.431 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 226 | -| ep_return | 130 | -| ep_reward | 0.519 | -| mse | 218 | +| ep_return | 124 | +| ep_reward | 0.495 | +| mse | 225 | | time/ | | | progress | 0.17 | | step | 1.7e+05 | -| step_time | 10.1 | +| step_time | 11 | -------------------------------------- -2023-10-19 15:26:46,831 : Eval | ep_lengths 200.90 +/- 98.20 | ep_return 113.312 +/- 59.555 -2023-10-19 15:26:46,833 : +2023-10-27 17:21:49,201 : Eval | ep_lengths 200.90 +/- 98.20 | ep_return 114.550 +/- 60.121 +2023-10-27 17:21:49,202 : -------------------------------------- | loss/ | | -| approx_kl | 0.0313 | -| entropy_loss | -3.53 | -| policy_loss | -0.00692 | -| value_loss | 1.43 | +| approx_kl | 0.0327 | +| entropy_loss | -3.48 | +| policy_loss | -0.0161 | +| value_loss | 1.16 | | stat/ | | -| constraint_violation | 137 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 135 | -| ep_reward | 0.544 | +| constraint_violation | 127 | +| ep_constraint_vio... | 0.3 | +| ep_length | 177 | +| ep_return | 98 | +| ep_reward | 0.395 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 201 | -| ep_return | 113 | -| ep_reward | 0.453 | -| mse | 224 | +| ep_return | 115 | +| ep_reward | 0.458 | +| mse | 216 | | time/ | | | progress | 0.18 | | step | 1.8e+05 | -| step_time | 9.51 | +| step_time | 10.9 | -------------------------------------- -2023-10-19 15:28:39,411 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 133.564 +/- 47.999 -2023-10-19 15:28:39,440 : +2023-10-27 17:24:00,651 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 117.926 +/- 41.803 +2023-10-27 17:24:00,653 : -------------------------------------- | loss/ | | -| approx_kl | 0.0237 | -| entropy_loss | -3.51 | -| policy_loss | -0.0131 | -| value_loss | 1.57 | +| approx_kl | 0.0309 | +| entropy_loss | -3.54 | +| policy_loss | -0.0176 | +| value_loss | 0.988 | | stat/ | | -| constraint_violation | 143 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 160 | -| ep_reward | 0.64 | +| constraint_violation | 133 | +| ep_constraint_vio... | 0.2 | +| ep_length | 202 | +| ep_return | 115 | +| ep_reward | 0.467 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 134 | -| ep_reward | 0.534 | -| mse | 217 | +| ep_return | 118 | +| ep_reward | 0.472 | +| mse | 229 | | time/ | | | progress | 0.19 | | step | 1.9e+05 | -| step_time | 9.42 | +| step_time | 10.8 | -------------------------------------- -2023-10-19 15:30:33,843 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 126.029 +/- 47.342 -2023-10-19 15:30:33,845 : +2023-10-27 17:26:09,591 : Eval | ep_lengths 200.20 +/- 99.60 | ep_return 111.943 +/- 59.706 +2023-10-27 17:26:09,592 : -------------------------------------- | loss/ | | -| approx_kl | 0.0242 | -| entropy_loss | -3.49 | -| policy_loss | -0.0148 | -| value_loss | 1.65 | +| approx_kl | 0.0394 | +| entropy_loss | -3.56 | +| policy_loss | -0.00681 | +| value_loss | 1.24 | | stat/ | | -| constraint_violation | 153 | +| constraint_violation | 140 | | ep_constraint_vio... | 0.3 | | ep_length | 176 | -| ep_return | 99.8 | -| ep_reward | 0.404 | +| ep_return | 88.8 | +| ep_reward | 0.383 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 126 | -| ep_reward | 0.504 | -| mse | 258 | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 112 | +| ep_reward | 0.448 | +| mse | 232 | | time/ | | | progress | 0.2 | | step | 2e+05 | -| step_time | 9.54 | +| step_time | 11.2 | -------------------------------------- -2023-10-19 15:32:23,625 : Eval | ep_lengths 176.60 +/- 112.16 | ep_return 98.967 +/- 69.343 -2023-10-19 15:32:23,626 : +2023-10-27 17:28:16,301 : Eval | ep_lengths 176.60 +/- 112.16 | ep_return 95.566 +/- 65.598 +2023-10-27 17:28:16,303 : -------------------------------------- | loss/ | | -| approx_kl | 0.0343 | -| entropy_loss | -3.51 | -| policy_loss | -0.0129 | -| value_loss | 3.68 | +| approx_kl | 0.0337 | +| entropy_loss | -3.57 | +| policy_loss | 0.00447 | +| value_loss | 3.17 | | stat/ | | -| constraint_violation | 163 | -| ep_constraint_vio... | 0.2 | -| ep_length | 202 | -| ep_return | 115 | -| ep_reward | 0.472 | +| constraint_violation | 151 | +| ep_constraint_vio... | 0.5 | +| ep_length | 130 | +| ep_return | 70.4 | +| ep_reward | 0.304 | | stat_eval/ | | | constraint_violation | 0.3 | | ep_length | 177 | -| ep_return | 99 | -| ep_reward | 0.396 | -| mse | 214 | +| ep_return | 95.6 | +| ep_reward | 0.382 | +| mse | 209 | | time/ | | | progress | 0.21 | | step | 2.1e+05 | -| step_time | 9 | +| step_time | 11.1 | -------------------------------------- -2023-10-19 15:34:11,707 : Eval | ep_lengths 201.40 +/- 97.24 | ep_return 118.075 +/- 60.468 -2023-10-19 15:34:11,708 : +2023-10-27 17:30:25,261 : Eval | ep_lengths 201.60 +/- 96.83 | ep_return 110.343 +/- 57.483 +2023-10-27 17:30:25,262 : -------------------------------------- | loss/ | | -| approx_kl | 0.0196 | -| entropy_loss | -3.51 | -| policy_loss | -0.0155 | -| value_loss | 1.35 | +| approx_kl | 0.0261 | +| entropy_loss | -3.57 | +| policy_loss | -0.0004 | +| value_loss | 1.28 | | stat/ | | -| constraint_violation | 169 | +| constraint_violation | 158 | | ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 142 | -| ep_reward | 0.571 | +| ep_length | 225 | +| ep_return | 127 | +| ep_reward | 0.511 | | stat_eval/ | | | constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 118 | -| ep_reward | 0.472 | -| mse | 179 | +| ep_length | 202 | +| ep_return | 110 | +| ep_reward | 0.441 | +| mse | 187 | | time/ | | | progress | 0.22 | | step | 2.2e+05 | -| step_time | 9.1 | +| step_time | 10.5 | -------------------------------------- -2023-10-19 15:36:05,136 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 147.897 +/- 21.826 -2023-10-19 15:36:05,145 : +2023-10-27 17:32:38,959 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 139.032 +/- 22.952 +2023-10-27 17:32:38,968 : -------------------------------------- | loss/ | | -| approx_kl | 0.0281 | -| entropy_loss | -3.5 | -| policy_loss | -0.00239 | -| value_loss | 1.28 | +| approx_kl | 0.026 | +| entropy_loss | -3.6 | +| policy_loss | -0.00779 | +| value_loss | 1.25 | | stat/ | | -| constraint_violation | 173 | +| constraint_violation | 163 | | ep_constraint_vio... | 0.1 | | ep_length | 225 | -| ep_return | 130 | -| ep_reward | 0.522 | +| ep_return | 135 | +| ep_reward | 0.54 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 148 | -| ep_reward | 0.592 | -| mse | 264 | +| ep_return | 139 | +| ep_reward | 0.556 | +| mse | 269 | | time/ | | | progress | 0.23 | | step | 2.3e+05 | -| step_time | 9.25 | +| step_time | 10.7 | -------------------------------------- -2023-10-19 15:37:54,639 : Eval | ep_lengths 200.20 +/- 99.60 | ep_return 105.996 +/- 57.517 -2023-10-19 15:37:54,661 : +2023-10-27 17:34:48,624 : Eval | ep_lengths 200.20 +/- 99.60 | ep_return 104.460 +/- 56.671 +2023-10-27 17:34:48,626 : -------------------------------------- | loss/ | | -| approx_kl | 0.0316 | -| entropy_loss | -3.48 | -| policy_loss | -0.00878 | -| value_loss | 0.5 | +| approx_kl | 0.0281 | +| entropy_loss | -3.62 | +| policy_loss | -0.0157 | +| value_loss | 1.53 | | stat/ | | -| constraint_violation | 176 | +| constraint_violation | 167 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 161 | -| ep_reward | 0.646 | +| ep_return | 144 | +| ep_reward | 0.574 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 200 | -| ep_return | 106 | -| ep_reward | 0.424 | -| mse | 236 | +| ep_return | 104 | +| ep_reward | 0.418 | +| mse | 226 | | time/ | | | progress | 0.24 | | step | 2.4e+05 | -| step_time | 9.19 | +| step_time | 11 | -------------------------------------- -2023-10-19 15:39:43,502 : Eval | ep_lengths 201.90 +/- 96.26 | ep_return 116.614 +/- 62.199 -2023-10-19 15:39:43,503 : +2023-10-27 17:36:57,661 : Eval | ep_lengths 201.90 +/- 96.26 | ep_return 116.575 +/- 61.598 +2023-10-27 17:36:57,662 : -------------------------------------- | loss/ | | -| approx_kl | 0.0393 | -| entropy_loss | -3.45 | -| policy_loss | -0.00971 | -| value_loss | 0.624 | +| approx_kl | 0.0264 | +| entropy_loss | -3.57 | +| policy_loss | -0.0189 | +| value_loss | 1.49 | | stat/ | | -| constraint_violation | 181 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 130 | -| ep_reward | 0.525 | +| constraint_violation | 171 | +| ep_constraint_vio... | 0.2 | +| ep_length | 200 | +| ep_return | 119 | +| ep_reward | 0.5 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 202 | | ep_return | 117 | | ep_reward | 0.466 | -| mse | 245 | +| mse | 236 | | time/ | | | progress | 0.25 | | step | 2.5e+05 | -| step_time | 9.27 | +| step_time | 10.9 | -------------------------------------- -2023-10-19 15:41:36,447 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 146.270 +/- 21.544 -2023-10-19 15:41:36,448 : +2023-10-27 17:39:11,698 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 143.549 +/- 21.826 +2023-10-27 17:39:11,705 : -------------------------------------- | loss/ | | -| approx_kl | 0.0334 | -| entropy_loss | -3.46 | -| policy_loss | -0.0237 | -| value_loss | 1.71 | +| approx_kl | 0.0337 | +| entropy_loss | -3.53 | +| policy_loss | -0.01 | +| value_loss | 1.01 | | stat/ | | -| constraint_violation | 187 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 120 | -| ep_reward | 0.491 | +| constraint_violation | 176 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 141 | +| ep_reward | 0.564 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 146 | -| ep_reward | 0.585 | -| mse | 248 | +| ep_return | 144 | +| ep_reward | 0.574 | +| mse | 250 | | time/ | | | progress | 0.26 | | step | 2.6e+05 | -| step_time | 9.33 | +| step_time | 10.9 | -------------------------------------- -2023-10-19 15:43:24,204 : Eval | ep_lengths 175.30 +/- 114.11 | ep_return 95.690 +/- 66.829 -2023-10-19 15:43:24,205 : +2023-10-27 17:41:19,535 : Eval | ep_lengths 200.20 +/- 99.60 | ep_return 116.302 +/- 60.686 +2023-10-27 17:41:19,536 : -------------------------------------- | loss/ | | -| approx_kl | 0.026 | -| entropy_loss | -3.45 | -| policy_loss | 0.00261 | -| value_loss | 4.25 | +| approx_kl | 0.0295 | +| entropy_loss | -3.48 | +| policy_loss | -0.0165 | +| value_loss | 1.25 | | stat/ | | -| constraint_violation | 189 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 130 | -| ep_reward | 0.543 | +| constraint_violation | 180 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 146 | +| ep_reward | 0.584 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 175 | -| ep_return | 95.7 | -| ep_reward | 0.383 | -| mse | 183 | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 116 | +| ep_reward | 0.465 | +| mse | 233 | | time/ | | | progress | 0.27 | | step | 2.7e+05 | -| step_time | 9.43 | +| step_time | 10.9 | -------------------------------------- -2023-10-19 15:45:23,593 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 159.805 +/- 19.783 -2023-10-19 15:45:23,603 : +2023-10-27 17:43:32,688 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 156.682 +/- 20.735 +2023-10-27 17:43:32,696 : -------------------------------------- | loss/ | | -| approx_kl | 0.0333 | -| entropy_loss | -3.46 | -| policy_loss | -0.0059 | -| value_loss | 0.877 | +| approx_kl | 0.0398 | +| entropy_loss | -3.5 | +| policy_loss | -0.00102 | +| value_loss | 2.11 | | stat/ | | -| constraint_violation | 198 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 121 | -| ep_reward | 0.485 | +| constraint_violation | 191 | +| ep_constraint_vio... | 0.5 | +| ep_length | 126 | +| ep_return | 73.8 | +| ep_reward | 0.314 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 160 | -| ep_reward | 0.639 | -| mse | 220 | +| ep_return | 157 | +| ep_reward | 0.627 | +| mse | 226 | | time/ | | | progress | 0.28 | | step | 2.8e+05 | -| step_time | 9.83 | +| step_time | 11 | -------------------------------------- -2023-10-19 15:47:21,853 : Eval | ep_lengths 201.40 +/- 97.21 | ep_return 122.331 +/- 68.927 -2023-10-19 15:47:21,855 : +2023-10-27 17:45:40,823 : Eval | ep_lengths 201.40 +/- 97.21 | ep_return 118.223 +/- 62.470 +2023-10-27 17:45:40,825 : -------------------------------------- | loss/ | | -| approx_kl | 0.0174 | -| entropy_loss | -3.43 | -| policy_loss | -0.00972 | -| value_loss | 2.41 | +| approx_kl | 0.0216 | +| entropy_loss | -3.5 | +| policy_loss | -0.00971 | +| value_loss | 1.87 | | stat/ | | -| constraint_violation | 204 | -| ep_constraint_vio... | 0.3 | -| ep_length | 178 | -| ep_return | 110 | -| ep_reward | 0.518 | +| constraint_violation | 195 | +| ep_constraint_vio... | 0.2 | +| ep_length | 201 | +| ep_return | 111 | +| ep_reward | 0.445 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 201 | -| ep_return | 122 | -| ep_reward | 0.489 | -| mse | 169 | +| ep_return | 118 | +| ep_reward | 0.473 | +| mse | 160 | | time/ | | | progress | 0.29 | | step | 2.9e+05 | -| step_time | 10.1 | +| step_time | 10.9 | -------------------------------------- -2023-10-19 15:49:21,376 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 126.597 +/- 42.517 -2023-10-19 15:49:21,378 : +2023-10-27 17:47:51,384 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 125.059 +/- 42.150 +2023-10-27 17:47:51,386 : -------------------------------------- | loss/ | | -| approx_kl | 0.0347 | -| entropy_loss | -3.45 | -| policy_loss | -0.00418 | -| value_loss | 0.6 | +| approx_kl | 0.0362 | +| entropy_loss | -3.49 | +| policy_loss | -0.0113 | +| value_loss | 0.583 | | stat/ | | -| constraint_violation | 210 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 140 | -| ep_reward | 0.564 | +| constraint_violation | 201 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 139 | +| ep_reward | 0.558 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 127 | -| ep_reward | 0.506 | -| mse | 290 | +| ep_return | 125 | +| ep_reward | 0.5 | +| mse | 292 | | time/ | | | progress | 0.3 | | step | 3e+05 | -| step_time | 9.93 | +| step_time | 10.6 | -------------------------------------- -2023-10-19 15:51:23,424 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 141.143 +/- 26.839 -2023-10-19 15:51:23,425 : +2023-10-27 17:50:04,840 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 144.818 +/- 24.157 +2023-10-27 17:50:04,842 : -------------------------------------- | loss/ | | -| approx_kl | 0.0348 | +| approx_kl | 0.0354 | | entropy_loss | -3.49 | -| policy_loss | -0.00892 | -| value_loss | 0.63 | +| policy_loss | -0.0056 | +| value_loss | 0.795 | | stat/ | | -| constraint_violation | 217 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 158 | -| ep_reward | 0.631 | +| constraint_violation | 211 | +| ep_constraint_vio... | 0.1 | +| ep_length | 226 | +| ep_return | 150 | +| ep_reward | 0.599 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 141 | -| ep_reward | 0.565 | -| mse | 300 | +| ep_return | 145 | +| ep_reward | 0.579 | +| mse | 294 | | time/ | | | progress | 0.31 | | step | 3.1e+05 | -| step_time | 10.1 | +| step_time | 10.8 | -------------------------------------- -2023-10-19 15:53:21,048 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 143.496 +/- 21.043 -2023-10-19 15:53:21,049 : +2023-10-27 17:52:18,001 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 145.540 +/- 21.610 +2023-10-27 17:52:18,003 : -------------------------------------- | loss/ | | -| approx_kl | 0.046 | -| entropy_loss | -3.5 | -| policy_loss | -0.00185 | -| value_loss | 1.18 | +| approx_kl | 0.0208 | +| entropy_loss | -3.54 | +| policy_loss | -0.0169 | +| value_loss | 1.99 | | stat/ | | -| constraint_violation | 222 | -| ep_constraint_vio... | 0.1 | -| ep_length | 227 | -| ep_return | 137 | -| ep_reward | 0.551 | +| constraint_violation | 216 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 143 | +| ep_reward | 0.571 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 143 | -| ep_reward | 0.574 | +| ep_return | 146 | +| ep_reward | 0.582 | | mse | 311 | | time/ | | | progress | 0.32 | | step | 3.2e+05 | -| step_time | 9.51 | +| step_time | 10.8 | -------------------------------------- -2023-10-19 15:55:13,050 : Eval | ep_lengths 202.00 +/- 96.08 | ep_return 117.824 +/- 64.960 -2023-10-19 15:55:13,052 : +2023-10-27 17:54:25,494 : Eval | ep_lengths 201.90 +/- 96.28 | ep_return 120.388 +/- 66.170 +2023-10-27 17:54:25,496 : -------------------------------------- | loss/ | | -| approx_kl | 0.0312 | -| entropy_loss | -3.45 | -| policy_loss | -0.0101 | -| value_loss | 1.41 | +| approx_kl | 0.0347 | +| entropy_loss | -3.48 | +| policy_loss | 0.00117 | +| value_loss | 1.16 | | stat/ | | -| constraint_violation | 230 | +| constraint_violation | 223 | | ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 122 | -| ep_reward | 0.496 | +| ep_length | 201 | +| ep_return | 113 | +| ep_reward | 0.454 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 202 | -| ep_return | 118 | -| ep_reward | 0.471 | -| mse | 204 | +| ep_return | 120 | +| ep_reward | 0.482 | +| mse | 207 | | time/ | | | progress | 0.33 | | step | 3.3e+05 | -| step_time | 9.45 | +| step_time | 10.8 | -------------------------------------- -2023-10-19 15:57:09,018 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 140.310 +/- 18.480 -2023-10-19 15:57:09,020 : +2023-10-27 17:56:32,821 : Eval | ep_lengths 200.40 +/- 99.20 | ep_return 118.122 +/- 61.015 +2023-10-27 17:56:32,823 : -------------------------------------- | loss/ | | -| approx_kl | 0.0443 | -| entropy_loss | -3.42 | -| policy_loss | -0.00496 | -| value_loss | 4.49 | +| approx_kl | 0.0333 | +| entropy_loss | -3.45 | +| policy_loss | -0.0134 | +| value_loss | 1.2 | | stat/ | | -| constraint_violation | 243 | -| ep_constraint_vio... | 0.6 | -| ep_length | 102 | -| ep_return | 51.9 | -| ep_reward | 0.295 | +| constraint_violation | 230 | +| ep_constraint_vio... | 0.4 | +| ep_length | 151 | +| ep_return | 97.2 | +| ep_reward | 0.436 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 140 | -| ep_reward | 0.561 | -| mse | 297 | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 118 | +| ep_reward | 0.472 | +| mse | 220 | | time/ | | | progress | 0.34 | | step | 3.4e+05 | -| step_time | 9.46 | +| step_time | 11 | -------------------------------------- -2023-10-19 15:59:02,946 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 140.655 +/- 32.336 -2023-10-19 15:59:02,947 : +2023-10-27 17:58:44,911 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 145.064 +/- 36.416 +2023-10-27 17:58:44,913 : -------------------------------------- | loss/ | | -| approx_kl | 0.0271 | -| entropy_loss | -3.4 | -| policy_loss | -0.00271 | -| value_loss | 0.869 | +| approx_kl | 0.0275 | +| entropy_loss | -3.42 | +| policy_loss | -0.0107 | +| value_loss | 0.639 | | stat/ | | -| constraint_violation | 247 | +| constraint_violation | 242 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 140 | -| ep_reward | 0.56 | +| ep_return | 148 | +| ep_reward | 0.592 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 141 | -| ep_reward | 0.563 | -| mse | 340 | +| ep_return | 145 | +| ep_reward | 0.58 | +| mse | 339 | | time/ | | | progress | 0.35 | | step | 3.5e+05 | -| step_time | 9.39 | +| step_time | 10.8 | -------------------------------------- -2023-10-19 16:00:54,813 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 133.556 +/- 50.538 -2023-10-19 16:00:54,814 : +2023-10-27 18:00:54,801 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 130.822 +/- 52.488 +2023-10-27 18:00:54,803 : -------------------------------------- | loss/ | | -| approx_kl | 0.0331 | -| entropy_loss | -3.47 | -| policy_loss | -0.00759 | -| value_loss | 0.437 | +| approx_kl | 0.0343 | +| entropy_loss | -3.41 | +| policy_loss | -0.0141 | +| value_loss | 1.96 | | stat/ | | -| constraint_violation | 252 | +| constraint_violation | 246 | | ep_constraint_vio... | 0.2 | | ep_length | 201 | -| ep_return | 121 | -| ep_reward | 0.493 | +| ep_return | 127 | +| ep_reward | 0.51 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 134 | -| ep_reward | 0.534 | -| mse | 227 | +| ep_return | 131 | +| ep_reward | 0.523 | +| mse | 233 | | time/ | | | progress | 0.36 | | step | 3.6e+05 | -| step_time | 9.42 | +| step_time | 10.8 | -------------------------------------- -2023-10-19 16:02:44,808 : Eval | ep_lengths 202.30 +/- 95.41 | ep_return 116.845 +/- 62.054 -2023-10-19 16:02:44,810 : +2023-10-27 18:03:02,929 : Eval | ep_lengths 202.30 +/- 95.41 | ep_return 119.182 +/- 62.812 +2023-10-27 18:03:02,930 : -------------------------------------- | loss/ | | -| approx_kl | 0.0329 | +| approx_kl | 0.0273 | | entropy_loss | -3.45 | -| policy_loss | -0.00879 | -| value_loss | 1.94 | +| policy_loss | -0.00595 | +| value_loss | 2.27 | | stat/ | | -| constraint_violation | 258 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 116 | -| ep_reward | 0.481 | +| constraint_violation | 251 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 133 | +| ep_reward | 0.549 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 202 | -| ep_return | 117 | -| ep_reward | 0.467 | -| mse | 184 | +| ep_return | 119 | +| ep_reward | 0.477 | +| mse | 177 | | time/ | | | progress | 0.37 | | step | 3.7e+05 | -| step_time | 9.42 | +| step_time | 11 | -------------------------------------- -2023-10-19 16:04:34,379 : Eval | ep_lengths 201.00 +/- 98.02 | ep_return 111.688 +/- 60.881 -2023-10-19 16:04:34,381 : +2023-10-27 18:05:12,140 : Eval | ep_lengths 201.00 +/- 98.02 | ep_return 114.878 +/- 64.089 +2023-10-27 18:05:12,142 : -------------------------------------- | loss/ | | -| approx_kl | 0.0321 | -| entropy_loss | -3.48 | -| policy_loss | 8.54e-05 | -| value_loss | 1.09 | +| approx_kl | 0.0266 | +| entropy_loss | -3.44 | +| policy_loss | -0.0116 | +| value_loss | 3.85 | | stat/ | | -| constraint_violation | 265 | -| ep_constraint_vio... | 0.2 | -| ep_length | 203 | -| ep_return | 131 | -| ep_reward | 0.526 | +| constraint_violation | 259 | +| ep_constraint_vio... | 0.3 | +| ep_length | 179 | +| ep_return | 108 | +| ep_reward | 0.436 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 201 | -| ep_return | 112 | -| ep_reward | 0.447 | -| mse | 194 | +| ep_return | 115 | +| ep_reward | 0.46 | +| mse | 191 | | time/ | | | progress | 0.38 | | step | 3.8e+05 | -| step_time | 9.08 | +| step_time | 11.2 | -------------------------------------- -2023-10-19 16:06:23,753 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 109.146 +/- 57.599 -2023-10-19 16:06:23,755 : +2023-10-27 18:07:20,976 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 109.281 +/- 57.578 +2023-10-27 18:07:20,978 : -------------------------------------- | loss/ | | -| approx_kl | 0.0281 | -| entropy_loss | -3.51 | -| policy_loss | -0.00865 | -| value_loss | 0.705 | +| approx_kl | 0.0282 | +| entropy_loss | -3.41 | +| policy_loss | -0.00922 | +| value_loss | 0.697 | | stat/ | | -| constraint_violation | 267 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 141 | -| ep_reward | 0.565 | +| constraint_violation | 260 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 152 | +| ep_reward | 0.606 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 200 | | ep_return | 109 | | ep_reward | 0.437 | -| mse | 272 | +| mse | 276 | | time/ | | | progress | 0.39 | | step | 3.9e+05 | -| step_time | 9.27 | +| step_time | 10.7 | -------------------------------------- -2023-10-19 16:08:14,654 : Eval | ep_lengths 225.60 +/- 73.20 | ep_return 121.531 +/- 47.655 -2023-10-19 16:08:14,655 : +2023-10-27 18:09:32,225 : Eval | ep_lengths 225.60 +/- 73.20 | ep_return 129.976 +/- 48.045 +2023-10-27 18:09:32,226 : -------------------------------------- | loss/ | | -| approx_kl | 0.024 | -| entropy_loss | -3.57 | -| policy_loss | -0.00986 | -| value_loss | 1.58 | +| approx_kl | 0.0331 | +| entropy_loss | -3.41 | +| policy_loss | -0.0106 | +| value_loss | 0.748 | | stat/ | | -| constraint_violation | 269 | +| constraint_violation | 263 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 151 | -| ep_reward | 0.602 | +| ep_return | 158 | +| ep_reward | 0.631 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 226 | -| ep_return | 122 | -| ep_reward | 0.486 | -| mse | 253 | +| ep_return | 130 | +| ep_reward | 0.52 | +| mse | 244 | | time/ | | | progress | 0.4 | | step | 4e+05 | -| step_time | 9.12 | +| step_time | 11.2 | -------------------------------------- -2023-10-19 16:10:05,012 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 124.340 +/- 47.152 -2023-10-19 16:10:05,034 : +2023-10-27 18:11:45,653 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 143.045 +/- 22.379 +2023-10-27 18:11:45,654 : -------------------------------------- | loss/ | | -| approx_kl | 0.026 | -| entropy_loss | -3.62 | -| policy_loss | -0.01 | -| value_loss | 0.392 | +| approx_kl | 0.0239 | +| entropy_loss | -3.44 | +| policy_loss | -0.0108 | +| value_loss | 0.556 | | stat/ | | -| constraint_violation | 271 | +| constraint_violation | 265 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 153 | -| ep_reward | 0.611 | +| ep_return | 146 | +| ep_reward | 0.585 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 124 | -| ep_reward | 0.497 | -| mse | 246 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 143 | +| ep_reward | 0.572 | +| mse | 253 | | time/ | | | progress | 0.41 | | step | 4.1e+05 | -| step_time | 9.09 | +| step_time | 10.9 | -------------------------------------- -2023-10-19 16:11:52,388 : Eval | ep_lengths 176.70 +/- 111.97 | ep_return 103.327 +/- 68.720 -2023-10-19 16:11:52,415 : +2023-10-27 18:13:51,016 : Eval | ep_lengths 176.70 +/- 111.97 | ep_return 103.444 +/- 69.554 +2023-10-27 18:13:51,017 : -------------------------------------- | loss/ | | -| approx_kl | 0.0242 | -| entropy_loss | -3.65 | -| policy_loss | -0.0108 | -| value_loss | 7.01 | +| approx_kl | 0.0308 | +| entropy_loss | -3.4 | +| policy_loss | -0.0112 | +| value_loss | 0.862 | | stat/ | | -| constraint_violation | 281 | -| ep_constraint_vio... | 0.4 | -| ep_length | 152 | -| ep_return | 90.7 | -| ep_reward | 0.376 | +| constraint_violation | 271 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 144 | +| ep_reward | 0.58 | | stat_eval/ | | | constraint_violation | 0.3 | | ep_length | 177 | | ep_return | 103 | -| ep_reward | 0.413 | -| mse | 146 | +| ep_reward | 0.414 | +| mse | 145 | | time/ | | | progress | 0.42 | | step | 4.2e+05 | -| step_time | 9.52 | +| step_time | 10.8 | -------------------------------------- -2023-10-19 16:13:38,758 : Eval | ep_lengths 175.70 +/- 113.50 | ep_return 97.202 +/- 64.173 -2023-10-19 16:13:38,759 : +2023-10-27 18:15:56,900 : Eval | ep_lengths 175.80 +/- 113.35 | ep_return 96.710 +/- 65.136 +2023-10-27 18:15:56,902 : -------------------------------------- | loss/ | | -| approx_kl | 0.0311 | -| entropy_loss | -3.66 | -| policy_loss | -0.0259 | -| value_loss | 0.617 | +| approx_kl | 0.0186 | +| entropy_loss | -3.46 | +| policy_loss | -0.00926 | +| value_loss | 1.43 | | stat/ | | -| constraint_violation | 292 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 138 | -| ep_reward | 0.551 | +| constraint_violation | 284 | +| ep_constraint_vio... | 0.2 | +| ep_length | 203 | +| ep_return | 129 | +| ep_reward | 0.515 | | stat_eval/ | | | constraint_violation | 0.3 | | ep_length | 176 | -| ep_return | 97.2 | -| ep_reward | 0.389 | -| mse | 189 | +| ep_return | 96.7 | +| ep_reward | 0.387 | +| mse | 184 | | time/ | | | progress | 0.43 | | step | 4.3e+05 | -| step_time | 9.09 | +| step_time | 10.7 | -------------------------------------- -2023-10-19 16:15:29,101 : Eval | ep_lengths 226.50 +/- 70.50 | ep_return 121.416 +/- 46.361 -2023-10-19 16:15:29,102 : +2023-10-27 18:18:05,079 : Eval | ep_lengths 201.70 +/- 96.64 | ep_return 117.213 +/- 61.874 +2023-10-27 18:18:05,081 : -------------------------------------- | loss/ | | -| approx_kl | 0.0341 | -| entropy_loss | -3.67 | -| policy_loss | -0.0136 | -| value_loss | 0.75 | +| approx_kl | 0.0342 | +| entropy_loss | -3.48 | +| policy_loss | -0.0178 | +| value_loss | 0.754 | | stat/ | | -| constraint_violation | 298 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 143 | -| ep_reward | 0.587 | +| constraint_violation | 289 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 157 | +| ep_reward | 0.628 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 226 | -| ep_return | 121 | -| ep_reward | 0.486 | -| mse | 295 | +| constraint_violation | 0.2 | +| ep_length | 202 | +| ep_return | 117 | +| ep_reward | 0.469 | +| mse | 219 | | time/ | | | progress | 0.44 | | step | 4.4e+05 | -| step_time | 9.13 | +| step_time | 10.8 | -------------------------------------- -2023-10-19 16:17:21,793 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 147.930 +/- 24.047 -2023-10-19 16:17:21,795 : +2023-10-27 18:20:18,333 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 152.301 +/- 23.582 +2023-10-27 18:20:18,334 : -------------------------------------- | loss/ | | -| approx_kl | 0.0238 | -| entropy_loss | -3.66 | -| policy_loss | -0.00452 | -| value_loss | 3.17 | +| approx_kl | 0.0233 | +| entropy_loss | -3.46 | +| policy_loss | -0.0112 | +| value_loss | 0.45 | | stat/ | | -| constraint_violation | 303 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | +| constraint_violation | 294 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | | ep_return | 126 | -| ep_reward | 0.57 | +| ep_reward | 0.518 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 148 | -| ep_reward | 0.592 | -| mse | 219 | +| ep_return | 152 | +| ep_reward | 0.609 | +| mse | 212 | | time/ | | | progress | 0.45 | | step | 4.5e+05 | -| step_time | 9.11 | +| step_time | 10.9 | -------------------------------------- -2023-10-19 16:19:11,742 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 141.024 +/- 54.580 -2023-10-19 16:19:11,744 : +2023-10-27 18:22:28,375 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 144.592 +/- 54.300 +2023-10-27 18:22:28,377 : -------------------------------------- | loss/ | | -| approx_kl | 0.031 | -| entropy_loss | -3.65 | -| policy_loss | -0.00632 | -| value_loss | 0.56 | +| approx_kl | 0.0313 | +| entropy_loss | -3.5 | +| policy_loss | -0.00838 | +| value_loss | 0.402 | | stat/ | | -| constraint_violation | 310 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 151 | -| ep_reward | 0.603 | +| constraint_violation | 302 | +| ep_constraint_vio... | 0.2 | +| ep_length | 201 | +| ep_return | 135 | +| ep_reward | 0.541 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 141 | -| ep_reward | 0.564 | -| mse | 111 | +| ep_return | 145 | +| ep_reward | 0.578 | +| mse | 116 | | time/ | | | progress | 0.46 | | step | 4.6e+05 | -| step_time | 9.07 | +| step_time | 10.8 | -------------------------------------- -2023-10-19 16:20:58,761 : Eval | ep_lengths 201.40 +/- 97.24 | ep_return 108.547 +/- 55.848 -2023-10-19 16:20:58,762 : +2023-10-27 18:24:37,701 : Eval | ep_lengths 226.20 +/- 71.40 | ep_return 123.913 +/- 45.026 +2023-10-27 18:24:37,703 : -------------------------------------- | loss/ | | -| approx_kl | 0.034 | -| entropy_loss | -3.62 | -| policy_loss | -0.0146 | -| value_loss | 0.65 | +| approx_kl | 0.0239 | +| entropy_loss | -3.49 | +| policy_loss | -0.00978 | +| value_loss | 0.326 | | stat/ | | -| constraint_violation | 310 | +| constraint_violation | 302 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 160 | -| ep_reward | 0.642 | +| ep_return | 147 | +| ep_reward | 0.588 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 109 | -| ep_reward | 0.434 | -| mse | 240 | +| constraint_violation | 0.1 | +| ep_length | 226 | +| ep_return | 124 | +| ep_reward | 0.496 | +| mse | 252 | | time/ | | | progress | 0.47 | | step | 4.7e+05 | -| step_time | 9.02 | +| step_time | 10.8 | -------------------------------------- -2023-10-19 16:22:43,608 : Eval | ep_lengths 176.30 +/- 112.59 | ep_return 105.402 +/- 70.774 -2023-10-19 16:22:43,627 : +2023-10-27 18:26:42,694 : Eval | ep_lengths 176.30 +/- 112.59 | ep_return 110.344 +/- 74.064 +2023-10-27 18:26:42,695 : -------------------------------------- | loss/ | | -| approx_kl | 0.0316 | -| entropy_loss | -3.7 | -| policy_loss | -0.00887 | -| value_loss | 3.16 | +| approx_kl | 0.0165 | +| entropy_loss | -3.49 | +| policy_loss | -0.0185 | +| value_loss | 0.435 | | stat/ | | -| constraint_violation | 321 | +| constraint_violation | 311 | | ep_constraint_vio... | 0.2 | | ep_length | 202 | -| ep_return | 127 | -| ep_reward | 0.508 | +| ep_return | 118 | +| ep_reward | 0.473 | | stat_eval/ | | | constraint_violation | 0.3 | | ep_length | 176 | -| ep_return | 105 | -| ep_reward | 0.422 | -| mse | 162 | +| ep_return | 110 | +| ep_reward | 0.441 | +| mse | 163 | | time/ | | | progress | 0.48 | | step | 4.8e+05 | -| step_time | 9.19 | +| step_time | 10.9 | -------------------------------------- -2023-10-19 16:24:32,575 : Eval | ep_lengths 225.80 +/- 72.60 | ep_return 127.883 +/- 53.219 -2023-10-19 16:24:32,576 : +2023-10-27 18:28:53,544 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 124.088 +/- 48.848 +2023-10-27 18:28:53,545 : -------------------------------------- | loss/ | | -| approx_kl | 0.0368 | -| entropy_loss | -3.68 | -| policy_loss | 0.00212 | -| value_loss | 0.569 | +| approx_kl | 0.0214 | +| entropy_loss | -3.49 | +| policy_loss | -0.0145 | +| value_loss | 1.46 | | stat/ | | -| constraint_violation | 328 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 147 | -| ep_reward | 0.587 | +| constraint_violation | 319 | +| ep_constraint_vio... | 0.2 | +| ep_length | 201 | +| ep_return | 131 | +| ep_reward | 0.559 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 226 | -| ep_return | 128 | -| ep_reward | 0.512 | -| mse | 322 | +| ep_return | 124 | +| ep_reward | 0.496 | +| mse | 328 | | time/ | | | progress | 0.49 | | step | 4.9e+05 | -| step_time | 8.99 | +| step_time | 10.6 | -------------------------------------- -2023-10-19 16:26:23,987 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 144.689 +/- 13.061 -2023-10-19 16:26:23,988 : +2023-10-27 18:31:06,822 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 144.482 +/- 13.285 +2023-10-27 18:31:06,824 : -------------------------------------- | loss/ | | -| approx_kl | 0.0324 | -| entropy_loss | -3.62 | -| policy_loss | -0.00947 | -| value_loss | 0.829 | +| approx_kl | 0.0303 | +| entropy_loss | -3.44 | +| policy_loss | -0.00358 | +| value_loss | 1.48 | | stat/ | | -| constraint_violation | 335 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 149 | -| ep_reward | 0.597 | +| constraint_violation | 327 | +| ep_constraint_vio... | 0.2 | +| ep_length | 200 | +| ep_return | 122 | +| ep_reward | 0.5 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 145 | -| ep_reward | 0.579 | -| mse | 265 | +| ep_return | 144 | +| ep_reward | 0.578 | +| mse | 264 | | time/ | | | progress | 0.5 | | step | 5e+05 | -| step_time | 9.12 | +| step_time | 11 | -------------------------------------- -2023-10-19 16:28:11,842 : Eval | ep_lengths 200.50 +/- 99.00 | ep_return 125.058 +/- 67.008 -2023-10-19 16:28:11,843 : +2023-10-27 18:33:15,197 : Eval | ep_lengths 200.40 +/- 99.20 | ep_return 112.470 +/- 61.150 +2023-10-27 18:33:15,198 : -------------------------------------- | loss/ | | -| approx_kl | 0.0343 | -| entropy_loss | -3.59 | -| policy_loss | 0.00785 | -| value_loss | 0.888 | +| approx_kl | 0.0247 | +| entropy_loss | -3.44 | +| policy_loss | -0.0157 | +| value_loss | 0.78 | | stat/ | | -| constraint_violation | 340 | +| constraint_violation | 330 | | ep_constraint_vio... | 0.1 | | ep_length | 225 | -| ep_return | 152 | -| ep_reward | 0.62 | +| ep_return | 129 | +| ep_reward | 0.515 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 200 | -| ep_return | 125 | -| ep_reward | 0.5 | -| mse | 138 | +| ep_return | 112 | +| ep_reward | 0.45 | +| mse | 158 | | time/ | | | progress | 0.51 | | step | 5.1e+05 | -| step_time | 9.31 | +| step_time | 10.9 | -------------------------------------- -2023-10-19 16:29:56,204 : Eval | ep_lengths 175.90 +/- 113.20 | ep_return 112.082 +/- 76.330 -2023-10-19 16:29:56,205 : +2023-10-27 18:35:20,431 : Eval | ep_lengths 175.90 +/- 113.20 | ep_return 113.259 +/- 76.451 +2023-10-27 18:35:20,433 : -------------------------------------- | loss/ | | -| approx_kl | 0.0325 | -| entropy_loss | -3.62 | -| policy_loss | -0.00701 | -| value_loss | 0.327 | +| approx_kl | 0.0274 | +| entropy_loss | -3.38 | +| policy_loss | -0.015 | +| value_loss | 0.617 | | stat/ | | -| constraint_violation | 343 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 156 | -| ep_reward | 0.626 | +| constraint_violation | 333 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 150 | +| ep_reward | 0.599 | | stat_eval/ | | | constraint_violation | 0.3 | | ep_length | 176 | -| ep_return | 112 | -| ep_reward | 0.448 | -| mse | 129 | +| ep_return | 113 | +| ep_reward | 0.453 | +| mse | 126 | | time/ | | | progress | 0.52 | | step | 5.2e+05 | -| step_time | 8.74 | +| step_time | 10.8 | -------------------------------------- -2023-10-19 16:31:45,458 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 132.305 +/- 47.736 -2023-10-19 16:31:45,459 : +2023-10-27 18:37:34,723 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 139.207 +/- 18.638 +2023-10-27 18:37:34,725 : -------------------------------------- | loss/ | | -| approx_kl | 0.0371 | -| entropy_loss | -3.61 | -| policy_loss | 0.00261 | -| value_loss | 4.67 | +| approx_kl | 0.0256 | +| entropy_loss | -3.36 | +| policy_loss | -0.0155 | +| value_loss | 0.645 | | stat/ | | -| constraint_violation | 348 | -| ep_constraint_vio... | 0.3 | -| ep_length | 175 | -| ep_return | 115 | -| ep_reward | 0.625 | +| constraint_violation | 335 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 151 | +| ep_reward | 0.603 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 132 | -| ep_reward | 0.529 | -| mse | 233 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 139 | +| ep_reward | 0.557 | +| mse | 313 | | time/ | | | progress | 0.53 | | step | 5.3e+05 | -| step_time | 9.19 | +| step_time | 10.7 | -------------------------------------- -2023-10-19 16:33:32,323 : Eval | ep_lengths 200.40 +/- 99.20 | ep_return 119.089 +/- 62.594 -2023-10-19 16:33:32,325 : +2023-10-27 18:39:41,718 : Eval | ep_lengths 175.60 +/- 113.65 | ep_return 106.053 +/- 71.885 +2023-10-27 18:39:41,720 : -------------------------------------- | loss/ | | -| approx_kl | 0.045 | -| entropy_loss | -3.57 | -| policy_loss | -0.0142 | -| value_loss | 1.16 | +| approx_kl | 0.0246 | +| entropy_loss | -3.37 | +| policy_loss | -0.0242 | +| value_loss | 0.572 | | stat/ | | -| constraint_violation | 354 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 132 | -| ep_reward | 0.528 | +| constraint_violation | 343 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 161 | +| ep_reward | 0.643 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 200 | -| ep_return | 119 | -| ep_reward | 0.476 | -| mse | 204 | +| constraint_violation | 0.3 | +| ep_length | 176 | +| ep_return | 106 | +| ep_reward | 0.424 | +| mse | 176 | | time/ | | | progress | 0.54 | | step | 5.4e+05 | -| step_time | 8.92 | +| step_time | 11 | -------------------------------------- -2023-10-19 16:35:15,025 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 125.413 +/- 48.212 -2023-10-19 16:35:15,026 : +2023-10-27 18:41:53,955 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 123.609 +/- 46.301 +2023-10-27 18:41:53,957 : -------------------------------------- | loss/ | | -| approx_kl | 0.0345 | -| entropy_loss | -3.56 | -| policy_loss | -0.0162 | -| value_loss | 0.877 | +| approx_kl | 0.0157 | +| entropy_loss | -3.39 | +| policy_loss | -0.00904 | +| value_loss | 0.448 | | stat/ | | -| constraint_violation | 355 | +| constraint_violation | 344 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 166 | -| ep_reward | 0.662 | +| ep_return | 148 | +| ep_reward | 0.593 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 125 | -| ep_reward | 0.502 | -| mse | 280 | +| ep_return | 124 | +| ep_reward | 0.494 | +| mse | 288 | | time/ | | | progress | 0.55 | | step | 5.5e+05 | -| step_time | 8.53 | +| step_time | 11.2 | -------------------------------------- -2023-10-19 16:36:55,762 : Eval | ep_lengths 201.60 +/- 96.80 | ep_return 138.006 +/- 73.634 -2023-10-19 16:36:55,763 : +2023-10-27 18:44:03,183 : Eval | ep_lengths 201.60 +/- 96.80 | ep_return 120.959 +/- 64.905 +2023-10-27 18:44:03,185 : -------------------------------------- | loss/ | | -| approx_kl | 0.0352 | -| entropy_loss | -3.5 | -| policy_loss | -0.0102 | -| value_loss | 0.872 | +| approx_kl | 0.0309 | +| entropy_loss | -3.37 | +| policy_loss | -0.00838 | +| value_loss | 1.72 | | stat/ | | -| constraint_violation | 365 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 134 | -| ep_reward | 0.535 | +| constraint_violation | 352 | +| ep_constraint_vio... | 0.5 | +| ep_length | 129 | +| ep_return | 77.8 | +| ep_reward | 0.322 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 202 | -| ep_return | 138 | -| ep_reward | 0.552 | -| mse | 119 | +| ep_return | 121 | +| ep_reward | 0.484 | +| mse | 127 | | time/ | | | progress | 0.56 | | step | 5.6e+05 | -| step_time | 8.38 | +| step_time | 11 | -------------------------------------- -2023-10-19 16:38:40,425 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 140.441 +/- 16.686 -2023-10-19 16:38:40,426 : +2023-10-27 18:46:18,167 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 136.008 +/- 17.015 +2023-10-27 18:46:18,169 : -------------------------------------- | loss/ | | -| approx_kl | 0.0356 | -| entropy_loss | -3.47 | -| policy_loss | -0.00426 | -| value_loss | 0.382 | +| approx_kl | 0.0367 | +| entropy_loss | -3.41 | +| policy_loss | -0.00194 | +| value_loss | 0.567 | | stat/ | | -| constraint_violation | 370 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 135 | -| ep_reward | 0.54 | +| constraint_violation | 358 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 147 | +| ep_reward | 0.587 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 140 | -| ep_reward | 0.562 | -| mse | 293 | +| ep_return | 136 | +| ep_reward | 0.544 | +| mse | 302 | | time/ | | | progress | 0.57 | | step | 5.7e+05 | -| step_time | 8.5 | +| step_time | 10.8 | -------------------------------------- -2023-10-19 16:40:20,345 : Eval | ep_lengths 202.60 +/- 94.82 | ep_return 109.348 +/- 57.769 -2023-10-19 16:40:20,346 : --------------------------------------- -| loss/ | | -| approx_kl | 0.0251 | -| entropy_loss | -3.47 | -| policy_loss | -0.0115 | -| value_loss | 0.684 | -| stat/ | | -| constraint_violation | 376 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 135 | -| ep_reward | 0.54 | -| stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 203 | -| ep_return | 109 | -| ep_reward | 0.437 | -| mse | 312 | -| time/ | | -| progress | 0.58 | -| step | 5.8e+05 | -| step_time | 8.56 | --------------------------------------- +2023-10-27 18:48:26,311 : Eval | ep_lengths 178.40 +/- 109.39 | ep_return 92.198 +/- 62.208 +2023-10-27 18:48:26,313 : +--------------------------------------- +| loss/ | | +| approx_kl | 0.0368 | +| entropy_loss | -3.42 | +| policy_loss | -0.000702 | +| value_loss | 0.78 | +| stat/ | | +| constraint_violation | 364 | +| ep_constraint_vio... | 0.3 | +| ep_length | 176 | +| ep_return | 96.5 | +| ep_reward | 0.435 | +| stat_eval/ | | +| constraint_violation | 0.3 | +| ep_length | 178 | +| ep_return | 92.2 | +| ep_reward | 0.369 | +| mse | 308 | +| time/ | | +| progress | 0.58 | +| step | 5.8e+05 | +| step_time | 11 | +--------------------------------------- -2023-10-19 16:41:56,938 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 114.282 +/- 58.967 -2023-10-19 16:41:56,940 : +2023-10-27 18:50:35,206 : Eval | ep_lengths 200.40 +/- 99.20 | ep_return 108.172 +/- 55.844 +2023-10-27 18:50:35,207 : -------------------------------------- | loss/ | | -| approx_kl | 0.0329 | -| entropy_loss | -3.5 | -| policy_loss | -0.0129 | -| value_loss | 1.77 | +| approx_kl | 0.0279 | +| entropy_loss | -3.38 | +| policy_loss | -0.0166 | +| value_loss | 2.33 | | stat/ | | -| constraint_violation | 383 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 155 | -| ep_reward | 0.621 | +| constraint_violation | 376 | +| ep_constraint_vio... | 0.4 | +| ep_length | 153 | +| ep_return | 89.1 | +| ep_reward | 0.452 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 200 | -| ep_return | 114 | -| ep_reward | 0.457 | -| mse | 266 | +| ep_return | 108 | +| ep_reward | 0.433 | +| mse | 273 | | time/ | | | progress | 0.59 | | step | 5.9e+05 | -| step_time | 8 | +| step_time | 11.1 | -------------------------------------- -2023-10-19 16:43:36,666 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 145.540 +/- 29.106 -2023-10-19 16:43:36,667 : +2023-10-27 18:52:43,320 : Eval | ep_lengths 200.20 +/- 99.60 | ep_return 106.546 +/- 56.374 +2023-10-27 18:52:43,321 : -------------------------------------- | loss/ | | -| approx_kl | 0.0391 | -| entropy_loss | -3.54 | -| policy_loss | -0.00939 | -| value_loss | 0.452 | +| approx_kl | 0.0193 | +| entropy_loss | -3.35 | +| policy_loss | -0.00851 | +| value_loss | 0.996 | | stat/ | | -| constraint_violation | 386 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 162 | -| ep_reward | 0.65 | +| constraint_violation | 381 | +| ep_constraint_vio... | 0.3 | +| ep_length | 175 | +| ep_return | 105 | +| ep_reward | 0.424 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 146 | -| ep_reward | 0.582 | -| mse | 288 | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 107 | +| ep_reward | 0.426 | +| mse | 268 | | time/ | | | progress | 0.6 | | step | 6e+05 | -| step_time | 8.21 | +| step_time | 10.9 | -------------------------------------- -2023-10-19 16:45:16,687 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 153.264 +/- 25.140 -2023-10-19 16:45:16,688 : +2023-10-27 18:54:53,694 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 131.945 +/- 48.327 +2023-10-27 18:54:53,695 : -------------------------------------- | loss/ | | -| approx_kl | 0.0334 | -| entropy_loss | -3.55 | -| policy_loss | -0.0127 | -| value_loss | 0.425 | +| approx_kl | 0.0292 | +| entropy_loss | -3.32 | +| policy_loss | -0.00664 | +| value_loss | 0.78 | | stat/ | | -| constraint_violation | 396 | +| constraint_violation | 392 | | ep_constraint_vio... | 0.1 | | ep_length | 226 | -| ep_return | 132 | -| ep_reward | 0.529 | +| ep_return | 127 | +| ep_reward | 0.507 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 153 | -| ep_reward | 0.613 | -| mse | 255 | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 132 | +| ep_reward | 0.528 | +| mse | 241 | | time/ | | | progress | 0.61 | | step | 6.1e+05 | -| step_time | 8.12 | +| step_time | 10.7 | -------------------------------------- -2023-10-19 16:46:54,168 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 138.021 +/- 50.073 -2023-10-19 16:46:54,169 : +2023-10-27 18:57:03,400 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 126.377 +/- 45.519 +2023-10-27 18:57:03,401 : -------------------------------------- | loss/ | | -| approx_kl | 0.0361 | -| entropy_loss | -3.53 | -| policy_loss | -0.00972 | -| value_loss | 0.182 | +| approx_kl | 0.0206 | +| entropy_loss | -3.3 | +| policy_loss | -0.00796 | +| value_loss | 0.572 | | stat/ | | -| constraint_violation | 400 | +| constraint_violation | 396 | | ep_constraint_vio... | 0.1 | | ep_length | 226 | -| ep_return | 151 | -| ep_reward | 0.607 | +| ep_return | 143 | +| ep_reward | 0.573 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 138 | -| ep_reward | 0.552 | -| mse | 224 | +| ep_return | 126 | +| ep_reward | 0.506 | +| mse | 243 | | time/ | | | progress | 0.62 | | step | 6.2e+05 | -| step_time | 8.04 | +| step_time | 10.7 | -------------------------------------- -2023-10-19 16:48:32,259 : Eval | ep_lengths 225.50 +/- 73.50 | ep_return 135.893 +/- 47.577 -2023-10-19 16:48:32,260 : +2023-10-27 18:59:13,652 : Eval | ep_lengths 225.60 +/- 73.20 | ep_return 128.112 +/- 45.733 +2023-10-27 18:59:13,654 : -------------------------------------- | loss/ | | -| approx_kl | 0.024 | -| entropy_loss | -3.49 | -| policy_loss | -0.0127 | -| value_loss | 0.457 | +| approx_kl | 0.0171 | +| entropy_loss | -3.25 | +| policy_loss | -0.0122 | +| value_loss | 0.727 | | stat/ | | -| constraint_violation | 409 | +| constraint_violation | 405 | | ep_constraint_vio... | 0.1 | | ep_length | 225 | -| ep_return | 143 | -| ep_reward | 0.575 | +| ep_return | 155 | +| ep_reward | 0.62 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 226 | -| ep_return | 136 | -| ep_reward | 0.544 | -| mse | 220 | +| ep_return | 128 | +| ep_reward | 0.512 | +| mse | 225 | | time/ | | | progress | 0.63 | | step | 6.3e+05 | -| step_time | 8.29 | +| step_time | 10.7 | -------------------------------------- -2023-10-19 16:50:11,517 : Eval | ep_lengths 227.00 +/- 69.00 | ep_return 141.355 +/- 50.179 -2023-10-19 16:50:11,518 : +2023-10-27 19:01:27,436 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 132.815 +/- 21.384 +2023-10-27 19:01:27,438 : -------------------------------------- | loss/ | | -| approx_kl | 0.0332 | -| entropy_loss | -3.47 | -| policy_loss | -0.00228 | -| value_loss | 1.47 | +| approx_kl | 0.0223 | +| entropy_loss | -3.3 | +| policy_loss | -0.00159 | +| value_loss | 0.552 | | stat/ | | -| constraint_violation | 417 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 158 | -| ep_reward | 0.631 | +| constraint_violation | 412 | +| ep_constraint_vio... | 0.2 | +| ep_length | 200 | +| ep_return | 126 | +| ep_reward | 0.524 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 227 | -| ep_return | 141 | -| ep_reward | 0.565 | -| mse | 217 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 133 | +| ep_reward | 0.531 | +| mse | 272 | | time/ | | | progress | 0.64 | | step | 6.4e+05 | -| step_time | 8.1 | +| step_time | 10.7 | -------------------------------------- -2023-10-19 16:51:52,097 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 153.135 +/- 28.456 -2023-10-19 16:51:52,098 : +2023-10-27 19:03:39,752 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 139.820 +/- 21.297 +2023-10-27 19:03:39,753 : -------------------------------------- | loss/ | | -| approx_kl | 0.0249 | -| entropy_loss | -3.49 | -| policy_loss | -0.0219 | -| value_loss | 0.459 | +| approx_kl | 0.0317 | +| entropy_loss | -3.23 | +| policy_loss | 0.00161 | +| value_loss | 1.11 | | stat/ | | -| constraint_violation | 422 | +| constraint_violation | 418 | | ep_constraint_vio... | 0.1 | | ep_length | 227 | -| ep_return | 145 | -| ep_reward | 0.579 | +| ep_return | 125 | +| ep_reward | 0.502 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 153 | -| ep_reward | 0.613 | -| mse | 306 | +| ep_return | 140 | +| ep_reward | 0.559 | +| mse | 330 | | time/ | | | progress | 0.65 | | step | 6.5e+05 | -| step_time | 8.22 | +| step_time | 10.6 | -------------------------------------- -2023-10-19 16:53:28,778 : Eval | ep_lengths 200.40 +/- 99.20 | ep_return 116.949 +/- 60.958 -2023-10-19 16:53:28,779 : +2023-10-27 19:05:47,456 : Eval | ep_lengths 200.20 +/- 99.60 | ep_return 104.416 +/- 54.854 +2023-10-27 19:05:47,458 : -------------------------------------- | loss/ | | -| approx_kl | 0.0422 | -| entropy_loss | -3.47 | -| policy_loss | -0.00595 | -| value_loss | 3.6 | +| approx_kl | 0.0282 | +| entropy_loss | -3.27 | +| policy_loss | -0.0194 | +| value_loss | 0.698 | | stat/ | | -| constraint_violation | 428 | -| ep_constraint_vio... | 0.3 | -| ep_length | 176 | -| ep_return | 120 | -| ep_reward | 0.494 | +| constraint_violation | 422 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 156 | +| ep_reward | 0.623 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 200 | -| ep_return | 117 | -| ep_reward | 0.468 | -| mse | 196 | +| ep_return | 104 | +| ep_reward | 0.418 | +| mse | 215 | | time/ | | | progress | 0.66 | | step | 6.6e+05 | -| step_time | 8.14 | +| step_time | 10.7 | -------------------------------------- -2023-10-19 16:55:06,576 : Eval | ep_lengths 200.60 +/- 98.80 | ep_return 115.710 +/- 61.221 -2023-10-19 16:55:06,577 : +2023-10-27 19:07:55,762 : Eval | ep_lengths 200.40 +/- 99.20 | ep_return 113.017 +/- 58.350 +2023-10-27 19:07:55,764 : -------------------------------------- | loss/ | | -| approx_kl | 0.0423 | -| entropy_loss | -3.45 | -| policy_loss | -0.00259 | -| value_loss | 0.865 | +| approx_kl | 0.0288 | +| entropy_loss | -3.27 | +| policy_loss | -0.00489 | +| value_loss | 1 | | stat/ | | -| constraint_violation | 436 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 136 | -| ep_reward | 0.554 | +| constraint_violation | 432 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 147 | +| ep_reward | 0.588 | | stat_eval/ | | | constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 116 | -| ep_reward | 0.463 | -| mse | 234 | +| ep_length | 200 | +| ep_return | 113 | +| ep_reward | 0.452 | +| mse | 239 | | time/ | | | progress | 0.67 | | step | 6.7e+05 | -| step_time | 8.32 | +| step_time | 10.9 | -------------------------------------- -2023-10-19 16:56:45,687 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 149.678 +/- 26.176 -2023-10-19 16:56:45,689 : +2023-10-27 19:10:09,229 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 141.590 +/- 19.527 +2023-10-27 19:10:09,231 : -------------------------------------- | loss/ | | -| approx_kl | 0.0347 | -| entropy_loss | -3.44 | -| policy_loss | -0.00142 | -| value_loss | 1.19 | +| approx_kl | 0.0332 | +| entropy_loss | -3.3 | +| policy_loss | -0.00324 | +| value_loss | 0.736 | | stat/ | | -| constraint_violation | 445 | +| constraint_violation | 442 | | ep_constraint_vio... | 0.2 | | ep_length | 201 | -| ep_return | 135 | -| ep_reward | 0.54 | +| ep_return | 121 | +| ep_reward | 0.486 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 150 | -| ep_reward | 0.599 | -| mse | 305 | +| ep_return | 142 | +| ep_reward | 0.566 | +| mse | 317 | | time/ | | | progress | 0.68 | | step | 6.8e+05 | -| step_time | 8.09 | +| step_time | 10.7 | -------------------------------------- -2023-10-19 16:58:22,568 : Eval | ep_lengths 225.50 +/- 73.50 | ep_return 152.673 +/- 58.752 -2023-10-19 16:58:22,569 : +2023-10-27 19:12:18,036 : Eval | ep_lengths 225.50 +/- 73.50 | ep_return 142.008 +/- 54.845 +2023-10-27 19:12:18,038 : -------------------------------------- | loss/ | | -| approx_kl | 0.0227 | -| entropy_loss | -3.48 | -| policy_loss | -0.00416 | -| value_loss | 0.926 | +| approx_kl | 0.0256 | +| entropy_loss | -3.26 | +| policy_loss | -0.0135 | +| value_loss | 2.08 | | stat/ | | -| constraint_violation | 451 | +| constraint_violation | 448 | | ep_constraint_vio... | 0.3 | | ep_length | 176 | -| ep_return | 107 | -| ep_reward | 0.503 | +| ep_return | 109 | +| ep_reward | 0.511 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 226 | -| ep_return | 153 | -| ep_reward | 0.611 | -| mse | 137 | +| ep_return | 142 | +| ep_reward | 0.568 | +| mse | 150 | | time/ | | | progress | 0.69 | | step | 6.9e+05 | -| step_time | 8.06 | +| step_time | 10.6 | -------------------------------------- -2023-10-19 17:00:00,590 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 144.543 +/- 25.444 -2023-10-19 17:00:00,592 : +2023-10-27 19:14:30,720 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 142.547 +/- 19.735 +2023-10-27 19:14:30,721 : -------------------------------------- | loss/ | | -| approx_kl | 0.0353 | -| entropy_loss | -3.48 | -| policy_loss | -0.0067 | -| value_loss | 0.784 | +| approx_kl | 0.0273 | +| entropy_loss | -3.34 | +| policy_loss | -0.0105 | +| value_loss | 0.795 | | stat/ | | -| constraint_violation | 458 | +| constraint_violation | 456 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 153 | -| ep_reward | 0.611 | +| ep_return | 145 | +| ep_reward | 0.581 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 145 | -| ep_reward | 0.578 | -| mse | 253 | +| ep_return | 143 | +| ep_reward | 0.57 | +| mse | 254 | | time/ | | | progress | 0.7 | | step | 7e+05 | -| step_time | 7.92 | +| step_time | 10.9 | -------------------------------------- -2023-10-19 17:01:35,775 : Eval | ep_lengths 200.40 +/- 99.20 | ep_return 121.464 +/- 67.811 -2023-10-19 17:01:35,776 : +2023-10-27 19:16:38,902 : Eval | ep_lengths 200.40 +/- 99.20 | ep_return 119.347 +/- 63.140 +2023-10-27 19:16:38,904 : -------------------------------------- | loss/ | | -| approx_kl | 0.0299 | -| entropy_loss | -3.49 | -| policy_loss | -0.00704 | -| value_loss | 3.47 | +| approx_kl | 0.0216 | +| entropy_loss | -3.36 | +| policy_loss | -0.0134 | +| value_loss | 0.639 | | stat/ | | -| constraint_violation | 464 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 133 | -| ep_reward | 0.534 | +| constraint_violation | 460 | +| ep_constraint_vio... | 0.1 | +| ep_length | 226 | +| ep_return | 125 | +| ep_reward | 0.502 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 200 | -| ep_return | 121 | -| ep_reward | 0.486 | -| mse | 216 | +| ep_return | 119 | +| ep_reward | 0.477 | +| mse | 215 | | time/ | | | progress | 0.71 | | step | 7.1e+05 | -| step_time | 8.2 | +| step_time | 11.2 | -------------------------------------- -2023-10-19 17:03:11,263 : Eval | ep_lengths 200.80 +/- 98.41 | ep_return 118.910 +/- 62.991 -2023-10-19 17:03:11,264 : +2023-10-27 19:18:47,513 : Eval | ep_lengths 200.80 +/- 98.41 | ep_return 117.616 +/- 61.012 +2023-10-27 19:18:47,515 : -------------------------------------- | loss/ | | -| approx_kl | 0.0287 | -| entropy_loss | -3.53 | -| policy_loss | -0.00803 | -| value_loss | 0.815 | +| approx_kl | 0.0247 | +| entropy_loss | -3.35 | +| policy_loss | -0.0144 | +| value_loss | 1.48 | | stat/ | | -| constraint_violation | 473 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 145 | -| ep_reward | 0.578 | +| constraint_violation | 469 | +| ep_constraint_vio... | 0.2 | +| ep_length | 200 | +| ep_return | 122 | +| ep_reward | 0.497 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 201 | -| ep_return | 119 | -| ep_reward | 0.476 | -| mse | 186 | +| ep_return | 118 | +| ep_reward | 0.47 | +| mse | 191 | | time/ | | | progress | 0.72 | | step | 7.2e+05 | -| step_time | 8.11 | +| step_time | 10.9 | -------------------------------------- -2023-10-19 17:04:45,201 : Eval | ep_lengths 201.80 +/- 96.47 | ep_return 132.983 +/- 72.179 -2023-10-19 17:04:45,203 : +2023-10-27 19:20:53,890 : Eval | ep_lengths 201.80 +/- 96.47 | ep_return 123.226 +/- 66.746 +2023-10-27 19:20:53,891 : -------------------------------------- | loss/ | | -| approx_kl | 0.036 | -| entropy_loss | -3.58 | -| policy_loss | -0.00736 | -| value_loss | 1.53 | +| approx_kl | 0.0309 | +| entropy_loss | -3.34 | +| policy_loss | -0.0136 | +| value_loss | 2.12 | | stat/ | | -| constraint_violation | 477 | +| constraint_violation | 473 | | ep_constraint_vio... | 0.2 | | ep_length | 200 | -| ep_return | 123 | -| ep_reward | 0.504 | +| ep_return | 118 | +| ep_reward | 0.473 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 202 | -| ep_return | 133 | -| ep_reward | 0.532 | -| mse | 149 | +| ep_return | 123 | +| ep_reward | 0.493 | +| mse | 159 | | time/ | | | progress | 0.73 | | step | 7.3e+05 | -| step_time | 7.85 | +| step_time | 10.9 | -------------------------------------- -2023-10-19 17:06:23,067 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 138.792 +/- 23.877 -2023-10-19 17:06:23,068 : +2023-10-27 19:23:06,806 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 131.872 +/- 21.365 +2023-10-27 19:23:06,807 : -------------------------------------- | loss/ | | -| approx_kl | 0.0256 | -| entropy_loss | -3.63 | -| policy_loss | -0.0166 | -| value_loss | 1.69 | +| approx_kl | 0.0279 | +| entropy_loss | -3.29 | +| policy_loss | -0.00889 | +| value_loss | 3.54 | | stat/ | | -| constraint_violation | 482 | +| constraint_violation | 478 | | ep_constraint_vio... | 0.2 | | ep_length | 200 | -| ep_return | 124 | -| ep_reward | 0.521 | +| ep_return | 115 | +| ep_reward | 0.486 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 139 | -| ep_reward | 0.555 | -| mse | 268 | +| ep_return | 132 | +| ep_reward | 0.527 | +| mse | 284 | | time/ | | | progress | 0.74 | | step | 7.4e+05 | -| step_time | 7.87 | +| step_time | 11 | -------------------------------------- -2023-10-19 17:07:59,321 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 131.563 +/- 47.441 -2023-10-19 17:07:59,322 : +2023-10-27 19:25:17,474 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 122.070 +/- 43.951 +2023-10-27 19:25:17,476 : -------------------------------------- | loss/ | | -| approx_kl | 0.0269 | -| entropy_loss | -3.57 | -| policy_loss | -0.00344 | -| value_loss | 0.556 | +| approx_kl | 0.022 | +| entropy_loss | -3.26 | +| policy_loss | -0.0136 | +| value_loss | 1.13 | | stat/ | | -| constraint_violation | 485 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 145 | -| ep_reward | 0.579 | +| constraint_violation | 482 | +| ep_constraint_vio... | 0.1 | +| ep_length | 227 | +| ep_return | 126 | +| ep_reward | 0.505 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 132 | -| ep_reward | 0.526 | -| mse | 237 | +| ep_return | 122 | +| ep_reward | 0.488 | +| mse | 259 | | time/ | | | progress | 0.75 | | step | 7.5e+05 | -| step_time | 8.13 | +| step_time | 11 | -------------------------------------- -2023-10-19 17:09:36,085 : Eval | ep_lengths 225.60 +/- 73.20 | ep_return 129.500 +/- 53.047 -2023-10-19 17:09:36,086 : +2023-10-27 19:27:28,787 : Eval | ep_lengths 225.60 +/- 73.20 | ep_return 122.944 +/- 47.619 +2023-10-27 19:27:28,788 : -------------------------------------- | loss/ | | -| approx_kl | 0.0292 | -| entropy_loss | -3.56 | -| policy_loss | -0.00234 | -| value_loss | 2.42 | +| approx_kl | 0.0256 | +| entropy_loss | -3.3 | +| policy_loss | -0.0103 | +| value_loss | 0.523 | | stat/ | | -| constraint_violation | 493 | -| ep_constraint_vio... | 0.1 | -| ep_length | 227 | -| ep_return | 126 | -| ep_reward | 0.509 | +| constraint_violation | 488 | +| ep_constraint_vio... | 0.2 | +| ep_length | 202 | +| ep_return | 114 | +| ep_reward | 0.457 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 226 | -| ep_return | 130 | -| ep_reward | 0.518 | -| mse | 253 | +| ep_return | 123 | +| ep_reward | 0.492 | +| mse | 267 | | time/ | | | progress | 0.76 | | step | 7.6e+05 | -| step_time | 8.04 | +| step_time | 11 | -------------------------------------- -2023-10-19 17:11:10,257 : Eval | ep_lengths 201.30 +/- 97.41 | ep_return 121.198 +/- 61.903 -2023-10-19 17:11:10,258 : +2023-10-27 19:29:37,320 : Eval | ep_lengths 201.30 +/- 97.41 | ep_return 114.738 +/- 59.252 +2023-10-27 19:29:37,322 : -------------------------------------- | loss/ | | -| approx_kl | 0.032 | -| entropy_loss | -3.61 | -| policy_loss | -0.0143 | -| value_loss | 0.596 | +| approx_kl | 0.0252 | +| entropy_loss | -3.33 | +| policy_loss | -0.00582 | +| value_loss | 0.543 | | stat/ | | -| constraint_violation | 499 | +| constraint_violation | 497 | | ep_constraint_vio... | 0.2 | | ep_length | 200 | -| ep_return | 118 | -| ep_reward | 0.481 | +| ep_return | 110 | +| ep_reward | 0.45 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 201 | -| ep_return | 121 | -| ep_reward | 0.485 | -| mse | 152 | +| ep_return | 115 | +| ep_reward | 0.459 | +| mse | 159 | | time/ | | | progress | 0.77 | | step | 7.7e+05 | -| step_time | 7.91 | +| step_time | 10.8 | -------------------------------------- -2023-10-19 17:12:43,991 : Eval | ep_lengths 200.20 +/- 99.60 | ep_return 120.199 +/- 64.286 -2023-10-19 17:12:43,992 : +2023-10-27 19:31:44,110 : Eval | ep_lengths 200.20 +/- 99.60 | ep_return 119.393 +/- 63.147 +2023-10-27 19:31:44,112 : -------------------------------------- | loss/ | | -| approx_kl | 0.0277 | -| entropy_loss | -3.59 | -| policy_loss | -0.00765 | -| value_loss | 1.59 | +| approx_kl | 0.0229 | +| entropy_loss | -3.32 | +| policy_loss | -0.0129 | +| value_loss | 1.2 | | stat/ | | -| constraint_violation | 504 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 148 | -| ep_reward | 0.617 | +| constraint_violation | 502 | +| ep_constraint_vio... | 0.2 | +| ep_length | 200 | +| ep_return | 132 | +| ep_reward | 0.557 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 200 | -| ep_return | 120 | -| ep_reward | 0.481 | -| mse | 215 | +| ep_return | 119 | +| ep_reward | 0.478 | +| mse | 216 | | time/ | | | progress | 0.78 | | step | 7.8e+05 | -| step_time | 7.94 | +| step_time | 10.7 | -------------------------------------- -2023-10-19 17:14:20,686 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 129.380 +/- 48.651 -2023-10-19 17:14:20,687 : +2023-10-27 19:33:55,466 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 129.988 +/- 46.276 +2023-10-27 19:33:55,468 : -------------------------------------- | loss/ | | -| approx_kl | 0.0327 | -| entropy_loss | -3.64 | -| policy_loss | -0.016 | -| value_loss | 8.25 | -| stat/ | | -| constraint_violation | 515 | -| ep_constraint_vio... | 0.6 | -| ep_length | 102 | -| ep_return | 56.7 | -| ep_reward | 0.317 | +| approx_kl | 0.03 | +| entropy_loss | -3.31 | +| policy_loss | -0.00863 | +| value_loss | 6.16 | +| stat/ | | +| constraint_violation | 508 | +| ep_constraint_vio... | 0.5 | +| ep_length | 126 | +| ep_return | 82.6 | +| ep_reward | 0.338 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 226 | -| ep_return | 129 | -| ep_reward | 0.518 | -| mse | 213 | +| ep_return | 130 | +| ep_reward | 0.52 | +| mse | 214 | | time/ | | | progress | 0.79 | | step | 7.9e+05 | -| step_time | 8.16 | +| step_time | 11.2 | -------------------------------------- -2023-10-19 17:15:58,918 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 135.620 +/- 17.799 -2023-10-19 17:15:58,919 : +2023-10-27 19:36:08,852 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 135.004 +/- 8.824 +2023-10-27 19:36:08,853 : -------------------------------------- | loss/ | | -| approx_kl | 0.029 | -| entropy_loss | -3.69 | -| policy_loss | -0.0049 | -| value_loss | 1.31 | +| approx_kl | 0.028 | +| entropy_loss | -3.3 | +| policy_loss | -0.0153 | +| value_loss | 0.369 | | stat/ | | -| constraint_violation | 520 | +| constraint_violation | 517 | | ep_constraint_vio... | 0.1 | | ep_length | 225 | -| ep_return | 140 | -| ep_reward | 0.559 | +| ep_return | 148 | +| ep_reward | 0.591 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 136 | -| ep_reward | 0.542 | -| mse | 305 | +| ep_return | 135 | +| ep_reward | 0.54 | +| mse | 322 | | time/ | | | progress | 0.8 | | step | 8e+05 | -| step_time | 7.96 | +| step_time | 10.7 | -------------------------------------- -2023-10-19 17:17:35,791 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 150.598 +/- 22.967 -2023-10-19 17:17:35,792 : +2023-10-27 19:38:19,826 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 148.378 +/- 19.025 +2023-10-27 19:38:19,827 : -------------------------------------- | loss/ | | -| approx_kl | 0.0382 | -| entropy_loss | -3.71 | -| policy_loss | -0.00398 | -| value_loss | 0.833 | +| approx_kl | 0.0278 | +| entropy_loss | -3.28 | +| policy_loss | -0.00562 | +| value_loss | 0.637 | | stat/ | | -| constraint_violation | 525 | +| constraint_violation | 522 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 151 | -| ep_reward | 0.603 | +| ep_return | 161 | +| ep_reward | 0.645 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 151 | -| ep_reward | 0.602 | -| mse | 206 | +| ep_return | 148 | +| ep_reward | 0.594 | +| mse | 210 | | time/ | | | progress | 0.81 | | step | 8.1e+05 | -| step_time | 8.1 | +| step_time | 10.6 | -------------------------------------- -2023-10-19 17:19:11,434 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 143.391 +/- 50.057 -2023-10-19 17:19:11,435 : +2023-10-27 19:40:30,849 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 136.926 +/- 48.306 +2023-10-27 19:40:30,850 : -------------------------------------- | loss/ | | -| approx_kl | 0.0323 | -| entropy_loss | -3.71 | -| policy_loss | -0.0123 | -| value_loss | 1.1 | +| approx_kl | 0.0286 | +| entropy_loss | -3.26 | +| policy_loss | -0.0119 | +| value_loss | 0.385 | | stat/ | | -| constraint_violation | 529 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 136 | -| ep_reward | 0.545 | +| constraint_violation | 525 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 141 | +| ep_reward | 0.565 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 143 | -| ep_reward | 0.574 | -| mse | 144 | +| ep_return | 137 | +| ep_reward | 0.548 | +| mse | 155 | | time/ | | | progress | 0.82 | | step | 8.2e+05 | -| step_time | 7.82 | +| step_time | 10.9 | -------------------------------------- -2023-10-19 17:20:47,122 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 150.744 +/- 28.900 -2023-10-19 17:20:47,123 : +2023-10-27 19:42:44,290 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 141.937 +/- 24.574 +2023-10-27 19:42:44,292 : -------------------------------------- | loss/ | | | approx_kl | 0.0339 | -| entropy_loss | -3.67 | -| policy_loss | -0.00674 | -| value_loss | 0.977 | +| entropy_loss | -3.26 | +| policy_loss | -0.00558 | +| value_loss | 1.69 | | stat/ | | -| constraint_violation | 534 | +| constraint_violation | 531 | | ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 137 | -| ep_reward | 0.548 | +| ep_length | 225 | +| ep_return | 136 | +| ep_reward | 0.545 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 151 | -| ep_reward | 0.603 | -| mse | 233 | +| ep_return | 142 | +| ep_reward | 0.568 | +| mse | 244 | | time/ | | | progress | 0.83 | | step | 8.3e+05 | -| step_time | 7.77 | +| step_time | 11 | -------------------------------------- -2023-10-19 17:22:22,763 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 143.057 +/- 23.615 -2023-10-19 17:22:22,765 : +2023-10-27 19:44:56,814 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 149.995 +/- 17.872 +2023-10-27 19:44:56,815 : -------------------------------------- | loss/ | | -| approx_kl | 0.0268 | -| entropy_loss | -3.65 | -| policy_loss | -0.0106 | -| value_loss | 1.5 | +| approx_kl | 0.0278 | +| entropy_loss | -3.32 | +| policy_loss | -0.00555 | +| value_loss | 0.824 | | stat/ | | -| constraint_violation | 543 | -| ep_constraint_vio... | 0.3 | -| ep_length | 176 | -| ep_return | 98.1 | -| ep_reward | 0.402 | +| constraint_violation | 538 | +| ep_constraint_vio... | 0.2 | +| ep_length | 200 | +| ep_return | 115 | +| ep_reward | 0.463 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 143 | -| ep_reward | 0.572 | -| mse | 232 | +| ep_return | 150 | +| ep_reward | 0.6 | +| mse | 221 | | time/ | | | progress | 0.84 | | step | 8.4e+05 | -| step_time | 7.76 | +| step_time | 10.7 | -------------------------------------- -2023-10-19 17:23:58,175 : Eval | ep_lengths 151.60 +/- 120.52 | ep_return 78.091 +/- 65.592 -2023-10-19 17:23:58,176 : +2023-10-27 19:46:59,404 : Eval | ep_lengths 126.70 +/- 123.31 | ep_return 61.187 +/- 62.857 +2023-10-27 19:46:59,405 : -------------------------------------- | loss/ | | -| approx_kl | 0.0218 | -| entropy_loss | -3.61 | -| policy_loss | -0.00188 | -| value_loss | 0.904 | +| approx_kl | 0.0309 | +| entropy_loss | -3.35 | +| policy_loss | -0.00368 | +| value_loss | 1.01 | | stat/ | | -| constraint_violation | 547 | +| constraint_violation | 544 | | ep_constraint_vio... | 0.1 | | ep_length | 225 | -| ep_return | 120 | -| ep_reward | 0.481 | +| ep_return | 130 | +| ep_reward | 0.521 | | stat_eval/ | | -| constraint_violation | 0.4 | -| ep_length | 152 | -| ep_return | 78.1 | -| ep_reward | 0.312 | -| mse | 131 | +| constraint_violation | 0.5 | +| ep_length | 127 | +| ep_return | 61.2 | +| ep_reward | 0.245 | +| mse | 117 | | time/ | | | progress | 0.85 | | step | 8.5e+05 | -| step_time | 8.29 | +| step_time | 11 | -------------------------------------- -2023-10-19 17:25:49,788 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 132.293 +/- 35.258 -2023-10-19 17:25:49,789 : +2023-10-27 19:49:14,304 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 135.429 +/- 30.686 +2023-10-27 19:49:14,306 : -------------------------------------- | loss/ | | -| approx_kl | 0.0348 | -| entropy_loss | -3.61 | -| policy_loss | -0.0105 | -| value_loss | 1.05 | +| approx_kl | 0.0312 | +| entropy_loss | -3.31 | +| policy_loss | -0.00223 | +| value_loss | 1.75 | | stat/ | | -| constraint_violation | 558 | -| ep_constraint_vio... | 0.3 | -| ep_length | 177 | -| ep_return | 108 | -| ep_reward | 0.443 | +| constraint_violation | 555 | +| ep_constraint_vio... | 0.4 | +| ep_length | 153 | +| ep_return | 92.9 | +| ep_reward | 0.383 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 132 | -| ep_reward | 0.529 | -| mse | 365 | +| ep_return | 135 | +| ep_reward | 0.542 | +| mse | 366 | | time/ | | | progress | 0.86 | | step | 8.6e+05 | -| step_time | 8.47 | +| step_time | 10.9 | -------------------------------------- -2023-10-19 17:27:34,763 : Eval | ep_lengths 176.80 +/- 111.84 | ep_return 104.798 +/- 72.511 -2023-10-19 17:27:34,764 : +2023-10-27 19:51:19,460 : Eval | ep_lengths 176.80 +/- 111.84 | ep_return 106.214 +/- 71.185 +2023-10-27 19:51:19,462 : -------------------------------------- | loss/ | | -| approx_kl | 0.0337 | -| entropy_loss | -3.59 | -| policy_loss | 0.000182 | -| value_loss | 2.38 | +| approx_kl | 0.0368 | +| entropy_loss | -3.33 | +| policy_loss | -0.00617 | +| value_loss | 2.52 | | stat/ | | -| constraint_violation | 565 | +| constraint_violation | 560 | | ep_constraint_vio... | 0.1 | | ep_length | 225 | -| ep_return | 135 | -| ep_reward | 0.54 | +| ep_return | 131 | +| ep_reward | 0.601 | | stat_eval/ | | | constraint_violation | 0.3 | | ep_length | 177 | -| ep_return | 105 | -| ep_reward | 0.419 | -| mse | 192 | +| ep_return | 106 | +| ep_reward | 0.425 | +| mse | 193 | | time/ | | | progress | 0.87 | | step | 8.7e+05 | -| step_time | 9.21 | +| step_time | 10.8 | -------------------------------------- -2023-10-19 17:29:22,325 : Eval | ep_lengths 201.60 +/- 96.82 | ep_return 118.364 +/- 61.914 -2023-10-19 17:29:22,326 : +2023-10-27 19:53:29,036 : Eval | ep_lengths 201.60 +/- 96.82 | ep_return 118.324 +/- 61.108 +2023-10-27 19:53:29,037 : -------------------------------------- | loss/ | | -| approx_kl | 0.0347 | -| entropy_loss | -3.51 | -| policy_loss | 0.000741 | -| value_loss | 1.23 | +| approx_kl | 0.0274 | +| entropy_loss | -3.39 | +| policy_loss | -0.0111 | +| value_loss | 2.29 | | stat/ | | -| constraint_violation | 569 | +| constraint_violation | 564 | | ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 131 | -| ep_reward | 0.531 | +| ep_length | 201 | +| ep_return | 123 | +| ep_reward | 0.497 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 202 | | ep_return | 118 | | ep_reward | 0.473 | -| mse | 210 | +| mse | 211 | | time/ | | | progress | 0.88 | | step | 8.8e+05 | -| step_time | 9.05 | +| step_time | 10.8 | -------------------------------------- -2023-10-19 17:31:08,662 : Eval | ep_lengths 201.30 +/- 97.42 | ep_return 129.962 +/- 68.633 -2023-10-19 17:31:08,663 : +2023-10-27 19:55:35,934 : Eval | ep_lengths 201.30 +/- 97.42 | ep_return 126.014 +/- 65.513 +2023-10-27 19:55:35,936 : -------------------------------------- | loss/ | | -| approx_kl | 0.0374 | -| entropy_loss | -3.5 | -| policy_loss | -0.0134 | -| value_loss | 3.92 | +| approx_kl | 0.0302 | +| entropy_loss | -3.42 | +| policy_loss | -0.0123 | +| value_loss | 0.64 | | stat/ | | -| constraint_violation | 577 | -| ep_constraint_vio... | 0.2 | -| ep_length | 202 | -| ep_return | 123 | -| ep_reward | 0.496 | +| constraint_violation | 570 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 155 | +| ep_reward | 0.621 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 201 | -| ep_return | 130 | -| ep_reward | 0.52 | -| mse | 156 | +| ep_return | 126 | +| ep_reward | 0.504 | +| mse | 158 | | time/ | | | progress | 0.89 | | step | 8.9e+05 | -| step_time | 9.42 | +| step_time | 10.8 | -------------------------------------- -2023-10-19 17:32:52,501 : Eval | ep_lengths 201.00 +/- 98.02 | ep_return 106.027 +/- 57.188 -2023-10-19 17:32:52,503 : +2023-10-27 19:57:43,905 : Eval | ep_lengths 201.00 +/- 98.02 | ep_return 107.042 +/- 56.653 +2023-10-27 19:57:43,907 : -------------------------------------- | loss/ | | -| approx_kl | 0.0338 | -| entropy_loss | -3.48 | -| policy_loss | -0.00708 | -| value_loss | 0.378 | +| approx_kl | 0.0385 | +| entropy_loss | -3.33 | +| policy_loss | -0.0076 | +| value_loss | 0.631 | | stat/ | | -| constraint_violation | 579 | +| constraint_violation | 575 | | ep_constraint_vio... | 0 | | ep_length | 250 | | ep_return | 157 | -| ep_reward | 0.627 | +| ep_reward | 0.626 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 201 | -| ep_return | 106 | -| ep_reward | 0.424 | -| mse | 246 | +| ep_return | 107 | +| ep_reward | 0.428 | +| mse | 248 | | time/ | | | progress | 0.9 | | step | 9e+05 | -| step_time | 8.49 | +| step_time | 10.9 | -------------------------------------- -2023-10-19 17:34:38,939 : Eval | ep_lengths 226.60 +/- 70.20 | ep_return 131.276 +/- 48.579 -2023-10-19 17:34:38,940 : +2023-10-27 19:59:53,177 : Eval | ep_lengths 226.60 +/- 70.20 | ep_return 130.382 +/- 47.656 +2023-10-27 19:59:53,178 : -------------------------------------- | loss/ | | -| approx_kl | 0.0595 | -| entropy_loss | -3.48 | -| policy_loss | -0.00771 | -| value_loss | 5.53 | +| approx_kl | 0.0403 | +| entropy_loss | -3.29 | +| policy_loss | -0.00934 | +| value_loss | 4.73 | | stat/ | | -| constraint_violation | 582 | +| constraint_violation | 579 | | ep_constraint_vio... | 0.1 | | ep_length | 226 | -| ep_return | 147 | -| ep_reward | 0.627 | +| ep_return | 144 | +| ep_reward | 0.617 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 227 | -| ep_return | 131 | -| ep_reward | 0.525 | -| mse | 243 | +| ep_return | 130 | +| ep_reward | 0.522 | +| mse | 252 | | time/ | | | progress | 0.91 | | step | 9.1e+05 | -| step_time | 8.97 | +| step_time | 10.6 | -------------------------------------- -2023-10-19 17:36:17,936 : Eval | ep_lengths 175.60 +/- 113.65 | ep_return 114.724 +/- 75.985 -2023-10-19 17:36:17,937 : ---------------------------------------- -| loss/ | | -| approx_kl | 0.0276 | -| entropy_loss | -3.51 | -| policy_loss | -0.000419 | -| value_loss | 0.372 | -| stat/ | | -| constraint_violation | 592 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 130 | -| ep_reward | 0.526 | -| stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 176 | -| ep_return | 115 | -| ep_reward | 0.459 | -| mse | 73.4 | -| time/ | | -| progress | 0.92 | -| step | 9.2e+05 | -| step_time | 8.25 | ---------------------------------------- - -2023-10-19 17:38:03,738 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 151.110 +/- 17.858 -2023-10-19 17:38:03,739 : +2023-10-27 20:01:56,848 : Eval | ep_lengths 175.40 +/- 113.95 | ep_return 121.393 +/- 80.151 +2023-10-27 20:01:56,850 : -------------------------------------- | loss/ | | -| approx_kl | 0.0329 | -| entropy_loss | -3.47 | -| policy_loss | -0.0103 | -| value_loss | 2.25 | +| approx_kl | 0.0323 | +| entropy_loss | -3.31 | +| policy_loss | -0.012 | +| value_loss | 0.752 | | stat/ | | -| constraint_violation | 600 | +| constraint_violation | 590 | | ep_constraint_vio... | 0.2 | | ep_length | 201 | -| ep_return | 130 | -| ep_reward | 0.586 | +| ep_return | 131 | +| ep_reward | 0.532 | +| stat_eval/ | | +| constraint_violation | 0.3 | +| ep_length | 175 | +| ep_return | 121 | +| ep_reward | 0.486 | +| mse | 65.1 | +| time/ | | +| progress | 0.92 | +| step | 9.2e+05 | +| step_time | 10.5 | +-------------------------------------- + +2023-10-27 20:04:08,274 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 155.871 +/- 17.891 +2023-10-27 20:04:08,275 : +-------------------------------------- +| loss/ | | +| approx_kl | 0.031 | +| entropy_loss | -3.31 | +| policy_loss | -0.00329 | +| value_loss | 2.71 | +| stat/ | | +| constraint_violation | 597 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 141 | +| ep_reward | 0.605 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 151 | -| ep_reward | 0.604 | -| mse | 203 | +| ep_return | 156 | +| ep_reward | 0.623 | +| mse | 187 | | time/ | | | progress | 0.93 | | step | 9.3e+05 | -| step_time | 8.16 | +| step_time | 10.7 | -------------------------------------- -2023-10-19 17:39:50,599 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 140.147 +/- 23.958 -2023-10-19 17:39:50,601 : +2023-10-27 20:06:20,492 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 146.076 +/- 25.402 +2023-10-27 20:06:20,494 : -------------------------------------- | loss/ | | -| approx_kl | 0.0359 | -| entropy_loss | -3.47 | -| policy_loss | 0.00278 | -| value_loss | 0.648 | +| approx_kl | 0.0173 | +| entropy_loss | -3.29 | +| policy_loss | -0.00712 | +| value_loss | 1.61 | | stat/ | | -| constraint_violation | 606 | +| constraint_violation | 604 | | ep_constraint_vio... | 0.2 | | ep_length | 201 | -| ep_return | 132 | -| ep_reward | 0.565 | +| ep_return | 126 | +| ep_reward | 0.538 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 140 | -| ep_reward | 0.561 | -| mse | 265 | +| ep_return | 146 | +| ep_reward | 0.584 | +| mse | 267 | | time/ | | | progress | 0.94 | | step | 9.4e+05 | -| step_time | 8.98 | +| step_time | 10.6 | -------------------------------------- -2023-10-19 17:41:34,655 : Eval | ep_lengths 227.60 +/- 67.20 | ep_return 132.610 +/- 50.082 -2023-10-19 17:41:34,656 : +2023-10-27 20:08:29,299 : Eval | ep_lengths 227.20 +/- 68.40 | ep_return 133.563 +/- 49.593 +2023-10-27 20:08:29,300 : -------------------------------------- | loss/ | | -| approx_kl | 0.0296 | -| entropy_loss | -3.49 | -| policy_loss | -0.00428 | -| value_loss | 0.551 | +| approx_kl | 0.0236 | +| entropy_loss | -3.28 | +| policy_loss | -0.0188 | +| value_loss | 0.578 | | stat/ | | -| constraint_violation | 615 | +| constraint_violation | 613 | | ep_constraint_vio... | 0.1 | | ep_length | 225 | -| ep_return | 147 | -| ep_reward | 0.591 | +| ep_return | 149 | +| ep_reward | 0.599 | | stat_eval/ | | | constraint_violation | 0.1 | -| ep_length | 228 | -| ep_return | 133 | -| ep_reward | 0.53 | -| mse | 273 | +| ep_length | 227 | +| ep_return | 134 | +| ep_reward | 0.534 | +| mse | 279 | | time/ | | | progress | 0.95 | | step | 9.5e+05 | -| step_time | 9.13 | +| step_time | 10.7 | -------------------------------------- -2023-10-19 17:43:17,552 : Eval | ep_lengths 225.50 +/- 73.50 | ep_return 143.759 +/- 49.568 -2023-10-19 17:43:17,553 : +2023-10-27 20:10:39,721 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 155.947 +/- 17.361 +2023-10-27 20:10:39,722 : +--------------------------------------- +| loss/ | | +| approx_kl | 0.0351 | +| entropy_loss | -3.31 | +| policy_loss | -0.000417 | +| value_loss | 0.648 | +| stat/ | | +| constraint_violation | 620 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 152 | +| ep_reward | 0.606 | +| stat_eval/ | | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 156 | +| ep_reward | 0.624 | +| mse | 223 | +| time/ | | +| progress | 0.96 | +| step | 9.6e+05 | +| step_time | 10.5 | +--------------------------------------- + +2023-10-27 20:12:48,107 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 117.460 +/- 61.161 +2023-10-27 20:12:48,108 : -------------------------------------- | loss/ | | -| approx_kl | 0.0293 | -| entropy_loss | -3.52 | -| policy_loss | -0.0151 | -| value_loss | 0.685 | +| approx_kl | 0.0489 | +| entropy_loss | -3.33 | +| policy_loss | -0.00257 | +| value_loss | 0.767 | | stat/ | | -| constraint_violation | 623 | +| constraint_violation | 632 | | ep_constraint_vio... | 0.2 | | ep_length | 200 | -| ep_return | 138 | -| ep_reward | 0.554 | -| stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 226 | -| ep_return | 144 | -| ep_reward | 0.575 | -| mse | 150 | -| time/ | | -| progress | 0.96 | -| step | 9.6e+05 | -| step_time | 8.49 | --------------------------------------- - -2023-10-19 17:45:01,195 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 114.819 +/- 59.793 -2023-10-19 17:45:01,197 : --------------------------------------- -| loss/ | | -| approx_kl | 0.0362 | -| entropy_loss | -3.54 | -| policy_loss | 0.0109 | -| value_loss | 1.15 | -| stat/ | | -| constraint_violation | 633 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 136 | -| ep_reward | 0.542 | +| ep_return | 121 | +| ep_reward | 0.49 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 200 | -| ep_return | 115 | -| ep_reward | 0.459 | -| mse | 236 | +| ep_return | 117 | +| ep_reward | 0.47 | +| mse | 240 | | time/ | | | progress | 0.97 | | step | 9.7e+05 | -| step_time | 8.75 | +| step_time | 10.7 | -------------------------------------- -2023-10-19 17:46:47,061 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 153.957 +/- 20.587 -2023-10-19 17:46:47,062 : +2023-10-27 20:14:59,860 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 156.905 +/- 19.813 +2023-10-27 20:14:59,870 : -------------------------------------- | loss/ | | -| approx_kl | 0.03 | -| entropy_loss | -3.59 | -| policy_loss | -0.0141 | -| value_loss | 1.19 | +| approx_kl | 0.0452 | +| entropy_loss | -3.27 | +| policy_loss | -0.009 | +| value_loss | 0.727 | | stat/ | | | constraint_violation | 639 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 139 | -| ep_reward | 0.557 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 126 | +| ep_reward | 0.512 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 154 | -| ep_reward | 0.616 | -| mse | 188 | +| ep_return | 157 | +| ep_reward | 0.628 | +| mse | 194 | | time/ | | | progress | 0.98 | | step | 9.8e+05 | -| step_time | 8.83 | +| step_time | 10.5 | -------------------------------------- -2023-10-19 17:48:28,965 : Eval | ep_lengths 201.90 +/- 96.28 | ep_return 132.958 +/- 68.180 -2023-10-19 17:48:28,966 : +2023-10-27 20:17:05,477 : Eval | ep_lengths 202.40 +/- 95.24 | ep_return 129.287 +/- 66.845 +2023-10-27 20:17:05,479 : -------------------------------------- | loss/ | | -| approx_kl | 0.031 | -| entropy_loss | -3.58 | -| policy_loss | -0.00602 | -| value_loss | 1.27 | +| approx_kl | 0.0313 | +| entropy_loss | -3.3 | +| policy_loss | 0.00254 | +| value_loss | 0.536 | | stat/ | | -| constraint_violation | 646 | +| constraint_violation | 647 | | ep_constraint_vio... | 0.2 | | ep_length | 200 | -| ep_return | 134 | -| ep_reward | 0.537 | +| ep_return | 128 | +| ep_reward | 0.512 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 202 | -| ep_return | 133 | -| ep_reward | 0.532 | -| mse | 107 | +| ep_return | 129 | +| ep_reward | 0.517 | +| mse | 116 | | time/ | | | progress | 0.99 | | step | 9.9e+05 | -| step_time | 8.72 | +| step_time | 10.6 | -------------------------------------- -2023-10-19 17:49:55,946 : Checkpoint | ./models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/model_latest.pt -2023-10-19 17:50:15,302 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 139.788 +/- 27.611 -2023-10-19 17:50:15,303 : +2023-10-27 20:18:52,844 : Checkpoint | ./models/rl_models/quadrotor_3D/track/ppo/mpsf_es_pen/model_latest.pt +2023-10-27 20:19:16,696 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 145.626 +/- 27.961 +2023-10-27 20:19:16,697 : -------------------------------------- | loss/ | | -| approx_kl | 0.0306 | -| entropy_loss | -3.59 | -| policy_loss | -0.0119 | -| value_loss | 0.54 | +| approx_kl | 0.0285 | +| entropy_loss | -3.26 | +| policy_loss | -0.00234 | +| value_loss | 0.26 | | stat/ | | -| constraint_violation | 653 | +| constraint_violation | 654 | | ep_constraint_vio... | 0.1 | | ep_length | 225 | -| ep_return | 132 | -| ep_reward | 0.533 | +| ep_return | 140 | +| ep_reward | 0.563 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 140 | -| ep_reward | 0.559 | -| mse | 360 | +| ep_return | 146 | +| ep_reward | 0.583 | +| mse | 361 | | time/ | | | progress | 1 | | step | 1e+06 | -| step_time | 8.98 | +| step_time | 10.6 | -------------------------------------- diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/loss/approx_kl.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/loss/approx_kl.log index 2b8f45d21..08aab8105 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/loss/approx_kl.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/loss/approx_kl.log @@ -1,101 +1,101 @@ step,loss/approx_kl -10000,0.022111140238121153 -20000,0.021352816761160888 -30000,0.018239149272752306 -40000,0.0284473275532946 -50000,0.031202962513392173 -60000,0.02144558693592747 -70000,0.01857717726379633 -80000,0.02271353603961567 -90000,0.03237317161789785 -100000,0.02971522058360278 -110000,0.033160094637423754 -120000,0.0355887106154114 -130000,0.026909683931929367 -140000,0.02132376537192613 -150000,0.022457935474812986 -160000,0.03609378449618816 -170000,0.03637282357861599 -180000,0.029215570865198964 -190000,0.01849697580716262 -200000,0.02579205827787518 -210000,0.0294672899801905 -220000,0.029712171976765 -230000,0.023108071849370998 -240000,0.029823576038082437 -250000,0.028218755498528474 -260000,0.019354521444377798 -270000,0.02203811181243509 -280000,0.0330073781699563 -290000,0.021187828341498967 -300000,0.02541885397707423 -310000,0.028791844177370268 -320000,0.0331593383103609 -330000,0.02272137852075199 -340000,0.02686781199493756 -350000,0.03963112962276985 -360000,0.024418458606426916 -370000,0.016434435406699775 -380000,0.030494655963654315 -390000,0.020348185642311972 -400000,0.034134119089382395 -410000,0.029019819339737296 -420000,0.03434102494890492 -430000,0.03194195199757815 -440000,0.027368854164766764 -450000,0.02640575885307044 -460000,0.036720154085196556 -470000,0.03280804537547131 -480000,0.027506394265219565 -490000,0.04174288792225221 -500000,0.0326923569664359 -510000,0.026557868851038318 -520000,0.02178068261127919 -530000,0.034011460219820336 -540000,0.02890150291689982 -550000,0.02669331409657995 -560000,0.03428658684715628 -570000,0.0285546770086512 -580000,0.027289175467255213 -590000,0.03236437547796716 -600000,0.03300062701261292 -610000,0.033397434977814555 -620000,0.027736908096509676 -630000,0.03256666293212523 -640000,0.021122537677486737 -650000,0.033670849873063464 -660000,0.02992955462541431 -670000,0.030612428765743976 -680000,0.025299157279854018 -690000,0.03091023807258656 -700000,0.02764814984208594 -710000,0.04531647422506164 -720000,0.02805203595974793 -730000,0.026409906218759716 -740000,0.040761618564526245 -750000,0.038415312076297906 -760000,0.036233939983261125 -770000,0.02379729984483371 -780000,0.04003918688589086 -790000,0.032318663271144035 -800000,0.03147669137300303 -810000,0.0230525688889126 -820000,0.03745051059716691 -830000,0.028596775528664388 -840000,0.032430003963721296 -850000,0.030612790840677917 -860000,0.034538034613554675 -870000,0.028075189267595613 -880000,0.02211087813290457 -890000,0.026641335012391215 -900000,0.03160403780639171 -910000,0.034469272235097985 -920000,0.03895326708443463 -930000,0.02065722729700307 -940000,0.03285018130360792 -950000,0.030561927643915014 -960000,0.018973070204568405 -970000,0.030426943853187068 -980000,0.024410002220732467 -990000,0.029767705818327765 -1000000,0.03983897011882315 +10000,0.025233088200911878 +20000,0.017477691483994327 +30000,0.020538179002081353 +40000,0.026337245106697084 +50000,0.028332312218844892 +60000,0.026588529837317763 +70000,0.03665355336852372 +80000,0.024995728426923356 +90000,0.02757378219782064 +100000,0.011599436844699083 +110000,0.022265824295269947 +120000,0.026048327214084566 +130000,0.032453587061415115 +140000,0.025993073770465946 +150000,0.03386882003396749 +160000,0.02465813043527305 +170000,0.036011485910664 +180000,0.02941639523487538 +190000,0.030457654548808933 +200000,0.029816157300956547 +210000,0.029444851858230932 +220000,0.03367573961149901 +230000,0.033627011463977396 +240000,0.03625142409000546 +250000,0.03225162726206084 +260000,0.046129271628645566 +270000,0.019153018932168683 +280000,0.026056569628417493 +290000,0.033370547283751266 +300000,0.03138967594131828 +310000,0.01574189238405476 +320000,0.034811871824786074 +330000,0.024636297176281616 +340000,0.022761084876644112 +350000,0.025671476397352917 +360000,0.03164259974534313 +370000,0.03229851260160406 +380000,0.03543443912640214 +390000,0.0247185018301631 +400000,0.0322835404270639 +410000,0.026433330588042737 +420000,0.024350707729657493 +430000,0.023919344359698398 +440000,0.013844692722583813 +450000,0.02438100401001672 +460000,0.03268510537066808 +470000,0.027905481851970153 +480000,0.030579091624046366 +490000,0.026584137479464216 +500000,0.02882119334147622 +510000,0.027067194522048033 +520000,0.030936324115221698 +530000,0.027304015262052415 +540000,0.02458855375492324 +550000,0.03817453633528203 +560000,0.02146430719488611 +570000,0.02591278710557769 +580000,0.023408724390901626 +590000,0.028492597529354197 +600000,0.032109812189204 +610000,0.028953754568162064 +620000,0.03559901105084766 +630000,0.024350862647406754 +640000,0.02293625953607261 +650000,0.02938895971359064 +660000,0.035260337358340624 +670000,0.027553137360761565 +680000,0.034466817881911985 +690000,0.03505338900722563 +700000,0.03905451351311058 +710000,0.01362552021940549 +720000,0.03511505380738526 +730000,0.03319760339800269 +740000,0.030902732225755857 +750000,0.024226302583701904 +760000,0.03260721541785945 +770000,0.02827780882362277 +780000,0.030728471310188365 +790000,0.0296471124359717 +800000,0.03287485922531535 +810000,0.026696134024920565 +820000,0.024973949270012476 +830000,0.025931365663806598 +840000,0.015674264216795564 +850000,0.02692121989869823 +860000,0.016227262661171457 +870000,0.01785585572943091 +880000,0.02461829735742261 +890000,0.028670004142137863 +900000,0.024459567363373937 +910000,0.02558895707285653 +920000,0.04474400416171799 +930000,0.02663019202494372 +940000,0.015334192803129556 +950000,0.025520622342204053 +960000,0.024101797949212293 +970000,0.028311181006332238 +980000,0.03456108044677724 +990000,0.03446267808321864 +1000000,0.02875643534741054 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/loss/entropy_loss.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/loss/entropy_loss.log index c164b8033..395c7f7d8 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/loss/entropy_loss.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/loss/entropy_loss.log @@ -1,101 +1,101 @@ step,loss/entropy_loss -10000,-3.678169790903728 -20000,-3.6013393719991043 -30000,-3.617695025602977 -40000,-3.6317789038022354 -50000,-3.5656736016273505 -60000,-3.558868829409281 -70000,-3.5707393407821657 -80000,-3.5925465305646265 -90000,-3.5298052787780763 -100000,-3.5282770474751795 -110000,-3.5494400421778365 -120000,-3.567583938439687 -130000,-3.5667113582293197 -140000,-3.5367508610089615 -150000,-3.5000190893809004 -160000,-3.4995872735977174 -170000,-3.505813491344452 -180000,-3.5000337044397996 -190000,-3.559721660614014 -200000,-3.533534034093221 -210000,-3.531258090337117 -220000,-3.4850338459014893 -230000,-3.493517311414083 -240000,-3.5097062031428017 -250000,-3.4912219961484277 -260000,-3.4991601069768272 -270000,-3.437724939982096 -280000,-3.4329693357149766 -290000,-3.4267562905947364 -300000,-3.4044104297955826 -310000,-3.356200337409973 -320000,-3.360111172993977 -330000,-3.395894821484884 -340000,-3.3883176485697426 -350000,-3.3980820377667746 -360000,-3.448195958137512 -370000,-3.41623694896698 -380000,-3.412528971831004 -390000,-3.4145924290021257 -400000,-3.428766556580861 -410000,-3.4590307871500654 -420000,-3.4834909439086923 -430000,-3.512020591894786 -440000,-3.4813439925511673 -450000,-3.5230077385902403 -460000,-3.559783597787221 -470000,-3.5504067659378054 -480000,-3.518723205725352 -490000,-3.532335118452708 -500000,-3.4888505578041076 -510000,-3.5151932160059607 -520000,-3.5124811450640356 -530000,-3.5722813049952196 -540000,-3.5594702879587814 -550000,-3.5460929075876875 -560000,-3.5629384795824692 -570000,-3.6059549013773604 -580000,-3.597211261590322 -590000,-3.6040411233901977 -600000,-3.6069986740748083 -610000,-3.5325455029805513 -620000,-3.5197139064470933 -630000,-3.454592696825663 -640000,-3.505715036392212 -650000,-3.5382988015810652 -660000,-3.5216501116752625 -670000,-3.5466074347496033 -680000,-3.583674681186676 -690000,-3.5643441438674928 -700000,-3.5398754358291624 -710000,-3.4699850638707472 -720000,-3.5094894925753275 -730000,-3.554455840587616 -740000,-3.5282819271087646 -750000,-3.502730977535248 -760000,-3.4908465822537735 -770000,-3.5372721552848816 -780000,-3.5102538188298547 -790000,-3.4972264091173804 -800000,-3.4479867935180666 -810000,-3.441772369543711 -820000,-3.3910104036331177 -830000,-3.330093630154928 -840000,-3.3185526609420775 -850000,-3.3455040733019508 -860000,-3.3655091285705567 -870000,-3.358830487728119 -880000,-3.3378323674201966 -890000,-3.40673261086146 -900000,-3.3948200027147926 -910000,-3.407046635945638 -920000,-3.423867571353912 -930000,-3.4097641825675966 -940000,-3.433763790130615 -950000,-3.417387270927429 -960000,-3.489895745118459 -970000,-3.4612897237141924 -980000,-3.4591241995493567 -990000,-3.4612253308296204 -1000000,-3.452151429653168 +10000,-3.670504828294118 +20000,-3.651647651195526 +30000,-3.6561014890670775 +40000,-3.5874200423558547 +50000,-3.589934047063191 +60000,-3.631274882952373 +70000,-3.5623010317484542 +80000,-3.5190330862998964 +90000,-3.533758779366811 +100000,-3.58474854628245 +110000,-3.518812211354574 +120000,-3.51155796845754 +130000,-3.524828815460205 +140000,-3.46921585003535 +150000,-3.4743420839309693 +160000,-3.49769184589386 +170000,-3.530289057890575 +180000,-3.520803674062093 +190000,-3.530750179290771 +200000,-3.545178051789601 +210000,-3.5603865146636964 +220000,-3.613964931170146 +230000,-3.647669645150503 +240000,-3.608467423915863 +250000,-3.598844655354818 +260000,-3.6007893800735475 +270000,-3.671857233842214 +280000,-3.6736552158991493 +290000,-3.6905321160952256 +300000,-3.6903343160947157 +310000,-3.658932495117187 +320000,-3.66512664159139 +330000,-3.686815198262532 +340000,-3.643665150801341 +350000,-3.5883996367454527 +360000,-3.5931111017862953 +370000,-3.640175004800161 +380000,-3.5910560687383017 +390000,-3.6251839836438497 +400000,-3.6231460094451906 +410000,-3.6135252356529235 +420000,-3.5503224333127337 +430000,-3.551543629169464 +440000,-3.587146727244059 +450000,-3.5770240823427835 +460000,-3.571390465895335 +470000,-3.5942684729894006 +480000,-3.5834606687227883 +490000,-3.578536641597748 +500000,-3.578882543245952 +510000,-3.5697313388188676 +520000,-3.589774350325267 +530000,-3.5756733576456705 +540000,-3.532288428147633 +550000,-3.496242809295654 +560000,-3.4610376199086503 +570000,-3.4378212094306946 +580000,-3.4422893603642777 +590000,-3.422907384236653 +600000,-3.4460418581962586 +610000,-3.457165662447612 +620000,-3.4778427163759873 +630000,-3.472316288948059 +640000,-3.4695672392845154 +650000,-3.405321729183197 +660000,-3.386856087048849 +670000,-3.3568331360816956 +680000,-3.364880124727885 +690000,-3.3392853697141005 +700000,-3.386869355042775 +710000,-3.4036250154177354 +720000,-3.454187639554341 +730000,-3.4713346163431806 +740000,-3.478751742839813 +750000,-3.438249393304189 +760000,-3.4364365577697753 +770000,-3.4463329553604125 +780000,-3.505610020955404 +790000,-3.496603616078695 +800000,-3.4785329818725588 +810000,-3.4515693942705794 +820000,-3.423990086714427 +830000,-3.4144377867380777 +840000,-3.4271103302637735 +850000,-3.5051892201105757 +860000,-3.5034650683403017 +870000,-3.4633739074071244 +880000,-3.4753178159395857 +890000,-3.509218394756317 +900000,-3.527465172608694 +910000,-3.554194251696269 +920000,-3.5650903224945067 +930000,-3.572671433289846 +940000,-3.563181153933207 +950000,-3.561006168524424 +960000,-3.4963494300842286 +970000,-3.5081828236579895 +980000,-3.469441433747609 +990000,-3.4192839900652574 +1000000,-3.4419839064280198 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/loss/policy_loss.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/loss/policy_loss.log index 71b2979d3..683945134 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/loss/policy_loss.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/loss/policy_loss.log @@ -1,101 +1,101 @@ step,loss/policy_loss -10000,-0.014436700897405735 -20000,-0.013839967260110797 -30000,-0.015121299142113007 -40000,-0.012485426947934463 -50000,-0.0010613418154504215 -60000,-0.013279136379728015 -70000,-0.009547456880833135 -80000,-0.0116014649466489 -90000,-0.016378268803262923 -100000,-0.013917441030584185 -110000,-0.019614670207369282 -120000,-0.011006322575453722 -130000,-0.01064250528594671 -140000,-0.0090201309366868 -150000,-0.0036871685874744985 -160000,-0.0064323278586986415 -170000,-0.01866572419345349 -180000,-0.011692953182808302 -190000,-0.012977910541339854 -200000,-0.01034126915255902 -210000,-0.009445017436688977 -220000,-0.023139673784264632 -230000,-0.010932114165170675 -240000,-0.005284416882953777 -250000,-0.01063820203157963 -260000,-0.019385434455947093 -270000,-0.016495079878543865 -280000,-0.0022891346429444546 -290000,-0.006149701140173254 -300000,0.0015765395271273598 -310000,-0.005362113433945669 -320000,-0.014204787762848153 -330000,-0.011977367036593899 -340000,-0.01420070252654516 -350000,-0.010040157722826407 -360000,-0.012042974632378972 -370000,-0.010428409851320148 -380000,-0.007765075304882109 -390000,-0.013798615729337785 -400000,-0.0024469914680401708 -410000,-0.008094480979932813 -420000,-0.009298534698135541 -430000,0.003732976465190692 -440000,-0.010529370211506153 -450000,-0.013749249101334012 -460000,-0.007655467844062308 -470000,-0.0032141283563058695 -480000,-0.00733897387068209 -490000,-0.008452062472471603 -500000,-0.002497095900118208 -510000,-0.012461098548741192 -520000,-0.012169256140736457 -530000,-0.020511755702855898 -540000,-0.014362382290902465 -550000,-0.01128384197650649 -560000,-0.00766117354334841 -570000,-0.0012636131343047978 -580000,-0.005142752938521328 -590000,-0.01762662089162667 -600000,-0.004376424806152255 -610000,-0.010682814214218465 -620000,-0.011056838387374653 -630000,-0.011451842919562609 -640000,-0.017026428882042942 -650000,-0.01658900016856339 -660000,-0.008894324136023613 -670000,-0.010294504792458383 -680000,-0.007780909483835646 -690000,-0.008420343291940535 -700000,-0.014846208676114822 -710000,0.0051397764956822205 -720000,0.0025283338960431297 -730000,-0.011094745012081792 -740000,0.0019891091335373515 -750000,0.0011048855974666659 -760000,-0.015484726874330192 -770000,-0.01189349928419427 -780000,0.012780163794148234 -790000,-0.02474331378034581 -800000,-0.01404185962202914 -810000,-0.005641497700352189 -820000,-0.012475574909380628 -830000,-0.005628232249181635 -840000,-0.008881653901346234 -850000,-0.01010225659429884 -860000,-0.004911212243050572 -870000,-0.008224499520870416 -880000,-0.00974390906663999 -890000,-0.010393152747642295 -900000,-0.005478909048679552 -910000,-0.011851643231034326 -920000,-0.008475061465348065 -930000,-0.010050749572538225 -940000,-0.0025034639822568844 -950000,-0.008182305798057352 -960000,-0.019042006155135457 -970000,-0.0039167662379618415 -980000,-0.010377918198882834 -990000,-0.009735770800090564 -1000000,-0.017301949931856332 +10000,-0.009277789995798733 +20000,-0.014435441189699772 +30000,-0.006331017263256332 +40000,-0.015405060247438379 +50000,-0.014661252720981519 +60000,-0.0074621169064097975 +70000,-0.006452861770177676 +80000,-0.006228519073085297 +90000,-0.019398747725036868 +100000,-0.02101784660251777 +110000,-0.014924152583428635 +120000,-0.006746372913002517 +130000,-0.01987173735733799 +140000,-0.012053041713564326 +150000,-0.0063044906554596035 +160000,-0.01831254667688472 +170000,-0.01122753802478155 +180000,-0.015611692136164968 +190000,-0.01653450580129328 +200000,-0.00738967595457896 +210000,-0.009936932248792989 +220000,-0.01156664882124589 +230000,-0.014251967232827418 +240000,-0.0028937748436213334 +250000,-0.013228257247053713 +260000,-0.009992988281134497 +270000,-0.0034555359192574692 +280000,-0.0031990540007185674 +290000,0.003542611362886864 +300000,-0.009623759194042474 +310000,-0.011467024156737317 +320000,-0.010572380506572252 +330000,-0.009362078236529028 +340000,-0.005864506251836163 +350000,-0.010532457086866839 +360000,-0.011384354644553459 +370000,-0.014013832578852779 +380000,-0.010132556374708546 +390000,-0.0034646618230639303 +400000,-0.012497509080664613 +410000,-0.005385646481556322 +420000,-0.01542347809003421 +430000,-0.01564026258599389 +440000,-0.008406680359419109 +450000,-0.01995073075765063 +460000,-0.012364461571094752 +470000,-0.0022313944766114632 +480000,-0.00268358979285256 +490000,-0.0020992975000419145 +500000,-0.012111506603249072 +510000,-0.021643262465387732 +520000,-0.01811747911989048 +530000,-0.008773069883192432 +540000,-0.014314669450925669 +550000,-0.00905292422709332 +560000,-0.008891177697040672 +570000,-0.009835822226258376 +580000,-0.009443122940014776 +590000,-0.019222982318084166 +600000,-0.01266609816285751 +610000,-0.011550843335300421 +620000,-0.010670524753062658 +630000,-0.013870301512537028 +640000,-0.00237853006501344 +650000,-0.000958172611305505 +660000,-0.006507182275586354 +670000,-0.014252504782254738 +680000,-0.007564755215805385 +690000,-0.016210391469466214 +700000,-0.019713915858872014 +710000,-0.013680719552657756 +720000,-0.0037593422567755394 +730000,-0.011244987165452969 +740000,-0.008339870197450855 +750000,-0.01862721112768467 +760000,-0.008029187313225166 +770000,-0.014188717733301288 +780000,0.001010357497501157 +790000,-0.009700372726516967 +800000,-0.013214360745229015 +810000,-0.009890502335294133 +820000,-0.0061419519126953825 +830000,-0.018213635763721293 +840000,-0.02284718695521739 +850000,-0.006066770873490454 +860000,-0.020395603421482063 +870000,-0.011618423300362472 +880000,-0.007631963519513116 +890000,-0.013068312502533022 +900000,-0.011786821805247129 +910000,-0.005146704581092073 +920000,-0.022613247433888314 +930000,-0.011835389486896858 +940000,-0.013435318277547601 +950000,-0.014021486292890142 +960000,-0.013949243915941151 +970000,-0.012509181667099401 +980000,0.017957856715561236 +990000,-0.013861897021895785 +1000000,-0.004531638136511441 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/loss/value_loss.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/loss/value_loss.log index fc1ec1cb0..fddf64615 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/loss/value_loss.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/loss/value_loss.log @@ -1,101 +1,101 @@ step,loss/value_loss -10000,7.491164324037664 -20000,4.329737268412914 -30000,3.963941475131417 -40000,5.548176824497641 -50000,5.116502395782742 -60000,4.929484045926976 -70000,4.378561614084292 -80000,3.0046953875987574 -90000,1.8614377750410973 -100000,2.991357879455429 -110000,2.946498953330627 -120000,1.7780928257304136 -130000,1.2349565134115115 -140000,3.567097967737019 -150000,1.3768993545018202 -160000,1.4124261910948754 -170000,1.2214881858820763 -180000,1.3634862438675337 -190000,1.5314653469273802 -200000,0.5014180676353774 -210000,1.1797370369213447 -220000,0.6245265632546128 -230000,0.8901807647384226 -240000,1.1647710184193993 -250000,0.44109665045561164 -260000,0.6310314719778237 -270000,0.5787805385155153 -280000,1.0071631146282207 -290000,1.0248460082689137 -300000,0.7858281404947542 -310000,0.6207438370639928 -320000,1.6894893322130713 -330000,1.6400827517685108 -340000,0.4616209587718294 -350000,2.1488786013024543 -360000,0.7244450435868006 -370000,0.8376909494065019 -380000,0.42251809239033256 -390000,0.21566184752670794 -400000,0.7644243709013145 -410000,1.112857169276143 -420000,0.6391389988262446 -430000,0.8069630817311712 -440000,0.5205151673862336 -450000,0.634749760314435 -460000,0.5507197735210163 -470000,1.8763413153461006 -480000,1.7323471206702066 -490000,1.9739052077821948 -500000,1.3113840136321784 -510000,0.6174006662760866 -520000,0.5105709755156888 -530000,0.7487252865484276 -540000,0.6422486492358876 -550000,0.35482045740926504 -560000,0.5149289679791782 -570000,0.44255135050830174 -580000,0.3718249516658724 -590000,0.8291720916678876 -600000,3.651353125297201 -610000,1.6608059377020994 -620000,0.8289893136154992 -630000,2.3774544271534954 -640000,0.4010369020231024 -650000,1.4117340353274277 -660000,0.5467516960780385 -670000,0.9190991736503191 -680000,0.21642505264009015 -690000,0.3801737001718837 -700000,0.35776150875163215 -710000,1.998256328450314 -720000,0.5296359483277892 -730000,0.6468189495189809 -740000,0.8184579703649826 -750000,0.3224037367773255 -760000,0.6027462778821714 -770000,0.7629590565521153 -780000,0.807125523447407 -790000,0.72719516644999 -800000,0.47704712515072945 -810000,0.993343527618143 -820000,1.626302771464252 -830000,1.076831947674269 -840000,0.8800174857779266 -850000,1.5169478171688873 -860000,0.3641472006510326 -870000,0.6632936954992947 -880000,0.39951652285383044 -890000,0.7252182707744178 -900000,0.36592951764332 -910000,0.9611517248882601 -920000,1.2650873285229598 -930000,0.35707438689845356 -940000,0.5321763370985686 -950000,0.7786624726215692 -960000,0.39372607910796087 -970000,0.4697300915356587 -980000,0.7471599561340996 -990000,1.013851671485476 -1000000,0.4997249547339078 +10000,10.291884093751836 +20000,1.2821917558045728 +30000,2.6463024280812513 +40000,6.005786612857574 +50000,4.814746828719044 +60000,5.225317862438109 +70000,7.66257421430539 +80000,5.357541944009259 +90000,3.6590684600310475 +100000,1.7697047748311736 +110000,5.16812866082917 +120000,1.21223426460709 +130000,3.3410036015277433 +140000,1.422713881988043 +150000,1.593898099889957 +160000,1.4204410328045864 +170000,1.5372445563463033 +180000,1.2602570571285672 +190000,0.5997138102157323 +200000,0.7265609294719155 +210000,1.545217608022432 +220000,1.538130775075356 +230000,0.5039557292430521 +240000,0.7013885145584208 +250000,0.5934697888173934 +260000,1.5350142435362413 +270000,0.415316288466804 +280000,1.1998446836584569 +290000,0.9157072856683082 +300000,1.0746467061707983 +310000,3.2415082654041263 +320000,1.4069791493144062 +330000,0.5166551402118109 +340000,0.4470558494304253 +350000,0.8008390925256869 +360000,0.6917669153633864 +370000,0.8907272299962026 +380000,0.5268690261866169 +390000,0.6033610661007927 +400000,2.056680858719852 +410000,3.4715509092847325 +420000,1.0822341964019855 +430000,0.7015712541416527 +440000,0.6385163618442079 +450000,0.5139056598122973 +460000,2.5151904403037166 +470000,0.5707997334015792 +480000,0.47174872920877575 +490000,1.0833559448421628 +500000,1.046064981185231 +510000,1.404967733771105 +520000,1.010403748656791 +530000,0.8059682795129437 +540000,1.0924552544498487 +550000,0.7976334743291071 +560000,0.37522465059735416 +570000,0.656122184274 +580000,0.6112113478697351 +590000,0.9700016662967608 +600000,3.130692216512746 +610000,0.5425168066863759 +620000,0.5462181185414122 +630000,0.5832960440159506 +640000,1.0952746096196861 +650000,1.2105828219004333 +660000,0.8358686121864313 +670000,1.365137358434207 +680000,0.521144136471903 +690000,0.5608174436318352 +700000,0.9309848981488861 +710000,0.6168158859125542 +720000,0.8269013572150058 +730000,0.5896273479608704 +740000,0.8630197959073109 +750000,0.48331437880032946 +760000,0.99262801942858 +770000,0.7347454991716227 +780000,0.5092325536493597 +790000,1.4318358054576121 +800000,1.093562217121104 +810000,2.4347283848365047 +820000,1.6075457397779886 +830000,1.104957129702654 +840000,0.677782855886304 +850000,1.0123563946456295 +860000,1.1540029038391348 +870000,0.6971970843849831 +880000,1.2334851157552496 +890000,0.9343585409969153 +900000,1.1430487705789645 +910000,0.8281345356677298 +920000,0.8859147802126754 +930000,1.3437793921720471 +940000,1.5113980758839562 +950000,0.440632303694675 +960000,0.9840680643690632 +970000,0.5435959145344237 +980000,1.4429508278145546 +990000,1.8738489014742599 +1000000,1.6506440957453414 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat/constraint_violation.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat/constraint_violation.log index dd10705a2..13c3e7142 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat/constraint_violation.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat/constraint_violation.log @@ -1,101 +1,101 @@ step,stat/constraint_violation -10000,35 -20000,46 -30000,124 -40000,132 -50000,166 -60000,189 -70000,226 -80000,230 -90000,243 -100000,270 -110000,300 -120000,312 -130000,332 -140000,370 -150000,382 -160000,418 -170000,457 -180000,483 -190000,504 -200000,538 -210000,559 -220000,589 -230000,607 -240000,627 -250000,644 -260000,666 -270000,704 -280000,731 -290000,740 -300000,786 -310000,803 -320000,824 -330000,872 -340000,884 -350000,887 -360000,948 -370000,988 -380000,1007 -390000,1042 -400000,1070 -410000,1098 -420000,1108 -430000,1121 -440000,1123 -450000,1153 -460000,1186 -470000,1230 -480000,1262 -490000,1282 -500000,1293 -510000,1312 -520000,1353 -530000,1374 -540000,1417 -550000,1452 -560000,1460 -570000,1472 -580000,1507 -590000,1508 -600000,1548 -610000,1575 -620000,1577 -630000,1597 -640000,1613 -650000,1646 -660000,1651 -670000,1667 -680000,1709 -690000,1749 -700000,1770 -710000,1776 -720000,1810 -730000,1834 -740000,1854 -750000,1896 -760000,1905 -770000,1926 -780000,1971 -790000,1993 -800000,2008 -810000,2011 -820000,2042 -830000,2067 -840000,2104 -850000,2132 -860000,2164 -870000,2200 -880000,2209 -890000,2225 -900000,2249 -910000,2276 -920000,2288 -930000,2307 -940000,2333 -950000,2354 -960000,2389 -970000,2396 -980000,2412 -990000,2427 -1000000,2479 +10000,39 +20000,53 +30000,134 +40000,140 +50000,185 +60000,215 +70000,240 +80000,244 +90000,256 +100000,283 +110000,315 +120000,323 +130000,343 +140000,383 +150000,407 +160000,430 +170000,472 +180000,494 +190000,517 +200000,548 +210000,569 +220000,595 +230000,614 +240000,641 +250000,658 +260000,675 +270000,720 +280000,747 +290000,757 +300000,800 +310000,817 +320000,841 +330000,890 +340000,902 +350000,905 +360000,962 +370000,1004 +380000,1023 +390000,1059 +400000,1088 +410000,1116 +420000,1122 +430000,1137 +440000,1139 +450000,1170 +460000,1201 +470000,1241 +480000,1274 +490000,1292 +500000,1301 +510000,1331 +520000,1361 +530000,1383 +540000,1424 +550000,1458 +560000,1461 +570000,1482 +580000,1509 +590000,1510 +600000,1551 +610000,1576 +620000,1581 +630000,1596 +640000,1610 +650000,1644 +660000,1646 +670000,1677 +680000,1698 +690000,1734 +700000,1755 +710000,1760 +720000,1811 +730000,1820 +740000,1850 +750000,1887 +760000,1907 +770000,1959 +780000,1975 +790000,2004 +800000,2018 +810000,2022 +820000,2057 +830000,2081 +840000,2107 +850000,2139 +860000,2201 +870000,2220 +880000,2222 +890000,2238 +900000,2266 +910000,2292 +920000,2306 +930000,2347 +940000,2350 +950000,2383 +960000,2402 +970000,2408 +980000,2440 +990000,2450 +1000000,2496 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat/ep_constraint_violation.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat/ep_constraint_violation.log index 4f6c8313c..ca2dfdeba 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat/ep_constraint_violation.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat/ep_constraint_violation.log @@ -1,101 +1,101 @@ step,stat/ep_constraint_violation -10000,1.3 -20000,0.2 -30000,1.1 +10000,1.0 +20000,0.3 +30000,1.3 40000,0.1 -50000,1.1 -60000,1.3 +50000,2.2 +60000,2.4 70000,0.0 80000,0.2 -90000,0.1 +90000,0.0 100000,0.0 -110000,0.7 -120000,0.1 -130000,0.2 -140000,1.3 -150000,0.1 +110000,0.8 +120000,0.0 +130000,0.1 +140000,0.1 +150000,1.3 160000,0.2 -170000,1.9 +170000,2.2 180000,1.5 190000,0.1 200000,1.5 -210000,1.5 -220000,0.9 +210000,0.1 +220000,0.4 230000,0.0 240000,0.0 -250000,0.1 +250000,0.2 260000,0.0 -270000,0.6 +270000,1.3 280000,0.0 290000,0.0 -300000,2.4 -310000,0.0 -320000,1.1 -330000,1.2 -340000,0.7 +300000,2.3 +310000,0.1 +320000,1.3 +330000,0.1 +340000,0.6 350000,0.1 -360000,1.1 -370000,0.5 +360000,0.1 +370000,0.3 380000,0.0 -390000,1.5 -400000,0.3 -410000,1.8 +390000,0.1 +400000,0.4 +410000,0.2 420000,0.0 430000,0.0 440000,0.0 -450000,2.7 -460000,0.8 +450000,2.6 +460000,0.9 470000,1.4 -480000,1.5 +480000,0.0 490000,0.1 500000,0.0 -510000,1.6 +510000,2.8 520000,0.7 530000,0.8 -540000,0.5 -550000,0.9 -560000,0.6 -570000,0.0 +540000,0.0 +550000,0.0 +560000,0.0 +570000,0.9 580000,0.0 590000,0.0 -600000,2.1 -610000,0.3 -620000,0.0 -630000,0.3 -640000,0.0 -650000,2.3 +600000,2.6 +610000,0.1 +620000,0.3 +630000,0.2 +640000,0.1 +650000,0.9 660000,0.0 -670000,0.6 +670000,1.9 680000,0.0 690000,0.1 700000,0.1 -710000,0.4 -720000,0.0 +710000,0.3 +720000,1.4 730000,0.2 -740000,0.8 -750000,1.8 -760000,0.0 -770000,0.9 -780000,0.1 -790000,0.0 -800000,0.0 -810000,0.0 -820000,1.4 -830000,0.0 +740000,1.8 +750000,0.2 +760000,0.2 +770000,3.1 +780000,0.0 +790000,0.1 +800000,0.2 +810000,0.2 +820000,0.7 +830000,0.7 840000,0.0 -850000,1.0 -860000,0.0 -870000,0.0 -880000,0.0 -890000,0.2 -900000,0.0 +850000,0.1 +860000,2.3 +870000,0.1 +880000,0.1 +890000,0.1 +900000,0.2 910000,0.1 -920000,0.2 -930000,0.1 +920000,0.3 +930000,2.4 940000,0.1 -950000,0.1 -960000,1.1 -970000,0.4 -980000,0.2 -990000,0.1 -1000000,0.7 +950000,1.5 +960000,0.1 +970000,0.1 +980000,1.1 +990000,0.6 +1000000,0.1 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat/ep_length.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat/ep_length.log index c533e536e..4602597af 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat/ep_length.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat/ep_length.log @@ -1,9 +1,9 @@ step,stat/ep_length 10000,225.9 -20000,225.5 +20000,201.1 30000,250.0 40000,225.2 -50000,225.9 +50000,202.9 60000,226.9 70000,250.0 80000,226.0 @@ -13,13 +13,13 @@ step,stat/ep_length 120000,250.0 130000,225.1 140000,250.0 -150000,225.6 +150000,226.3 160000,200.3 -170000,176.4 +170000,201.5 180000,250.0 190000,225.4 200000,250.0 -210000,226.4 +210000,250.0 220000,250.0 230000,250.0 240000,250.0 @@ -28,23 +28,23 @@ step,stat/ep_length 270000,250.0 280000,250.0 290000,250.0 -300000,225.9 -310000,250.0 -320000,227.4 -330000,250.0 +300000,250.0 +310000,225.6 +320000,227.3 +330000,227.5 340000,250.0 350000,225.8 360000,225.9 370000,225.2 380000,250.0 -390000,225.6 -400000,250.0 -410000,250.0 +390000,250.0 +400000,225.8 +410000,203.2 420000,250.0 430000,250.0 440000,250.0 450000,203.8 -460000,225.3 +460000,201.9 470000,200.5 480000,250.0 490000,227.0 @@ -54,48 +54,48 @@ step,stat/ep_length 530000,250.0 540000,250.0 550000,250.0 -560000,225.2 +560000,250.0 570000,250.0 580000,250.0 590000,250.0 -600000,226.8 -610000,176.4 -620000,250.0 -630000,177.7 +600000,153.8 +610000,225.7 +620000,225.3 +630000,201.8 640000,250.0 -650000,177.4 +650000,177.0 660000,250.0 -670000,250.0 +670000,227.8 680000,250.0 -690000,250.0 +690000,226.4 700000,225.6 -710000,200.9 +710000,201.0 720000,250.0 730000,200.6 -740000,250.0 -750000,250.0 -760000,250.0 -770000,226.8 -780000,225.7 -790000,250.0 -800000,250.0 -810000,250.0 -820000,226.4 -830000,250.0 +740000,225.3 +750000,202.4 +760000,200.9 +770000,202.0 +780000,250.0 +790000,225.3 +800000,200.6 +810000,200.3 +820000,200.7 +830000,225.1 840000,250.0 -850000,152.9 -860000,250.0 -870000,250.0 -880000,250.0 -890000,201.1 -900000,250.0 +850000,225.1 +860000,202.4 +870000,225.5 +880000,225.5 +890000,225.5 +900000,200.5 910000,225.2 -920000,201.7 -930000,225.3 -940000,225.5 -950000,225.1 -960000,250.0 -970000,250.0 -980000,203.3 +920000,177.7 +930000,250.0 +940000,225.6 +950000,225.7 +960000,225.3 +970000,225.2 +980000,226.3 990000,226.4 -1000000,226.2 +1000000,226.5 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat/ep_return.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat/ep_return.log index fd97a248a..98036b390 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat/ep_return.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat/ep_return.log @@ -1,101 +1,101 @@ step,stat/ep_return -10000,80.4035690470673 -20000,63.213023361769515 -30000,77.63084806479756 -40000,51.68451827192181 -50000,100.87987712644454 -60000,79.01468513350315 -70000,106.1519718006936 -80000,93.9357276260221 -90000,106.35755294819504 -100000,115.10110820257157 -110000,107.07172103951629 -120000,130.77299328597684 -130000,100.72075958742182 -140000,123.8778028204426 -150000,109.35536348223074 -160000,127.63096484764758 -170000,102.63340063018813 -180000,150.84740844568796 -190000,109.38861671689361 -200000,129.2657078504805 -210000,123.27193462863231 -220000,137.10631727281773 -230000,128.3944401073082 -240000,144.79429846675202 -250000,141.72849747110118 -260000,159.88433315193342 -270000,144.44445662263465 -280000,141.06721743922589 -290000,145.1168506423154 -300000,128.3108453224602 -310000,130.82569907942184 -320000,123.92969586863164 -330000,143.97181686321377 -340000,145.6089603429486 -350000,125.48699205629683 -360000,121.82273279680173 -370000,124.52242308533587 -380000,136.09484356006163 -390000,130.3868448638549 -400000,141.51284298042765 -410000,138.9550050735941 -420000,142.32944656029827 -430000,152.89297141762202 -440000,139.05009454629004 -450000,118.12193564036446 -460000,130.51185019146885 -470000,117.7329367242296 -480000,142.1215083499139 -490000,141.44699618164978 -500000,159.93061513238112 -510000,145.65952108981892 -520000,124.33905814961618 -530000,157.41784178226357 -540000,147.2942504839738 -550000,152.07313122708848 -560000,134.01225712031322 -570000,153.0192031450254 -580000,167.66344678399355 -590000,156.60729446512408 -600000,135.5145975042646 -610000,116.52916427395034 -620000,150.5420991440286 -630000,105.05071753377231 -640000,171.3397775544284 -650000,106.82572613609511 -660000,163.11272690244726 -670000,168.5606802977717 -680000,132.79818532977168 -690000,170.09326796084298 -700000,150.28202895153964 -710000,139.1329142230008 -720000,162.24258742139742 -730000,123.13270296586548 -740000,169.5980112017516 -750000,150.06943307443098 -760000,169.55588529048623 -770000,149.9319906074857 -780000,160.4161100336973 -790000,170.34596272586558 -800000,188.28267649577208 -810000,158.33195120744637 -820000,146.32366677938188 -830000,170.61322579272831 -840000,158.52946310451594 -850000,96.80092677645678 -860000,161.47978169353698 -870000,177.12929052157716 -880000,151.7233238085723 -890000,135.74902384881958 -900000,171.05021992934576 -910000,145.45509158290335 -920000,111.55710130542325 -930000,148.1629761265367 -940000,128.55831536695226 -950000,141.64706330973917 -960000,152.50147305086463 -970000,173.4171622665936 -980000,113.05582915274462 -990000,151.56614338766562 -1000000,147.18280115228012 +10000,62.832683178086356 +20000,43.08966040201721 +30000,77.89100578218373 +40000,96.03049536007066 +50000,78.77814495966183 +60000,89.56603688521855 +70000,133.82149645215182 +80000,93.03704522404719 +90000,101.24148354222493 +100000,109.23050569273767 +110000,100.47950859834393 +120000,132.8793846936274 +130000,101.18488892014773 +140000,129.67632927993918 +150000,113.71265714550273 +160000,121.57616286870487 +170000,99.09867391385094 +180000,133.070202801276 +190000,110.25387091453517 +200000,142.5684742669258 +210000,147.95681352541007 +220000,152.91341826713 +230000,143.2397512549264 +240000,154.26468662304092 +250000,160.43351637596072 +260000,158.44935543962077 +270000,150.35654726974136 +280000,148.72216134404454 +290000,154.49623716558662 +300000,151.64128957558364 +310000,135.08477580901712 +320000,137.60379010352943 +330000,141.380935574881 +340000,148.21027849518651 +350000,128.7022383136579 +360000,136.7541627915183 +370000,131.98095693275485 +380000,150.80014940394256 +390000,157.46420018647268 +400000,141.79825709314738 +410000,126.2575785532681 +420000,155.54262574257643 +430000,159.46106641993714 +440000,151.23920569376025 +450000,130.55909727963095 +460000,116.78956885285042 +470000,133.46915498198678 +480000,159.03471545897168 +490000,155.26010925446366 +500000,169.03490938937446 +510000,165.08767310361742 +520000,128.79568255736643 +530000,158.47002260880566 +540000,156.88606128427554 +550000,161.49300727573035 +560000,142.73206170525583 +570000,165.8167548898527 +580000,157.65483423959944 +590000,156.7432338223478 +600000,100.64981613990582 +610000,135.89281851892952 +620000,131.31262076383499 +630000,120.25767391850859 +640000,156.0260438874906 +650000,114.5267111337084 +660000,156.95533651938388 +670000,159.6265657763567 +680000,142.658546142886 +690000,136.37874756733834 +700000,149.0181560066264 +710000,119.90995837943906 +720000,140.9444030008173 +730000,139.25621260369053 +740000,145.3739052814217 +750000,124.0802221776565 +760000,117.40271666505377 +770000,109.82126253490301 +780000,165.37003519624577 +790000,144.73992952543193 +800000,127.42368748900773 +810000,121.6549613302868 +820000,123.44589985544503 +830000,136.7120242153735 +840000,147.59370453928398 +850000,134.8214379881264 +860000,133.66784508221866 +870000,137.36544804383618 +880000,137.30357352968497 +890000,138.53501352802078 +900000,117.47854907074482 +910000,134.85764130947203 +920000,108.39306810666446 +930000,146.45939619236984 +940000,160.35833777106228 +950000,142.11621982670053 +960000,151.87463464148496 +970000,147.18810416955097 +980000,162.30575833309365 +990000,147.66977178311814 +1000000,150.2170047990666 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat/ep_reward.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat/ep_reward.log index 1504d764b..024843766 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat/ep_reward.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat/ep_reward.log @@ -1,101 +1,101 @@ step,stat/ep_reward -10000,0.32320317268310716 -20000,0.2529402371572051 -30000,0.3105233922591902 -40000,0.2236328627125969 -50000,0.40482543381164754 -60000,0.3196065336166409 -70000,0.4246078872027743 -80000,0.3757751749495169 -90000,0.42543021179278007 -100000,0.4604044328102864 -110000,0.4301658685234928 -120000,0.5230919731439074 -130000,0.4029127959426537 -140000,0.49551121128177045 -150000,0.43758596349331447 -160000,0.5113545064525973 -170000,0.4161004758200386 -180000,0.6033896337827519 -190000,0.44125551212019004 -200000,0.5170628314019221 -210000,0.49366911159095517 -220000,0.5484252690912709 -230000,0.5135777604292329 -240000,0.5791771938670082 -250000,0.5669139898844048 -260000,0.6395373326077337 -270000,0.5777778264905385 -280000,0.5642688697569036 -290000,0.5804674025692617 -300000,0.5134097606399168 -310000,0.5233027963176874 -320000,0.4967189804709401 -330000,0.5758872674528552 -340000,0.5824358413717944 -350000,0.5024008868382656 -360000,0.49090663195688206 -370000,0.4984579427963336 -380000,0.5443793742402464 -390000,0.523073325382936 -400000,0.5660513719217106 -410000,0.5558200202943764 -420000,0.5693177862411931 -430000,0.6115718856704881 -440000,0.5562003781851603 -450000,0.477331454870051 -460000,0.5267216386449854 -470000,0.47234793001729275 -480000,0.5684860333996556 -490000,0.5660039889141946 -500000,0.6397224605295244 -510000,0.5826380843592757 -520000,0.497791640275777 -530000,0.6296713671290544 -540000,0.5891770019358952 -550000,0.6082925249083537 -560000,0.5362564088491932 -570000,0.6120768125801016 -580000,0.6706537871359741 -590000,0.6264291778604963 -600000,0.5426878487643338 -610000,0.46654175788116536 -620000,0.6021683965761144 -630000,0.4258818694244697 -640000,0.6853591102177135 -650000,0.43257217974935774 -660000,0.6524509076097891 -670000,0.6742427211910869 -680000,0.5311927413190867 -690000,0.6803730718433718 -700000,0.6014262638894625 -710000,0.5580632036728327 -720000,0.6489703496855895 -730000,0.49282253394918935 -740000,0.6783920448070064 -750000,0.600277732297724 -760000,0.6782235411619448 -770000,0.6006861180073652 -780000,0.641786095034611 -790000,0.6813838509034623 -800000,0.7531307059830883 -810000,0.6333278048297856 -820000,0.5868414468842376 -830000,0.6824529031709133 -840000,0.6341178524180637 -850000,0.39800539369490123 -860000,0.645919126774148 -870000,0.7085171620863084 -880000,0.6068932952342893 -890000,0.5434336231166002 -900000,0.6842008797173831 -910000,0.5819476774661366 -920000,0.4524755797748144 -930000,0.5927466133358025 -940000,0.5148699354842046 -950000,0.5665958179285798 -960000,0.6100058922034586 -970000,0.6936686490663745 -980000,0.4529729669499263 -990000,0.6066701911535087 -1000000,0.5912295299655039 +10000,0.25293215432412974 +20000,0.17248869841777795 +30000,0.311564023128735 +40000,0.4010202262124067 +50000,0.31919861069840444 +60000,0.3617842140750186 +70000,0.5352859858086072 +80000,0.37218017072916404 +90000,0.4049659341688997 +100000,0.4369220227709506 +110000,0.4038030984200388 +120000,0.5315175387745097 +130000,0.40476931288340784 +140000,0.5187053171197568 +150000,0.45746811808563653 +160000,0.48713361604726335 +170000,0.3975554313832667 +180000,0.5322808112051041 +190000,0.44471998025945947 +200000,0.5702738970677032 +210000,0.5918272541016403 +220000,0.6116536730685201 +230000,0.5729590050197055 +240000,0.6170587464921635 +250000,0.6417340655038427 +260000,0.6337974217584831 +270000,0.6014261890789656 +280000,0.5948886453761782 +290000,0.6179849486623465 +300000,0.6065651583023347 +310000,0.5411347413051383 +320000,0.551438385592436 +330000,0.5718803624170596 +340000,0.592841113980746 +350000,0.5152623977489561 +360000,0.5506189226815403 +370000,0.5282936820476503 +380000,0.6032005976157702 +390000,0.6298568007458908 +400000,0.5688219341783228 +410000,0.5070325881723583 +420000,0.6221705029703056 +430000,0.6378442656797486 +440000,0.6049568227750409 +450000,0.5270765894389304 +460000,0.4719378299623226 +470000,0.5352926422669038 +480000,0.6361388618358867 +490000,0.6212565108211174 +500000,0.6761396375574977 +510000,0.6603506924144696 +520000,0.5156180279079232 +530000,0.6338800904352228 +540000,0.6275442451371023 +550000,0.6459720291029215 +560000,0.5709282468210233 +570000,0.6632670195594108 +580000,0.6306193369583977 +590000,0.6269729352893912 +600000,0.40697684819263447 +610000,0.5437137880517 +620000,0.5294679044609302 +630000,0.4866258946686302 +640000,0.6241041755499623 +650000,0.46316990361841776 +660000,0.6278213460775356 +670000,0.6408584233992803 +680000,0.5706341845715439 +690000,0.5466281167183509 +700000,0.596370769308675 +710000,0.4812325748782194 +720000,0.5637776120032691 +730000,0.5573170450629814 +740000,0.582340593260507 +750000,0.49701432496188075 +760000,0.47453438351521393 +770000,0.44030273740258147 +780000,0.6614801407849831 +790000,0.579388900431696 +800000,0.5120383377451165 +810000,0.49470320946968416 +820000,0.49986373784289884 +830000,0.5468574506056456 +840000,0.5903748181571358 +850000,0.5394398570051027 +860000,0.5426689342658648 +870000,0.551341252258744 +880000,0.5495250118059911 +890000,0.5541491003433958 +900000,0.4711981319549392 +910000,0.5409163411661075 +920000,0.4356111152803289 +930000,0.5858375847694794 +940000,0.6414544723081748 +950000,0.568964585328446 +960000,0.6079172283564422 +970000,0.5929738153537385 +980000,0.6504370052444954 +990000,0.5910471153553755 +1000000,0.6018076328345291 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat_eval/constraint_violation.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat_eval/constraint_violation.log index 0fcc7411f..ca61f1da7 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat_eval/constraint_violation.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat_eval/constraint_violation.log @@ -1,33 +1,33 @@ step,stat_eval/constraint_violation -10000,0.3 -20000,0.2 +10000,0.2 +20000,0.3 30000,0.1 -40000,1.2 -50000,1.0 +40000,1.1 +50000,0.7 60000,0.0 -70000,2.0 +70000,1.8 80000,1.1 90000,1.1 100000,0.0 110000,0.2 -120000,0.1 -130000,1.2 +120000,0.0 +130000,1.1 140000,0.0 -150000,1.3 -160000,0.1 -170000,0.2 +150000,1.4 +160000,0.0 +170000,0.1 180000,0.2 190000,1.7 -200000,0.0 +200000,0.1 210000,0.3 220000,0.1 230000,0.0 -240000,0.9 -250000,0.2 +240000,0.8 +250000,0.3 260000,0.0 -270000,2.9 -280000,0.2 -290000,0.2 +270000,2.5 +280000,0.0 +290000,0.3 300000,0.1 310000,0.0 320000,0.0 @@ -37,65 +37,65 @@ step,stat_eval/constraint_violation 360000,1.3 370000,0.2 380000,0.2 -390000,1.5 +390000,1.4 400000,0.1 -410000,0.1 -420000,1.3 -430000,1.9 +410000,0.2 +420000,1.2 +430000,2.0 440000,0.2 -450000,0.1 -460000,0.1 +450000,0.0 +460000,0.8 470000,0.3 480000,1.6 -490000,0.2 +490000,0.3 500000,0.0 510000,0.6 -520000,0.6 +520000,0.3 530000,0.0 -540000,2.2 +540000,2.0 550000,0.1 560000,0.2 570000,0.0 -580000,0.2 +580000,0.5 590000,0.6 600000,0.0 -610000,0.0 +610000,0.6 620000,0.1 -630000,0.8 -640000,0.0 +630000,0.7 +640000,0.1 650000,0.0 -660000,2.0 +660000,2.2 670000,0.8 680000,0.0 690000,0.8 700000,0.0 710000,1.2 -720000,1.5 -730000,0.1 +720000,1.3 +730000,0.2 740000,0.0 750000,0.1 760000,0.1 770000,0.2 -780000,1.3 +780000,1.5 790000,0.1 800000,0.0 810000,0.0 820000,0.7 830000,0.0 840000,0.0 -850000,0.5 +850000,0.4 860000,0.0 870000,1.3 880000,0.2 890000,0.3 900000,1.4 910000,0.1 -920000,1.8 +920000,1.9 930000,0.0 940000,0.0 -950000,0.0 -960000,0.4 +950000,0.1 +960000,0.5 970000,1.8 -980000,0.1 -990000,0.9 +980000,0.0 +990000,0.1 1000000,0.0 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat_eval/ep_length.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat_eval/ep_length.log index 065f01a07..6518ee199 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat_eval/ep_length.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat_eval/ep_length.log @@ -31,21 +31,21 @@ step,stat_eval/ep_length 300000,225.4 310000,250.0 320000,250.0 -330000,202.0 +330000,201.9 340000,250.0 350000,250.0 360000,250.0 -370000,202.3 +370000,202.2 380000,225.9 390000,250.0 400000,225.6 410000,250.0 420000,201.4 430000,201.9 -440000,226.5 +440000,226.6 450000,250.0 460000,250.0 -470000,226.3 +470000,226.2 480000,201.2 490000,250.0 500000,250.0 @@ -62,7 +62,7 @@ step,stat_eval/ep_length 610000,250.0 620000,225.1 630000,250.0 -640000,250.0 +640000,227.5 650000,250.0 660000,250.0 670000,250.0 @@ -71,7 +71,7 @@ step,stat_eval/ep_length 700000,250.0 710000,250.0 720000,225.7 -730000,226.6 +730000,226.7 740000,250.0 750000,225.2 760000,225.6 @@ -86,14 +86,14 @@ step,stat_eval/ep_length 850000,151.6 860000,250.0 870000,201.7 -880000,201.6 +880000,201.5 890000,226.1 900000,225.9 910000,226.6 920000,225.2 930000,250.0 940000,250.0 -950000,250.0 +950000,227.4 960000,250.0 970000,225.2 980000,250.0 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat_eval/ep_return.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat_eval/ep_return.log index a843ec704..dbcafef82 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat_eval/ep_return.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat_eval/ep_return.log @@ -1,101 +1,101 @@ step,stat_eval/ep_return -10000,50.646253924056694 -20000,44.14367153023045 -30000,53.397576136163494 -40000,32.84082460994584 -50000,78.65881203638777 -60000,73.72029308264464 -70000,69.20607120377852 -80000,102.98581830851319 -90000,105.2291210955862 -100000,123.09533691858378 -110000,77.0525171966317 -120000,121.45757484495137 -130000,85.29229315946581 -140000,121.49283373733915 -150000,142.8259797353962 -160000,113.57735707437034 -170000,118.57136811606397 -180000,105.182251497178 -190000,132.33391972667607 -200000,131.13807420686788 -210000,93.9032361602661 -220000,114.29428945762018 -230000,133.38676727640603 -240000,118.63496377036566 -250000,110.56791760157617 -260000,145.60655013855654 -270000,139.9614690155341 -280000,160.50097135850834 -290000,109.06573333520564 -300000,106.36361367673433 -310000,120.65390695182475 -320000,134.73493825962572 -330000,114.86969528332983 -340000,131.02901079454756 -350000,129.46009993379965 -360000,135.23129967653406 -370000,111.77681306183813 -380000,122.13433481261264 -390000,138.7104003926757 -400000,125.30231480321697 -410000,137.64360438221337 -420000,115.19259713049489 -430000,110.81042999306196 -440000,118.13954787912996 -450000,138.11054527430366 -460000,148.52013641756923 -470000,115.87638594147727 -480000,118.88809783337324 -490000,131.97323089965195 -500000,139.9121854418534 -510000,137.93283399315237 -520000,132.69803375719235 -530000,140.0957407707557 -540000,126.31648708324163 -550000,122.43221298128094 -560000,135.1204381270787 -570000,132.5837354373008 -580000,108.82045703285371 -590000,130.3106093986295 -600000,144.58261086407137 -610000,154.70650464999525 -620000,131.3886273561415 -630000,149.88322705274223 -640000,148.80079753954277 -650000,148.8785783595976 -660000,141.2841066526415 -670000,153.61412395058665 -680000,150.62295333637374 -690000,169.55373366677205 -700000,152.14676324080642 -710000,157.08460909333317 -720000,143.07070240900106 -730000,147.95705236056037 -740000,135.08893824695818 -750000,132.46369316732935 -760000,132.80350380059258 -770000,119.98845584251708 -780000,146.14688106111743 -790000,134.4729621637457 -800000,142.93064932991888 -810000,154.1922180695281 -820000,159.6144379433454 -830000,154.3743373713466 -840000,156.71479161511644 -850000,85.98996291569723 -860000,138.6303449099766 -870000,121.08560086052667 -880000,122.66039483984028 -890000,144.17258451957633 -900000,130.53830410263453 -910000,128.92998071014387 -920000,148.10384464802505 -930000,147.75682318821072 -940000,139.0253391492841 -950000,132.5996971446643 -960000,149.4090649345601 -970000,129.06155592217357 -980000,153.06938979743785 -990000,145.71407652496413 -1000000,140.16784466964623 +10000,39.46101551016082 +20000,39.913809641146074 +30000,56.739676530429776 +40000,73.44685292850696 +50000,88.62566521030975 +60000,73.02281230021762 +70000,80.9689304135662 +80000,96.45133001586173 +90000,108.90407574030401 +100000,110.75413887285569 +110000,83.50389232610985 +120000,117.30250815456853 +130000,91.29974854590343 +140000,130.2698684932348 +150000,136.95826795305862 +160000,113.63938734979953 +170000,110.88757131008745 +180000,106.92856687694321 +190000,126.26325172698246 +200000,137.94380173332746 +210000,100.5624916822082 +220000,127.49323294211133 +230000,143.57033880671707 +240000,121.03984892245725 +250000,120.07330868366122 +260000,148.1642856641967 +270000,155.42631078544431 +280000,164.04332680542743 +290000,119.47621676414626 +300000,122.41987623846262 +310000,138.8105855051778 +320000,141.75727565063846 +330000,120.52334209031508 +340000,142.4457062617815 +350000,140.43041165169672 +360000,145.90682687023983 +370000,124.17188048153245 +380000,130.9165488356126 +390000,138.58092901296573 +400000,129.44808563337105 +410000,148.4621131633098 +420000,128.6250672371511 +430000,119.27405807440627 +440000,127.69171820435285 +450000,150.41680938455244 +460000,162.08547432272678 +470000,125.38228004478344 +480000,127.9770337452338 +490000,141.79259750020057 +500000,144.46385513875134 +510000,145.57475227152807 +520000,138.07058275482038 +530000,146.20860716039428 +540000,132.98177441024473 +550000,125.58072374683498 +560000,138.6568206056275 +570000,138.43986076643313 +580000,104.81520938244857 +590000,133.0871107212746 +600000,143.15197851041876 +610000,153.78641243961926 +620000,135.147198332245 +630000,145.11823310630334 +640000,135.67811524463588 +650000,148.7766348374003 +660000,142.61086205991813 +670000,149.08321829994816 +680000,144.69027712935676 +690000,164.16526157694028 +700000,143.38271990219744 +710000,152.0443158824791 +720000,140.72243466606122 +730000,149.9413107060081 +740000,131.22363074248466 +750000,126.82841076295067 +760000,131.45689516261422 +770000,116.0221050028958 +780000,137.14415142204854 +790000,126.12124653244955 +800000,137.1113952973614 +810000,149.72908139096003 +820000,152.33775433203908 +830000,147.05399123589333 +840000,156.2046824413896 +850000,83.74317111247602 +860000,134.02806882782642 +870000,122.63560910183374 +880000,119.17872999128504 +890000,141.03749994169647 +900000,126.02067331061112 +910000,127.8124472553238 +920000,149.13149029079506 +930000,154.07292102594735 +940000,142.36607695911516 +950000,126.30833675434837 +960000,156.24410955403263 +970000,130.7254081158374 +980000,161.9155503820327 +990000,152.79715564245384 +1000000,143.9768172293834 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat_eval/ep_reward.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat_eval/ep_reward.log index 42574a4f4..a8c86a0fc 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat_eval/ep_reward.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat_eval/ep_reward.log @@ -1,101 +1,101 @@ step,stat_eval/ep_reward -10000,0.202585015906544 -20000,0.1766204717756274 -30000,0.21359030454887198 -40000,0.13136329843978334 -50000,0.31463524814555116 -60000,0.2948811723305786 -70000,0.27682429159705985 -80000,0.41194327323405283 -90000,0.4209164843823448 -100000,0.49238134767433517 -110000,0.30821037831186854 -120000,0.4858302993798054 -130000,0.3411694663947359 -140000,0.48597133494935657 -150000,0.5713039189415847 -160000,0.45430942829748144 -170000,0.4742854725039735 -180000,0.42072900970826205 -190000,0.5293356789067042 -200000,0.5245522968274716 -210000,0.37562370358194574 -220000,0.4571771599960945 -230000,0.5335470691056241 -240000,0.4745398551322708 -250000,0.44227167908111553 -260000,0.5824262005542263 -270000,0.5598458760621365 -280000,0.6420038854340333 -290000,0.4362629372684128 -300000,0.425454454708685 -310000,0.48261562780729905 -320000,0.5389397530385028 -330000,0.45947984737946157 -340000,0.5241160431781902 -350000,0.5178403997351986 -360000,0.5409251987061362 -370000,0.4471091233124209 -380000,0.48853734066829746 -390000,0.554841601570703 -400000,0.5012092592670587 -410000,0.5505744175288532 -420000,0.4607705409282919 -430000,0.44324172700299336 -440000,0.4725581939795271 -450000,0.5524421810972147 -460000,0.5940805456702769 -470000,0.46350555774321833 -480000,0.47555239161082186 -490000,0.5278929235986078 -500000,0.5596487417674136 -510000,0.5517314005029343 -520000,0.5307923196790674 -530000,0.5603829630830228 -540000,0.5052659534666212 -550000,0.48972885192705906 -560000,0.5404817540382955 -570000,0.5303349417492031 -580000,0.43528592502459 -590000,0.5212424519505523 -600000,0.5783304434562857 -610000,0.6188260185999811 -620000,0.5255545112527329 -630000,0.5995329082109689 -640000,0.5952031901581712 -650000,0.5955143134383903 -660000,0.5651364266105658 -670000,0.6144564958023466 -680000,0.602491813345495 -690000,0.678214934667088 -700000,0.6085870529632258 -710000,0.6283384363733326 -720000,0.5722828133251553 -730000,0.5918282127534275 -740000,0.5403557529878327 -750000,0.5298547726836673 -760000,0.5312140154078404 -770000,0.47995382661334685 -780000,0.5845875242452697 -790000,0.5378930664113033 -800000,0.5717225973196755 -810000,0.6167688722781126 -820000,0.6384577517733815 -830000,0.6174973494853863 -840000,0.6268591664604658 -850000,0.3439600024953252 -860000,0.5545213796399066 -870000,0.48434241255695387 -880000,0.4906418150456622 -890000,0.5766903396322903 -900000,0.522153216415236 -910000,0.5157199290099286 -920000,0.5924153786001881 -930000,0.591027292752843 -940000,0.5561013565971364 -950000,0.5303987885786572 -960000,0.5976362597382403 -970000,0.5162462236889935 -980000,0.6122775591897515 -990000,0.5828568262297059 -1000000,0.560671378678585 +10000,0.15784406258520106 +20000,0.1597537698022235 +30000,0.226958706123439 +40000,0.2937874117140278 +50000,0.354502660841239 +60000,0.29209124920087054 +70000,0.323875721656786 +80000,0.3858053200634469 +90000,0.43561630296121595 +100000,0.4430165554914228 +110000,0.33401569525529173 +120000,0.46921003261827404 +130000,0.36519934607775084 +140000,0.5210794739729392 +150000,0.5478330718122346 +160000,0.4545575493991982 +170000,0.44355028537081004 +180000,0.42771427607766876 +190000,0.5050530069079299 +200000,0.5517752069333097 +210000,0.40225119798117664 +220000,0.509972932778062 +230000,0.5742813552268683 +240000,0.4841593959501771 +250000,0.480293243672007 +260000,0.5926571426567868 +270000,0.6217052431417773 +280000,0.6561733072217096 +290000,0.4779048685355648 +300000,0.4896795052237309 +310000,0.5552423420207113 +320000,0.5670291026025538 +330000,0.48209344934378606 +340000,0.5697828250471261 +350000,0.5617216466067869 +360000,0.5836273074809594 +370000,0.4966883349914295 +380000,0.5236662019420202 +390000,0.5543237160518629 +400000,0.5177923426739873 +410000,0.593848452653239 +420000,0.5145002701471506 +430000,0.47709624156020175 +440000,0.5107669362078238 +450000,0.6016672375382097 +460000,0.6483418972909072 +470000,0.5015296327590406 +480000,0.5119081349813164 +490000,0.5671703900008023 +500000,0.5778554205550054 +510000,0.5822990091906735 +520000,0.5522824373334769 +530000,0.5848344286415773 +540000,0.5319270977863477 +550000,0.5023228957093993 +560000,0.5546272873273772 +570000,0.5537594430657325 +580000,0.4192611456918984 +590000,0.5323484487631663 +600000,0.572607914041675 +610000,0.6151456497584771 +620000,0.5405887933311775 +630000,0.5804729324252135 +640000,0.5427125321370171 +650000,0.5951065393496012 +660000,0.5704434482396725 +670000,0.5963328731997928 +680000,0.578761108517427 +690000,0.6566610463077612 +700000,0.5735308796087898 +710000,0.6081772635299164 +720000,0.5628897398384 +730000,0.5997657924695883 +740000,0.5248945229699387 +750000,0.507313643062832 +760000,0.5258275814270738 +770000,0.46408877019457817 +780000,0.5485766059130728 +790000,0.5044881604539109 +800000,0.5484455811894456 +810000,0.59891632556384 +820000,0.6093510173281562 +830000,0.5882159649435735 +840000,0.6248187297655583 +850000,0.33497313317356125 +860000,0.5361122753113057 +870000,0.4905424816404492 +880000,0.4767163433028661 +890000,0.5641500083177244 +900000,0.5040826940282782 +910000,0.5112508100205545 +920000,0.5965259630287909 +930000,0.6162916841037894 +940000,0.5694643078364606 +950000,0.5052333500451466 +960000,0.6249764382161305 +970000,0.5229016324634088 +980000,0.6476622015281308 +990000,0.6111906933368538 +1000000,0.5759072689175337 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat_eval/mse.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat_eval/mse.log index 0feed368f..f368f9fe3 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat_eval/mse.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/logs/stat_eval/mse.log @@ -1,101 +1,101 @@ step,stat_eval/mse -10000,378.8961527163199 -20000,305.6897178445237 -30000,393.99571730931706 -40000,490.88473397631077 -50000,320.05211627885603 -60000,384.58238267789056 -70000,396.2844666032928 -80000,218.5764206733955 -90000,262.65680456680604 -100000,299.2429471058123 -110000,369.3886586851513 -120000,284.1042518064086 -130000,176.294389343281 -140000,289.74352227099087 -150000,234.5195998517319 -160000,322.10318739826295 -170000,237.2732012460521 -180000,232.44162700273318 -190000,279.49071137143085 -200000,300.1707133934198 -210000,223.10764933437477 -220000,268.3720780292373 -230000,292.23611643604187 -240000,267.459997194981 -250000,253.89316213372203 -260000,257.4370963547818 -270000,274.0222557287335 -280000,224.6027890832257 -290000,178.1898750462496 -300000,318.35203574577315 -310000,330.7231053997351 -320000,330.24310446527153 -330000,212.153636840653 -340000,308.0790570328474 -350000,370.58852257214596 -360000,262.50534145523704 -370000,190.72743306149565 -380000,229.73535728307166 -390000,313.53832001275725 -400000,253.5955836340119 -410000,265.1606397433903 -420000,162.32476133204244 -430000,200.6773976540574 -440000,301.8505103717224 -450000,233.1227174277485 -460000,159.1632968773344 -470000,264.6795123587674 -480000,191.2474887493455 -490000,384.5217430623141 -500000,263.6331081003685 -510000,151.69436962085948 -520000,227.83292200490632 -530000,295.73807291497434 -540000,265.6917535827903 -550000,279.24785186296185 -560000,123.96314163874581 -570000,299.33004734656043 -580000,305.8845927819827 -590000,264.26538154079435 -600000,283.1216996408241 -610000,250.37371288655945 -620000,223.26463268695565 -630000,235.70963937113075 -640000,242.06564826550363 -650000,309.4529729939613 -660000,266.028143768259 -670000,250.06094879247712 -680000,300.73687448702645 -690000,162.46436323947435 -700000,237.01451429393586 -710000,245.23371074002102 -720000,202.55533999577938 -730000,167.59699450341668 -740000,262.6089689434815 -750000,238.16255190711848 -760000,247.88295829189605 -770000,150.81065229710833 -780000,203.6555483890794 -790000,202.36621170658947 -800000,306.34340004565036 -810000,205.44376731300844 -820000,153.27503150807993 -830000,224.28369950856728 -840000,214.9158195104836 -850000,118.10399495499455 -860000,353.81034613154014 -870000,219.10450794621232 -880000,204.41271405596635 -890000,214.055280459207 -900000,240.02711409260624 -910000,240.21573226710544 -920000,120.029493053943 -930000,184.5036520196827 -940000,265.7351545353132 -950000,334.71741061021373 -960000,218.01338919577182 -970000,259.3918322632975 -980000,182.64720801604682 -990000,120.4558498272451 -1000000,364.9779372279845 +10000,463.3280927474965 +20000,332.7851006840213 +30000,376.2600360215732 +40000,289.7258723297968 +50000,285.2334722494546 +60000,364.95712375333113 +70000,374.69481024421344 +80000,234.78825220162184 +90000,262.8476034525481 +100000,303.9029880908137 +110000,376.99483590487205 +120000,301.4014948757027 +130000,177.6732905280017 +140000,270.2662562651682 +150000,244.9282149589829 +160000,313.83408085771106 +170000,244.8965361890057 +180000,222.20373957850174 +190000,280.00601497441744 +200000,271.4025139482759 +210000,202.73541497817183 +220000,236.973673825559 +230000,257.97181418116315 +240000,248.5972245663914 +250000,229.37198900799893 +260000,248.45442689060536 +270000,246.65685391174762 +280000,212.13912627584128 +290000,161.53667509889743 +300000,295.75009433788114 +310000,289.20796925177507 +320000,304.4605499891807 +330000,201.292599278749 +340000,289.7548799119324 +350000,342.78096270677196 +360000,235.191925810995 +370000,167.91172798610415 +380000,207.5701883103765 +390000,314.6839623560767 +400000,242.18693854868707 +410000,246.1622460370864 +420000,141.76692449142365 +430000,185.989246658768 +440000,283.22076882823006 +450000,222.98451644277185 +460000,132.8068690100609 +470000,247.56004344950276 +480000,176.02813459671583 +490000,370.15769170316855 +500000,264.0263837172133 +510000,139.05191225293748 +520000,217.89815724251122 +530000,293.0625805913543 +540000,257.6272806647954 +550000,277.50127641184633 +560000,116.8023800366088 +570000,290.971835976983 +580000,318.8871806594102 +590000,263.2274076002111 +600000,289.2116662166671 +610000,255.63428109751263 +620000,221.62442232880971 +630000,247.58105558212688 +640000,191.9512605442828 +650000,309.4920368447004 +660000,267.8393213564842 +670000,255.4380441942733 +680000,303.0383066506987 +690000,165.86629450595325 +700000,250.3419102475826 +710000,254.53022164891416 +720000,204.81705484045784 +730000,169.84816540847015 +740000,274.7067533688166 +750000,246.34552137417094 +760000,250.30865274888396 +770000,157.31366019243225 +780000,216.8039513740785 +790000,218.9966732891387 +800000,314.6853939972538 +810000,212.42346145768607 +820000,165.624646579918 +830000,245.5427386197419 +840000,216.54733655400156 +850000,121.59028473607282 +860000,358.01085912462145 +870000,219.84413672237116 +880000,212.1422290499441 +890000,216.3750995782728 +900000,246.47194295313017 +910000,243.24062790702482 +920000,118.14639320187307 +930000,178.35930546290243 +940000,261.8258715251418 +950000,278.44749099420517 +960000,209.61245554553062 +970000,262.56693887131655 +980000,182.64764130814802 +990000,111.33737065015661 +1000000,365.648456429209 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/model_best.pt b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/model_best.pt index 478eb865c..318609925 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/model_best.pt and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/model_best.pt differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/model_latest.pt b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/model_latest.pt index cea33b9df..3ffacbbe6 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/model_latest.pt and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/model_latest.pt differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-loss-approx_kl.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-loss-approx_kl.jpg index 903820ebf..890399f21 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-loss-approx_kl.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-loss-approx_kl.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-loss-entropy_loss.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-loss-entropy_loss.jpg index 6aeba0481..bedcee482 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-loss-entropy_loss.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-loss-entropy_loss.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-loss-policy_loss.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-loss-policy_loss.jpg index 0c4ad66ec..702ddbe45 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-loss-policy_loss.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-loss-policy_loss.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-loss-value_loss.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-loss-value_loss.jpg index 14c09fa35..2c5d23de6 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-loss-value_loss.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-loss-value_loss.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat-constraint_violation.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat-constraint_violation.jpg index 14907722f..a94106841 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat-constraint_violation.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat-constraint_violation.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat-ep_constraint_violation.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat-ep_constraint_violation.jpg index 0c3f50181..770460d9a 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat-ep_constraint_violation.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat-ep_constraint_violation.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat-ep_length.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat-ep_length.jpg index 35e999ab5..c6b35b222 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat-ep_length.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat-ep_length.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat-ep_return.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat-ep_return.jpg index fbe71f63e..7dd88e774 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat-ep_return.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat-ep_return.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat-ep_reward.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat-ep_reward.jpg index 428f571bc..46f2e8c67 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat-ep_reward.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat-ep_reward.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat_eval-constraint_violation.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat_eval-constraint_violation.jpg index 852d240d2..8084d509a 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat_eval-constraint_violation.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat_eval-constraint_violation.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat_eval-ep_length.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat_eval-ep_length.jpg index 85df4c4c4..0786fd60a 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat_eval-ep_length.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat_eval-ep_length.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat_eval-ep_return.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat_eval-ep_return.jpg index 9ea978f3d..81fb11fa5 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat_eval-ep_return.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat_eval-ep_return.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat_eval-ep_reward.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat_eval-ep_reward.jpg index 9057c06a5..5b327d9ec 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat_eval-ep_reward.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat_eval-ep_reward.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat_eval-mse.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat_eval-mse.jpg index e68188100..2773a0647 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat_eval-mse.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/plots/-stat_eval-mse.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/std_out.txt b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/std_out.txt index b7f58fd76..a3577a336 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/std_out.txt +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/std_out.txt @@ -1,2601 +1,2601 @@ -2023-10-19 14:51:55,210 : Eval | ep_lengths 225.90 +/- 72.30 | ep_return 50.646 +/- 24.562 -2023-10-19 14:51:55,225 : +2023-10-27 16:42:36,333 : Eval | ep_lengths 225.90 +/- 72.30 | ep_return 39.461 +/- 19.291 +2023-10-27 16:42:36,352 : -------------------------------------- | loss/ | | -| approx_kl | 0.0221 | -| entropy_loss | -3.68 | -| policy_loss | -0.0144 | -| value_loss | 7.49 | +| approx_kl | 0.0252 | +| entropy_loss | -3.67 | +| policy_loss | -0.00928 | +| value_loss | 10.3 | | stat/ | | -| constraint_violation | 35 | -| ep_constraint_vio... | 1.3 | +| constraint_violation | 39 | +| ep_constraint_vio... | 1 | | ep_length | 226 | -| ep_return | 80.4 | -| ep_reward | 0.323 | +| ep_return | 62.8 | +| ep_reward | 0.253 | | stat_eval/ | | -| constraint_violation | 0.3 | +| constraint_violation | 0.2 | | ep_length | 226 | -| ep_return | 50.6 | -| ep_reward | 0.203 | -| mse | 379 | +| ep_return | 39.5 | +| ep_reward | 0.158 | +| mse | 463 | | time/ | | | progress | 0.01 | | step | 1e+04 | -| step_time | 11.1 | +| step_time | 8.17 | -------------------------------------- -2023-10-19 14:54:05,215 : Eval | ep_lengths 201.90 +/- 96.23 | ep_return 44.144 +/- 27.074 -2023-10-19 14:54:05,237 : +2023-10-27 16:44:11,277 : Eval | ep_lengths 201.90 +/- 96.23 | ep_return 39.914 +/- 23.545 +2023-10-27 16:44:11,314 : -------------------------------------- | loss/ | | -| approx_kl | 0.0214 | -| entropy_loss | -3.6 | -| policy_loss | -0.0138 | -| value_loss | 4.33 | +| approx_kl | 0.0175 | +| entropy_loss | -3.65 | +| policy_loss | -0.0144 | +| value_loss | 1.28 | | stat/ | | -| constraint_violation | 46 | -| ep_constraint_vio... | 0.2 | -| ep_length | 226 | -| ep_return | 63.2 | -| ep_reward | 0.253 | +| constraint_violation | 53 | +| ep_constraint_vio... | 0.3 | +| ep_length | 201 | +| ep_return | 43.1 | +| ep_reward | 0.172 | | stat_eval/ | | -| constraint_violation | 0.2 | +| constraint_violation | 0.3 | | ep_length | 202 | -| ep_return | 44.1 | -| ep_reward | 0.177 | -| mse | 306 | +| ep_return | 39.9 | +| ep_reward | 0.16 | +| mse | 333 | | time/ | | | progress | 0.02 | | step | 2e+04 | -| step_time | 10.9 | +| step_time | 8.02 | -------------------------------------- -2023-10-19 14:56:16,017 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 53.398 +/- 28.629 -2023-10-19 14:56:16,026 : +2023-10-27 16:45:46,764 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 56.740 +/- 27.011 +2023-10-27 16:45:46,774 : -------------------------------------- | loss/ | | -| approx_kl | 0.0182 | -| entropy_loss | -3.62 | -| policy_loss | -0.0151 | -| value_loss | 3.96 | +| approx_kl | 0.0205 | +| entropy_loss | -3.66 | +| policy_loss | -0.00633 | +| value_loss | 2.65 | | stat/ | | -| constraint_violation | 124 | -| ep_constraint_vio... | 1.1 | +| constraint_violation | 134 | +| ep_constraint_vio... | 1.3 | | ep_length | 250 | -| ep_return | 77.6 | -| ep_reward | 0.311 | +| ep_return | 77.9 | +| ep_reward | 0.312 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 53.4 | -| ep_reward | 0.214 | -| mse | 394 | +| ep_return | 56.7 | +| ep_reward | 0.227 | +| mse | 376 | | time/ | | | progress | 0.03 | | step | 3e+04 | -| step_time | 10.7 | +| step_time | 7.84 | -------------------------------------- -2023-10-19 14:58:28,603 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 32.841 +/- 18.873 -2023-10-19 14:58:28,605 : +2023-10-27 16:47:24,739 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 73.447 +/- 21.911 +2023-10-27 16:47:24,777 : -------------------------------------- | loss/ | | -| approx_kl | 0.0284 | -| entropy_loss | -3.63 | -| policy_loss | -0.0125 | -| value_loss | 5.55 | +| approx_kl | 0.0263 | +| entropy_loss | -3.59 | +| policy_loss | -0.0154 | +| value_loss | 6.01 | | stat/ | | -| constraint_violation | 132 | +| constraint_violation | 140 | | ep_constraint_vio... | 0.1 | | ep_length | 225 | -| ep_return | 51.7 | -| ep_reward | 0.224 | +| ep_return | 96 | +| ep_reward | 0.401 | | stat_eval/ | | -| constraint_violation | 1.2 | +| constraint_violation | 1.1 | | ep_length | 250 | -| ep_return | 32.8 | -| ep_reward | 0.131 | -| mse | 491 | +| ep_return | 73.4 | +| ep_reward | 0.294 | +| mse | 290 | | time/ | | | progress | 0.04 | | step | 4e+04 | -| step_time | 10.5 | +| step_time | 7.79 | -------------------------------------- -2023-10-19 15:00:40,241 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 78.659 +/- 19.190 -2023-10-19 15:00:40,250 : +2023-10-27 16:49:02,273 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 88.626 +/- 18.622 +2023-10-27 16:49:02,284 : -------------------------------------- | loss/ | | -| approx_kl | 0.0312 | -| entropy_loss | -3.57 | -| policy_loss | -0.00106 | -| value_loss | 5.12 | +| approx_kl | 0.0283 | +| entropy_loss | -3.59 | +| policy_loss | -0.0147 | +| value_loss | 4.81 | | stat/ | | -| constraint_violation | 166 | -| ep_constraint_vio... | 1.1 | -| ep_length | 226 | -| ep_return | 101 | -| ep_reward | 0.405 | +| constraint_violation | 185 | +| ep_constraint_vio... | 2.2 | +| ep_length | 203 | +| ep_return | 78.8 | +| ep_reward | 0.319 | | stat_eval/ | | -| constraint_violation | 1 | +| constraint_violation | 0.7 | | ep_length | 250 | -| ep_return | 78.7 | -| ep_reward | 0.315 | -| mse | 320 | +| ep_return | 88.6 | +| ep_reward | 0.355 | +| mse | 285 | | time/ | | | progress | 0.05 | | step | 5e+04 | -| step_time | 10.6 | +| step_time | 7.89 | -------------------------------------- -2023-10-19 15:02:51,985 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 73.720 +/- 20.620 -2023-10-19 15:02:51,987 : +2023-10-27 16:50:38,591 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 73.023 +/- 30.169 +2023-10-27 16:50:38,592 : -------------------------------------- | loss/ | | -| approx_kl | 0.0214 | -| entropy_loss | -3.56 | -| policy_loss | -0.0133 | -| value_loss | 4.93 | +| approx_kl | 0.0266 | +| entropy_loss | -3.63 | +| policy_loss | -0.00746 | +| value_loss | 5.23 | | stat/ | | -| constraint_violation | 189 | -| ep_constraint_vio... | 1.3 | +| constraint_violation | 215 | +| ep_constraint_vio... | 2.4 | | ep_length | 227 | -| ep_return | 79 | -| ep_reward | 0.32 | +| ep_return | 89.6 | +| ep_reward | 0.362 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 73.7 | -| ep_reward | 0.295 | -| mse | 385 | +| ep_return | 73 | +| ep_reward | 0.292 | +| mse | 365 | | time/ | | | progress | 0.06 | | step | 6e+04 | -| step_time | 10.9 | +| step_time | 8.03 | -------------------------------------- -2023-10-19 15:05:00,944 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 69.206 +/- 37.460 -2023-10-19 15:05:00,945 : +2023-10-27 16:52:13,311 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 80.969 +/- 31.995 +2023-10-27 16:52:13,312 : -------------------------------------- | loss/ | | -| approx_kl | 0.0186 | -| entropy_loss | -3.57 | -| policy_loss | -0.00955 | -| value_loss | 4.38 | +| approx_kl | 0.0367 | +| entropy_loss | -3.56 | +| policy_loss | -0.00645 | +| value_loss | 7.66 | | stat/ | | -| constraint_violation | 226 | +| constraint_violation | 240 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 106 | -| ep_reward | 0.425 | +| ep_return | 134 | +| ep_reward | 0.535 | | stat_eval/ | | -| constraint_violation | 2 | +| constraint_violation | 1.8 | | ep_length | 225 | -| ep_return | 69.2 | -| ep_reward | 0.277 | -| mse | 396 | +| ep_return | 81 | +| ep_reward | 0.324 | +| mse | 375 | | time/ | | | progress | 0.07 | | step | 7e+04 | -| step_time | 10.6 | +| step_time | 7.74 | -------------------------------------- -2023-10-19 15:07:10,924 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 102.986 +/- 25.481 -2023-10-19 15:07:10,935 : +2023-10-27 16:53:48,361 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 96.451 +/- 30.413 +2023-10-27 16:53:48,396 : -------------------------------------- | loss/ | | -| approx_kl | 0.0227 | -| entropy_loss | -3.59 | -| policy_loss | -0.0116 | -| value_loss | 3 | +| approx_kl | 0.025 | +| entropy_loss | -3.52 | +| policy_loss | -0.00623 | +| value_loss | 5.36 | | stat/ | | -| constraint_violation | 230 | +| constraint_violation | 244 | | ep_constraint_vio... | 0.2 | | ep_length | 226 | -| ep_return | 93.9 | -| ep_reward | 0.376 | +| ep_return | 93 | +| ep_reward | 0.372 | | stat_eval/ | | | constraint_violation | 1.1 | | ep_length | 250 | -| ep_return | 103 | -| ep_reward | 0.412 | -| mse | 219 | +| ep_return | 96.5 | +| ep_reward | 0.386 | +| mse | 235 | | time/ | | | progress | 0.08 | | step | 8e+04 | -| step_time | 10.8 | +| step_time | 7.84 | -------------------------------------- -2023-10-19 15:09:20,293 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 105.229 +/- 21.880 -2023-10-19 15:09:20,303 : +2023-10-27 16:55:22,801 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 108.904 +/- 22.585 +2023-10-27 16:55:22,811 : -------------------------------------- | loss/ | | -| approx_kl | 0.0324 | +| approx_kl | 0.0276 | | entropy_loss | -3.53 | -| policy_loss | -0.0164 | -| value_loss | 1.86 | +| policy_loss | -0.0194 | +| value_loss | 3.66 | | stat/ | | -| constraint_violation | 243 | -| ep_constraint_vio... | 0.1 | +| constraint_violation | 256 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 106 | -| ep_reward | 0.425 | +| ep_return | 101 | +| ep_reward | 0.405 | | stat_eval/ | | | constraint_violation | 1.1 | | ep_length | 250 | -| ep_return | 105 | -| ep_reward | 0.421 | +| ep_return | 109 | +| ep_reward | 0.436 | | mse | 263 | | time/ | | | progress | 0.09 | | step | 9e+04 | -| step_time | 10.5 | +| step_time | 7.56 | -------------------------------------- -2023-10-19 15:11:26,904 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 123.095 +/- 26.037 -2023-10-19 15:11:26,913 : +2023-10-27 16:56:56,658 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 110.754 +/- 24.845 +2023-10-27 16:56:56,694 : -------------------------------------- | loss/ | | -| approx_kl | 0.0297 | -| entropy_loss | -3.53 | -| policy_loss | -0.0139 | -| value_loss | 2.99 | +| approx_kl | 0.0116 | +| entropy_loss | -3.58 | +| policy_loss | -0.021 | +| value_loss | 1.77 | | stat/ | | -| constraint_violation | 270 | +| constraint_violation | 283 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 115 | -| ep_reward | 0.46 | +| ep_return | 109 | +| ep_reward | 0.437 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 123 | -| ep_reward | 0.492 | -| mse | 299 | +| ep_return | 111 | +| ep_reward | 0.443 | +| mse | 304 | | time/ | | | progress | 0.1 | | step | 1e+05 | -| step_time | 10.6 | +| step_time | 7.68 | -------------------------------------- -2023-10-19 15:13:29,262 : Eval | ep_lengths 201.30 +/- 97.42 | ep_return 77.053 +/- 41.226 -2023-10-19 15:13:29,263 : +2023-10-27 16:58:28,119 : Eval | ep_lengths 201.30 +/- 97.42 | ep_return 83.504 +/- 42.314 +2023-10-27 16:58:28,143 : -------------------------------------- | loss/ | | -| approx_kl | 0.0332 | -| entropy_loss | -3.55 | -| policy_loss | -0.0196 | -| value_loss | 2.95 | +| approx_kl | 0.0223 | +| entropy_loss | -3.52 | +| policy_loss | -0.0149 | +| value_loss | 5.17 | | stat/ | | -| constraint_violation | 300 | -| ep_constraint_vio... | 0.7 | +| constraint_violation | 315 | +| ep_constraint_vio... | 0.8 | | ep_length | 226 | -| ep_return | 107 | -| ep_reward | 0.43 | +| ep_return | 100 | +| ep_reward | 0.404 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 201 | -| ep_return | 77.1 | -| ep_reward | 0.308 | -| mse | 369 | +| ep_return | 83.5 | +| ep_reward | 0.334 | +| mse | 377 | | time/ | | | progress | 0.11 | | step | 1.1e+05 | -| step_time | 10.1 | +| step_time | 7.59 | -------------------------------------- -2023-10-19 15:15:32,895 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 121.458 +/- 29.980 -2023-10-19 15:15:32,897 : +2023-10-27 17:00:00,734 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 117.303 +/- 23.179 +2023-10-27 17:00:00,745 : -------------------------------------- | loss/ | | -| approx_kl | 0.0356 | -| entropy_loss | -3.57 | -| policy_loss | -0.011 | -| value_loss | 1.78 | +| approx_kl | 0.026 | +| entropy_loss | -3.51 | +| policy_loss | -0.00675 | +| value_loss | 1.21 | | stat/ | | -| constraint_violation | 312 | -| ep_constraint_vio... | 0.1 | +| constraint_violation | 323 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 131 | -| ep_reward | 0.523 | +| ep_return | 133 | +| ep_reward | 0.532 | | stat_eval/ | | -| constraint_violation | 0.1 | +| constraint_violation | 0 | | ep_length | 250 | -| ep_return | 121 | -| ep_reward | 0.486 | -| mse | 284 | +| ep_return | 117 | +| ep_reward | 0.469 | +| mse | 301 | | time/ | | | progress | 0.12 | | step | 1.2e+05 | -| step_time | 10 | +| step_time | 7.35 | -------------------------------------- -2023-10-19 15:17:29,419 : Eval | ep_lengths 177.60 +/- 110.62 | ep_return 85.292 +/- 59.503 -2023-10-19 15:17:29,442 : +2023-10-27 17:01:29,012 : Eval | ep_lengths 177.60 +/- 110.62 | ep_return 91.300 +/- 62.804 +2023-10-27 17:01:29,013 : -------------------------------------- | loss/ | | -| approx_kl | 0.0269 | -| entropy_loss | -3.57 | -| policy_loss | -0.0106 | -| value_loss | 1.23 | +| approx_kl | 0.0325 | +| entropy_loss | -3.52 | +| policy_loss | -0.0199 | +| value_loss | 3.34 | | stat/ | | -| constraint_violation | 332 | -| ep_constraint_vio... | 0.2 | +| constraint_violation | 343 | +| ep_constraint_vio... | 0.1 | | ep_length | 225 | | ep_return | 101 | -| ep_reward | 0.403 | +| ep_reward | 0.405 | | stat_eval/ | | -| constraint_violation | 1.2 | +| constraint_violation | 1.1 | | ep_length | 178 | -| ep_return | 85.3 | -| ep_reward | 0.341 | -| mse | 176 | +| ep_return | 91.3 | +| ep_reward | 0.365 | +| mse | 178 | | time/ | | | progress | 0.13 | | step | 1.3e+05 | -| step_time | 10.1 | +| step_time | 7.6 | -------------------------------------- -2023-10-19 15:19:33,393 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 121.493 +/- 23.833 -2023-10-19 15:19:33,395 : +2023-10-27 17:03:00,982 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 130.270 +/- 32.382 +2023-10-27 17:03:01,019 : -------------------------------------- | loss/ | | -| approx_kl | 0.0213 | -| entropy_loss | -3.54 | -| policy_loss | -0.00902 | -| value_loss | 3.57 | +| approx_kl | 0.026 | +| entropy_loss | -3.47 | +| policy_loss | -0.0121 | +| value_loss | 1.42 | | stat/ | | -| constraint_violation | 370 | -| ep_constraint_vio... | 1.3 | +| constraint_violation | 383 | +| ep_constraint_vio... | 0.1 | | ep_length | 250 | -| ep_return | 124 | -| ep_reward | 0.496 | +| ep_return | 130 | +| ep_reward | 0.519 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 121 | -| ep_reward | 0.486 | -| mse | 290 | +| ep_return | 130 | +| ep_reward | 0.521 | +| mse | 270 | | time/ | | | progress | 0.14 | | step | 1.4e+05 | -| step_time | 10.1 | +| step_time | 7.31 | -------------------------------------- -2023-10-19 15:21:35,417 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 142.826 +/- 20.065 -2023-10-19 15:21:35,426 : +2023-10-27 17:04:32,274 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 136.958 +/- 23.347 +2023-10-27 17:04:32,282 : -------------------------------------- | loss/ | | -| approx_kl | 0.0225 | -| entropy_loss | -3.5 | -| policy_loss | -0.00369 | -| value_loss | 1.38 | +| approx_kl | 0.0339 | +| entropy_loss | -3.47 | +| policy_loss | -0.0063 | +| value_loss | 1.59 | | stat/ | | -| constraint_violation | 382 | -| ep_constraint_vio... | 0.1 | +| constraint_violation | 407 | +| ep_constraint_vio... | 1.3 | | ep_length | 226 | -| ep_return | 109 | -| ep_reward | 0.438 | +| ep_return | 114 | +| ep_reward | 0.457 | | stat_eval/ | | -| constraint_violation | 1.3 | +| constraint_violation | 1.4 | | ep_length | 250 | -| ep_return | 143 | -| ep_reward | 0.571 | -| mse | 235 | +| ep_return | 137 | +| ep_reward | 0.548 | +| mse | 245 | | time/ | | | progress | 0.15 | | step | 1.5e+05 | -| step_time | 10.1 | +| step_time | 7.47 | -------------------------------------- -2023-10-19 15:23:36,764 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 113.577 +/- 16.769 -2023-10-19 15:23:36,766 : +2023-10-27 17:06:03,431 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 113.639 +/- 12.843 +2023-10-27 17:06:03,432 : -------------------------------------- | loss/ | | -| approx_kl | 0.0361 | +| approx_kl | 0.0247 | | entropy_loss | -3.5 | -| policy_loss | -0.00643 | -| value_loss | 1.41 | +| policy_loss | -0.0183 | +| value_loss | 1.42 | | stat/ | | -| constraint_violation | 418 | +| constraint_violation | 430 | | ep_constraint_vio... | 0.2 | | ep_length | 200 | -| ep_return | 128 | -| ep_reward | 0.511 | +| ep_return | 122 | +| ep_reward | 0.487 | | stat_eval/ | | -| constraint_violation | 0.1 | +| constraint_violation | 0 | | ep_length | 250 | | ep_return | 114 | -| ep_reward | 0.454 | -| mse | 322 | +| ep_reward | 0.455 | +| mse | 314 | | time/ | | | progress | 0.16 | | step | 1.6e+05 | -| step_time | 9.76 | +| step_time | 7.28 | -------------------------------------- -2023-10-19 15:25:36,109 : Eval | ep_lengths 225.80 +/- 72.60 | ep_return 118.571 +/- 45.687 -2023-10-19 15:25:36,110 : +2023-10-27 17:07:33,430 : Eval | ep_lengths 225.80 +/- 72.60 | ep_return 110.888 +/- 46.015 +2023-10-27 17:07:33,431 : -------------------------------------- | loss/ | | -| approx_kl | 0.0364 | -| entropy_loss | -3.51 | -| policy_loss | -0.0187 | -| value_loss | 1.22 | +| approx_kl | 0.036 | +| entropy_loss | -3.53 | +| policy_loss | -0.0112 | +| value_loss | 1.54 | | stat/ | | -| constraint_violation | 457 | -| ep_constraint_vio... | 1.9 | -| ep_length | 176 | -| ep_return | 103 | -| ep_reward | 0.416 | +| constraint_violation | 472 | +| ep_constraint_vio... | 2.2 | +| ep_length | 202 | +| ep_return | 99.1 | +| ep_reward | 0.398 | | stat_eval/ | | -| constraint_violation | 0.2 | +| constraint_violation | 0.1 | | ep_length | 226 | -| ep_return | 119 | -| ep_reward | 0.474 | -| mse | 237 | +| ep_return | 111 | +| ep_reward | 0.444 | +| mse | 245 | | time/ | | | progress | 0.17 | | step | 1.7e+05 | -| step_time | 10.1 | +| step_time | 7.65 | -------------------------------------- -2023-10-19 15:27:30,013 : Eval | ep_lengths 200.90 +/- 98.20 | ep_return 105.182 +/- 55.081 -2023-10-19 15:27:30,015 : +2023-10-27 17:09:00,553 : Eval | ep_lengths 200.90 +/- 98.20 | ep_return 106.929 +/- 56.746 +2023-10-27 17:09:00,554 : -------------------------------------- | loss/ | | -| approx_kl | 0.0292 | -| entropy_loss | -3.5 | -| policy_loss | -0.0117 | -| value_loss | 1.36 | +| approx_kl | 0.0294 | +| entropy_loss | -3.52 | +| policy_loss | -0.0156 | +| value_loss | 1.26 | | stat/ | | -| constraint_violation | 483 | +| constraint_violation | 494 | | ep_constraint_vio... | 1.5 | | ep_length | 250 | -| ep_return | 151 | -| ep_reward | 0.603 | +| ep_return | 133 | +| ep_reward | 0.532 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 201 | -| ep_return | 105 | -| ep_reward | 0.421 | -| mse | 232 | +| ep_return | 107 | +| ep_reward | 0.428 | +| mse | 222 | | time/ | | | progress | 0.18 | | step | 1.8e+05 | -| step_time | 9.57 | +| step_time | 7.27 | -------------------------------------- -2023-10-19 15:29:28,331 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 132.334 +/- 19.182 -2023-10-19 15:29:28,351 : +2023-10-27 17:10:31,497 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 126.263 +/- 23.293 +2023-10-27 17:10:31,524 : -------------------------------------- | loss/ | | -| approx_kl | 0.0185 | -| entropy_loss | -3.56 | -| policy_loss | -0.013 | -| value_loss | 1.53 | +| approx_kl | 0.0305 | +| entropy_loss | -3.53 | +| policy_loss | -0.0165 | +| value_loss | 0.6 | | stat/ | | -| constraint_violation | 504 | +| constraint_violation | 517 | | ep_constraint_vio... | 0.1 | | ep_length | 225 | -| ep_return | 109 | -| ep_reward | 0.441 | +| ep_return | 110 | +| ep_reward | 0.445 | | stat_eval/ | | | constraint_violation | 1.7 | | ep_length | 250 | -| ep_return | 132 | -| ep_reward | 0.529 | -| mse | 279 | +| ep_return | 126 | +| ep_reward | 0.505 | +| mse | 280 | | time/ | | | progress | 0.19 | | step | 1.9e+05 | -| step_time | 9.76 | +| step_time | 7.41 | -------------------------------------- -2023-10-19 15:31:24,798 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 131.138 +/- 24.863 -2023-10-19 15:31:24,800 : +2023-10-27 17:12:02,218 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 137.944 +/- 26.501 +2023-10-27 17:12:02,231 : -------------------------------------- | loss/ | | -| approx_kl | 0.0258 | -| entropy_loss | -3.53 | -| policy_loss | -0.0103 | -| value_loss | 0.501 | +| approx_kl | 0.0298 | +| entropy_loss | -3.55 | +| policy_loss | -0.00739 | +| value_loss | 0.727 | | stat/ | | -| constraint_violation | 538 | +| constraint_violation | 548 | | ep_constraint_vio... | 1.5 | | ep_length | 250 | -| ep_return | 129 | -| ep_reward | 0.517 | +| ep_return | 143 | +| ep_reward | 0.57 | | stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 0.1 | | ep_length | 250 | -| ep_return | 131 | -| ep_reward | 0.525 | -| mse | 300 | +| ep_return | 138 | +| ep_reward | 0.552 | +| mse | 271 | | time/ | | | progress | 0.2 | | step | 2e+05 | -| step_time | 9.32 | +| step_time | 7.41 | -------------------------------------- -2023-10-19 15:33:12,454 : Eval | ep_lengths 176.60 +/- 112.16 | ep_return 93.903 +/- 65.437 -2023-10-19 15:33:12,455 : +2023-10-27 17:13:27,541 : Eval | ep_lengths 176.60 +/- 112.16 | ep_return 100.562 +/- 69.239 +2023-10-27 17:13:27,542 : -------------------------------------- | loss/ | | -| approx_kl | 0.0295 | -| entropy_loss | -3.53 | -| policy_loss | -0.00945 | -| value_loss | 1.18 | +| approx_kl | 0.0294 | +| entropy_loss | -3.56 | +| policy_loss | -0.00994 | +| value_loss | 1.55 | | stat/ | | -| constraint_violation | 559 | -| ep_constraint_vio... | 1.5 | -| ep_length | 226 | -| ep_return | 123 | -| ep_reward | 0.494 | +| constraint_violation | 569 | +| ep_constraint_vio... | 0.1 | +| ep_length | 250 | +| ep_return | 148 | +| ep_reward | 0.592 | | stat_eval/ | | | constraint_violation | 0.3 | | ep_length | 177 | -| ep_return | 93.9 | -| ep_reward | 0.376 | -| mse | 223 | +| ep_return | 101 | +| ep_reward | 0.402 | +| mse | 203 | | time/ | | | progress | 0.21 | | step | 2.1e+05 | -| step_time | 9.09 | +| step_time | 7.18 | -------------------------------------- -2023-10-19 15:35:05,274 : Eval | ep_lengths 226.30 +/- 71.10 | ep_return 114.294 +/- 43.522 -2023-10-19 15:35:05,275 : +2023-10-27 17:14:56,579 : Eval | ep_lengths 226.30 +/- 71.10 | ep_return 127.493 +/- 46.977 +2023-10-27 17:14:56,606 : -------------------------------------- | loss/ | | -| approx_kl | 0.0297 | -| entropy_loss | -3.49 | -| policy_loss | -0.0231 | -| value_loss | 0.625 | +| approx_kl | 0.0337 | +| entropy_loss | -3.61 | +| policy_loss | -0.0116 | +| value_loss | 1.54 | | stat/ | | -| constraint_violation | 589 | -| ep_constraint_vio... | 0.9 | +| constraint_violation | 595 | +| ep_constraint_vio... | 0.4 | | ep_length | 250 | -| ep_return | 137 | -| ep_reward | 0.548 | +| ep_return | 153 | +| ep_reward | 0.612 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 226 | -| ep_return | 114 | -| ep_reward | 0.457 | -| mse | 268 | +| ep_return | 127 | +| ep_reward | 0.51 | +| mse | 237 | | time/ | | | progress | 0.22 | | step | 2.2e+05 | -| step_time | 9.12 | +| step_time | 7.27 | -------------------------------------- -2023-10-19 15:36:59,109 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 133.387 +/- 27.052 -2023-10-19 15:36:59,110 : +2023-10-27 17:16:27,091 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 143.570 +/- 19.621 +2023-10-27 17:16:27,105 : -------------------------------------- | loss/ | | -| approx_kl | 0.0231 | -| entropy_loss | -3.49 | -| policy_loss | -0.0109 | -| value_loss | 0.89 | +| approx_kl | 0.0336 | +| entropy_loss | -3.65 | +| policy_loss | -0.0143 | +| value_loss | 0.504 | | stat/ | | -| constraint_violation | 607 | +| constraint_violation | 614 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 128 | -| ep_reward | 0.514 | +| ep_return | 143 | +| ep_reward | 0.573 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 133 | -| ep_reward | 0.534 | -| mse | 292 | +| ep_return | 144 | +| ep_reward | 0.574 | +| mse | 258 | | time/ | | | progress | 0.23 | | step | 2.3e+05 | -| step_time | 9.42 | +| step_time | 7.38 | -------------------------------------- -2023-10-19 15:38:51,028 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 118.635 +/- 44.801 -2023-10-19 15:38:51,048 : +2023-10-27 17:17:55,808 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 121.040 +/- 45.073 +2023-10-27 17:17:55,809 : -------------------------------------- | loss/ | | -| approx_kl | 0.0298 | -| entropy_loss | -3.51 | -| policy_loss | -0.00528 | -| value_loss | 1.16 | +| approx_kl | 0.0363 | +| entropy_loss | -3.61 | +| policy_loss | -0.00289 | +| value_loss | 0.701 | | stat/ | | -| constraint_violation | 627 | +| constraint_violation | 641 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 145 | -| ep_reward | 0.579 | +| ep_return | 154 | +| ep_reward | 0.617 | | stat_eval/ | | -| constraint_violation | 0.9 | +| constraint_violation | 0.8 | | ep_length | 225 | -| ep_return | 119 | -| ep_reward | 0.475 | -| mse | 267 | +| ep_return | 121 | +| ep_reward | 0.484 | +| mse | 249 | | time/ | | | progress | 0.24 | | step | 2.4e+05 | -| step_time | 9.57 | +| step_time | 7.43 | -------------------------------------- -2023-10-19 15:40:41,195 : Eval | ep_lengths 201.90 +/- 96.26 | ep_return 110.568 +/- 60.446 -2023-10-19 15:40:41,196 : +2023-10-27 17:19:22,430 : Eval | ep_lengths 201.90 +/- 96.26 | ep_return 120.073 +/- 62.966 +2023-10-27 17:19:22,432 : -------------------------------------- | loss/ | | -| approx_kl | 0.0282 | -| entropy_loss | -3.49 | -| policy_loss | -0.0106 | -| value_loss | 0.441 | +| approx_kl | 0.0323 | +| entropy_loss | -3.6 | +| policy_loss | -0.0132 | +| value_loss | 0.593 | | stat/ | | -| constraint_violation | 644 | -| ep_constraint_vio... | 0.1 | +| constraint_violation | 658 | +| ep_constraint_vio... | 0.2 | | ep_length | 250 | -| ep_return | 142 | -| ep_reward | 0.567 | +| ep_return | 160 | +| ep_reward | 0.642 | | stat_eval/ | | -| constraint_violation | 0.2 | +| constraint_violation | 0.3 | | ep_length | 202 | -| ep_return | 111 | -| ep_reward | 0.442 | -| mse | 254 | +| ep_return | 120 | +| ep_reward | 0.48 | +| mse | 229 | | time/ | | | progress | 0.25 | | step | 2.5e+05 | -| step_time | 9.24 | +| step_time | 7.19 | -------------------------------------- -2023-10-19 15:42:35,149 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 145.607 +/- 23.133 -2023-10-19 15:42:35,159 : +2023-10-27 17:20:52,565 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 148.164 +/- 23.708 +2023-10-27 17:20:52,576 : -------------------------------------- | loss/ | | -| approx_kl | 0.0194 | -| entropy_loss | -3.5 | -| policy_loss | -0.0194 | -| value_loss | 0.631 | +| approx_kl | 0.0461 | +| entropy_loss | -3.6 | +| policy_loss | -0.00999 | +| value_loss | 1.54 | | stat/ | | -| constraint_violation | 666 | +| constraint_violation | 675 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 160 | -| ep_reward | 0.64 | +| ep_return | 158 | +| ep_reward | 0.634 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 146 | -| ep_reward | 0.582 | -| mse | 257 | +| ep_return | 148 | +| ep_reward | 0.593 | +| mse | 248 | | time/ | | | progress | 0.26 | | step | 2.6e+05 | -| step_time | 9.32 | +| step_time | 7.34 | -------------------------------------- -2023-10-19 15:44:30,519 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 139.961 +/- 25.859 -2023-10-19 15:44:30,520 : +2023-10-27 17:22:22,135 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 155.426 +/- 23.249 +2023-10-27 17:22:22,147 : -------------------------------------- | loss/ | | -| approx_kl | 0.022 | -| entropy_loss | -3.44 | -| policy_loss | -0.0165 | -| value_loss | 0.579 | +| approx_kl | 0.0192 | +| entropy_loss | -3.67 | +| policy_loss | -0.00346 | +| value_loss | 0.415 | | stat/ | | -| constraint_violation | 704 | -| ep_constraint_vio... | 0.6 | +| constraint_violation | 720 | +| ep_constraint_vio... | 1.3 | | ep_length | 250 | -| ep_return | 144 | -| ep_reward | 0.578 | +| ep_return | 150 | +| ep_reward | 0.601 | | stat_eval/ | | -| constraint_violation | 2.9 | +| constraint_violation | 2.5 | | ep_length | 250 | -| ep_return | 140 | -| ep_reward | 0.56 | -| mse | 274 | +| ep_return | 155 | +| ep_reward | 0.622 | +| mse | 247 | | time/ | | | progress | 0.27 | | step | 2.7e+05 | -| step_time | 10.2 | +| step_time | 7.46 | -------------------------------------- -2023-10-19 15:46:33,385 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 160.501 +/- 23.668 -2023-10-19 15:46:33,394 : +2023-10-27 17:23:52,374 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 164.043 +/- 21.630 +2023-10-27 17:23:52,385 : -------------------------------------- | loss/ | | -| approx_kl | 0.033 | -| entropy_loss | -3.43 | -| policy_loss | -0.00229 | -| value_loss | 1.01 | +| approx_kl | 0.0261 | +| entropy_loss | -3.67 | +| policy_loss | -0.0032 | +| value_loss | 1.2 | | stat/ | | -| constraint_violation | 731 | +| constraint_violation | 747 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 141 | -| ep_reward | 0.564 | +| ep_return | 149 | +| ep_reward | 0.595 | | stat_eval/ | | -| constraint_violation | 0.2 | +| constraint_violation | 0 | | ep_length | 250 | -| ep_return | 161 | -| ep_reward | 0.642 | -| mse | 225 | +| ep_return | 164 | +| ep_reward | 0.656 | +| mse | 212 | | time/ | | | progress | 0.28 | | step | 2.8e+05 | -| step_time | 10.1 | +| step_time | 7.49 | -------------------------------------- -2023-10-19 15:48:32,001 : Eval | ep_lengths 201.40 +/- 97.21 | ep_return 109.066 +/- 58.635 -2023-10-19 15:48:32,002 : +2023-10-27 17:25:19,568 : Eval | ep_lengths 201.40 +/- 97.21 | ep_return 119.476 +/- 68.507 +2023-10-27 17:25:19,569 : -------------------------------------- | loss/ | | -| approx_kl | 0.0212 | -| entropy_loss | -3.43 | -| policy_loss | -0.00615 | -| value_loss | 1.02 | +| approx_kl | 0.0334 | +| entropy_loss | -3.69 | +| policy_loss | 0.00354 | +| value_loss | 0.916 | | stat/ | | -| constraint_violation | 740 | +| constraint_violation | 757 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 145 | -| ep_reward | 0.58 | +| ep_return | 154 | +| ep_reward | 0.618 | | stat_eval/ | | -| constraint_violation | 0.2 | +| constraint_violation | 0.3 | | ep_length | 201 | -| ep_return | 109 | -| ep_reward | 0.436 | -| mse | 178 | +| ep_return | 119 | +| ep_reward | 0.478 | +| mse | 162 | | time/ | | | progress | 0.29 | | step | 2.9e+05 | -| step_time | 10.2 | +| step_time | 7.44 | -------------------------------------- -2023-10-19 15:50:32,851 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 106.364 +/- 36.680 -2023-10-19 15:50:32,853 : +2023-10-27 17:26:48,814 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 122.420 +/- 41.447 +2023-10-27 17:26:48,815 : -------------------------------------- | loss/ | | -| approx_kl | 0.0254 | -| entropy_loss | -3.4 | -| policy_loss | 0.00158 | -| value_loss | 0.786 | +| approx_kl | 0.0314 | +| entropy_loss | -3.69 | +| policy_loss | -0.00962 | +| value_loss | 1.07 | | stat/ | | -| constraint_violation | 786 | -| ep_constraint_vio... | 2.4 | -| ep_length | 226 | -| ep_return | 128 | -| ep_reward | 0.513 | +| constraint_violation | 800 | +| ep_constraint_vio... | 2.3 | +| ep_length | 250 | +| ep_return | 152 | +| ep_reward | 0.607 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 106 | -| ep_reward | 0.425 | -| mse | 318 | +| ep_return | 122 | +| ep_reward | 0.49 | +| mse | 296 | | time/ | | | progress | 0.3 | | step | 3e+05 | -| step_time | 10 | +| step_time | 7.52 | -------------------------------------- -2023-10-19 15:52:33,629 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 120.654 +/- 20.245 -2023-10-19 15:52:33,630 : +2023-10-27 17:28:20,696 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 138.811 +/- 22.110 +2023-10-27 17:28:20,697 : -------------------------------------- | loss/ | | -| approx_kl | 0.0288 | -| entropy_loss | -3.36 | -| policy_loss | -0.00536 | -| value_loss | 0.621 | +| approx_kl | 0.0157 | +| entropy_loss | -3.66 | +| policy_loss | -0.0115 | +| value_loss | 3.24 | | stat/ | | -| constraint_violation | 803 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 131 | -| ep_reward | 0.523 | +| constraint_violation | 817 | +| ep_constraint_vio... | 0.1 | +| ep_length | 226 | +| ep_return | 135 | +| ep_reward | 0.541 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 121 | -| ep_reward | 0.483 | -| mse | 331 | +| ep_return | 139 | +| ep_reward | 0.555 | +| mse | 289 | | time/ | | | progress | 0.31 | | step | 3.1e+05 | -| step_time | 9.67 | +| step_time | 7.54 | -------------------------------------- -2023-10-19 15:54:30,914 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 134.735 +/- 19.054 -2023-10-19 15:54:30,915 : +2023-10-27 17:29:52,064 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 141.757 +/- 20.707 +2023-10-27 17:29:52,065 : -------------------------------------- | loss/ | | -| approx_kl | 0.0332 | -| entropy_loss | -3.36 | -| policy_loss | -0.0142 | -| value_loss | 1.69 | +| approx_kl | 0.0348 | +| entropy_loss | -3.67 | +| policy_loss | -0.0106 | +| value_loss | 1.41 | | stat/ | | -| constraint_violation | 824 | -| ep_constraint_vio... | 1.1 | +| constraint_violation | 841 | +| ep_constraint_vio... | 1.3 | | ep_length | 227 | -| ep_return | 124 | -| ep_reward | 0.497 | +| ep_return | 138 | +| ep_reward | 0.551 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 135 | -| ep_reward | 0.539 | -| mse | 330 | +| ep_return | 142 | +| ep_reward | 0.567 | +| mse | 304 | | time/ | | | progress | 0.32 | | step | 3.2e+05 | -| step_time | 9.56 | +| step_time | 7.37 | -------------------------------------- -2023-10-19 15:56:23,054 : Eval | ep_lengths 202.00 +/- 96.08 | ep_return 114.870 +/- 63.428 -2023-10-19 15:56:23,056 : +2023-10-27 17:31:19,801 : Eval | ep_lengths 201.90 +/- 96.28 | ep_return 120.523 +/- 66.637 +2023-10-27 17:31:19,802 : -------------------------------------- | loss/ | | -| approx_kl | 0.0227 | -| entropy_loss | -3.4 | -| policy_loss | -0.012 | -| value_loss | 1.64 | +| approx_kl | 0.0246 | +| entropy_loss | -3.69 | +| policy_loss | -0.00936 | +| value_loss | 0.517 | | stat/ | | -| constraint_violation | 872 | -| ep_constraint_vio... | 1.2 | -| ep_length | 250 | -| ep_return | 144 | -| ep_reward | 0.576 | +| constraint_violation | 890 | +| ep_constraint_vio... | 0.1 | +| ep_length | 228 | +| ep_return | 141 | +| ep_reward | 0.572 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 202 | -| ep_return | 115 | -| ep_reward | 0.459 | -| mse | 212 | +| ep_return | 121 | +| ep_reward | 0.482 | +| mse | 201 | | time/ | | | progress | 0.33 | | step | 3.3e+05 | -| step_time | 9.69 | +| step_time | 7.37 | -------------------------------------- -2023-10-19 15:58:17,706 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 131.029 +/- 21.603 -2023-10-19 15:58:17,708 : +2023-10-27 17:32:50,723 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 142.446 +/- 21.301 +2023-10-27 17:32:50,724 : -------------------------------------- | loss/ | | -| approx_kl | 0.0269 | -| entropy_loss | -3.39 | -| policy_loss | -0.0142 | -| value_loss | 0.462 | +| approx_kl | 0.0228 | +| entropy_loss | -3.64 | +| policy_loss | -0.00586 | +| value_loss | 0.447 | | stat/ | | -| constraint_violation | 884 | -| ep_constraint_vio... | 0.7 | +| constraint_violation | 902 | +| ep_constraint_vio... | 0.6 | | ep_length | 250 | -| ep_return | 146 | -| ep_reward | 0.582 | +| ep_return | 148 | +| ep_reward | 0.593 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 131 | -| ep_reward | 0.524 | -| mse | 308 | +| ep_return | 142 | +| ep_reward | 0.57 | +| mse | 290 | | time/ | | | progress | 0.34 | | step | 3.4e+05 | -| step_time | 9.39 | +| step_time | 7.46 | -------------------------------------- -2023-10-19 16:00:11,404 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 129.460 +/- 39.755 -2023-10-19 16:00:11,406 : +2023-10-27 17:34:22,156 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 140.430 +/- 40.077 +2023-10-27 17:34:22,157 : -------------------------------------- | loss/ | | -| approx_kl | 0.0396 | -| entropy_loss | -3.4 | -| policy_loss | -0.01 | -| value_loss | 2.15 | +| approx_kl | 0.0257 | +| entropy_loss | -3.59 | +| policy_loss | -0.0105 | +| value_loss | 0.801 | | stat/ | | -| constraint_violation | 887 | +| constraint_violation | 905 | | ep_constraint_vio... | 0.1 | | ep_length | 226 | -| ep_return | 125 | -| ep_reward | 0.502 | +| ep_return | 129 | +| ep_reward | 0.515 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 129 | -| ep_reward | 0.518 | -| mse | 371 | +| ep_return | 140 | +| ep_reward | 0.562 | +| mse | 343 | | time/ | | | progress | 0.35 | | step | 3.5e+05 | -| step_time | 9.1 | +| step_time | 7.42 | -------------------------------------- -2023-10-19 16:02:05,070 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 135.231 +/- 22.972 -2023-10-19 16:02:05,071 : +2023-10-27 17:35:53,805 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 145.907 +/- 23.023 +2023-10-27 17:35:53,807 : -------------------------------------- | loss/ | | -| approx_kl | 0.0244 | -| entropy_loss | -3.45 | -| policy_loss | -0.012 | -| value_loss | 0.724 | +| approx_kl | 0.0316 | +| entropy_loss | -3.59 | +| policy_loss | -0.0114 | +| value_loss | 0.692 | | stat/ | | -| constraint_violation | 948 | -| ep_constraint_vio... | 1.1 | +| constraint_violation | 962 | +| ep_constraint_vio... | 0.1 | | ep_length | 226 | -| ep_return | 122 | -| ep_reward | 0.491 | +| ep_return | 137 | +| ep_reward | 0.551 | | stat_eval/ | | | constraint_violation | 1.3 | | ep_length | 250 | -| ep_return | 135 | -| ep_reward | 0.541 | -| mse | 263 | +| ep_return | 146 | +| ep_reward | 0.584 | +| mse | 235 | | time/ | | | progress | 0.36 | | step | 3.6e+05 | -| step_time | 9.09 | +| step_time | 7.47 | -------------------------------------- -2023-10-19 16:03:54,223 : Eval | ep_lengths 202.30 +/- 95.41 | ep_return 111.777 +/- 58.661 -2023-10-19 16:03:54,225 : +2023-10-27 17:37:21,543 : Eval | ep_lengths 202.20 +/- 95.60 | ep_return 124.172 +/- 64.751 +2023-10-27 17:37:21,544 : -------------------------------------- | loss/ | | -| approx_kl | 0.0164 | -| entropy_loss | -3.42 | -| policy_loss | -0.0104 | -| value_loss | 0.838 | +| approx_kl | 0.0323 | +| entropy_loss | -3.64 | +| policy_loss | -0.014 | +| value_loss | 0.891 | | stat/ | | -| constraint_violation | 988 | -| ep_constraint_vio... | 0.5 | +| constraint_violation | 1e+03 | +| ep_constraint_vio... | 0.3 | | ep_length | 225 | -| ep_return | 125 | -| ep_reward | 0.498 | +| ep_return | 132 | +| ep_reward | 0.528 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 202 | -| ep_return | 112 | -| ep_reward | 0.447 | -| mse | 191 | +| ep_return | 124 | +| ep_reward | 0.497 | +| mse | 168 | | time/ | | | progress | 0.37 | | step | 3.7e+05 | -| step_time | 9.43 | +| step_time | 7.46 | -------------------------------------- -2023-10-19 16:05:45,303 : Eval | ep_lengths 225.90 +/- 72.30 | ep_return 122.134 +/- 49.264 -2023-10-19 16:05:45,305 : +2023-10-27 17:38:50,725 : Eval | ep_lengths 225.90 +/- 72.30 | ep_return 130.917 +/- 50.517 +2023-10-27 17:38:50,726 : -------------------------------------- | loss/ | | -| approx_kl | 0.0305 | -| entropy_loss | -3.41 | -| policy_loss | -0.00777 | -| value_loss | 0.423 | +| approx_kl | 0.0354 | +| entropy_loss | -3.59 | +| policy_loss | -0.0101 | +| value_loss | 0.527 | | stat/ | | -| constraint_violation | 1.01e+03 | +| constraint_violation | 1.02e+03 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 136 | -| ep_reward | 0.544 | +| ep_return | 151 | +| ep_reward | 0.603 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 226 | -| ep_return | 122 | -| ep_reward | 0.489 | -| mse | 230 | +| ep_return | 131 | +| ep_reward | 0.524 | +| mse | 208 | | time/ | | | progress | 0.38 | | step | 3.8e+05 | -| step_time | 9.06 | +| step_time | 7.33 | -------------------------------------- -2023-10-19 16:07:37,286 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 138.710 +/- 20.384 -2023-10-19 16:07:37,287 : +2023-10-27 17:40:21,377 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 138.581 +/- 16.627 +2023-10-27 17:40:21,378 : -------------------------------------- | loss/ | | -| approx_kl | 0.0203 | -| entropy_loss | -3.41 | -| policy_loss | -0.0138 | -| value_loss | 0.216 | +| approx_kl | 0.0247 | +| entropy_loss | -3.63 | +| policy_loss | -0.00346 | +| value_loss | 0.603 | | stat/ | | -| constraint_violation | 1.04e+03 | -| ep_constraint_vio... | 1.5 | -| ep_length | 226 | -| ep_return | 130 | -| ep_reward | 0.523 | +| constraint_violation | 1.06e+03 | +| ep_constraint_vio... | 0.1 | +| ep_length | 250 | +| ep_return | 157 | +| ep_reward | 0.63 | | stat_eval/ | | -| constraint_violation | 1.5 | +| constraint_violation | 1.4 | | ep_length | 250 | | ep_return | 139 | -| ep_reward | 0.555 | -| mse | 314 | +| ep_reward | 0.554 | +| mse | 315 | | time/ | | | progress | 0.39 | | step | 3.9e+05 | -| step_time | 9.15 | +| step_time | 7.38 | -------------------------------------- -2023-10-19 16:09:27,308 : Eval | ep_lengths 225.60 +/- 73.20 | ep_return 125.302 +/- 46.197 -2023-10-19 16:09:27,331 : +2023-10-27 17:41:50,540 : Eval | ep_lengths 225.60 +/- 73.20 | ep_return 129.448 +/- 47.610 +2023-10-27 17:41:50,541 : -------------------------------------- | loss/ | | -| approx_kl | 0.0341 | -| entropy_loss | -3.43 | -| policy_loss | -0.00245 | -| value_loss | 0.764 | +| approx_kl | 0.0323 | +| entropy_loss | -3.62 | +| policy_loss | -0.0125 | +| value_loss | 2.06 | | stat/ | | -| constraint_violation | 1.07e+03 | -| ep_constraint_vio... | 0.3 | -| ep_length | 250 | +| constraint_violation | 1.09e+03 | +| ep_constraint_vio... | 0.4 | +| ep_length | 226 | | ep_return | 142 | -| ep_reward | 0.566 | +| ep_reward | 0.569 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 226 | -| ep_return | 125 | -| ep_reward | 0.501 | -| mse | 254 | +| ep_return | 129 | +| ep_reward | 0.518 | +| mse | 242 | | time/ | | | progress | 0.4 | | step | 4e+05 | -| step_time | 9.14 | +| step_time | 7.58 | -------------------------------------- -2023-10-19 16:11:19,039 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 137.644 +/- 17.663 -2023-10-19 16:11:19,061 : +2023-10-27 17:43:21,174 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 148.462 +/- 25.226 +2023-10-27 17:43:21,175 : -------------------------------------- | loss/ | | -| approx_kl | 0.029 | -| entropy_loss | -3.46 | -| policy_loss | -0.00809 | -| value_loss | 1.11 | +| approx_kl | 0.0264 | +| entropy_loss | -3.61 | +| policy_loss | -0.00539 | +| value_loss | 3.47 | | stat/ | | -| constraint_violation | 1.1e+03 | -| ep_constraint_vio... | 1.8 | -| ep_length | 250 | -| ep_return | 139 | -| ep_reward | 0.556 | +| constraint_violation | 1.12e+03 | +| ep_constraint_vio... | 0.2 | +| ep_length | 203 | +| ep_return | 126 | +| ep_reward | 0.507 | | stat_eval/ | | -| constraint_violation | 0.1 | +| constraint_violation | 0.2 | | ep_length | 250 | -| ep_return | 138 | -| ep_reward | 0.551 | -| mse | 265 | +| ep_return | 148 | +| ep_reward | 0.594 | +| mse | 246 | | time/ | | | progress | 0.41 | | step | 4.1e+05 | -| step_time | 9.38 | +| step_time | 7.53 | -------------------------------------- -2023-10-19 16:13:06,667 : Eval | ep_lengths 201.40 +/- 97.20 | ep_return 115.193 +/- 62.616 -2023-10-19 16:13:06,669 : +2023-10-27 17:44:48,364 : Eval | ep_lengths 201.40 +/- 97.20 | ep_return 128.625 +/- 66.232 +2023-10-27 17:44:48,365 : -------------------------------------- | loss/ | | -| approx_kl | 0.0343 | -| entropy_loss | -3.48 | -| policy_loss | -0.0093 | -| value_loss | 0.639 | +| approx_kl | 0.0244 | +| entropy_loss | -3.55 | +| policy_loss | -0.0154 | +| value_loss | 1.08 | | stat/ | | -| constraint_violation | 1.11e+03 | +| constraint_violation | 1.12e+03 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 142 | -| ep_reward | 0.569 | +| ep_return | 156 | +| ep_reward | 0.622 | | stat_eval/ | | -| constraint_violation | 1.3 | +| constraint_violation | 1.2 | | ep_length | 201 | -| ep_return | 115 | -| ep_reward | 0.461 | -| mse | 162 | +| ep_return | 129 | +| ep_reward | 0.515 | +| mse | 142 | | time/ | | | progress | 0.42 | | step | 4.2e+05 | -| step_time | 9.19 | +| step_time | 7.41 | -------------------------------------- -2023-10-19 16:14:54,933 : Eval | ep_lengths 201.90 +/- 96.26 | ep_return 110.810 +/- 56.947 -2023-10-19 16:14:54,934 : +2023-10-27 17:46:16,039 : Eval | ep_lengths 201.90 +/- 96.26 | ep_return 119.274 +/- 60.649 +2023-10-27 17:46:16,040 : -------------------------------------- | loss/ | | -| approx_kl | 0.0319 | -| entropy_loss | -3.51 | -| policy_loss | 0.00373 | -| value_loss | 0.807 | +| approx_kl | 0.0239 | +| entropy_loss | -3.55 | +| policy_loss | -0.0156 | +| value_loss | 0.702 | | stat/ | | -| constraint_violation | 1.12e+03 | +| constraint_violation | 1.14e+03 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 153 | -| ep_reward | 0.612 | +| ep_return | 159 | +| ep_reward | 0.638 | | stat_eval/ | | -| constraint_violation | 1.9 | +| constraint_violation | 2 | | ep_length | 202 | -| ep_return | 111 | -| ep_reward | 0.443 | -| mse | 201 | +| ep_return | 119 | +| ep_reward | 0.477 | +| mse | 186 | | time/ | | | progress | 0.43 | | step | 4.3e+05 | -| step_time | 9.17 | +| step_time | 7.41 | -------------------------------------- -2023-10-19 16:16:45,806 : Eval | ep_lengths 226.50 +/- 70.50 | ep_return 118.140 +/- 45.287 -2023-10-19 16:16:45,807 : +2023-10-27 17:47:45,646 : Eval | ep_lengths 226.60 +/- 70.20 | ep_return 127.692 +/- 46.849 +2023-10-27 17:47:45,647 : -------------------------------------- | loss/ | | -| approx_kl | 0.0274 | -| entropy_loss | -3.48 | -| policy_loss | -0.0105 | -| value_loss | 0.521 | +| approx_kl | 0.0138 | +| entropy_loss | -3.59 | +| policy_loss | -0.00841 | +| value_loss | 0.639 | | stat/ | | -| constraint_violation | 1.12e+03 | +| constraint_violation | 1.14e+03 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 139 | -| ep_reward | 0.556 | +| ep_return | 151 | +| ep_reward | 0.605 | | stat_eval/ | | | constraint_violation | 0.2 | -| ep_length | 226 | -| ep_return | 118 | -| ep_reward | 0.473 | -| mse | 302 | +| ep_length | 227 | +| ep_return | 128 | +| ep_reward | 0.511 | +| mse | 283 | | time/ | | | progress | 0.44 | | step | 4.4e+05 | -| step_time | 9.08 | +| step_time | 7.39 | -------------------------------------- -2023-10-19 16:18:37,438 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 138.111 +/- 19.298 -2023-10-19 16:18:37,440 : +2023-10-27 17:49:15,954 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 150.417 +/- 22.687 +2023-10-27 17:49:15,955 : -------------------------------------- | loss/ | | -| approx_kl | 0.0264 | -| entropy_loss | -3.52 | -| policy_loss | -0.0137 | -| value_loss | 0.635 | +| approx_kl | 0.0244 | +| entropy_loss | -3.58 | +| policy_loss | -0.02 | +| value_loss | 0.514 | | stat/ | | -| constraint_violation | 1.15e+03 | -| ep_constraint_vio... | 2.7 | +| constraint_violation | 1.17e+03 | +| ep_constraint_vio... | 2.6 | | ep_length | 204 | -| ep_return | 118 | -| ep_reward | 0.477 | +| ep_return | 131 | +| ep_reward | 0.527 | | stat_eval/ | | -| constraint_violation | 0.1 | +| constraint_violation | 0 | | ep_length | 250 | -| ep_return | 138 | -| ep_reward | 0.552 | -| mse | 233 | +| ep_return | 150 | +| ep_reward | 0.602 | +| mse | 223 | | time/ | | | progress | 0.45 | | step | 4.5e+05 | -| step_time | 9.01 | +| step_time | 7.42 | -------------------------------------- -2023-10-19 16:20:29,433 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 148.520 +/- 23.766 -2023-10-19 16:20:29,435 : +2023-10-27 17:50:45,886 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 162.085 +/- 19.072 +2023-10-27 17:50:45,887 : -------------------------------------- | loss/ | | -| approx_kl | 0.0367 | -| entropy_loss | -3.56 | -| policy_loss | -0.00766 | -| value_loss | 0.551 | +| approx_kl | 0.0327 | +| entropy_loss | -3.57 | +| policy_loss | -0.0124 | +| value_loss | 2.52 | | stat/ | | -| constraint_violation | 1.19e+03 | -| ep_constraint_vio... | 0.8 | -| ep_length | 225 | -| ep_return | 131 | -| ep_reward | 0.527 | +| constraint_violation | 1.2e+03 | +| ep_constraint_vio... | 0.9 | +| ep_length | 202 | +| ep_return | 117 | +| ep_reward | 0.472 | | stat_eval/ | | -| constraint_violation | 0.1 | +| constraint_violation | 0.8 | | ep_length | 250 | -| ep_return | 149 | -| ep_reward | 0.594 | -| mse | 159 | +| ep_return | 162 | +| ep_reward | 0.648 | +| mse | 133 | | time/ | | | progress | 0.46 | | step | 4.6e+05 | -| step_time | 9.42 | +| step_time | 7.52 | -------------------------------------- -2023-10-19 16:22:20,915 : Eval | ep_lengths 226.30 +/- 71.10 | ep_return 115.876 +/- 42.119 -2023-10-19 16:22:20,916 : +2023-10-27 17:52:14,983 : Eval | ep_lengths 226.20 +/- 71.40 | ep_return 125.382 +/- 44.678 +2023-10-27 17:52:14,984 : -------------------------------------- | loss/ | | -| approx_kl | 0.0328 | -| entropy_loss | -3.55 | -| policy_loss | -0.00321 | -| value_loss | 1.88 | +| approx_kl | 0.0279 | +| entropy_loss | -3.59 | +| policy_loss | -0.00223 | +| value_loss | 0.571 | | stat/ | | -| constraint_violation | 1.23e+03 | +| constraint_violation | 1.24e+03 | | ep_constraint_vio... | 1.4 | | ep_length | 200 | -| ep_return | 118 | -| ep_reward | 0.472 | +| ep_return | 133 | +| ep_reward | 0.535 | | stat_eval/ | | | constraint_violation | 0.3 | | ep_length | 226 | -| ep_return | 116 | -| ep_reward | 0.464 | -| mse | 265 | +| ep_return | 125 | +| ep_reward | 0.502 | +| mse | 248 | | time/ | | | progress | 0.47 | | step | 4.7e+05 | -| step_time | 8.99 | +| step_time | 7.16 | -------------------------------------- -2023-10-19 16:24:08,654 : Eval | ep_lengths 201.20 +/- 97.60 | ep_return 118.888 +/- 62.184 -2023-10-19 16:24:08,673 : +2023-10-27 17:53:41,489 : Eval | ep_lengths 201.20 +/- 97.60 | ep_return 127.977 +/- 66.514 +2023-10-27 17:53:41,490 : -------------------------------------- | loss/ | | -| approx_kl | 0.0275 | -| entropy_loss | -3.52 | -| policy_loss | -0.00734 | -| value_loss | 1.73 | +| approx_kl | 0.0306 | +| entropy_loss | -3.58 | +| policy_loss | -0.00268 | +| value_loss | 0.472 | | stat/ | | -| constraint_violation | 1.26e+03 | -| ep_constraint_vio... | 1.5 | +| constraint_violation | 1.27e+03 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 142 | -| ep_reward | 0.568 | +| ep_return | 159 | +| ep_reward | 0.636 | | stat_eval/ | | | constraint_violation | 1.6 | | ep_length | 201 | -| ep_return | 119 | -| ep_reward | 0.476 | -| mse | 191 | -| time/ | | +| ep_return | 128 | +| ep_reward | 0.512 | +| mse | 176 | +| time/ | | | progress | 0.48 | | step | 4.8e+05 | -| step_time | 8.93 | +| step_time | 7.2 | -------------------------------------- -2023-10-19 16:26:01,061 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 131.973 +/- 28.640 -2023-10-19 16:26:01,063 : +2023-10-27 17:55:12,138 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 141.793 +/- 29.579 +2023-10-27 17:55:12,139 : -------------------------------------- | loss/ | | -| approx_kl | 0.0417 | -| entropy_loss | -3.53 | -| policy_loss | -0.00845 | -| value_loss | 1.97 | +| approx_kl | 0.0266 | +| entropy_loss | -3.58 | +| policy_loss | -0.0021 | +| value_loss | 1.08 | | stat/ | | -| constraint_violation | 1.28e+03 | +| constraint_violation | 1.29e+03 | | ep_constraint_vio... | 0.1 | | ep_length | 227 | -| ep_return | 141 | -| ep_reward | 0.566 | +| ep_return | 155 | +| ep_reward | 0.621 | | stat_eval/ | | -| constraint_violation | 0.2 | +| constraint_violation | 0.3 | | ep_length | 250 | -| ep_return | 132 | -| ep_reward | 0.528 | -| mse | 385 | +| ep_return | 142 | +| ep_reward | 0.567 | +| mse | 370 | | time/ | | | progress | 0.49 | | step | 4.9e+05 | -| step_time | 9.07 | +| step_time | 7.34 | -------------------------------------- -2023-10-19 16:27:52,072 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 139.912 +/- 11.831 -2023-10-19 16:27:52,073 : +2023-10-27 17:56:42,449 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 144.464 +/- 11.800 +2023-10-27 17:56:42,451 : -------------------------------------- | loss/ | | -| approx_kl | 0.0327 | -| entropy_loss | -3.49 | -| policy_loss | -0.0025 | -| value_loss | 1.31 | +| approx_kl | 0.0288 | +| entropy_loss | -3.58 | +| policy_loss | -0.0121 | +| value_loss | 1.05 | | stat/ | | -| constraint_violation | 1.29e+03 | +| constraint_violation | 1.3e+03 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 160 | -| ep_reward | 0.64 | +| ep_return | 169 | +| ep_reward | 0.676 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 140 | -| ep_reward | 0.56 | +| ep_return | 144 | +| ep_reward | 0.578 | | mse | 264 | | time/ | | | progress | 0.5 | | step | 5e+05 | -| step_time | 9.18 | +| step_time | 7.42 | -------------------------------------- -2023-10-19 16:29:41,788 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 137.933 +/- 50.599 -2023-10-19 16:29:41,789 : +2023-10-27 17:58:10,687 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 145.575 +/- 54.042 +2023-10-27 17:58:10,688 : -------------------------------------- | loss/ | | -| approx_kl | 0.0266 | -| entropy_loss | -3.52 | -| policy_loss | -0.0125 | -| value_loss | 0.617 | +| approx_kl | 0.0271 | +| entropy_loss | -3.57 | +| policy_loss | -0.0216 | +| value_loss | 1.4 | | stat/ | | -| constraint_violation | 1.31e+03 | -| ep_constraint_vio... | 1.6 | +| constraint_violation | 1.33e+03 | +| ep_constraint_vio... | 2.8 | | ep_length | 250 | -| ep_return | 146 | -| ep_reward | 0.583 | +| ep_return | 165 | +| ep_reward | 0.66 | | stat_eval/ | | | constraint_violation | 0.6 | | ep_length | 225 | -| ep_return | 138 | -| ep_reward | 0.552 | -| mse | 152 | +| ep_return | 146 | +| ep_reward | 0.582 | +| mse | 139 | | time/ | | | progress | 0.51 | | step | 5.1e+05 | -| step_time | 9.31 | +| step_time | 7.39 | -------------------------------------- -2023-10-19 16:31:31,580 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 132.698 +/- 50.304 -2023-10-19 16:31:31,581 : +2023-10-27 17:59:40,013 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 138.071 +/- 49.935 +2023-10-27 17:59:40,014 : -------------------------------------- | loss/ | | -| approx_kl | 0.0218 | -| entropy_loss | -3.51 | -| policy_loss | -0.0122 | -| value_loss | 0.511 | +| approx_kl | 0.0309 | +| entropy_loss | -3.59 | +| policy_loss | -0.0181 | +| value_loss | 1.01 | | stat/ | | -| constraint_violation | 1.35e+03 | +| constraint_violation | 1.36e+03 | | ep_constraint_vio... | 0.7 | | ep_length | 202 | -| ep_return | 124 | -| ep_reward | 0.498 | +| ep_return | 129 | +| ep_reward | 0.516 | | stat_eval/ | | -| constraint_violation | 0.6 | +| constraint_violation | 0.3 | | ep_length | 226 | -| ep_return | 133 | -| ep_reward | 0.531 | -| mse | 228 | +| ep_return | 138 | +| ep_reward | 0.552 | +| mse | 218 | | time/ | | | progress | 0.52 | | step | 5.2e+05 | -| step_time | 9.29 | +| step_time | 7.5 | -------------------------------------- -2023-10-19 16:33:22,894 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 140.096 +/- 17.226 -2023-10-19 16:33:22,895 : +2023-10-27 18:01:10,000 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 146.209 +/- 18.581 +2023-10-27 18:01:10,002 : -------------------------------------- | loss/ | | -| approx_kl | 0.034 | -| entropy_loss | -3.57 | -| policy_loss | -0.0205 | -| value_loss | 0.749 | +| approx_kl | 0.0273 | +| entropy_loss | -3.58 | +| policy_loss | -0.00877 | +| value_loss | 0.806 | | stat/ | | -| constraint_violation | 1.37e+03 | +| constraint_violation | 1.38e+03 | | ep_constraint_vio... | 0.8 | | ep_length | 250 | -| ep_return | 157 | -| ep_reward | 0.63 | +| ep_return | 158 | +| ep_reward | 0.634 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 140 | -| ep_reward | 0.56 | -| mse | 296 | +| ep_return | 146 | +| ep_reward | 0.585 | +| mse | 293 | | time/ | | | progress | 0.53 | | step | 5.3e+05 | -| step_time | 9.04 | +| step_time | 7.39 | -------------------------------------- -2023-10-19 16:35:06,066 : Eval | ep_lengths 226.30 +/- 71.10 | ep_return 126.316 +/- 47.801 -2023-10-19 16:35:06,068 : +2023-10-27 18:02:38,824 : Eval | ep_lengths 226.30 +/- 71.10 | ep_return 132.982 +/- 52.151 +2023-10-27 18:02:38,825 : -------------------------------------- | loss/ | | -| approx_kl | 0.0289 | -| entropy_loss | -3.56 | -| policy_loss | -0.0144 | -| value_loss | 0.642 | +| approx_kl | 0.0246 | +| entropy_loss | -3.53 | +| policy_loss | -0.0143 | +| value_loss | 1.09 | | stat/ | | | constraint_violation | 1.42e+03 | -| ep_constraint_vio... | 0.5 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 147 | -| ep_reward | 0.589 | +| ep_return | 157 | +| ep_reward | 0.628 | | stat_eval/ | | -| constraint_violation | 2.2 | +| constraint_violation | 2 | | ep_length | 226 | -| ep_return | 126 | -| ep_reward | 0.505 | -| mse | 266 | +| ep_return | 133 | +| ep_reward | 0.532 | +| mse | 258 | | time/ | | | progress | 0.54 | | step | 5.4e+05 | -| step_time | 8.42 | +| step_time | 7.47 | -------------------------------------- -2023-10-19 16:36:48,505 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 122.432 +/- 46.153 -2023-10-19 16:36:48,506 : +2023-10-27 18:04:07,218 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 125.581 +/- 45.669 +2023-10-27 18:04:07,219 : -------------------------------------- | loss/ | | -| approx_kl | 0.0267 | -| entropy_loss | -3.55 | -| policy_loss | -0.0113 | -| value_loss | 0.355 | +| approx_kl | 0.0382 | +| entropy_loss | -3.5 | +| policy_loss | -0.00905 | +| value_loss | 0.798 | | stat/ | | -| constraint_violation | 1.45e+03 | -| ep_constraint_vio... | 0.9 | +| constraint_violation | 1.46e+03 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 152 | -| ep_reward | 0.608 | +| ep_return | 161 | +| ep_reward | 0.646 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 122 | -| ep_reward | 0.49 | -| mse | 279 | +| ep_return | 126 | +| ep_reward | 0.502 | +| mse | 278 | | time/ | | | progress | 0.55 | | step | 5.5e+05 | -| step_time | 8.41 | +| step_time | 7.25 | -------------------------------------- -2023-10-19 16:38:28,991 : Eval | ep_lengths 201.60 +/- 96.80 | ep_return 135.120 +/- 74.999 -2023-10-19 16:38:28,993 : +2023-10-27 18:05:34,007 : Eval | ep_lengths 201.60 +/- 96.80 | ep_return 138.657 +/- 74.695 +2023-10-27 18:05:34,008 : -------------------------------------- | loss/ | | -| approx_kl | 0.0343 | -| entropy_loss | -3.56 | -| policy_loss | -0.00766 | -| value_loss | 0.515 | +| approx_kl | 0.0215 | +| entropy_loss | -3.46 | +| policy_loss | -0.00889 | +| value_loss | 0.375 | | stat/ | | | constraint_violation | 1.46e+03 | -| ep_constraint_vio... | 0.6 | -| ep_length | 225 | -| ep_return | 134 | -| ep_reward | 0.536 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 143 | +| ep_reward | 0.571 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 202 | -| ep_return | 135 | -| ep_reward | 0.54 | -| mse | 124 | +| ep_return | 139 | +| ep_reward | 0.555 | +| mse | 117 | | time/ | | | progress | 0.56 | | step | 5.6e+05 | -| step_time | 8.53 | +| step_time | 7.25 | -------------------------------------- -2023-10-19 16:40:12,630 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 132.584 +/- 15.815 -2023-10-19 16:40:12,631 : +2023-10-27 18:07:04,809 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 138.440 +/- 14.636 +2023-10-27 18:07:04,810 : -------------------------------------- | loss/ | | -| approx_kl | 0.0286 | -| entropy_loss | -3.61 | -| policy_loss | -0.00126 | -| value_loss | 0.443 | +| approx_kl | 0.0259 | +| entropy_loss | -3.44 | +| policy_loss | -0.00984 | +| value_loss | 0.656 | | stat/ | | -| constraint_violation | 1.47e+03 | -| ep_constraint_vio... | 0 | +| constraint_violation | 1.48e+03 | +| ep_constraint_vio... | 0.9 | | ep_length | 250 | -| ep_return | 153 | -| ep_reward | 0.612 | +| ep_return | 166 | +| ep_reward | 0.663 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 133 | -| ep_reward | 0.53 | -| mse | 299 | +| ep_return | 138 | +| ep_reward | 0.554 | +| mse | 291 | | time/ | | | progress | 0.57 | | step | 5.7e+05 | -| step_time | 8.3 | +| step_time | 7.42 | -------------------------------------- -2023-10-19 16:41:49,873 : Eval | ep_lengths 202.60 +/- 94.82 | ep_return 108.820 +/- 58.269 -2023-10-19 16:41:49,874 : +2023-10-27 18:08:32,698 : Eval | ep_lengths 202.60 +/- 94.82 | ep_return 104.815 +/- 56.608 +2023-10-27 18:08:32,699 : -------------------------------------- | loss/ | | -| approx_kl | 0.0273 | -| entropy_loss | -3.6 | -| policy_loss | -0.00514 | -| value_loss | 0.372 | +| approx_kl | 0.0234 | +| entropy_loss | -3.44 | +| policy_loss | -0.00944 | +| value_loss | 0.611 | | stat/ | | | constraint_violation | 1.51e+03 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 168 | -| ep_reward | 0.671 | +| ep_return | 158 | +| ep_reward | 0.631 | | stat_eval/ | | -| constraint_violation | 0.2 | +| constraint_violation | 0.5 | | ep_length | 203 | -| ep_return | 109 | -| ep_reward | 0.435 | -| mse | 306 | +| ep_return | 105 | +| ep_reward | 0.419 | +| mse | 319 | | time/ | | | progress | 0.58 | | step | 5.8e+05 | -| step_time | 8.26 | +| step_time | 7.53 | -------------------------------------- -2023-10-19 16:43:29,559 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 130.311 +/- 47.648 -2023-10-19 16:43:29,560 : +2023-10-27 18:10:02,659 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 133.087 +/- 48.847 +2023-10-27 18:10:02,661 : -------------------------------------- | loss/ | | -| approx_kl | 0.0324 | -| entropy_loss | -3.6 | -| policy_loss | -0.0176 | -| value_loss | 0.829 | +| approx_kl | 0.0285 | +| entropy_loss | -3.42 | +| policy_loss | -0.0192 | +| value_loss | 0.97 | | stat/ | | | constraint_violation | 1.51e+03 | | ep_constraint_vio... | 0 | | ep_length | 250 | | ep_return | 157 | -| ep_reward | 0.626 | +| ep_reward | 0.627 | | stat_eval/ | | | constraint_violation | 0.6 | | ep_length | 225 | -| ep_return | 130 | -| ep_reward | 0.521 | -| mse | 264 | +| ep_return | 133 | +| ep_reward | 0.532 | +| mse | 263 | | time/ | | | progress | 0.59 | | step | 5.9e+05 | -| step_time | 8.4 | +| step_time | 7.5 | -------------------------------------- -2023-10-19 16:45:10,551 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 144.583 +/- 27.002 -2023-10-19 16:45:10,553 : +2023-10-27 18:11:33,999 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 143.152 +/- 27.495 +2023-10-27 18:11:34,000 : -------------------------------------- | loss/ | | -| approx_kl | 0.033 | -| entropy_loss | -3.61 | -| policy_loss | -0.00438 | -| value_loss | 3.65 | +| approx_kl | 0.0321 | +| entropy_loss | -3.45 | +| policy_loss | -0.0127 | +| value_loss | 3.13 | | stat/ | | | constraint_violation | 1.55e+03 | -| ep_constraint_vio... | 2.1 | -| ep_length | 227 | -| ep_return | 136 | -| ep_reward | 0.543 | +| ep_constraint_vio... | 2.6 | +| ep_length | 154 | +| ep_return | 101 | +| ep_reward | 0.407 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 145 | -| ep_reward | 0.578 | -| mse | 283 | +| ep_return | 143 | +| ep_reward | 0.573 | +| mse | 289 | | time/ | | | progress | 0.6 | | step | 6e+05 | -| step_time | 8.47 | +| step_time | 7.51 | -------------------------------------- -2023-10-19 16:46:50,964 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 154.707 +/- 25.905 -2023-10-19 16:46:50,965 : +2023-10-27 18:13:05,003 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 153.786 +/- 25.851 +2023-10-27 18:13:05,004 : -------------------------------------- | loss/ | | -| approx_kl | 0.0334 | -| entropy_loss | -3.53 | -| policy_loss | -0.0107 | -| value_loss | 1.66 | +| approx_kl | 0.029 | +| entropy_loss | -3.46 | +| policy_loss | -0.0116 | +| value_loss | 0.543 | | stat/ | | | constraint_violation | 1.58e+03 | -| ep_constraint_vio... | 0.3 | -| ep_length | 176 | -| ep_return | 117 | -| ep_reward | 0.467 | +| ep_constraint_vio... | 0.1 | +| ep_length | 226 | +| ep_return | 136 | +| ep_reward | 0.544 | | stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 0.6 | | ep_length | 250 | -| ep_return | 155 | -| ep_reward | 0.619 | -| mse | 250 | +| ep_return | 154 | +| ep_reward | 0.615 | +| mse | 256 | | time/ | | | progress | 0.61 | | step | 6.1e+05 | -| step_time | 8.29 | +| step_time | 7.3 | -------------------------------------- -2023-10-19 16:48:29,759 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 131.389 +/- 49.000 -2023-10-19 16:48:29,760 : +2023-10-27 18:14:34,310 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 135.147 +/- 49.854 +2023-10-27 18:14:34,311 : -------------------------------------- | loss/ | | -| approx_kl | 0.0277 | -| entropy_loss | -3.52 | -| policy_loss | -0.0111 | -| value_loss | 0.829 | +| approx_kl | 0.0356 | +| entropy_loss | -3.48 | +| policy_loss | -0.0107 | +| value_loss | 0.546 | | stat/ | | | constraint_violation | 1.58e+03 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 151 | -| ep_reward | 0.602 | +| ep_constraint_vio... | 0.3 | +| ep_length | 225 | +| ep_return | 131 | +| ep_reward | 0.529 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 131 | -| ep_reward | 0.526 | -| mse | 223 | +| ep_return | 135 | +| ep_reward | 0.541 | +| mse | 222 | | time/ | | | progress | 0.62 | | step | 6.2e+05 | -| step_time | 8.23 | +| step_time | 7.24 | -------------------------------------- -2023-10-19 16:50:11,988 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 149.883 +/- 18.736 -2023-10-19 16:50:11,989 : +2023-10-27 18:16:05,808 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 145.118 +/- 13.848 +2023-10-27 18:16:05,809 : -------------------------------------- | loss/ | | -| approx_kl | 0.0326 | -| entropy_loss | -3.45 | -| policy_loss | -0.0115 | -| value_loss | 2.38 | +| approx_kl | 0.0244 | +| entropy_loss | -3.47 | +| policy_loss | -0.0139 | +| value_loss | 0.583 | | stat/ | | | constraint_violation | 1.6e+03 | -| ep_constraint_vio... | 0.3 | -| ep_length | 178 | -| ep_return | 105 | -| ep_reward | 0.426 | +| ep_constraint_vio... | 0.2 | +| ep_length | 202 | +| ep_return | 120 | +| ep_reward | 0.487 | | stat_eval/ | | -| constraint_violation | 0.8 | +| constraint_violation | 0.7 | | ep_length | 250 | -| ep_return | 150 | -| ep_reward | 0.6 | -| mse | 236 | +| ep_return | 145 | +| ep_reward | 0.58 | +| mse | 248 | | time/ | | | progress | 0.63 | | step | 6.3e+05 | -| step_time | 8.3 | +| step_time | 7.34 | -------------------------------------- -2023-10-19 16:51:52,543 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 148.801 +/- 21.653 -2023-10-19 16:51:52,544 : +2023-10-27 18:17:34,707 : Eval | ep_lengths 227.50 +/- 67.50 | ep_return 135.678 +/- 50.289 +2023-10-27 18:17:34,708 : -------------------------------------- | loss/ | | -| approx_kl | 0.0211 | -| entropy_loss | -3.51 | -| policy_loss | -0.017 | -| value_loss | 0.401 | +| approx_kl | 0.0229 | +| entropy_loss | -3.47 | +| policy_loss | -0.00238 | +| value_loss | 1.1 | | stat/ | | | constraint_violation | 1.61e+03 | -| ep_constraint_vio... | 0 | +| ep_constraint_vio... | 0.1 | | ep_length | 250 | -| ep_return | 171 | -| ep_reward | 0.685 | +| ep_return | 156 | +| ep_reward | 0.624 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 149 | -| ep_reward | 0.595 | -| mse | 242 | +| constraint_violation | 0.1 | +| ep_length | 228 | +| ep_return | 136 | +| ep_reward | 0.543 | +| mse | 192 | | time/ | | | progress | 0.64 | | step | 6.4e+05 | -| step_time | 8.13 | --------------------------------------- - -2023-10-19 16:53:32,511 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 148.879 +/- 25.253 -2023-10-19 16:53:32,512 : --------------------------------------- -| loss/ | | -| approx_kl | 0.0337 | -| entropy_loss | -3.54 | -| policy_loss | -0.0166 | -| value_loss | 1.41 | -| stat/ | | -| constraint_violation | 1.65e+03 | -| ep_constraint_vio... | 2.3 | -| ep_length | 177 | -| ep_return | 107 | -| ep_reward | 0.433 | -| stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 149 | -| ep_reward | 0.596 | -| mse | 309 | -| time/ | | -| progress | 0.65 | -| step | 6.5e+05 | -| step_time | 8.08 | --------------------------------------- - -2023-10-19 16:55:13,232 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 141.284 +/- 18.207 -2023-10-19 16:55:13,233 : --------------------------------------- -| loss/ | | -| approx_kl | 0.0299 | -| entropy_loss | -3.52 | -| policy_loss | -0.00889 | -| value_loss | 0.547 | +| step_time | 7.51 | +-------------------------------------- + +2023-10-27 18:19:05,752 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 148.777 +/- 29.451 +2023-10-27 18:19:05,753 : +--------------------------------------- +| loss/ | | +| approx_kl | 0.0294 | +| entropy_loss | -3.41 | +| policy_loss | -0.000958 | +| value_loss | 1.21 | +| stat/ | | +| constraint_violation | 1.64e+03 | +| ep_constraint_vio... | 0.9 | +| ep_length | 177 | +| ep_return | 115 | +| ep_reward | 0.463 | +| stat_eval/ | | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 149 | +| ep_reward | 0.595 | +| mse | 309 | +| time/ | | +| progress | 0.65 | +| step | 6.5e+05 | +| step_time | 7.48 | +--------------------------------------- + +2023-10-27 18:20:36,891 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 142.611 +/- 14.785 +2023-10-27 18:20:36,892 : +-------------------------------------- +| loss/ | | +| approx_kl | 0.0353 | +| entropy_loss | -3.39 | +| policy_loss | -0.00651 | +| value_loss | 0.836 | | stat/ | | | constraint_violation | 1.65e+03 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 163 | -| ep_reward | 0.652 | +| ep_return | 157 | +| ep_reward | 0.628 | | stat_eval/ | | -| constraint_violation | 2 | +| constraint_violation | 2.2 | | ep_length | 250 | -| ep_return | 141 | -| ep_reward | 0.565 | -| mse | 266 | +| ep_return | 143 | +| ep_reward | 0.57 | +| mse | 268 | | time/ | | | progress | 0.66 | | step | 6.6e+05 | -| step_time | 8.23 | +| step_time | 7.27 | -------------------------------------- -2023-10-19 16:56:51,551 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 153.614 +/- 15.380 -2023-10-19 16:56:51,552 : +2023-10-27 18:22:07,845 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 149.083 +/- 18.180 +2023-10-27 18:22:07,847 : -------------------------------------- | loss/ | | -| approx_kl | 0.0306 | -| entropy_loss | -3.55 | -| policy_loss | -0.0103 | -| value_loss | 0.919 | +| approx_kl | 0.0276 | +| entropy_loss | -3.36 | +| policy_loss | -0.0143 | +| value_loss | 1.37 | | stat/ | | -| constraint_violation | 1.67e+03 | -| ep_constraint_vio... | 0.6 | -| ep_length | 250 | -| ep_return | 169 | -| ep_reward | 0.674 | +| constraint_violation | 1.68e+03 | +| ep_constraint_vio... | 1.9 | +| ep_length | 228 | +| ep_return | 160 | +| ep_reward | 0.641 | | stat_eval/ | | | constraint_violation | 0.8 | | ep_length | 250 | -| ep_return | 154 | -| ep_reward | 0.614 | -| mse | 250 | +| ep_return | 149 | +| ep_reward | 0.596 | +| mse | 255 | | time/ | | | progress | 0.67 | | step | 6.7e+05 | -| step_time | 8.03 | +| step_time | 7.19 | -------------------------------------- -2023-10-19 16:58:30,488 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 150.623 +/- 23.384 -2023-10-19 16:58:30,489 : +2023-10-27 18:23:39,242 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 144.690 +/- 22.168 +2023-10-27 18:23:39,243 : -------------------------------------- | loss/ | | -| approx_kl | 0.0253 | -| entropy_loss | -3.58 | -| policy_loss | -0.00778 | -| value_loss | 0.216 | +| approx_kl | 0.0345 | +| entropy_loss | -3.36 | +| policy_loss | -0.00756 | +| value_loss | 0.521 | | stat/ | | -| constraint_violation | 1.71e+03 | +| constraint_violation | 1.7e+03 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 133 | -| ep_reward | 0.531 | +| ep_return | 143 | +| ep_reward | 0.571 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 151 | -| ep_reward | 0.602 | -| mse | 301 | +| ep_return | 145 | +| ep_reward | 0.579 | +| mse | 303 | | time/ | | | progress | 0.68 | | step | 6.8e+05 | -| step_time | 8.1 | +| step_time | 7.25 | -------------------------------------- -2023-10-19 17:00:08,639 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 169.554 +/- 33.953 -2023-10-19 17:00:08,650 : +2023-10-27 18:25:10,470 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 164.165 +/- 31.569 +2023-10-27 18:25:10,478 : -------------------------------------- | loss/ | | -| approx_kl | 0.0309 | -| entropy_loss | -3.56 | -| policy_loss | -0.00842 | -| value_loss | 0.38 | +| approx_kl | 0.0351 | +| entropy_loss | -3.34 | +| policy_loss | -0.0162 | +| value_loss | 0.561 | | stat/ | | -| constraint_violation | 1.75e+03 | +| constraint_violation | 1.73e+03 | | ep_constraint_vio... | 0.1 | -| ep_length | 250 | -| ep_return | 170 | -| ep_reward | 0.68 | +| ep_length | 226 | +| ep_return | 136 | +| ep_reward | 0.547 | | stat_eval/ | | | constraint_violation | 0.8 | | ep_length | 250 | -| ep_return | 170 | -| ep_reward | 0.678 | -| mse | 162 | +| ep_return | 164 | +| ep_reward | 0.657 | +| mse | 166 | | time/ | | | progress | 0.69 | | step | 6.9e+05 | -| step_time | 8.28 | +| step_time | 7.47 | -------------------------------------- -2023-10-19 17:01:47,113 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 152.147 +/- 21.835 -2023-10-19 17:01:47,115 : +2023-10-27 18:26:41,259 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 143.383 +/- 24.413 +2023-10-27 18:26:41,261 : -------------------------------------- | loss/ | | -| approx_kl | 0.0276 | -| entropy_loss | -3.54 | -| policy_loss | -0.0148 | -| value_loss | 0.358 | +| approx_kl | 0.0391 | +| entropy_loss | -3.39 | +| policy_loss | -0.0197 | +| value_loss | 0.931 | | stat/ | | -| constraint_violation | 1.77e+03 | +| constraint_violation | 1.76e+03 | | ep_constraint_vio... | 0.1 | | ep_length | 226 | -| ep_return | 150 | -| ep_reward | 0.601 | +| ep_return | 149 | +| ep_reward | 0.596 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 152 | -| ep_reward | 0.609 | -| mse | 237 | +| ep_return | 143 | +| ep_reward | 0.574 | +| mse | 250 | | time/ | | | progress | 0.7 | | step | 7e+05 | -| step_time | 8.22 | +| step_time | 7.51 | -------------------------------------- -2023-10-19 17:03:26,256 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 157.085 +/- 31.032 -2023-10-19 17:03:26,257 : +2023-10-27 18:28:11,912 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 152.044 +/- 29.558 +2023-10-27 18:28:11,913 : -------------------------------------- | loss/ | | -| approx_kl | 0.0453 | -| entropy_loss | -3.47 | -| policy_loss | 0.00514 | -| value_loss | 2 | +| approx_kl | 0.0136 | +| entropy_loss | -3.4 | +| policy_loss | -0.0137 | +| value_loss | 0.617 | | stat/ | | -| constraint_violation | 1.78e+03 | -| ep_constraint_vio... | 0.4 | +| constraint_violation | 1.76e+03 | +| ep_constraint_vio... | 0.3 | | ep_length | 201 | -| ep_return | 139 | -| ep_reward | 0.558 | +| ep_return | 120 | +| ep_reward | 0.481 | | stat_eval/ | | | constraint_violation | 1.2 | | ep_length | 250 | -| ep_return | 157 | -| ep_reward | 0.628 | -| mse | 245 | +| ep_return | 152 | +| ep_reward | 0.608 | +| mse | 255 | | time/ | | | progress | 0.71 | | step | 7.1e+05 | -| step_time | 8.21 | +| step_time | 7.36 | -------------------------------------- -2023-10-19 17:05:03,515 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 143.071 +/- 54.200 -2023-10-19 17:05:03,516 : +2023-10-27 18:29:41,455 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 140.722 +/- 53.955 +2023-10-27 18:29:41,456 : -------------------------------------- | loss/ | | -| approx_kl | 0.0281 | -| entropy_loss | -3.51 | -| policy_loss | 0.00253 | -| value_loss | 0.53 | +| approx_kl | 0.0351 | +| entropy_loss | -3.45 | +| policy_loss | -0.00376 | +| value_loss | 0.827 | | stat/ | | | constraint_violation | 1.81e+03 | -| ep_constraint_vio... | 0 | +| ep_constraint_vio... | 1.4 | | ep_length | 250 | -| ep_return | 162 | -| ep_reward | 0.649 | +| ep_return | 141 | +| ep_reward | 0.564 | | stat_eval/ | | -| constraint_violation | 1.5 | +| constraint_violation | 1.3 | | ep_length | 226 | -| ep_return | 143 | -| ep_reward | 0.572 | -| mse | 203 | +| ep_return | 141 | +| ep_reward | 0.563 | +| mse | 205 | | time/ | | | progress | 0.72 | | step | 7.2e+05 | -| step_time | 8.1 | +| step_time | 7.47 | -------------------------------------- -2023-10-19 17:06:39,680 : Eval | ep_lengths 226.60 +/- 70.20 | ep_return 147.957 +/- 56.085 -2023-10-19 17:06:39,681 : +2023-10-27 18:31:10,086 : Eval | ep_lengths 226.70 +/- 69.90 | ep_return 149.941 +/- 57.889 +2023-10-27 18:31:10,087 : -------------------------------------- | loss/ | | -| approx_kl | 0.0264 | -| entropy_loss | -3.55 | -| policy_loss | -0.0111 | -| value_loss | 0.647 | +| approx_kl | 0.0332 | +| entropy_loss | -3.47 | +| policy_loss | -0.0112 | +| value_loss | 0.59 | | stat/ | | -| constraint_violation | 1.83e+03 | +| constraint_violation | 1.82e+03 | | ep_constraint_vio... | 0.2 | | ep_length | 201 | -| ep_return | 123 | -| ep_reward | 0.493 | +| ep_return | 139 | +| ep_reward | 0.557 | | stat_eval/ | | -| constraint_violation | 0.1 | +| constraint_violation | 0.2 | | ep_length | 227 | -| ep_return | 148 | -| ep_reward | 0.592 | -| mse | 168 | +| ep_return | 150 | +| ep_reward | 0.6 | +| mse | 170 | | time/ | | | progress | 0.73 | | step | 7.3e+05 | -| step_time | 7.85 | +| step_time | 7.41 | -------------------------------------- -2023-10-19 17:08:18,189 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 135.089 +/- 31.368 -2023-10-19 17:08:18,190 : +2023-10-27 18:32:40,340 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 131.224 +/- 26.973 +2023-10-27 18:32:40,341 : -------------------------------------- | loss/ | | -| approx_kl | 0.0408 | -| entropy_loss | -3.53 | -| policy_loss | 0.00199 | -| value_loss | 0.818 | +| approx_kl | 0.0309 | +| entropy_loss | -3.48 | +| policy_loss | -0.00834 | +| value_loss | 0.863 | | stat/ | | | constraint_violation | 1.85e+03 | -| ep_constraint_vio... | 0.8 | -| ep_length | 250 | -| ep_return | 170 | -| ep_reward | 0.678 | +| ep_constraint_vio... | 1.8 | +| ep_length | 225 | +| ep_return | 145 | +| ep_reward | 0.582 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 135 | -| ep_reward | 0.54 | -| mse | 263 | +| ep_return | 131 | +| ep_reward | 0.525 | +| mse | 275 | | time/ | | | progress | 0.74 | | step | 7.4e+05 | -| step_time | 8.09 | +| step_time | 7.27 | -------------------------------------- -2023-10-19 17:09:55,827 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 132.464 +/- 46.526 -2023-10-19 17:09:55,828 : +2023-10-27 18:34:09,548 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 126.828 +/- 45.612 +2023-10-27 18:34:09,549 : -------------------------------------- | loss/ | | -| approx_kl | 0.0384 | -| entropy_loss | -3.5 | -| policy_loss | 0.0011 | -| value_loss | 0.322 | +| approx_kl | 0.0242 | +| entropy_loss | -3.44 | +| policy_loss | -0.0186 | +| value_loss | 0.483 | | stat/ | | -| constraint_violation | 1.9e+03 | -| ep_constraint_vio... | 1.8 | -| ep_length | 250 | -| ep_return | 150 | -| ep_reward | 0.6 | +| constraint_violation | 1.89e+03 | +| ep_constraint_vio... | 0.2 | +| ep_length | 202 | +| ep_return | 124 | +| ep_reward | 0.497 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 132 | -| ep_reward | 0.53 | -| mse | 238 | +| ep_return | 127 | +| ep_reward | 0.507 | +| mse | 246 | | time/ | | | progress | 0.75 | | step | 7.5e+05 | -| step_time | 8.1 | +| step_time | 7.28 | -------------------------------------- -2023-10-19 17:11:32,156 : Eval | ep_lengths 225.60 +/- 73.20 | ep_return 132.804 +/- 51.686 -2023-10-19 17:11:32,157 : +2023-10-27 18:35:38,893 : Eval | ep_lengths 225.60 +/- 73.20 | ep_return 131.457 +/- 53.201 +2023-10-27 18:35:38,895 : -------------------------------------- | loss/ | | -| approx_kl | 0.0362 | -| entropy_loss | -3.49 | -| policy_loss | -0.0155 | -| value_loss | 0.603 | +| approx_kl | 0.0326 | +| entropy_loss | -3.44 | +| policy_loss | -0.00803 | +| value_loss | 0.993 | | stat/ | | -| constraint_violation | 1.9e+03 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 170 | -| ep_reward | 0.678 | +| constraint_violation | 1.91e+03 | +| ep_constraint_vio... | 0.2 | +| ep_length | 201 | +| ep_return | 117 | +| ep_reward | 0.475 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 226 | -| ep_return | 133 | -| ep_reward | 0.531 | -| mse | 248 | +| ep_return | 131 | +| ep_reward | 0.526 | +| mse | 250 | | time/ | | | progress | 0.76 | | step | 7.6e+05 | -| step_time | 7.97 | +| step_time | 7.46 | -------------------------------------- -2023-10-19 17:13:07,335 : Eval | ep_lengths 201.30 +/- 97.41 | ep_return 119.988 +/- 60.776 -2023-10-19 17:13:07,336 : +2023-10-27 18:37:07,589 : Eval | ep_lengths 201.30 +/- 97.41 | ep_return 116.022 +/- 59.585 +2023-10-27 18:37:07,590 : -------------------------------------- | loss/ | | -| approx_kl | 0.0238 | -| entropy_loss | -3.54 | -| policy_loss | -0.0119 | -| value_loss | 0.763 | +| approx_kl | 0.0283 | +| entropy_loss | -3.45 | +| policy_loss | -0.0142 | +| value_loss | 0.735 | | stat/ | | -| constraint_violation | 1.93e+03 | -| ep_constraint_vio... | 0.9 | -| ep_length | 227 | -| ep_return | 150 | -| ep_reward | 0.601 | +| constraint_violation | 1.96e+03 | +| ep_constraint_vio... | 3.1 | +| ep_length | 202 | +| ep_return | 110 | +| ep_reward | 0.44 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 201 | -| ep_return | 120 | -| ep_reward | 0.48 | -| mse | 151 | +| ep_return | 116 | +| ep_reward | 0.464 | +| mse | 157 | | time/ | | | progress | 0.77 | | step | 7.7e+05 | -| step_time | 7.96 | +| step_time | 7.47 | -------------------------------------- -2023-10-19 17:14:44,362 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 146.147 +/- 54.750 -2023-10-19 17:14:44,363 : +2023-10-27 18:38:36,591 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 137.144 +/- 52.817 +2023-10-27 18:38:36,592 : -------------------------------------- | loss/ | | -| approx_kl | 0.04 | +| approx_kl | 0.0307 | | entropy_loss | -3.51 | -| policy_loss | 0.0128 | -| value_loss | 0.807 | +| policy_loss | 0.00101 | +| value_loss | 0.509 | | stat/ | | -| constraint_violation | 1.97e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 160 | -| ep_reward | 0.642 | +| constraint_violation | 1.98e+03 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 165 | +| ep_reward | 0.661 | | stat_eval/ | | -| constraint_violation | 1.3 | +| constraint_violation | 1.5 | | ep_length | 225 | -| ep_return | 146 | -| ep_reward | 0.585 | -| mse | 204 | +| ep_return | 137 | +| ep_reward | 0.549 | +| mse | 217 | | time/ | | | progress | 0.78 | | step | 7.8e+05 | -| step_time | 8.01 | +| step_time | 7.24 | -------------------------------------- -2023-10-19 17:16:20,909 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 134.473 +/- 51.487 -2023-10-19 17:16:20,910 : +2023-10-27 18:40:05,054 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 126.121 +/- 48.985 +2023-10-27 18:40:05,055 : -------------------------------------- | loss/ | | -| approx_kl | 0.0323 | +| approx_kl | 0.0296 | | entropy_loss | -3.5 | -| policy_loss | -0.0247 | -| value_loss | 0.727 | +| policy_loss | -0.0097 | +| value_loss | 1.43 | | stat/ | | -| constraint_violation | 1.99e+03 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 170 | -| ep_reward | 0.681 | +| constraint_violation | 2e+03 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 145 | +| ep_reward | 0.579 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 226 | -| ep_return | 134 | -| ep_reward | 0.538 | -| mse | 202 | +| ep_return | 126 | +| ep_reward | 0.504 | +| mse | 219 | | time/ | | | progress | 0.79 | | step | 7.9e+05 | -| step_time | 8.06 | +| step_time | 7.34 | -------------------------------------- -2023-10-19 17:17:59,391 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 142.931 +/- 14.251 -2023-10-19 17:17:59,392 : +2023-10-27 18:41:35,518 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 137.111 +/- 10.266 +2023-10-27 18:41:35,519 : -------------------------------------- | loss/ | | -| approx_kl | 0.0315 | -| entropy_loss | -3.45 | -| policy_loss | -0.014 | -| value_loss | 0.477 | +| approx_kl | 0.0329 | +| entropy_loss | -3.48 | +| policy_loss | -0.0132 | +| value_loss | 1.09 | | stat/ | | -| constraint_violation | 2.01e+03 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 188 | -| ep_reward | 0.753 | +| constraint_violation | 2.02e+03 | +| ep_constraint_vio... | 0.2 | +| ep_length | 201 | +| ep_return | 127 | +| ep_reward | 0.512 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 143 | -| ep_reward | 0.572 | -| mse | 306 | +| ep_return | 137 | +| ep_reward | 0.548 | +| mse | 315 | | time/ | | | progress | 0.8 | | step | 8e+05 | -| step_time | 7.9 | +| step_time | 7.47 | -------------------------------------- -2023-10-19 17:19:36,300 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 154.192 +/- 24.769 -2023-10-19 17:19:36,301 : +2023-10-27 18:43:06,316 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 149.729 +/- 25.028 +2023-10-27 18:43:06,317 : -------------------------------------- | loss/ | | -| approx_kl | 0.0231 | -| entropy_loss | -3.44 | -| policy_loss | -0.00564 | -| value_loss | 0.993 | +| approx_kl | 0.0267 | +| entropy_loss | -3.45 | +| policy_loss | -0.00989 | +| value_loss | 2.43 | | stat/ | | -| constraint_violation | 2.01e+03 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 158 | -| ep_reward | 0.633 | +| constraint_violation | 2.02e+03 | +| ep_constraint_vio... | 0.2 | +| ep_length | 200 | +| ep_return | 122 | +| ep_reward | 0.495 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 154 | -| ep_reward | 0.617 | -| mse | 205 | +| ep_return | 150 | +| ep_reward | 0.599 | +| mse | 212 | | time/ | | | progress | 0.81 | | step | 8.1e+05 | -| step_time | 7.68 | +| step_time | 7.58 | -------------------------------------- -2023-10-19 17:21:13,639 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 159.614 +/- 18.221 -2023-10-19 17:21:13,640 : +2023-10-27 18:44:38,115 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 152.338 +/- 20.692 +2023-10-27 18:44:38,116 : -------------------------------------- | loss/ | | -| approx_kl | 0.0375 | -| entropy_loss | -3.39 | -| policy_loss | -0.0125 | -| value_loss | 1.63 | +| approx_kl | 0.025 | +| entropy_loss | -3.42 | +| policy_loss | -0.00614 | +| value_loss | 1.61 | | stat/ | | -| constraint_violation | 2.04e+03 | -| ep_constraint_vio... | 1.4 | -| ep_length | 226 | -| ep_return | 146 | -| ep_reward | 0.587 | +| constraint_violation | 2.06e+03 | +| ep_constraint_vio... | 0.7 | +| ep_length | 201 | +| ep_return | 123 | +| ep_reward | 0.5 | | stat_eval/ | | | constraint_violation | 0.7 | | ep_length | 250 | -| ep_return | 160 | -| ep_reward | 0.638 | -| mse | 153 | +| ep_return | 152 | +| ep_reward | 0.609 | +| mse | 166 | | time/ | | | progress | 0.82 | | step | 8.2e+05 | -| step_time | 7.8 | +| step_time | 7.52 | -------------------------------------- -2023-10-19 17:22:51,408 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 154.374 +/- 29.174 -2023-10-19 17:22:51,409 : +2023-10-27 18:46:08,683 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 147.054 +/- 33.781 +2023-10-27 18:46:08,684 : -------------------------------------- | loss/ | | -| approx_kl | 0.0286 | -| entropy_loss | -3.33 | -| policy_loss | -0.00563 | -| value_loss | 1.08 | +| approx_kl | 0.0259 | +| entropy_loss | -3.41 | +| policy_loss | -0.0182 | +| value_loss | 1.1 | | stat/ | | -| constraint_violation | 2.07e+03 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 171 | -| ep_reward | 0.682 | +| constraint_violation | 2.08e+03 | +| ep_constraint_vio... | 0.7 | +| ep_length | 225 | +| ep_return | 137 | +| ep_reward | 0.547 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 154 | -| ep_reward | 0.617 | -| mse | 224 | +| ep_return | 147 | +| ep_reward | 0.588 | +| mse | 246 | | time/ | | | progress | 0.83 | | step | 8.3e+05 | -| step_time | 8.32 | +| step_time | 7.55 | -------------------------------------- -2023-10-19 17:24:35,972 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 156.715 +/- 25.784 -2023-10-19 17:24:35,973 : +2023-10-27 18:47:38,712 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 156.205 +/- 23.707 +2023-10-27 18:47:38,713 : -------------------------------------- | loss/ | | -| approx_kl | 0.0324 | -| entropy_loss | -3.32 | -| policy_loss | -0.00888 | -| value_loss | 0.88 | +| approx_kl | 0.0157 | +| entropy_loss | -3.43 | +| policy_loss | -0.0228 | +| value_loss | 0.678 | | stat/ | | -| constraint_violation | 2.1e+03 | +| constraint_violation | 2.11e+03 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 159 | -| ep_reward | 0.634 | +| ep_return | 148 | +| ep_reward | 0.59 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 157 | -| ep_reward | 0.627 | -| mse | 215 | +| ep_return | 156 | +| ep_reward | 0.625 | +| mse | 217 | | time/ | | | progress | 0.84 | | step | 8.4e+05 | -| step_time | 9.24 | +| step_time | 7.5 | -------------------------------------- -2023-10-19 17:26:20,053 : Eval | ep_lengths 151.60 +/- 120.52 | ep_return 85.990 +/- 72.616 -2023-10-19 17:26:20,054 : +2023-10-27 18:49:04,125 : Eval | ep_lengths 151.60 +/- 120.52 | ep_return 83.743 +/- 70.548 +2023-10-27 18:49:04,126 : -------------------------------------- | loss/ | | -| approx_kl | 0.0306 | -| entropy_loss | -3.35 | -| policy_loss | -0.0101 | -| value_loss | 1.52 | +| approx_kl | 0.0269 | +| entropy_loss | -3.51 | +| policy_loss | -0.00607 | +| value_loss | 1.01 | | stat/ | | -| constraint_violation | 2.13e+03 | -| ep_constraint_vio... | 1 | -| ep_length | 153 | -| ep_return | 96.8 | -| ep_reward | 0.398 | +| constraint_violation | 2.14e+03 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 135 | +| ep_reward | 0.539 | | stat_eval/ | | -| constraint_violation | 0.5 | +| constraint_violation | 0.4 | | ep_length | 152 | -| ep_return | 86 | -| ep_reward | 0.344 | -| mse | 118 | +| ep_return | 83.7 | +| ep_reward | 0.335 | +| mse | 122 | | time/ | | | progress | 0.85 | | step | 8.5e+05 | -| step_time | 8.83 | +| step_time | 7.47 | -------------------------------------- -2023-10-19 17:28:11,482 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 138.630 +/- 30.051 -2023-10-19 17:28:11,483 : +2023-10-27 18:50:35,508 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 134.028 +/- 24.345 +2023-10-27 18:50:35,509 : -------------------------------------- | loss/ | | -| approx_kl | 0.0345 | -| entropy_loss | -3.37 | -| policy_loss | -0.00491 | -| value_loss | 0.364 | +| approx_kl | 0.0162 | +| entropy_loss | -3.5 | +| policy_loss | -0.0204 | +| value_loss | 1.15 | | stat/ | | -| constraint_violation | 2.16e+03 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 161 | -| ep_reward | 0.646 | +| constraint_violation | 2.2e+03 | +| ep_constraint_vio... | 2.3 | +| ep_length | 202 | +| ep_return | 134 | +| ep_reward | 0.543 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 139 | -| ep_reward | 0.555 | -| mse | 354 | +| ep_return | 134 | +| ep_reward | 0.536 | +| mse | 358 | | time/ | | | progress | 0.86 | | step | 8.6e+05 | -| step_time | 9.19 | +| step_time | 7.37 | -------------------------------------- -2023-10-19 17:29:57,747 : Eval | ep_lengths 201.70 +/- 96.61 | ep_return 121.086 +/- 64.252 -2023-10-19 17:29:57,748 : +2023-10-27 18:52:01,931 : Eval | ep_lengths 201.70 +/- 96.61 | ep_return 122.636 +/- 64.775 +2023-10-27 18:52:01,932 : -------------------------------------- | loss/ | | -| approx_kl | 0.0281 | -| entropy_loss | -3.36 | -| policy_loss | -0.00822 | -| value_loss | 0.663 | +| approx_kl | 0.0179 | +| entropy_loss | -3.46 | +| policy_loss | -0.0116 | +| value_loss | 0.697 | | stat/ | | -| constraint_violation | 2.2e+03 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 177 | -| ep_reward | 0.709 | +| constraint_violation | 2.22e+03 | +| ep_constraint_vio... | 0.1 | +| ep_length | 226 | +| ep_return | 137 | +| ep_reward | 0.551 | | stat_eval/ | | | constraint_violation | 1.3 | | ep_length | 202 | -| ep_return | 121 | -| ep_reward | 0.484 | -| mse | 219 | +| ep_return | 123 | +| ep_reward | 0.491 | +| mse | 220 | | time/ | | | progress | 0.87 | | step | 8.7e+05 | -| step_time | 9.24 | +| step_time | 7.35 | -------------------------------------- -2023-10-19 17:31:44,412 : Eval | ep_lengths 201.60 +/- 96.82 | ep_return 122.660 +/- 63.919 -2023-10-19 17:31:44,414 : +2023-10-27 18:53:29,355 : Eval | ep_lengths 201.50 +/- 97.02 | ep_return 119.179 +/- 62.165 +2023-10-27 18:53:29,356 : -------------------------------------- | loss/ | | -| approx_kl | 0.0221 | -| entropy_loss | -3.34 | -| policy_loss | -0.00974 | -| value_loss | 0.4 | +| approx_kl | 0.0246 | +| entropy_loss | -3.48 | +| policy_loss | -0.00763 | +| value_loss | 1.23 | | stat/ | | -| constraint_violation | 2.21e+03 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 152 | -| ep_reward | 0.607 | +| constraint_violation | 2.22e+03 | +| ep_constraint_vio... | 0.1 | +| ep_length | 226 | +| ep_return | 137 | +| ep_reward | 0.55 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 202 | -| ep_return | 123 | -| ep_reward | 0.491 | -| mse | 204 | +| ep_return | 119 | +| ep_reward | 0.477 | +| mse | 212 | | time/ | | | progress | 0.88 | | step | 8.8e+05 | -| step_time | 8.97 | +| step_time | 7.44 | -------------------------------------- -2023-10-19 17:33:32,270 : Eval | ep_lengths 226.10 +/- 71.70 | ep_return 144.173 +/- 53.945 -2023-10-19 17:33:32,271 : +2023-10-27 18:54:58,032 : Eval | ep_lengths 226.10 +/- 71.70 | ep_return 141.037 +/- 51.559 +2023-10-27 18:54:58,033 : -------------------------------------- | loss/ | | -| approx_kl | 0.0266 | -| entropy_loss | -3.41 | -| policy_loss | -0.0104 | -| value_loss | 0.725 | +| approx_kl | 0.0287 | +| entropy_loss | -3.51 | +| policy_loss | -0.0131 | +| value_loss | 0.934 | | stat/ | | -| constraint_violation | 2.22e+03 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 136 | -| ep_reward | 0.543 | +| constraint_violation | 2.24e+03 | +| ep_constraint_vio... | 0.1 | +| ep_length | 226 | +| ep_return | 139 | +| ep_reward | 0.554 | | stat_eval/ | | | constraint_violation | 0.3 | | ep_length | 226 | -| ep_return | 144 | -| ep_reward | 0.577 | -| mse | 214 | +| ep_return | 141 | +| ep_reward | 0.564 | +| mse | 216 | | time/ | | | progress | 0.89 | | step | 8.9e+05 | -| step_time | 9.4 | +| step_time | 7.28 | -------------------------------------- -2023-10-19 17:35:17,357 : Eval | ep_lengths 225.90 +/- 72.30 | ep_return 130.538 +/- 50.411 -2023-10-19 17:35:17,358 : +2023-10-27 18:56:27,147 : Eval | ep_lengths 225.90 +/- 72.30 | ep_return 126.021 +/- 47.264 +2023-10-27 18:56:27,148 : -------------------------------------- | loss/ | | -| approx_kl | 0.0316 | -| entropy_loss | -3.39 | -| policy_loss | -0.00548 | -| value_loss | 0.366 | +| approx_kl | 0.0245 | +| entropy_loss | -3.53 | +| policy_loss | -0.0118 | +| value_loss | 1.14 | | stat/ | | -| constraint_violation | 2.25e+03 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 171 | -| ep_reward | 0.684 | +| constraint_violation | 2.27e+03 | +| ep_constraint_vio... | 0.2 | +| ep_length | 200 | +| ep_return | 117 | +| ep_reward | 0.471 | | stat_eval/ | | | constraint_violation | 1.4 | | ep_length | 226 | -| ep_return | 131 | -| ep_reward | 0.522 | -| mse | 240 | +| ep_return | 126 | +| ep_reward | 0.504 | +| mse | 246 | | time/ | | | progress | 0.9 | | step | 9e+05 | -| step_time | 8.81 | +| step_time | 7.5 | -------------------------------------- -2023-10-19 17:37:02,572 : Eval | ep_lengths 226.60 +/- 70.20 | ep_return 128.930 +/- 47.972 -2023-10-19 17:37:02,573 : +2023-10-27 18:57:56,651 : Eval | ep_lengths 226.60 +/- 70.20 | ep_return 127.812 +/- 47.098 +2023-10-27 18:57:56,652 : -------------------------------------- | loss/ | | -| approx_kl | 0.0345 | -| entropy_loss | -3.41 | -| policy_loss | -0.0119 | -| value_loss | 0.961 | +| approx_kl | 0.0256 | +| entropy_loss | -3.55 | +| policy_loss | -0.00515 | +| value_loss | 0.828 | | stat/ | | -| constraint_violation | 2.28e+03 | +| constraint_violation | 2.29e+03 | | ep_constraint_vio... | 0.1 | | ep_length | 225 | -| ep_return | 145 | -| ep_reward | 0.582 | +| ep_return | 135 | +| ep_reward | 0.541 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 227 | -| ep_return | 129 | -| ep_reward | 0.516 | -| mse | 240 | +| ep_return | 128 | +| ep_reward | 0.511 | +| mse | 243 | | time/ | | | progress | 0.91 | | step | 9.1e+05 | -| step_time | 9.03 | +| step_time | 7.45 | -------------------------------------- -2023-10-19 17:38:47,651 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 148.104 +/- 52.854 -2023-10-19 17:38:47,653 : +2023-10-27 18:59:26,250 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 149.131 +/- 53.913 +2023-10-27 18:59:26,251 : -------------------------------------- | loss/ | | -| approx_kl | 0.039 | -| entropy_loss | -3.42 | -| policy_loss | -0.00848 | -| value_loss | 1.27 | +| approx_kl | 0.0447 | +| entropy_loss | -3.57 | +| policy_loss | -0.0226 | +| value_loss | 0.886 | | stat/ | | -| constraint_violation | 2.29e+03 | -| ep_constraint_vio... | 0.2 | -| ep_length | 202 | -| ep_return | 112 | -| ep_reward | 0.452 | +| constraint_violation | 2.31e+03 | +| ep_constraint_vio... | 0.3 | +| ep_length | 178 | +| ep_return | 108 | +| ep_reward | 0.436 | | stat_eval/ | | -| constraint_violation | 1.8 | +| constraint_violation | 1.9 | | ep_length | 225 | -| ep_return | 148 | -| ep_reward | 0.592 | -| mse | 120 | +| ep_return | 149 | +| ep_reward | 0.597 | +| mse | 118 | | time/ | | | progress | 0.92 | | step | 9.2e+05 | -| step_time | 8.87 | +| step_time | 7.53 | -------------------------------------- -2023-10-19 17:40:34,345 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 147.757 +/- 16.982 -2023-10-19 17:40:34,346 : +2023-10-27 19:00:56,664 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 154.073 +/- 20.196 +2023-10-27 19:00:56,665 : -------------------------------------- | loss/ | | -| approx_kl | 0.0207 | -| entropy_loss | -3.41 | -| policy_loss | -0.0101 | -| value_loss | 0.357 | +| approx_kl | 0.0266 | +| entropy_loss | -3.57 | +| policy_loss | -0.0118 | +| value_loss | 1.34 | | stat/ | | -| constraint_violation | 2.31e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 148 | -| ep_reward | 0.593 | +| constraint_violation | 2.35e+03 | +| ep_constraint_vio... | 2.4 | +| ep_length | 250 | +| ep_return | 146 | +| ep_reward | 0.586 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 148 | -| ep_reward | 0.591 | -| mse | 185 | +| ep_return | 154 | +| ep_reward | 0.616 | +| mse | 178 | | time/ | | | progress | 0.93 | | step | 9.3e+05 | -| step_time | 8.7 | +| step_time | 7.63 | -------------------------------------- -2023-10-19 17:42:20,629 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 139.025 +/- 23.915 -2023-10-19 17:42:20,631 : +2023-10-27 19:02:27,824 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 142.366 +/- 24.005 +2023-10-27 19:02:27,825 : -------------------------------------- | loss/ | | -| approx_kl | 0.0329 | -| entropy_loss | -3.43 | -| policy_loss | -0.0025 | -| value_loss | 0.532 | +| approx_kl | 0.0153 | +| entropy_loss | -3.56 | +| policy_loss | -0.0134 | +| value_loss | 1.51 | | stat/ | | -| constraint_violation | 2.33e+03 | +| constraint_violation | 2.35e+03 | | ep_constraint_vio... | 0.1 | | ep_length | 226 | -| ep_return | 129 | -| ep_reward | 0.515 | +| ep_return | 160 | +| ep_reward | 0.641 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 139 | -| ep_reward | 0.556 | -| mse | 266 | +| ep_return | 142 | +| ep_reward | 0.569 | +| mse | 262 | | time/ | | | progress | 0.94 | | step | 9.4e+05 | -| step_time | 8.78 | +| step_time | 7.42 | -------------------------------------- -2023-10-19 17:44:07,263 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 132.600 +/- 22.983 -2023-10-19 17:44:07,264 : +2023-10-27 19:03:57,410 : Eval | ep_lengths 227.40 +/- 67.80 | ep_return 126.308 +/- 47.408 +2023-10-27 19:03:57,411 : -------------------------------------- | loss/ | | -| approx_kl | 0.0306 | -| entropy_loss | -3.42 | -| policy_loss | -0.00818 | -| value_loss | 0.779 | +| approx_kl | 0.0255 | +| entropy_loss | -3.56 | +| policy_loss | -0.014 | +| value_loss | 0.441 | | stat/ | | -| constraint_violation | 2.35e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | +| constraint_violation | 2.38e+03 | +| ep_constraint_vio... | 1.5 | +| ep_length | 226 | | ep_return | 142 | -| ep_reward | 0.567 | +| ep_reward | 0.569 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 133 | -| ep_reward | 0.53 | -| mse | 335 | +| constraint_violation | 0.1 | +| ep_length | 227 | +| ep_return | 126 | +| ep_reward | 0.505 | +| mse | 278 | | time/ | | | progress | 0.95 | | step | 9.5e+05 | -| step_time | 8.76 | +| step_time | 7.31 | -------------------------------------- -2023-10-19 17:45:52,975 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 149.409 +/- 20.744 -2023-10-19 17:45:52,976 : +2023-10-27 19:05:27,447 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 156.244 +/- 19.066 +2023-10-27 19:05:27,449 : -------------------------------------- | loss/ | | -| approx_kl | 0.019 | -| entropy_loss | -3.49 | -| policy_loss | -0.019 | -| value_loss | 0.394 | +| approx_kl | 0.0241 | +| entropy_loss | -3.5 | +| policy_loss | -0.0139 | +| value_loss | 0.984 | | stat/ | | -| constraint_violation | 2.39e+03 | -| ep_constraint_vio... | 1.1 | -| ep_length | 250 | -| ep_return | 153 | -| ep_reward | 0.61 | +| constraint_violation | 2.4e+03 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 152 | +| ep_reward | 0.608 | | stat_eval/ | | -| constraint_violation | 0.4 | +| constraint_violation | 0.5 | | ep_length | 250 | -| ep_return | 149 | -| ep_reward | 0.598 | -| mse | 218 | +| ep_return | 156 | +| ep_reward | 0.625 | +| mse | 210 | | time/ | | | progress | 0.96 | | step | 9.6e+05 | -| step_time | 8.53 | +| step_time | 7.15 | -------------------------------------- -2023-10-19 17:47:37,605 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 129.062 +/- 47.419 -2023-10-19 17:47:37,606 : +2023-10-27 19:06:56,208 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 130.725 +/- 46.991 +2023-10-27 19:06:56,209 : -------------------------------------- | loss/ | | -| approx_kl | 0.0304 | -| entropy_loss | -3.46 | -| policy_loss | -0.00392 | -| value_loss | 0.47 | +| approx_kl | 0.0283 | +| entropy_loss | -3.51 | +| policy_loss | -0.0125 | +| value_loss | 0.544 | | stat/ | | -| constraint_violation | 2.4e+03 | -| ep_constraint_vio... | 0.4 | -| ep_length | 250 | -| ep_return | 173 | -| ep_reward | 0.694 | +| constraint_violation | 2.41e+03 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 147 | +| ep_reward | 0.593 | | stat_eval/ | | | constraint_violation | 1.8 | | ep_length | 225 | -| ep_return | 129 | -| ep_reward | 0.516 | -| mse | 259 | +| ep_return | 131 | +| ep_reward | 0.523 | +| mse | 263 | | time/ | | | progress | 0.97 | | step | 9.7e+05 | -| step_time | 8.85 | +| step_time | 7.44 | -------------------------------------- -2023-10-19 17:49:25,367 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 153.069 +/- 25.582 -2023-10-19 17:49:25,369 : +2023-10-27 19:08:26,898 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 161.916 +/- 21.366 +2023-10-27 19:08:26,899 : -------------------------------------- | loss/ | | -| approx_kl | 0.0244 | -| entropy_loss | -3.46 | -| policy_loss | -0.0104 | -| value_loss | 0.747 | +| approx_kl | 0.0346 | +| entropy_loss | -3.47 | +| policy_loss | 0.018 | +| value_loss | 1.44 | | stat/ | | -| constraint_violation | 2.41e+03 | -| ep_constraint_vio... | 0.2 | -| ep_length | 203 | -| ep_return | 113 | -| ep_reward | 0.453 | +| constraint_violation | 2.44e+03 | +| ep_constraint_vio... | 1.1 | +| ep_length | 226 | +| ep_return | 162 | +| ep_reward | 0.65 | | stat_eval/ | | -| constraint_violation | 0.1 | +| constraint_violation | 0 | | ep_length | 250 | -| ep_return | 153 | -| ep_reward | 0.612 | +| ep_return | 162 | +| ep_reward | 0.648 | | mse | 183 | | time/ | | | progress | 0.98 | | step | 9.8e+05 | -| step_time | 8.42 | +| step_time | 7.3 | -------------------------------------- -2023-10-19 17:51:09,286 : Eval | ep_lengths 226.80 +/- 69.60 | ep_return 145.714 +/- 51.567 -2023-10-19 17:51:09,287 : +2023-10-27 19:09:55,834 : Eval | ep_lengths 226.80 +/- 69.60 | ep_return 152.797 +/- 53.719 +2023-10-27 19:09:55,835 : -------------------------------------- | loss/ | | -| approx_kl | 0.0298 | -| entropy_loss | -3.46 | -| policy_loss | -0.00974 | -| value_loss | 1.01 | +| approx_kl | 0.0345 | +| entropy_loss | -3.42 | +| policy_loss | -0.0139 | +| value_loss | 1.87 | | stat/ | | -| constraint_violation | 2.43e+03 | -| ep_constraint_vio... | 0.1 | +| constraint_violation | 2.45e+03 | +| ep_constraint_vio... | 0.6 | | ep_length | 226 | -| ep_return | 152 | -| ep_reward | 0.607 | +| ep_return | 148 | +| ep_reward | 0.591 | | stat_eval/ | | -| constraint_violation | 0.9 | +| constraint_violation | 0.1 | | ep_length | 227 | -| ep_return | 146 | -| ep_reward | 0.583 | -| mse | 120 | +| ep_return | 153 | +| ep_reward | 0.611 | +| mse | 111 | | time/ | | | progress | 0.99 | | step | 9.9e+05 | -| step_time | 9.17 | +| step_time | 7.42 | -------------------------------------- -2023-10-19 17:52:35,863 : Checkpoint | ./models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/model_latest.pt -2023-10-19 17:52:54,207 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 140.168 +/- 25.260 -2023-10-19 17:52:54,208 : +2023-10-27 19:11:09,825 : Checkpoint | ./models/rl_models/quadrotor_3D/track/ppo/mpsf_pen/model_latest.pt +2023-10-27 19:11:26,592 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 143.977 +/- 33.872 +2023-10-27 19:11:26,593 : -------------------------------------- | loss/ | | -| approx_kl | 0.0398 | -| entropy_loss | -3.45 | -| policy_loss | -0.0173 | -| value_loss | 0.5 | +| approx_kl | 0.0288 | +| entropy_loss | -3.44 | +| policy_loss | -0.00453 | +| value_loss | 1.65 | | stat/ | | -| constraint_violation | 2.48e+03 | -| ep_constraint_vio... | 0.7 | +| constraint_violation | 2.5e+03 | +| ep_constraint_vio... | 0.1 | | ep_length | 226 | -| ep_return | 147 | -| ep_reward | 0.591 | +| ep_return | 150 | +| ep_reward | 0.602 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 140 | -| ep_reward | 0.561 | -| mse | 365 | +| ep_return | 144 | +| ep_reward | 0.576 | +| mse | 366 | | time/ | | | progress | 1 | | step | 1e+06 | -| step_time | 8.25 | +| step_time | 7.54 | -------------------------------------- diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/loss/approx_kl.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/loss/approx_kl.log index 148e45bd1..1d7f5a6bd 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/loss/approx_kl.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/loss/approx_kl.log @@ -1,101 +1,101 @@ step,loss/approx_kl -10000,0.03265381977738192 -20000,0.021775463898666202 -30000,0.02316179192469766 -40000,0.024124778271652762 -50000,0.028627992724068456 -60000,0.02085759423983594 -70000,0.011239938476743795 -80000,0.029530290622885026 -90000,0.028030629153363407 -100000,0.018958734278567136 -110000,0.0391036922732989 -120000,0.027255278251444297 -130000,0.03211660692468286 -140000,0.03909697880347569 -150000,0.02519556675106287 -160000,0.030763576257353032 -170000,0.02400576916212837 -180000,0.0302190106206884 -190000,0.020241525039697686 -200000,0.02162671306480964 -210000,0.021830518372977774 -220000,0.021229971417536336 -230000,0.02436025834952791 -240000,0.030597075726836925 -250000,0.016491037498538693 -260000,0.03242528331466019 -270000,0.024647185144325096 -280000,0.029697602707892658 -290000,0.023929660767316816 -300000,0.026414347750445204 -310000,0.029035590790833038 -320000,0.015531984887396297 -330000,0.0334560796773682 -340000,0.03349767044807474 -350000,0.014942027690509957 -360000,0.023601270435998835 -370000,0.0265059735160321 -380000,0.02578396610915661 -390000,0.013779489292452731 -400000,0.023644157064457733 -410000,0.012426581187173727 -420000,0.004912394238635898 -430000,0.03118070668230454 -440000,0.016449558595195417 -450000,0.025739875637615716 -460000,0.03174744675246378 -470000,0.02443106897796194 -480000,0.019884683719525734 -490000,0.03535370271032055 -500000,0.029132786455253763 -510000,0.02835068864127 -520000,0.027297656921048956 -530000,0.029103667677069713 -540000,0.030008206019798922 -550000,0.007087711617350579 -560000,0.03681928583731254 -570000,0.01996153173968196 -580000,0.02712391104238729 -590000,0.04039361484659215 -600000,0.01643598689697683 -610000,0.03278604079969228 -620000,0.03198176564959189 -630000,0.03715449332570036 -640000,0.02512102429755032 -650000,0.016577062212551633 -660000,0.02106879614293575 -670000,0.02364783673547208 -680000,0.023179891581336657 -690000,0.030643902827675144 -700000,0.02232454274781048 -710000,0.031039380903045338 -720000,0.03457882730290293 -730000,0.03211476743842164 -740000,0.025143741987024744 -750000,0.029930750668669737 -760000,0.02501478875055909 -770000,0.028120811330154545 -780000,0.024742664520939192 -790000,0.01572516057640314 -800000,0.03677549427375197 -810000,0.024554166011512276 -820000,0.03601869915922483 -830000,0.010434293405463297 -840000,0.03497648552681008 -850000,0.033434741323192914 -860000,0.015901694664110736 -870000,0.019691887622078262 -880000,0.029254918452352285 -890000,0.024006972524027028 -900000,0.02338294352715214 -910000,0.022502275245885054 -920000,0.02380584770192703 -930000,0.025880090271433197 -940000,0.02617169640337428 -950000,0.028716563681761433 -960000,0.02912944086516897 -970000,0.016837612135956685 -980000,0.021550537769993144 -990000,0.023720020552476247 -1000000,0.03141999775543809 +10000,0.024409892084077 +20000,0.020873210811987522 +30000,0.01887003489925216 +40000,0.022667411528527737 +50000,0.029731656331568955 +60000,0.011962372064590453 +70000,0.01663419799103091 +80000,0.010467257831866543 +90000,0.017223113620032867 +100000,0.02660642173141241 +110000,0.027738969400525094 +120000,0.02551850474750002 +130000,0.03234713218795756 +140000,0.026099764586736757 +150000,0.03128995418859025 +160000,0.029145744877556962 +170000,0.022906915952141085 +180000,0.01817881208844483 +190000,0.024038809599975744 +200000,0.025687195500358938 +210000,0.032524119550362225 +220000,0.02516323456851145 +230000,0.019294452449927727 +240000,0.03537712180987 +250000,0.025809577883531647 +260000,0.03451021293488642 +270000,0.02065385828415553 +280000,0.026456198825811343 +290000,0.02144400354785224 +300000,0.03418480179583032 +310000,0.013817668116341036 +320000,0.020040584774687887 +330000,0.01926287985406816 +340000,0.008249890912945071 +350000,0.021741576058169208 +360000,0.012401465699076653 +370000,0.025970733460659784 +380000,0.022163400085022055 +390000,0.017032026809950668 +400000,0.030140641533459227 +410000,0.018022366256142654 +420000,0.022632572753354903 +430000,0.020341162290424108 +440000,0.021064960444346068 +450000,0.02652540168104072 +460000,0.034161009592935436 +470000,0.028803949321930606 +480000,0.015826202059785525 +490000,0.013688181526958939 +500000,0.01668450327900549 +510000,0.026071625435724853 +520000,0.04063696772791445 +530000,0.03442275030538439 +540000,0.014091608611245952 +550000,0.018359024884800117 +560000,0.025633949538071948 +570000,0.02337881272348265 +580000,0.0319602708487461 +590000,0.0201724375753353 +600000,0.023597350856289268 +610000,0.031830246544753506 +620000,0.03126895964766542 +630000,0.02776804917181532 +640000,0.03310259603895248 +650000,0.02917201219437023 +660000,0.033273135001460716 +670000,0.021154668958236776 +680000,0.02162980978998045 +690000,0.020212536786372463 +700000,0.019901996602614724 +710000,0.029255301986510544 +720000,0.03400691469820837 +730000,0.02434050254523754 +740000,0.019558111065998675 +750000,0.0275914346644034 +760000,0.030713359530394273 +770000,0.018060939696927868 +780000,0.026727833940337103 +790000,0.027364129790415366 +800000,0.023601457104086877 +810000,0.030766386414567626 +820000,0.02302152613798777 +830000,0.019467545114457606 +840000,0.024517401649306218 +850000,0.019045606348663573 +860000,0.02562617237369219 +870000,0.016110928263515237 +880000,0.023014805683245262 +890000,0.022738710821916655 +900000,0.02937644872193535 +910000,0.03342266650870442 +920000,0.01166551634669304 +930000,0.028939502220600842 +940000,0.03253429463754097 +950000,0.028075924329459663 +960000,0.03308942668760816 +970000,0.022825662915905313 +980000,0.036958576645702124 +990000,0.025342828190575047 +1000000,0.027805248523751897 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/loss/entropy_loss.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/loss/entropy_loss.log index 2d8aa74ab..328865bda 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/loss/entropy_loss.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/loss/entropy_loss.log @@ -1,101 +1,101 @@ step,loss/entropy_loss -10000,-3.6969534397125243 -20000,-3.775063145160675 -30000,-3.7220268050829572 -40000,-3.76859073638916 -50000,-3.743024973074595 -60000,-3.826722784837088 -70000,-3.8886122345924385 -80000,-3.8762641867001846 -90000,-3.8431564450263975 -100000,-3.8451324462890626 -110000,-3.894517238934834 -120000,-3.989887261390686 -130000,-3.993643184502919 -140000,-4.001635622978211 -150000,-4.043385926882426 -160000,-4.027733484903971 -170000,-4.063088647524515 -180000,-4.172408080101013 -190000,-4.153933652242024 -200000,-4.177964862187704 -210000,-4.2446675380071 -220000,-4.306359219551085 -230000,-4.364014514287313 -240000,-4.407540885607402 -250000,-4.516828362147014 -260000,-4.608917291959127 -270000,-4.6691583236058545 -280000,-4.7048032204310095 -290000,-4.756430395444235 -300000,-4.799324289957683 -310000,-4.770506628354391 -320000,-4.727196939786276 -330000,-4.875374150276184 -340000,-4.858717004458109 -350000,-4.882197467486064 -360000,-4.867225654919943 -370000,-4.883462190628052 -380000,-4.941953388849894 -390000,-5.013260181744894 -400000,-5.056615964571635 -410000,-5.080157868067424 -420000,-5.028430048624673 -430000,-5.112865161895752 -440000,-5.161441357930501 -450000,-5.149726462364197 -460000,-5.150813118616741 -470000,-5.165221548080444 -480000,-5.247920378049215 -490000,-5.323324664433797 -500000,-5.361899948120117 -510000,-5.3652663230896 -520000,-5.338753573099772 -530000,-5.370369919141134 -540000,-5.368333697319031 -550000,-5.335947815577188 -560000,-5.364392399787903 -570000,-5.42099948724111 -580000,-5.502808141708374 -590000,-5.521096086502075 -600000,-5.626682837804158 -610000,-5.66451621055603 -620000,-5.765987634658813 -630000,-5.752792898813883 -640000,-5.827583233515422 -650000,-5.839845951398213 -660000,-5.818006642659505 -670000,-5.856727615992228 -680000,-5.851334190368652 -690000,-5.824840760231018 -700000,-5.842902803421021 -710000,-5.811627801259359 -720000,-5.860547121365864 -730000,-5.869209774335225 -740000,-5.903720172246297 -750000,-5.893747957547506 -760000,-5.951618131001791 -770000,-5.94547164440155 -780000,-5.944915429751078 -790000,-5.96845645904541 -800000,-6.037969382603963 -810000,-6.070719528198242 -820000,-6.0624682346979775 -830000,-6.136061644554138 -840000,-6.127040608723958 -850000,-6.130953486760458 -860000,-6.122255802154542 -870000,-6.19560661315918 -880000,-6.176564280192058 -890000,-6.210381682713827 -900000,-6.197737352053324 -910000,-6.261397337913513 -920000,-6.3820885499318445 -930000,-6.442214155197144 -940000,-6.4295055866241455 -950000,-6.404351393381755 -960000,-6.45036710103353 -970000,-6.4360031366348265 -980000,-6.458836309115091 -990000,-6.382712046305338 -1000000,-6.444634135564169 +10000,-3.7493523716926567 +20000,-3.8053997278213503 +30000,-3.7715113917986556 +40000,-3.8062945882479347 +50000,-3.935110847155253 +60000,-3.8713713606198623 +70000,-3.8701498190561927 +80000,-4.041789038976033 +90000,-4.011696720123291 +100000,-3.982161009311676 +110000,-3.9808328111966453 +120000,-3.979837842782339 +130000,-4.053446269035339 +140000,-4.077817543347677 +150000,-4.065364742279053 +160000,-4.1411681572596235 +170000,-4.131144539515178 +180000,-4.169771949450174 +190000,-4.207228636741638 +200000,-4.230863539377848 +210000,-4.227621785799662 +220000,-4.259302099545797 +230000,-4.369477136929829 +240000,-4.344329166412353 +250000,-4.34578857421875 +260000,-4.414410837491353 +270000,-4.418037676811219 +280000,-4.396328059832255 +290000,-4.390154155095418 +300000,-4.4013120571772255 +310000,-4.411364221572875 +320000,-4.48324059645335 +330000,-4.5318078597386675 +340000,-4.564692902565001 +350000,-4.518355592091878 +360000,-4.569448065757752 +370000,-4.657678469022115 +380000,-4.722244811058045 +390000,-4.724707015355427 +400000,-4.769896022478739 +410000,-4.897361294428508 +420000,-4.9011482238769535 +430000,-4.913275098800659 +440000,-4.91132737795512 +450000,-4.962868197758993 +460000,-4.963744060198466 +470000,-4.9289777358373 +480000,-4.977684505780538 +490000,-5.031881197293599 +500000,-4.993787988026937 +510000,-5.039266236623129 +520000,-5.059879581133524 +530000,-4.971472160021465 +540000,-5.038334409395855 +550000,-5.012669515609741 +560000,-5.07155679066976 +570000,-5.162215693791707 +580000,-5.230323568979899 +590000,-5.298631119728087 +600000,-5.350602364540101 +610000,-5.410236501693726 +620000,-5.5503799994786585 +630000,-5.578881096839905 +640000,-5.653952749570211 +650000,-5.690237299601237 +660000,-5.640542777379354 +670000,-5.757789285977681 +680000,-5.781619246800741 +690000,-5.790744423866272 +700000,-5.862708155314127 +710000,-5.897925567626953 +720000,-5.849413267771403 +730000,-5.893833208084106 +740000,-5.925080561637879 +750000,-5.981245485941569 +760000,-5.9520038286844885 +770000,-6.095157829920451 +780000,-6.087578185399373 +790000,-6.130554064114889 +800000,-6.184746495882671 +810000,-6.235873039563497 +820000,-6.270849967002869 +830000,-6.298664450645447 +840000,-6.334548234939575 +850000,-6.420407366752625 +860000,-6.4873091538747145 +870000,-6.434952044487 +880000,-6.3901166915893555 +890000,-6.379041894276938 +900000,-6.356954797108968 +910000,-6.37474558353424 +920000,-6.359105221430461 +930000,-6.391858331362406 +940000,-6.468619767824808 +950000,-6.480223059654236 +960000,-6.534550166130066 +970000,-6.464309986432393 +980000,-6.390121682484944 +990000,-6.383255767822265 +1000000,-6.375379212697347 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/loss/policy_loss.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/loss/policy_loss.log index a75d6b23f..a6d2fc833 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/loss/policy_loss.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/loss/policy_loss.log @@ -1,101 +1,101 @@ step,loss/policy_loss -10000,-0.006284210409693601 -20000,-0.01714978461854288 -30000,-0.010164725647243083 -40000,-0.017333671028097616 -50000,-0.010935560990919276 -60000,-0.013359498498773362 -70000,-0.021421855015853884 -80000,-0.01065832049570994 -90000,-0.011768634041628171 -100000,-0.018185645061507653 -110000,-0.018537466661154543 -120000,-0.008770103221267969 -130000,-0.00998162422217594 -140000,-0.0163570801594585 -150000,-0.001704667089909282 -160000,-0.012691949433641759 -170000,-0.004802751156455364 -180000,-0.01396452421688385 -190000,-0.024546316618554047 -200000,-0.015611097984514178 -210000,-0.009098668579526207 -220000,-0.014752880896521964 -230000,-0.015150607196063514 -240000,-0.010627141932309615 -250000,-0.013451832036886976 -260000,-0.009017987519149246 -270000,-0.020998729078804068 -280000,-0.016553765060961893 -290000,0.002172037573591509 -300000,-0.013880960854194458 -310000,-0.01457922040032097 -320000,-0.020037995196856284 -330000,-0.013292366807979095 -340000,-0.022919679944061876 -350000,-0.007571355199425206 -360000,-0.013139027430371758 -370000,-0.005184039813321094 -380000,-0.015003307147012385 -390000,-0.013066697389396082 -400000,-0.015809241345851446 -410000,-0.017114165436036527 -420000,-0.01188939307586964 -430000,-0.01564098594688706 -440000,-0.009145957148884104 -450000,-0.021402465459356503 -460000,-0.01296977384093366 -470000,-0.0007624060779461532 -480000,-0.021921233864072104 -490000,-0.00370746045121781 -500000,-0.014997631547848425 -510000,-0.0028824619733785694 -520000,-0.007681408834900203 -530000,-0.0026134624749185433 -540000,-0.0035888563982802687 -550000,-0.021974472826309113 -560000,-0.003044075919671366 -570000,-0.021849704465519974 -580000,-0.011246266397318211 -590000,-0.012647318991822235 -600000,-0.028809184404074983 -610000,-0.02149586998802735 -620000,-0.006723618864330543 -630000,-0.023464432319806088 -640000,-0.007678074541426471 -650000,-0.024825710526537308 -660000,-0.02427580225392331 -670000,-0.007248824679207952 -680000,-0.009101638585586774 -690000,-0.015874120048195356 -700000,-0.011773116747766496 -710000,-0.006134608765664126 -720000,-0.015095345683324917 -730000,-0.003407348531533122 -740000,-0.013700443901632001 -750000,-0.0073697276887236315 -760000,-0.02032117983869148 -770000,-0.010207986352137084 -780000,-0.01882318815589761 -790000,-0.015404941438147909 -800000,-0.011921436384903627 -810000,-0.010647586793647334 -820000,-0.022239648067676757 -830000,-0.024274137604753672 -840000,-0.015554801721000511 -850000,-0.01134195351808667 -860000,-0.013030164838716368 -870000,-0.012914083690144126 -880000,-0.00795018854508838 -890000,-0.008134187571379342 -900000,-0.010473374416989692 -910000,-0.014858256874001377 -920000,-0.009625561034321339 -930000,-0.02218163681798108 -940000,-0.017610877973180214 -950000,-0.010271903377469563 -960000,-0.010254706921738032 -970000,-0.0046597379595217725 -980000,-0.01681802452632091 -990000,-0.015815726962442694 -1000000,-0.012132636415918687 +10000,-0.015480719143693238 +20000,-0.01567353034816445 +30000,-0.007717911893951021 +40000,-0.016799637093241136 +50000,-0.0018177024041900018 +60000,-0.012306855257861595 +70000,-0.009397433927142745 +80000,-0.011539248902511056 +90000,-0.015259181265380523 +100000,-0.010319560743905467 +110000,-0.0092388425170428 +120000,-0.00499378594900124 +130000,-0.01283501375635834 +140000,-0.009052517144645122 +150000,-0.0076691252662939805 +160000,-0.006162406343218327 +170000,-0.006049618819745968 +180000,-0.011145962111465282 +190000,-0.012210232128468085 +200000,-0.015029980107430455 +210000,-0.010668810297236736 +220000,-0.017401489537868083 +230000,-0.008049938074709788 +240000,-0.0006023350922267632 +250000,-0.003591422824366375 +260000,-0.013360862771649168 +270000,-0.01320620227566458 +280000,-0.012896567340442786 +290000,-0.013435027320955074 +300000,-0.002189199854904662 +310000,-0.009649017791589254 +320000,-0.0131115076371559 +330000,-0.016444810144820556 +340000,-0.012112125930814621 +350000,-0.012472583783040853 +360000,-0.017902083828097534 +370000,-0.011669914750504882 +380000,-0.014654459816050982 +390000,-0.007912338499941269 +400000,-0.008895940088827353 +410000,-0.011350314946880295 +420000,-0.0027228508518296127 +430000,-0.0183491814344187 +440000,-0.01931099606533997 +450000,-0.016908909621903976 +460000,0.0028184157660042725 +470000,-0.012635029533560185 +480000,-0.01683750798524581 +490000,-0.010966350371694114 +500000,-0.011717298398638007 +510000,-0.003535799923926719 +520000,-0.0034110111660589386 +530000,-0.018429203215872193 +540000,-0.019739307287412782 +550000,-0.019005126393462034 +560000,-0.008025206748077381 +570000,-0.010238340761725867 +580000,-0.017003959154548366 +590000,-0.010305898401147406 +600000,-0.002758473937989725 +610000,-0.00522182971381453 +620000,-0.014196647786188354 +630000,-0.01242062636221118 +640000,-0.012042535937016766 +650000,-0.008305174339348503 +660000,-0.014064023684441285 +670000,-0.01266220961585977 +680000,-0.01203853288573667 +690000,0.0056673520193189775 +700000,-0.008314650426408672 +710000,-0.013247617294050076 +720000,-0.009539868155901281 +730000,-0.010604625378450154 +740000,-0.018034971749920307 +750000,-0.016663733371045555 +760000,0.0015340036611705733 +770000,-0.014475364115139947 +780000,-0.005117411156209646 +790000,-0.011988302150386754 +800000,-0.006953951247719335 +810000,-0.016227574882268413 +820000,-0.007655494131199989 +830000,-0.021358020675886234 +840000,-0.016911461046128062 +850000,-0.00784686134476591 +860000,-0.015239189288522425 +870000,-0.018261268044454278 +880000,-0.017047224931382825 +890000,-0.015434782686627305 +900000,-0.011999677249261183 +910000,-0.01035502341205136 +920000,-0.016775271392315418 +930000,-0.003246954716115426 +940000,-0.005082654874987959 +950000,-0.013151184363092394 +960000,-0.004434206687415907 +970000,-0.006712359525126614 +980000,-0.017192301603135316 +990000,-0.0013254151760178773 +1000000,-0.020505308253355677 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/loss/value_loss.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/loss/value_loss.log index deb10dd6f..16c25828b 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/loss/value_loss.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/loss/value_loss.log @@ -1,101 +1,101 @@ step,loss/value_loss -10000,9.36910127774009 -20000,18.16829259120988 -30000,8.400530856041843 -40000,4.247865719591795 -50000,6.9104125418540745 -60000,5.197544603339787 -70000,6.312783766730119 -80000,7.14384214249854 -90000,3.062609638171243 -100000,4.03925832054606 -110000,1.8968732176986252 -120000,2.270183491407277 -130000,1.8487586521886243 -140000,0.7880140785713966 -150000,1.069533614118682 -160000,1.342851554823191 -170000,1.1997665160862079 -180000,1.4283932662057883 -190000,1.0622247508746647 -200000,0.7130038633545801 -210000,1.3312511806407694 -220000,2.5841877441165915 -230000,1.2484822492117673 -240000,2.6619967729098972 -250000,1.102308012846105 -260000,0.6416063022135211 -270000,0.7148190004617605 -280000,0.4501012799141824 -290000,0.6356805308845558 -300000,0.7263283404681349 -310000,1.4993266190342975 -320000,2.0132299587379645 -330000,1.2073051869220441 -340000,1.5848837654225318 -350000,1.0661337477786716 -360000,2.8783901737467414 -370000,1.2839547083260165 -380000,1.7695550743163158 -390000,0.7711400052016192 -400000,0.8159400711379716 -410000,0.9561848992863498 -420000,1.4809129553795566 -430000,1.6108856358929635 -440000,3.9281768332286555 -450000,4.7005795465397515 -460000,1.054854029728871 -470000,0.828350320699947 -480000,3.354447745267079 -490000,0.7957071889155676 -500000,0.4668558180787393 -510000,0.8070977538219417 -520000,2.0124435666004388 -530000,3.145691307578134 -540000,2.271487845854114 -550000,2.8879210574396543 -560000,0.6582277429936016 -570000,0.4576047794419118 -580000,0.9288560110571012 -590000,0.62950016605295 -600000,0.5126823586248871 -610000,0.38379027756800604 -620000,0.47456381002259534 -630000,1.2389764817130433 -640000,0.7758797651356771 -650000,0.6741782598506495 -660000,0.6481942760459238 -670000,0.8847584097088502 -680000,0.557280281388026 -690000,0.5286508763487592 -700000,1.0498096759177276 -710000,0.7441028226278281 -720000,0.7553731273257617 -730000,0.6917966239130466 -740000,0.8227752832908866 -750000,0.3659875401893009 -760000,0.5239321476604746 -770000,0.4593191921076504 -780000,0.4552783435196427 -790000,0.359332097357846 -800000,0.5780091507426223 -810000,0.8800698426154551 -820000,0.43132108423297827 -830000,1.4395864821549726 -840000,1.5796126797038075 -850000,0.6629396234353766 -860000,0.8103764687610202 -870000,4.9093973632876935 -880000,0.6636707875485868 -890000,0.3061475771850667 -900000,1.766112902766086 -910000,1.1655160536399172 -920000,1.5426547060110167 -930000,1.2962086413115164 -940000,1.3841038830776107 -950000,0.8487571541402744 -960000,0.9804335506271631 -970000,0.5893948729113198 -980000,0.7789280851248845 -990000,1.7640175965900198 -1000000,0.6949992434301425 +10000,19.768832208211997 +20000,22.358054864638657 +30000,6.988183132539476 +40000,5.090120029763956 +50000,5.5912237450557845 +60000,1.6971782922781198 +70000,2.5895045300572805 +80000,2.101563943035475 +90000,0.9168182903970774 +100000,3.6215362939637155 +110000,2.5822842429338007 +120000,2.357753523583241 +130000,1.1633759856425556 +140000,1.3248090535236843 +150000,2.1613658309331223 +160000,1.7890792921112382 +170000,0.9910170921478219 +180000,0.4901048116164695 +190000,0.5565388419310484 +200000,1.2519450232490574 +210000,0.489152010227014 +220000,0.6338818036904285 +230000,0.4829219963363916 +240000,1.0966337568341973 +250000,0.2180853194321589 +260000,0.7328397470532594 +270000,0.15149303847543544 +280000,0.12591518256849682 +290000,0.16976258250528045 +300000,0.25993411974437847 +310000,0.17874545882443932 +320000,0.30658075185780864 +330000,0.18322667493019876 +340000,0.10610040480165447 +350000,0.29156249875939744 +360000,0.14289174775216923 +370000,0.21867147919174168 +380000,0.20661424254667032 +390000,0.4262791887469608 +400000,0.4025658342126362 +410000,0.20132877109794434 +420000,0.26531176524468914 +430000,0.12229812951008925 +440000,0.09347943812953455 +450000,0.08731336895904786 +460000,0.07058110582745122 +470000,0.15139681439491648 +480000,0.0469733820094662 +490000,0.12856440729771626 +500000,0.16061515470746937 +510000,0.1806301708066686 +520000,0.34520519037920777 +530000,0.0998173094723051 +540000,0.10280037958478805 +550000,0.07554791782526409 +560000,0.2435686115243943 +570000,0.21228942402432974 +580000,0.16935409817893138 +590000,0.18049714365085714 +600000,0.08572891697687626 +610000,0.07938371936350604 +620000,0.0953032246855652 +630000,0.10171028079923743 +640000,0.1295576731483414 +650000,0.10229513241581026 +660000,0.08165563424877545 +670000,0.11584234540677765 +680000,0.1832393328774637 +690000,0.2851564563670163 +700000,0.24610630593597485 +710000,0.22189552516317343 +720000,0.12611139191072235 +730000,0.15544961672778074 +740000,0.12393085692015501 +750000,0.165814965148471 +760000,0.25000233201824973 +770000,0.1494544802944276 +780000,0.2075126818470666 +790000,0.08757176985283699 +800000,0.18977442093935265 +810000,0.22704908693120512 +820000,0.238665623460874 +830000,0.19111496912666898 +840000,0.17536707938637403 +850000,0.46502874074761175 +860000,0.17664790576677902 +870000,0.4346058840423358 +880000,0.1643687145923298 +890000,0.13636068329067025 +900000,0.3006362670712574 +910000,0.07737777982116625 +920000,0.20407943459516137 +930000,0.7209518415891416 +940000,0.11792404217837874 +950000,0.1148497820625339 +960000,0.10021951639984736 +970000,0.11969094362494684 +980000,0.09203210413600786 +990000,0.06087707827345797 +1000000,0.05276742905430119 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat/constraint_violation.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat/constraint_violation.log index 6edd019ef..a53855ee1 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat/constraint_violation.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat/constraint_violation.log @@ -1,101 +1,101 @@ step,stat/constraint_violation -10000,46 -20000,47 -30000,50 -40000,73 -50000,112 -60000,146 -70000,207 -80000,246 -90000,278 -100000,301 -110000,343 -120000,358 -130000,380 -140000,399 -150000,433 -160000,473 -170000,515 -180000,522 -190000,548 -200000,569 -210000,622 -220000,634 -230000,678 -240000,683 -250000,743 -260000,756 -270000,759 -280000,818 -290000,859 -300000,885 -310000,902 -320000,932 -330000,937 -340000,954 -350000,970 -360000,986 -370000,1031 -380000,1047 -390000,1066 -400000,1076 -410000,1099 -420000,1104 -430000,1116 -440000,1155 -450000,1156 -460000,1187 -470000,1213 -480000,1237 -490000,1260 -500000,1264 -510000,1293 -520000,1300 -530000,1373 -540000,1389 -550000,1406 -560000,1418 -570000,1428 -580000,1469 -590000,1492 -600000,1531 -610000,1566 -620000,1572 -630000,1609 -640000,1633 -650000,1640 -660000,1645 -670000,1660 -680000,1699 -690000,1721 -700000,1727 -710000,1753 -720000,1762 -730000,1767 -740000,1769 -750000,1808 -760000,1812 -770000,1823 -780000,1826 -790000,1840 -800000,1883 -810000,1895 -820000,1910 -830000,1947 -840000,1948 -850000,1963 -860000,1993 -870000,2028 -880000,2034 -890000,2051 -900000,2055 -910000,2080 -920000,2097 -930000,2112 -940000,2126 -950000,2144 -960000,2178 -970000,2184 -980000,2211 -990000,2234 -1000000,2269 +10000,0 +20000,0 +30000,0 +40000,0 +50000,0 +60000,0 +70000,0 +80000,0 +90000,0 +100000,0 +110000,0 +120000,0 +130000,0 +140000,0 +150000,0 +160000,0 +170000,0 +180000,0 +190000,0 +200000,0 +210000,1 +220000,1 +230000,1 +240000,2 +250000,2 +260000,2 +270000,2 +280000,2 +290000,2 +300000,2 +310000,2 +320000,2 +330000,2 +340000,2 +350000,2 +360000,2 +370000,2 +380000,3 +390000,3 +400000,3 +410000,3 +420000,3 +430000,3 +440000,3 +450000,4 +460000,4 +470000,4 +480000,4 +490000,4 +500000,4 +510000,4 +520000,4 +530000,4 +540000,4 +550000,4 +560000,4 +570000,4 +580000,4 +590000,4 +600000,4 +610000,4 +620000,4 +630000,4 +640000,4 +650000,4 +660000,4 +670000,4 +680000,4 +690000,4 +700000,4 +710000,4 +720000,4 +730000,4 +740000,4 +750000,4 +760000,4 +770000,4 +780000,4 +790000,4 +800000,4 +810000,4 +820000,4 +830000,4 +840000,4 +850000,4 +860000,5 +870000,5 +880000,5 +890000,5 +900000,5 +910000,6 +920000,6 +930000,6 +940000,7 +950000,7 +960000,7 +970000,7 +980000,7 +990000,7 +1000000,7 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat/ep_constraint_violation.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat/ep_constraint_violation.log index 3803d01c5..32d882659 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat/ep_constraint_violation.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat/ep_constraint_violation.log @@ -1,101 +1,101 @@ step,stat/ep_constraint_violation -10000,1.5 +10000,0.0 20000,0.0 -30000,0.1 -40000,0.7 +30000,0.0 +40000,0.0 50000,0.0 -60000,0.3 -70000,1.3 +60000,0.0 +70000,0.0 80000,0.0 -90000,1.5 -100000,0.7 -110000,1.4 -120000,0.1 +90000,0.0 +100000,0.0 +110000,0.0 +120000,0.0 130000,0.0 140000,0.0 -150000,1.4 -160000,0.1 -170000,3.4 -180000,0.6 +150000,0.0 +160000,0.0 +170000,0.0 +180000,0.0 190000,0.0 -200000,1.0 -210000,0.6 -220000,0.1 -230000,1.6 -240000,0.1 -250000,2.4 -260000,0.1 -270000,0.1 -280000,1.3 -290000,1.0 -300000,2.4 -310000,0.2 -320000,0.6 -330000,0.1 -340000,0.1 -350000,0.1 -360000,0.1 -370000,1.3 -380000,0.2 -390000,0.1 -400000,0.8 +200000,0.0 +210000,0.1 +220000,0.0 +230000,0.0 +240000,0.0 +250000,0.0 +260000,0.0 +270000,0.0 +280000,0.0 +290000,0.0 +300000,0.0 +310000,0.0 +320000,0.0 +330000,0.0 +340000,0.0 +350000,0.0 +360000,0.0 +370000,0.0 +380000,0.0 +390000,0.0 +400000,0.0 410000,0.0 -420000,0.2 -430000,0.7 -440000,0.2 +420000,0.0 +430000,0.0 +440000,0.0 450000,0.0 -460000,1.1 -470000,0.1 -480000,0.1 -490000,0.1 +460000,0.0 +470000,0.0 +480000,0.0 +490000,0.0 500000,0.0 -510000,1.4 +510000,0.0 520000,0.0 -530000,2.4 -540000,0.6 -550000,0.2 -560000,0.4 -570000,0.1 +530000,0.0 +540000,0.0 +550000,0.0 +560000,0.0 +570000,0.0 580000,0.0 590000,0.0 -600000,2.8 -610000,0.1 -620000,0.2 -630000,2.0 -640000,1.0 -650000,0.1 +600000,0.0 +610000,0.0 +620000,0.0 +630000,0.0 +640000,0.0 +650000,0.0 660000,0.0 -670000,0.8 -680000,1.0 +670000,0.0 +680000,0.0 690000,0.0 700000,0.0 -710000,0.2 -720000,0.4 +710000,0.0 +720000,0.0 730000,0.0 740000,0.0 -750000,1.4 -760000,0.1 +750000,0.0 +760000,0.0 770000,0.0 780000,0.0 -790000,0.8 -800000,1.6 -810000,0.1 -820000,0.1 -830000,1.1 -840000,0.1 -850000,0.9 +790000,0.0 +800000,0.0 +810000,0.0 +820000,0.0 +830000,0.0 +840000,0.0 +850000,0.0 860000,0.0 -870000,1.8 +870000,0.0 880000,0.0 -890000,1.2 -900000,0.1 +890000,0.0 +900000,0.0 910000,0.1 -920000,0.1 -930000,0.1 +920000,0.0 +930000,0.0 940000,0.0 -950000,0.1 -960000,0.1 +950000,0.0 +960000,0.0 970000,0.0 -980000,0.2 -990000,0.5 -1000000,1.7 +980000,0.0 +990000,0.0 +1000000,0.0 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat/ep_length.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat/ep_length.log index 18861be01..0a4e50739 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat/ep_length.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat/ep_length.log @@ -1,67 +1,67 @@ step,stat/ep_length -10000,226.4 +10000,250.0 20000,250.0 -30000,225.8 -40000,202.6 +30000,250.0 +40000,250.0 50000,250.0 -60000,200.5 -70000,226.4 +60000,250.0 +70000,250.0 80000,250.0 90000,250.0 -100000,225.9 -110000,225.6 -120000,225.9 +100000,250.0 +110000,250.0 +120000,250.0 130000,250.0 140000,250.0 -150000,226.8 +150000,250.0 160000,250.0 170000,250.0 -180000,225.3 +180000,250.0 190000,250.0 200000,250.0 -210000,225.9 -220000,226.8 -230000,201.8 -240000,226.7 -250000,227.3 -260000,225.2 -270000,226.7 +210000,250.0 +220000,250.0 +230000,250.0 +240000,250.0 +250000,250.0 +260000,250.0 +270000,250.0 280000,250.0 290000,250.0 300000,250.0 -310000,225.3 -320000,202.0 -330000,225.2 -340000,226.3 -350000,225.5 -360000,225.6 +310000,250.0 +320000,250.0 +330000,250.0 +340000,250.0 +350000,250.0 +360000,250.0 370000,250.0 -380000,201.2 -390000,225.1 -400000,225.4 +380000,250.0 +390000,250.0 +400000,250.0 410000,250.0 -420000,201.1 -430000,203.7 -440000,200.6 +420000,250.0 +430000,250.0 +440000,250.0 450000,250.0 -460000,201.7 -470000,226.0 -480000,226.4 -490000,226.0 +460000,250.0 +470000,250.0 +480000,250.0 +490000,250.0 500000,250.0 -510000,225.8 +510000,250.0 520000,250.0 -530000,201.9 -540000,201.3 -550000,201.2 -560000,225.8 -570000,225.4 +530000,250.0 +540000,250.0 +550000,250.0 +560000,250.0 +570000,250.0 580000,250.0 590000,250.0 -600000,227.0 -610000,225.5 -620000,226.5 -630000,154.1 +600000,250.0 +610000,250.0 +620000,250.0 +630000,250.0 640000,250.0 650000,250.0 660000,250.0 @@ -69,33 +69,33 @@ step,stat/ep_length 680000,250.0 690000,250.0 700000,250.0 -710000,200.9 +710000,250.0 720000,250.0 730000,250.0 740000,250.0 -750000,225.6 -760000,225.1 +750000,250.0 +760000,250.0 770000,250.0 780000,250.0 -790000,225.4 -800000,200.8 -810000,225.1 -820000,225.6 -830000,226.3 -840000,227.2 -850000,225.6 +790000,250.0 +800000,250.0 +810000,250.0 +820000,250.0 +830000,250.0 +840000,250.0 +850000,250.0 860000,250.0 -870000,200.8 +870000,250.0 880000,250.0 890000,250.0 -900000,225.6 -910000,225.3 -920000,225.2 +900000,250.0 +910000,250.0 +920000,250.0 930000,250.0 940000,250.0 -950000,226.0 -960000,225.1 +950000,250.0 +960000,250.0 970000,250.0 -980000,200.4 +980000,250.0 990000,250.0 1000000,250.0 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat/ep_return.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat/ep_return.log index dc62dd14f..590f02198 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat/ep_return.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat/ep_return.log @@ -1,101 +1,101 @@ step,stat/ep_return -10000,46.6209490478024 -20000,75.23891866185814 -30000,64.49645026464967 -40000,60.49736000540942 -50000,98.25634178847648 -60000,81.70842729908324 -70000,81.31401596454148 -80000,91.52359076133955 -90000,110.43904203140207 -100000,89.70945102917945 -110000,79.55414839049358 -120000,95.83321905601612 -130000,118.26933915679645 -140000,125.49493738615584 -150000,119.74743782237044 -160000,124.10734151070355 -170000,123.9411887178217 -180000,124.85104212640337 -190000,124.0220801169352 -200000,134.79549323504187 -210000,110.30385918536228 -220000,112.07516317674435 -230000,97.47368454154818 -240000,119.98184092325536 -250000,122.40120507844999 -260000,122.68077386399383 -270000,130.55114203345093 -280000,136.04925697338956 -290000,152.60671130619625 -300000,145.08640510978043 -310000,140.11722478951225 -320000,124.0672885050758 -330000,125.49606394146979 -340000,139.28331617236051 -350000,126.96128152292063 -360000,141.9247251864935 -370000,136.88555721884342 -380000,126.21323329372478 -390000,139.89615069846099 -400000,131.19737685527724 -410000,147.04204451623372 -420000,127.24631527642032 -430000,95.51928834286113 -440000,117.79028390728145 -450000,137.11514766585475 -460000,118.57511574552476 -470000,130.1294277918199 -480000,140.35280848035381 -490000,131.7194903571603 -500000,140.57646092457557 -510000,128.95297563844736 -520000,158.3514660422225 -530000,124.38718245032851 -540000,132.41972577274777 -550000,121.72260929901017 -560000,146.16416219740296 -570000,134.38784596016163 -580000,166.73600681779652 -590000,147.52424608940888 -600000,139.48373277770202 -610000,143.9611718483045 -620000,139.18114978445425 -630000,90.82425010240601 -640000,161.02109766438406 -650000,177.42610578656735 -660000,148.8325243785581 -670000,159.73571503469142 -680000,147.98628167166552 -690000,145.28833948782858 -700000,141.9772962088137 -710000,112.94945425827636 -720000,147.13007565255836 -730000,158.33127631224812 -740000,155.39019018302338 -750000,139.43133723556306 -760000,143.22644688577313 -770000,153.92305237190544 -780000,163.8484675335869 -790000,151.4815436966386 -800000,126.99612893948438 -810000,140.99609716280696 -820000,134.60584786631412 -830000,153.35990414521433 -840000,135.3711927613691 -850000,137.69902178196702 -860000,164.9580748947524 -870000,131.12161145168315 -880000,156.5858690361898 -890000,153.94039004109047 -900000,143.61289522709237 -910000,143.57064373371125 -920000,158.47209915715658 -930000,168.57326332478937 -940000,162.27689986682654 -950000,145.92619616698386 -960000,145.0940220843391 -970000,164.30102228980255 -980000,124.17568470068257 -990000,161.74240540421727 -1000000,152.82167166278816 +10000,83.1741927774118 +20000,113.45608181761092 +30000,116.07812851379826 +40000,131.6837242030769 +50000,129.89914016681894 +60000,132.9772399293017 +70000,141.35537372477222 +80000,134.9879621597827 +90000,131.96331318649956 +100000,143.9874767087329 +110000,153.81403371496833 +120000,143.2394002698408 +130000,153.10687809035306 +140000,151.3596815900003 +150000,163.5040714708021 +160000,175.40363077394647 +170000,172.30917275897164 +180000,170.2375547285538 +190000,168.04713452692607 +200000,173.48876134487514 +210000,181.96436168276315 +220000,165.62674287882038 +230000,183.66753996458257 +240000,175.75319322896644 +250000,189.06138921797077 +260000,179.15398432408682 +270000,172.50298875070507 +280000,185.6284532558015 +290000,188.93609603804046 +300000,194.05280043184786 +310000,193.9824044443314 +320000,201.43580137252985 +330000,199.49155609103357 +340000,195.39709441327594 +350000,195.70098509749488 +360000,191.2603129748987 +370000,194.24109633698214 +380000,185.15432623184282 +390000,187.7238524504267 +400000,185.97290929029754 +410000,198.4672457682114 +420000,202.62150989849619 +430000,195.23814212192823 +440000,199.833589124558 +450000,203.21061383154392 +460000,205.477600831603 +470000,185.88965161582524 +480000,190.3237222440195 +490000,198.6131673686165 +500000,204.57893661198847 +510000,208.3916531286627 +520000,195.69343481869993 +530000,197.3751959322685 +540000,191.67687489853054 +550000,196.72532336351858 +560000,195.3105003543888 +570000,205.38535595162074 +580000,194.690027561853 +590000,190.35444626927944 +600000,197.93295937762963 +610000,199.52044551022408 +620000,200.85110495031412 +630000,201.76915425552536 +640000,199.23711686082953 +650000,198.05667973316952 +660000,195.1127768667453 +670000,195.82639946021445 +680000,192.29077551672546 +690000,204.03658930318937 +700000,190.29123559787521 +710000,199.90462930703274 +720000,193.35075175920886 +730000,200.31478581004322 +740000,201.78797895222505 +750000,189.01831598727247 +760000,199.0467397872923 +770000,185.2026818609003 +780000,195.1751957405949 +790000,187.4141424784857 +800000,187.91788030379197 +810000,199.13474770326553 +820000,198.29071634461198 +830000,190.46271751647626 +840000,194.7329216950481 +850000,197.96352636811196 +860000,195.5008590204847 +870000,206.6356331237035 +880000,197.63812479511347 +890000,190.23614843069464 +900000,198.91394901449786 +910000,196.71888914339962 +920000,198.86670752582893 +930000,194.08446262414358 +940000,189.09491416002385 +950000,195.6761321974712 +960000,194.0222236010966 +970000,203.49073161931108 +980000,194.46415597737632 +990000,203.69828481919626 +1000000,190.81447533974693 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat/ep_reward.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat/ep_reward.log index 61cab8c90..fa72b19ce 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat/ep_reward.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat/ep_reward.log @@ -1,101 +1,101 @@ step,stat/ep_reward -10000,0.18688569087784637 -20000,0.30095567464743256 -30000,0.2580002355208989 -40000,0.24202955317465608 -50000,0.3930253671539059 -60000,0.32949310902304035 -70000,0.32584108049286 -80000,0.3660943630453582 -90000,0.44175616812560836 -100000,0.3590044428419883 -110000,0.31865061418833945 -120000,0.3869428910960121 -130000,0.4730773566271858 -140000,0.5019797495446233 -150000,0.4796377121279048 -160000,0.4964293660428142 -170000,0.49576475487128685 -180000,0.5002350095301296 -190000,0.49608832046774065 -200000,0.5391819729401675 -210000,0.44130320260936184 -220000,0.44843069701554006 -230000,0.3905176750697711 -240000,0.4820885631109458 -250000,0.48976412754578985 -260000,0.4947623850596591 -270000,0.5222983504134711 -280000,0.5441970278935584 -290000,0.6104268452247851 -300000,0.5803456204391219 -310000,0.5605635587095129 -320000,0.49870663433100554 -330000,0.5059413840213173 -340000,0.5571409396563378 -350000,0.5083137992978285 -360000,0.5725216653069201 -370000,0.5475422288753735 -380000,0.50551964187246 -390000,0.559592063547597 -400000,0.5290262555310518 -410000,0.5881681780649349 -420000,0.5108586799504966 -430000,0.38437372483133925 -440000,0.4873560912069913 -450000,0.5484605906634191 -460000,0.47717029662759264 -470000,0.5219159423525361 -480000,0.5629169540146327 -490000,0.526906737537399 -500000,0.5623058436983024 -510000,0.5161724541262364 -520000,0.6334058641688901 -530000,0.4987335954584887 -540000,0.5301154492204123 -550000,0.48913443391596195 -560000,0.5850505482078185 -570000,0.538017418346282 -580000,0.6669440272711861 -590000,0.5900969843576356 -600000,0.5579649822376902 -610000,0.5764328931321723 -620000,0.5568553035507281 -630000,0.36913762590640503 -640000,0.6440843906575361 -650000,0.7097044231462692 -660000,0.5953300975142325 -670000,0.6389428601387657 -680000,0.5919451266866622 -690000,0.5811533579513144 -700000,0.5679091848352549 -710000,0.45199604905447355 -720000,0.5885203026102334 -730000,0.6333251052489924 -740000,0.6215607607320937 -750000,0.5578816333971229 -760000,0.5763986427556305 -770000,0.6156922094876218 -780000,0.6553938701343476 -790000,0.605932313384272 -800000,0.5108688184434074 -810000,0.5676825440501834 -820000,0.5394121551153077 -830000,0.6137024060029879 -840000,0.5417720133992969 -850000,0.5508283450317725 -860000,0.6598322995790097 -870000,0.5362872853882871 -880000,0.6263434761447593 -890000,0.6157615601643618 -900000,0.5768689392495739 -910000,0.574500233773398 -920000,0.6358785530072588 -930000,0.6742930532991575 -940000,0.6491075994673061 -950000,0.5865029841883946 -960000,0.5806987996523894 -970000,0.65720408915921 -980000,0.4970200086778287 -990000,0.646969621616869 -1000000,0.6112866866511526 +10000,0.33269677110964724 +20000,0.45382432727044364 +30000,0.46431251405519314 +40000,0.5267348968123076 +50000,0.5195965606672759 +60000,0.5319089597172068 +70000,0.5654214948990889 +80000,0.5399518486391308 +90000,0.5278532527459983 +100000,0.5759499068349316 +110000,0.6152561348598733 +120000,0.5729576010793631 +130000,0.6124275123614124 +140000,0.6054387263600012 +150000,0.6540162858832085 +160000,0.701614523095786 +170000,0.6892366910358865 +180000,0.6809502189142151 +190000,0.6721885381077043 +200000,0.6939550453795006 +210000,0.7278574467310526 +220000,0.6625069715152816 +230000,0.7346701598583303 +240000,0.703012772915866 +250000,0.7562455568718833 +260000,0.7166159372963474 +270000,0.6900119550028203 +280000,0.742513813023206 +290000,0.7557443841521618 +300000,0.7762112017273916 +310000,0.7759296177773255 +320000,0.8057432054901195 +330000,0.7979662243641343 +340000,0.7815883776531037 +350000,0.7828039403899794 +360000,0.7650412518995948 +370000,0.7769643853479286 +380000,0.7406173049273713 +390000,0.7508954098017067 +400000,0.7438916371611901 +410000,0.7938689830728456 +420000,0.8104860395939847 +430000,0.7809525684877131 +440000,0.799334356498232 +450000,0.8128424553261757 +460000,0.821910403326412 +470000,0.7435586064633011 +480000,0.761294888976078 +490000,0.794452669474466 +500000,0.8183157464479537 +510000,0.833566612514651 +520000,0.7827737392747998 +530000,0.7895007837290738 +540000,0.7667074995941221 +550000,0.7869012934540742 +560000,0.7812420014175553 +570000,0.8215414238064831 +580000,0.778760110247412 +590000,0.7614177850771177 +600000,0.7917318375105185 +610000,0.7980817820408962 +620000,0.8034044198012564 +630000,0.8070766170221015 +640000,0.796948467443318 +650000,0.792226718932678 +660000,0.7804511074669811 +670000,0.7833055978408578 +680000,0.7691631020669017 +690000,0.8161463572127575 +700000,0.761164942391501 +710000,0.7996185172281309 +720000,0.7734030070368354 +730000,0.8012591432401729 +740000,0.8071519158089003 +750000,0.7560732639490899 +760000,0.796186959149169 +770000,0.7408107274436011 +780000,0.7807007829623795 +790000,0.7496565699139428 +800000,0.7516715212151679 +810000,0.7965389908130621 +820000,0.7931628653784478 +830000,0.7618508700659052 +840000,0.7789316867801923 +850000,0.7918541054724477 +860000,0.7820034360819388 +870000,0.826542532494814 +880000,0.790552499180454 +890000,0.7609445937227786 +900000,0.7956557960579914 +910000,0.7868755565735985 +920000,0.7954668301033158 +930000,0.7763378504965743 +940000,0.7563796566400954 +950000,0.7827045287898848 +960000,0.7760888944043864 +970000,0.8139629264772441 +980000,0.7778566239095055 +990000,0.814793139276785 +1000000,0.7632579013589877 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat_eval/constraint_violation.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat_eval/constraint_violation.log index 462647132..5191b3d7c 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat_eval/constraint_violation.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat_eval/constraint_violation.log @@ -1,101 +1,101 @@ step,stat_eval/constraint_violation -10000,0.0 -20000,0.0 -30000,0.0 -40000,2.2 -50000,0.0 -60000,0.1 -70000,0.9 -80000,0.0 -90000,0.3 -100000,0.0 +10000,0.4 +20000,0.3 +30000,0.1 +40000,0.1 +50000,0.9 +60000,0.0 +70000,1.5 +80000,1.1 +90000,1.0 +100000,0.1 110000,0.2 -120000,0.1 -130000,0.0 -140000,1.7 -150000,0.2 -160000,0.0 -170000,0.0 -180000,0.0 -190000,0.3 -200000,0.1 -210000,0.0 -220000,0.9 +120000,0.2 +130000,1.3 +140000,0.1 +150000,1.4 +160000,0.2 +170000,0.3 +180000,0.3 +190000,1.7 +200000,0.2 +210000,0.3 +220000,0.2 230000,0.0 -240000,0.1 -250000,0.1 -260000,0.6 -270000,0.1 +240000,0.9 +250000,0.2 +260000,0.0 +270000,2.5 280000,0.3 -290000,0.2 -300000,0.3 -310000,0.1 +290000,0.3 +300000,0.1 +310000,0.0 320000,0.0 -330000,0.2 -340000,0.3 +330000,0.3 +340000,0.5 350000,0.1 360000,1.4 -370000,0.1 +370000,0.2 380000,0.2 -390000,1.3 -400000,0.0 -410000,0.1 -420000,0.0 -430000,0.2 -440000,0.2 -450000,1.5 -460000,0.7 -470000,0.0 -480000,0.0 -490000,1.5 -500000,0.1 -510000,0.0 +390000,1.5 +400000,0.1 +410000,0.0 +420000,1.3 +430000,1.7 +440000,0.3 +450000,0.0 +460000,0.0 +470000,0.1 +480000,1.6 +490000,0.3 +500000,0.0 +510000,0.6 520000,0.5 -530000,0.0 -540000,1.9 -550000,0.0 -560000,0.0 -570000,0.1 -580000,1.3 -590000,0.0 -600000,0.3 +530000,0.2 +540000,2.1 +550000,0.1 +560000,0.2 +570000,0.0 +580000,0.2 +590000,0.6 +600000,0.0 610000,0.0 -620000,0.3 +620000,0.1 630000,0.0 640000,0.0 -650000,0.6 -660000,0.5 -670000,0.1 +650000,0.0 +660000,1.9 +670000,0.5 680000,0.0 -690000,0.2 -700000,0.1 -710000,0.1 -720000,2.1 -730000,0.2 +690000,0.8 +700000,0.0 +710000,1.2 +720000,1.5 +730000,0.1 740000,0.0 -750000,0.4 -760000,0.3 -770000,0.0 -780000,0.3 -790000,1.4 -800000,0.6 +750000,0.1 +760000,0.1 +770000,0.2 +780000,1.5 +790000,0.1 +800000,0.0 810000,0.1 -820000,1.7 -830000,0.5 -840000,0.1 -850000,1.7 +820000,0.7 +830000,0.0 +840000,0.0 +850000,0.4 860000,0.0 -870000,0.4 +870000,1.3 880000,0.2 -890000,0.1 -900000,0.4 +890000,0.3 +900000,1.4 910000,0.1 -920000,0.3 -930000,0.2 +920000,1.4 +930000,0.0 940000,0.0 950000,0.0 960000,0.0 -970000,1.0 -980000,0.2 -990000,0.9 -1000000,0.7 +970000,1.7 +980000,0.0 +990000,0.4 +1000000,0.0 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat_eval/ep_length.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat_eval/ep_length.log index 6397d552f..07650d3f1 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat_eval/ep_length.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat_eval/ep_length.log @@ -1,101 +1,101 @@ step,stat_eval/ep_length -10000,250.0 -20000,250.0 -30000,250.0 +10000,225.9 +20000,201.9 +30000,225.3 40000,250.0 50000,250.0 -60000,225.2 -70000,201.9 +60000,250.0 +70000,225.4 80000,250.0 -90000,200.9 +90000,250.0 100000,250.0 -110000,200.3 -120000,225.1 -130000,250.0 +110000,201.3 +120000,250.0 +130000,177.5 140000,250.0 -150000,201.4 +150000,250.0 160000,250.0 -170000,250.0 -180000,250.0 -190000,178.2 +170000,225.8 +180000,200.9 +190000,250.0 200000,250.0 -210000,250.0 -220000,225.2 +210000,176.6 +220000,226.3 230000,250.0 -240000,225.8 -250000,250.0 +240000,225.1 +250000,202.0 260000,250.0 -270000,226.3 -280000,177.0 -290000,203.0 -300000,225.3 -310000,225.1 +270000,250.0 +280000,250.0 +290000,201.4 +300000,225.4 +310000,250.0 320000,250.0 -330000,250.0 +330000,201.9 340000,250.0 350000,250.0 -360000,225.7 -370000,226.6 -380000,200.8 -390000,225.1 -400000,250.0 +360000,250.0 +370000,202.2 +380000,225.9 +390000,250.0 +400000,225.6 410000,250.0 -420000,250.0 -430000,200.8 -440000,201.6 -450000,226.1 +420000,201.4 +430000,201.9 +440000,226.6 +450000,250.0 460000,250.0 -470000,250.0 -480000,250.0 +470000,226.2 +480000,201.2 490000,250.0 -500000,226.8 -510000,250.0 -520000,153.7 +500000,250.0 +510000,225.3 +520000,225.7 530000,250.0 -540000,225.9 -550000,250.0 -560000,250.0 -570000,226.7 -580000,250.0 -590000,250.0 -600000,225.3 +540000,226.3 +550000,225.3 +560000,201.6 +570000,250.0 +580000,202.5 +590000,225.3 +600000,250.0 610000,250.0 -620000,250.0 +620000,225.1 630000,250.0 640000,250.0 -650000,201.7 -660000,225.9 -670000,225.7 +650000,250.0 +660000,250.0 +670000,250.0 680000,250.0 -690000,202.0 -700000,227.2 -710000,226.2 +690000,250.0 +700000,250.0 +710000,250.0 720000,225.7 -730000,202.6 +730000,226.8 740000,250.0 -750000,250.0 -760000,176.9 -770000,250.0 -780000,250.0 -790000,250.0 -800000,225.8 -810000,225.7 -820000,225.5 -830000,200.4 -840000,225.4 -850000,226.1 +750000,225.2 +760000,225.6 +770000,201.3 +780000,225.1 +790000,225.7 +800000,250.0 +810000,250.0 +820000,250.0 +830000,250.0 +840000,250.0 +850000,151.6 860000,250.0 -870000,202.0 -880000,225.8 -890000,226.6 -900000,226.9 -910000,225.2 -920000,226.7 -930000,202.2 +870000,201.7 +880000,201.6 +890000,226.1 +900000,225.9 +910000,226.6 +920000,225.2 +930000,250.0 940000,250.0 950000,250.0 960000,250.0 -970000,250.0 -980000,225.2 -990000,250.0 -1000000,225.4 +970000,225.2 +980000,250.0 +990000,226.7 +1000000,250.0 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat_eval/ep_return.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat_eval/ep_return.log index 28b859a96..83c73edf9 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat_eval/ep_return.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat_eval/ep_return.log @@ -1,101 +1,101 @@ step,stat_eval/ep_return -10000,70.84859695809473 -20000,80.59717220112212 -30000,83.86906192038292 -40000,69.97888355594894 -50000,82.2388842506202 -60000,83.81835337834498 -70000,70.52707461302091 -80000,109.02328748191573 -90000,97.62989007325169 -100000,97.44281651546768 -110000,80.226438962058 -120000,105.07085814289312 -130000,124.79732860410316 -140000,127.12975807913759 -150000,95.03641010274417 -160000,123.97845561762003 -170000,127.05844420166348 -180000,141.99546099933622 -190000,97.72420849642386 -200000,126.79023077441448 -210000,135.8407865685552 -220000,126.15871627709744 -230000,147.55789412679422 -240000,122.80649234350233 -250000,133.50049500733869 -260000,148.0944179407292 -270000,122.22034857830226 -280000,96.28217852362879 -290000,100.40988123342824 -300000,121.15371475612073 -310000,131.3381045329332 -320000,135.5662642786056 -330000,142.87758158039014 -340000,131.93875076962772 -350000,169.34745526405612 -360000,143.65147226829234 -370000,134.42061935500004 -380000,126.24619416905321 -390000,141.58951284618826 -400000,140.37756432669545 -410000,154.53397430018455 -420000,161.90041760129162 -430000,109.39704248357097 -440000,115.85730296173097 -450000,137.5402433565857 -460000,170.9496787131727 -470000,152.8122624233379 -480000,147.45980795568804 -490000,163.31537536879773 -500000,141.39756901954286 -510000,146.14369803591396 -520000,103.29960371755058 -530000,144.57245370960356 -540000,147.34706024685573 -550000,153.5974157232909 -560000,156.69187945474806 -570000,141.04359063108555 -580000,164.02860840346463 -590000,165.43265148748782 -600000,151.38249110152597 -610000,156.49483812707166 -620000,148.66910431758552 -630000,155.96831207042112 -640000,178.09632544637 -650000,127.09494213986773 -660000,138.07512963608343 -670000,145.61320617660564 -680000,169.3965990922717 -690000,125.4125572502345 -700000,143.03997335965906 -710000,158.88285088746906 -720000,131.3645475296165 -730000,120.17817013327817 -740000,155.42001820098784 -750000,153.42251084655442 -760000,102.45056065979297 -770000,154.3156233039422 -780000,146.3631480850755 -790000,146.6491486793546 -800000,153.83839771106142 -810000,143.10686513801915 -820000,151.9160343076955 -830000,134.56272410005184 -840000,140.10621606101336 -850000,145.9985000718841 -860000,177.09819793393032 -870000,129.6141414800398 -880000,142.06679697574094 -890000,142.869163307291 -900000,146.74620468558913 -910000,151.75716297976413 -920000,142.66634910529643 -930000,116.57141925111553 -940000,154.77601990121317 -950000,150.57962845884035 -960000,157.37767001172898 -970000,170.62603996273617 -980000,134.0717195023228 -990000,174.63537272424043 -1000000,138.3654114040374 +10000,56.440345454822534 +20000,76.35837512428016 +30000,94.21569030062707 +40000,122.82009445625609 +50000,116.95581225345954 +60000,109.68867438818498 +70000,101.81050235211794 +80000,123.19226181999875 +90000,125.13575219496269 +100000,132.9233793841051 +110000,88.22474793718054 +120000,124.27259873734026 +130000,93.01073341036911 +140000,123.86816176706259 +150000,145.70397838767445 +160000,125.77141789316829 +170000,131.03349070515327 +180000,110.00518374483458 +190000,147.44767115998505 +200000,152.45385706172593 +210000,108.02243593904882 +220000,133.6284252752841 +230000,150.9146434097033 +240000,130.86821111948228 +250000,123.45892634536854 +260000,152.22173974911865 +270000,157.77538468517395 +280000,170.39850088183096 +290000,139.26744337938896 +300000,135.19478646361787 +310000,154.30218398383585 +320000,155.53870607889468 +330000,133.32406916208984 +340000,153.2281401660185 +350000,155.28128718432293 +360000,164.9026087226417 +370000,139.3913234906104 +380000,144.02226211672206 +390000,149.57541834045537 +400000,137.9090184304095 +410000,165.14173870658618 +420000,141.50076337054298 +430000,133.46043742856048 +440000,139.65256950103415 +450000,166.51577893404985 +460000,184.5536175427889 +470000,131.5328871205793 +480000,134.82708512463634 +490000,151.1231804628739 +500000,155.1533572413897 +510000,163.23779690215727 +520000,154.5021305232544 +530000,156.3914821069322 +540000,140.23734341876806 +550000,140.4919574832176 +560000,156.64086948859548 +570000,152.69093580303507 +580000,116.1244535508038 +590000,143.88025734178956 +600000,154.69177265741465 +610000,168.06817794001154 +620000,144.52552429319402 +630000,165.05988676503188 +640000,162.95476428963644 +650000,158.15972267941294 +660000,159.7402507617145 +670000,169.13778423281502 +680000,160.57237243037673 +690000,180.20806230131217 +700000,158.8146158023393 +710000,167.51374740633838 +720000,152.21679238541884 +730000,162.27417102670603 +740000,154.30999131909454 +750000,143.44973668247798 +760000,140.93055033554293 +770000,133.01124167750064 +780000,158.9774164551904 +790000,147.4050339149102 +800000,153.72783666534718 +810000,170.32679883353944 +820000,181.63079367223241 +830000,174.84443366395425 +840000,169.9156619234673 +850000,101.14038906589766 +860000,153.48790318906077 +870000,136.61717436325162 +880000,132.661464310914 +890000,156.26017336734532 +900000,149.57620046507472 +910000,154.49288681724147 +920000,171.00107036603526 +930000,173.05218136634784 +940000,165.36126982091582 +950000,161.06220764266135 +960000,176.82005228401303 +970000,148.62864143296045 +980000,181.44893541061356 +990000,170.97294182474715 +1000000,159.38214129051335 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat_eval/ep_reward.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat_eval/ep_reward.log index 7af2eac35..ed11e206a 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat_eval/ep_reward.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat_eval/ep_reward.log @@ -1,101 +1,101 @@ step,stat_eval/ep_reward -10000,0.2833943878323789 -20000,0.3223886888044885 -30000,0.3354762476815317 -40000,0.2799155342237958 -50000,0.32895553700248076 -60000,0.3361298584339981 -70000,0.2835785754894354 -80000,0.43609314992766296 -90000,0.39094205031937995 -100000,0.38977126606187074 -110000,0.32121191037121555 -120000,0.4209000726648342 -130000,0.4991893144164125 -140000,0.5085190323165503 -150000,0.3823805444048824 -160000,0.4959138224704801 -170000,0.5082337768066539 -180000,0.567981843997345 -190000,0.39609053166569497 -200000,0.5071609230976579 -210000,0.5433631462742208 -220000,0.5089344930170467 -230000,0.590231576507177 -240000,0.4927415738366963 -250000,0.5340019800293547 -260000,0.5923776717629167 -270000,0.48891874534098756 -280000,0.38636724326747174 -290000,0.40201618427659236 -300000,0.48491801484500374 -310000,0.5264194663754127 -320000,0.5422650571144223 -330000,0.5715103263215606 -340000,0.527755003078511 -350000,0.6773898210562246 -360000,0.5746333214976819 -370000,0.538030005550129 -380000,0.5111190714203023 -390000,0.5742199324236268 -400000,0.5615102573067817 -410000,0.6181358972007381 -420000,0.6476016704051664 -430000,0.44536630091832485 -440000,0.46484057355707 -450000,0.5501860702409342 -460000,0.6837987148526907 -470000,0.6112490496933516 -480000,0.5898392318227521 -490000,0.6532615014751909 -500000,0.5735606577623981 -510000,0.5845747921436557 -520000,0.43183605987523954 -530000,0.5782898148384141 -540000,0.5894227477998032 -550000,0.6143896628931637 -560000,0.6267675178189924 -570000,0.5652154993409743 -580000,0.6561144336138585 -590000,0.6617306059499513 -600000,0.6059307044141636 -610000,0.6259793525082866 -620000,0.5946764172703422 -630000,0.6238732482816844 -640000,0.71238530178548 -650000,0.5092800489602957 -660000,0.5533738287443352 -670000,0.5872215799877126 -680000,0.6775863963690868 -690000,0.5051938218326157 -700000,0.5722175230448697 -710000,0.6385926111791467 -720000,0.5263642401008211 -730000,0.48941092299202466 -740000,0.6216800728039514 -750000,0.6136900433862176 -760000,0.4140316327645725 -770000,0.6172624932157688 -780000,0.585452592340302 -790000,0.5865965947174183 -800000,0.6180391921049871 -810000,0.5735979063191977 -820000,0.6078498115411721 -830000,0.5676433984183052 -840000,0.5614263129407557 -850000,0.5873238534256708 -860000,0.7083927917357211 -870000,0.5196185782728706 -880000,0.5694951773961607 -890000,0.571510327570345 -900000,0.5872372021989875 -910000,0.6082687561122413 -920000,0.571869894216625 -930000,0.4669861052567012 -940000,0.6191040796048527 -950000,0.6023185138353615 -960000,0.6295106800469157 -970000,0.6825041598509446 -980000,0.5363620239696731 -990000,0.6985414908969618 -1000000,0.5535093162692427 +10000,0.23110724111900144 +20000,0.30616281489981173 +30000,0.3769915363204419 +40000,0.4912803778250243 +50000,0.4678232490138382 +60000,0.4387546975527399 +70000,0.40773753129030654 +80000,0.49276904727999493 +90000,0.5005430087798508 +100000,0.5316935175364204 +110000,0.3541115928715837 +120000,0.4970903949493611 +130000,0.3779567066711356 +140000,0.49547264706825034 +150000,0.5828159135506978 +160000,0.5030856715726733 +170000,0.5242785473222604 +180000,0.44043872416774654 +190000,0.5897906846399401 +200000,0.6098154282469037 +210000,0.43613361301325815 +220000,0.534960142083822 +230000,0.6036585736388133 +240000,0.5240879960644454 +250000,0.4942791279069542 +260000,0.6088869589964746 +270000,0.631101538740696 +280000,0.6815940035273237 +290000,0.559303178352014 +300000,0.5407959826829002 +310000,0.6172087359353434 +320000,0.6221548243155787 +330000,0.5370067898159285 +340000,0.6129125606640741 +350000,0.6211251487372917 +360000,0.6596104348905667 +370000,0.5618769743725162 +380000,0.5768387506256784 +390000,0.5983016733618215 +400000,0.5521101894365257 +410000,0.6605669548263446 +420000,0.5664937550352759 +430000,0.539231676288162 +440000,0.5587190967246116 +450000,0.6660631157361994 +460000,0.7382144701711557 +470000,0.5286496359244849 +480000,0.5412200027756613 +490000,0.6044927218514956 +500000,0.6206134289655588 +510000,0.6572604214546136 +520000,0.6194298222518132 +530000,0.6255659284277288 +540000,0.5609863390148003 +550000,0.5623477024102522 +560000,0.6273934706704701 +570000,0.6107637432121402 +580000,0.46488693526084457 +590000,0.5758233308881322 +600000,0.6187670906296584 +610000,0.6722727117600462 +620000,0.5791746442980219 +630000,0.6602395470601274 +640000,0.6518190571585458 +650000,0.6326388907176519 +660000,0.638961003046858 +670000,0.6765511369312601 +680000,0.642289489721507 +690000,0.7208322492052487 +700000,0.6352584632093572 +710000,0.6700549896253535 +720000,0.6088941121911333 +730000,0.6494210965615393 +740000,0.6172399652763781 +750000,0.5799288790002556 +760000,0.5637397447038521 +770000,0.5363202465398225 +780000,0.6437033566026609 +790000,0.5923959834688175 +800000,0.6149113466613887 +810000,0.6813071953341578 +820000,0.7265231746889298 +830000,0.699377734655817 +840000,0.6796626476938692 +850000,0.4132131247862688 +860000,0.613951612756243 +870000,0.5472080639994361 +880000,0.5320590458774962 +890000,0.6250657953698578 +900000,0.5985517540547376 +910000,0.6189280607515821 +920000,0.6846857379945038 +930000,0.6922087254653915 +940000,0.6614450792836634 +950000,0.6442488305706454 +960000,0.7072802091360522 +970000,0.5951639604794478 +980000,0.7257957416424542 +990000,0.6918270720413886 +1000000,0.6375285651620535 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat_eval/mse.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat_eval/mse.log index 4d553dec5..a44af3131 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat_eval/mse.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/logs/stat_eval/mse.log @@ -1,101 +1,101 @@ step,stat_eval/mse -10000,493.23066045768826 -20000,425.7894329532661 -30000,439.95321656825837 -40000,437.37228883678733 -50000,366.01111454494617 -60000,384.90853327709993 -70000,348.76267241855027 -80000,350.2474832426566 -90000,251.0634692962883 -100000,369.30995206588614 -110000,251.88891694743725 -120000,293.59402544941196 -130000,312.19972298991195 -140000,287.48458572530586 -150000,279.2145814281905 -160000,312.8363436270232 -170000,285.5401700148386 -180000,243.72441851411585 -190000,167.19730634337603 -200000,348.8529199694383 -210000,262.30385560868046 -220000,235.49888725489959 -230000,192.40029808420678 -240000,224.76448498678843 -250000,316.93975423293705 -260000,227.71555670398828 -270000,276.6837752646369 -280000,223.2689023357362 -290000,342.01598552571886 -300000,313.98095428697576 -310000,263.1069785255941 -320000,329.1852496366378 -330000,277.7736204267825 -340000,380.195048302146 -350000,160.65532550461108 -360000,163.12466938306878 -370000,201.05085004797905 -380000,201.20958161134664 -390000,173.7644471253671 -400000,295.87713274148086 -410000,220.14965087251394 -420000,238.70294367491184 -430000,223.93596066339387 -440000,269.84952233161005 -450000,243.6608360636185 -460000,163.56355195431505 -470000,205.91685924364856 -480000,291.6610595557451 -490000,202.448352352257 -500000,211.9021056839333 -510000,268.1835344833734 -520000,107.4462688358943 -530000,283.55753992263897 -540000,154.2889188392111 -550000,256.38126725247196 -560000,222.2171018704783 -570000,231.8855561972645 -580000,217.54353419245336 -590000,203.54887294511963 -600000,177.58967445129448 -610000,291.25994057819645 -620000,253.43540802001462 -630000,221.46392876885474 -640000,116.21158213773246 -650000,163.20981434679555 -660000,219.1322318143296 -670000,202.863897838454 -680000,188.3487481714439 -690000,210.76308409829508 -700000,193.52864784425418 -710000,114.73409128910028 -720000,215.4018371394803 -730000,244.23471728390422 -740000,262.76215325216856 -750000,268.4918593094433 -760000,226.95508964803525 -770000,216.0737042160862 -780000,282.4209977612663 -790000,334.4482942860037 -800000,203.29928729628696 -810000,217.23053400645944 -820000,188.8133686975171 -830000,141.27978142488314 -840000,234.3270856135391 -850000,195.4862091193363 -860000,147.1516732425842 -870000,203.1588921259981 -880000,235.15751460784927 -890000,227.95187957583403 -900000,209.05716880790357 -910000,200.5497865998052 -920000,203.77528165015283 -930000,250.1989572832556 -940000,282.20150606036515 -950000,297.82756039824596 -960000,227.35106137827006 -970000,163.32987416042988 -980000,220.21180479377668 -990000,140.83650928439636 -1000000,226.33557766947803 +10000,465.05769574189736 +20000,320.80999696465983 +30000,335.9847224650206 +40000,275.35875161038285 +50000,288.9955129399633 +60000,346.3815936146224 +70000,372.6451118626167 +80000,228.3613559304512 +90000,242.0747172954732 +100000,296.40635434314817 +110000,351.3753155853386 +120000,300.39583922049206 +130000,175.5766420095602 +140000,302.26382998354177 +150000,233.56130732061462 +160000,304.28073834247414 +170000,219.28891298532417 +180000,226.79496974455833 +190000,260.3693510060508 +200000,243.32150517584014 +210000,192.14119924754172 +220000,239.28845964815778 +230000,251.65601106234263 +240000,241.80720658554407 +250000,229.10268931395225 +260000,250.64431099109373 +270000,245.1911066906768 +280000,215.42786019827298 +290000,145.04783891438427 +300000,286.3271048162506 +310000,291.1675874538899 +320000,297.3756750773912 +330000,189.3365286898682 +340000,294.2815293456239 +350000,330.75683437475635 +360000,233.93541110891198 +370000,157.72845481660255 +380000,205.2410107553359 +390000,307.4667642817356 +400000,233.7114387930112 +410000,231.6624526847075 +420000,134.1545201021237 +430000,177.75598140837934 +440000,279.3959313433555 +450000,203.97303191239945 +460000,118.65610242551163 +470000,244.3612767088673 +480000,176.43846061692884 +490000,368.29476958528653 +500000,256.3147059247477 +510000,131.94036352789396 +520000,206.14489565136932 +530000,285.82931144202763 +540000,254.79104721304284 +550000,266.75109739855463 +560000,104.14322444935706 +570000,284.8573781156915 +580000,302.5280142253306 +590000,265.19761932671383 +600000,283.3215945291977 +610000,241.2797217076887 +620000,213.60515558684534 +630000,223.53668801727736 +640000,234.0015872765447 +650000,305.91493273860556 +660000,256.6335041900071 +670000,231.69120769556912 +680000,293.1271803426938 +690000,156.12960390445915 +700000,228.47943913688928 +710000,247.66307278264736 +720000,195.725414861577 +730000,158.2137050116807 +740000,252.2080714481824 +750000,226.21077397379472 +760000,243.34770042250543 +770000,139.70282733904332 +780000,195.03669927590855 +790000,192.70088570947541 +800000,293.82023259996606 +810000,188.8998079076532 +820000,139.45490300894534 +830000,213.0619829726842 +840000,200.48102647898577 +850000,105.34228713339078 +860000,336.05357744689917 +870000,206.18865069418626 +880000,194.4411512648196 +890000,197.84075761227456 +900000,222.26514258622052 +910000,216.73183407746382 +920000,102.52060221608447 +930000,177.51353841111612 +940000,238.70367812009832 +950000,303.9442956733616 +960000,189.2087512768655 +970000,237.06458295311762 +980000,160.92643565878956 +990000,98.0095243755537 +1000000,337.26396395936536 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/model_best.pt b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/model_best.pt index 098627303..13adcf7af 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/model_best.pt and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/model_best.pt differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/model_latest.pt b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/model_latest.pt index 9a8003020..d381eb88b 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/model_latest.pt and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/model_latest.pt differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-loss-approx_kl.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-loss-approx_kl.jpg index 19b605a37..0ede7a16e 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-loss-approx_kl.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-loss-approx_kl.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-loss-entropy_loss.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-loss-entropy_loss.jpg index 3af8b8dae..e895ebc21 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-loss-entropy_loss.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-loss-entropy_loss.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-loss-policy_loss.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-loss-policy_loss.jpg index 743bec23b..1b64e4281 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-loss-policy_loss.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-loss-policy_loss.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-loss-value_loss.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-loss-value_loss.jpg index f56bb050f..7291e0c95 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-loss-value_loss.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-loss-value_loss.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat-constraint_violation.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat-constraint_violation.jpg index b8d399244..0a282686e 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat-constraint_violation.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat-constraint_violation.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat-ep_constraint_violation.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat-ep_constraint_violation.jpg index 3c4946844..fe3acc68e 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat-ep_constraint_violation.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat-ep_constraint_violation.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat-ep_length.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat-ep_length.jpg index e54547f3f..d51a72069 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat-ep_length.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat-ep_length.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat-ep_return.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat-ep_return.jpg index 4938de90f..76f9069ba 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat-ep_return.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat-ep_return.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat-ep_reward.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat-ep_reward.jpg index e5dd74061..536bc1d0d 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat-ep_reward.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat-ep_reward.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat_eval-constraint_violation.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat_eval-constraint_violation.jpg index 5aba42dbf..72df91e11 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat_eval-constraint_violation.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat_eval-constraint_violation.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat_eval-ep_length.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat_eval-ep_length.jpg index 712058527..23d58ef41 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat_eval-ep_length.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat_eval-ep_length.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat_eval-ep_return.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat_eval-ep_return.jpg index c0adc844d..e2dc62d98 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat_eval-ep_return.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat_eval-ep_return.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat_eval-ep_reward.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat_eval-ep_reward.jpg index 4dc943cb6..057ad0a7c 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat_eval-ep_reward.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat_eval-ep_reward.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat_eval-mse.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat_eval-mse.jpg index 211ef19ed..3b199dc56 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat_eval-mse.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/plots/-stat_eval-mse.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/std_out.txt b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/std_out.txt index 6443864ce..45b6c581b 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/std_out.txt +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/std_out.txt @@ -1,2601 +1,2601 @@ -2023-10-19 14:52:03,092 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 70.849 +/- 24.113 -2023-10-19 14:52:03,107 : +2023-10-27 16:43:47,502 : Eval | ep_lengths 225.90 +/- 72.30 | ep_return 56.440 +/- 33.602 +2023-10-27 16:43:47,516 : -------------------------------------- | loss/ | | -| approx_kl | 0.0327 | -| entropy_loss | -3.7 | -| policy_loss | -0.00628 | -| value_loss | 9.37 | +| approx_kl | 0.0244 | +| entropy_loss | -3.75 | +| policy_loss | -0.0155 | +| value_loss | 19.8 | | stat/ | | -| constraint_violation | 46 | -| ep_constraint_vio... | 1.5 | -| ep_length | 226 | -| ep_return | 46.6 | -| ep_reward | 0.187 | -| stat_eval/ | | | constraint_violation | 0 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 70.8 | -| ep_reward | 0.283 | -| mse | 493 | +| ep_return | 83.2 | +| ep_reward | 0.333 | +| stat_eval/ | | +| constraint_violation | 0.4 | +| ep_length | 226 | +| ep_return | 56.4 | +| ep_reward | 0.231 | +| mse | 465 | | time/ | | | progress | 0.01 | | step | 1e+04 | -| step_time | 11.6 | +| step_time | 12.8 | -------------------------------------- -2023-10-19 14:54:21,786 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 80.597 +/- 28.889 -2023-10-19 14:54:21,812 : +2023-10-27 16:46:35,365 : Eval | ep_lengths 201.90 +/- 96.23 | ep_return 76.358 +/- 46.505 +2023-10-27 16:46:35,384 : -------------------------------------- | loss/ | | -| approx_kl | 0.0218 | -| entropy_loss | -3.78 | -| policy_loss | -0.0171 | -| value_loss | 18.2 | +| approx_kl | 0.0209 | +| entropy_loss | -3.81 | +| policy_loss | -0.0157 | +| value_loss | 22.4 | | stat/ | | -| constraint_violation | 47 | +| constraint_violation | 0 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 75.2 | -| ep_reward | 0.301 | +| ep_return | 113 | +| ep_reward | 0.454 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 80.6 | -| ep_reward | 0.322 | -| mse | 426 | +| constraint_violation | 0.3 | +| ep_length | 202 | +| ep_return | 76.4 | +| ep_reward | 0.306 | +| mse | 321 | | time/ | | | progress | 0.02 | | step | 2e+04 | -| step_time | 11.5 | +| step_time | 13.5 | -------------------------------------- -2023-10-19 14:56:41,753 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 83.869 +/- 20.480 -2023-10-19 14:56:41,761 : +2023-10-27 16:49:24,705 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 94.216 +/- 37.282 +2023-10-27 16:49:24,713 : -------------------------------------- | loss/ | | -| approx_kl | 0.0232 | -| entropy_loss | -3.72 | -| policy_loss | -0.0102 | -| value_loss | 8.4 | +| approx_kl | 0.0189 | +| entropy_loss | -3.77 | +| policy_loss | -0.00772 | +| value_loss | 6.99 | | stat/ | | -| constraint_violation | 50 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 64.5 | -| ep_reward | 0.258 | -| stat_eval/ | | | constraint_violation | 0 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 83.9 | -| ep_reward | 0.335 | -| mse | 440 | +| ep_return | 116 | +| ep_reward | 0.464 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 94.2 | +| ep_reward | 0.377 | +| mse | 336 | | time/ | | | progress | 0.03 | | step | 3e+04 | -| step_time | 11.4 | +| step_time | 14.4 | -------------------------------------- -2023-10-19 14:59:01,042 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 69.979 +/- 16.324 -2023-10-19 14:59:01,044 : +2023-10-27 16:52:15,107 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 122.820 +/- 19.764 +2023-10-27 16:52:15,115 : -------------------------------------- | loss/ | | -| approx_kl | 0.0241 | -| entropy_loss | -3.77 | -| policy_loss | -0.0173 | -| value_loss | 4.25 | +| approx_kl | 0.0227 | +| entropy_loss | -3.81 | +| policy_loss | -0.0168 | +| value_loss | 5.09 | | stat/ | | -| constraint_violation | 73 | -| ep_constraint_vio... | 0.7 | -| ep_length | 203 | -| ep_return | 60.5 | -| ep_reward | 0.242 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 132 | +| ep_reward | 0.527 | | stat_eval/ | | -| constraint_violation | 2.2 | +| constraint_violation | 0.1 | | ep_length | 250 | -| ep_return | 70 | -| ep_reward | 0.28 | -| mse | 437 | +| ep_return | 123 | +| ep_reward | 0.491 | +| mse | 275 | | time/ | | | progress | 0.04 | | step | 4e+04 | -| step_time | 11.7 | +| step_time | 13.7 | -------------------------------------- -2023-10-19 15:01:21,739 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 82.239 +/- 20.266 -2023-10-19 15:01:21,741 : +2023-10-27 16:55:03,869 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 116.956 +/- 23.304 +2023-10-27 16:55:03,882 : -------------------------------------- | loss/ | | -| approx_kl | 0.0286 | -| entropy_loss | -3.74 | -| policy_loss | -0.0109 | -| value_loss | 6.91 | +| approx_kl | 0.0297 | +| entropy_loss | -3.94 | +| policy_loss | -0.00182 | +| value_loss | 5.59 | | stat/ | | -| constraint_violation | 112 | +| constraint_violation | 0 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 98.3 | -| ep_reward | 0.393 | +| ep_return | 130 | +| ep_reward | 0.52 | | stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 0.9 | | ep_length | 250 | -| ep_return | 82.2 | -| ep_reward | 0.329 | -| mse | 366 | +| ep_return | 117 | +| ep_reward | 0.468 | +| mse | 289 | | time/ | | | progress | 0.05 | | step | 5e+04 | -| step_time | 11.5 | +| step_time | 13 | -------------------------------------- -2023-10-19 15:03:40,951 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 83.818 +/- 38.388 -2023-10-19 15:03:40,952 : +2023-10-27 16:57:56,165 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 109.689 +/- 25.672 +2023-10-27 16:57:56,178 : -------------------------------------- | loss/ | | -| approx_kl | 0.0209 | -| entropy_loss | -3.83 | -| policy_loss | -0.0134 | -| value_loss | 5.2 | +| approx_kl | 0.012 | +| entropy_loss | -3.87 | +| policy_loss | -0.0123 | +| value_loss | 1.7 | | stat/ | | -| constraint_violation | 146 | -| ep_constraint_vio... | 0.3 | -| ep_length | 200 | -| ep_return | 81.7 | -| ep_reward | 0.329 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 133 | +| ep_reward | 0.532 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 83.8 | -| ep_reward | 0.336 | -| mse | 385 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 110 | +| ep_reward | 0.439 | +| mse | 346 | | time/ | | | progress | 0.06 | | step | 6e+04 | -| step_time | 11.4 | +| step_time | 15.9 | -------------------------------------- -2023-10-19 15:05:59,274 : Eval | ep_lengths 201.90 +/- 96.23 | ep_return 70.527 +/- 47.689 -2023-10-19 15:05:59,275 : +2023-10-27 17:00:47,021 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 101.811 +/- 42.220 +2023-10-27 17:00:47,022 : -------------------------------------- | loss/ | | -| approx_kl | 0.0112 | -| entropy_loss | -3.89 | -| policy_loss | -0.0214 | -| value_loss | 6.31 | +| approx_kl | 0.0166 | +| entropy_loss | -3.87 | +| policy_loss | -0.0094 | +| value_loss | 2.59 | | stat/ | | -| constraint_violation | 207 | -| ep_constraint_vio... | 1.3 | -| ep_length | 226 | -| ep_return | 81.3 | -| ep_reward | 0.326 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 141 | +| ep_reward | 0.565 | | stat_eval/ | | -| constraint_violation | 0.9 | -| ep_length | 202 | -| ep_return | 70.5 | -| ep_reward | 0.284 | -| mse | 349 | +| constraint_violation | 1.5 | +| ep_length | 225 | +| ep_return | 102 | +| ep_reward | 0.408 | +| mse | 373 | | time/ | | | progress | 0.07 | | step | 7e+04 | -| step_time | 11.6 | +| step_time | 15.8 | -------------------------------------- -2023-10-19 15:08:21,744 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 109.023 +/- 18.526 -2023-10-19 15:08:21,755 : +2023-10-27 17:03:43,917 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 123.192 +/- 21.805 +2023-10-27 17:03:43,937 : -------------------------------------- | loss/ | | -| approx_kl | 0.0295 | -| entropy_loss | -3.88 | -| policy_loss | -0.0107 | -| value_loss | 7.14 | +| approx_kl | 0.0105 | +| entropy_loss | -4.04 | +| policy_loss | -0.0115 | +| value_loss | 2.1 | | stat/ | | -| constraint_violation | 246 | +| constraint_violation | 0 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 91.5 | -| ep_reward | 0.366 | +| ep_return | 135 | +| ep_reward | 0.54 | | stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 1.1 | | ep_length | 250 | -| ep_return | 109 | -| ep_reward | 0.436 | -| mse | 350 | +| ep_return | 123 | +| ep_reward | 0.493 | +| mse | 228 | | time/ | | | progress | 0.08 | | step | 8e+04 | -| step_time | 11.8 | +| step_time | 14.9 | -------------------------------------- -2023-10-19 15:10:38,227 : Eval | ep_lengths 200.90 +/- 98.20 | ep_return 97.630 +/- 53.804 -2023-10-19 15:10:38,229 : +2023-10-27 17:06:40,123 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 125.136 +/- 13.800 +2023-10-27 17:06:40,131 : -------------------------------------- | loss/ | | -| approx_kl | 0.028 | -| entropy_loss | -3.84 | -| policy_loss | -0.0118 | -| value_loss | 3.06 | +| approx_kl | 0.0172 | +| entropy_loss | -4.01 | +| policy_loss | -0.0153 | +| value_loss | 0.917 | | stat/ | | -| constraint_violation | 278 | -| ep_constraint_vio... | 1.5 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 110 | -| ep_reward | 0.442 | +| ep_return | 132 | +| ep_reward | 0.528 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 201 | -| ep_return | 97.6 | -| ep_reward | 0.391 | -| mse | 251 | +| constraint_violation | 1 | +| ep_length | 250 | +| ep_return | 125 | +| ep_reward | 0.501 | +| mse | 242 | | time/ | | | progress | 0.09 | | step | 9e+04 | -| step_time | 11.4 | +| step_time | 15.5 | -------------------------------------- -2023-10-19 15:13:00,836 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 97.443 +/- 27.688 -2023-10-19 15:13:00,838 : +2023-10-27 17:09:36,355 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 132.923 +/- 27.658 +2023-10-27 17:09:36,363 : -------------------------------------- | loss/ | | -| approx_kl | 0.019 | -| entropy_loss | -3.85 | -| policy_loss | -0.0182 | -| value_loss | 4.04 | +| approx_kl | 0.0266 | +| entropy_loss | -3.98 | +| policy_loss | -0.0103 | +| value_loss | 3.62 | | stat/ | | -| constraint_violation | 301 | -| ep_constraint_vio... | 0.7 | -| ep_length | 226 | -| ep_return | 89.7 | -| ep_reward | 0.359 | -| stat_eval/ | | | constraint_violation | 0 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 97.4 | -| ep_reward | 0.39 | -| mse | 369 | +| ep_return | 144 | +| ep_reward | 0.576 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 250 | +| ep_return | 133 | +| ep_reward | 0.532 | +| mse | 296 | | time/ | | | progress | 0.1 | | step | 1e+05 | -| step_time | 11.9 | +| step_time | 14.3 | -------------------------------------- -2023-10-19 15:15:17,184 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 80.226 +/- 43.804 -2023-10-19 15:15:17,185 : +2023-10-27 17:12:20,996 : Eval | ep_lengths 201.30 +/- 97.42 | ep_return 88.225 +/- 44.908 +2023-10-27 17:12:21,009 : -------------------------------------- | loss/ | | -| approx_kl | 0.0391 | -| entropy_loss | -3.89 | -| policy_loss | -0.0185 | -| value_loss | 1.9 | +| approx_kl | 0.0277 | +| entropy_loss | -3.98 | +| policy_loss | -0.00924 | +| value_loss | 2.58 | | stat/ | | -| constraint_violation | 343 | -| ep_constraint_vio... | 1.4 | -| ep_length | 226 | -| ep_return | 79.6 | -| ep_reward | 0.319 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 154 | +| ep_reward | 0.615 | | stat_eval/ | | | constraint_violation | 0.2 | -| ep_length | 200 | -| ep_return | 80.2 | -| ep_reward | 0.321 | -| mse | 252 | +| ep_length | 201 | +| ep_return | 88.2 | +| ep_reward | 0.354 | +| mse | 351 | | time/ | | | progress | 0.11 | | step | 1.1e+05 | -| step_time | 11.6 | +| step_time | 13.3 | -------------------------------------- -2023-10-19 15:17:32,993 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 105.071 +/- 46.697 -2023-10-19 15:17:33,013 : +2023-10-27 17:15:12,827 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 124.273 +/- 33.066 +2023-10-27 17:15:12,840 : -------------------------------------- | loss/ | | -| approx_kl | 0.0273 | -| entropy_loss | -3.99 | -| policy_loss | -0.00877 | -| value_loss | 2.27 | +| approx_kl | 0.0255 | +| entropy_loss | -3.98 | +| policy_loss | -0.00499 | +| value_loss | 2.36 | | stat/ | | -| constraint_violation | 358 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 95.8 | -| ep_reward | 0.387 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 143 | +| ep_reward | 0.573 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 105 | -| ep_reward | 0.421 | -| mse | 294 | +| constraint_violation | 0.2 | +| ep_length | 250 | +| ep_return | 124 | +| ep_reward | 0.497 | +| mse | 300 | | time/ | | | progress | 0.12 | | step | 1.2e+05 | -| step_time | 10.5 | +| step_time | 15.2 | -------------------------------------- -2023-10-19 15:19:51,867 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 124.797 +/- 26.567 -2023-10-19 15:19:51,878 : +2023-10-27 17:17:51,997 : Eval | ep_lengths 177.50 +/- 110.77 | ep_return 93.011 +/- 63.806 +2023-10-27 17:17:51,999 : -------------------------------------- | loss/ | | -| approx_kl | 0.0321 | -| entropy_loss | -3.99 | -| policy_loss | -0.00998 | -| value_loss | 1.85 | +| approx_kl | 0.0323 | +| entropy_loss | -4.05 | +| policy_loss | -0.0128 | +| value_loss | 1.16 | | stat/ | | -| constraint_violation | 380 | +| constraint_violation | 0 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 118 | -| ep_reward | 0.473 | +| ep_return | 153 | +| ep_reward | 0.612 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 125 | -| ep_reward | 0.499 | -| mse | 312 | +| constraint_violation | 1.3 | +| ep_length | 178 | +| ep_return | 93 | +| ep_reward | 0.378 | +| mse | 176 | | time/ | | | progress | 0.13 | | step | 1.3e+05 | -| step_time | 11.2 | +| step_time | 13.1 | -------------------------------------- -2023-10-19 15:22:11,430 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 127.130 +/- 18.922 -2023-10-19 15:22:11,438 : +2023-10-27 17:20:43,020 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 123.868 +/- 27.976 +2023-10-27 17:20:43,022 : -------------------------------------- | loss/ | | -| approx_kl | 0.0391 | -| entropy_loss | -4 | -| policy_loss | -0.0164 | -| value_loss | 0.788 | +| approx_kl | 0.0261 | +| entropy_loss | -4.08 | +| policy_loss | -0.00905 | +| value_loss | 1.32 | | stat/ | | -| constraint_violation | 399 | +| constraint_violation | 0 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 125 | -| ep_reward | 0.502 | +| ep_return | 151 | +| ep_reward | 0.605 | | stat_eval/ | | -| constraint_violation | 1.7 | +| constraint_violation | 0.1 | | ep_length | 250 | -| ep_return | 127 | -| ep_reward | 0.509 | -| mse | 287 | +| ep_return | 124 | +| ep_reward | 0.495 | +| mse | 302 | | time/ | | | progress | 0.14 | | step | 1.4e+05 | -| step_time | 11.2 | +| step_time | 14.2 | -------------------------------------- -2023-10-19 15:24:25,814 : Eval | ep_lengths 201.40 +/- 97.21 | ep_return 95.036 +/- 50.302 -2023-10-19 15:24:25,815 : +2023-10-27 17:23:33,252 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 145.704 +/- 28.622 +2023-10-27 17:23:33,260 : -------------------------------------- | loss/ | | -| approx_kl | 0.0252 | -| entropy_loss | -4.04 | -| policy_loss | -0.0017 | -| value_loss | 1.07 | +| approx_kl | 0.0313 | +| entropy_loss | -4.07 | +| policy_loss | -0.00767 | +| value_loss | 2.16 | | stat/ | | -| constraint_violation | 433 | -| ep_constraint_vio... | 1.4 | -| ep_length | 227 | -| ep_return | 120 | -| ep_reward | 0.48 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 164 | +| ep_reward | 0.654 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 95 | -| ep_reward | 0.382 | -| mse | 279 | +| constraint_violation | 1.4 | +| ep_length | 250 | +| ep_return | 146 | +| ep_reward | 0.583 | +| mse | 234 | | time/ | | | progress | 0.15 | | step | 1.5e+05 | -| step_time | 11.3 | +| step_time | 13 | -------------------------------------- -2023-10-19 15:26:43,661 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 123.978 +/- 22.474 -2023-10-19 15:26:43,663 : +2023-10-27 17:26:17,841 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 125.771 +/- 24.231 +2023-10-27 17:26:17,842 : -------------------------------------- | loss/ | | -| approx_kl | 0.0308 | -| entropy_loss | -4.03 | -| policy_loss | -0.0127 | -| value_loss | 1.34 | +| approx_kl | 0.0291 | +| entropy_loss | -4.14 | +| policy_loss | -0.00616 | +| value_loss | 1.79 | | stat/ | | -| constraint_violation | 473 | -| ep_constraint_vio... | 0.1 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 124 | -| ep_reward | 0.496 | +| ep_return | 175 | +| ep_reward | 0.702 | | stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 0.2 | | ep_length | 250 | -| ep_return | 124 | -| ep_reward | 0.496 | -| mse | 313 | +| ep_return | 126 | +| ep_reward | 0.503 | +| mse | 304 | | time/ | | | progress | 0.16 | | step | 1.6e+05 | -| step_time | 10.9 | +| step_time | 15.4 | -------------------------------------- -2023-10-19 15:28:58,336 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 127.058 +/- 23.023 -2023-10-19 15:28:58,360 : +2023-10-27 17:29:08,370 : Eval | ep_lengths 225.80 +/- 72.60 | ep_return 131.033 +/- 60.489 +2023-10-27 17:29:08,372 : -------------------------------------- | loss/ | | -| approx_kl | 0.024 | -| entropy_loss | -4.06 | -| policy_loss | -0.0048 | -| value_loss | 1.2 | +| approx_kl | 0.0229 | +| entropy_loss | -4.13 | +| policy_loss | -0.00605 | +| value_loss | 0.991 | | stat/ | | -| constraint_violation | 515 | -| ep_constraint_vio... | 3.4 | -| ep_length | 250 | -| ep_return | 124 | -| ep_reward | 0.496 | -| stat_eval/ | | | constraint_violation | 0 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 127 | -| ep_reward | 0.508 | -| mse | 286 | +| ep_return | 172 | +| ep_reward | 0.689 | +| stat_eval/ | | +| constraint_violation | 0.3 | +| ep_length | 226 | +| ep_return | 131 | +| ep_reward | 0.524 | +| mse | 219 | | time/ | | | progress | 0.17 | | step | 1.7e+05 | -| step_time | 11 | +| step_time | 13.7 | -------------------------------------- -2023-10-19 15:31:12,139 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 141.995 +/- 24.300 -2023-10-19 15:31:12,149 : +2023-10-27 17:31:57,456 : Eval | ep_lengths 200.90 +/- 98.20 | ep_return 110.005 +/- 63.815 +2023-10-27 17:31:57,458 : -------------------------------------- | loss/ | | -| approx_kl | 0.0302 | +| approx_kl | 0.0182 | | entropy_loss | -4.17 | -| policy_loss | -0.014 | -| value_loss | 1.43 | +| policy_loss | -0.0111 | +| value_loss | 0.49 | | stat/ | | -| constraint_violation | 522 | -| ep_constraint_vio... | 0.6 | -| ep_length | 225 | -| ep_return | 125 | -| ep_reward | 0.5 | -| stat_eval/ | | | constraint_violation | 0 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 142 | -| ep_reward | 0.568 | -| mse | 244 | +| ep_return | 170 | +| ep_reward | 0.681 | +| stat_eval/ | | +| constraint_violation | 0.3 | +| ep_length | 201 | +| ep_return | 110 | +| ep_reward | 0.44 | +| mse | 227 | | time/ | | | progress | 0.18 | | step | 1.8e+05 | -| step_time | 10.8 | +| step_time | 14.3 | -------------------------------------- -2023-10-19 15:33:17,919 : Eval | ep_lengths 178.20 +/- 109.68 | ep_return 97.724 +/- 66.257 -2023-10-19 15:33:17,920 : +2023-10-27 17:34:50,815 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 147.448 +/- 28.526 +2023-10-27 17:34:50,824 : -------------------------------------- | loss/ | | -| approx_kl | 0.0202 | -| entropy_loss | -4.15 | -| policy_loss | -0.0245 | -| value_loss | 1.06 | +| approx_kl | 0.024 | +| entropy_loss | -4.21 | +| policy_loss | -0.0122 | +| value_loss | 0.557 | | stat/ | | -| constraint_violation | 548 | +| constraint_violation | 0 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 124 | -| ep_reward | 0.496 | +| ep_return | 168 | +| ep_reward | 0.672 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 178 | -| ep_return | 97.7 | -| ep_reward | 0.396 | -| mse | 167 | +| constraint_violation | 1.7 | +| ep_length | 250 | +| ep_return | 147 | +| ep_reward | 0.59 | +| mse | 260 | | time/ | | | progress | 0.19 | | step | 1.9e+05 | -| step_time | 10.5 | +| step_time | 15.5 | -------------------------------------- -2023-10-19 15:35:30,790 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 126.790 +/- 19.789 -2023-10-19 15:35:30,792 : +2023-10-27 17:37:41,907 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 152.454 +/- 27.749 +2023-10-27 17:37:41,915 : -------------------------------------- | loss/ | | -| approx_kl | 0.0216 | -| entropy_loss | -4.18 | -| policy_loss | -0.0156 | -| value_loss | 0.713 | +| approx_kl | 0.0257 | +| entropy_loss | -4.23 | +| policy_loss | -0.015 | +| value_loss | 1.25 | | stat/ | | -| constraint_violation | 569 | -| ep_constraint_vio... | 1 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 135 | -| ep_reward | 0.539 | +| ep_return | 173 | +| ep_reward | 0.694 | | stat_eval/ | | -| constraint_violation | 0.1 | +| constraint_violation | 0.2 | | ep_length | 250 | -| ep_return | 127 | -| ep_reward | 0.507 | -| mse | 349 | +| ep_return | 152 | +| ep_reward | 0.61 | +| mse | 243 | | time/ | | | progress | 0.2 | | step | 2e+05 | -| step_time | 11 | +| step_time | 14.4 | -------------------------------------- -2023-10-19 15:37:44,142 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 135.841 +/- 20.932 -2023-10-19 15:37:44,164 : +2023-10-27 17:40:27,262 : Eval | ep_lengths 176.60 +/- 112.16 | ep_return 108.022 +/- 74.830 +2023-10-27 17:40:27,263 : -------------------------------------- | loss/ | | -| approx_kl | 0.0218 | -| entropy_loss | -4.24 | -| policy_loss | -0.0091 | -| value_loss | 1.33 | +| approx_kl | 0.0325 | +| entropy_loss | -4.23 | +| policy_loss | -0.0107 | +| value_loss | 0.489 | | stat/ | | -| constraint_violation | 622 | -| ep_constraint_vio... | 0.6 | -| ep_length | 226 | -| ep_return | 110 | -| ep_reward | 0.441 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 1 | +| ep_constraint_vio... | 0.1 | | ep_length | 250 | -| ep_return | 136 | -| ep_reward | 0.543 | -| mse | 262 | +| ep_return | 182 | +| ep_reward | 0.728 | +| stat_eval/ | | +| constraint_violation | 0.3 | +| ep_length | 177 | +| ep_return | 108 | +| ep_reward | 0.436 | +| mse | 192 | | time/ | | | progress | 0.21 | | step | 2.1e+05 | -| step_time | 10.9 | +| step_time | 14.6 | -------------------------------------- -2023-10-19 15:39:54,731 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 126.159 +/- 48.397 -2023-10-19 15:39:54,733 : +2023-10-27 17:43:19,840 : Eval | ep_lengths 226.30 +/- 71.10 | ep_return 133.628 +/- 52.029 +2023-10-27 17:43:19,842 : -------------------------------------- | loss/ | | -| approx_kl | 0.0212 | -| entropy_loss | -4.31 | -| policy_loss | -0.0148 | -| value_loss | 2.58 | +| approx_kl | 0.0252 | +| entropy_loss | -4.26 | +| policy_loss | -0.0174 | +| value_loss | 0.634 | | stat/ | | -| constraint_violation | 634 | -| ep_constraint_vio... | 0.1 | -| ep_length | 227 | -| ep_return | 112 | -| ep_reward | 0.448 | +| constraint_violation | 1 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 166 | +| ep_reward | 0.663 | | stat_eval/ | | -| constraint_violation | 0.9 | -| ep_length | 225 | -| ep_return | 126 | -| ep_reward | 0.509 | -| mse | 235 | +| constraint_violation | 0.2 | +| ep_length | 226 | +| ep_return | 134 | +| ep_reward | 0.535 | +| mse | 239 | | time/ | | | progress | 0.22 | | step | 2.2e+05 | -| step_time | 10.7 | +| step_time | 13.1 | -------------------------------------- -2023-10-19 15:42:07,509 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 147.558 +/- 21.549 -2023-10-19 15:42:07,518 : +2023-10-27 17:46:14,264 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 150.915 +/- 24.185 +2023-10-27 17:46:14,265 : -------------------------------------- | loss/ | | -| approx_kl | 0.0244 | -| entropy_loss | -4.36 | -| policy_loss | -0.0152 | -| value_loss | 1.25 | +| approx_kl | 0.0193 | +| entropy_loss | -4.37 | +| policy_loss | -0.00805 | +| value_loss | 0.483 | | stat/ | | -| constraint_violation | 678 | -| ep_constraint_vio... | 1.6 | -| ep_length | 202 | -| ep_return | 97.5 | -| ep_reward | 0.391 | +| constraint_violation | 1 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 184 | +| ep_reward | 0.735 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 148 | -| ep_reward | 0.59 | -| mse | 192 | +| ep_return | 151 | +| ep_reward | 0.604 | +| mse | 252 | | time/ | | | progress | 0.23 | | step | 2.3e+05 | -| step_time | 10.9 | +| step_time | 14.9 | -------------------------------------- -2023-10-19 15:44:20,421 : Eval | ep_lengths 225.80 +/- 72.60 | ep_return 122.806 +/- 48.678 -2023-10-19 15:44:20,422 : +2023-10-27 17:49:04,461 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 130.868 +/- 49.057 +2023-10-27 17:49:04,462 : +--------------------------------------- +| loss/ | | +| approx_kl | 0.0354 | +| entropy_loss | -4.34 | +| policy_loss | -0.000602 | +| value_loss | 1.1 | +| stat/ | | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 176 | +| ep_reward | 0.703 | +| stat_eval/ | | +| constraint_violation | 0.9 | +| ep_length | 225 | +| ep_return | 131 | +| ep_reward | 0.524 | +| mse | 242 | +| time/ | | +| progress | 0.24 | +| step | 2.4e+05 | +| step_time | 13.6 | +--------------------------------------- + +2023-10-27 17:51:54,754 : Eval | ep_lengths 202.00 +/- 96.07 | ep_return 123.459 +/- 65.948 +2023-10-27 17:51:54,756 : -------------------------------------- | loss/ | | -| approx_kl | 0.0306 | -| entropy_loss | -4.41 | -| policy_loss | -0.0106 | -| value_loss | 2.66 | +| approx_kl | 0.0258 | +| entropy_loss | -4.35 | +| policy_loss | -0.00359 | +| value_loss | 0.218 | | stat/ | | -| constraint_violation | 683 | -| ep_constraint_vio... | 0.1 | -| ep_length | 227 | -| ep_return | 120 | -| ep_reward | 0.482 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 189 | +| ep_reward | 0.756 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 226 | +| constraint_violation | 0.2 | +| ep_length | 202 | | ep_return | 123 | -| ep_reward | 0.493 | -| mse | 225 | -| time/ | | -| progress | 0.24 | -| step | 2.4e+05 | -| step_time | 10.9 | --------------------------------------- - -2023-10-19 15:46:44,158 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 133.500 +/- 29.008 -2023-10-19 15:46:44,160 : --------------------------------------- -| loss/ | | -| approx_kl | 0.0165 | -| entropy_loss | -4.52 | -| policy_loss | -0.0135 | -| value_loss | 1.1 | -| stat/ | | -| constraint_violation | 743 | -| ep_constraint_vio... | 2.4 | -| ep_length | 227 | -| ep_return | 122 | -| ep_reward | 0.49 | -| stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 250 | -| ep_return | 134 | -| ep_reward | 0.534 | -| mse | 317 | +| ep_reward | 0.494 | +| mse | 229 | | time/ | | | progress | 0.25 | | step | 2.5e+05 | -| step_time | 11.8 | +| step_time | 15.2 | -------------------------------------- -2023-10-19 15:49:07,487 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 148.094 +/- 25.017 -2023-10-19 15:49:07,496 : +2023-10-27 17:54:51,187 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 152.222 +/- 25.435 +2023-10-27 17:54:51,189 : -------------------------------------- | loss/ | | -| approx_kl | 0.0324 | -| entropy_loss | -4.61 | -| policy_loss | -0.00902 | -| value_loss | 0.642 | +| approx_kl | 0.0345 | +| entropy_loss | -4.41 | +| policy_loss | -0.0134 | +| value_loss | 0.733 | | stat/ | | -| constraint_violation | 756 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 123 | -| ep_reward | 0.495 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 179 | +| ep_reward | 0.717 | | stat_eval/ | | -| constraint_violation | 0.6 | +| constraint_violation | 0 | | ep_length | 250 | -| ep_return | 148 | -| ep_reward | 0.592 | -| mse | 228 | +| ep_return | 152 | +| ep_reward | 0.609 | +| mse | 251 | | time/ | | | progress | 0.26 | | step | 2.6e+05 | -| step_time | 11.4 | +| step_time | 15 | -------------------------------------- -2023-10-19 15:51:27,111 : Eval | ep_lengths 226.30 +/- 71.10 | ep_return 122.220 +/- 44.648 -2023-10-19 15:51:27,112 : +2023-10-27 17:57:45,734 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 157.775 +/- 25.695 +2023-10-27 17:57:45,742 : -------------------------------------- | loss/ | | -| approx_kl | 0.0246 | -| entropy_loss | -4.67 | -| policy_loss | -0.021 | -| value_loss | 0.715 | +| approx_kl | 0.0207 | +| entropy_loss | -4.42 | +| policy_loss | -0.0132 | +| value_loss | 0.151 | | stat/ | | -| constraint_violation | 759 | -| ep_constraint_vio... | 0.1 | -| ep_length | 227 | -| ep_return | 131 | -| ep_reward | 0.522 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 173 | +| ep_reward | 0.69 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 226 | -| ep_return | 122 | -| ep_reward | 0.489 | -| mse | 277 | +| constraint_violation | 2.5 | +| ep_length | 250 | +| ep_return | 158 | +| ep_reward | 0.631 | +| mse | 245 | | time/ | | | progress | 0.27 | | step | 2.7e+05 | -| step_time | 11.3 | +| step_time | 14.1 | -------------------------------------- -2023-10-19 15:53:36,519 : Eval | ep_lengths 177.00 +/- 111.52 | ep_return 96.282 +/- 65.452 -2023-10-19 15:53:36,520 : +2023-10-27 18:00:43,816 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 170.399 +/- 28.676 +2023-10-27 18:00:43,825 : -------------------------------------- | loss/ | | -| approx_kl | 0.0297 | -| entropy_loss | -4.7 | -| policy_loss | -0.0166 | -| value_loss | 0.45 | +| approx_kl | 0.0265 | +| entropy_loss | -4.4 | +| policy_loss | -0.0129 | +| value_loss | 0.126 | | stat/ | | -| constraint_violation | 818 | -| ep_constraint_vio... | 1.3 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 136 | -| ep_reward | 0.544 | +| ep_return | 186 | +| ep_reward | 0.743 | | stat_eval/ | | | constraint_violation | 0.3 | -| ep_length | 177 | -| ep_return | 96.3 | -| ep_reward | 0.386 | -| mse | 223 | +| ep_length | 250 | +| ep_return | 170 | +| ep_reward | 0.682 | +| mse | 215 | | time/ | | | progress | 0.28 | | step | 2.8e+05 | -| step_time | 11 | +| step_time | 13.8 | -------------------------------------- -2023-10-19 15:55:46,986 : Eval | ep_lengths 203.00 +/- 94.04 | ep_return 100.410 +/- 52.451 -2023-10-19 15:55:46,988 : +2023-10-27 18:03:27,668 : Eval | ep_lengths 201.40 +/- 97.21 | ep_return 139.267 +/- 75.640 +2023-10-27 18:03:27,670 : -------------------------------------- | loss/ | | -| approx_kl | 0.0239 | -| entropy_loss | -4.76 | -| policy_loss | 0.00217 | -| value_loss | 0.636 | +| approx_kl | 0.0214 | +| entropy_loss | -4.39 | +| policy_loss | -0.0134 | +| value_loss | 0.17 | | stat/ | | -| constraint_violation | 859 | -| ep_constraint_vio... | 1 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 153 | -| ep_reward | 0.61 | +| ep_return | 189 | +| ep_reward | 0.756 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 203 | -| ep_return | 100 | -| ep_reward | 0.402 | -| mse | 342 | +| constraint_violation | 0.3 | +| ep_length | 201 | +| ep_return | 139 | +| ep_reward | 0.559 | +| mse | 145 | | time/ | | | progress | 0.29 | | step | 2.9e+05 | -| step_time | 10.8 | +| step_time | 15.4 | -------------------------------------- -2023-10-19 15:57:58,008 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 121.154 +/- 44.654 -2023-10-19 15:57:58,009 : +2023-10-27 18:06:14,389 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 135.195 +/- 47.432 +2023-10-27 18:06:14,391 : -------------------------------------- | loss/ | | -| approx_kl | 0.0264 | -| entropy_loss | -4.8 | -| policy_loss | -0.0139 | -| value_loss | 0.726 | +| approx_kl | 0.0342 | +| entropy_loss | -4.4 | +| policy_loss | -0.00219 | +| value_loss | 0.26 | | stat/ | | -| constraint_violation | 885 | -| ep_constraint_vio... | 2.4 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 145 | -| ep_reward | 0.58 | +| ep_return | 194 | +| ep_reward | 0.776 | | stat_eval/ | | -| constraint_violation | 0.3 | +| constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 121 | -| ep_reward | 0.485 | -| mse | 314 | +| ep_return | 135 | +| ep_reward | 0.541 | +| mse | 286 | | time/ | | | progress | 0.3 | | step | 3e+05 | -| step_time | 10.7 | +| step_time | 13.9 | -------------------------------------- -2023-10-19 16:00:06,939 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 131.338 +/- 51.978 -2023-10-19 16:00:06,941 : +2023-10-27 18:09:07,445 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 154.302 +/- 31.859 +2023-10-27 18:09:07,447 : -------------------------------------- | loss/ | | -| approx_kl | 0.029 | -| entropy_loss | -4.77 | -| policy_loss | -0.0146 | -| value_loss | 1.5 | +| approx_kl | 0.0138 | +| entropy_loss | -4.41 | +| policy_loss | -0.00965 | +| value_loss | 0.179 | | stat/ | | -| constraint_violation | 902 | -| ep_constraint_vio... | 0.2 | -| ep_length | 225 | -| ep_return | 140 | -| ep_reward | 0.561 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 194 | +| ep_reward | 0.776 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 131 | -| ep_reward | 0.526 | -| mse | 263 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 154 | +| ep_reward | 0.617 | +| mse | 291 | | time/ | | | progress | 0.31 | | step | 3.1e+05 | -| step_time | 10.5 | +| step_time | 16.2 | -------------------------------------- -2023-10-19 16:02:19,210 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 135.566 +/- 25.229 -2023-10-19 16:02:19,211 : +2023-10-27 18:11:57,853 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 155.539 +/- 28.719 +2023-10-27 18:11:57,855 : -------------------------------------- | loss/ | | -| approx_kl | 0.0155 | -| entropy_loss | -4.73 | -| policy_loss | -0.02 | -| value_loss | 2.01 | +| approx_kl | 0.02 | +| entropy_loss | -4.48 | +| policy_loss | -0.0131 | +| value_loss | 0.307 | | stat/ | | -| constraint_violation | 932 | -| ep_constraint_vio... | 0.6 | -| ep_length | 202 | -| ep_return | 124 | -| ep_reward | 0.499 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 201 | +| ep_reward | 0.806 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 136 | -| ep_reward | 0.542 | -| mse | 329 | +| ep_return | 156 | +| ep_reward | 0.622 | +| mse | 297 | | time/ | | | progress | 0.32 | | step | 3.2e+05 | -| step_time | 10.8 | +| step_time | 15.3 | -------------------------------------- -2023-10-19 16:04:29,434 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 142.878 +/- 21.621 -2023-10-19 16:04:29,435 : +2023-10-27 18:14:42,311 : Eval | ep_lengths 201.90 +/- 96.28 | ep_return 133.324 +/- 73.925 +2023-10-27 18:14:42,313 : -------------------------------------- | loss/ | | -| approx_kl | 0.0335 | -| entropy_loss | -4.88 | -| policy_loss | -0.0133 | -| value_loss | 1.21 | +| approx_kl | 0.0193 | +| entropy_loss | -4.53 | +| policy_loss | -0.0164 | +| value_loss | 0.183 | | stat/ | | -| constraint_violation | 937 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 125 | -| ep_reward | 0.506 | -| stat_eval/ | | -| constraint_violation | 0.2 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 143 | -| ep_reward | 0.572 | -| mse | 278 | +| ep_return | 199 | +| ep_reward | 0.798 | +| stat_eval/ | | +| constraint_violation | 0.3 | +| ep_length | 202 | +| ep_return | 133 | +| ep_reward | 0.537 | +| mse | 189 | | time/ | | | progress | 0.33 | | step | 3.3e+05 | -| step_time | 10.7 | +| step_time | 14.2 | -------------------------------------- -2023-10-19 16:06:40,337 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 131.939 +/- 31.623 -2023-10-19 16:06:40,339 : +2023-10-27 18:17:34,581 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 153.228 +/- 34.448 +2023-10-27 18:17:34,582 : -------------------------------------- | loss/ | | -| approx_kl | 0.0335 | -| entropy_loss | -4.86 | -| policy_loss | -0.0229 | -| value_loss | 1.58 | +| approx_kl | 0.00825 | +| entropy_loss | -4.56 | +| policy_loss | -0.0121 | +| value_loss | 0.106 | | stat/ | | -| constraint_violation | 954 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 139 | -| ep_reward | 0.557 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 195 | +| ep_reward | 0.782 | | stat_eval/ | | -| constraint_violation | 0.3 | +| constraint_violation | 0.5 | | ep_length | 250 | -| ep_return | 132 | -| ep_reward | 0.528 | -| mse | 380 | +| ep_return | 153 | +| ep_reward | 0.613 | +| mse | 294 | | time/ | | | progress | 0.34 | | step | 3.4e+05 | -| step_time | 10.7 | +| step_time | 13.9 | -------------------------------------- -2023-10-19 16:08:49,350 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 169.347 +/- 34.306 -2023-10-19 16:08:49,380 : +2023-10-27 18:20:23,041 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 155.281 +/- 43.263 +2023-10-27 18:20:23,042 : -------------------------------------- | loss/ | | -| approx_kl | 0.0149 | -| entropy_loss | -4.88 | -| policy_loss | -0.00757 | -| value_loss | 1.07 | +| approx_kl | 0.0217 | +| entropy_loss | -4.52 | +| policy_loss | -0.0125 | +| value_loss | 0.292 | | stat/ | | -| constraint_violation | 970 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 127 | -| ep_reward | 0.508 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 196 | +| ep_reward | 0.783 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 250 | -| ep_return | 169 | -| ep_reward | 0.677 | -| mse | 161 | +| ep_return | 155 | +| ep_reward | 0.621 | +| mse | 331 | | time/ | | | progress | 0.35 | | step | 3.5e+05 | -| step_time | 10.7 | +| step_time | 13.2 | -------------------------------------- -2023-10-19 16:10:55,703 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 143.651 +/- 53.694 -2023-10-19 16:10:55,724 : +2023-10-27 18:23:17,278 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 164.903 +/- 30.216 +2023-10-27 18:23:17,280 : -------------------------------------- | loss/ | | -| approx_kl | 0.0236 | -| entropy_loss | -4.87 | -| policy_loss | -0.0131 | -| value_loss | 2.88 | +| approx_kl | 0.0124 | +| entropy_loss | -4.57 | +| policy_loss | -0.0179 | +| value_loss | 0.143 | | stat/ | | -| constraint_violation | 986 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 142 | -| ep_reward | 0.573 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 191 | +| ep_reward | 0.765 | | stat_eval/ | | | constraint_violation | 1.4 | -| ep_length | 226 | -| ep_return | 144 | -| ep_reward | 0.575 | -| mse | 163 | +| ep_length | 250 | +| ep_return | 165 | +| ep_reward | 0.66 | +| mse | 234 | | time/ | | | progress | 0.36 | | step | 3.6e+05 | -| step_time | 10.6 | +| step_time | 14.5 | -------------------------------------- -2023-10-19 16:13:04,342 : Eval | ep_lengths 226.60 +/- 70.20 | ep_return 134.421 +/- 49.098 -2023-10-19 16:13:04,344 : +2023-10-27 18:26:01,029 : Eval | ep_lengths 202.20 +/- 95.60 | ep_return 139.391 +/- 73.920 +2023-10-27 18:26:01,030 : -------------------------------------- | loss/ | | -| approx_kl | 0.0265 | -| entropy_loss | -4.88 | -| policy_loss | -0.00518 | -| value_loss | 1.28 | +| approx_kl | 0.026 | +| entropy_loss | -4.66 | +| policy_loss | -0.0117 | +| value_loss | 0.219 | | stat/ | | -| constraint_violation | 1.03e+03 | -| ep_constraint_vio... | 1.3 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 137 | -| ep_reward | 0.548 | +| ep_return | 194 | +| ep_reward | 0.777 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 227 | -| ep_return | 134 | -| ep_reward | 0.538 | -| mse | 201 | +| constraint_violation | 0.2 | +| ep_length | 202 | +| ep_return | 139 | +| ep_reward | 0.562 | +| mse | 158 | | time/ | | | progress | 0.37 | | step | 3.7e+05 | -| step_time | 10.5 | +| step_time | 13.7 | -------------------------------------- -2023-10-19 16:15:10,234 : Eval | ep_lengths 200.80 +/- 98.40 | ep_return 126.246 +/- 66.911 -2023-10-19 16:15:10,236 : +2023-10-27 18:28:46,511 : Eval | ep_lengths 225.90 +/- 72.30 | ep_return 144.022 +/- 56.280 +2023-10-27 18:28:46,513 : -------------------------------------- | loss/ | | -| approx_kl | 0.0258 | -| entropy_loss | -4.94 | -| policy_loss | -0.015 | -| value_loss | 1.77 | +| approx_kl | 0.0222 | +| entropy_loss | -4.72 | +| policy_loss | -0.0147 | +| value_loss | 0.207 | | stat/ | | -| constraint_violation | 1.05e+03 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 126 | -| ep_reward | 0.506 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 185 | +| ep_reward | 0.741 | | stat_eval/ | | | constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 126 | -| ep_reward | 0.511 | -| mse | 201 | +| ep_length | 226 | +| ep_return | 144 | +| ep_reward | 0.577 | +| mse | 205 | | time/ | | | progress | 0.38 | | step | 3.8e+05 | -| step_time | 10.6 | +| step_time | 14.2 | -------------------------------------- -2023-10-19 16:17:18,862 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 141.590 +/- 51.108 -2023-10-19 16:17:18,863 : +2023-10-27 18:31:46,765 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 149.575 +/- 27.900 +2023-10-27 18:31:46,766 : -------------------------------------- | loss/ | | -| approx_kl | 0.0138 | -| entropy_loss | -5.01 | -| policy_loss | -0.0131 | -| value_loss | 0.771 | +| approx_kl | 0.017 | +| entropy_loss | -4.72 | +| policy_loss | -0.00791 | +| value_loss | 0.426 | | stat/ | | -| constraint_violation | 1.07e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 140 | -| ep_reward | 0.56 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 188 | +| ep_reward | 0.751 | | stat_eval/ | | -| constraint_violation | 1.3 | -| ep_length | 225 | -| ep_return | 142 | -| ep_reward | 0.574 | -| mse | 174 | +| constraint_violation | 1.5 | +| ep_length | 250 | +| ep_return | 150 | +| ep_reward | 0.598 | +| mse | 307 | | time/ | | | progress | 0.39 | | step | 3.9e+05 | -| step_time | 10.4 | +| step_time | 14.4 | -------------------------------------- -2023-10-19 16:19:29,246 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 140.378 +/- 23.006 -2023-10-19 16:19:29,247 : +2023-10-27 18:34:29,161 : Eval | ep_lengths 225.60 +/- 73.20 | ep_return 137.909 +/- 51.989 +2023-10-27 18:34:29,162 : -------------------------------------- | loss/ | | -| approx_kl | 0.0236 | -| entropy_loss | -5.06 | -| policy_loss | -0.0158 | -| value_loss | 0.816 | +| approx_kl | 0.0301 | +| entropy_loss | -4.77 | +| policy_loss | -0.0089 | +| value_loss | 0.403 | | stat/ | | -| constraint_violation | 1.08e+03 | -| ep_constraint_vio... | 0.8 | -| ep_length | 225 | -| ep_return | 131 | -| ep_reward | 0.529 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 140 | -| ep_reward | 0.562 | -| mse | 296 | +| ep_return | 186 | +| ep_reward | 0.744 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 226 | +| ep_return | 138 | +| ep_reward | 0.552 | +| mse | 234 | | time/ | | | progress | 0.4 | | step | 4e+05 | -| step_time | 10.5 | +| step_time | 13.8 | -------------------------------------- -2023-10-19 16:21:39,029 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 154.534 +/- 25.283 -2023-10-19 16:21:39,031 : +2023-10-27 18:37:15,246 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 165.142 +/- 33.373 +2023-10-27 18:37:15,248 : -------------------------------------- | loss/ | | -| approx_kl | 0.0124 | -| entropy_loss | -5.08 | -| policy_loss | -0.0171 | -| value_loss | 0.956 | +| approx_kl | 0.018 | +| entropy_loss | -4.9 | +| policy_loss | -0.0114 | +| value_loss | 0.201 | | stat/ | | -| constraint_violation | 1.1e+03 | +| constraint_violation | 3 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 147 | -| ep_reward | 0.588 | +| ep_return | 198 | +| ep_reward | 0.794 | | stat_eval/ | | -| constraint_violation | 0.1 | +| constraint_violation | 0 | | ep_length | 250 | -| ep_return | 155 | -| ep_reward | 0.618 | -| mse | 220 | +| ep_return | 165 | +| ep_reward | 0.661 | +| mse | 232 | | time/ | | | progress | 0.41 | | step | 4.1e+05 | -| step_time | 10.7 | +| step_time | 13.9 | -------------------------------------- -2023-10-19 16:23:47,853 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 161.900 +/- 31.530 -2023-10-19 16:23:47,895 : +2023-10-27 18:39:58,531 : Eval | ep_lengths 201.40 +/- 97.20 | ep_return 141.501 +/- 73.899 +2023-10-27 18:39:58,533 : -------------------------------------- | loss/ | | -| approx_kl | 0.00491 | -| entropy_loss | -5.03 | -| policy_loss | -0.0119 | -| value_loss | 1.48 | +| approx_kl | 0.0226 | +| entropy_loss | -4.9 | +| policy_loss | -0.00272 | +| value_loss | 0.265 | | stat/ | | -| constraint_violation | 1.1e+03 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 127 | -| ep_reward | 0.511 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 162 | -| ep_reward | 0.648 | -| mse | 239 | +| ep_return | 203 | +| ep_reward | 0.81 | +| stat_eval/ | | +| constraint_violation | 1.3 | +| ep_length | 201 | +| ep_return | 142 | +| ep_reward | 0.566 | +| mse | 134 | | time/ | | | progress | 0.42 | | step | 4.2e+05 | -| step_time | 10.7 | +| step_time | 13.3 | -------------------------------------- -2023-10-19 16:25:53,250 : Eval | ep_lengths 200.80 +/- 98.40 | ep_return 109.397 +/- 57.549 -2023-10-19 16:25:53,251 : +2023-10-27 18:42:48,577 : Eval | ep_lengths 201.90 +/- 96.26 | ep_return 133.460 +/- 69.368 +2023-10-27 18:42:48,579 : -------------------------------------- | loss/ | | -| approx_kl | 0.0312 | -| entropy_loss | -5.11 | -| policy_loss | -0.0156 | -| value_loss | 1.61 | +| approx_kl | 0.0203 | +| entropy_loss | -4.91 | +| policy_loss | -0.0183 | +| value_loss | 0.122 | | stat/ | | -| constraint_violation | 1.12e+03 | -| ep_constraint_vio... | 0.7 | -| ep_length | 204 | -| ep_return | 95.5 | -| ep_reward | 0.384 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 195 | +| ep_reward | 0.781 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 109 | -| ep_reward | 0.445 | -| mse | 224 | +| constraint_violation | 1.7 | +| ep_length | 202 | +| ep_return | 133 | +| ep_reward | 0.539 | +| mse | 178 | | time/ | | | progress | 0.43 | | step | 4.3e+05 | -| step_time | 10.6 | +| step_time | 14.9 | -------------------------------------- -2023-10-19 16:27:58,042 : Eval | ep_lengths 201.60 +/- 96.82 | ep_return 115.857 +/- 62.216 -2023-10-19 16:27:58,043 : +2023-10-27 18:45:35,527 : Eval | ep_lengths 226.60 +/- 70.20 | ep_return 139.653 +/- 57.365 +2023-10-27 18:45:35,529 : -------------------------------------- | loss/ | | -| approx_kl | 0.0164 | -| entropy_loss | -5.16 | -| policy_loss | -0.00915 | -| value_loss | 3.93 | +| approx_kl | 0.0211 | +| entropy_loss | -4.91 | +| policy_loss | -0.0193 | +| value_loss | 0.0935 | | stat/ | | -| constraint_violation | 1.16e+03 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 118 | -| ep_reward | 0.487 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 200 | +| ep_reward | 0.799 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 202 | -| ep_return | 116 | -| ep_reward | 0.465 | -| mse | 270 | +| constraint_violation | 0.3 | +| ep_length | 227 | +| ep_return | 140 | +| ep_reward | 0.559 | +| mse | 279 | | time/ | | | progress | 0.44 | | step | 4.4e+05 | -| step_time | 10.5 | +| step_time | 13.9 | -------------------------------------- -2023-10-19 16:30:04,891 : Eval | ep_lengths 226.10 +/- 71.70 | ep_return 137.540 +/- 53.625 -2023-10-19 16:30:04,893 : +2023-10-27 18:48:24,235 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 166.516 +/- 32.019 +2023-10-27 18:48:24,237 : -------------------------------------- | loss/ | | -| approx_kl | 0.0257 | -| entropy_loss | -5.15 | -| policy_loss | -0.0214 | -| value_loss | 4.7 | +| approx_kl | 0.0265 | +| entropy_loss | -4.96 | +| policy_loss | -0.0169 | +| value_loss | 0.0873 | | stat/ | | -| constraint_violation | 1.16e+03 | +| constraint_violation | 4 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 137 | -| ep_reward | 0.548 | +| ep_return | 203 | +| ep_reward | 0.813 | | stat_eval/ | | -| constraint_violation | 1.5 | -| ep_length | 226 | -| ep_return | 138 | -| ep_reward | 0.55 | -| mse | 244 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 167 | +| ep_reward | 0.666 | +| mse | 204 | | time/ | | | progress | 0.45 | | step | 4.5e+05 | -| step_time | 10.3 | +| step_time | 15.5 | -------------------------------------- -2023-10-19 16:32:13,958 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 170.950 +/- 21.388 -2023-10-19 16:32:13,969 : +2023-10-27 18:51:12,473 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 184.554 +/- 24.103 +2023-10-27 18:51:12,483 : -------------------------------------- | loss/ | | -| approx_kl | 0.0317 | -| entropy_loss | -5.15 | -| policy_loss | -0.013 | -| value_loss | 1.05 | +| approx_kl | 0.0342 | +| entropy_loss | -4.96 | +| policy_loss | 0.00282 | +| value_loss | 0.0706 | | stat/ | | -| constraint_violation | 1.19e+03 | -| ep_constraint_vio... | 1.1 | -| ep_length | 202 | -| ep_return | 119 | -| ep_reward | 0.477 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 205 | +| ep_reward | 0.822 | | stat_eval/ | | -| constraint_violation | 0.7 | +| constraint_violation | 0 | | ep_length | 250 | -| ep_return | 171 | -| ep_reward | 0.684 | -| mse | 164 | +| ep_return | 185 | +| ep_reward | 0.738 | +| mse | 119 | | time/ | | | progress | 0.46 | | step | 4.6e+05 | -| step_time | 10.5 | +| step_time | 15.4 | -------------------------------------- -2023-10-19 16:34:20,775 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 152.812 +/- 14.418 -2023-10-19 16:34:20,776 : ---------------------------------------- -| loss/ | | -| approx_kl | 0.0244 | -| entropy_loss | -5.17 | -| policy_loss | -0.000762 | -| value_loss | 0.828 | -| stat/ | | -| constraint_violation | 1.21e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 130 | -| ep_reward | 0.522 | -| stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 153 | -| ep_reward | 0.611 | -| mse | 206 | -| time/ | | -| progress | 0.47 | -| step | 4.7e+05 | -| step_time | 9.86 | ---------------------------------------- - -2023-10-19 16:36:22,063 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 147.460 +/- 22.753 -2023-10-19 16:36:22,064 : +2023-10-27 18:53:59,745 : Eval | ep_lengths 226.20 +/- 71.40 | ep_return 131.533 +/- 46.152 +2023-10-27 18:53:59,746 : -------------------------------------- | loss/ | | -| approx_kl | 0.0199 | -| entropy_loss | -5.25 | -| policy_loss | -0.0219 | -| value_loss | 3.35 | +| approx_kl | 0.0288 | +| entropy_loss | -4.93 | +| policy_loss | -0.0126 | +| value_loss | 0.151 | | stat/ | | -| constraint_violation | 1.24e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 140 | -| ep_reward | 0.563 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 186 | +| ep_reward | 0.744 | | stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 0.1 | +| ep_length | 226 | +| ep_return | 132 | +| ep_reward | 0.529 | +| mse | 244 | +| time/ | | +| progress | 0.47 | +| step | 4.7e+05 | +| step_time | 13.5 | +-------------------------------------- + +2023-10-27 18:56:36,341 : Eval | ep_lengths 201.20 +/- 97.60 | ep_return 134.827 +/- 73.581 +2023-10-27 18:56:36,342 : +-------------------------------------- +| loss/ | | +| approx_kl | 0.0158 | +| entropy_loss | -4.98 | +| policy_loss | -0.0168 | +| value_loss | 0.047 | +| stat/ | | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 147 | -| ep_reward | 0.59 | -| mse | 292 | +| ep_return | 190 | +| ep_reward | 0.761 | +| stat_eval/ | | +| constraint_violation | 1.6 | +| ep_length | 201 | +| ep_return | 135 | +| ep_reward | 0.541 | +| mse | 176 | | time/ | | | progress | 0.48 | | step | 4.8e+05 | -| step_time | 9.9 | +| step_time | 14.4 | -------------------------------------- -2023-10-19 16:38:23,188 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 163.315 +/- 18.590 -2023-10-19 16:38:23,189 : +2023-10-27 18:59:25,271 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 151.123 +/- 37.345 +2023-10-27 18:59:25,273 : -------------------------------------- | loss/ | | -| approx_kl | 0.0354 | -| entropy_loss | -5.32 | -| policy_loss | -0.00371 | -| value_loss | 0.796 | +| approx_kl | 0.0137 | +| entropy_loss | -5.03 | +| policy_loss | -0.011 | +| value_loss | 0.129 | | stat/ | | -| constraint_violation | 1.26e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 132 | -| ep_reward | 0.527 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 199 | +| ep_reward | 0.794 | | stat_eval/ | | -| constraint_violation | 1.5 | +| constraint_violation | 0.3 | | ep_length | 250 | -| ep_return | 163 | -| ep_reward | 0.653 | -| mse | 202 | +| ep_return | 151 | +| ep_reward | 0.604 | +| mse | 368 | | time/ | | | progress | 0.49 | | step | 4.9e+05 | -| step_time | 10 | +| step_time | 15.5 | -------------------------------------- -2023-10-19 16:40:22,391 : Eval | ep_lengths 226.80 +/- 69.60 | ep_return 141.398 +/- 52.713 -2023-10-19 16:40:22,392 : +2023-10-27 19:02:15,351 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 155.153 +/- 19.024 +2023-10-27 19:02:15,353 : -------------------------------------- | loss/ | | -| approx_kl | 0.0291 | -| entropy_loss | -5.36 | -| policy_loss | -0.015 | -| value_loss | 0.467 | +| approx_kl | 0.0167 | +| entropy_loss | -4.99 | +| policy_loss | -0.0117 | +| value_loss | 0.161 | | stat/ | | -| constraint_violation | 1.26e+03 | +| constraint_violation | 4 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 141 | -| ep_reward | 0.562 | +| ep_return | 205 | +| ep_reward | 0.818 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 227 | -| ep_return | 141 | -| ep_reward | 0.574 | -| mse | 212 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 155 | +| ep_reward | 0.621 | +| mse | 256 | | time/ | | | progress | 0.5 | | step | 5e+05 | -| step_time | 9.68 | +| step_time | 15.9 | -------------------------------------- -2023-10-19 16:42:19,924 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 146.144 +/- 17.352 -2023-10-19 16:42:19,926 : +2023-10-27 19:05:07,366 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 163.238 +/- 60.723 +2023-10-27 19:05:07,368 : -------------------------------------- | loss/ | | -| approx_kl | 0.0284 | -| entropy_loss | -5.37 | -| policy_loss | -0.00288 | -| value_loss | 0.807 | +| approx_kl | 0.0261 | +| entropy_loss | -5.04 | +| policy_loss | -0.00354 | +| value_loss | 0.181 | | stat/ | | -| constraint_violation | 1.29e+03 | -| ep_constraint_vio... | 1.4 | -| ep_length | 226 | -| ep_return | 129 | -| ep_reward | 0.516 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 146 | -| ep_reward | 0.585 | -| mse | 268 | +| ep_return | 208 | +| ep_reward | 0.834 | +| stat_eval/ | | +| constraint_violation | 0.6 | +| ep_length | 225 | +| ep_return | 163 | +| ep_reward | 0.657 | +| mse | 132 | | time/ | | | progress | 0.51 | | step | 5.1e+05 | -| step_time | 9.58 | +| step_time | 14.8 | -------------------------------------- -2023-10-19 16:44:09,811 : Eval | ep_lengths 153.70 +/- 117.96 | ep_return 103.300 +/- 85.120 -2023-10-19 16:44:09,812 : +2023-10-27 19:07:53,324 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 154.502 +/- 60.405 +2023-10-27 19:07:53,325 : -------------------------------------- | loss/ | | -| approx_kl | 0.0273 | -| entropy_loss | -5.34 | -| policy_loss | -0.00768 | -| value_loss | 2.01 | +| approx_kl | 0.0406 | +| entropy_loss | -5.06 | +| policy_loss | -0.00341 | +| value_loss | 0.345 | | stat/ | | -| constraint_violation | 1.3e+03 | +| constraint_violation | 4 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 158 | -| ep_reward | 0.633 | +| ep_return | 196 | +| ep_reward | 0.783 | | stat_eval/ | | | constraint_violation | 0.5 | -| ep_length | 154 | -| ep_return | 103 | -| ep_reward | 0.432 | -| mse | 107 | +| ep_length | 226 | +| ep_return | 155 | +| ep_reward | 0.619 | +| mse | 206 | | time/ | | | progress | 0.52 | | step | 5.2e+05 | -| step_time | 9.61 | +| step_time | 14.1 | -------------------------------------- -2023-10-19 16:46:06,751 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 144.572 +/- 16.862 -2023-10-19 16:46:06,753 : +2023-10-27 19:10:46,633 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 156.391 +/- 30.043 +2023-10-27 19:10:46,635 : -------------------------------------- | loss/ | | -| approx_kl | 0.0291 | -| entropy_loss | -5.37 | -| policy_loss | -0.00261 | -| value_loss | 3.15 | +| approx_kl | 0.0344 | +| entropy_loss | -4.97 | +| policy_loss | -0.0184 | +| value_loss | 0.0998 | | stat/ | | -| constraint_violation | 1.37e+03 | -| ep_constraint_vio... | 2.4 | -| ep_length | 202 | -| ep_return | 124 | -| ep_reward | 0.499 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 197 | +| ep_reward | 0.79 | | stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 0.2 | | ep_length | 250 | -| ep_return | 145 | -| ep_reward | 0.578 | -| mse | 284 | +| ep_return | 156 | +| ep_reward | 0.626 | +| mse | 286 | | time/ | | | progress | 0.53 | | step | 5.3e+05 | -| step_time | 9.45 | +| step_time | 16.8 | -------------------------------------- -2023-10-19 16:48:02,296 : Eval | ep_lengths 225.90 +/- 72.30 | ep_return 147.347 +/- 52.847 -2023-10-19 16:48:02,297 : +2023-10-27 19:13:26,510 : Eval | ep_lengths 226.30 +/- 71.10 | ep_return 140.237 +/- 58.325 +2023-10-27 19:13:26,512 : -------------------------------------- | loss/ | | -| approx_kl | 0.03 | -| entropy_loss | -5.37 | -| policy_loss | -0.00359 | -| value_loss | 2.27 | +| approx_kl | 0.0141 | +| entropy_loss | -5.04 | +| policy_loss | -0.0197 | +| value_loss | 0.103 | | stat/ | | -| constraint_violation | 1.39e+03 | -| ep_constraint_vio... | 0.6 | -| ep_length | 201 | -| ep_return | 132 | -| ep_reward | 0.53 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 192 | +| ep_reward | 0.767 | | stat_eval/ | | -| constraint_violation | 1.9 | +| constraint_violation | 2.1 | | ep_length | 226 | -| ep_return | 147 | -| ep_reward | 0.589 | -| mse | 154 | +| ep_return | 140 | +| ep_reward | 0.561 | +| mse | 255 | | time/ | | | progress | 0.54 | | step | 5.4e+05 | -| step_time | 9.57 | +| step_time | 13.1 | -------------------------------------- -2023-10-19 16:50:00,944 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 153.597 +/- 24.428 -2023-10-19 16:50:00,945 : +2023-10-27 19:16:17,118 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 140.492 +/- 53.335 +2023-10-27 19:16:17,119 : -------------------------------------- | loss/ | | -| approx_kl | 0.00709 | -| entropy_loss | -5.34 | -| policy_loss | -0.022 | -| value_loss | 2.89 | +| approx_kl | 0.0184 | +| entropy_loss | -5.01 | +| policy_loss | -0.019 | +| value_loss | 0.0755 | | stat/ | | -| constraint_violation | 1.41e+03 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 122 | -| ep_reward | 0.489 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 154 | -| ep_reward | 0.614 | -| mse | 256 | +| ep_return | 197 | +| ep_reward | 0.787 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 140 | +| ep_reward | 0.562 | +| mse | 267 | | time/ | | | progress | 0.55 | | step | 5.5e+05 | -| step_time | 9.79 | +| step_time | 13 | -------------------------------------- -2023-10-19 16:51:59,527 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 156.692 +/- 13.298 -2023-10-19 16:51:59,528 : +2023-10-27 19:18:58,954 : Eval | ep_lengths 201.60 +/- 96.80 | ep_return 156.641 +/- 84.355 +2023-10-27 19:18:58,956 : -------------------------------------- | loss/ | | -| approx_kl | 0.0368 | -| entropy_loss | -5.36 | -| policy_loss | -0.00304 | -| value_loss | 0.658 | +| approx_kl | 0.0256 | +| entropy_loss | -5.07 | +| policy_loss | -0.00803 | +| value_loss | 0.244 | | stat/ | | -| constraint_violation | 1.42e+03 | -| ep_constraint_vio... | 0.4 | -| ep_length | 226 | -| ep_return | 146 | -| ep_reward | 0.585 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | | ep_length | 250 | +| ep_return | 195 | +| ep_reward | 0.781 | +| stat_eval/ | | +| constraint_violation | 0.2 | +| ep_length | 202 | | ep_return | 157 | | ep_reward | 0.627 | -| mse | 222 | +| mse | 104 | | time/ | | | progress | 0.56 | | step | 5.6e+05 | -| step_time | 9.59 | +| step_time | 14.7 | -------------------------------------- -2023-10-19 16:53:56,219 : Eval | ep_lengths 226.70 +/- 69.90 | ep_return 141.044 +/- 52.967 -2023-10-19 16:53:56,221 : +2023-10-27 19:21:50,253 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 152.691 +/- 24.008 +2023-10-27 19:21:50,254 : -------------------------------------- | loss/ | | -| approx_kl | 0.02 | -| entropy_loss | -5.42 | -| policy_loss | -0.0218 | -| value_loss | 0.458 | +| approx_kl | 0.0234 | +| entropy_loss | -5.16 | +| policy_loss | -0.0102 | +| value_loss | 0.212 | | stat/ | | -| constraint_violation | 1.43e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 134 | -| ep_reward | 0.538 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 205 | +| ep_reward | 0.822 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 227 | -| ep_return | 141 | -| ep_reward | 0.565 | -| mse | 232 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 153 | +| ep_reward | 0.611 | +| mse | 285 | | time/ | | | progress | 0.57 | | step | 5.7e+05 | -| step_time | 9.82 | +| step_time | 14.9 | -------------------------------------- -2023-10-19 16:55:55,878 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 164.029 +/- 33.144 -2023-10-19 16:55:55,879 : +2023-10-27 19:24:38,974 : Eval | ep_lengths 202.50 +/- 95.01 | ep_return 116.124 +/- 63.401 +2023-10-27 19:24:38,975 : -------------------------------------- | loss/ | | -| approx_kl | 0.0271 | -| entropy_loss | -5.5 | -| policy_loss | -0.0112 | -| value_loss | 0.929 | +| approx_kl | 0.032 | +| entropy_loss | -5.23 | +| policy_loss | -0.017 | +| value_loss | 0.169 | | stat/ | | -| constraint_violation | 1.47e+03 | +| constraint_violation | 4 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 167 | -| ep_reward | 0.667 | +| ep_return | 195 | +| ep_reward | 0.779 | | stat_eval/ | | -| constraint_violation | 1.3 | -| ep_length | 250 | -| ep_return | 164 | -| ep_reward | 0.656 | -| mse | 218 | +| constraint_violation | 0.2 | +| ep_length | 202 | +| ep_return | 116 | +| ep_reward | 0.465 | +| mse | 303 | | time/ | | | progress | 0.58 | | step | 5.8e+05 | -| step_time | 9.73 | +| step_time | 14.2 | -------------------------------------- -2023-10-19 16:57:52,885 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 165.433 +/- 18.268 -2023-10-19 16:57:52,886 : +2023-10-27 19:27:22,414 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 143.880 +/- 57.700 +2023-10-27 19:27:22,416 : -------------------------------------- | loss/ | | -| approx_kl | 0.0404 | -| entropy_loss | -5.52 | -| policy_loss | -0.0126 | -| value_loss | 0.63 | +| approx_kl | 0.0202 | +| entropy_loss | -5.3 | +| policy_loss | -0.0103 | +| value_loss | 0.18 | | stat/ | | -| constraint_violation | 1.49e+03 | +| constraint_violation | 4 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 148 | -| ep_reward | 0.59 | +| ep_return | 190 | +| ep_reward | 0.761 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 165 | -| ep_reward | 0.662 | -| mse | 204 | +| constraint_violation | 0.6 | +| ep_length | 225 | +| ep_return | 144 | +| ep_reward | 0.576 | +| mse | 265 | | time/ | | | progress | 0.59 | | step | 5.9e+05 | -| step_time | 9.61 | +| step_time | 14.3 | -------------------------------------- -2023-10-19 16:59:48,334 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 151.382 +/- 55.568 -2023-10-19 16:59:48,335 : +2023-10-27 19:30:07,135 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 154.692 +/- 34.064 +2023-10-27 19:30:07,137 : -------------------------------------- | loss/ | | -| approx_kl | 0.0164 | -| entropy_loss | -5.63 | -| policy_loss | -0.0288 | -| value_loss | 0.513 | +| approx_kl | 0.0236 | +| entropy_loss | -5.35 | +| policy_loss | -0.00276 | +| value_loss | 0.0857 | | stat/ | | -| constraint_violation | 1.53e+03 | -| ep_constraint_vio... | 2.8 | -| ep_length | 227 | -| ep_return | 139 | -| ep_reward | 0.558 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 198 | +| ep_reward | 0.792 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 225 | -| ep_return | 151 | -| ep_reward | 0.606 | -| mse | 178 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 155 | +| ep_reward | 0.619 | +| mse | 283 | | time/ | | | progress | 0.6 | | step | 6e+05 | -| step_time | 9.61 | +| step_time | 14 | -------------------------------------- -2023-10-19 17:01:45,116 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 156.495 +/- 32.924 -2023-10-19 17:01:45,117 : +2023-10-27 19:32:54,478 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 168.068 +/- 35.027 +2023-10-27 19:32:54,479 : -------------------------------------- | loss/ | | -| approx_kl | 0.0328 | -| entropy_loss | -5.66 | -| policy_loss | -0.0215 | -| value_loss | 0.384 | +| approx_kl | 0.0318 | +| entropy_loss | -5.41 | +| policy_loss | -0.00522 | +| value_loss | 0.0794 | | stat/ | | -| constraint_violation | 1.57e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 144 | -| ep_reward | 0.576 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 200 | +| ep_reward | 0.798 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 156 | -| ep_reward | 0.626 | -| mse | 291 | +| ep_return | 168 | +| ep_reward | 0.672 | +| mse | 241 | | time/ | | | progress | 0.61 | | step | 6.1e+05 | -| step_time | 9.49 | +| step_time | 13.3 | -------------------------------------- -2023-10-19 17:03:42,295 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 148.669 +/- 26.202 -2023-10-19 17:03:42,296 : +2023-10-27 19:35:38,059 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 144.526 +/- 56.173 +2023-10-27 19:35:38,061 : -------------------------------------- | loss/ | | -| approx_kl | 0.032 | -| entropy_loss | -5.77 | -| policy_loss | -0.00672 | -| value_loss | 0.475 | +| approx_kl | 0.0313 | +| entropy_loss | -5.55 | +| policy_loss | -0.0142 | +| value_loss | 0.0953 | | stat/ | | -| constraint_violation | 1.57e+03 | -| ep_constraint_vio... | 0.2 | -| ep_length | 226 | -| ep_return | 139 | -| ep_reward | 0.557 | -| stat_eval/ | | -| constraint_violation | 0.3 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 149 | -| ep_reward | 0.595 | -| mse | 253 | +| ep_return | 201 | +| ep_reward | 0.803 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 145 | +| ep_reward | 0.579 | +| mse | 214 | | time/ | | | progress | 0.62 | | step | 6.2e+05 | -| step_time | 9.47 | +| step_time | 13.5 | -------------------------------------- -2023-10-19 17:05:39,387 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 155.968 +/- 26.059 -2023-10-19 17:05:39,388 : +2023-10-27 19:38:24,183 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 165.060 +/- 23.970 +2023-10-27 19:38:24,185 : -------------------------------------- | loss/ | | -| approx_kl | 0.0372 | -| entropy_loss | -5.75 | -| policy_loss | -0.0235 | -| value_loss | 1.24 | +| approx_kl | 0.0278 | +| entropy_loss | -5.58 | +| policy_loss | -0.0124 | +| value_loss | 0.102 | | stat/ | | -| constraint_violation | 1.61e+03 | -| ep_constraint_vio... | 2 | -| ep_length | 154 | -| ep_return | 90.8 | -| ep_reward | 0.369 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 202 | +| ep_reward | 0.807 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 156 | -| ep_reward | 0.624 | -| mse | 221 | +| ep_return | 165 | +| ep_reward | 0.66 | +| mse | 224 | | time/ | | | progress | 0.63 | | step | 6.3e+05 | -| step_time | 9.58 | +| step_time | 12.7 | -------------------------------------- -2023-10-19 17:07:35,314 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 178.096 +/- 18.894 -2023-10-19 17:07:35,323 : +2023-10-27 19:41:16,475 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 162.955 +/- 28.173 +2023-10-27 19:41:16,476 : -------------------------------------- | loss/ | | -| approx_kl | 0.0251 | -| entropy_loss | -5.83 | -| policy_loss | -0.00768 | -| value_loss | 0.776 | +| approx_kl | 0.0331 | +| entropy_loss | -5.65 | +| policy_loss | -0.012 | +| value_loss | 0.13 | | stat/ | | -| constraint_violation | 1.63e+03 | -| ep_constraint_vio... | 1 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 161 | -| ep_reward | 0.644 | +| ep_return | 199 | +| ep_reward | 0.797 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 178 | -| ep_reward | 0.712 | -| mse | 116 | +| ep_return | 163 | +| ep_reward | 0.652 | +| mse | 234 | | time/ | | | progress | 0.64 | | step | 6.4e+05 | -| step_time | 9.37 | +| step_time | 14.3 | -------------------------------------- -2023-10-19 17:09:27,437 : Eval | ep_lengths 201.70 +/- 96.66 | ep_return 127.095 +/- 67.102 -2023-10-19 17:09:27,438 : +2023-10-27 19:44:10,278 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 158.160 +/- 40.657 +2023-10-27 19:44:10,279 : -------------------------------------- | loss/ | | -| approx_kl | 0.0166 | -| entropy_loss | -5.84 | -| policy_loss | -0.0248 | -| value_loss | 0.674 | +| approx_kl | 0.0292 | +| entropy_loss | -5.69 | +| policy_loss | -0.00831 | +| value_loss | 0.102 | | stat/ | | -| constraint_violation | 1.64e+03 | -| ep_constraint_vio... | 0.1 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 177 | -| ep_reward | 0.71 | +| ep_return | 198 | +| ep_reward | 0.792 | | stat_eval/ | | -| constraint_violation | 0.6 | -| ep_length | 202 | -| ep_return | 127 | -| ep_reward | 0.509 | -| mse | 163 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 158 | +| ep_reward | 0.633 | +| mse | 306 | | time/ | | | progress | 0.65 | | step | 6.5e+05 | -| step_time | 9.43 | +| step_time | 13.8 | -------------------------------------- -2023-10-19 17:11:21,195 : Eval | ep_lengths 225.90 +/- 72.30 | ep_return 138.075 +/- 50.426 -2023-10-19 17:11:21,197 : +2023-10-27 19:46:59,041 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 159.740 +/- 19.351 +2023-10-27 19:46:59,043 : -------------------------------------- | loss/ | | -| approx_kl | 0.0211 | -| entropy_loss | -5.82 | -| policy_loss | -0.0243 | -| value_loss | 0.648 | +| approx_kl | 0.0333 | +| entropy_loss | -5.64 | +| policy_loss | -0.0141 | +| value_loss | 0.0817 | | stat/ | | -| constraint_violation | 1.64e+03 | +| constraint_violation | 4 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 149 | -| ep_reward | 0.595 | +| ep_return | 195 | +| ep_reward | 0.78 | | stat_eval/ | | -| constraint_violation | 0.5 | -| ep_length | 226 | -| ep_return | 138 | -| ep_reward | 0.553 | -| mse | 219 | +| constraint_violation | 1.9 | +| ep_length | 250 | +| ep_return | 160 | +| ep_reward | 0.639 | +| mse | 257 | | time/ | | | progress | 0.66 | | step | 6.6e+05 | -| step_time | 9.35 | +| step_time | 14.3 | -------------------------------------- -2023-10-19 17:13:15,241 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 145.613 +/- 55.275 -2023-10-19 17:13:15,242 : +2023-10-27 19:49:49,254 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 169.138 +/- 28.272 +2023-10-27 19:49:49,255 : -------------------------------------- | loss/ | | -| approx_kl | 0.0236 | -| entropy_loss | -5.86 | -| policy_loss | -0.00725 | -| value_loss | 0.885 | +| approx_kl | 0.0212 | +| entropy_loss | -5.76 | +| policy_loss | -0.0127 | +| value_loss | 0.116 | | stat/ | | -| constraint_violation | 1.66e+03 | -| ep_constraint_vio... | 0.8 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 160 | -| ep_reward | 0.639 | +| ep_return | 196 | +| ep_reward | 0.783 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 226 | -| ep_return | 146 | -| ep_reward | 0.587 | -| mse | 203 | +| constraint_violation | 0.5 | +| ep_length | 250 | +| ep_return | 169 | +| ep_reward | 0.677 | +| mse | 232 | | time/ | | | progress | 0.67 | | step | 6.7e+05 | -| step_time | 9.39 | +| step_time | 14 | -------------------------------------- -2023-10-19 17:15:10,558 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 169.397 +/- 25.145 -2023-10-19 17:15:10,559 : +2023-10-27 19:52:44,117 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 160.572 +/- 35.497 +2023-10-27 19:52:44,118 : -------------------------------------- | loss/ | | -| approx_kl | 0.0232 | -| entropy_loss | -5.85 | -| policy_loss | -0.0091 | -| value_loss | 0.557 | +| approx_kl | 0.0216 | +| entropy_loss | -5.78 | +| policy_loss | -0.012 | +| value_loss | 0.183 | | stat/ | | -| constraint_violation | 1.7e+03 | -| ep_constraint_vio... | 1 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 148 | -| ep_reward | 0.592 | +| ep_return | 192 | +| ep_reward | 0.769 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 169 | -| ep_reward | 0.678 | -| mse | 188 | +| ep_return | 161 | +| ep_reward | 0.642 | +| mse | 293 | | time/ | | | progress | 0.68 | | step | 6.8e+05 | -| step_time | 9.3 | +| step_time | 14.4 | -------------------------------------- -2023-10-19 17:17:01,617 : Eval | ep_lengths 202.00 +/- 96.02 | ep_return 125.413 +/- 65.397 -2023-10-19 17:17:01,618 : +2023-10-27 19:55:38,104 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 180.208 +/- 35.360 +2023-10-27 19:55:38,105 : -------------------------------------- | loss/ | | -| approx_kl | 0.0306 | -| entropy_loss | -5.82 | -| policy_loss | -0.0159 | -| value_loss | 0.529 | +| approx_kl | 0.0202 | +| entropy_loss | -5.79 | +| policy_loss | 0.00567 | +| value_loss | 0.285 | | stat/ | | -| constraint_violation | 1.72e+03 | +| constraint_violation | 4 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 145 | -| ep_reward | 0.581 | +| ep_return | 204 | +| ep_reward | 0.816 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 202 | -| ep_return | 125 | -| ep_reward | 0.505 | -| mse | 211 | +| constraint_violation | 0.8 | +| ep_length | 250 | +| ep_return | 180 | +| ep_reward | 0.721 | +| mse | 156 | | time/ | | | progress | 0.69 | | step | 6.9e+05 | -| step_time | 9.29 | +| step_time | 14.3 | -------------------------------------- -2023-10-19 17:18:55,319 : Eval | ep_lengths 227.20 +/- 68.40 | ep_return 143.040 +/- 52.973 -2023-10-19 17:18:55,320 : +2023-10-27 19:58:30,363 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 158.815 +/- 26.751 +2023-10-27 19:58:30,365 : -------------------------------------- | loss/ | | -| approx_kl | 0.0223 | -| entropy_loss | -5.84 | -| policy_loss | -0.0118 | -| value_loss | 1.05 | +| approx_kl | 0.0199 | +| entropy_loss | -5.86 | +| policy_loss | -0.00831 | +| value_loss | 0.246 | | stat/ | | -| constraint_violation | 1.73e+03 | +| constraint_violation | 4 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 142 | -| ep_reward | 0.568 | +| ep_return | 190 | +| ep_reward | 0.761 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 227 | -| ep_return | 143 | -| ep_reward | 0.572 | -| mse | 194 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 159 | +| ep_reward | 0.635 | +| mse | 228 | | time/ | | | progress | 0.7 | | step | 7e+05 | -| step_time | 9.37 | +| step_time | 13.1 | -------------------------------------- -2023-10-19 17:20:46,210 : Eval | ep_lengths 226.20 +/- 71.40 | ep_return 158.883 +/- 56.842 -2023-10-19 17:20:46,211 : +2023-10-27 20:01:24,666 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 167.514 +/- 40.893 +2023-10-27 20:01:24,668 : -------------------------------------- | loss/ | | -| approx_kl | 0.031 | -| entropy_loss | -5.81 | -| policy_loss | -0.00613 | -| value_loss | 0.744 | +| approx_kl | 0.0293 | +| entropy_loss | -5.9 | +| policy_loss | -0.0132 | +| value_loss | 0.222 | | stat/ | | -| constraint_violation | 1.75e+03 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 113 | -| ep_reward | 0.452 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 200 | +| ep_reward | 0.8 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 226 | -| ep_return | 159 | -| ep_reward | 0.639 | -| mse | 115 | +| constraint_violation | 1.2 | +| ep_length | 250 | +| ep_return | 168 | +| ep_reward | 0.67 | +| mse | 248 | | time/ | | | progress | 0.71 | | step | 7.1e+05 | -| step_time | 9.15 | +| step_time | 14.4 | -------------------------------------- -2023-10-19 17:22:38,122 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 131.365 +/- 49.386 -2023-10-19 17:22:38,123 : +2023-10-27 20:04:13,548 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 152.217 +/- 58.375 +2023-10-27 20:04:13,550 : -------------------------------------- | loss/ | | -| approx_kl | 0.0346 | -| entropy_loss | -5.86 | -| policy_loss | -0.0151 | -| value_loss | 0.755 | +| approx_kl | 0.034 | +| entropy_loss | -5.85 | +| policy_loss | -0.00954 | +| value_loss | 0.126 | | stat/ | | -| constraint_violation | 1.76e+03 | -| ep_constraint_vio... | 0.4 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 147 | -| ep_reward | 0.589 | +| ep_return | 193 | +| ep_reward | 0.773 | | stat_eval/ | | -| constraint_violation | 2.1 | +| constraint_violation | 1.5 | | ep_length | 226 | -| ep_return | 131 | -| ep_reward | 0.526 | -| mse | 215 | +| ep_return | 152 | +| ep_reward | 0.609 | +| mse | 196 | | time/ | | | progress | 0.72 | | step | 7.2e+05 | -| step_time | 9.32 | +| step_time | 15.5 | -------------------------------------- -2023-10-19 17:24:36,412 : Eval | ep_lengths 202.60 +/- 94.84 | ep_return 120.178 +/- 64.602 -2023-10-19 17:24:36,414 : +2023-10-27 20:07:04,345 : Eval | ep_lengths 226.80 +/- 69.60 | ep_return 162.274 +/- 62.759 +2023-10-27 20:07:04,347 : -------------------------------------- | loss/ | | -| approx_kl | 0.0321 | -| entropy_loss | -5.87 | -| policy_loss | -0.00341 | -| value_loss | 0.692 | +| approx_kl | 0.0243 | +| entropy_loss | -5.89 | +| policy_loss | -0.0106 | +| value_loss | 0.155 | | stat/ | | -| constraint_violation | 1.77e+03 | +| constraint_violation | 4 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 158 | -| ep_reward | 0.633 | +| ep_return | 200 | +| ep_reward | 0.801 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 203 | -| ep_return | 120 | -| ep_reward | 0.489 | -| mse | 244 | +| constraint_violation | 0.1 | +| ep_length | 227 | +| ep_return | 162 | +| ep_reward | 0.649 | +| mse | 158 | | time/ | | | progress | 0.73 | | step | 7.3e+05 | -| step_time | 10.8 | +| step_time | 14 | -------------------------------------- -2023-10-19 17:26:45,252 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 155.420 +/- 29.311 -2023-10-19 17:26:45,253 : +2023-10-27 20:09:57,254 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 154.310 +/- 24.919 +2023-10-27 20:09:57,255 : -------------------------------------- | loss/ | | -| approx_kl | 0.0251 | -| entropy_loss | -5.9 | -| policy_loss | -0.0137 | -| value_loss | 0.823 | +| approx_kl | 0.0196 | +| entropy_loss | -5.93 | +| policy_loss | -0.018 | +| value_loss | 0.124 | | stat/ | | -| constraint_violation | 1.77e+03 | +| constraint_violation | 4 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 155 | -| ep_reward | 0.622 | +| ep_return | 202 | +| ep_reward | 0.807 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 155 | -| ep_reward | 0.622 | -| mse | 263 | +| ep_return | 154 | +| ep_reward | 0.617 | +| mse | 252 | | time/ | | | progress | 0.74 | | step | 7.4e+05 | -| step_time | 10.8 | +| step_time | 13.1 | -------------------------------------- -2023-10-19 17:28:54,284 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 153.423 +/- 18.436 -2023-10-19 17:28:54,285 : +2023-10-27 20:12:47,275 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 143.450 +/- 52.749 +2023-10-27 20:12:47,277 : -------------------------------------- | loss/ | | -| approx_kl | 0.0299 | -| entropy_loss | -5.89 | -| policy_loss | -0.00737 | -| value_loss | 0.366 | +| approx_kl | 0.0276 | +| entropy_loss | -5.98 | +| policy_loss | -0.0167 | +| value_loss | 0.166 | | stat/ | | -| constraint_violation | 1.81e+03 | -| ep_constraint_vio... | 1.4 | -| ep_length | 226 | -| ep_return | 139 | -| ep_reward | 0.558 | -| stat_eval/ | | -| constraint_violation | 0.4 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 153 | -| ep_reward | 0.614 | -| mse | 268 | +| ep_return | 189 | +| ep_reward | 0.756 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 143 | +| ep_reward | 0.58 | +| mse | 226 | | time/ | | | progress | 0.75 | | step | 7.5e+05 | -| step_time | 9.92 | +| step_time | 13.4 | -------------------------------------- -2023-10-19 17:30:56,718 : Eval | ep_lengths 176.90 +/- 111.73 | ep_return 102.451 +/- 68.383 -2023-10-19 17:30:56,719 : +2023-10-27 20:15:35,400 : Eval | ep_lengths 225.60 +/- 73.20 | ep_return 140.931 +/- 56.684 +2023-10-27 20:15:35,401 : -------------------------------------- | loss/ | | -| approx_kl | 0.025 | +| approx_kl | 0.0307 | | entropy_loss | -5.95 | -| policy_loss | -0.0203 | -| value_loss | 0.524 | +| policy_loss | 0.00153 | +| value_loss | 0.25 | | stat/ | | -| constraint_violation | 1.81e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 143 | -| ep_reward | 0.576 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 199 | +| ep_reward | 0.796 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 177 | -| ep_return | 102 | -| ep_reward | 0.414 | -| mse | 227 | +| constraint_violation | 0.1 | +| ep_length | 226 | +| ep_return | 141 | +| ep_reward | 0.564 | +| mse | 243 | | time/ | | | progress | 0.76 | | step | 7.6e+05 | -| step_time | 10.5 | +| step_time | 15.7 | -------------------------------------- -2023-10-19 17:33:03,397 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 154.316 +/- 18.280 -2023-10-19 17:33:03,398 : +2023-10-27 20:18:24,796 : Eval | ep_lengths 201.30 +/- 97.41 | ep_return 133.011 +/- 67.682 +2023-10-27 20:18:24,797 : -------------------------------------- | loss/ | | -| approx_kl | 0.0281 | -| entropy_loss | -5.95 | -| policy_loss | -0.0102 | -| value_loss | 0.459 | +| approx_kl | 0.0181 | +| entropy_loss | -6.1 | +| policy_loss | -0.0145 | +| value_loss | 0.149 | | stat/ | | -| constraint_violation | 1.82e+03 | +| constraint_violation | 4 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 154 | -| ep_reward | 0.616 | +| ep_return | 185 | +| ep_reward | 0.741 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 154 | -| ep_reward | 0.617 | -| mse | 216 | +| constraint_violation | 0.2 | +| ep_length | 201 | +| ep_return | 133 | +| ep_reward | 0.536 | +| mse | 140 | | time/ | | | progress | 0.77 | | step | 7.7e+05 | -| step_time | 10.2 | +| step_time | 15.5 | -------------------------------------- -2023-10-19 17:35:08,424 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 146.363 +/- 21.839 -2023-10-19 17:35:08,425 : +2023-10-27 20:21:20,078 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 158.977 +/- 62.530 +2023-10-27 20:21:20,079 : -------------------------------------- | loss/ | | -| approx_kl | 0.0247 | -| entropy_loss | -5.94 | -| policy_loss | -0.0188 | -| value_loss | 0.455 | +| approx_kl | 0.0267 | +| entropy_loss | -6.09 | +| policy_loss | -0.00512 | +| value_loss | 0.208 | | stat/ | | -| constraint_violation | 1.83e+03 | +| constraint_violation | 4 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 164 | -| ep_reward | 0.655 | +| ep_return | 195 | +| ep_reward | 0.781 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 250 | -| ep_return | 146 | -| ep_reward | 0.585 | -| mse | 282 | +| constraint_violation | 1.5 | +| ep_length | 225 | +| ep_return | 159 | +| ep_reward | 0.644 | +| mse | 195 | | time/ | | | progress | 0.78 | | step | 7.8e+05 | -| step_time | 9.45 | +| step_time | 13.9 | -------------------------------------- -2023-10-19 17:37:12,463 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 146.649 +/- 24.375 -2023-10-19 17:37:12,465 : +2023-10-27 20:24:11,056 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 147.405 +/- 52.066 +2023-10-27 20:24:11,058 : -------------------------------------- | loss/ | | -| approx_kl | 0.0157 | -| entropy_loss | -5.97 | -| policy_loss | -0.0154 | -| value_loss | 0.359 | +| approx_kl | 0.0274 | +| entropy_loss | -6.13 | +| policy_loss | -0.012 | +| value_loss | 0.0876 | | stat/ | | -| constraint_violation | 1.84e+03 | -| ep_constraint_vio... | 0.8 | -| ep_length | 225 | -| ep_return | 151 | -| ep_reward | 0.606 | -| stat_eval/ | | -| constraint_violation | 1.4 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | | ep_length | 250 | +| ep_return | 187 | +| ep_reward | 0.75 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 226 | | ep_return | 147 | -| ep_reward | 0.587 | -| mse | 334 | +| ep_reward | 0.592 | +| mse | 193 | | time/ | | | progress | 0.79 | | step | 7.9e+05 | -| step_time | 10.4 | +| step_time | 13 | -------------------------------------- -2023-10-19 17:39:14,644 : Eval | ep_lengths 225.80 +/- 72.60 | ep_return 153.838 +/- 58.651 -2023-10-19 17:39:14,645 : +2023-10-27 20:27:08,929 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 153.728 +/- 25.118 +2023-10-27 20:27:08,931 : -------------------------------------- | loss/ | | -| approx_kl | 0.0368 | -| entropy_loss | -6.04 | -| policy_loss | -0.0119 | -| value_loss | 0.578 | +| approx_kl | 0.0236 | +| entropy_loss | -6.18 | +| policy_loss | -0.00695 | +| value_loss | 0.19 | | stat/ | | -| constraint_violation | 1.88e+03 | -| ep_constraint_vio... | 1.6 | -| ep_length | 201 | -| ep_return | 127 | -| ep_reward | 0.511 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 188 | +| ep_reward | 0.752 | | stat_eval/ | | -| constraint_violation | 0.6 | -| ep_length | 226 | +| constraint_violation | 0 | +| ep_length | 250 | | ep_return | 154 | -| ep_reward | 0.618 | -| mse | 203 | +| ep_reward | 0.615 | +| mse | 294 | | time/ | | | progress | 0.8 | | step | 8e+05 | -| step_time | 10.4 | +| step_time | 14.8 | -------------------------------------- -2023-10-19 17:41:17,423 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 143.107 +/- 56.350 -2023-10-19 17:41:17,424 : +2023-10-27 20:30:02,467 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 170.327 +/- 23.502 +2023-10-27 20:30:02,468 : -------------------------------------- | loss/ | | -| approx_kl | 0.0246 | -| entropy_loss | -6.07 | -| policy_loss | -0.0106 | -| value_loss | 0.88 | +| approx_kl | 0.0308 | +| entropy_loss | -6.24 | +| policy_loss | -0.0162 | +| value_loss | 0.227 | | stat/ | | -| constraint_violation | 1.9e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 141 | -| ep_reward | 0.568 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 199 | +| ep_reward | 0.797 | | stat_eval/ | | | constraint_violation | 0.1 | -| ep_length | 226 | -| ep_return | 143 | -| ep_reward | 0.574 | -| mse | 217 | +| ep_length | 250 | +| ep_return | 170 | +| ep_reward | 0.681 | +| mse | 189 | | time/ | | | progress | 0.81 | | step | 8.1e+05 | -| step_time | 10.6 | +| step_time | 13.2 | -------------------------------------- -2023-10-19 17:43:19,521 : Eval | ep_lengths 225.50 +/- 73.50 | ep_return 151.916 +/- 57.316 -2023-10-19 17:43:19,522 : +2023-10-27 20:32:54,543 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 181.631 +/- 24.143 +2023-10-27 20:32:54,545 : -------------------------------------- | loss/ | | -| approx_kl | 0.036 | -| entropy_loss | -6.06 | -| policy_loss | -0.0222 | -| value_loss | 0.431 | +| approx_kl | 0.023 | +| entropy_loss | -6.27 | +| policy_loss | -0.00766 | +| value_loss | 0.239 | | stat/ | | -| constraint_violation | 1.91e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 135 | -| ep_reward | 0.539 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 198 | +| ep_reward | 0.793 | | stat_eval/ | | -| constraint_violation | 1.7 | -| ep_length | 226 | -| ep_return | 152 | -| ep_reward | 0.608 | -| mse | 189 | +| constraint_violation | 0.7 | +| ep_length | 250 | +| ep_return | 182 | +| ep_reward | 0.727 | +| mse | 139 | | time/ | | | progress | 0.82 | | step | 8.2e+05 | -| step_time | 10.4 | +| step_time | 14.4 | -------------------------------------- -2023-10-19 17:45:20,457 : Eval | ep_lengths 200.40 +/- 99.20 | ep_return 134.563 +/- 72.319 -2023-10-19 17:45:20,458 : +2023-10-27 20:35:47,444 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 174.844 +/- 33.145 +2023-10-27 20:35:47,445 : -------------------------------------- | loss/ | | -| approx_kl | 0.0104 | -| entropy_loss | -6.14 | -| policy_loss | -0.0243 | -| value_loss | 1.44 | +| approx_kl | 0.0195 | +| entropy_loss | -6.3 | +| policy_loss | -0.0214 | +| value_loss | 0.191 | | stat/ | | -| constraint_violation | 1.95e+03 | -| ep_constraint_vio... | 1.1 | -| ep_length | 226 | -| ep_return | 153 | -| ep_reward | 0.614 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 190 | +| ep_reward | 0.762 | | stat_eval/ | | -| constraint_violation | 0.5 | -| ep_length | 200 | -| ep_return | 135 | -| ep_reward | 0.568 | -| mse | 141 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 175 | +| ep_reward | 0.699 | +| mse | 213 | | time/ | | | progress | 0.83 | | step | 8.3e+05 | -| step_time | 10.3 | +| step_time | 13.4 | -------------------------------------- -2023-10-19 17:47:22,521 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 140.106 +/- 51.786 -2023-10-19 17:47:22,522 : +2023-10-27 20:38:36,578 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 169.916 +/- 29.655 +2023-10-27 20:38:36,580 : -------------------------------------- | loss/ | | -| approx_kl | 0.035 | -| entropy_loss | -6.13 | -| policy_loss | -0.0156 | -| value_loss | 1.58 | +| approx_kl | 0.0245 | +| entropy_loss | -6.33 | +| policy_loss | -0.0169 | +| value_loss | 0.175 | | stat/ | | -| constraint_violation | 1.95e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 227 | -| ep_return | 135 | -| ep_reward | 0.542 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 195 | +| ep_reward | 0.779 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 140 | -| ep_reward | 0.561 | -| mse | 234 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 170 | +| ep_reward | 0.68 | +| mse | 200 | | time/ | | | progress | 0.84 | | step | 8.4e+05 | -| step_time | 10.5 | +| step_time | 13.1 | -------------------------------------- -2023-10-19 17:49:26,991 : Eval | ep_lengths 226.10 +/- 71.70 | ep_return 145.999 +/- 52.341 -2023-10-19 17:49:26,992 : +2023-10-27 20:41:14,220 : Eval | ep_lengths 151.60 +/- 120.52 | ep_return 101.140 +/- 83.982 +2023-10-27 20:41:14,221 : -------------------------------------- | loss/ | | -| approx_kl | 0.0334 | -| entropy_loss | -6.13 | -| policy_loss | -0.0113 | -| value_loss | 0.663 | +| approx_kl | 0.019 | +| entropy_loss | -6.42 | +| policy_loss | -0.00785 | +| value_loss | 0.465 | | stat/ | | -| constraint_violation | 1.96e+03 | -| ep_constraint_vio... | 0.9 | -| ep_length | 226 | -| ep_return | 138 | -| ep_reward | 0.551 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 198 | +| ep_reward | 0.792 | | stat_eval/ | | -| constraint_violation | 1.7 | -| ep_length | 226 | -| ep_return | 146 | -| ep_reward | 0.587 | -| mse | 195 | +| constraint_violation | 0.4 | +| ep_length | 152 | +| ep_return | 101 | +| ep_reward | 0.413 | +| mse | 105 | | time/ | | | progress | 0.85 | | step | 8.5e+05 | -| step_time | 9.87 | +| step_time | 13.8 | -------------------------------------- -2023-10-19 17:51:32,785 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 177.098 +/- 26.506 -2023-10-19 17:51:32,786 : +2023-10-27 20:44:09,453 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 153.488 +/- 36.464 +2023-10-27 20:44:09,455 : -------------------------------------- | loss/ | | -| approx_kl | 0.0159 | -| entropy_loss | -6.12 | -| policy_loss | -0.013 | -| value_loss | 0.81 | +| approx_kl | 0.0256 | +| entropy_loss | -6.49 | +| policy_loss | -0.0152 | +| value_loss | 0.177 | | stat/ | | -| constraint_violation | 1.99e+03 | +| constraint_violation | 5 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 165 | -| ep_reward | 0.66 | +| ep_return | 196 | +| ep_reward | 0.782 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 177 | -| ep_reward | 0.708 | -| mse | 147 | +| ep_return | 153 | +| ep_reward | 0.614 | +| mse | 336 | | time/ | | | progress | 0.86 | | step | 8.6e+05 | -| step_time | 10.5 | +| step_time | 14.5 | -------------------------------------- -2023-10-19 17:53:34,178 : Eval | ep_lengths 202.00 +/- 96.00 | ep_return 129.614 +/- 70.644 -2023-10-19 17:53:34,179 : +2023-10-27 20:46:58,010 : Eval | ep_lengths 201.70 +/- 96.61 | ep_return 136.617 +/- 73.640 +2023-10-27 20:46:58,011 : -------------------------------------- | loss/ | | -| approx_kl | 0.0197 | -| entropy_loss | -6.2 | -| policy_loss | -0.0129 | -| value_loss | 4.91 | +| approx_kl | 0.0161 | +| entropy_loss | -6.43 | +| policy_loss | -0.0183 | +| value_loss | 0.435 | | stat/ | | -| constraint_violation | 2.03e+03 | -| ep_constraint_vio... | 1.8 | -| ep_length | 201 | -| ep_return | 131 | -| ep_reward | 0.536 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 207 | +| ep_reward | 0.827 | | stat_eval/ | | -| constraint_violation | 0.4 | +| constraint_violation | 1.3 | | ep_length | 202 | -| ep_return | 130 | -| ep_reward | 0.52 | -| mse | 203 | +| ep_return | 137 | +| ep_reward | 0.547 | +| mse | 206 | | time/ | | | progress | 0.87 | | step | 8.7e+05 | -| step_time | 10.5 | +| step_time | 14.3 | -------------------------------------- -2023-10-19 17:55:37,082 : Eval | ep_lengths 225.80 +/- 72.60 | ep_return 142.067 +/- 56.162 -2023-10-19 17:55:37,083 : +2023-10-27 20:49:41,719 : Eval | ep_lengths 201.60 +/- 96.82 | ep_return 132.661 +/- 70.650 +2023-10-27 20:49:41,721 : -------------------------------------- | loss/ | | -| approx_kl | 0.0293 | -| entropy_loss | -6.18 | -| policy_loss | -0.00795 | -| value_loss | 0.664 | +| approx_kl | 0.023 | +| entropy_loss | -6.39 | +| policy_loss | -0.017 | +| value_loss | 0.164 | | stat/ | | -| constraint_violation | 2.03e+03 | +| constraint_violation | 5 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 157 | -| ep_reward | 0.626 | +| ep_return | 198 | +| ep_reward | 0.791 | | stat_eval/ | | | constraint_violation | 0.2 | -| ep_length | 226 | -| ep_return | 142 | -| ep_reward | 0.569 | -| mse | 235 | +| ep_length | 202 | +| ep_return | 133 | +| ep_reward | 0.532 | +| mse | 194 | | time/ | | | progress | 0.88 | | step | 8.8e+05 | -| step_time | 10.4 | +| step_time | 13.4 | -------------------------------------- -2023-10-19 17:57:39,155 : Eval | ep_lengths 226.60 +/- 70.20 | ep_return 142.869 +/- 54.331 -2023-10-19 17:57:39,156 : +2023-10-27 20:52:32,250 : Eval | ep_lengths 226.10 +/- 71.70 | ep_return 156.260 +/- 59.495 +2023-10-27 20:52:32,252 : -------------------------------------- | loss/ | | -| approx_kl | 0.024 | -| entropy_loss | -6.21 | -| policy_loss | -0.00813 | -| value_loss | 0.306 | +| approx_kl | 0.0227 | +| entropy_loss | -6.38 | +| policy_loss | -0.0154 | +| value_loss | 0.136 | | stat/ | | -| constraint_violation | 2.05e+03 | -| ep_constraint_vio... | 1.2 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 154 | -| ep_reward | 0.616 | +| ep_return | 190 | +| ep_reward | 0.761 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 227 | -| ep_return | 143 | -| ep_reward | 0.572 | -| mse | 228 | +| constraint_violation | 0.3 | +| ep_length | 226 | +| ep_return | 156 | +| ep_reward | 0.625 | +| mse | 198 | | time/ | | | progress | 0.89 | | step | 8.9e+05 | -| step_time | 10.2 | +| step_time | 14.3 | -------------------------------------- -2023-10-19 17:59:43,110 : Eval | ep_lengths 226.90 +/- 69.30 | ep_return 146.746 +/- 55.163 -2023-10-19 17:59:43,111 : +2023-10-27 20:55:22,285 : Eval | ep_lengths 225.90 +/- 72.30 | ep_return 149.576 +/- 56.423 +2023-10-27 20:55:22,287 : -------------------------------------- | loss/ | | -| approx_kl | 0.0234 | -| entropy_loss | -6.2 | -| policy_loss | -0.0105 | -| value_loss | 1.77 | +| approx_kl | 0.0294 | +| entropy_loss | -6.36 | +| policy_loss | -0.012 | +| value_loss | 0.301 | | stat/ | | -| constraint_violation | 2.06e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 144 | -| ep_reward | 0.577 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 199 | +| ep_reward | 0.796 | | stat_eval/ | | -| constraint_violation | 0.4 | -| ep_length | 227 | -| ep_return | 147 | -| ep_reward | 0.587 | -| mse | 209 | +| constraint_violation | 1.4 | +| ep_length | 226 | +| ep_return | 150 | +| ep_reward | 0.599 | +| mse | 222 | | time/ | | | progress | 0.9 | | step | 9e+05 | -| step_time | 10.7 | +| step_time | 13.4 | -------------------------------------- -2023-10-19 18:01:44,742 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 151.757 +/- 56.343 -2023-10-19 18:01:44,744 : +2023-10-27 20:58:12,481 : Eval | ep_lengths 226.60 +/- 70.20 | ep_return 154.493 +/- 56.934 +2023-10-27 20:58:12,483 : -------------------------------------- | loss/ | | -| approx_kl | 0.0225 | -| entropy_loss | -6.26 | -| policy_loss | -0.0149 | -| value_loss | 1.17 | +| approx_kl | 0.0334 | +| entropy_loss | -6.37 | +| policy_loss | -0.0104 | +| value_loss | 0.0774 | | stat/ | | -| constraint_violation | 2.08e+03 | +| constraint_violation | 6 | | ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 144 | -| ep_reward | 0.575 | +| ep_length | 250 | +| ep_return | 197 | +| ep_reward | 0.787 | | stat_eval/ | | | constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 152 | -| ep_reward | 0.608 | -| mse | 201 | +| ep_length | 227 | +| ep_return | 154 | +| ep_reward | 0.619 | +| mse | 217 | | time/ | | | progress | 0.91 | | step | 9.1e+05 | -| step_time | 8.84 | +| step_time | 14.6 | -------------------------------------- -2023-10-19 18:03:45,042 : Eval | ep_lengths 226.70 +/- 69.90 | ep_return 142.666 +/- 52.600 -2023-10-19 18:03:45,043 : +2023-10-27 21:01:00,696 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 171.001 +/- 60.803 +2023-10-27 21:01:00,698 : -------------------------------------- | loss/ | | -| approx_kl | 0.0238 | -| entropy_loss | -6.38 | -| policy_loss | -0.00963 | -| value_loss | 1.54 | +| approx_kl | 0.0117 | +| entropy_loss | -6.36 | +| policy_loss | -0.0168 | +| value_loss | 0.204 | | stat/ | | -| constraint_violation | 2.1e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 158 | -| ep_reward | 0.636 | +| constraint_violation | 6 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 199 | +| ep_reward | 0.795 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 227 | -| ep_return | 143 | -| ep_reward | 0.572 | -| mse | 204 | +| constraint_violation | 1.4 | +| ep_length | 225 | +| ep_return | 171 | +| ep_reward | 0.685 | +| mse | 103 | | time/ | | | progress | 0.92 | | step | 9.2e+05 | -| step_time | 10.4 | +| step_time | 15 | -------------------------------------- -2023-10-19 18:05:42,711 : Eval | ep_lengths 202.20 +/- 95.67 | ep_return 116.571 +/- 63.157 -2023-10-19 18:05:42,712 : +2023-10-27 21:03:53,292 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 173.052 +/- 22.509 +2023-10-27 21:03:53,293 : -------------------------------------- | loss/ | | -| approx_kl | 0.0259 | -| entropy_loss | -6.44 | -| policy_loss | -0.0222 | -| value_loss | 1.3 | +| approx_kl | 0.0289 | +| entropy_loss | -6.39 | +| policy_loss | -0.00325 | +| value_loss | 0.721 | | stat/ | | -| constraint_violation | 2.11e+03 | -| ep_constraint_vio... | 0.1 | +| constraint_violation | 6 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 169 | -| ep_reward | 0.674 | +| ep_return | 194 | +| ep_reward | 0.776 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 202 | -| ep_return | 117 | -| ep_reward | 0.467 | -| mse | 250 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 173 | +| ep_reward | 0.692 | +| mse | 178 | | time/ | | | progress | 0.93 | | step | 9.3e+05 | -| step_time | 9.67 | +| step_time | 13.6 | -------------------------------------- -2023-10-19 18:07:44,170 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 154.776 +/- 24.142 -2023-10-19 18:07:44,171 : +2023-10-27 21:06:45,670 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 165.361 +/- 29.156 +2023-10-27 21:06:45,672 : -------------------------------------- | loss/ | | -| approx_kl | 0.0262 | -| entropy_loss | -6.43 | -| policy_loss | -0.0176 | -| value_loss | 1.38 | +| approx_kl | 0.0325 | +| entropy_loss | -6.47 | +| policy_loss | -0.00508 | +| value_loss | 0.118 | | stat/ | | -| constraint_violation | 2.13e+03 | +| constraint_violation | 7 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 162 | -| ep_reward | 0.649 | +| ep_return | 189 | +| ep_reward | 0.756 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 155 | -| ep_reward | 0.619 | -| mse | 282 | +| ep_return | 165 | +| ep_reward | 0.661 | +| mse | 239 | | time/ | | | progress | 0.94 | | step | 9.4e+05 | -| step_time | 10.4 | +| step_time | 15.3 | -------------------------------------- -2023-10-19 18:09:43,439 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 150.580 +/- 24.492 -2023-10-19 18:09:43,440 : +2023-10-27 21:09:33,084 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 161.062 +/- 30.792 +2023-10-27 21:09:33,085 : -------------------------------------- | loss/ | | -| approx_kl | 0.0287 | -| entropy_loss | -6.4 | -| policy_loss | -0.0103 | -| value_loss | 0.849 | +| approx_kl | 0.0281 | +| entropy_loss | -6.48 | +| policy_loss | -0.0132 | +| value_loss | 0.115 | | stat/ | | -| constraint_violation | 2.14e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 146 | -| ep_reward | 0.587 | +| constraint_violation | 7 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 196 | +| ep_reward | 0.783 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 151 | -| ep_reward | 0.602 | -| mse | 298 | +| ep_return | 161 | +| ep_reward | 0.644 | +| mse | 304 | | time/ | | | progress | 0.95 | | step | 9.5e+05 | -| step_time | 9.61 | +| step_time | 13.2 | -------------------------------------- -2023-10-19 18:11:45,256 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 157.378 +/- 26.674 -2023-10-19 18:11:45,257 : +2023-10-27 21:12:26,282 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 176.820 +/- 25.097 +2023-10-27 21:12:26,283 : -------------------------------------- | loss/ | | -| approx_kl | 0.0291 | -| entropy_loss | -6.45 | -| policy_loss | -0.0103 | -| value_loss | 0.98 | +| approx_kl | 0.0331 | +| entropy_loss | -6.53 | +| policy_loss | -0.00443 | +| value_loss | 0.1 | | stat/ | | -| constraint_violation | 2.18e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 145 | -| ep_reward | 0.581 | +| constraint_violation | 7 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 194 | +| ep_reward | 0.776 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 157 | -| ep_reward | 0.63 | -| mse | 227 | +| ep_return | 177 | +| ep_reward | 0.707 | +| mse | 189 | | time/ | | | progress | 0.96 | | step | 9.6e+05 | -| step_time | 9.61 | +| step_time | 13.9 | -------------------------------------- -2023-10-19 18:13:45,775 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 170.626 +/- 24.142 -2023-10-19 18:13:45,776 : +2023-10-27 21:15:11,819 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 148.629 +/- 53.210 +2023-10-27 21:15:11,820 : -------------------------------------- | loss/ | | -| approx_kl | 0.0168 | -| entropy_loss | -6.44 | -| policy_loss | -0.00466 | -| value_loss | 0.589 | +| approx_kl | 0.0228 | +| entropy_loss | -6.46 | +| policy_loss | -0.00671 | +| value_loss | 0.12 | | stat/ | | -| constraint_violation | 2.18e+03 | +| constraint_violation | 7 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 164 | -| ep_reward | 0.657 | +| ep_return | 203 | +| ep_reward | 0.814 | | stat_eval/ | | -| constraint_violation | 1 | -| ep_length | 250 | -| ep_return | 171 | -| ep_reward | 0.683 | -| mse | 163 | +| constraint_violation | 1.7 | +| ep_length | 225 | +| ep_return | 149 | +| ep_reward | 0.595 | +| mse | 237 | | time/ | | | progress | 0.97 | | step | 9.7e+05 | -| step_time | 8.95 | +| step_time | 13.3 | -------------------------------------- -2023-10-19 18:15:42,379 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 134.072 +/- 50.971 -2023-10-19 18:15:42,380 : +2023-10-27 21:17:59,599 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 181.449 +/- 23.456 +2023-10-27 21:17:59,601 : -------------------------------------- | loss/ | | -| approx_kl | 0.0216 | -| entropy_loss | -6.46 | -| policy_loss | -0.0168 | -| value_loss | 0.779 | +| approx_kl | 0.037 | +| entropy_loss | -6.39 | +| policy_loss | -0.0172 | +| value_loss | 0.092 | | stat/ | | -| constraint_violation | 2.21e+03 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 124 | -| ep_reward | 0.497 | +| constraint_violation | 7 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 194 | +| ep_reward | 0.778 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 225 | -| ep_return | 134 | -| ep_reward | 0.536 | -| mse | 220 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 181 | +| ep_reward | 0.726 | +| mse | 161 | | time/ | | | progress | 0.98 | | step | 9.8e+05 | -| step_time | 9.69 | +| step_time | 13.7 | -------------------------------------- -2023-10-19 18:17:41,202 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 174.635 +/- 13.036 -2023-10-19 18:17:41,203 : +2023-10-27 21:20:43,746 : Eval | ep_lengths 226.70 +/- 69.90 | ep_return 170.973 +/- 58.609 +2023-10-27 21:20:43,747 : -------------------------------------- | loss/ | | -| approx_kl | 0.0237 | +| approx_kl | 0.0253 | | entropy_loss | -6.38 | -| policy_loss | -0.0158 | -| value_loss | 1.76 | +| policy_loss | -0.00133 | +| value_loss | 0.0609 | | stat/ | | -| constraint_violation | 2.23e+03 | -| ep_constraint_vio... | 0.5 | +| constraint_violation | 7 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 162 | -| ep_reward | 0.647 | +| ep_return | 204 | +| ep_reward | 0.815 | | stat_eval/ | | -| constraint_violation | 0.9 | -| ep_length | 250 | -| ep_return | 175 | -| ep_reward | 0.699 | -| mse | 141 | +| constraint_violation | 0.4 | +| ep_length | 227 | +| ep_return | 171 | +| ep_reward | 0.692 | +| mse | 98 | | time/ | | | progress | 0.99 | | step | 9.9e+05 | -| step_time | 10.3 | +| step_time | 14 | -------------------------------------- -2023-10-19 18:19:16,405 : Checkpoint | ./models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/model_latest.pt -2023-10-19 18:19:36,149 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 138.365 +/- 53.129 -2023-10-19 18:19:36,150 : +2023-10-27 21:23:10,964 : Checkpoint | ./models/rl_models/quadrotor_3D/track/ppo/mpsf_sr/model_latest.pt +2023-10-27 21:23:39,769 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 159.382 +/- 33.899 +2023-10-27 21:23:39,771 : -------------------------------------- | loss/ | | -| approx_kl | 0.0314 | -| entropy_loss | -6.44 | -| policy_loss | -0.0121 | -| value_loss | 0.695 | +| approx_kl | 0.0278 | +| entropy_loss | -6.38 | +| policy_loss | -0.0205 | +| value_loss | 0.0528 | | stat/ | | -| constraint_violation | 2.27e+03 | -| ep_constraint_vio... | 1.7 | +| constraint_violation | 7 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 153 | -| ep_reward | 0.611 | +| ep_return | 191 | +| ep_reward | 0.763 | | stat_eval/ | | -| constraint_violation | 0.7 | -| ep_length | 225 | -| ep_return | 138 | -| ep_reward | 0.554 | -| mse | 226 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 159 | +| ep_reward | 0.638 | +| mse | 337 | | time/ | | | progress | 1 | | step | 1e+06 | -| step_time | 9.14 | +| step_time | 14.2 | -------------------------------------- diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/loss/approx_kl.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/loss/approx_kl.log index 8fc3c9655..57e6626c0 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/loss/approx_kl.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/loss/approx_kl.log @@ -1,101 +1,101 @@ step,loss/approx_kl -10000,0.0093761728455623 -20000,0.01987679941424479 -30000,0.01486463608064999 -40000,0.013391152458886307 -50000,0.02107272259114931 -60000,0.013506214336181682 -70000,0.029410089280766744 -80000,0.015823704019809766 -90000,0.019913034156585734 -100000,0.014791751342515152 -110000,0.010593784926459193 -120000,0.025862824358046054 -130000,0.023685332232465345 -140000,0.025422214111313225 -150000,0.027255243373413878 -160000,0.03160991783564289 -170000,0.02108971644192934 -180000,0.017741225489104787 -190000,0.03198372142699858 -200000,0.02684378470294178 -210000,0.026940228743478657 -220000,0.02138833945306639 -230000,0.023123973514884714 -240000,0.02085371456729869 -250000,0.023844594694674016 -260000,0.03327609168676038 -270000,0.010866276050607363 -280000,0.022506571700796483 -290000,0.03096077187607686 -300000,0.027054767046744626 -310000,0.027249302047615248 -320000,0.023663806170225142 -330000,0.02494900724850595 -340000,0.015978123977159458 -350000,0.021923863189294932 -360000,0.019547735278805094 -370000,0.021265926087896028 -380000,0.014727905501301089 -390000,0.030305806795756028 -400000,0.026978912064805627 -410000,0.017083235193664826 -420000,0.026365720961863792 -430000,0.035183636114622156 -440000,0.0325952288073798 -450000,0.03974219604084889 -460000,0.03562526850340267 -470000,0.015893360584353405 -480000,0.01806016478997966 -490000,0.025034441146999598 -500000,0.023944977841650443 -510000,0.025254990331207712 -520000,0.017323377837116523 -530000,0.032143193762749434 -540000,0.02643524326073627 -550000,0.02910492598700027 -560000,0.012879214234029254 -570000,0.034773330452541516 -580000,0.010448555648326873 -590000,0.019137029070407156 -600000,0.02863503483434518 -610000,0.018844127872337895 -620000,0.028530985768884416 -630000,0.030602573323994874 -640000,0.024800967269887526 -650000,0.026732811890542506 -660000,0.013594790020336709 -670000,0.019461650463442007 -680000,0.014471999648958445 -690000,0.024828640868266423 -700000,0.022349168080836535 -710000,0.02978917866324384 -720000,0.02917973517129819 -730000,0.026661281380802388 -740000,0.02189814010635018 -750000,0.029756025473276777 -760000,0.015155136305838823 -770000,0.0360929073455433 -780000,0.016999793797731397 -790000,0.020750543444106973 -800000,0.02726367212211092 -810000,0.01674145155896743 -820000,0.022765236285825566 -830000,0.02863887116933862 -840000,0.020159625013669323 -850000,0.019409959825376667 -860000,0.019414858271678284 -870000,0.035184234566986564 -880000,0.03442875832940141 -890000,0.020626786692688866 -900000,0.02477550115436316 -910000,0.03091897564008832 -920000,0.016422046969334282 -930000,0.02293549239014586 -940000,0.013255421910434958 -950000,0.00940948560213049 -960000,0.028025341002891464 -970000,0.026269829645752907 -980000,0.027919603946308297 -990000,0.022075120421747366 -1000000,0.014755425074448186 +10000,0.024409892084077 +20000,0.028237653461595368 +30000,0.02192816279518108 +40000,0.018913381608823936 +50000,0.015509734186343849 +60000,0.022961538385910286 +70000,0.022704355162568392 +80000,0.016238091808433334 +90000,0.03782293587767829 +100000,0.02100219374988228 +110000,0.02141121392293523 +120000,0.025614972536762558 +130000,0.02867369195446372 +140000,0.030952776844302816 +150000,0.03674589168901245 +160000,0.030759305274114006 +170000,0.018543492428337534 +180000,0.02311528446152806 +190000,0.021217896440066396 +200000,0.031230667388687527 +210000,0.024121561770637834 +220000,0.020747226802632218 +230000,0.01734344883201023 +240000,0.016991637321189045 +250000,0.028224200305218494 +260000,0.027590600556383533 +270000,0.030194483169664943 +280000,0.026444100650648276 +290000,0.024258410170053445 +300000,0.01707531190477312 +310000,0.02468934141409894 +320000,0.027207040321081878 +330000,0.018457489802191657 +340000,0.02312239736008147 +350000,0.027141780639067296 +360000,0.026339493893707794 +370000,0.024537863334019982 +380000,0.036668536920721334 +390000,0.02940338659100234 +400000,0.024564059373612206 +410000,0.03284425938812394 +420000,0.024926014539475246 +430000,0.03308476192566256 +440000,0.016210190594817202 +450000,0.021062924759462476 +460000,0.019168460213889675 +470000,0.026289241993799804 +480000,0.017310665796200435 +490000,0.021731064313401778 +500000,0.017607149745648108 +510000,0.027428330915669603 +520000,0.027461038576439018 +530000,0.02992199057092269 +540000,0.026007165961588426 +550000,0.023066577253242338 +560000,0.02526763162265221 +570000,0.03483185063426693 +580000,0.013407502152646581 +590000,0.03282775559152165 +600000,0.029201585892587905 +610000,0.02182137160561979 +620000,0.03182033404397468 +630000,0.027660583332180976 +640000,0.02759469039738179 +650000,0.03766682191441456 +660000,0.017401601265495024 +670000,0.032698770659044384 +680000,0.03184407480681936 +690000,0.028136210578183336 +700000,0.02837487780489028 +710000,0.018661029047022264 +720000,0.029546693712472916 +730000,0.02846863634573916 +740000,0.022014159010723233 +750000,0.030930140521377332 +760000,0.03236540835350752 +770000,0.020043571991845963 +780000,0.01499495853980382 +790000,0.025019547995179893 +800000,0.02685717586427927 +810000,0.022371310399224364 +820000,0.01806757709321876 +830000,0.020140187007685502 +840000,0.033228317042812705 +850000,0.02850924154433111 +860000,0.026548874114329612 +870000,0.020841597874338424 +880000,0.022953611534709737 +890000,0.028976444993168114 +900000,0.026199274836108087 +910000,0.022631567421679693 +920000,0.03397743349584441 +930000,0.022928095593427615 +940000,0.030529379984363912 +950000,0.031547902788346 +960000,0.014810181455686688 +970000,0.0253438250006487 +980000,0.032823247400422896 +990000,0.024108414972821873 +1000000,0.023505812184885143 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/loss/entropy_loss.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/loss/entropy_loss.log index 3f19da416..5b33ddafc 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/loss/entropy_loss.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/loss/entropy_loss.log @@ -1,101 +1,101 @@ step,loss/entropy_loss -10000,-3.68363006512324 -20000,-3.7480220635732016 -30000,-3.787853757540385 -40000,-3.8086586912473037 -50000,-3.9364279111226397 -60000,-3.98047864039739 -70000,-3.9357784509658815 -80000,-4.025039354960123 -90000,-4.175833106040955 -100000,-4.202658541997274 -110000,-4.281058303515117 -120000,-4.339184999465942 -130000,-4.335047340393066 -140000,-4.3225863138834635 -150000,-4.406381646792093 -160000,-4.427156750361124 -170000,-4.480426208178202 -180000,-4.5059504111607875 -190000,-4.477615841229756 -200000,-4.443895284334818 -210000,-4.562194927533467 -220000,-4.5653224388758336 -230000,-4.610391290982564 -240000,-4.63253960609436 -250000,-4.68098882039388 -260000,-4.692388796806336 -270000,-4.7651832580566404 -280000,-4.748435974121093 -290000,-4.74056047598521 -300000,-4.841680749257405 -310000,-4.845942735671997 -320000,-4.885269363721212 -330000,-4.8965121348698935 -340000,-5.006058589617412 -350000,-5.096441721916199 -360000,-5.181162118911743 -370000,-5.2072583119074505 -380000,-5.215379357337952 -390000,-5.284020566940308 -400000,-5.347121238708497 -410000,-5.4663443247477215 -420000,-5.487020842234294 -430000,-5.509136104583741 -440000,-5.559460671742757 -450000,-5.633924690882365 -460000,-5.675931859016418 -470000,-5.68299102783203 -480000,-5.777672870953878 -490000,-5.898061609268188 -500000,-5.835938008626302 -510000,-5.836973826090494 -520000,-5.838691902160645 -530000,-5.880524746576945 -540000,-5.921090173721313 -550000,-5.912160698572795 -560000,-5.95408852895101 -570000,-6.042551898956299 -580000,-6.081379493077596 -590000,-6.177493397394817 -600000,-6.168541661898296 -610000,-6.1304655313491825 -620000,-6.191504081090291 -630000,-6.255839951833089 -640000,-6.290773669878641 -650000,-6.38338680267334 -660000,-6.368849953015646 -670000,-6.403895258903503 -680000,-6.501446390151978 -690000,-6.589324863751729 -700000,-6.5923282384872435 -710000,-6.589562400182087 -720000,-6.668602418899536 -730000,-6.696658444404602 -740000,-6.768911695480346 -750000,-6.746953845024109 -760000,-6.783787099520365 -770000,-6.824056768417359 -780000,-6.823650527000426 -790000,-6.839295848210654 -800000,-6.8947913964589445 -810000,-6.9143599510192875 -820000,-6.914153560002644 -830000,-6.913018226623535 -840000,-6.934711543718974 -850000,-7.004758469263713 -860000,-6.9920307715733845 -870000,-7.0274018128712985 -880000,-7.139731184641521 -890000,-7.057124058405558 -900000,-7.072453411420186 -910000,-7.201297744115193 -920000,-7.243534715970357 -930000,-7.280937870343526 -940000,-7.267527516682942 -950000,-7.2921710809071865 -960000,-7.291986354192099 -970000,-7.411647796630859 -980000,-7.377316872278849 -990000,-7.333533628781636 -1000000,-7.42620926698049 +10000,-3.7493523716926567 +20000,-3.7743594328562424 +30000,-3.7895545800526937 +40000,-3.76532461643219 +50000,-3.7346468170483904 +60000,-3.7631733655929565 +70000,-3.7179981549580896 +80000,-3.806101067860921 +90000,-3.741804246107738 +100000,-3.751055085659027 +110000,-3.8219611167907717 +120000,-3.883691207567851 +130000,-3.8967724879582724 +140000,-3.9214674313863123 +150000,-3.903782423337301 +160000,-3.973929317792256 +170000,-4.010751930872599 +180000,-4.006652569770813 +190000,-3.9773014426231383 +200000,-3.9683888276418045 +210000,-4.101207367579142 +220000,-4.162781500816346 +230000,-4.112813838322958 +240000,-4.284387183189392 +250000,-4.280288529396057 +260000,-4.2667513211568195 +270000,-4.228996817270915 +280000,-4.271712724367777 +290000,-4.220339226722718 +300000,-4.211099529266358 +310000,-4.2414825121561694 +320000,-4.218381849924723 +330000,-4.217833058039347 +340000,-4.190550398826599 +350000,-4.16531895796458 +360000,-4.210639723141988 +370000,-4.202372495333353 +380000,-4.238751816749573 +390000,-4.218455187479655 +400000,-4.273488934834798 +410000,-4.314272888501486 +420000,-4.440344150861105 +430000,-4.419339116414387 +440000,-4.48504822254181 +450000,-4.465920694669087 +460000,-4.496744283040365 +470000,-4.4648193915685015 +480000,-4.490742484728496 +490000,-4.512978967030843 +500000,-4.51129759947459 +510000,-4.46029388109843 +520000,-4.573556637763977 +530000,-4.6092865864435835 +540000,-4.654390017191568 +550000,-4.679288299878438 +560000,-4.617477337519328 +570000,-4.701109258333842 +580000,-4.755168024698894 +590000,-4.754276625315348 +600000,-4.778586665789287 +610000,-4.788113967577617 +620000,-4.770361955960591 +630000,-4.77482251326243 +640000,-4.853216822942097 +650000,-4.8336175918579105 +660000,-4.853139932950338 +670000,-4.97396149635315 +680000,-4.989517331123352 +690000,-5.078101913134257 +700000,-5.086382651329041 +710000,-5.128715960184733 +720000,-5.155402080217998 +730000,-5.18179756005605 +740000,-5.196504147847493 +750000,-5.227960936228434 +760000,-5.247938132286071 +770000,-5.282938241958618 +780000,-5.348073275883992 +790000,-5.383229756355285 +800000,-5.475364232063294 +810000,-5.476680580774943 +820000,-5.485030142466226 +830000,-5.53146185874939 +840000,-5.540514636039734 +850000,-5.527553764979045 +860000,-5.535895427068075 +870000,-5.588350550333658 +880000,-5.583930452664694 +890000,-5.68324777285258 +900000,-5.706240502993266 +910000,-5.714211543401083 +920000,-5.775755310058594 +930000,-5.816256968180339 +940000,-5.821071743965149 +950000,-5.857654198010763 +960000,-5.87427655061086 +970000,-5.982048408190409 +980000,-6.032665379842122 +990000,-6.02129868666331 +1000000,-5.969874429702759 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/loss/policy_loss.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/loss/policy_loss.log index 0da1b6850..bbaf2cbfe 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/loss/policy_loss.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/loss/policy_loss.log @@ -1,101 +1,101 @@ step,loss/policy_loss -10000,-0.010205146481111836 -20000,-0.014132485961784855 -30000,-0.013134038465719158 -40000,-0.0059340420693484485 -50000,-0.01798283706333354 -60000,-0.020402643893777127 -70000,-0.00850798939879297 -80000,-0.0061907399944272756 -90000,-0.015415150279251097 -100000,-0.01712872703608421 -110000,-0.018194267692046098 -120000,-0.01636880438317139 -130000,-0.0149562629026879 -140000,-0.014969389000910419 -150000,-0.013323942942330459 -160000,-0.022997603129651732 -170000,-0.010268822995434653 -180000,-0.012552881963353722 -190000,-0.01568637644247284 -200000,-0.006385611808758076 -210000,-0.016829662260083447 -220000,-0.011320664710147421 -230000,-0.011845576111064446 -240000,-0.008949019930833555 -250000,-0.011991350860261587 -260000,-0.007955144529801426 -270000,-0.02384616166192676 -280000,-0.007115345338673179 -290000,-0.020531490469945055 -300000,0.005471568586666241 -310000,-0.014481016834651427 -320000,-0.005909015366510368 -330000,-0.015215430131506178 -340000,-0.018268491343880408 -350000,-0.001219565502744871 -360000,-0.0027002736753531415 -370000,-0.005577044170850618 -380000,-0.012660906086299764 -390000,-0.0007015093297897257 -400000,-0.009051559154697856 -410000,-0.009354432884122554 -420000,-0.021329175337502802 -430000,-0.0031365395515322953 -440000,-0.006429764482640635 -450000,-0.012396793992888107 -460000,-0.015169022837053897 -470000,-0.019311793764248023 -480000,-0.014824748706289748 -490000,-0.0017624428300000747 -500000,-0.017493329157168943 -510000,-0.012024531741919328 -520000,-0.01739455496081082 -530000,-0.004179516070835042 -540000,-0.004683484137409544 -550000,-0.00376185927064505 -560000,-0.016468485891784533 -570000,0.0006962674644534097 -580000,-0.020623065028163405 -590000,-0.015411034318707736 -600000,-0.019068333581123323 -610000,-0.018796409010707883 -620000,-0.0034527970828285687 -630000,-0.0053659723785365095 -640000,-0.025141686916863127 -650000,-0.009803502903477385 -660000,-0.018365915173150987 -670000,-0.01769219662694637 -680000,-0.02707092221022166 -690000,-0.018568808267683708 -700000,-0.009563763131641928 -710000,-0.00866507926328474 -720000,-0.004875470762593735 -730000,-0.0055848032790726644 -740000,-0.01697378827140672 -750000,-0.01753553005858504 -760000,-0.019741682652752453 -770000,-0.007822665559747106 -780000,-0.018001017461506636 -790000,-0.027317316574050638 -800000,-0.012483848245242062 -810000,-0.015513739943903954 -820000,-0.019403785885996543 -830000,-0.004230631464520194 -840000,-0.014675448016124078 -850000,-0.01824127340163118 -860000,-0.011572495034706629 -870000,-0.01194172062647928 -880000,-0.009538034038881094 -890000,-0.02680421056681951 -900000,-0.01503020530321551 -910000,-0.018964763398439968 -920000,-0.030233219280395275 -930000,-0.009775067536363325 -940000,-0.021519515030788396 -950000,-0.025162934163318725 -960000,-0.02216069516456378 -970000,-0.01901042658330932 -980000,-0.01626001982927767 -990000,-0.020865705058724336 -1000000,-0.03072153724692145 +10000,-0.015480719143693238 +20000,-0.022082521253085453 +30000,-0.008707301295949271 +40000,-0.010557683538964049 +50000,-0.015934944509921174 +60000,-0.011826543596763188 +70000,-0.010749740908632108 +80000,-0.016868353693908864 +90000,-0.010364194609317208 +100000,-0.013432642653630884 +110000,-0.00621782861655705 +120000,-0.01544348397475887 +130000,-0.010799942139027952 +140000,-0.0013483290509275743 +150000,-0.014063235304072618 +160000,-0.017859051550253642 +170000,-0.015157548764705448 +180000,-0.01304218485203974 +190000,-0.02160951205530171 +200000,-0.011580851942029636 +210000,-0.00933572851799541 +220000,-0.010460793987730942 +230000,-0.012026633413828928 +240000,-0.010601833037656746 +250000,-0.0035276775609711145 +260000,-0.020124274126264705 +270000,-0.00921325598927766 +280000,-0.01584617337688302 +290000,0.0030494714642845646 +300000,-0.007821010301356544 +310000,-0.01348397279933993 +320000,-0.016185968666859465 +330000,-0.022443910523236694 +340000,-0.012884438927540298 +350000,-0.00645990718180091 +360000,-0.0157012649429362 +370000,-0.006851271452322265 +380000,-0.012163210659067083 +390000,-0.006062403247352758 +400000,0.0019869477693349237 +410000,-0.019099679656973657 +420000,-0.008824776363382027 +430000,-0.00741253920228867 +440000,-0.013401841968134987 +450000,-0.011132613264209916 +460000,-0.01272246784296091 +470000,0.003213931889792331 +480000,-0.018644218363373467 +490000,-0.014479352071535195 +500000,-0.023002539090647985 +510000,-0.010015904916778465 +520000,-0.011882418332317606 +530000,-0.0068551521976492336 +540000,-0.015199628976874907 +550000,-0.0014957337239448426 +560000,-0.005757272033619493 +570000,-0.006631468501739783 +580000,-0.009538215567665851 +590000,-0.023296277110525457 +600000,-0.004452530788426765 +610000,-0.00924966978812407 +620000,-0.014653264389478126 +630000,-0.015922973280791015 +640000,-0.002787528145355832 +650000,-0.010046794461044981 +660000,-0.008631978111543029 +670000,-0.011198793690218128 +680000,-0.0014049393374124968 +690000,-0.005949650250586741 +700000,-0.010664868262885768 +710000,-0.010047814023148431 +720000,-0.015300555375558486 +730000,-0.019642904630601754 +740000,-0.012377855943325316 +750000,-0.0009512456682862974 +760000,-0.0008453161756842492 +770000,-0.014993025156768248 +780000,-0.021035810916522884 +790000,-0.00958721147683346 +800000,-0.019139535924636138 +810000,-0.00854842987053992 +820000,-0.017321679236682756 +830000,-0.010530300661963119 +840000,-0.008601150665989498 +850000,-0.008534445148221317 +860000,-0.017764530217243647 +870000,-0.0123167013805436 +880000,-0.0146058846243178 +890000,-0.00017982623785441234 +900000,-0.013800955813322543 +910000,-0.011894372716935043 +920000,-0.00793167077396316 +930000,-0.017582175874682495 +940000,-0.0059882964067522184 +950000,0.004707308080915615 +960000,-0.018184911989842675 +970000,-0.028246914493704717 +980000,-0.01058632609396604 +990000,-0.009883353464717495 +1000000,-0.0142325377810649 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/loss/value_loss.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/loss/value_loss.log index 5d1ed09ad..4eda63e73 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/loss/value_loss.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/loss/value_loss.log @@ -1,101 +1,101 @@ step,loss/value_loss -10000,17.885541990205358 -20000,33.26086221595499 -30000,5.05453916182039 -40000,5.360712041784289 -50000,4.915716180741998 -60000,3.6915941908881953 -70000,5.069778253392498 -80000,5.79890985717099 -90000,5.731375602002507 -100000,3.3500848799284584 -110000,2.86793919782753 -120000,2.7081712072322466 -130000,1.6639121015770608 -140000,2.327199495457596 -150000,2.195200522240202 -160000,1.4721421896450382 -170000,2.813245280731672 -180000,1.1582582993719324 -190000,1.803162743284518 -200000,1.7549206467567218 -210000,1.0404729063984612 -220000,1.1604128312800366 -230000,1.6013024840020176 -240000,2.2704233914129177 -250000,1.0601307458801434 -260000,1.478957145465579 -270000,1.0914095510114492 -280000,1.244474333178741 -290000,0.8579564175237528 -300000,1.1349141223724322 -310000,1.561577140650295 -320000,2.61990109251004 -330000,1.1219507903228767 -340000,1.0062601190377038 -350000,0.8092216144688574 -360000,1.9029743283177354 -370000,0.9872549245022558 -380000,0.5515771399828937 -390000,1.040721406158391 -400000,1.1651292029014526 -410000,1.3143205752748008 -420000,0.6545907170207235 -430000,1.4016163037778155 -440000,0.8517621621260266 -450000,2.0760469032674838 -460000,2.100027454037552 -470000,0.624715506191402 -480000,0.8624886596702896 -490000,1.9842795247815537 -500000,0.6055446907774593 -510000,0.8543512752194319 -520000,0.7836609385913609 -530000,1.8814329436938586 -540000,0.6755767942401094 -550000,0.5145401195883312 -560000,1.3092332372556863 -570000,0.4799322721139485 -580000,2.330522327225099 -590000,2.355920971907107 -600000,0.8466676844590916 -610000,0.377164698323982 -620000,0.9107639892915553 -630000,1.7771718294678287 -640000,0.682654944573449 -650000,1.8166946444139473 -660000,1.0998542185576017 -670000,0.5341273936337623 -680000,0.6539863163395657 -690000,1.5875281613662402 -700000,2.171810439279945 -710000,0.7037388991846719 -720000,1.347756687904503 -730000,1.34220977272948 -740000,0.7928848069307274 -750000,0.8343725319191089 -760000,0.6317259553253156 -770000,1.503364720033101 -780000,0.40607058798468465 -790000,0.573981053631179 -800000,0.8133875686007755 -810000,0.5357545856821557 -820000,0.7351937751438549 -830000,0.7369987276022277 -840000,1.6172629936525333 -850000,0.9238859022951992 -860000,1.3765269460053409 -870000,2.4401619109110526 -880000,0.46582353905234203 -890000,0.4334342448993981 -900000,0.5598711019771572 -910000,0.4336806587415879 -920000,0.47749182241144583 -930000,0.6779686970895343 -940000,1.8474903326911225 -950000,0.41508523683654264 -960000,0.4315831071571485 -970000,0.712814333015311 -980000,0.8790365016022923 -990000,4.445141262547419 -1000000,2.285724060243618 +10000,19.768832208211997 +20000,26.79890728165021 +30000,4.577228868526145 +40000,5.620076925226187 +50000,5.116889599962493 +60000,5.3970011584686395 +70000,6.765840604294733 +80000,2.1890866079564053 +90000,1.608217342325942 +100000,1.5038172053829066 +110000,1.5944023746214364 +120000,4.010449863834852 +130000,2.964621076056557 +140000,0.8304386407574977 +150000,1.3723015514162085 +160000,0.5880282321613299 +170000,0.7871422912705014 +180000,0.8895447127477529 +190000,0.6800250043870018 +200000,0.4957362543693975 +210000,0.5817928785766003 +220000,0.3461421804971255 +230000,0.32508949716765684 +240000,0.4655928189470379 +250000,0.4976363052094457 +260000,0.3076959046206057 +270000,0.31050619522284156 +280000,0.6757647849353604 +290000,0.25997471213304957 +300000,0.44287846553428645 +310000,0.3176252235597062 +320000,0.2431785320978503 +330000,0.2582822612972425 +340000,0.551550260937436 +350000,0.35894572428633464 +360000,0.18372958057270308 +370000,0.6618462765982716 +380000,0.19715971348012012 +390000,0.8912456113757363 +400000,0.36927821006947176 +410000,0.15056548454715207 +420000,0.22777230923570618 +430000,0.1926496383883673 +440000,0.19127730314624802 +450000,0.13596070517368658 +460000,0.13288539221448403 +470000,0.21489008181166644 +480000,0.21212505933747666 +490000,0.47491129470367677 +500000,0.20430169324123598 +510000,0.15305574595661653 +520000,0.15629733232555792 +530000,0.18246994949242384 +540000,0.0994321885838101 +550000,0.22292987818761173 +560000,0.1919114365233041 +570000,0.09493895395834648 +580000,0.1339849725016466 +590000,0.31313448723938236 +600000,0.10671763759713962 +610000,0.46501819786546894 +620000,0.33809357223533903 +630000,0.13260738869771818 +640000,0.3084564462934712 +650000,0.16541514478495115 +660000,0.25820225832348076 +670000,0.13313302110212408 +680000,0.17369812865359963 +690000,0.8227336552395741 +700000,0.1802747821640075 +710000,0.09984394678753636 +720000,0.19277389401851372 +730000,0.08712756086682086 +740000,0.10443530318415675 +750000,0.1521795334324061 +760000,0.17733879744494868 +770000,0.10786269992516322 +780000,0.08898480279697246 +790000,0.11521391544468436 +800000,0.09672918046250081 +810000,0.14421306354876676 +820000,0.08412381296670685 +830000,0.10680852019269868 +840000,0.17936955219833914 +850000,0.2512438433352448 +860000,0.08409686813957327 +870000,0.09712973389989273 +880000,0.09985314264107009 +890000,0.25258308821680425 +900000,0.19719961730256647 +910000,0.1607309288277104 +920000,0.13489204662365015 +930000,0.07589764242588655 +940000,0.9952592241462327 +950000,0.09456577569868292 +960000,0.06922475416254585 +970000,0.13692841248555174 +980000,0.11187851377463782 +990000,0.1287650411765165 +1000000,0.06023073153378059 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat/constraint_violation.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat/constraint_violation.log index f3f461b6e..fe36a86fb 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat/constraint_violation.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat/constraint_violation.log @@ -1,101 +1,101 @@ step,stat/constraint_violation -10000,10 -20000,10 -30000,12 -40000,18 -50000,26 -60000,38 -70000,46 -80000,54 -90000,60 -100000,69 -110000,78 -120000,85 -130000,90 -140000,96 -150000,103 -160000,109 -170000,113 -180000,115 -190000,126 -200000,132 -210000,139 -220000,146 -230000,156 -240000,163 -250000,167 -260000,177 -270000,183 -280000,189 -290000,198 -300000,208 -310000,212 -320000,218 -330000,222 -340000,229 -350000,235 -360000,241 -370000,245 -380000,253 -390000,255 -400000,262 -410000,273 -420000,275 -430000,283 -440000,289 -450000,298 -460000,303 -470000,308 -480000,316 -490000,322 -500000,331 -510000,338 -520000,345 -530000,352 -540000,358 -550000,363 -560000,370 -570000,376 -580000,380 -590000,392 -600000,401 -610000,406 -620000,410 -630000,417 -640000,423 -650000,433 -660000,437 -670000,444 -680000,446 -690000,448 -700000,455 -710000,460 -720000,461 -730000,466 -740000,475 -750000,482 -760000,489 -770000,496 -780000,501 -790000,503 -800000,513 -810000,519 -820000,528 -830000,531 -840000,536 -850000,546 -860000,555 -870000,560 -880000,566 -890000,568 -900000,574 -910000,579 -920000,585 -930000,592 -940000,600 -950000,604 -960000,611 -970000,619 -980000,626 -990000,634 -1000000,639 +10000,0 +20000,0 +30000,0 +40000,0 +50000,0 +60000,0 +70000,0 +80000,0 +90000,0 +100000,0 +110000,0 +120000,1 +130000,1 +140000,1 +150000,1 +160000,1 +170000,1 +180000,1 +190000,1 +200000,1 +210000,1 +220000,2 +230000,2 +240000,3 +250000,3 +260000,3 +270000,3 +280000,3 +290000,3 +300000,3 +310000,3 +320000,3 +330000,3 +340000,3 +350000,3 +360000,3 +370000,3 +380000,4 +390000,4 +400000,4 +410000,4 +420000,4 +430000,4 +440000,4 +450000,5 +460000,5 +470000,5 +480000,5 +490000,5 +500000,5 +510000,5 +520000,5 +530000,5 +540000,5 +550000,5 +560000,5 +570000,5 +580000,5 +590000,5 +600000,5 +610000,5 +620000,5 +630000,5 +640000,5 +650000,5 +660000,5 +670000,5 +680000,5 +690000,5 +700000,5 +710000,5 +720000,5 +730000,5 +740000,5 +750000,5 +760000,5 +770000,5 +780000,5 +790000,5 +800000,5 +810000,5 +820000,5 +830000,5 +840000,5 +850000,5 +860000,6 +870000,6 +880000,6 +890000,6 +900000,6 +910000,6 +920000,7 +930000,7 +940000,8 +950000,8 +960000,8 +970000,8 +980000,8 +990000,8 +1000000,8 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat/ep_constraint_violation.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat/ep_constraint_violation.log index b785fd81b..7bcf29f3f 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat/ep_constraint_violation.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat/ep_constraint_violation.log @@ -1,101 +1,101 @@ step,stat/ep_constraint_violation -10000,0.1 +10000,0.0 20000,0.0 -30000,0.1 -40000,0.1 -50000,0.1 -60000,0.3 -70000,0.1 -80000,0.1 +30000,0.0 +40000,0.0 +50000,0.0 +60000,0.0 +70000,0.0 +80000,0.0 90000,0.0 -100000,0.2 -110000,0.1 +100000,0.0 +110000,0.0 120000,0.0 130000,0.0 -140000,0.2 -150000,0.1 +140000,0.0 +150000,0.0 160000,0.0 -170000,0.1 +170000,0.0 180000,0.0 -190000,0.2 -200000,0.2 -210000,0.2 +190000,0.0 +200000,0.0 +210000,0.0 220000,0.0 -230000,0.3 +230000,0.0 240000,0.1 -250000,0.1 -260000,0.1 +250000,0.0 +260000,0.0 270000,0.0 -280000,0.2 +280000,0.0 290000,0.0 -300000,0.1 -310000,0.1 +300000,0.0 +310000,0.0 320000,0.0 -330000,0.1 -340000,0.2 +330000,0.0 +340000,0.0 350000,0.0 -360000,0.1 -370000,0.1 -380000,0.1 +360000,0.0 +370000,0.0 +380000,0.0 390000,0.0 -400000,0.2 -410000,0.3 +400000,0.0 +410000,0.0 420000,0.0 -430000,0.3 -440000,0.1 -450000,0.2 -460000,0.1 -470000,0.2 -480000,0.2 -490000,0.2 +430000,0.0 +440000,0.0 +450000,0.0 +460000,0.0 +470000,0.0 +480000,0.0 +490000,0.0 500000,0.0 510000,0.0 -520000,0.1 -530000,0.2 -540000,0.2 -550000,0.1 -560000,0.3 -570000,0.1 -580000,0.1 -590000,0.3 -600000,0.1 +520000,0.0 +530000,0.0 +540000,0.0 +550000,0.0 +560000,0.0 +570000,0.0 +580000,0.0 +590000,0.0 +600000,0.0 610000,0.0 -620000,0.1 -630000,0.3 -640000,0.1 -650000,0.3 -660000,0.1 -670000,0.1 +620000,0.0 +630000,0.0 +640000,0.0 +650000,0.0 +660000,0.0 +670000,0.0 680000,0.0 -690000,0.2 -700000,0.3 -710000,0.1 +690000,0.0 +700000,0.0 +710000,0.0 720000,0.0 730000,0.0 -740000,0.2 -750000,0.2 -760000,0.1 -770000,0.2 +740000,0.0 +750000,0.0 +760000,0.0 +770000,0.0 780000,0.0 790000,0.0 -800000,0.2 -810000,0.1 +800000,0.0 +810000,0.0 820000,0.0 -830000,0.1 -840000,0.2 -850000,0.1 -860000,0.0 -870000,0.2 -880000,0.1 +830000,0.0 +840000,0.0 +850000,0.0 +860000,0.1 +870000,0.0 +880000,0.0 890000,0.0 -900000,0.1 +900000,0.0 910000,0.0 -920000,0.1 -930000,0.1 -940000,0.4 +920000,0.0 +930000,0.0 +940000,0.1 950000,0.0 -960000,0.2 +960000,0.0 970000,0.0 -980000,0.1 -990000,0.5 -1000000,0.2 +980000,0.0 +990000,0.0 +1000000,0.0 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat/ep_length.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat/ep_length.log index 368b00b38..9cc29ac91 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat/ep_length.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat/ep_length.log @@ -1,101 +1,101 @@ step,stat/ep_length -10000,226.4 +10000,250.0 20000,250.0 -30000,225.8 -40000,225.5 -50000,225.1 -60000,175.6 -70000,225.5 -80000,225.1 +30000,250.0 +40000,250.0 +50000,250.0 +60000,250.0 +70000,250.0 +80000,250.0 90000,250.0 -100000,200.2 -110000,225.1 +100000,250.0 +110000,250.0 120000,250.0 130000,250.0 -140000,200.2 -150000,225.1 +140000,250.0 +150000,250.0 160000,250.0 -170000,225.1 +170000,250.0 180000,250.0 -190000,201.9 -200000,201.2 -210000,200.2 +190000,250.0 +200000,250.0 +210000,250.0 220000,250.0 -230000,175.3 -240000,225.2 -250000,226.8 -260000,225.2 +230000,250.0 +240000,225.1 +250000,250.0 +260000,250.0 270000,250.0 -280000,200.4 +280000,250.0 290000,250.0 -300000,225.1 -310000,225.3 +300000,250.0 +310000,250.0 320000,250.0 -330000,225.3 -340000,200.5 +330000,250.0 +340000,250.0 350000,250.0 -360000,225.2 -370000,226.1 -380000,225.6 +360000,250.0 +370000,250.0 +380000,250.0 390000,250.0 -400000,200.6 -410000,176.9 +400000,250.0 +410000,250.0 420000,250.0 -430000,177.1 -440000,226.0 -450000,201.5 -460000,227.3 -470000,200.6 -480000,201.2 -490000,200.2 +430000,250.0 +440000,250.0 +450000,250.0 +460000,250.0 +470000,250.0 +480000,250.0 +490000,250.0 500000,250.0 510000,250.0 -520000,225.1 -530000,200.8 -540000,200.2 -550000,225.1 -560000,177.3 -570000,225.5 -580000,226.0 -590000,176.0 -600000,225.1 +520000,250.0 +530000,250.0 +540000,250.0 +550000,250.0 +560000,250.0 +570000,250.0 +580000,250.0 +590000,250.0 +600000,250.0 610000,250.0 -620000,225.1 -630000,176.1 -640000,225.4 -650000,176.9 -660000,225.1 -670000,225.3 +620000,250.0 +630000,250.0 +640000,250.0 +650000,250.0 +660000,250.0 +670000,250.0 680000,250.0 -690000,201.3 -700000,176.8 -710000,225.1 +690000,250.0 +700000,250.0 +710000,250.0 720000,250.0 730000,250.0 -740000,200.6 -750000,200.6 -760000,225.1 -770000,201.1 +740000,250.0 +750000,250.0 +760000,250.0 +770000,250.0 780000,250.0 790000,250.0 -800000,200.9 -810000,225.8 +800000,250.0 +810000,250.0 820000,250.0 -830000,226.3 -840000,200.5 -850000,225.7 -860000,250.0 -870000,200.8 -880000,225.1 +830000,250.0 +840000,250.0 +850000,250.0 +860000,225.1 +870000,250.0 +880000,250.0 890000,250.0 -900000,225.1 +900000,250.0 910000,250.0 -920000,225.3 -930000,225.9 -940000,152.2 +920000,250.0 +930000,250.0 +940000,225.1 950000,250.0 -960000,200.2 +960000,250.0 970000,250.0 -980000,225.2 -990000,127.6 -1000000,201.3 +980000,250.0 +990000,250.0 +1000000,250.0 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat/ep_return.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat/ep_return.log index c0215def4..e0a85ee9d 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat/ep_return.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat/ep_return.log @@ -1,101 +1,101 @@ step,stat/ep_return -10000,52.50227793165806 -20000,86.67617147434976 -30000,79.34866610939346 -40000,81.40214770871329 -50000,93.84543660083888 -60000,64.01425762786337 -70000,84.52896605922284 -80000,91.70670228017994 -90000,116.5143918244953 -100000,107.04640726173382 -110000,103.42013079649709 -120000,111.77658415443398 -130000,118.55929665102519 -140000,97.77339309624269 -150000,117.94487036768962 -160000,120.55575031975268 -170000,117.58889288553192 -180000,126.42169017241216 -190000,102.43018820745449 -200000,101.52408717098422 -210000,115.44847836647716 -220000,129.13772238343503 -230000,92.03714139097211 -240000,115.02624150215877 -250000,122.3248543987838 -260000,112.58213184532694 -270000,135.5933779509928 -280000,103.90360843853837 -290000,122.14719669784623 -300000,130.81382545305055 -310000,130.0781282703139 -320000,144.97901929775236 -330000,137.14448945815514 -340000,97.67911251294383 -350000,141.42689695464995 -360000,142.97771531311153 -370000,135.9967785624128 -380000,144.0757046944648 -390000,152.01402168451096 -400000,126.32741115761164 -410000,109.47777754098615 -420000,144.17646035682438 -430000,107.16203148803865 -440000,132.23811158142666 -450000,127.70895042633711 -460000,147.03239938518 -470000,127.37706591615851 -480000,121.96433209647118 -490000,124.51526037380313 -500000,154.65231285439592 -510000,160.01183848939166 -520000,152.2542943294405 -530000,119.44593735412222 -540000,121.04183050263191 -550000,131.7975764590736 -560000,114.67408987223891 -570000,141.9616687548183 -580000,139.0687798489365 -590000,99.14168109619443 -600000,145.0714565826539 -610000,147.31961207001115 -620000,130.8796979546584 -630000,111.81145762879778 -640000,148.21100772464916 -650000,107.00059656876662 -660000,144.53651917330686 -670000,147.04222005973898 -680000,163.77426344796137 -690000,119.32208431490301 -700000,104.91862582214021 -710000,152.73590939306973 -720000,142.98669724810867 -730000,171.87555287811892 -740000,136.36315685394132 -750000,109.11772905922885 -760000,150.25143689009172 -770000,124.29728162903568 -780000,150.78250534224327 -790000,164.3249900663931 -800000,144.75998332834337 -810000,144.83184397096787 -820000,158.21690743816112 -830000,147.8592469271274 -840000,135.03498566450065 -850000,138.14136001409457 -860000,169.79851421720338 -870000,132.93833818293385 -880000,135.19211151905117 -890000,154.5019270345475 -900000,146.70862537339488 -910000,166.20824889802037 -920000,136.39304909668965 -930000,148.5318204172081 -940000,95.9214077224406 -950000,165.13831256581952 -960000,123.89188064300488 -970000,148.83322365129567 -980000,138.07282037045834 -990000,77.89475030714387 -1000000,126.21010279119919 +10000,83.1741927774118 +20000,118.12167389272516 +30000,111.8780672849997 +40000,135.02630964866557 +50000,119.99114179512912 +60000,123.05092523992803 +70000,146.15816855343573 +80000,145.68550352989004 +90000,159.80256017944902 +100000,156.98876756432918 +110000,165.06324833092938 +120000,139.11995711099308 +130000,151.0933236568713 +140000,158.53291683003076 +150000,151.54034880383702 +160000,165.1079248578476 +170000,170.67949247302707 +180000,169.99648796148892 +190000,178.43672922676228 +200000,161.73190265787963 +210000,162.39989603039686 +220000,189.04999191270525 +230000,170.0607053798455 +240000,167.51483540010403 +250000,182.95679372551768 +260000,183.98209647534344 +270000,176.52135178249844 +280000,175.2468197807679 +290000,190.1091672428683 +300000,194.0442277914678 +310000,189.15728249861837 +320000,190.95698673491282 +330000,183.54569420757474 +340000,201.75351668619928 +350000,190.77536197155135 +360000,186.3819339119386 +370000,190.64891161768819 +380000,183.487066475483 +390000,189.22500303072553 +400000,186.3308945204181 +410000,177.5956741382881 +420000,198.72892605974846 +430000,195.35170130903882 +440000,193.1062821306694 +450000,188.49138828741795 +460000,201.14267203303436 +470000,201.47232949433328 +480000,198.5265538122712 +490000,193.01551432196732 +500000,201.39722589081904 +510000,201.18065813323733 +520000,183.5563034226373 +530000,188.50702887431245 +540000,203.9158418753209 +550000,196.96577569021332 +560000,199.36482333408833 +570000,207.3809770816657 +580000,187.51523809265677 +590000,191.11871326835094 +600000,191.50399160389097 +610000,191.76772179621636 +620000,191.69433484524004 +630000,190.32815004265188 +640000,197.8549991353049 +650000,189.26723567832906 +660000,184.24907587926776 +670000,188.25503206620002 +680000,188.09811107902135 +690000,190.3456860856108 +700000,195.00388579733593 +710000,192.09875156079028 +720000,203.03780039466747 +730000,201.0005647050548 +740000,195.9709298133805 +750000,191.0422551003101 +760000,195.7031020634934 +770000,195.7328215268465 +780000,193.49747860177496 +790000,208.18188463053065 +800000,190.61867174596304 +810000,187.51114914953433 +820000,178.60099452691352 +830000,187.9889404085477 +840000,193.30253100578238 +850000,206.69016814039333 +860000,169.157630376805 +870000,200.5904413186287 +880000,198.93880121269024 +890000,202.9261674958995 +900000,189.00061291481433 +910000,204.14662122748626 +920000,193.69171219879215 +930000,203.28654543731307 +940000,173.33473356064036 +950000,200.45263526617788 +960000,193.7583477153248 +970000,189.60102463981397 +980000,198.96617383334745 +990000,199.03565537164255 +1000000,197.60835884815043 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat/ep_reward.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat/ep_reward.log index 6f5fa9cbf..a80d2a240 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat/ep_reward.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat/ep_reward.log @@ -1,101 +1,101 @@ step,stat/ep_reward -10000,0.21040370710902598 -20000,0.34670468589739906 -30000,0.31740907512075067 -40000,0.32562922119157217 -50000,0.4476058557331644 -60000,0.2609573904715793 -70000,0.3396802636115965 -80000,0.3717428819681164 -90000,0.4660575672979813 -100000,0.4289741638863907 -110000,0.41447054854144394 -120000,0.44710633661773586 -130000,0.47423718660410075 -140000,0.39258603056905034 -150000,0.4850359187167149 -160000,0.4822230012790107 -170000,0.4747743594607078 -180000,0.5056867606896486 -190000,0.41259207657820224 -200000,0.4109736990606236 -210000,0.4621552803264112 -220000,0.5165508895337403 -230000,0.445671615808022 -240000,0.4641451473517546 -250000,0.48938822547211674 -260000,0.45068103623637157 -270000,0.5423735118039712 -280000,0.42471644547502213 -290000,0.48858878679138484 -300000,0.5242845972031043 -310000,0.5292584667359159 -320000,0.5799160771910093 -330000,0.5486329808389839 -340000,0.39561274338370284 -350000,0.5657075878186 -360000,0.5733615030702444 -370000,0.5451477583596591 -380000,0.5829427900032826 -390000,0.6080560867380438 -400000,0.5079070021386742 -410000,0.4430689475620303 -420000,0.5767058414272974 -430000,0.47328402805509057 -440000,0.5303478822196589 -450000,0.5151590268645461 -460000,0.5881409087633499 -470000,0.5095526863839451 -480000,0.4884465304018817 -490000,0.5141877297926273 -500000,0.6186092514175838 -510000,0.6400473539575666 -520000,0.6090718516043686 -530000,0.4784117352952896 -540000,0.49108651853814517 -550000,0.5372239527607305 -560000,0.46540331730723183 -570000,0.5684348872260766 -580000,0.556368999600399 -590000,0.40009855888936946 -600000,0.5805453351440704 -610000,0.5892784482800446 -620000,0.5236252858795885 -630000,0.4599739619993037 -640000,0.6104253079773142 -650000,0.4355056482790821 -660000,0.5821334032115658 -670000,0.5886165632532451 -680000,0.6550970537918455 -690000,0.47800197731039623 -700000,0.4237450037334165 -710000,0.6144361776194345 -720000,0.5719467889924348 -730000,0.6875022115124758 -740000,0.5472842170796631 -750000,0.43770022838957423 -760000,0.6011348928690574 -770000,0.4991731705765954 -780000,0.603130021368973 -790000,0.6572999602655722 -800000,0.5821993793434725 -810000,0.5838389953682075 -820000,0.6328676297526444 -830000,0.5917669477202443 -840000,0.5407502793491921 -850000,0.5527796886491226 -860000,0.6791940568688136 -870000,0.5673125562582223 -880000,0.5408575668560932 -890000,0.61800770813819 -900000,0.5871572222645 -910000,0.6648329955920815 -920000,0.5456373717744639 -930000,0.5945723219369979 -940000,0.3922023259488159 -950000,0.6605532502632782 -960000,0.5477396369940489 -970000,0.5953328946051827 -980000,0.5671219390048254 -990000,0.335229748994312 -1000000,0.5153287403588175 +10000,0.33269677110964724 +20000,0.47248669557090056 +30000,0.4475122691399987 +40000,0.5401052385946624 +50000,0.4799645671805165 +60000,0.49220370095971216 +70000,0.5846326742137429 +80000,0.5827420141195603 +90000,0.6392102407177961 +100000,0.6279550702573168 +110000,0.6602529933237176 +120000,0.5564798284439723 +130000,0.6043732946274851 +140000,0.634131667320123 +150000,0.606161395215348 +160000,0.6604316994313904 +170000,0.6827179698921082 +180000,0.6799859518459556 +190000,0.7137469169070491 +200000,0.6469276106315186 +210000,0.6495995841215875 +220000,0.7561999676508209 +230000,0.6802428215193821 +240000,0.7313520317122062 +250000,0.7318271749020706 +260000,0.7359283859013738 +270000,0.7060854071299937 +280000,0.7009872791230716 +290000,0.7604366689714731 +300000,0.7761769111658712 +310000,0.7566291299944735 +320000,0.7638279469396513 +330000,0.7341827768302988 +340000,0.8070140667447973 +350000,0.7631014478862055 +360000,0.7455277356477544 +370000,0.7625956464707528 +380000,0.7339482659019321 +390000,0.7569000121229019 +400000,0.7453235780816725 +410000,0.7103826965531523 +420000,0.7949157042389938 +430000,0.7814068052361552 +440000,0.7724251285226775 +450000,0.7539655531496716 +460000,0.8045706881321376 +470000,0.8058893179773332 +480000,0.7941062152490848 +490000,0.7720620572878693 +500000,0.8055889035632762 +510000,0.8047226325329493 +520000,0.7342252136905494 +530000,0.7540281154972497 +540000,0.8156633675012837 +550000,0.7878631027608533 +560000,0.7974592933363532 +570000,0.829523908326663 +580000,0.7500609523706272 +590000,0.7644748530734038 +600000,0.7660159664155637 +610000,0.7670708871848654 +620000,0.7667773393809602 +630000,0.7613126001706076 +640000,0.7914199965412196 +650000,0.7570689427133162 +660000,0.736996303517071 +670000,0.7530201282647999 +680000,0.7523924443160854 +690000,0.7613827443424432 +700000,0.7800155431893436 +710000,0.7683950062431613 +720000,0.8121512015786699 +730000,0.8040022588202191 +740000,0.7838837192535222 +750000,0.7641690204012404 +760000,0.7828124082539736 +770000,0.782931286107386 +780000,0.7739899144070999 +790000,0.8327275385221226 +800000,0.7624746869838522 +810000,0.7500445965981373 +820000,0.7144039781076541 +830000,0.7519557616341909 +840000,0.7732101240231296 +850000,0.8267606725615734 +860000,0.7182881037186396 +870000,0.8023617652745149 +880000,0.7957552048507608 +890000,0.8117046699835981 +900000,0.7560024516592574 +910000,0.8165864849099449 +920000,0.7747668487951686 +930000,0.8131461817492521 +940000,0.7335691514021668 +950000,0.8018105410647115 +960000,0.7750333908612993 +970000,0.758404098559256 +980000,0.7958646953333897 +990000,0.7961426214865701 +1000000,0.7904334353926018 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat_eval/constraint_violation.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat_eval/constraint_violation.log index a79b57563..cfb5c897c 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat_eval/constraint_violation.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat_eval/constraint_violation.log @@ -1,101 +1,101 @@ step,stat_eval/constraint_violation -10000,0.0 -20000,0.0 -30000,0.0 -40000,0.2 -50000,0.0 -60000,0.2 -70000,0.2 -80000,0.0 -90000,0.3 +10000,0.3 +20000,0.2 +30000,0.1 +40000,0.1 +50000,0.1 +60000,0.0 +70000,0.3 +80000,0.2 +90000,0.1 100000,0.0 110000,0.2 -120000,0.1 -130000,0.0 -140000,0.2 -150000,0.2 -160000,0.0 -170000,0.0 -180000,0.0 -190000,0.3 -200000,0.1 -210000,0.0 -220000,0.2 +120000,0.0 +130000,0.3 +140000,0.0 +150000,0.1 +160000,0.1 +170000,0.1 +180000,0.3 +190000,0.1 +200000,0.2 +210000,0.3 +220000,0.3 230000,0.1 -240000,0.2 -250000,0.0 -260000,0.1 -270000,0.1 -280000,0.3 +240000,0.3 +250000,0.2 +260000,0.0 +270000,0.3 +280000,0.1 290000,0.2 -300000,0.1 +300000,0.2 310000,0.1 320000,0.0 -330000,0.1 -340000,0.1 -350000,0.0 -360000,0.4 -370000,0.1 +330000,0.3 +340000,0.3 +350000,0.1 +360000,0.2 +370000,0.2 380000,0.2 390000,0.2 -400000,0.0 +400000,0.1 410000,0.1 -420000,0.0 -430000,0.2 -440000,0.2 -450000,0.3 +420000,0.3 +430000,0.3 +440000,0.1 +450000,0.1 460000,0.1 -470000,0.0 -480000,0.0 +470000,0.1 +480000,0.3 490000,0.1 -500000,0.1 -510000,0.0 -520000,0.4 -530000,0.0 -540000,0.3 -550000,0.0 -560000,0.0 -570000,0.1 +500000,0.0 +510000,0.2 +520000,0.3 +530000,0.1 +540000,0.2 +550000,0.1 +560000,0.2 +570000,0.0 580000,0.2 -590000,0.0 -600000,0.1 -610000,0.0 +590000,0.2 +600000,0.0 +610000,0.1 620000,0.1 630000,0.0 -640000,0.0 -650000,0.3 -660000,0.3 -670000,0.1 +640000,0.1 +650000,0.0 +660000,0.2 +670000,0.2 680000,0.0 -690000,0.2 -700000,0.1 +690000,0.1 +700000,0.0 710000,0.1 -720000,0.3 -730000,0.2 -740000,0.0 +720000,0.2 +730000,0.1 +740000,0.1 750000,0.1 -760000,0.3 -770000,0.0 -780000,0.1 +760000,0.1 +770000,0.2 +780000,0.2 790000,0.1 -800000,0.2 -810000,0.1 -820000,0.2 -830000,0.3 -840000,0.1 -850000,0.3 +800000,0.0 +810000,0.0 +820000,0.1 +830000,0.1 +840000,0.0 +850000,0.4 860000,0.0 870000,0.3 880000,0.2 -890000,0.1 -900000,0.1 +890000,0.2 +900000,0.3 910000,0.1 -920000,0.2 -930000,0.2 +920000,0.3 +930000,0.0 940000,0.0 -950000,0.0 -960000,0.0 -970000,0.1 -980000,0.2 +950000,0.1 +960000,0.1 +970000,0.3 +980000,0.0 990000,0.1 -1000000,0.2 +1000000,0.0 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat_eval/ep_length.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat_eval/ep_length.log index 3f4048ee7..162cd8226 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat_eval/ep_length.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat_eval/ep_length.log @@ -1,101 +1,101 @@ step,stat_eval/ep_length -10000,250.0 -20000,250.0 -30000,250.0 -40000,200.2 -50000,250.0 -60000,200.3 -70000,200.9 -80000,250.0 -90000,176.0 +10000,176.1 +20000,200.5 +30000,225.3 +40000,225.1 +50000,225.2 +60000,250.0 +70000,175.7 +80000,200.2 +90000,225.1 100000,250.0 -110000,200.3 -120000,225.1 -130000,250.0 -140000,200.3 -150000,201.4 -160000,250.0 -170000,250.0 -180000,250.0 -190000,178.2 -200000,225.1 -210000,250.0 -220000,200.7 +110000,201.3 +120000,250.0 +130000,176.2 +140000,250.0 +150000,225.1 +160000,225.1 +170000,225.1 +180000,176.0 +190000,225.1 +200000,200.2 +210000,176.6 +220000,176.5 230000,225.1 -240000,200.9 -250000,250.0 -260000,225.1 -270000,226.3 -280000,177.0 -290000,202.9 -300000,225.1 +240000,175.3 +250000,200.3 +260000,250.0 +270000,175.3 +280000,225.1 +290000,200.5 +300000,200.5 310000,225.1 320000,250.0 -330000,225.7 -340000,225.1 -350000,250.0 -360000,151.1 -370000,226.7 -380000,200.8 +330000,177.1 +340000,175.4 +350000,225.1 +360000,200.2 +370000,202.2 +380000,201.0 390000,200.3 -400000,250.0 -410000,225.1 -420000,250.0 -430000,200.8 -440000,201.6 -450000,176.4 +400000,225.1 +410000,225.2 +420000,175.8 +430000,175.7 +440000,226.5 +450000,225.1 460000,225.1 -470000,250.0 -480000,250.0 -490000,225.1 -500000,226.8 -510000,250.0 -520000,153.7 -530000,250.0 -540000,176.2 -550000,250.0 -560000,250.0 -570000,226.7 -580000,200.2 -590000,250.0 -600000,225.1 -610000,250.0 +470000,226.2 +480000,176.3 +490000,225.7 +500000,250.0 +510000,200.5 +520000,175.9 +530000,225.2 +540000,200.4 +550000,225.3 +560000,201.6 +570000,250.0 +580000,202.6 +590000,200.4 +600000,250.0 +610000,225.7 620000,225.1 630000,250.0 -640000,250.0 -650000,176.8 -660000,176.3 -670000,225.7 +640000,227.5 +650000,250.0 +660000,200.5 +670000,200.6 680000,250.0 -690000,201.9 -700000,227.3 -710000,226.2 -720000,175.9 -730000,202.6 -740000,250.0 -750000,225.5 -760000,176.9 -770000,250.0 -780000,225.1 -790000,225.1 -800000,200.9 -810000,225.7 -820000,200.6 -830000,175.5 -840000,225.4 -850000,176.5 +690000,225.5 +700000,250.0 +710000,225.2 +720000,200.8 +730000,226.6 +740000,227.1 +750000,225.2 +760000,225.6 +770000,201.3 +780000,200.3 +790000,225.7 +800000,250.0 +810000,250.0 +820000,225.4 +830000,225.2 +840000,250.0 +850000,151.6 860000,250.0 -870000,177.1 -880000,200.9 -890000,226.7 -900000,225.1 -910000,225.2 -920000,200.3 -930000,202.2 +870000,176.8 +880000,201.6 +890000,201.3 +900000,176.3 +910000,226.6 +920000,175.6 +930000,250.0 940000,250.0 -950000,250.0 -960000,250.0 -970000,225.1 -980000,200.3 -990000,225.4 -1000000,200.8 +950000,227.6 +960000,225.3 +970000,175.5 +980000,250.0 +990000,226.8 +1000000,250.0 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat_eval/ep_return.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat_eval/ep_return.log index f313248c8..249e2844a 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat_eval/ep_return.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat_eval/ep_return.log @@ -1,101 +1,101 @@ step,stat_eval/ep_return -10000,56.57521155283558 -20000,95.61666820218788 -30000,97.94485801616234 -40000,82.59569925318941 -50000,114.78445890504604 -60000,90.27097955390593 -70000,74.83254643777441 -80000,106.76288243149142 -90000,86.43849578080696 -100000,115.16485535621646 -110000,108.24391610531313 -120000,93.645040373851 -130000,127.21390870556965 -140000,106.37187476449628 -150000,87.6734109606143 -160000,111.814330749242 -170000,133.76516649115564 -180000,151.69502511960118 -190000,104.60550060472629 -200000,118.01166970192862 -210000,134.72401039962492 -220000,112.2506610181892 -230000,136.31364515757227 -240000,115.1680221150875 -250000,135.32046017201466 -260000,130.60046469155802 -270000,124.70879542651998 -280000,95.93782031336775 -290000,100.57358712246966 -300000,122.0157657873586 -310000,132.16055739867483 -320000,137.19738825245105 -330000,128.58778528347088 -340000,120.49566858833343 -350000,166.6995381166109 -360000,95.21480804008118 -370000,140.42281156067475 -380000,128.21626170449002 -390000,127.89224755175444 -400000,155.00407506844599 -410000,142.2392935276161 -420000,157.63756705710574 -430000,117.30662520945548 -440000,120.2631588217851 -450000,109.61502761654344 -460000,155.7978980193414 -470000,163.4461785616829 -480000,147.6211379880831 -490000,147.42617755361758 -500000,145.90114805963404 -510000,156.66321716229552 -520000,104.84917885637337 -530000,153.3694591968727 -540000,115.90318534299732 -550000,161.32016356812983 -560000,160.40971523152763 -570000,144.16517035704913 -580000,136.48558132661455 -590000,161.11700865841675 -600000,148.50984649922304 -610000,160.5434559204852 -620000,143.58628917692852 -630000,163.86962936263066 -640000,184.91907214102082 -650000,117.6179545607525 -660000,112.36115784869239 -670000,148.7677041848132 -680000,175.36766858609633 -690000,134.02417600500365 -700000,146.49071080844254 -710000,164.40338884228362 -720000,108.4180612314965 -730000,121.12638417930475 -740000,153.57157535078483 -750000,136.35683585952555 -760000,107.22181001134501 -770000,161.97367084928632 -780000,139.1674310019897 -790000,128.92671358886543 -800000,128.3041669135165 -810000,132.00902629126378 -820000,135.45656842217025 -830000,117.52108097266034 -840000,142.15822843940342 -850000,114.1203213901475 -860000,179.2582200806735 -870000,119.57498005914363 -880000,131.15342258039155 -890000,142.93088120195236 -900000,144.69006407938969 -910000,153.15143043883833 -920000,133.74907459048487 -930000,120.91270384831508 -940000,154.4441586162046 -950000,155.32165003890105 -960000,156.63891251665228 -970000,158.2266718884773 -980000,127.3233554992916 -990000,160.4776833146443 -1000000,122.90417767128694 +10000,51.26593158156462 +20000,69.08653695531319 +30000,81.07823861553956 +40000,108.30731953327548 +50000,93.47959984214398 +60000,106.96065376472083 +70000,81.71425152560892 +80000,107.8393930946313 +90000,122.09272729630807 +100000,127.08857333729205 +110000,83.21172449145705 +120000,124.28432928426494 +130000,100.88103034524242 +140000,133.62602171015578 +150000,128.9245647777812 +160000,112.99574979685478 +170000,126.40483280890123 +180000,107.82116669014832 +190000,126.28278433044693 +200000,112.33318938617467 +210000,106.25317051589329 +220000,110.25376376956937 +230000,130.03315958492695 +240000,93.54761543958344 +250000,123.91068772535422 +260000,149.73387169114488 +270000,105.43674361090416 +280000,154.5659456367328 +290000,126.99683246341878 +300000,114.92132816985142 +310000,132.38628999275753 +320000,140.84054250295932 +330000,110.28997792643773 +340000,103.3206536457628 +350000,128.66098327585414 +360000,120.50017395133618 +370000,127.8174352030098 +380000,113.86927086559902 +390000,107.54495094296894 +400000,131.62886396617748 +410000,132.86855711710467 +420000,110.7206232372915 +430000,107.56166635664127 +440000,130.58817159994194 +450000,137.13364003388855 +460000,165.5622671670514 +470000,130.85303851499089 +480000,119.90250098572105 +490000,133.4192971562105 +500000,149.35780497954096 +510000,140.670112103294 +520000,124.39843265171552 +530000,145.63583803129183 +540000,133.05888496106456 +550000,142.14786850553213 +560000,156.877527469097 +570000,156.69471427910247 +580000,117.82302574537206 +590000,125.58795296281956 +600000,154.97949868498398 +610000,151.94313206960396 +620000,147.3790046704974 +630000,166.20001124850745 +640000,152.53678678202252 +650000,159.09472748680537 +660000,128.40198256207867 +670000,130.7188292090163 +680000,155.68748794005523 +690000,164.4786687155026 +700000,161.37850768005245 +710000,147.56383190780505 +720000,135.38894780097183 +730000,162.6150046394867 +740000,139.02763629307805 +750000,140.26859472974917 +760000,141.94588032765677 +770000,134.80435768854474 +780000,129.51676922630557 +790000,141.0536252843918 +800000,148.73559241239946 +810000,167.07313526849612 +820000,158.5176874275364 +830000,153.0858262196471 +840000,168.82042451706923 +850000,98.34476769601835 +860000,153.32498972320636 +870000,120.45919194431238 +880000,134.66125369319138 +890000,142.77248376407994 +900000,105.22871752426295 +910000,143.1405406778383 +920000,138.9263662154538 +930000,170.75569806475082 +940000,162.69806621768396 +950000,141.2763520400998 +960000,159.84871342199642 +970000,109.50075087635773 +980000,176.3831034224653 +990000,169.54209933618182 +1000000,158.9636514916701 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat_eval/ep_reward.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat_eval/ep_reward.log index 2e667fd95..82e9844af 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat_eval/ep_reward.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat_eval/ep_reward.log @@ -1,101 +1,101 @@ step,stat_eval/ep_reward -10000,0.22630084621134233 -20000,0.3824666728087515 -30000,0.3917794320646494 -40000,0.3330683310858699 -50000,0.4591378356201842 -60000,0.36863429913827134 -70000,0.3006730512869541 -80000,0.4270515297259657 -90000,0.34883335256193965 -100000,0.46065942142486593 -110000,0.433281801720636 -120000,0.37519755049760695 -130000,0.5088556348222786 -140000,0.4376493929746437 -150000,0.35292760574579185 -160000,0.44725732299696797 -170000,0.5350606659646225 -180000,0.6067801004784047 -190000,0.4236150270377064 -200000,0.47391478343268983 -210000,0.5388960415984997 -220000,0.4740843221983936 -230000,0.5574809373998766 -240000,0.4626268378101538 -250000,0.5412818406880587 -260000,0.5776146232832609 -270000,0.4988725370796237 -280000,0.38498971301079987 -290000,0.4026766962587855 -300000,0.4884061677290797 -310000,0.5297060208933323 -320000,0.5487895530098041 -330000,0.5279987958313879 -340000,0.49235738765014175 -350000,0.6667981524664437 -360000,0.41483940022808585 -370000,0.5620258193097694 -380000,0.5189992726801432 -390000,0.5767678978251596 -400000,0.620016300273784 -410000,0.5827495433449459 -420000,0.6305502682284229 -430000,0.4770046131233238 -440000,0.4824627957419274 -450000,0.444784440181581 -460000,0.6290276160270041 -470000,0.6537847142467317 -480000,0.5904845519523324 -490000,0.5900031574780981 -500000,0.5915363402908579 -510000,0.6266528686491821 -520000,0.4380365626582942 -530000,0.6134778367874908 -540000,0.5443983532345513 -550000,0.6452806542725191 -560000,0.6416388609261106 -570000,0.5776946047158944 -580000,0.5494817911564264 -590000,0.644468034633667 -600000,0.5944178686952385 -610000,0.6421738236819409 -620000,0.5925828812134526 -630000,0.6554785174505227 -640000,0.7396762885640833 -650000,0.4822579924160184 -660000,0.5160217893075252 -670000,0.5998414990462494 -680000,0.7014706743443853 -690000,0.5396972676465579 -700000,0.5860168993283275 -710000,0.6606751114602203 -720000,0.4919503157274364 -730000,0.4932391087556808 -740000,0.6142863014031393 -750000,0.5536215855251483 -760000,0.43309461991844167 -770000,0.6478946833971453 -780000,0.5580423325900102 -790000,0.5249567133490434 -800000,0.530101586336865 -810000,0.5292085856823558 -820000,0.5519013493402081 -830000,0.5041217950899565 -840000,0.5696419946574178 -850000,0.4663691514247918 -860000,0.7170328803226942 -870000,0.4803859726683589 -880000,0.5311069587811776 -890000,0.5717552506641514 -900000,0.5792936537714883 -910000,0.6138458254835345 -920000,0.5441864862194039 -930000,0.48435313233129085 -940000,0.6177766344648183 -950000,0.6212866001556043 -960000,0.6265556500666091 -970000,0.6540679739990588 -980000,0.510096891271034 -990000,0.6493979504868478 -1000000,0.4960461088694926 +10000,0.21162293676847505 +20000,0.27703212736894034 +30000,0.3244415922913241 +40000,0.4359454021790416 +50000,0.3853789852239909 +60000,0.4278426150588833 +70000,0.33325720082633853 +80000,0.43898372667511054 +90000,0.5050816330887942 +100000,0.5083542933491682 +110000,0.33405912890854783 +120000,0.4971373171370598 +130000,0.40928247853431293 +140000,0.534504086840623 +150000,0.522506369048646 +160000,0.48173046512043066 +170000,0.5058360088946923 +180000,0.4343084904877489 +190000,0.5051823290099328 +200000,0.4695536628504719 +210000,0.4290595988283969 +220000,0.44695857807504324 +230000,0.5245762998333506 +240000,0.46311136493611205 +250000,0.496942861703384 +260000,0.5989354867645795 +270000,0.5158996807391845 +280000,0.618372180211683 +290000,0.5097659717361804 +300000,0.47707319122721703 +310000,0.531302027525686 +320000,0.5633621700118374 +330000,0.45619226360408927 +340000,0.45456665864724605 +350000,0.5383085274875801 +360000,0.5367568105939985 +370000,0.5155930701890113 +380000,0.45690236055997885 +390000,0.4413161320914979 +400000,0.5271683459903606 +410000,0.5335032213794093 +420000,0.46240391138105946 +430000,0.4579165653132412 +440000,0.5224635768051381 +450000,0.5669998985893578 +460000,0.6744099033883556 +470000,0.5258932445592592 +480000,0.48801305782709326 +490000,0.5338952875901206 +500000,0.5974312199181637 +510000,0.6218669530066725 +520000,0.5258400123242771 +530000,0.5832696226515421 +540000,0.5414323149129884 +550000,0.5689713022999006 +560000,0.6283407478123365 +570000,0.6267788571164099 +580000,0.4716860901782331 +590000,0.5688408090559436 +600000,0.619917994739936 +610000,0.6627469605114144 +620000,0.5905777992557321 +630000,0.6648000449940297 +640000,0.6108323126459679 +650000,0.6363789099472216 +660000,0.5278654884353066 +670000,0.565573247857732 +680000,0.622749951760221 +690000,0.6633864105204418 +700000,0.6455140307202099 +710000,0.6042752722684456 +720000,0.5576003048253007 +730000,0.650807497907991 +740000,0.5588009274973518 +750000,0.5672014885849975 +760000,0.5678010767118614 +770000,0.5434862238045403 +780000,0.5831068750266672 +790000,0.566951891525342 +800000,0.5949423696495979 +810000,0.6682925410739845 +820000,0.6649556308227238 +830000,0.6125572649858159 +840000,0.6752816980682768 +850000,0.4020771616871917 +860000,0.6132999588928254 +870000,0.48391106700706954 +880000,0.5400566042768122 +890000,0.571494591000848 +900000,0.430411087117864 +910000,0.5735169134782574 +920000,0.5789453547624088 +930000,0.6830227922590033 +940000,0.6507922648707359 +950000,0.565289172199481 +960000,0.6411141742855757 +970000,0.4410832042692604 +980000,0.7055324136898611 +990000,0.6861073334381467 +1000000,0.6358546059666805 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat_eval/mse.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat_eval/mse.log index 3037aca56..b08f72fd8 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat_eval/mse.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/logs/stat_eval/mse.log @@ -1,101 +1,101 @@ step,stat_eval/mse -10000,538.1096376604431 -20000,360.46787690023285 -30000,391.79309901850354 -40000,287.23108196443854 -50000,284.29573644312995 -60000,308.46707082434887 -70000,320.2473121027208 -80000,357.5992979218607 -90000,200.9988770834639 -100000,340.24151451232507 -110000,201.1812996294713 -120000,308.71704396819837 -130000,314.09289617003117 -140000,173.4929413197851 -150000,293.0472454482238 -160000,331.31158458121257 -170000,262.1321344718499 -180000,220.00103329841573 -190000,160.6233399865565 -200000,321.6797717589879 -210000,264.5989775014842 -220000,228.3177868749493 -230000,159.01581383833474 -240000,174.86788720042512 -250000,311.65846739527564 -260000,214.9192141008047 -270000,271.1710007548087 -280000,226.38758103989963 -290000,339.0994458435972 -300000,308.8364139351728 -310000,261.9220600380901 -320000,324.43286373880426 -330000,247.9759961145155 -340000,353.1850816066142 -350000,159.51883230586674 -360000,115.82084330687557 -370000,191.79462978413562 -380000,194.55765352740428 -390000,156.12194183544858 -400000,268.52979863311003 -410000,198.995824162143 -420000,243.17881101317926 -430000,212.12704536369034 -440000,264.63305613252425 -450000,166.91417927669494 -460000,146.34677666470157 -470000,190.21234360831437 -480000,294.98487693999994 -490000,177.95913988776755 -500000,204.79616719803832 -510000,251.5692838573575 -520000,104.86226503700111 -530000,270.7849797084583 -540000,116.67368480510451 -550000,245.36766017980216 -560000,218.04652658606378 -570000,229.54519209207214 -580000,125.47749450402826 -590000,211.47523913019236 -600000,182.1799968844128 -610000,284.56115535816315 -620000,208.66250410811622 -630000,213.39611537862606 -640000,109.71429107105516 -650000,144.4914733034073 -660000,157.66565088971075 -670000,204.15178637283128 -680000,179.47409771420524 -690000,196.82447322586614 -700000,185.9949295924585 -710000,109.22063736530929 -720000,161.65854649388714 -730000,246.00475171108127 -740000,265.7320914488375 -750000,254.02847503264198 -760000,220.1554072059741 -770000,208.45568184458912 -780000,246.54723956104462 -790000,304.97145548042556 -800000,206.66764709479799 -810000,233.77962167271397 -820000,133.64006005212758 -830000,120.1631461160016 -840000,218.29994704763408 -850000,139.52274367506988 -860000,147.70836722558187 -870000,173.19115723202202 -880000,206.14684287046657 -890000,223.8943364751771 -900000,202.98719609140124 -910000,200.97450354829314 -920000,170.21718957571494 -930000,233.18195029284539 -940000,283.057588975855 -950000,291.1286874753363 -960000,228.1806470493312 -970000,145.4034341910505 -980000,184.6742672154349 -990000,130.75780252365524 -1000000,211.31726095787585 +10000,303.59424963505273 +20000,333.45643333115106 +30000,363.5077390560289 +40000,256.6130604868699 +50000,297.78265738927195 +60000,345.4899335869078 +70000,315.49378657447005 +80000,153.6402314116589 +90000,204.86584018276062 +100000,296.9363965183322 +110000,364.8077372427552 +120000,300.5031123443438 +130000,157.77226735537903 +140000,276.7932429274434 +150000,226.50138136484688 +160000,290.2267876151857 +170000,227.36879721154418 +180000,169.04444341385187 +190000,228.6972467125649 +200000,226.0048234706068 +210000,197.05483872142867 +220000,151.88796947738953 +230000,246.348293239123 +240000,225.02717975230354 +250000,218.96917683132887 +260000,253.97712140990612 +270000,176.6898907956656 +280000,129.66449328343563 +290000,158.55728274063787 +300000,252.6135396339584 +310000,281.5112217372103 +320000,326.0692636127278 +330000,184.48808343810688 +340000,216.28647466842352 +350000,337.8959126946022 +360000,206.3044294454735 +370000,176.69081993230114 +380000,218.69136991989663 +390000,291.8100970994482 +400000,250.85169303189423 +410000,245.4224364028712 +420000,142.64664245175197 +430000,178.40713518762539 +440000,299.15099074073663 +450000,222.08225179755854 +460000,97.38191608642676 +470000,245.74680739194133 +480000,151.04565650350386 +490000,323.7672168294804 +500000,260.6143860914452 +510000,128.57242589188044 +520000,112.85716436845522 +530000,215.82286804077518 +540000,185.37622125888765 +550000,263.2719665051196 +560000,102.62593618407304 +570000,276.38035536587665 +580000,297.7597440949933 +590000,252.34120377696019 +600000,281.59078207566296 +610000,218.5222861808931 +620000,211.54910873968257 +630000,221.80545726139468 +640000,177.39010941393434 +650000,304.7863987559053 +660000,186.80281930127094 +670000,212.96637185517255 +680000,312.10519134842957 +690000,133.71952337452063 +700000,225.82290979427142 +710000,243.95247101058982 +720000,175.2605809713058 +730000,156.45859866982784 +740000,243.60376762376532 +750000,235.08678348308007 +760000,249.44121642899773 +770000,136.95663270562144 +780000,206.7005157009475 +790000,206.4050916912454 +800000,314.0221224011018 +810000,201.36531703121787 +820000,135.63700968289484 +830000,187.92858635200884 +840000,202.16113698711675 +850000,111.22948803127649 +860000,344.32255797291134 +870000,179.31602779382902 +880000,195.35714230377644 +890000,138.21463213106756 +900000,207.9867262171928 +910000,242.3274766712735 +920000,52.14092959183987 +930000,181.42532697559605 +940000,245.05247131413154 +950000,276.1122640161439 +960000,136.57471707869573 +970000,204.85685823740604 +980000,169.0980085422304 +990000,99.76913937862557 +1000000,339.65848970621846 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/model_best.pt b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/model_best.pt index d696b3c9b..6308ad7fe 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/model_best.pt and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/model_best.pt differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/model_latest.pt b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/model_latest.pt index 2a418fcb7..e8b726f8b 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/model_latest.pt and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/model_latest.pt differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-loss-approx_kl.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-loss-approx_kl.jpg index 9ff6a7a17..1952a1602 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-loss-approx_kl.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-loss-approx_kl.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-loss-entropy_loss.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-loss-entropy_loss.jpg index df45d5004..f44ab086a 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-loss-entropy_loss.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-loss-entropy_loss.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-loss-policy_loss.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-loss-policy_loss.jpg index 63689a360..b8e540d87 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-loss-policy_loss.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-loss-policy_loss.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-loss-value_loss.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-loss-value_loss.jpg index d597eeadb..67ee8fd8f 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-loss-value_loss.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-loss-value_loss.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat-constraint_violation.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat-constraint_violation.jpg index ae41ff1d0..fe62aa971 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat-constraint_violation.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat-constraint_violation.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat-ep_constraint_violation.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat-ep_constraint_violation.jpg index dee51931f..30076a5f8 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat-ep_constraint_violation.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat-ep_constraint_violation.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat-ep_length.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat-ep_length.jpg index 3328349ef..4e17489d3 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat-ep_length.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat-ep_length.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat-ep_return.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat-ep_return.jpg index a5af02dfe..80abda239 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat-ep_return.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat-ep_return.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat-ep_reward.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat-ep_reward.jpg index 4daf1c355..8c83763f6 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat-ep_reward.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat-ep_reward.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat_eval-constraint_violation.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat_eval-constraint_violation.jpg index 98dfd9014..0852dcb67 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat_eval-constraint_violation.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat_eval-constraint_violation.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat_eval-ep_length.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat_eval-ep_length.jpg index 7ae884a35..d98e71da1 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat_eval-ep_length.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat_eval-ep_length.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat_eval-ep_return.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat_eval-ep_return.jpg index 48776bbdb..c80a19e7f 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat_eval-ep_return.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat_eval-ep_return.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat_eval-ep_reward.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat_eval-ep_reward.jpg index ae004d09e..6ce15d985 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat_eval-ep_reward.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat_eval-ep_reward.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat_eval-mse.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat_eval-mse.jpg index ae088e8a3..bc5c9987c 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat_eval-mse.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/plots/-stat_eval-mse.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/std_out.txt b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/std_out.txt index 8a44d79d2..33e6f5b1c 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/std_out.txt +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/std_out.txt @@ -1,2601 +1,2601 @@ -2023-10-19 14:52:55,769 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 56.575 +/- 20.891 -2023-10-19 14:52:55,783 : +2023-10-27 16:43:43,199 : Eval | ep_lengths 176.10 +/- 112.90 | ep_return 51.266 +/- 40.939 +2023-10-27 16:43:43,214 : -------------------------------------- | loss/ | | -| approx_kl | 0.00938 | -| entropy_loss | -3.68 | -| policy_loss | -0.0102 | -| value_loss | 17.9 | +| approx_kl | 0.0244 | +| entropy_loss | -3.75 | +| policy_loss | -0.0155 | +| value_loss | 19.8 | | stat/ | | -| constraint_violation | 10 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 52.5 | -| ep_reward | 0.21 | -| stat_eval/ | | | constraint_violation | 0 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 56.6 | -| ep_reward | 0.226 | -| mse | 538 | +| ep_return | 83.2 | +| ep_reward | 0.333 | +| stat_eval/ | | +| constraint_violation | 0.3 | +| ep_length | 176 | +| ep_return | 51.3 | +| ep_reward | 0.212 | +| mse | 304 | | time/ | | | progress | 0.01 | | step | 1e+04 | -| step_time | 9.84 | +| step_time | 12.9 | -------------------------------------- -2023-10-19 14:54:54,296 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 95.617 +/- 36.843 -2023-10-19 14:54:54,304 : +2023-10-27 16:46:30,803 : Eval | ep_lengths 200.50 +/- 99.00 | ep_return 69.087 +/- 47.087 +2023-10-27 16:46:30,823 : -------------------------------------- | loss/ | | -| approx_kl | 0.0199 | -| entropy_loss | -3.75 | -| policy_loss | -0.0141 | -| value_loss | 33.3 | +| approx_kl | 0.0282 | +| entropy_loss | -3.77 | +| policy_loss | -0.0221 | +| value_loss | 26.8 | | stat/ | | -| constraint_violation | 10 | +| constraint_violation | 0 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 86.7 | -| ep_reward | 0.347 | +| ep_return | 118 | +| ep_reward | 0.472 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 95.6 | -| ep_reward | 0.382 | -| mse | 360 | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 69.1 | +| ep_reward | 0.277 | +| mse | 333 | | time/ | | | progress | 0.02 | | step | 2e+04 | -| step_time | 9.92 | +| step_time | 14.2 | -------------------------------------- -2023-10-19 14:56:50,978 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 97.945 +/- 45.489 -2023-10-19 14:56:50,986 : +2023-10-27 16:49:23,279 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 81.078 +/- 32.855 +2023-10-27 16:49:23,286 : -------------------------------------- | loss/ | | -| approx_kl | 0.0149 | +| approx_kl | 0.0219 | | entropy_loss | -3.79 | -| policy_loss | -0.0131 | -| value_loss | 5.05 | +| policy_loss | -0.00871 | +| value_loss | 4.58 | | stat/ | | -| constraint_violation | 12 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 79.3 | -| ep_reward | 0.317 | -| stat_eval/ | | | constraint_violation | 0 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 97.9 | -| ep_reward | 0.392 | -| mse | 392 | +| ep_return | 112 | +| ep_reward | 0.448 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 81.1 | +| ep_reward | 0.324 | +| mse | 364 | | time/ | | | progress | 0.03 | | step | 3e+04 | -| step_time | 8.87 | +| step_time | 14.8 | -------------------------------------- -2023-10-19 14:58:32,574 : Eval | ep_lengths 200.20 +/- 99.60 | ep_return 82.596 +/- 45.400 -2023-10-19 14:58:32,576 : +2023-10-27 16:52:14,438 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 108.307 +/- 42.250 +2023-10-27 16:52:14,446 : -------------------------------------- | loss/ | | -| approx_kl | 0.0134 | -| entropy_loss | -3.81 | -| policy_loss | -0.00593 | -| value_loss | 5.36 | +| approx_kl | 0.0189 | +| entropy_loss | -3.77 | +| policy_loss | -0.0106 | +| value_loss | 5.62 | | stat/ | | -| constraint_violation | 18 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 81.4 | -| ep_reward | 0.326 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 135 | +| ep_reward | 0.54 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 200 | -| ep_return | 82.6 | -| ep_reward | 0.333 | -| mse | 287 | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 108 | +| ep_reward | 0.436 | +| mse | 257 | | time/ | | | progress | 0.04 | | step | 4e+04 | -| step_time | 8.62 | +| step_time | 13.6 | -------------------------------------- -2023-10-19 15:00:18,024 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 114.784 +/- 19.013 -2023-10-19 15:00:18,032 : +2023-10-27 16:55:03,539 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 93.480 +/- 36.104 +2023-10-27 16:55:03,551 : -------------------------------------- | loss/ | | -| approx_kl | 0.0211 | -| entropy_loss | -3.94 | -| policy_loss | -0.018 | -| value_loss | 4.92 | +| approx_kl | 0.0155 | +| entropy_loss | -3.73 | +| policy_loss | -0.0159 | +| value_loss | 5.12 | | stat/ | | -| constraint_violation | 26 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 93.8 | -| ep_reward | 0.448 | -| stat_eval/ | | | constraint_violation | 0 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 115 | -| ep_reward | 0.459 | -| mse | 284 | +| ep_return | 120 | +| ep_reward | 0.48 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 93.5 | +| ep_reward | 0.385 | +| mse | 298 | | time/ | | | progress | 0.05 | | step | 5e+04 | -| step_time | 8.49 | +| step_time | 16.8 | -------------------------------------- -2023-10-19 15:02:00,022 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 90.271 +/- 52.835 -2023-10-19 15:02:00,023 : +2023-10-27 16:57:53,663 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 106.961 +/- 32.865 +2023-10-27 16:57:53,676 : -------------------------------------- | loss/ | | -| approx_kl | 0.0135 | -| entropy_loss | -3.98 | -| policy_loss | -0.0204 | -| value_loss | 3.69 | +| approx_kl | 0.023 | +| entropy_loss | -3.76 | +| policy_loss | -0.0118 | +| value_loss | 5.4 | | stat/ | | -| constraint_violation | 38 | -| ep_constraint_vio... | 0.3 | -| ep_length | 176 | -| ep_return | 64 | -| ep_reward | 0.261 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 123 | +| ep_reward | 0.492 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 200 | -| ep_return | 90.3 | -| ep_reward | 0.369 | -| mse | 308 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 107 | +| ep_reward | 0.428 | +| mse | 345 | | time/ | | | progress | 0.06 | | step | 6e+04 | -| step_time | 8.81 | +| step_time | 13.1 | -------------------------------------- -2023-10-19 15:03:41,590 : Eval | ep_lengths 200.90 +/- 98.20 | ep_return 74.833 +/- 45.020 -2023-10-19 15:03:41,591 : +2023-10-27 17:00:40,127 : Eval | ep_lengths 175.70 +/- 113.50 | ep_return 81.714 +/- 56.886 +2023-10-27 17:00:40,128 : -------------------------------------- | loss/ | | -| approx_kl | 0.0294 | -| entropy_loss | -3.94 | -| policy_loss | -0.00851 | -| value_loss | 5.07 | +| approx_kl | 0.0227 | +| entropy_loss | -3.72 | +| policy_loss | -0.0107 | +| value_loss | 6.77 | | stat/ | | -| constraint_violation | 46 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 84.5 | -| ep_reward | 0.34 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 146 | +| ep_reward | 0.585 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 74.8 | -| ep_reward | 0.301 | -| mse | 320 | +| constraint_violation | 0.3 | +| ep_length | 176 | +| ep_return | 81.7 | +| ep_reward | 0.333 | +| mse | 315 | | time/ | | | progress | 0.07 | | step | 7e+04 | -| step_time | 8.4 | +| step_time | 14.8 | -------------------------------------- -2023-10-19 15:05:25,846 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 106.763 +/- 29.186 -2023-10-19 15:05:25,847 : +2023-10-27 17:03:33,928 : Eval | ep_lengths 200.20 +/- 99.60 | ep_return 107.839 +/- 56.969 +2023-10-27 17:03:33,941 : -------------------------------------- | loss/ | | -| approx_kl | 0.0158 | -| entropy_loss | -4.03 | -| policy_loss | -0.00619 | -| value_loss | 5.8 | +| approx_kl | 0.0162 | +| entropy_loss | -3.81 | +| policy_loss | -0.0169 | +| value_loss | 2.19 | | stat/ | | -| constraint_violation | 54 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 91.7 | -| ep_reward | 0.372 | -| stat_eval/ | | | constraint_violation | 0 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 107 | -| ep_reward | 0.427 | -| mse | 358 | +| ep_return | 146 | +| ep_reward | 0.583 | +| stat_eval/ | | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 108 | +| ep_reward | 0.439 | +| mse | 154 | | time/ | | | progress | 0.08 | | step | 8e+04 | -| step_time | 8.65 | +| step_time | 15.7 | -------------------------------------- -2023-10-19 15:07:05,009 : Eval | ep_lengths 176.00 +/- 113.04 | ep_return 86.438 +/- 62.828 -2023-10-19 15:07:05,011 : +2023-10-27 17:06:28,867 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 122.093 +/- 43.111 +2023-10-27 17:06:28,876 : -------------------------------------- | loss/ | | -| approx_kl | 0.0199 | -| entropy_loss | -4.18 | -| policy_loss | -0.0154 | -| value_loss | 5.73 | +| approx_kl | 0.0378 | +| entropy_loss | -3.74 | +| policy_loss | -0.0104 | +| value_loss | 1.61 | | stat/ | | -| constraint_violation | 60 | +| constraint_violation | 0 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 117 | -| ep_reward | 0.466 | +| ep_return | 160 | +| ep_reward | 0.639 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 176 | -| ep_return | 86.4 | -| ep_reward | 0.349 | -| mse | 201 | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 122 | +| ep_reward | 0.505 | +| mse | 205 | | time/ | | | progress | 0.09 | | step | 9e+04 | -| step_time | 8.62 | +| step_time | 14.7 | -------------------------------------- -2023-10-19 15:08:50,348 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 115.165 +/- 18.541 -2023-10-19 15:08:50,357 : +2023-10-27 17:09:31,450 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 127.089 +/- 24.627 +2023-10-27 17:09:31,458 : -------------------------------------- | loss/ | | -| approx_kl | 0.0148 | -| entropy_loss | -4.2 | -| policy_loss | -0.0171 | -| value_loss | 3.35 | +| approx_kl | 0.021 | +| entropy_loss | -3.75 | +| policy_loss | -0.0134 | +| value_loss | 1.5 | | stat/ | | -| constraint_violation | 69 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 107 | -| ep_reward | 0.429 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 157 | +| ep_reward | 0.628 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 115 | -| ep_reward | 0.461 | -| mse | 340 | +| ep_return | 127 | +| ep_reward | 0.508 | +| mse | 297 | | time/ | | | progress | 0.1 | | step | 1e+05 | -| step_time | 8.39 | +| step_time | 17.2 | -------------------------------------- -2023-10-19 15:10:33,027 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 108.244 +/- 55.092 -2023-10-19 15:10:33,029 : +2023-10-27 17:12:21,971 : Eval | ep_lengths 201.30 +/- 97.42 | ep_return 83.212 +/- 42.946 +2023-10-27 17:12:21,983 : -------------------------------------- | loss/ | | -| approx_kl | 0.0106 | -| entropy_loss | -4.28 | -| policy_loss | -0.0182 | -| value_loss | 2.87 | +| approx_kl | 0.0214 | +| entropy_loss | -3.82 | +| policy_loss | -0.00622 | +| value_loss | 1.59 | | stat/ | | -| constraint_violation | 78 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 103 | -| ep_reward | 0.414 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 165 | +| ep_reward | 0.66 | | stat_eval/ | | | constraint_violation | 0.2 | -| ep_length | 200 | -| ep_return | 108 | -| ep_reward | 0.433 | -| mse | 201 | +| ep_length | 201 | +| ep_return | 83.2 | +| ep_reward | 0.334 | +| mse | 365 | | time/ | | | progress | 0.11 | | step | 1.1e+05 | -| step_time | 8.59 | +| step_time | 15.1 | -------------------------------------- -2023-10-19 15:12:18,076 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 93.645 +/- 36.625 -2023-10-19 15:12:18,078 : +2023-10-27 17:15:15,659 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 124.284 +/- 29.235 +2023-10-27 17:15:15,672 : -------------------------------------- | loss/ | | -| approx_kl | 0.0259 | -| entropy_loss | -4.34 | -| policy_loss | -0.0164 | -| value_loss | 2.71 | +| approx_kl | 0.0256 | +| entropy_loss | -3.88 | +| policy_loss | -0.0154 | +| value_loss | 4.01 | | stat/ | | -| constraint_violation | 85 | +| constraint_violation | 1 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 112 | -| ep_reward | 0.447 | +| ep_return | 139 | +| ep_reward | 0.556 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 93.6 | -| ep_reward | 0.375 | -| mse | 309 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 124 | +| ep_reward | 0.497 | +| mse | 301 | | time/ | | | progress | 0.12 | | step | 1.2e+05 | -| step_time | 8.68 | +| step_time | 13.6 | -------------------------------------- -2023-10-19 15:14:05,325 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 127.214 +/- 27.180 -2023-10-19 15:14:05,334 : +2023-10-27 17:18:00,391 : Eval | ep_lengths 176.20 +/- 112.74 | ep_return 100.881 +/- 68.414 +2023-10-27 17:18:00,392 : -------------------------------------- | loss/ | | -| approx_kl | 0.0237 | -| entropy_loss | -4.34 | -| policy_loss | -0.015 | -| value_loss | 1.66 | +| approx_kl | 0.0287 | +| entropy_loss | -3.9 | +| policy_loss | -0.0108 | +| value_loss | 2.96 | | stat/ | | -| constraint_violation | 90 | +| constraint_violation | 1 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 119 | -| ep_reward | 0.474 | +| ep_return | 151 | +| ep_reward | 0.604 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 127 | -| ep_reward | 0.509 | -| mse | 314 | +| constraint_violation | 0.3 | +| ep_length | 176 | +| ep_return | 101 | +| ep_reward | 0.409 | +| mse | 158 | | time/ | | | progress | 0.13 | | step | 1.3e+05 | -| step_time | 8.72 | +| step_time | 14.6 | -------------------------------------- -2023-10-19 15:15:47,568 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 106.372 +/- 55.747 -2023-10-19 15:15:47,569 : +2023-10-27 17:20:53,378 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 133.626 +/- 25.596 +2023-10-27 17:20:53,386 : -------------------------------------- | loss/ | | -| approx_kl | 0.0254 | -| entropy_loss | -4.32 | -| policy_loss | -0.015 | -| value_loss | 2.33 | +| approx_kl | 0.031 | +| entropy_loss | -3.92 | +| policy_loss | -0.00135 | +| value_loss | 0.83 | | stat/ | | -| constraint_violation | 96 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 97.8 | -| ep_reward | 0.393 | +| constraint_violation | 1 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 159 | +| ep_reward | 0.634 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 200 | -| ep_return | 106 | -| ep_reward | 0.438 | -| mse | 173 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 134 | +| ep_reward | 0.535 | +| mse | 277 | | time/ | | | progress | 0.14 | | step | 1.4e+05 | -| step_time | 8.67 | +| step_time | 14.1 | -------------------------------------- -2023-10-19 15:17:30,578 : Eval | ep_lengths 201.40 +/- 97.21 | ep_return 87.673 +/- 46.000 -2023-10-19 15:17:30,601 : +2023-10-27 17:23:43,050 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 128.925 +/- 47.653 +2023-10-27 17:23:43,051 : -------------------------------------- | loss/ | | -| approx_kl | 0.0273 | -| entropy_loss | -4.41 | -| policy_loss | -0.0133 | -| value_loss | 2.2 | +| approx_kl | 0.0367 | +| entropy_loss | -3.9 | +| policy_loss | -0.0141 | +| value_loss | 1.37 | | stat/ | | -| constraint_violation | 103 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 118 | -| ep_reward | 0.485 | +| constraint_violation | 1 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 152 | +| ep_reward | 0.606 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 87.7 | -| ep_reward | 0.353 | -| mse | 293 | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 129 | +| ep_reward | 0.523 | +| mse | 227 | | time/ | | | progress | 0.15 | | step | 1.5e+05 | -| step_time | 8.56 | +| step_time | 16.2 | -------------------------------------- -2023-10-19 15:19:17,108 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 111.814 +/- 22.492 -2023-10-19 15:19:17,110 : +2023-10-27 17:26:28,894 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 112.996 +/- 41.072 +2023-10-27 17:26:28,895 : -------------------------------------- | loss/ | | -| approx_kl | 0.0316 | -| entropy_loss | -4.43 | -| policy_loss | -0.023 | -| value_loss | 1.47 | +| approx_kl | 0.0308 | +| entropy_loss | -3.97 | +| policy_loss | -0.0179 | +| value_loss | 0.588 | | stat/ | | -| constraint_violation | 109 | +| constraint_violation | 1 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 121 | -| ep_reward | 0.482 | +| ep_return | 165 | +| ep_reward | 0.66 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 112 | -| ep_reward | 0.447 | -| mse | 331 | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 113 | +| ep_reward | 0.482 | +| mse | 290 | | time/ | | | progress | 0.16 | | step | 1.6e+05 | -| step_time | 8.6 | +| step_time | 13.1 | -------------------------------------- -2023-10-19 15:21:02,644 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 133.765 +/- 15.967 -2023-10-19 15:21:02,653 : +2023-10-27 17:29:23,687 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 126.405 +/- 53.855 +2023-10-27 17:29:23,688 : -------------------------------------- | loss/ | | -| approx_kl | 0.0211 | -| entropy_loss | -4.48 | -| policy_loss | -0.0103 | -| value_loss | 2.81 | +| approx_kl | 0.0185 | +| entropy_loss | -4.01 | +| policy_loss | -0.0152 | +| value_loss | 0.787 | | stat/ | | -| constraint_violation | 113 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 118 | -| ep_reward | 0.475 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 1 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 134 | -| ep_reward | 0.535 | -| mse | 262 | +| ep_return | 171 | +| ep_reward | 0.683 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 126 | +| ep_reward | 0.506 | +| mse | 227 | | time/ | | | progress | 0.17 | | step | 1.7e+05 | -| step_time | 8.39 | +| step_time | 16 | -------------------------------------- -2023-10-19 15:22:48,996 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 151.695 +/- 29.423 -2023-10-19 15:22:49,004 : +2023-10-27 17:32:12,400 : Eval | ep_lengths 176.00 +/- 113.04 | ep_return 107.821 +/- 73.904 +2023-10-27 17:32:12,402 : -------------------------------------- | loss/ | | -| approx_kl | 0.0177 | -| entropy_loss | -4.51 | -| policy_loss | -0.0126 | -| value_loss | 1.16 | +| approx_kl | 0.0231 | +| entropy_loss | -4.01 | +| policy_loss | -0.013 | +| value_loss | 0.89 | | stat/ | | -| constraint_violation | 115 | +| constraint_violation | 1 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 126 | -| ep_reward | 0.506 | +| ep_return | 170 | +| ep_reward | 0.68 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 152 | -| ep_reward | 0.607 | -| mse | 220 | +| constraint_violation | 0.3 | +| ep_length | 176 | +| ep_return | 108 | +| ep_reward | 0.434 | +| mse | 169 | | time/ | | | progress | 0.18 | | step | 1.8e+05 | -| step_time | 8.7 | +| step_time | 13.6 | -------------------------------------- -2023-10-19 15:24:29,979 : Eval | ep_lengths 178.20 +/- 109.68 | ep_return 104.606 +/- 69.974 -2023-10-19 15:24:29,980 : +2023-10-27 17:35:06,965 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 126.283 +/- 49.717 +2023-10-27 17:35:06,966 : -------------------------------------- | loss/ | | -| approx_kl | 0.032 | -| entropy_loss | -4.48 | -| policy_loss | -0.0157 | -| value_loss | 1.8 | +| approx_kl | 0.0212 | +| entropy_loss | -3.98 | +| policy_loss | -0.0216 | +| value_loss | 0.68 | | stat/ | | -| constraint_violation | 126 | -| ep_constraint_vio... | 0.2 | -| ep_length | 202 | -| ep_return | 102 | -| ep_reward | 0.413 | +| constraint_violation | 1 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 178 | +| ep_reward | 0.714 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 178 | -| ep_return | 105 | -| ep_reward | 0.424 | -| mse | 161 | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 126 | +| ep_reward | 0.505 | +| mse | 229 | | time/ | | | progress | 0.19 | | step | 1.9e+05 | -| step_time | 8.66 | +| step_time | 14.2 | -------------------------------------- -2023-10-19 15:26:14,255 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 118.012 +/- 44.887 -2023-10-19 15:26:14,256 : +2023-10-27 17:37:57,662 : Eval | ep_lengths 200.20 +/- 99.60 | ep_return 112.333 +/- 61.504 +2023-10-27 17:37:57,663 : -------------------------------------- | loss/ | | -| approx_kl | 0.0268 | -| entropy_loss | -4.44 | -| policy_loss | -0.00639 | -| value_loss | 1.75 | +| approx_kl | 0.0312 | +| entropy_loss | -3.97 | +| policy_loss | -0.0116 | +| value_loss | 0.496 | | stat/ | | -| constraint_violation | 132 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 102 | -| ep_reward | 0.411 | +| constraint_violation | 1 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 162 | +| ep_reward | 0.647 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 118 | -| ep_reward | 0.474 | -| mse | 322 | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 112 | +| ep_reward | 0.47 | +| mse | 226 | | time/ | | | progress | 0.2 | | step | 2e+05 | -| step_time | 8.7 | +| step_time | 13.5 | -------------------------------------- -2023-10-19 15:27:59,733 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 134.724 +/- 16.853 -2023-10-19 15:27:59,734 : +2023-10-27 17:40:48,456 : Eval | ep_lengths 176.60 +/- 112.16 | ep_return 106.253 +/- 72.739 +2023-10-27 17:40:48,457 : -------------------------------------- | loss/ | | -| approx_kl | 0.0269 | -| entropy_loss | -4.56 | -| policy_loss | -0.0168 | -| value_loss | 1.04 | +| approx_kl | 0.0241 | +| entropy_loss | -4.1 | +| policy_loss | -0.00934 | +| value_loss | 0.582 | | stat/ | | -| constraint_violation | 139 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 115 | -| ep_reward | 0.462 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 1 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 135 | -| ep_reward | 0.539 | -| mse | 265 | +| ep_return | 162 | +| ep_reward | 0.65 | +| stat_eval/ | | +| constraint_violation | 0.3 | +| ep_length | 177 | +| ep_return | 106 | +| ep_reward | 0.429 | +| mse | 197 | | time/ | | | progress | 0.21 | | step | 2.1e+05 | -| step_time | 8.66 | +| step_time | 14.5 | -------------------------------------- -2023-10-19 15:29:42,201 : Eval | ep_lengths 200.70 +/- 98.60 | ep_return 112.251 +/- 61.557 -2023-10-19 15:29:42,231 : +2023-10-27 17:43:40,544 : Eval | ep_lengths 176.50 +/- 112.32 | ep_return 110.254 +/- 74.107 +2023-10-27 17:43:40,546 : -------------------------------------- | loss/ | | -| approx_kl | 0.0214 | -| entropy_loss | -4.57 | -| policy_loss | -0.0113 | -| value_loss | 1.16 | +| approx_kl | 0.0207 | +| entropy_loss | -4.16 | +| policy_loss | -0.0105 | +| value_loss | 0.346 | | stat/ | | -| constraint_violation | 146 | +| constraint_violation | 2 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 129 | -| ep_reward | 0.517 | +| ep_return | 189 | +| ep_reward | 0.756 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 112 | -| ep_reward | 0.474 | -| mse | 228 | +| constraint_violation | 0.3 | +| ep_length | 176 | +| ep_return | 110 | +| ep_reward | 0.447 | +| mse | 152 | | time/ | | | progress | 0.22 | | step | 2.2e+05 | -| step_time | 8.54 | +| step_time | 14.7 | -------------------------------------- -2023-10-19 15:31:26,594 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 136.314 +/- 48.633 -2023-10-19 15:31:26,596 : +2023-10-27 17:46:35,338 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 130.033 +/- 51.242 +2023-10-27 17:46:35,340 : -------------------------------------- | loss/ | | -| approx_kl | 0.0231 | -| entropy_loss | -4.61 | -| policy_loss | -0.0118 | -| value_loss | 1.6 | +| approx_kl | 0.0173 | +| entropy_loss | -4.11 | +| policy_loss | -0.012 | +| value_loss | 0.325 | | stat/ | | -| constraint_violation | 156 | -| ep_constraint_vio... | 0.3 | -| ep_length | 175 | -| ep_return | 92 | -| ep_reward | 0.446 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 170 | +| ep_reward | 0.68 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 136 | -| ep_reward | 0.557 | -| mse | 159 | +| ep_return | 130 | +| ep_reward | 0.525 | +| mse | 246 | | time/ | | | progress | 0.23 | | step | 2.3e+05 | -| step_time | 8.61 | +| step_time | 14.3 | -------------------------------------- -2023-10-19 15:33:09,185 : Eval | ep_lengths 200.90 +/- 98.21 | ep_return 115.168 +/- 61.659 -2023-10-19 15:33:09,186 : +2023-10-27 17:49:25,329 : Eval | ep_lengths 175.30 +/- 114.11 | ep_return 93.548 +/- 66.920 +2023-10-27 17:49:25,330 : -------------------------------------- | loss/ | | -| approx_kl | 0.0209 | -| entropy_loss | -4.63 | -| policy_loss | -0.00895 | -| value_loss | 2.27 | +| approx_kl | 0.017 | +| entropy_loss | -4.28 | +| policy_loss | -0.0106 | +| value_loss | 0.466 | | stat/ | | -| constraint_violation | 163 | +| constraint_violation | 3 | | ep_constraint_vio... | 0.1 | | ep_length | 225 | -| ep_return | 115 | -| ep_reward | 0.464 | +| ep_return | 168 | +| ep_reward | 0.731 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 115 | +| constraint_violation | 0.3 | +| ep_length | 175 | +| ep_return | 93.5 | | ep_reward | 0.463 | -| mse | 175 | +| mse | 225 | | time/ | | | progress | 0.24 | | step | 2.4e+05 | -| step_time | 8.6 | +| step_time | 16.6 | -------------------------------------- -2023-10-19 15:34:54,861 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 135.320 +/- 21.975 -2023-10-19 15:34:54,862 : +2023-10-27 17:52:16,624 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 123.911 +/- 65.695 +2023-10-27 17:52:16,626 : -------------------------------------- | loss/ | | -| approx_kl | 0.0238 | -| entropy_loss | -4.68 | -| policy_loss | -0.012 | -| value_loss | 1.06 | +| approx_kl | 0.0282 | +| entropy_loss | -4.28 | +| policy_loss | -0.00353 | +| value_loss | 0.498 | | stat/ | | -| constraint_violation | 167 | -| ep_constraint_vio... | 0.1 | -| ep_length | 227 | -| ep_return | 122 | -| ep_reward | 0.489 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 135 | -| ep_reward | 0.541 | -| mse | 312 | +| ep_return | 183 | +| ep_reward | 0.732 | +| stat_eval/ | | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 124 | +| ep_reward | 0.497 | +| mse | 219 | | time/ | | | progress | 0.25 | | step | 2.5e+05 | -| step_time | 8.53 | +| step_time | 13.6 | -------------------------------------- -2023-10-19 15:36:39,363 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 130.600 +/- 48.566 -2023-10-19 15:36:39,364 : +2023-10-27 17:55:15,320 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 149.734 +/- 27.171 +2023-10-27 17:55:15,329 : -------------------------------------- | loss/ | | -| approx_kl | 0.0333 | -| entropy_loss | -4.69 | -| policy_loss | -0.00796 | -| value_loss | 1.48 | +| approx_kl | 0.0276 | +| entropy_loss | -4.27 | +| policy_loss | -0.0201 | +| value_loss | 0.308 | | stat/ | | -| constraint_violation | 177 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 113 | -| ep_reward | 0.451 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 184 | +| ep_reward | 0.736 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 131 | -| ep_reward | 0.578 | -| mse | 215 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 150 | +| ep_reward | 0.599 | +| mse | 254 | | time/ | | | progress | 0.26 | | step | 2.6e+05 | -| step_time | 8.68 | +| step_time | 15.1 | -------------------------------------- -2023-10-19 15:38:22,592 : Eval | ep_lengths 226.30 +/- 71.10 | ep_return 124.709 +/- 44.779 -2023-10-19 15:38:22,620 : +2023-10-27 17:58:01,374 : Eval | ep_lengths 175.30 +/- 114.11 | ep_return 105.437 +/- 73.083 +2023-10-27 17:58:01,375 : -------------------------------------- | loss/ | | -| approx_kl | 0.0109 | -| entropy_loss | -4.77 | -| policy_loss | -0.0238 | -| value_loss | 1.09 | +| approx_kl | 0.0302 | +| entropy_loss | -4.23 | +| policy_loss | -0.00921 | +| value_loss | 0.311 | | stat/ | | -| constraint_violation | 183 | +| constraint_violation | 3 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 136 | -| ep_reward | 0.542 | +| ep_return | 177 | +| ep_reward | 0.706 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 226 | -| ep_return | 125 | -| ep_reward | 0.499 | -| mse | 271 | +| constraint_violation | 0.3 | +| ep_length | 175 | +| ep_return | 105 | +| ep_reward | 0.516 | +| mse | 177 | | time/ | | | progress | 0.27 | | step | 2.7e+05 | -| step_time | 8.6 | +| step_time | 14.1 | -------------------------------------- -2023-10-19 15:40:02,376 : Eval | ep_lengths 177.00 +/- 111.52 | ep_return 95.938 +/- 66.237 -2023-10-19 15:40:02,377 : +2023-10-27 18:01:00,904 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 154.566 +/- 54.352 +2023-10-27 18:01:00,911 : -------------------------------------- | loss/ | | -| approx_kl | 0.0225 | -| entropy_loss | -4.75 | -| policy_loss | -0.00712 | -| value_loss | 1.24 | +| approx_kl | 0.0264 | +| entropy_loss | -4.27 | +| policy_loss | -0.0158 | +| value_loss | 0.676 | | stat/ | | -| constraint_violation | 189 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 104 | -| ep_reward | 0.425 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 175 | +| ep_reward | 0.701 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 177 | -| ep_return | 95.9 | -| ep_reward | 0.385 | -| mse | 226 | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 155 | +| ep_reward | 0.618 | +| mse | 130 | | time/ | | | progress | 0.28 | | step | 2.8e+05 | -| step_time | 8.49 | +| step_time | 16.2 | -------------------------------------- -2023-10-19 15:41:45,080 : Eval | ep_lengths 202.90 +/- 94.23 | ep_return 100.574 +/- 52.942 -2023-10-19 15:41:45,081 : +2023-10-27 18:03:47,849 : Eval | ep_lengths 200.50 +/- 99.00 | ep_return 126.997 +/- 71.519 +2023-10-27 18:03:47,850 : -------------------------------------- | loss/ | | -| approx_kl | 0.031 | -| entropy_loss | -4.74 | -| policy_loss | -0.0205 | -| value_loss | 0.858 | +| approx_kl | 0.0243 | +| entropy_loss | -4.22 | +| policy_loss | 0.00305 | +| value_loss | 0.26 | | stat/ | | -| constraint_violation | 198 | +| constraint_violation | 3 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 122 | -| ep_reward | 0.489 | +| ep_return | 190 | +| ep_reward | 0.76 | | stat_eval/ | | | constraint_violation | 0.2 | -| ep_length | 203 | -| ep_return | 101 | -| ep_reward | 0.403 | -| mse | 339 | +| ep_length | 200 | +| ep_return | 127 | +| ep_reward | 0.51 | +| mse | 159 | | time/ | | | progress | 0.29 | | step | 2.9e+05 | -| step_time | 8.62 | +| step_time | 14.4 | -------------------------------------- -2023-10-19 15:43:29,687 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 122.016 +/- 43.438 -2023-10-19 15:43:29,688 : +2023-10-27 18:06:37,180 : Eval | ep_lengths 200.50 +/- 99.00 | ep_return 114.921 +/- 59.455 +2023-10-27 18:06:37,181 : -------------------------------------- | loss/ | | -| approx_kl | 0.0271 | -| entropy_loss | -4.84 | -| policy_loss | 0.00547 | -| value_loss | 1.13 | +| approx_kl | 0.0171 | +| entropy_loss | -4.21 | +| policy_loss | -0.00782 | +| value_loss | 0.443 | | stat/ | | -| constraint_violation | 208 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 131 | -| ep_reward | 0.524 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 194 | +| ep_reward | 0.776 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 122 | -| ep_reward | 0.488 | -| mse | 309 | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 115 | +| ep_reward | 0.477 | +| mse | 253 | | time/ | | | progress | 0.3 | | step | 3e+05 | -| step_time | 8.73 | +| step_time | 14.7 | -------------------------------------- -2023-10-19 15:45:25,883 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 132.161 +/- 51.445 -2023-10-19 15:45:25,885 : +2023-10-27 18:09:32,717 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 132.386 +/- 56.221 +2023-10-27 18:09:32,718 : -------------------------------------- | loss/ | | -| approx_kl | 0.0272 | -| entropy_loss | -4.85 | -| policy_loss | -0.0145 | -| value_loss | 1.56 | +| approx_kl | 0.0247 | +| entropy_loss | -4.24 | +| policy_loss | -0.0135 | +| value_loss | 0.318 | | stat/ | | -| constraint_violation | 212 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 130 | -| ep_reward | 0.529 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 189 | +| ep_reward | 0.757 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | | ep_return | 132 | -| ep_reward | 0.53 | -| mse | 262 | +| ep_reward | 0.531 | +| mse | 282 | | time/ | | | progress | 0.31 | | step | 3.1e+05 | -| step_time | 9.99 | +| step_time | 16.9 | -------------------------------------- -2023-10-19 15:47:28,972 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 137.197 +/- 22.296 -2023-10-19 15:47:28,973 : +2023-10-27 18:12:29,039 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 140.841 +/- 34.981 +2023-10-27 18:12:29,041 : -------------------------------------- | loss/ | | -| approx_kl | 0.0237 | -| entropy_loss | -4.89 | -| policy_loss | -0.00591 | -| value_loss | 2.62 | +| approx_kl | 0.0272 | +| entropy_loss | -4.22 | +| policy_loss | -0.0162 | +| value_loss | 0.243 | | stat/ | | -| constraint_violation | 218 | +| constraint_violation | 3 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 145 | -| ep_reward | 0.58 | +| ep_return | 191 | +| ep_reward | 0.764 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 137 | -| ep_reward | 0.549 | -| mse | 324 | +| ep_return | 141 | +| ep_reward | 0.563 | +| mse | 326 | | time/ | | | progress | 0.32 | | step | 3.2e+05 | -| step_time | 9.8 | +| step_time | 13.7 | -------------------------------------- -2023-10-19 15:49:31,196 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 128.588 +/- 47.262 -2023-10-19 15:49:31,197 : +2023-10-27 18:15:16,289 : Eval | ep_lengths 177.10 +/- 111.45 | ep_return 110.290 +/- 81.791 +2023-10-27 18:15:16,290 : -------------------------------------- | loss/ | | -| approx_kl | 0.0249 | -| entropy_loss | -4.9 | -| policy_loss | -0.0152 | -| value_loss | 1.12 | +| approx_kl | 0.0185 | +| entropy_loss | -4.22 | +| policy_loss | -0.0224 | +| value_loss | 0.258 | | stat/ | | -| constraint_violation | 222 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 137 | -| ep_reward | 0.549 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 184 | +| ep_reward | 0.734 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 226 | -| ep_return | 129 | -| ep_reward | 0.528 | -| mse | 248 | +| constraint_violation | 0.3 | +| ep_length | 177 | +| ep_return | 110 | +| ep_reward | 0.456 | +| mse | 184 | | time/ | | | progress | 0.33 | | step | 3.3e+05 | -| step_time | 10.3 | +| step_time | 13.4 | -------------------------------------- -2023-10-19 15:51:34,919 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 120.496 +/- 48.569 -2023-10-19 15:51:34,921 : +2023-10-27 18:18:04,782 : Eval | ep_lengths 175.40 +/- 113.95 | ep_return 103.321 +/- 72.790 +2023-10-27 18:18:04,784 : -------------------------------------- | loss/ | | -| approx_kl | 0.016 | -| entropy_loss | -5.01 | -| policy_loss | -0.0183 | -| value_loss | 1.01 | +| approx_kl | 0.0231 | +| entropy_loss | -4.19 | +| policy_loss | -0.0129 | +| value_loss | 0.552 | | stat/ | | -| constraint_violation | 229 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 97.7 | -| ep_reward | 0.396 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 202 | +| ep_reward | 0.807 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 120 | -| ep_reward | 0.492 | -| mse | 353 | +| constraint_violation | 0.3 | +| ep_length | 175 | +| ep_return | 103 | +| ep_reward | 0.455 | +| mse | 216 | | time/ | | | progress | 0.34 | | step | 3.4e+05 | -| step_time | 10.2 | +| step_time | 15.3 | -------------------------------------- -2023-10-19 15:53:40,213 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 166.700 +/- 28.958 -2023-10-19 15:53:40,223 : +2023-10-27 18:20:54,762 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 128.661 +/- 61.673 +2023-10-27 18:20:54,764 : -------------------------------------- | loss/ | | -| approx_kl | 0.0219 | -| entropy_loss | -5.1 | -| policy_loss | -0.00122 | -| value_loss | 0.809 | +| approx_kl | 0.0271 | +| entropy_loss | -4.17 | +| policy_loss | -0.00646 | +| value_loss | 0.359 | | stat/ | | -| constraint_violation | 235 | +| constraint_violation | 3 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 141 | -| ep_reward | 0.566 | +| ep_return | 191 | +| ep_reward | 0.763 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 167 | -| ep_reward | 0.667 | -| mse | 160 | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 129 | +| ep_reward | 0.538 | +| mse | 338 | | time/ | | | progress | 0.35 | | step | 3.5e+05 | -| step_time | 10.1 | +| step_time | 14.3 | -------------------------------------- -2023-10-19 15:55:37,389 : Eval | ep_lengths 151.10 +/- 121.14 | ep_return 95.215 +/- 78.821 -2023-10-19 15:55:37,390 : +2023-10-27 18:23:45,590 : Eval | ep_lengths 200.20 +/- 99.60 | ep_return 120.500 +/- 71.115 +2023-10-27 18:23:45,591 : -------------------------------------- | loss/ | | -| approx_kl | 0.0195 | -| entropy_loss | -5.18 | -| policy_loss | -0.0027 | -| value_loss | 1.9 | +| approx_kl | 0.0263 | +| entropy_loss | -4.21 | +| policy_loss | -0.0157 | +| value_loss | 0.184 | | stat/ | | -| constraint_violation | 241 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 143 | -| ep_reward | 0.573 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 186 | +| ep_reward | 0.746 | | stat_eval/ | | -| constraint_violation | 0.4 | -| ep_length | 151 | -| ep_return | 95.2 | -| ep_reward | 0.415 | -| mse | 116 | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 121 | +| ep_reward | 0.537 | +| mse | 206 | | time/ | | | progress | 0.36 | | step | 3.6e+05 | -| step_time | 10.2 | +| step_time | 14.3 | -------------------------------------- -2023-10-19 15:57:39,207 : Eval | ep_lengths 226.70 +/- 69.90 | ep_return 140.423 +/- 49.226 -2023-10-19 15:57:39,209 : +2023-10-27 18:26:33,791 : Eval | ep_lengths 202.20 +/- 95.60 | ep_return 127.817 +/- 71.620 +2023-10-27 18:26:33,793 : -------------------------------------- | loss/ | | -| approx_kl | 0.0213 | -| entropy_loss | -5.21 | -| policy_loss | -0.00558 | -| value_loss | 0.987 | +| approx_kl | 0.0245 | +| entropy_loss | -4.2 | +| policy_loss | -0.00685 | +| value_loss | 0.662 | | stat/ | | -| constraint_violation | 245 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 136 | -| ep_reward | 0.545 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 191 | +| ep_reward | 0.763 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 227 | -| ep_return | 140 | -| ep_reward | 0.562 | -| mse | 192 | +| constraint_violation | 0.2 | +| ep_length | 202 | +| ep_return | 128 | +| ep_reward | 0.516 | +| mse | 177 | | time/ | | | progress | 0.37 | | step | 3.7e+05 | -| step_time | 10.1 | +| step_time | 13.4 | -------------------------------------- -2023-10-19 15:59:38,195 : Eval | ep_lengths 200.80 +/- 98.40 | ep_return 128.216 +/- 68.096 -2023-10-19 15:59:38,196 : +2023-10-27 18:29:21,480 : Eval | ep_lengths 201.00 +/- 98.02 | ep_return 113.869 +/- 71.256 +2023-10-27 18:29:21,482 : -------------------------------------- | loss/ | | -| approx_kl | 0.0147 | -| entropy_loss | -5.22 | -| policy_loss | -0.0127 | -| value_loss | 0.552 | +| approx_kl | 0.0367 | +| entropy_loss | -4.24 | +| policy_loss | -0.0122 | +| value_loss | 0.197 | | stat/ | | -| constraint_violation | 253 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 144 | -| ep_reward | 0.583 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 183 | +| ep_reward | 0.734 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 201 | -| ep_return | 128 | -| ep_reward | 0.519 | -| mse | 195 | +| ep_return | 114 | +| ep_reward | 0.457 | +| mse | 219 | | time/ | | | progress | 0.38 | | step | 3.8e+05 | -| step_time | 9.99 | +| step_time | 15.1 | -------------------------------------- -2023-10-19 16:01:37,749 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 127.892 +/- 65.119 -2023-10-19 16:01:37,751 : ---------------------------------------- -| loss/ | | -| approx_kl | 0.0303 | -| entropy_loss | -5.28 | -| policy_loss | -0.000702 | -| value_loss | 1.04 | -| stat/ | | -| constraint_violation | 255 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 152 | -| ep_reward | 0.608 | -| stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 200 | -| ep_return | 128 | -| ep_reward | 0.577 | -| mse | 156 | -| time/ | | -| progress | 0.39 | -| step | 3.9e+05 | -| step_time | 10.1 | ---------------------------------------- - -2023-10-19 16:03:41,798 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 155.004 +/- 18.381 -2023-10-19 16:03:41,800 : +2023-10-27 18:32:18,057 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 107.545 +/- 60.262 +2023-10-27 18:32:18,059 : -------------------------------------- | loss/ | | -| approx_kl | 0.027 | -| entropy_loss | -5.35 | -| policy_loss | -0.00905 | -| value_loss | 1.17 | +| approx_kl | 0.0294 | +| entropy_loss | -4.22 | +| policy_loss | -0.00606 | +| value_loss | 0.891 | | stat/ | | -| constraint_violation | 262 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 126 | -| ep_reward | 0.508 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 189 | +| ep_reward | 0.757 | | stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 108 | +| ep_reward | 0.441 | +| mse | 292 | +| time/ | | +| progress | 0.39 | +| step | 3.9e+05 | +| step_time | 16.2 | +-------------------------------------- + +2023-10-27 18:35:05,130 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 131.629 +/- 51.786 +2023-10-27 18:35:05,132 : +-------------------------------------- +| loss/ | | +| approx_kl | 0.0246 | +| entropy_loss | -4.27 | +| policy_loss | 0.00199 | +| value_loss | 0.369 | +| stat/ | | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 155 | -| ep_reward | 0.62 | -| mse | 269 | +| ep_return | 186 | +| ep_reward | 0.745 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 132 | +| ep_reward | 0.527 | +| mse | 251 | | time/ | | | progress | 0.4 | | step | 4e+05 | -| step_time | 10.1 | +| step_time | 14.4 | -------------------------------------- -2023-10-19 16:05:43,764 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 142.239 +/- 50.854 -2023-10-19 16:05:43,765 : +2023-10-27 18:37:53,476 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 132.869 +/- 54.769 +2023-10-27 18:37:53,478 : -------------------------------------- | loss/ | | -| approx_kl | 0.0171 | -| entropy_loss | -5.47 | -| policy_loss | -0.00935 | -| value_loss | 1.31 | +| approx_kl | 0.0328 | +| entropy_loss | -4.31 | +| policy_loss | -0.0191 | +| value_loss | 0.151 | | stat/ | | -| constraint_violation | 273 | -| ep_constraint_vio... | 0.3 | -| ep_length | 177 | -| ep_return | 109 | -| ep_reward | 0.443 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 178 | +| ep_reward | 0.71 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 142 | -| ep_reward | 0.583 | -| mse | 199 | +| ep_return | 133 | +| ep_reward | 0.534 | +| mse | 245 | | time/ | | | progress | 0.41 | | step | 4.1e+05 | -| step_time | 10.1 | +| step_time | 12.9 | -------------------------------------- -2023-10-19 16:07:45,490 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 157.638 +/- 23.750 -2023-10-19 16:07:45,492 : +2023-10-27 18:40:38,616 : Eval | ep_lengths 175.80 +/- 113.35 | ep_return 110.721 +/- 76.144 +2023-10-27 18:40:38,617 : -------------------------------------- | loss/ | | -| approx_kl | 0.0264 | -| entropy_loss | -5.49 | -| policy_loss | -0.0213 | -| value_loss | 0.655 | +| approx_kl | 0.0249 | +| entropy_loss | -4.44 | +| policy_loss | -0.00882 | +| value_loss | 0.228 | | stat/ | | -| constraint_violation | 275 | +| constraint_violation | 4 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 144 | -| ep_reward | 0.577 | +| ep_return | 199 | +| ep_reward | 0.795 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 158 | -| ep_reward | 0.631 | -| mse | 243 | +| constraint_violation | 0.3 | +| ep_length | 176 | +| ep_return | 111 | +| ep_reward | 0.462 | +| mse | 143 | | time/ | | | progress | 0.42 | | step | 4.2e+05 | -| step_time | 9.78 | +| step_time | 14.3 | -------------------------------------- -2023-10-19 16:09:43,975 : Eval | ep_lengths 200.80 +/- 98.40 | ep_return 117.307 +/- 63.553 -2023-10-19 16:09:43,998 : +2023-10-27 18:43:25,787 : Eval | ep_lengths 175.70 +/- 113.50 | ep_return 107.562 +/- 73.162 +2023-10-27 18:43:25,788 : -------------------------------------- | loss/ | | -| approx_kl | 0.0352 | -| entropy_loss | -5.51 | -| policy_loss | -0.00314 | -| value_loss | 1.4 | +| approx_kl | 0.0331 | +| entropy_loss | -4.42 | +| policy_loss | -0.00741 | +| value_loss | 0.193 | | stat/ | | -| constraint_violation | 283 | -| ep_constraint_vio... | 0.3 | -| ep_length | 177 | -| ep_return | 107 | -| ep_reward | 0.473 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 195 | +| ep_reward | 0.781 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 117 | -| ep_reward | 0.477 | -| mse | 212 | +| constraint_violation | 0.3 | +| ep_length | 176 | +| ep_return | 108 | +| ep_reward | 0.458 | +| mse | 178 | | time/ | | | progress | 0.43 | | step | 4.3e+05 | -| step_time | 9.92 | +| step_time | 13.3 | -------------------------------------- -2023-10-19 16:11:43,517 : Eval | ep_lengths 201.60 +/- 96.82 | ep_return 120.263 +/- 62.164 -2023-10-19 16:11:43,538 : +2023-10-27 18:46:19,130 : Eval | ep_lengths 226.50 +/- 70.50 | ep_return 130.588 +/- 54.215 +2023-10-27 18:46:19,132 : -------------------------------------- | loss/ | | -| approx_kl | 0.0326 | -| entropy_loss | -5.56 | -| policy_loss | -0.00643 | -| value_loss | 0.852 | +| approx_kl | 0.0162 | +| entropy_loss | -4.49 | +| policy_loss | -0.0134 | +| value_loss | 0.191 | | stat/ | | -| constraint_violation | 289 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 132 | -| ep_reward | 0.53 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 193 | +| ep_reward | 0.772 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 202 | -| ep_return | 120 | -| ep_reward | 0.482 | -| mse | 265 | +| constraint_violation | 0.1 | +| ep_length | 226 | +| ep_return | 131 | +| ep_reward | 0.522 | +| mse | 299 | | time/ | | | progress | 0.44 | | step | 4.4e+05 | -| step_time | 10 | +| step_time | 13.2 | -------------------------------------- -2023-10-19 16:13:41,818 : Eval | ep_lengths 176.40 +/- 112.45 | ep_return 109.615 +/- 74.617 -2023-10-19 16:13:41,820 : +2023-10-27 18:49:08,326 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 137.134 +/- 52.638 +2023-10-27 18:49:08,328 : -------------------------------------- | loss/ | | -| approx_kl | 0.0397 | -| entropy_loss | -5.63 | -| policy_loss | -0.0124 | -| value_loss | 2.08 | +| approx_kl | 0.0211 | +| entropy_loss | -4.47 | +| policy_loss | -0.0111 | +| value_loss | 0.136 | | stat/ | | -| constraint_violation | 298 | -| ep_constraint_vio... | 0.2 | -| ep_length | 202 | -| ep_return | 128 | -| ep_reward | 0.515 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 188 | +| ep_reward | 0.754 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 176 | -| ep_return | 110 | -| ep_reward | 0.445 | -| mse | 167 | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 137 | +| ep_reward | 0.567 | +| mse | 222 | | time/ | | | progress | 0.45 | | step | 4.5e+05 | -| step_time | 10.2 | +| step_time | 14 | -------------------------------------- -2023-10-19 16:15:43,325 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 155.798 +/- 54.909 -2023-10-19 16:15:43,327 : +2023-10-27 18:52:00,683 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 165.562 +/- 58.299 +2023-10-27 18:52:00,691 : -------------------------------------- | loss/ | | -| approx_kl | 0.0356 | -| entropy_loss | -5.68 | -| policy_loss | -0.0152 | -| value_loss | 2.1 | +| approx_kl | 0.0192 | +| entropy_loss | -4.5 | +| policy_loss | -0.0127 | +| value_loss | 0.133 | | stat/ | | -| constraint_violation | 303 | -| ep_constraint_vio... | 0.1 | -| ep_length | 227 | -| ep_return | 147 | -| ep_reward | 0.588 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 201 | +| ep_reward | 0.805 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 156 | -| ep_reward | 0.629 | -| mse | 146 | +| ep_return | 166 | +| ep_reward | 0.674 | +| mse | 97.4 | | time/ | | | progress | 0.46 | | step | 4.6e+05 | -| step_time | 10.1 | +| step_time | 15.1 | -------------------------------------- -2023-10-19 16:17:47,007 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 163.446 +/- 17.605 -2023-10-19 16:17:47,008 : +2023-10-27 18:54:55,750 : Eval | ep_lengths 226.20 +/- 71.40 | ep_return 130.853 +/- 46.214 +2023-10-27 18:54:55,751 : -------------------------------------- | loss/ | | -| approx_kl | 0.0159 | -| entropy_loss | -5.68 | -| policy_loss | -0.0193 | -| value_loss | 0.625 | +| approx_kl | 0.0263 | +| entropy_loss | -4.46 | +| policy_loss | 0.00321 | +| value_loss | 0.215 | | stat/ | | -| constraint_violation | 308 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 127 | -| ep_reward | 0.51 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 163 | -| ep_reward | 0.654 | -| mse | 190 | +| ep_return | 201 | +| ep_reward | 0.806 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 226 | +| ep_return | 131 | +| ep_reward | 0.526 | +| mse | 246 | | time/ | | | progress | 0.47 | | step | 4.7e+05 | -| step_time | 9.95 | +| step_time | 13.5 | -------------------------------------- -2023-10-19 16:19:58,750 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 147.621 +/- 23.177 -2023-10-19 16:19:58,751 : +2023-10-27 18:57:34,610 : Eval | ep_lengths 176.30 +/- 112.59 | ep_return 119.903 +/- 82.181 +2023-10-27 18:57:34,611 : -------------------------------------- | loss/ | | -| approx_kl | 0.0181 | -| entropy_loss | -5.78 | -| policy_loss | -0.0148 | -| value_loss | 0.862 | +| approx_kl | 0.0173 | +| entropy_loss | -4.49 | +| policy_loss | -0.0186 | +| value_loss | 0.212 | | stat/ | | -| constraint_violation | 316 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 122 | -| ep_reward | 0.488 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 148 | -| ep_reward | 0.59 | -| mse | 295 | +| ep_return | 199 | +| ep_reward | 0.794 | +| stat_eval/ | | +| constraint_violation | 0.3 | +| ep_length | 176 | +| ep_return | 120 | +| ep_reward | 0.488 | +| mse | 151 | | time/ | | | progress | 0.48 | | step | 4.8e+05 | -| step_time | 11.8 | +| step_time | 14.3 | -------------------------------------- -2023-10-19 16:21:54,775 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 147.426 +/- 51.426 -2023-10-19 16:21:54,776 : +2023-10-27 19:00:25,727 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 133.419 +/- 56.452 +2023-10-27 19:00:25,729 : -------------------------------------- | loss/ | | -| approx_kl | 0.025 | -| entropy_loss | -5.9 | -| policy_loss | -0.00176 | -| value_loss | 1.98 | +| approx_kl | 0.0217 | +| entropy_loss | -4.51 | +| policy_loss | -0.0145 | +| value_loss | 0.475 | | stat/ | | -| constraint_violation | 322 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 125 | -| ep_reward | 0.514 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 193 | +| ep_reward | 0.772 | | stat_eval/ | | | constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 147 | -| ep_reward | 0.59 | -| mse | 178 | +| ep_length | 226 | +| ep_return | 133 | +| ep_reward | 0.534 | +| mse | 324 | | time/ | | | progress | 0.49 | | step | 4.9e+05 | -| step_time | 9.39 | +| step_time | 14 | -------------------------------------- -2023-10-19 16:23:49,582 : Eval | ep_lengths 226.80 +/- 69.60 | ep_return 145.901 +/- 52.753 -2023-10-19 16:23:49,619 : +2023-10-27 19:03:23,165 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 149.358 +/- 19.994 +2023-10-27 19:03:23,167 : -------------------------------------- | loss/ | | -| approx_kl | 0.0239 | -| entropy_loss | -5.84 | -| policy_loss | -0.0175 | -| value_loss | 0.606 | +| approx_kl | 0.0176 | +| entropy_loss | -4.51 | +| policy_loss | -0.023 | +| value_loss | 0.204 | | stat/ | | -| constraint_violation | 331 | +| constraint_violation | 5 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 155 | -| ep_reward | 0.619 | +| ep_return | 201 | +| ep_reward | 0.806 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 227 | -| ep_return | 146 | -| ep_reward | 0.592 | -| mse | 205 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 149 | +| ep_reward | 0.597 | +| mse | 261 | | time/ | | | progress | 0.5 | | step | 5e+05 | -| step_time | 9.53 | +| step_time | 14.7 | -------------------------------------- -2023-10-19 16:25:46,745 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 156.663 +/- 13.599 -2023-10-19 16:25:46,747 : +2023-10-27 19:06:20,397 : Eval | ep_lengths 200.50 +/- 99.00 | ep_return 140.670 +/- 74.596 +2023-10-27 19:06:20,398 : -------------------------------------- | loss/ | | -| approx_kl | 0.0253 | -| entropy_loss | -5.84 | -| policy_loss | -0.012 | -| value_loss | 0.854 | +| approx_kl | 0.0274 | +| entropy_loss | -4.46 | +| policy_loss | -0.01 | +| value_loss | 0.153 | | stat/ | | -| constraint_violation | 338 | +| constraint_violation | 5 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 160 | -| ep_reward | 0.64 | +| ep_return | 201 | +| ep_reward | 0.805 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 157 | -| ep_reward | 0.627 | -| mse | 252 | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 141 | +| ep_reward | 0.622 | +| mse | 129 | | time/ | | | progress | 0.51 | | step | 5.1e+05 | -| step_time | 9.44 | +| step_time | 14.2 | -------------------------------------- -2023-10-19 16:27:35,314 : Eval | ep_lengths 153.70 +/- 117.96 | ep_return 104.849 +/- 86.233 -2023-10-19 16:27:35,315 : +2023-10-27 19:09:09,571 : Eval | ep_lengths 175.90 +/- 113.20 | ep_return 124.398 +/- 85.490 +2023-10-27 19:09:09,572 : -------------------------------------- | loss/ | | -| approx_kl | 0.0173 | -| entropy_loss | -5.84 | -| policy_loss | -0.0174 | -| value_loss | 0.784 | +| approx_kl | 0.0275 | +| entropy_loss | -4.57 | +| policy_loss | -0.0119 | +| value_loss | 0.156 | | stat/ | | -| constraint_violation | 345 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 152 | -| ep_reward | 0.609 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 184 | +| ep_reward | 0.734 | | stat_eval/ | | -| constraint_violation | 0.4 | -| ep_length | 154 | -| ep_return | 105 | -| ep_reward | 0.438 | -| mse | 105 | +| constraint_violation | 0.3 | +| ep_length | 176 | +| ep_return | 124 | +| ep_reward | 0.526 | +| mse | 113 | | time/ | | | progress | 0.52 | | step | 5.2e+05 | -| step_time | 9.48 | +| step_time | 15.4 | -------------------------------------- -2023-10-19 16:29:31,793 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 153.369 +/- 16.517 -2023-10-19 16:29:31,795 : +2023-10-27 19:12:06,291 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 145.636 +/- 54.967 +2023-10-27 19:12:06,293 : -------------------------------------- | loss/ | | -| approx_kl | 0.0321 | -| entropy_loss | -5.88 | -| policy_loss | -0.00418 | -| value_loss | 1.88 | +| approx_kl | 0.0299 | +| entropy_loss | -4.61 | +| policy_loss | -0.00686 | +| value_loss | 0.182 | | stat/ | | -| constraint_violation | 352 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 119 | -| ep_reward | 0.478 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 153 | -| ep_reward | 0.613 | -| mse | 271 | +| ep_return | 189 | +| ep_reward | 0.754 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 146 | +| ep_reward | 0.583 | +| mse | 216 | | time/ | | | progress | 0.53 | | step | 5.3e+05 | -| step_time | 9.49 | +| step_time | 15.1 | -------------------------------------- -2023-10-19 16:31:22,076 : Eval | ep_lengths 176.20 +/- 112.75 | ep_return 115.903 +/- 77.831 -2023-10-19 16:31:22,077 : +2023-10-27 19:14:53,625 : Eval | ep_lengths 200.40 +/- 99.20 | ep_return 133.059 +/- 72.255 +2023-10-27 19:14:53,627 : -------------------------------------- | loss/ | | -| approx_kl | 0.0264 | -| entropy_loss | -5.92 | -| policy_loss | -0.00468 | -| value_loss | 0.676 | +| approx_kl | 0.026 | +| entropy_loss | -4.65 | +| policy_loss | -0.0152 | +| value_loss | 0.0994 | | stat/ | | -| constraint_violation | 358 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 121 | -| ep_reward | 0.491 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 204 | +| ep_reward | 0.816 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 176 | -| ep_return | 116 | -| ep_reward | 0.544 | -| mse | 117 | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 133 | +| ep_reward | 0.541 | +| mse | 185 | | time/ | | | progress | 0.54 | | step | 5.4e+05 | -| step_time | 9.28 | +| step_time | 14.1 | -------------------------------------- -2023-10-19 16:33:18,476 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 161.320 +/- 24.807 -2023-10-19 16:33:18,478 : +2023-10-27 19:17:51,199 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 142.148 +/- 54.002 +2023-10-27 19:17:51,200 : -------------------------------------- | loss/ | | -| approx_kl | 0.0291 | -| entropy_loss | -5.91 | -| policy_loss | -0.00376 | -| value_loss | 0.515 | +| approx_kl | 0.0231 | +| entropy_loss | -4.68 | +| policy_loss | -0.0015 | +| value_loss | 0.223 | | stat/ | | -| constraint_violation | 363 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 132 | -| ep_reward | 0.537 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 161 | -| ep_reward | 0.645 | -| mse | 245 | +| ep_return | 197 | +| ep_reward | 0.788 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 142 | +| ep_reward | 0.569 | +| mse | 263 | | time/ | | | progress | 0.55 | | step | 5.5e+05 | -| step_time | 9.46 | +| step_time | 15.3 | -------------------------------------- -2023-10-19 16:35:14,795 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 160.410 +/- 17.023 -2023-10-19 16:35:14,796 : +2023-10-27 19:20:38,929 : Eval | ep_lengths 201.60 +/- 96.80 | ep_return 156.878 +/- 83.638 +2023-10-27 19:20:38,931 : -------------------------------------- | loss/ | | -| approx_kl | 0.0129 | -| entropy_loss | -5.95 | -| policy_loss | -0.0165 | -| value_loss | 1.31 | +| approx_kl | 0.0253 | +| entropy_loss | -4.62 | +| policy_loss | -0.00576 | +| value_loss | 0.192 | | stat/ | | -| constraint_violation | 370 | -| ep_constraint_vio... | 0.3 | -| ep_length | 177 | -| ep_return | 115 | -| ep_reward | 0.465 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 160 | -| ep_reward | 0.642 | -| mse | 218 | +| ep_return | 199 | +| ep_reward | 0.797 | +| stat_eval/ | | +| constraint_violation | 0.2 | +| ep_length | 202 | +| ep_return | 157 | +| ep_reward | 0.628 | +| mse | 103 | | time/ | | | progress | 0.56 | | step | 5.6e+05 | -| step_time | 9.45 | +| step_time | 14.3 | -------------------------------------- -2023-10-19 16:37:09,270 : Eval | ep_lengths 226.70 +/- 69.90 | ep_return 144.165 +/- 53.725 -2023-10-19 16:37:09,272 : +2023-10-27 19:23:35,015 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 156.695 +/- 22.249 +2023-10-27 19:23:35,017 : -------------------------------------- | loss/ | | | approx_kl | 0.0348 | -| entropy_loss | -6.04 | -| policy_loss | 0.000696 | -| value_loss | 0.48 | +| entropy_loss | -4.7 | +| policy_loss | -0.00663 | +| value_loss | 0.0949 | | stat/ | | -| constraint_violation | 376 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 142 | -| ep_reward | 0.568 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 207 | +| ep_reward | 0.83 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 227 | -| ep_return | 144 | -| ep_reward | 0.578 | -| mse | 230 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 157 | +| ep_reward | 0.627 | +| mse | 276 | | time/ | | | progress | 0.57 | | step | 5.7e+05 | -| step_time | 9.44 | +| step_time | 14 | -------------------------------------- -2023-10-19 16:39:01,632 : Eval | ep_lengths 200.20 +/- 99.60 | ep_return 136.486 +/- 70.050 -2023-10-19 16:39:01,633 : +2023-10-27 19:26:29,392 : Eval | ep_lengths 202.60 +/- 94.82 | ep_return 117.823 +/- 64.473 +2023-10-27 19:26:29,393 : -------------------------------------- | loss/ | | -| approx_kl | 0.0104 | -| entropy_loss | -6.08 | -| policy_loss | -0.0206 | -| value_loss | 2.33 | +| approx_kl | 0.0134 | +| entropy_loss | -4.76 | +| policy_loss | -0.00954 | +| value_loss | 0.134 | | stat/ | | -| constraint_violation | 380 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 139 | -| ep_reward | 0.556 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 188 | +| ep_reward | 0.75 | | stat_eval/ | | | constraint_violation | 0.2 | -| ep_length | 200 | -| ep_return | 136 | -| ep_reward | 0.549 | -| mse | 125 | +| ep_length | 203 | +| ep_return | 118 | +| ep_reward | 0.472 | +| mse | 298 | | time/ | | | progress | 0.58 | | step | 5.8e+05 | -| step_time | 9.6 | +| step_time | 14.4 | -------------------------------------- -2023-10-19 16:40:58,589 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 161.117 +/- 22.305 -2023-10-19 16:40:58,591 : +2023-10-27 19:29:16,410 : Eval | ep_lengths 200.40 +/- 99.20 | ep_return 125.588 +/- 67.175 +2023-10-27 19:29:16,412 : -------------------------------------- | loss/ | | -| approx_kl | 0.0191 | -| entropy_loss | -6.18 | -| policy_loss | -0.0154 | -| value_loss | 2.36 | +| approx_kl | 0.0328 | +| entropy_loss | -4.75 | +| policy_loss | -0.0233 | +| value_loss | 0.313 | | stat/ | | -| constraint_violation | 392 | -| ep_constraint_vio... | 0.3 | -| ep_length | 176 | -| ep_return | 99.1 | -| ep_reward | 0.4 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 161 | -| ep_reward | 0.644 | -| mse | 211 | +| ep_return | 191 | +| ep_reward | 0.764 | +| stat_eval/ | | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 126 | +| ep_reward | 0.569 | +| mse | 252 | | time/ | | | progress | 0.59 | | step | 5.9e+05 | -| step_time | 9.58 | +| step_time | 14.2 | -------------------------------------- -2023-10-19 16:42:53,810 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 148.510 +/- 55.454 -2023-10-19 16:42:53,811 : +2023-10-27 19:32:12,778 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 154.979 +/- 31.482 +2023-10-27 19:32:12,779 : -------------------------------------- | loss/ | | -| approx_kl | 0.0286 | -| entropy_loss | -6.17 | -| policy_loss | -0.0191 | -| value_loss | 0.847 | +| approx_kl | 0.0292 | +| entropy_loss | -4.78 | +| policy_loss | -0.00445 | +| value_loss | 0.107 | | stat/ | | -| constraint_violation | 401 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 145 | -| ep_reward | 0.581 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 192 | +| ep_reward | 0.766 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 149 | -| ep_reward | 0.594 | -| mse | 182 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 155 | +| ep_reward | 0.62 | +| mse | 282 | | time/ | | | progress | 0.6 | | step | 6e+05 | -| step_time | 9.47 | +| step_time | 14.7 | -------------------------------------- -2023-10-19 16:44:50,017 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 160.543 +/- 26.521 -2023-10-19 16:44:50,018 : +2023-10-27 19:35:05,302 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 151.943 +/- 58.245 +2023-10-27 19:35:05,304 : -------------------------------------- | loss/ | | -| approx_kl | 0.0188 | -| entropy_loss | -6.13 | -| policy_loss | -0.0188 | -| value_loss | 0.377 | +| approx_kl | 0.0218 | +| entropy_loss | -4.79 | +| policy_loss | -0.00925 | +| value_loss | 0.465 | | stat/ | | -| constraint_violation | 406 | +| constraint_violation | 5 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 147 | -| ep_reward | 0.589 | +| ep_return | 192 | +| ep_reward | 0.767 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 161 | -| ep_reward | 0.642 | -| mse | 285 | +| constraint_violation | 0.1 | +| ep_length | 226 | +| ep_return | 152 | +| ep_reward | 0.663 | +| mse | 219 | | time/ | | | progress | 0.61 | | step | 6.1e+05 | -| step_time | 9.24 | +| step_time | 15.3 | -------------------------------------- -2023-10-19 16:46:44,166 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 143.586 +/- 53.907 -2023-10-19 16:46:44,168 : +2023-10-27 19:37:57,574 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 147.379 +/- 55.018 +2023-10-27 19:37:57,576 : -------------------------------------- | loss/ | | -| approx_kl | 0.0285 | -| entropy_loss | -6.19 | -| policy_loss | -0.00345 | -| value_loss | 0.911 | +| approx_kl | 0.0318 | +| entropy_loss | -4.77 | +| policy_loss | -0.0147 | +| value_loss | 0.338 | | stat/ | | -| constraint_violation | 410 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 131 | -| ep_reward | 0.524 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 192 | +| ep_reward | 0.767 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 144 | -| ep_reward | 0.593 | -| mse | 209 | +| ep_return | 147 | +| ep_reward | 0.591 | +| mse | 212 | | time/ | | | progress | 0.62 | | step | 6.2e+05 | -| step_time | 9.41 | +| step_time | 15 | -------------------------------------- -2023-10-19 16:48:40,457 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 163.870 +/- 23.925 -2023-10-19 16:48:40,458 : +2023-10-27 19:40:53,754 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 166.200 +/- 23.392 +2023-10-27 19:40:53,763 : -------------------------------------- | loss/ | | -| approx_kl | 0.0306 | -| entropy_loss | -6.26 | -| policy_loss | -0.00537 | -| value_loss | 1.78 | +| approx_kl | 0.0277 | +| entropy_loss | -4.77 | +| policy_loss | -0.0159 | +| value_loss | 0.133 | | stat/ | | -| constraint_violation | 417 | -| ep_constraint_vio... | 0.3 | -| ep_length | 176 | -| ep_return | 112 | -| ep_reward | 0.46 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 190 | +| ep_reward | 0.761 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 164 | -| ep_reward | 0.655 | -| mse | 213 | +| ep_return | 166 | +| ep_reward | 0.665 | +| mse | 222 | | time/ | | | progress | 0.63 | | step | 6.3e+05 | -| step_time | 9.74 | +| step_time | 14.3 | -------------------------------------- -2023-10-19 16:50:36,589 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 184.919 +/- 18.355 -2023-10-19 16:50:36,600 : +2023-10-27 19:43:49,675 : Eval | ep_lengths 227.50 +/- 67.50 | ep_return 152.537 +/- 56.364 +2023-10-27 19:43:49,677 : -------------------------------------- | loss/ | | -| approx_kl | 0.0248 | -| entropy_loss | -6.29 | -| policy_loss | -0.0251 | -| value_loss | 0.683 | +| approx_kl | 0.0276 | +| entropy_loss | -4.85 | +| policy_loss | -0.00279 | +| value_loss | 0.308 | | stat/ | | -| constraint_violation | 423 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 148 | -| ep_reward | 0.61 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 185 | -| ep_reward | 0.74 | -| mse | 110 | +| ep_return | 198 | +| ep_reward | 0.791 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 228 | +| ep_return | 153 | +| ep_reward | 0.611 | +| mse | 177 | | time/ | | | progress | 0.64 | | step | 6.4e+05 | -| step_time | 9.38 | +| step_time | 15.4 | -------------------------------------- -2023-10-19 16:52:26,960 : Eval | ep_lengths 176.80 +/- 111.88 | ep_return 117.618 +/- 78.374 -2023-10-19 16:52:26,961 : +2023-10-27 19:46:48,321 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 159.095 +/- 36.542 +2023-10-27 19:46:48,323 : -------------------------------------- | loss/ | | -| approx_kl | 0.0267 | -| entropy_loss | -6.38 | -| policy_loss | -0.0098 | -| value_loss | 1.82 | +| approx_kl | 0.0377 | +| entropy_loss | -4.83 | +| policy_loss | -0.01 | +| value_loss | 0.165 | | stat/ | | -| constraint_violation | 433 | -| ep_constraint_vio... | 0.3 | -| ep_length | 177 | -| ep_return | 107 | -| ep_reward | 0.436 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 189 | +| ep_reward | 0.757 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 177 | -| ep_return | 118 | -| ep_reward | 0.482 | -| mse | 144 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 159 | +| ep_reward | 0.636 | +| mse | 305 | | time/ | | | progress | 0.65 | | step | 6.5e+05 | -| step_time | 9.56 | +| step_time | 13.9 | -------------------------------------- -2023-10-19 16:54:16,409 : Eval | ep_lengths 176.30 +/- 112.59 | ep_return 112.361 +/- 76.237 -2023-10-19 16:54:16,410 : +2023-10-27 19:49:36,984 : Eval | ep_lengths 200.50 +/- 99.00 | ep_return 128.402 +/- 67.251 +2023-10-27 19:49:36,986 : -------------------------------------- | loss/ | | -| approx_kl | 0.0136 | -| entropy_loss | -6.37 | -| policy_loss | -0.0184 | -| value_loss | 1.1 | +| approx_kl | 0.0174 | +| entropy_loss | -4.85 | +| policy_loss | -0.00863 | +| value_loss | 0.258 | | stat/ | | -| constraint_violation | 437 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 145 | -| ep_reward | 0.582 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 184 | +| ep_reward | 0.737 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 176 | -| ep_return | 112 | -| ep_reward | 0.516 | -| mse | 158 | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 128 | +| ep_reward | 0.528 | +| mse | 187 | | time/ | | | progress | 0.66 | | step | 6.6e+05 | -| step_time | 9.45 | +| step_time | 15.9 | -------------------------------------- -2023-10-19 16:56:06,348 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 148.768 +/- 55.606 -2023-10-19 16:56:06,349 : +2023-10-27 19:52:25,755 : Eval | ep_lengths 200.60 +/- 98.80 | ep_return 130.719 +/- 68.535 +2023-10-27 19:52:25,756 : -------------------------------------- | loss/ | | -| approx_kl | 0.0195 | -| entropy_loss | -6.4 | -| policy_loss | -0.0177 | -| value_loss | 0.534 | +| approx_kl | 0.0327 | +| entropy_loss | -4.97 | +| policy_loss | -0.0112 | +| value_loss | 0.133 | | stat/ | | -| constraint_violation | 444 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 147 | -| ep_reward | 0.589 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 188 | +| ep_reward | 0.753 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 226 | -| ep_return | 149 | -| ep_reward | 0.6 | -| mse | 204 | +| constraint_violation | 0.2 | +| ep_length | 201 | +| ep_return | 131 | +| ep_reward | 0.566 | +| mse | 213 | | time/ | | | progress | 0.67 | | step | 6.7e+05 | -| step_time | 8.99 | +| step_time | 13.7 | -------------------------------------- -2023-10-19 16:57:56,671 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 175.368 +/- 23.530 -2023-10-19 16:57:56,672 : +2023-10-27 19:55:22,628 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 155.687 +/- 39.008 +2023-10-27 19:55:22,630 : -------------------------------------- | loss/ | | -| approx_kl | 0.0145 | -| entropy_loss | -6.5 | -| policy_loss | -0.0271 | -| value_loss | 0.654 | +| approx_kl | 0.0318 | +| entropy_loss | -4.99 | +| policy_loss | -0.0014 | +| value_loss | 0.174 | | stat/ | | -| constraint_violation | 446 | +| constraint_violation | 5 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 164 | -| ep_reward | 0.655 | +| ep_return | 188 | +| ep_reward | 0.752 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 175 | -| ep_reward | 0.701 | -| mse | 179 | +| ep_return | 156 | +| ep_reward | 0.623 | +| mse | 312 | | time/ | | | progress | 0.68 | | step | 6.8e+05 | -| step_time | 9.12 | +| step_time | 14.9 | -------------------------------------- -2023-10-19 16:59:42,779 : Eval | ep_lengths 201.90 +/- 96.21 | ep_return 134.024 +/- 69.828 -2023-10-19 16:59:42,780 : +2023-10-27 19:58:17,834 : Eval | ep_lengths 225.50 +/- 73.50 | ep_return 164.479 +/- 66.006 +2023-10-27 19:58:17,835 : -------------------------------------- | loss/ | | -| approx_kl | 0.0248 | -| entropy_loss | -6.59 | -| policy_loss | -0.0186 | -| value_loss | 1.59 | +| approx_kl | 0.0281 | +| entropy_loss | -5.08 | +| policy_loss | -0.00595 | +| value_loss | 0.823 | | stat/ | | -| constraint_violation | 448 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 119 | -| ep_reward | 0.478 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 190 | +| ep_reward | 0.761 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 202 | -| ep_return | 134 | -| ep_reward | 0.54 | -| mse | 197 | +| constraint_violation | 0.1 | +| ep_length | 226 | +| ep_return | 164 | +| ep_reward | 0.663 | +| mse | 134 | | time/ | | | progress | 0.69 | | step | 6.9e+05 | -| step_time | 9.1 | +| step_time | 14.6 | -------------------------------------- -2023-10-19 17:01:30,080 : Eval | ep_lengths 227.30 +/- 68.10 | ep_return 146.491 +/- 52.415 -2023-10-19 17:01:30,082 : +2023-10-27 20:01:14,958 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 161.379 +/- 27.097 +2023-10-27 20:01:14,960 : -------------------------------------- | loss/ | | -| approx_kl | 0.0223 | -| entropy_loss | -6.59 | -| policy_loss | -0.00956 | -| value_loss | 2.17 | +| approx_kl | 0.0284 | +| entropy_loss | -5.09 | +| policy_loss | -0.0107 | +| value_loss | 0.18 | | stat/ | | -| constraint_violation | 455 | -| ep_constraint_vio... | 0.3 | -| ep_length | 177 | -| ep_return | 105 | -| ep_reward | 0.424 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 195 | +| ep_reward | 0.78 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 227 | -| ep_return | 146 | -| ep_reward | 0.586 | -| mse | 186 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 161 | +| ep_reward | 0.646 | +| mse | 226 | | time/ | | | progress | 0.7 | | step | 7e+05 | -| step_time | 8.87 | +| step_time | 13.7 | -------------------------------------- -2023-10-19 17:03:14,791 : Eval | ep_lengths 226.20 +/- 71.40 | ep_return 164.403 +/- 58.730 -2023-10-19 17:03:14,792 : +2023-10-27 20:04:08,259 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 147.564 +/- 62.426 +2023-10-27 20:04:08,260 : -------------------------------------- | loss/ | | -| approx_kl | 0.0298 | -| entropy_loss | -6.59 | -| policy_loss | -0.00867 | -| value_loss | 0.704 | +| approx_kl | 0.0187 | +| entropy_loss | -5.13 | +| policy_loss | -0.01 | +| value_loss | 0.0998 | | stat/ | | -| constraint_violation | 460 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 153 | -| ep_reward | 0.614 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 192 | +| ep_reward | 0.768 | | stat_eval/ | | | constraint_violation | 0.1 | -| ep_length | 226 | -| ep_return | 164 | -| ep_reward | 0.661 | -| mse | 109 | +| ep_length | 225 | +| ep_return | 148 | +| ep_reward | 0.604 | +| mse | 244 | | time/ | | | progress | 0.71 | | step | 7.1e+05 | -| step_time | 8.58 | +| step_time | 14 | -------------------------------------- -2023-10-19 17:04:56,534 : Eval | ep_lengths 175.90 +/- 113.20 | ep_return 108.418 +/- 72.582 -2023-10-19 17:04:56,535 : +2023-10-27 20:06:53,740 : Eval | ep_lengths 200.80 +/- 98.41 | ep_return 135.389 +/- 73.556 +2023-10-27 20:06:53,741 : -------------------------------------- | loss/ | | -| approx_kl | 0.0292 | -| entropy_loss | -6.67 | -| policy_loss | -0.00488 | -| value_loss | 1.35 | +| approx_kl | 0.0295 | +| entropy_loss | -5.16 | +| policy_loss | -0.0153 | +| value_loss | 0.193 | | stat/ | | -| constraint_violation | 461 | +| constraint_violation | 5 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 143 | -| ep_reward | 0.572 | +| ep_return | 203 | +| ep_reward | 0.812 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 176 | -| ep_return | 108 | -| ep_reward | 0.492 | -| mse | 162 | +| constraint_violation | 0.2 | +| ep_length | 201 | +| ep_return | 135 | +| ep_reward | 0.558 | +| mse | 175 | | time/ | | | progress | 0.72 | | step | 7.2e+05 | -| step_time | 8.79 | +| step_time | 13.4 | -------------------------------------- -2023-10-19 17:06:40,374 : Eval | ep_lengths 202.60 +/- 94.84 | ep_return 121.126 +/- 65.650 -2023-10-19 17:06:40,375 : +2023-10-27 20:09:50,427 : Eval | ep_lengths 226.60 +/- 70.20 | ep_return 162.615 +/- 63.358 +2023-10-27 20:09:50,429 : -------------------------------------- | loss/ | | -| approx_kl | 0.0267 | -| entropy_loss | -6.7 | -| policy_loss | -0.00558 | -| value_loss | 1.34 | +| approx_kl | 0.0285 | +| entropy_loss | -5.18 | +| policy_loss | -0.0196 | +| value_loss | 0.0871 | | stat/ | | -| constraint_violation | 466 | +| constraint_violation | 5 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 172 | -| ep_reward | 0.688 | +| ep_return | 201 | +| ep_reward | 0.804 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 203 | -| ep_return | 121 | -| ep_reward | 0.493 | -| mse | 246 | +| constraint_violation | 0.1 | +| ep_length | 227 | +| ep_return | 163 | +| ep_reward | 0.651 | +| mse | 156 | | time/ | | | progress | 0.73 | | step | 7.3e+05 | -| step_time | 8.71 | +| step_time | 13.6 | -------------------------------------- -2023-10-19 17:08:27,387 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 153.572 +/- 35.397 -2023-10-19 17:08:27,388 : +2023-10-27 20:12:44,566 : Eval | ep_lengths 227.10 +/- 68.70 | ep_return 139.028 +/- 53.972 +2023-10-27 20:12:44,567 : -------------------------------------- | loss/ | | -| approx_kl | 0.0219 | -| entropy_loss | -6.77 | -| policy_loss | -0.017 | -| value_loss | 0.793 | +| approx_kl | 0.022 | +| entropy_loss | -5.2 | +| policy_loss | -0.0124 | +| value_loss | 0.104 | | stat/ | | -| constraint_violation | 475 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 136 | -| ep_reward | 0.547 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 154 | -| ep_reward | 0.614 | -| mse | 266 | +| ep_return | 196 | +| ep_reward | 0.784 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 227 | +| ep_return | 139 | +| ep_reward | 0.559 | +| mse | 244 | | time/ | | | progress | 0.74 | | step | 7.4e+05 | -| step_time | 8.82 | +| step_time | 14.9 | -------------------------------------- -2023-10-19 17:10:12,690 : Eval | ep_lengths 225.50 +/- 73.50 | ep_return 136.357 +/- 52.824 -2023-10-19 17:10:12,691 : --------------------------------------- -| loss/ | | -| approx_kl | 0.0298 | -| entropy_loss | -6.75 | -| policy_loss | -0.0175 | -| value_loss | 0.834 | -| stat/ | | -| constraint_violation | 482 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 109 | -| ep_reward | 0.438 | -| stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 226 | -| ep_return | 136 | -| ep_reward | 0.554 | -| mse | 254 | -| time/ | | -| progress | 0.75 | -| step | 7.5e+05 | -| step_time | 8.71 | --------------------------------------- +2023-10-27 20:15:39,423 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 140.269 +/- 52.303 +2023-10-27 20:15:39,425 : +--------------------------------------- +| loss/ | | +| approx_kl | 0.0309 | +| entropy_loss | -5.23 | +| policy_loss | -0.000951 | +| value_loss | 0.152 | +| stat/ | | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 191 | +| ep_reward | 0.764 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 140 | +| ep_reward | 0.567 | +| mse | 235 | +| time/ | | +| progress | 0.75 | +| step | 7.5e+05 | +| step_time | 13.9 | +--------------------------------------- -2023-10-19 17:11:54,597 : Eval | ep_lengths 176.90 +/- 111.73 | ep_return 107.222 +/- 73.154 -2023-10-19 17:11:54,598 : --------------------------------------- -| loss/ | | -| approx_kl | 0.0152 | -| entropy_loss | -6.78 | -| policy_loss | -0.0197 | -| value_loss | 0.632 | -| stat/ | | -| constraint_violation | 489 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 150 | -| ep_reward | 0.601 | -| stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 177 | -| ep_return | 107 | -| ep_reward | 0.433 | -| mse | 220 | -| time/ | | -| progress | 0.76 | -| step | 7.6e+05 | -| step_time | 8.82 | --------------------------------------- +2023-10-27 20:18:29,085 : Eval | ep_lengths 225.60 +/- 73.20 | ep_return 141.946 +/- 58.889 +2023-10-27 20:18:29,087 : +--------------------------------------- +| loss/ | | +| approx_kl | 0.0324 | +| entropy_loss | -5.25 | +| policy_loss | -0.000845 | +| value_loss | 0.177 | +| stat/ | | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 196 | +| ep_reward | 0.783 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 226 | +| ep_return | 142 | +| ep_reward | 0.568 | +| mse | 249 | +| time/ | | +| progress | 0.76 | +| step | 7.6e+05 | +| step_time | 14.6 | +--------------------------------------- -2023-10-19 17:13:41,951 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 161.974 +/- 19.390 -2023-10-19 17:13:41,952 : +2023-10-27 20:21:23,630 : Eval | ep_lengths 201.30 +/- 97.41 | ep_return 134.804 +/- 68.523 +2023-10-27 20:21:23,632 : -------------------------------------- | loss/ | | -| approx_kl | 0.0361 | -| entropy_loss | -6.82 | -| policy_loss | -0.00782 | -| value_loss | 1.5 | +| approx_kl | 0.02 | +| entropy_loss | -5.28 | +| policy_loss | -0.015 | +| value_loss | 0.108 | | stat/ | | -| constraint_violation | 496 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 124 | -| ep_reward | 0.499 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 162 | -| ep_reward | 0.648 | -| mse | 208 | +| ep_return | 196 | +| ep_reward | 0.783 | +| stat_eval/ | | +| constraint_violation | 0.2 | +| ep_length | 201 | +| ep_return | 135 | +| ep_reward | 0.543 | +| mse | 137 | | time/ | | | progress | 0.77 | | step | 7.7e+05 | -| step_time | 8.72 | +| step_time | 14.9 | -------------------------------------- -2023-10-19 17:15:27,398 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 139.167 +/- 51.765 -2023-10-19 17:15:27,399 : +2023-10-27 20:24:20,296 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 129.517 +/- 70.061 +2023-10-27 20:24:20,298 : -------------------------------------- | loss/ | | -| approx_kl | 0.017 | -| entropy_loss | -6.82 | -| policy_loss | -0.018 | -| value_loss | 0.406 | +| approx_kl | 0.015 | +| entropy_loss | -5.35 | +| policy_loss | -0.021 | +| value_loss | 0.089 | | stat/ | | -| constraint_violation | 501 | +| constraint_violation | 5 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 151 | -| ep_reward | 0.603 | +| ep_return | 193 | +| ep_reward | 0.774 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 139 | -| ep_reward | 0.558 | -| mse | 247 | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 130 | +| ep_reward | 0.583 | +| mse | 207 | | time/ | | | progress | 0.78 | | step | 7.8e+05 | -| step_time | 8.75 | +| step_time | 13.5 | -------------------------------------- -2023-10-19 17:17:12,389 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 128.927 +/- 49.207 -2023-10-19 17:17:12,390 : +2023-10-27 20:27:17,150 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 141.054 +/- 53.159 +2023-10-27 20:27:17,152 : -------------------------------------- | loss/ | | -| approx_kl | 0.0208 | -| entropy_loss | -6.84 | -| policy_loss | -0.0273 | -| value_loss | 0.574 | +| approx_kl | 0.025 | +| entropy_loss | -5.38 | +| policy_loss | -0.00959 | +| value_loss | 0.115 | | stat/ | | -| constraint_violation | 503 | +| constraint_violation | 5 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 164 | -| ep_reward | 0.657 | +| ep_return | 208 | +| ep_reward | 0.833 | | stat_eval/ | | | constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 129 | -| ep_reward | 0.525 | -| mse | 305 | +| ep_length | 226 | +| ep_return | 141 | +| ep_reward | 0.567 | +| mse | 206 | | time/ | | | progress | 0.79 | | step | 7.9e+05 | -| step_time | 8.65 | +| step_time | 15.5 | -------------------------------------- -2023-10-19 17:18:55,522 : Eval | ep_lengths 200.90 +/- 98.21 | ep_return 128.304 +/- 72.031 -2023-10-19 17:18:55,523 : +2023-10-27 20:30:18,137 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 148.736 +/- 24.469 +2023-10-27 20:30:18,138 : -------------------------------------- | loss/ | | -| approx_kl | 0.0273 | -| entropy_loss | -6.89 | -| policy_loss | -0.0125 | -| value_loss | 0.813 | +| approx_kl | 0.0269 | +| entropy_loss | -5.48 | +| policy_loss | -0.0191 | +| value_loss | 0.0967 | | stat/ | | -| constraint_violation | 513 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 145 | -| ep_reward | 0.582 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 191 | +| ep_reward | 0.762 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 128 | -| ep_reward | 0.53 | -| mse | 207 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 149 | +| ep_reward | 0.595 | +| mse | 314 | | time/ | | | progress | 0.8 | | step | 8e+05 | -| step_time | 8.73 | +| step_time | 18.5 | -------------------------------------- -2023-10-19 17:20:41,844 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 132.009 +/- 48.146 -2023-10-19 17:20:41,845 : +2023-10-27 20:33:15,697 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 167.073 +/- 29.940 +2023-10-27 20:33:15,706 : -------------------------------------- | loss/ | | -| approx_kl | 0.0167 | -| entropy_loss | -6.91 | -| policy_loss | -0.0155 | -| value_loss | 0.536 | +| approx_kl | 0.0224 | +| entropy_loss | -5.48 | +| policy_loss | -0.00855 | +| value_loss | 0.144 | | stat/ | | -| constraint_violation | 519 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 145 | -| ep_reward | 0.584 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 188 | +| ep_reward | 0.75 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 226 | -| ep_return | 132 | -| ep_reward | 0.529 | -| mse | 234 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 167 | +| ep_reward | 0.668 | +| mse | 201 | | time/ | | | progress | 0.81 | | step | 8.1e+05 | -| step_time | 8.8 | +| step_time | 14.7 | -------------------------------------- -2023-10-19 17:22:27,251 : Eval | ep_lengths 200.60 +/- 98.80 | ep_return 135.457 +/- 72.555 -2023-10-19 17:22:27,252 : +2023-10-27 20:36:06,225 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 158.518 +/- 57.145 +2023-10-27 20:36:06,226 : -------------------------------------- | loss/ | | -| approx_kl | 0.0228 | -| entropy_loss | -6.91 | -| policy_loss | -0.0194 | -| value_loss | 0.735 | +| approx_kl | 0.0181 | +| entropy_loss | -5.49 | +| policy_loss | -0.0173 | +| value_loss | 0.0841 | | stat/ | | -| constraint_violation | 528 | +| constraint_violation | 5 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 158 | -| ep_reward | 0.633 | +| ep_return | 179 | +| ep_reward | 0.714 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 135 | -| ep_reward | 0.552 | -| mse | 134 | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 159 | +| ep_reward | 0.665 | +| mse | 136 | | time/ | | | progress | 0.82 | | step | 8.2e+05 | -| step_time | 8.76 | +| step_time | 14.6 | -------------------------------------- -2023-10-19 17:24:17,021 : Eval | ep_lengths 175.50 +/- 113.80 | ep_return 117.521 +/- 80.302 -2023-10-19 17:24:17,023 : +2023-10-27 20:39:06,139 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 153.086 +/- 60.373 +2023-10-27 20:39:06,141 : -------------------------------------- | loss/ | | -| approx_kl | 0.0286 | -| entropy_loss | -6.91 | -| policy_loss | -0.00423 | -| value_loss | 0.737 | +| approx_kl | 0.0201 | +| entropy_loss | -5.53 | +| policy_loss | -0.0105 | +| value_loss | 0.107 | | stat/ | | -| constraint_violation | 531 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 148 | -| ep_reward | 0.592 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 188 | +| ep_reward | 0.752 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 176 | -| ep_return | 118 | -| ep_reward | 0.504 | -| mse | 120 | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 153 | +| ep_reward | 0.613 | +| mse | 188 | | time/ | | | progress | 0.83 | | step | 8.3e+05 | -| step_time | 9.29 | +| step_time | 14.1 | -------------------------------------- -2023-10-19 17:26:32,295 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 142.158 +/- 50.656 -2023-10-19 17:26:32,296 : +2023-10-27 20:42:01,098 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 168.820 +/- 28.122 +2023-10-27 20:42:01,108 : -------------------------------------- | loss/ | | -| approx_kl | 0.0202 | -| entropy_loss | -6.93 | -| policy_loss | -0.0147 | -| value_loss | 1.62 | +| approx_kl | 0.0332 | +| entropy_loss | -5.54 | +| policy_loss | -0.0086 | +| value_loss | 0.179 | | stat/ | | -| constraint_violation | 536 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 135 | -| ep_reward | 0.541 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 193 | +| ep_reward | 0.773 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 142 | -| ep_reward | 0.57 | -| mse | 218 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 169 | +| ep_reward | 0.675 | +| mse | 202 | | time/ | | | progress | 0.84 | | step | 8.4e+05 | -| step_time | 10.7 | +| step_time | 13.5 | -------------------------------------- -2023-10-19 17:28:42,102 : Eval | ep_lengths 176.50 +/- 112.30 | ep_return 114.120 +/- 76.987 -2023-10-19 17:28:42,103 : +2023-10-27 20:44:41,673 : Eval | ep_lengths 151.60 +/- 120.52 | ep_return 98.345 +/- 82.550 +2023-10-27 20:44:41,674 : -------------------------------------- | loss/ | | -| approx_kl | 0.0194 | -| entropy_loss | -7 | -| policy_loss | -0.0182 | -| value_loss | 0.924 | +| approx_kl | 0.0285 | +| entropy_loss | -5.53 | +| policy_loss | -0.00853 | +| value_loss | 0.251 | | stat/ | | -| constraint_violation | 546 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 138 | -| ep_reward | 0.553 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 207 | +| ep_reward | 0.827 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 176 | -| ep_return | 114 | -| ep_reward | 0.466 | -| mse | 140 | +| constraint_violation | 0.4 | +| ep_length | 152 | +| ep_return | 98.3 | +| ep_reward | 0.402 | +| mse | 111 | | time/ | | | progress | 0.85 | | step | 8.5e+05 | -| step_time | 10.3 | +| step_time | 15.2 | -------------------------------------- -2023-10-19 17:31:01,490 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 179.258 +/- 26.768 -2023-10-19 17:31:01,491 : +2023-10-27 20:47:41,072 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 153.325 +/- 36.635 +2023-10-27 20:47:41,074 : -------------------------------------- | loss/ | | -| approx_kl | 0.0194 | -| entropy_loss | -6.99 | -| policy_loss | -0.0116 | -| value_loss | 1.38 | +| approx_kl | 0.0265 | +| entropy_loss | -5.54 | +| policy_loss | -0.0178 | +| value_loss | 0.0841 | | stat/ | | -| constraint_violation | 555 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 170 | -| ep_reward | 0.679 | +| constraint_violation | 6 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 169 | +| ep_reward | 0.718 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 179 | -| ep_reward | 0.717 | -| mse | 148 | +| ep_return | 153 | +| ep_reward | 0.613 | +| mse | 344 | | time/ | | | progress | 0.86 | | step | 8.6e+05 | -| step_time | 11.7 | +| step_time | 14.2 | -------------------------------------- -2023-10-19 17:33:07,841 : Eval | ep_lengths 177.10 +/- 111.38 | ep_return 119.575 +/- 83.134 -2023-10-19 17:33:07,842 : +2023-10-27 20:50:26,961 : Eval | ep_lengths 176.80 +/- 111.84 | ep_return 120.459 +/- 82.973 +2023-10-27 20:50:26,963 : -------------------------------------- | loss/ | | -| approx_kl | 0.0352 | -| entropy_loss | -7.03 | -| policy_loss | -0.0119 | -| value_loss | 2.44 | +| approx_kl | 0.0208 | +| entropy_loss | -5.59 | +| policy_loss | -0.0123 | +| value_loss | 0.0971 | | stat/ | | -| constraint_violation | 560 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 133 | -| ep_reward | 0.567 | +| constraint_violation | 6 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 201 | +| ep_reward | 0.802 | | stat_eval/ | | | constraint_violation | 0.3 | | ep_length | 177 | | ep_return | 120 | -| ep_reward | 0.48 | -| mse | 173 | +| ep_reward | 0.484 | +| mse | 179 | | time/ | | | progress | 0.87 | | step | 8.7e+05 | -| step_time | 10.1 | +| step_time | 13.8 | -------------------------------------- -2023-10-19 17:35:18,435 : Eval | ep_lengths 200.90 +/- 98.21 | ep_return 131.153 +/- 73.293 -2023-10-19 17:35:18,437 : +2023-10-27 20:53:18,162 : Eval | ep_lengths 201.60 +/- 96.82 | ep_return 134.661 +/- 71.629 +2023-10-27 20:53:18,163 : -------------------------------------- | loss/ | | -| approx_kl | 0.0344 | -| entropy_loss | -7.14 | -| policy_loss | -0.00954 | -| value_loss | 0.466 | +| approx_kl | 0.023 | +| entropy_loss | -5.58 | +| policy_loss | -0.0146 | +| value_loss | 0.0999 | | stat/ | | -| constraint_violation | 566 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 135 | -| ep_reward | 0.541 | +| constraint_violation | 6 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 199 | +| ep_reward | 0.796 | | stat_eval/ | | | constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 131 | -| ep_reward | 0.531 | -| mse | 206 | +| ep_length | 202 | +| ep_return | 135 | +| ep_reward | 0.54 | +| mse | 195 | | time/ | | | progress | 0.88 | | step | 8.8e+05 | -| step_time | 11.7 | +| step_time | 14.7 | -------------------------------------- -2023-10-19 17:37:31,766 : Eval | ep_lengths 226.70 +/- 69.90 | ep_return 142.931 +/- 56.149 -2023-10-19 17:37:31,767 : +2023-10-27 20:56:08,629 : Eval | ep_lengths 201.30 +/- 97.42 | ep_return 142.772 +/- 75.970 +2023-10-27 20:56:08,631 : -------------------------------------- | loss/ | | -| approx_kl | 0.0206 | -| entropy_loss | -7.06 | -| policy_loss | -0.0268 | -| value_loss | 0.433 | +| approx_kl | 0.029 | +| entropy_loss | -5.68 | +| policy_loss | -0.00018 | +| value_loss | 0.253 | | stat/ | | -| constraint_violation | 568 | +| constraint_violation | 6 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 155 | -| ep_reward | 0.618 | +| ep_return | 203 | +| ep_reward | 0.812 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 227 | +| constraint_violation | 0.2 | +| ep_length | 201 | | ep_return | 143 | -| ep_reward | 0.572 | -| mse | 224 | +| ep_reward | 0.571 | +| mse | 138 | | time/ | | | progress | 0.89 | | step | 8.9e+05 | -| step_time | 11 | +| step_time | 14 | -------------------------------------- -2023-10-19 17:39:44,659 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 144.690 +/- 55.863 -2023-10-19 17:39:44,660 : +2023-10-27 20:58:53,089 : Eval | ep_lengths 176.30 +/- 112.59 | ep_return 105.229 +/- 73.346 +2023-10-27 20:58:53,090 : -------------------------------------- | loss/ | | -| approx_kl | 0.0248 | -| entropy_loss | -7.07 | -| policy_loss | -0.015 | -| value_loss | 0.56 | +| approx_kl | 0.0262 | +| entropy_loss | -5.71 | +| policy_loss | -0.0138 | +| value_loss | 0.197 | | stat/ | | -| constraint_violation | 574 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 147 | -| ep_reward | 0.587 | +| constraint_violation | 6 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 189 | +| ep_reward | 0.756 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 145 | -| ep_reward | 0.579 | -| mse | 203 | +| constraint_violation | 0.3 | +| ep_length | 176 | +| ep_return | 105 | +| ep_reward | 0.43 | +| mse | 208 | | time/ | | | progress | 0.9 | | step | 9e+05 | -| step_time | 10.7 | +| step_time | 14.1 | -------------------------------------- -2023-10-19 17:41:55,790 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 153.151 +/- 59.422 -2023-10-19 17:41:55,792 : +2023-10-27 21:01:47,645 : Eval | ep_lengths 226.60 +/- 70.20 | ep_return 143.141 +/- 56.489 +2023-10-27 21:01:47,646 : -------------------------------------- | loss/ | | -| approx_kl | 0.0309 | -| entropy_loss | -7.2 | -| policy_loss | -0.019 | -| value_loss | 0.434 | +| approx_kl | 0.0226 | +| entropy_loss | -5.71 | +| policy_loss | -0.0119 | +| value_loss | 0.161 | | stat/ | | -| constraint_violation | 579 | +| constraint_violation | 6 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 166 | -| ep_reward | 0.665 | +| ep_return | 204 | +| ep_reward | 0.817 | | stat_eval/ | | | constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 153 | -| ep_reward | 0.614 | -| mse | 201 | +| ep_length | 227 | +| ep_return | 143 | +| ep_reward | 0.574 | +| mse | 242 | | time/ | | | progress | 0.91 | | step | 9.1e+05 | -| step_time | 10.3 | +| step_time | 14.3 | -------------------------------------- -2023-10-19 17:44:04,550 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 133.749 +/- 69.439 -2023-10-19 17:44:04,551 : +2023-10-27 21:04:32,473 : Eval | ep_lengths 175.60 +/- 113.65 | ep_return 138.926 +/- 91.276 +2023-10-27 21:04:32,474 : -------------------------------------- | loss/ | | -| approx_kl | 0.0164 | -| entropy_loss | -7.24 | -| policy_loss | -0.0302 | -| value_loss | 0.477 | +| approx_kl | 0.034 | +| entropy_loss | -5.78 | +| policy_loss | -0.00793 | +| value_loss | 0.135 | | stat/ | | -| constraint_violation | 585 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 136 | -| ep_reward | 0.546 | +| constraint_violation | 7 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 194 | +| ep_reward | 0.775 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 200 | -| ep_return | 134 | -| ep_reward | 0.544 | -| mse | 170 | +| constraint_violation | 0.3 | +| ep_length | 176 | +| ep_return | 139 | +| ep_reward | 0.579 | +| mse | 52.1 | | time/ | | | progress | 0.92 | | step | 9.2e+05 | -| step_time | 9.99 | +| step_time | 14.7 | -------------------------------------- -2023-10-19 17:46:12,980 : Eval | ep_lengths 202.20 +/- 95.67 | ep_return 120.913 +/- 63.289 -2023-10-19 17:46:12,982 : +2023-10-27 21:07:27,597 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 170.756 +/- 25.574 +2023-10-27 21:07:27,606 : -------------------------------------- | loss/ | | | approx_kl | 0.0229 | -| entropy_loss | -7.28 | -| policy_loss | -0.00978 | -| value_loss | 0.678 | +| entropy_loss | -5.82 | +| policy_loss | -0.0176 | +| value_loss | 0.0759 | | stat/ | | -| constraint_violation | 592 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 149 | -| ep_reward | 0.595 | +| constraint_violation | 7 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 203 | +| ep_reward | 0.813 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 202 | -| ep_return | 121 | -| ep_reward | 0.484 | -| mse | 233 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 171 | +| ep_reward | 0.683 | +| mse | 181 | | time/ | | | progress | 0.93 | | step | 9.3e+05 | -| step_time | 11.2 | +| step_time | 15.8 | -------------------------------------- -2023-10-19 17:48:27,482 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 154.444 +/- 22.984 -2023-10-19 17:48:27,483 : +2023-10-27 21:10:21,849 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 162.698 +/- 34.509 +2023-10-27 21:10:21,851 : -------------------------------------- | loss/ | | -| approx_kl | 0.0133 | -| entropy_loss | -7.27 | -| policy_loss | -0.0215 | -| value_loss | 1.85 | +| approx_kl | 0.0305 | +| entropy_loss | -5.82 | +| policy_loss | -0.00599 | +| value_loss | 0.995 | | stat/ | | -| constraint_violation | 600 | -| ep_constraint_vio... | 0.4 | -| ep_length | 152 | -| ep_return | 95.9 | -| ep_reward | 0.392 | +| constraint_violation | 8 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 173 | +| ep_reward | 0.734 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 154 | -| ep_reward | 0.618 | -| mse | 283 | +| ep_return | 163 | +| ep_reward | 0.651 | +| mse | 245 | | time/ | | | progress | 0.94 | | step | 9.4e+05 | -| step_time | 12.1 | +| step_time | 14.3 | -------------------------------------- -2023-10-19 17:50:39,743 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 155.322 +/- 26.482 -2023-10-19 17:50:39,745 : +2023-10-27 21:13:11,963 : Eval | ep_lengths 227.60 +/- 67.20 | ep_return 141.276 +/- 54.199 +2023-10-27 21:13:11,964 : -------------------------------------- | loss/ | | -| approx_kl | 0.00941 | -| entropy_loss | -7.29 | -| policy_loss | -0.0252 | -| value_loss | 0.415 | +| approx_kl | 0.0315 | +| entropy_loss | -5.86 | +| policy_loss | 0.00471 | +| value_loss | 0.0946 | | stat/ | | -| constraint_violation | 604 | +| constraint_violation | 8 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 165 | -| ep_reward | 0.661 | +| ep_return | 200 | +| ep_reward | 0.802 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 155 | -| ep_reward | 0.621 | -| mse | 291 | +| constraint_violation | 0.1 | +| ep_length | 228 | +| ep_return | 141 | +| ep_reward | 0.565 | +| mse | 276 | | time/ | | | progress | 0.95 | | step | 9.5e+05 | -| step_time | 11.2 | +| step_time | 14.3 | -------------------------------------- -2023-10-19 17:52:53,263 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 156.639 +/- 21.457 -2023-10-19 17:52:53,264 : +2023-10-27 21:16:04,671 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 159.849 +/- 57.516 +2023-10-27 21:16:04,673 : -------------------------------------- | loss/ | | -| approx_kl | 0.028 | -| entropy_loss | -7.29 | -| policy_loss | -0.0222 | -| value_loss | 0.432 | +| approx_kl | 0.0148 | +| entropy_loss | -5.87 | +| policy_loss | -0.0182 | +| value_loss | 0.0692 | | stat/ | | -| constraint_violation | 611 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 124 | -| ep_reward | 0.548 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 8 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 157 | -| ep_reward | 0.627 | -| mse | 228 | +| ep_return | 194 | +| ep_reward | 0.775 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 160 | +| ep_reward | 0.641 | +| mse | 137 | | time/ | | | progress | 0.96 | | step | 9.6e+05 | -| step_time | 10.6 | +| step_time | 13.9 | -------------------------------------- -2023-10-19 17:55:05,674 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 158.227 +/- 56.095 -2023-10-19 17:55:05,675 : +2023-10-27 21:18:48,535 : Eval | ep_lengths 175.50 +/- 113.80 | ep_return 109.501 +/- 74.980 +2023-10-27 21:18:48,536 : -------------------------------------- | loss/ | | -| approx_kl | 0.0263 | -| entropy_loss | -7.41 | -| policy_loss | -0.019 | -| value_loss | 0.713 | +| approx_kl | 0.0253 | +| entropy_loss | -5.98 | +| policy_loss | -0.0282 | +| value_loss | 0.137 | | stat/ | | -| constraint_violation | 619 | +| constraint_violation | 8 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 149 | -| ep_reward | 0.595 | +| ep_return | 190 | +| ep_reward | 0.758 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 158 | -| ep_reward | 0.654 | -| mse | 145 | +| constraint_violation | 0.3 | +| ep_length | 176 | +| ep_return | 110 | +| ep_reward | 0.441 | +| mse | 205 | | time/ | | | progress | 0.97 | | step | 9.7e+05 | -| step_time | 9.78 | +| step_time | 13.5 | -------------------------------------- -2023-10-19 17:57:15,400 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 127.323 +/- 65.601 -2023-10-19 17:57:15,402 : +2023-10-27 21:21:36,506 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 176.383 +/- 26.391 +2023-10-27 21:21:36,515 : -------------------------------------- | loss/ | | -| approx_kl | 0.0279 | -| entropy_loss | -7.38 | -| policy_loss | -0.0163 | -| value_loss | 0.879 | +| approx_kl | 0.0328 | +| entropy_loss | -6.03 | +| policy_loss | -0.0106 | +| value_loss | 0.112 | | stat/ | | -| constraint_violation | 626 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 138 | -| ep_reward | 0.567 | +| constraint_violation | 8 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 199 | +| ep_reward | 0.796 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 200 | -| ep_return | 127 | -| ep_reward | 0.51 | -| mse | 185 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 176 | +| ep_reward | 0.706 | +| mse | 169 | | time/ | | | progress | 0.98 | | step | 9.8e+05 | -| step_time | 11.8 | +| step_time | 13 | -------------------------------------- -2023-10-19 17:59:29,949 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 160.478 +/- 55.896 -2023-10-19 17:59:29,950 : +2023-10-27 21:24:23,260 : Eval | ep_lengths 226.80 +/- 69.60 | ep_return 169.542 +/- 58.147 +2023-10-27 21:24:23,262 : -------------------------------------- | loss/ | | -| approx_kl | 0.0221 | -| entropy_loss | -7.33 | -| policy_loss | -0.0209 | -| value_loss | 4.45 | +| approx_kl | 0.0241 | +| entropy_loss | -6.02 | +| policy_loss | -0.00988 | +| value_loss | 0.129 | | stat/ | | -| constraint_violation | 634 | -| ep_constraint_vio... | 0.5 | -| ep_length | 128 | -| ep_return | 77.9 | -| ep_reward | 0.335 | +| constraint_violation | 8 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 199 | +| ep_reward | 0.796 | | stat_eval/ | | | constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 160 | -| ep_reward | 0.649 | -| mse | 131 | +| ep_length | 227 | +| ep_return | 170 | +| ep_reward | 0.686 | +| mse | 99.8 | | time/ | | | progress | 0.99 | | step | 9.9e+05 | -| step_time | 11.2 | +| step_time | 14.2 | -------------------------------------- -2023-10-19 18:01:18,335 : Checkpoint | ./models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/model_latest.pt -2023-10-19 18:01:38,260 : Eval | ep_lengths 200.80 +/- 98.40 | ep_return 122.904 +/- 63.241 -2023-10-19 18:01:38,261 : +2023-10-27 21:26:49,979 : Checkpoint | ./models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es/model_latest.pt +2023-10-27 21:27:19,014 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 158.964 +/- 38.233 +2023-10-27 21:27:19,015 : -------------------------------------- | loss/ | | -| approx_kl | 0.0148 | -| entropy_loss | -7.43 | -| policy_loss | -0.0307 | -| value_loss | 2.29 | +| approx_kl | 0.0235 | +| entropy_loss | -5.97 | +| policy_loss | -0.0142 | +| value_loss | 0.0602 | | stat/ | | -| constraint_violation | 639 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 126 | -| ep_reward | 0.515 | +| constraint_violation | 8 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 198 | +| ep_reward | 0.79 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 123 | -| ep_reward | 0.496 | -| mse | 211 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 159 | +| ep_reward | 0.636 | +| mse | 340 | | time/ | | | progress | 1 | | step | 1e+06 | -| step_time | 11 | +| step_time | 15.7 | -------------------------------------- diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/loss/approx_kl.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/loss/approx_kl.log index bb2a5d917..9a6aefdc0 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/loss/approx_kl.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/loss/approx_kl.log @@ -1,101 +1,101 @@ step,loss/approx_kl -10000,0.028251345269382 -20000,0.020043125624457998 -30000,0.022683016800632075 -40000,0.025255232591492434 -50000,0.02521237265318632 -60000,0.03438403995241969 -70000,0.023316897839928664 -80000,0.01645623234411081 -90000,0.02586820911771307 -100000,0.029268797494781517 -110000,0.023664595326408744 -120000,0.017754528399867315 -130000,0.030075034266337747 -140000,0.02312646654124061 -150000,0.02547850757061193 -160000,0.026959253288805485 -170000,0.026967819849960512 -180000,0.03011329384365429 -190000,0.03644411275939395 -200000,0.03170105362466226 -210000,0.008567788118186096 -220000,0.025656136707402764 -230000,0.028651216075134777 -240000,0.02227510442025959 -250000,0.016729642225739857 -260000,0.019783863122574984 -270000,0.029530189108724396 -280000,0.027674371267979343 -290000,0.0309891627014925 -300000,0.02533214353801062 -310000,0.026382265418457486 -320000,0.03628352271237721 -330000,0.029697815678082405 -340000,0.040996736877908314 -350000,0.02404588942881674 -360000,0.03106794880392651 -370000,0.025971858149083954 -380000,0.028694967172729468 -390000,0.02479205983690917 -400000,0.022336523413347704 -410000,0.027438157858947914 -420000,0.029719650698825716 -430000,0.026566278142854578 -440000,0.022849499092747767 -450000,0.028690259368158878 -460000,0.015611481992527845 -470000,0.031047902838326997 -480000,0.022507287996510666 -490000,0.03014965747327854 -500000,0.03471398902280877 -510000,0.039680613414384426 -520000,0.019408297888003294 -530000,0.031062715624769525 -540000,0.03378693731501699 -550000,0.029801824611301226 -560000,0.026337936699079974 -570000,0.02309273886494339 -580000,0.030717620214757817 -590000,0.030794029535415275 -600000,0.040449672852021945 -610000,0.026337745684819912 -620000,0.024576182624635602 -630000,0.03326918050491561 -640000,0.03204036715130011 -650000,0.04006295531677703 -660000,0.03363581323064863 -670000,0.034682285009572905 -680000,0.034334194885256394 -690000,0.030983006294506295 -700000,0.01628458019501219 -710000,0.03384149555737774 -720000,0.03719114719424397 -730000,0.03666629755559068 -740000,0.0361098542343825 -750000,0.033785364629390344 -760000,0.029683935223147273 -770000,0.037135247567979 -780000,0.017101995398600894 -790000,0.026297630943978828 -800000,0.039243026605496806 -810000,0.03137041261264434 -820000,0.02923133915755897 -830000,0.03108973008735726 -840000,0.036125578684732325 -850000,0.02357752059275905 -860000,0.034656622222003844 -870000,0.029809929290786387 -880000,0.02151393325378497 -890000,0.014571371604688466 -900000,0.024592104096276064 -910000,0.023178974251883727 -920000,0.02932734910864383 -930000,0.020685427589342 -940000,0.030552950687706464 -950000,0.029112245344246425 -960000,0.02785677105033149 -970000,0.02958402377553284 -980000,0.02973549318655084 -990000,0.021267759908611572 -1000000,0.030272093772267306 +10000,0.02623263170632223 +20000,0.032874138575668135 +30000,0.02143441623387237 +40000,0.02508529013333221 +50000,0.018119860207661986 +60000,0.013996073704523345 +70000,0.018093147633286812 +80000,0.020856940435866514 +90000,0.02669796734893074 +100000,0.023157810769043864 +110000,0.025134310607487952 +120000,0.027462668786756694 +130000,0.03113690829680612 +140000,0.03248790910001844 +150000,0.03304915540696432 +160000,0.03577603089312712 +170000,0.042041813776207464 +180000,0.020735341194085776 +190000,0.032325730612501505 +200000,0.03272525472566486 +210000,0.031568329129368064 +220000,0.03478842717595398 +230000,0.04363183149447043 +240000,0.031195875800525148 +250000,0.021090735588222737 +260000,0.017936383102399606 +270000,0.02562436372973025 +280000,0.045024328120052824 +290000,0.032478910180119176 +300000,0.03196899727142105 +310000,0.031299958913587035 +320000,0.033947678934782746 +330000,0.029946243879385288 +340000,0.02792460181905578 +350000,0.03395110726511726 +360000,0.03475664324748019 +370000,0.028809827403165393 +380000,0.03512037484130511 +390000,0.0383377939540272 +400000,0.02176316278831412 +410000,0.02834720032600065 +420000,0.04110547041054816 +430000,0.025822983169928194 +440000,0.035814912725860876 +450000,0.038129433601473764 +460000,0.025311666075140238 +470000,0.035414545012948405 +480000,0.029269901375907164 +490000,0.026826554342793928 +500000,0.03124305245000869 +510000,0.030197267746552826 +520000,0.030583866196684538 +530000,0.034140892109523216 +540000,0.03448817861111214 +550000,0.029584569243403767 +560000,0.029511453839950262 +570000,0.037692595288778344 +580000,0.025650346069596708 +590000,0.03683900115235399 +600000,0.03005117878783494 +610000,0.030783261141429347 +620000,0.033003564900718624 +630000,0.028917018029217922 +640000,0.03144375613580147 +650000,0.027989785062770046 +660000,0.026951259886845946 +670000,0.028779839181030793 +680000,0.02007088129563878 +690000,0.029835309484042234 +700000,0.028399732064766186 +710000,0.03684205702350785 +720000,0.021220482699573044 +730000,0.03697374941160282 +740000,0.02321811264070372 +750000,0.035207578050903965 +760000,0.03826311139079432 +770000,0.02790134944953025 +780000,0.02770048734576752 +790000,0.03798466208390891 +800000,0.029571886593475944 +810000,0.032548801104227705 +820000,0.03676186512845258 +830000,0.021096378588117658 +840000,0.02912758829382559 +850000,0.03226703085626165 +860000,0.02717605972041686 +870000,0.02888509525218978 +880000,0.018713620235212145 +890000,0.03579910882593443 +900000,0.03539903185640772 +910000,0.03038924799766391 +920000,0.03400635728612543 +930000,0.03875541462718198 +940000,0.02154954726574942 +950000,0.02596061118723203 +960000,0.01087456412302951 +970000,0.033814007843223706 +980000,0.02966822643065825 +990000,0.027807982448333247 +1000000,0.03697161805272723 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/loss/entropy_loss.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/loss/entropy_loss.log index 3cc83f057..1a1e8f5b1 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/loss/entropy_loss.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/loss/entropy_loss.log @@ -1,101 +1,101 @@ step,loss/entropy_loss -10000,-3.6961906234423325 -20000,-3.7040054639180497 -30000,-3.6902834256490076 -40000,-3.698279150327047 -50000,-3.6794474124908447 -60000,-3.7197596073150634 -70000,-3.6520874857902528 -80000,-3.581466257572174 -90000,-3.596186538537343 -100000,-3.5541596213976545 -110000,-3.6373242815335596 -120000,-3.6601875861485795 -130000,-3.718436062335968 -140000,-3.744985377788544 -150000,-3.7201598405838014 -160000,-3.7189017852147415 -170000,-3.74609090089798 -180000,-3.686675214767456 -190000,-3.695259153842926 -200000,-3.7254421869913736 -210000,-3.7663419802983604 -220000,-3.7969637910525003 -230000,-3.8287901123364767 -240000,-3.772191083431244 -250000,-3.7416929841041564 -260000,-3.7661458015441895 -270000,-3.8126536647478737 -280000,-3.8391964435577393 -290000,-3.833748070398966 -300000,-3.800921754042308 -310000,-3.800000305970509 -320000,-3.8401187578837073 -330000,-3.842671263217926 -340000,-3.8197595874468484 -350000,-3.800331938266754 -360000,-3.7522564848264053 -370000,-3.7948278745015465 -380000,-3.7085395574569704 -390000,-3.6431018312772117 -400000,-3.654667043685913 -410000,-3.6730506698290513 -420000,-3.6959651827812197 -430000,-3.703691895802816 -440000,-3.666244514783223 -450000,-3.680761011441548 -460000,-3.682841857274373 -470000,-3.6379813591639207 -480000,-3.7035749713579817 -490000,-3.7169115344683332 -500000,-3.6943845947583513 -510000,-3.6533834973971056 -520000,-3.6707302014033005 -530000,-3.603869434197744 -540000,-3.6748090386390686 -550000,-3.678770728905996 -560000,-3.6806810776392616 -570000,-3.6977829217910765 -580000,-3.63530113697052 -590000,-3.622584863503774 -600000,-3.6855364441871643 -610000,-3.6530481974283853 -620000,-3.6292308092117302 -630000,-3.6069786985715226 -640000,-3.5843002597490945 -650000,-3.592022601763407 -660000,-3.593270409107208 -670000,-3.614526943365733 -680000,-3.5825061599413557 -690000,-3.5679255286852523 -700000,-3.5254193186759957 -710000,-3.4758208433787035 -720000,-3.474504379431407 -730000,-3.4913536588350937 -740000,-3.5122412840525308 -750000,-3.5000925938288368 -760000,-3.5228902459144593 -770000,-3.5071127891540526 -780000,-3.469307267665863 -790000,-3.3997237046559645 -800000,-3.4001274903615313 -810000,-3.3933616042137147 -820000,-3.3872439861297607 -830000,-3.3440786719322206 -840000,-3.388782525062561 -850000,-3.4210373361905417 -860000,-3.455586938063304 -870000,-3.4478484392166138 -880000,-3.473407586415609 -890000,-3.500018946329752 -900000,-3.506475718816121 -910000,-3.551804780960083 -920000,-3.5130025148391724 -930000,-3.5044869105021164 -940000,-3.480757701396942 -950000,-3.4785562674204504 -960000,-3.4818499406178787 -970000,-3.495742452144623 -980000,-3.42743015686671 -990000,-3.477353501319885 -1000000,-3.4847856561342874 +10000,-3.7276356418927508 +20000,-3.76526806751887 +30000,-3.726752662658691 +40000,-3.7331008116404214 +50000,-3.7947248776753737 +60000,-3.7250778237978617 +70000,-3.695150836308797 +80000,-3.7061473886171976 +90000,-3.734281611442566 +100000,-3.7803029338518774 +110000,-3.8115508556365967 +120000,-3.8043602466583253 +130000,-3.8367014646530153 +140000,-3.8491341670354204 +150000,-3.851333212852478 +160000,-3.781522218386332 +170000,-3.7548938234647116 +180000,-3.7432215929031374 +190000,-3.7363597671190902 +200000,-3.715828637282054 +210000,-3.6960163950920104 +220000,-3.6557730754216515 +230000,-3.667638532320658 +240000,-3.64206877152125 +250000,-3.630096503098806 +260000,-3.6228256106376646 +270000,-3.552464850743611 +280000,-3.5760295192400617 +290000,-3.530172590414683 +300000,-3.51009958187739 +310000,-3.545306785901387 +320000,-3.4951608419418334 +330000,-3.4684827248255417 +340000,-3.396670377254486 +350000,-3.3608022928237915 +360000,-3.3873506426811217 +370000,-3.4028881947199507 +380000,-3.3837867816289267 +390000,-3.3936331590016686 +400000,-3.382639082272848 +410000,-3.335388922691345 +420000,-3.3829603115717566 +430000,-3.416534356276194 +440000,-3.3320809046427406 +450000,-3.3434762835502623 +460000,-3.268394660949707 +470000,-3.260023033618927 +480000,-3.282551276683807 +490000,-3.3003874222437544 +500000,-3.284747052192688 +510000,-3.2363489985466005 +520000,-3.2024394591649368 +530000,-3.1668108820915224 +540000,-3.1847693522771197 +550000,-3.2026479522387192 +560000,-3.2092654784520462 +570000,-3.214842903614044 +580000,-3.235990357398987 +590000,-3.2385431051254274 +600000,-3.236142973105113 +610000,-3.252828506628673 +620000,-3.2115232745806375 +630000,-3.172441899776459 +640000,-3.171675248940786 +650000,-3.153176041444143 +660000,-3.071997292836507 +670000,-3.1177614609400432 +680000,-3.101255492369334 +690000,-3.081737462679545 +700000,-3.0966175436973566 +710000,-3.126520641644796 +720000,-3.086739885807037 +730000,-3.0878482421239215 +740000,-3.061389700571696 +750000,-3.0699579795201624 +760000,-3.0778496146202086 +770000,-3.1134809255599976 +780000,-3.10864607890447 +790000,-3.129952549934387 +800000,-3.1404227018356323 +810000,-3.062887899080912 +820000,-3.0841390728950495 +830000,-3.079430894056956 +840000,-3.091674721240997 +850000,-3.035227656364441 +860000,-2.978760019938151 +870000,-2.9722038666407267 +880000,-2.9524189313252767 +890000,-2.985902543862661 +900000,-3.0152073184649146 +910000,-2.9510330756505327 +920000,-2.9485486268997194 +930000,-2.9698389331499735 +940000,-2.9522232254346212 +950000,-2.9378395080566406 +960000,-2.964709679285685 +970000,-2.872235453128815 +980000,-2.8777671217918397 +990000,-2.880346826712291 +1000000,-2.856441926956177 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/loss/policy_loss.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/loss/policy_loss.log index 1bcf470a5..0fc5e3b2d 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/loss/policy_loss.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/loss/policy_loss.log @@ -1,101 +1,101 @@ step,loss/policy_loss -10000,-0.006542265661552711 -20000,-0.004107460382600887 -30000,-0.013178361971447224 -40000,-0.00918733897220214 -50000,-0.007738614451663711 -60000,-0.0168631153235178 -70000,-0.014243980173893703 -80000,-0.013347039880427125 -90000,-0.012978289168663438 -100000,-0.017661205059932295 -110000,-0.016655208239987354 -120000,-0.016991319953509377 -130000,-0.017496739108029208 -140000,-0.005731443239700933 -150000,-0.005171559062045185 -160000,-0.02588493765084645 -170000,-0.014900305176763388 -180000,-0.008703099461867187 -190000,-0.006921884092946437 -200000,-0.006462101266913209 -210000,-0.014802255585970947 -220000,-0.0009740271596657812 -230000,-0.01627084342938181 -240000,-0.010264463246965786 -250000,-0.012647160251603149 -260000,-0.017025404034007742 -270000,-0.006015908214939599 -280000,-0.003347906117257697 -290000,-0.0019634475948284397 -300000,-0.00899664255000771 -310000,-0.011914832296094677 -320000,-0.014362568773258097 -330000,-0.012922956468673216 -340000,-0.002111591700132702 -350000,-0.009264069578146166 -360000,-0.0016062562750465148 -370000,-0.001522524315132065 -380000,-0.008701363977402836 -390000,-0.006683993145907788 -400000,-0.015097884503504983 -410000,-0.016778495544513883 -420000,-0.012820643134053445 -430000,-0.007300028468884698 -440000,-0.011590185872336956 -450000,-0.009977618152942524 -460000,-0.03026774391391121 -470000,-0.012823541250490723 -480000,-0.011899595330648896 -490000,-0.015408754603531246 -500000,-0.004049237238925832 -510000,0.01582143561032893 -520000,-0.01345712078761729 -530000,-0.005608135463365187 -540000,-0.006692963778293535 -550000,-0.011255542117813166 -560000,-0.007146891742442646 -570000,-0.008606152554989322 -580000,-0.009688264297849535 -590000,-0.01658600179667505 -600000,-0.004992725092546828 -610000,-0.008631518578623336 -620000,-0.012237489250338405 -630000,-0.012356633664634891 -640000,0.0021511860958985333 -650000,0.004016019587282224 -660000,-0.002594319223451948 -670000,0.0010974818373392869 -680000,-0.004360974902284055 -690000,-0.004825375965252043 -700000,-0.007454068360401097 -710000,-0.0014837171870078019 -720000,-0.0004934361843796514 -730000,0.0018743433710518217 -740000,0.002077720471090579 -750000,-0.008116127992929337 -760000,-0.009951517533626927 -770000,-0.0020539247644470503 -780000,-0.005632237437505857 -790000,-0.010472419219099301 -800000,-0.010018253019233285 -810000,-0.006823035484748756 -820000,-0.005891949349353489 -830000,-0.00796287970069263 -840000,-0.01809791541344432 -850000,-0.013422288954267936 -860000,-0.012798713824106963 -870000,-0.015118901854426926 -880000,-0.008949725489391713 -890000,-0.018387505558526168 -900000,-0.015824063132169407 -910000,-0.011540716228400829 -920000,0.002818010846902332 -930000,-0.01642172549334166 -940000,-0.006429383846710028 -950000,-0.0038669675596387414 -960000,-0.0028548891471466766 -970000,0.0071657014319235325 -980000,-0.008758525787238399 -990000,-0.007379139473188641 -1000000,-0.00433022795757381 +10000,-0.013990977677090424 +20000,-0.00859568195005732 +30000,-0.014605523895921176 +40000,-0.02073443576841438 +50000,-0.012436193528511405 +60000,-0.011570095927860055 +70000,-0.01257339945977085 +80000,-0.022425908502613227 +90000,-0.012557370507805029 +100000,-0.017236205276560217 +110000,-0.01578870435972643 +120000,-0.00649984647827697 +130000,-0.014172338360320006 +140000,-0.0025945299732269356 +150000,-0.002827748927274277 +160000,-0.0004566340780116997 +170000,-0.023986354885076267 +180000,-0.014040680980315306 +190000,-0.006328223047375769 +200000,-0.0015514650397357495 +210000,-0.010358294331257923 +220000,-0.008599015845344737 +230000,-0.011652889267368183 +240000,-0.0064000873023685135 +250000,-0.0002916174638143094 +260000,-0.019359213529766918 +270000,0.00410484526458739 +280000,0.006242669900395781 +290000,-0.009840879179473551 +300000,0.0036069116673145907 +310000,-0.007197787658506057 +320000,-0.011613680268869294 +330000,-0.008522788581783323 +340000,-0.017299788631882345 +350000,-0.0030710358693151005 +360000,-0.0020984541405541153 +370000,-0.01291033667108733 +380000,-0.00459086469593298 +390000,0.0036528716505268887 +400000,0.000802772904493821 +410000,0.0057036457734272175 +420000,-0.00026552713000802956 +430000,0.0019805772261008775 +440000,-0.01828416688838796 +450000,0.0025642180647295794 +460000,-0.00812380870078635 +470000,-0.0007062877780130842 +480000,-0.0009076646727343861 +490000,-0.00473009290953183 +500000,-0.014397894072769743 +510000,-0.012981810945116265 +520000,-0.007342273163005271 +530000,-0.004461848332525535 +540000,0.0043581897747635955 +550000,-0.013881473259503058 +560000,0.003451385090929691 +570000,-0.015062846603299332 +580000,-0.006393543601401821 +590000,-0.00603902593152027 +600000,-0.016308096842547946 +610000,-0.013527004143270235 +620000,-0.0010202943765434032 +630000,-0.008619961271433208 +640000,-0.0058058876472970875 +650000,-0.010379260133874628 +660000,-0.01190046881375957 +670000,-0.008711090689203175 +680000,-0.02337776396921836 +690000,-0.0007410021147606654 +700000,-0.012445480922031762 +710000,-0.0073706108873499805 +720000,-0.003988928024141567 +730000,-0.0060890799417922525 +740000,-0.010823934457410696 +750000,-0.02020865462664656 +760000,0.010012711068019499 +770000,-0.010262398843680203 +780000,-0.008067152901598745 +790000,-0.002387847966287655 +800000,-0.00581441203617764 +810000,-0.008794151437696901 +820000,-0.0016667533714087533 +830000,-0.012707971802324278 +840000,-0.004652911999644844 +850000,-0.005675176205464522 +860000,-0.0018759278183032757 +870000,-0.005688751362148021 +880000,-0.015590049564932271 +890000,-0.009242192825253568 +900000,-0.0095601971044419 +910000,-0.005476772935727884 +920000,-0.008585645052055548 +930000,0.0029926337012753574 +940000,-0.00246705577834813 +950000,-0.012789877428089912 +960000,-0.014877711226685311 +970000,-0.016604720249195633 +980000,0.0030600394008248807 +990000,-0.007314069018364067 +1000000,-0.001673655006897522 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/loss/value_loss.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/loss/value_loss.log index a6d6de3bb..e4e3c0b5e 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/loss/value_loss.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/loss/value_loss.log @@ -1,101 +1,101 @@ step,loss/value_loss -10000,9.620242796799044 -20000,3.7376654240071874 -30000,4.215238346942097 -40000,5.745127264938601 -50000,4.71599797329438 -60000,3.5760580951319563 -70000,4.921808532908456 -80000,7.475787551910488 -90000,2.7392648523929335 -100000,3.6624199968026603 -110000,9.732464435274588 -120000,8.659258136597826 -130000,3.297198718410428 -140000,5.079947301919156 -150000,2.6166053591932417 -160000,1.5993029768892875 -170000,1.5842662926921678 -180000,2.0663339612056904 -190000,3.210500170202468 -200000,0.8872668920707911 -210000,2.284270267728547 -220000,1.8536033232219218 -230000,1.5441440014930967 -240000,0.8112381019430712 -250000,1.6336161735782082 -260000,0.9750766611048732 -270000,2.0670179773628687 -280000,1.6718151009499853 -290000,0.9946607090197552 -300000,0.70163165032949 -310000,0.7982911351146371 -320000,0.7322119611804474 -330000,0.7054612437509011 -340000,1.263015271987553 -350000,0.6972878384111337 -360000,1.9714253983030319 -370000,0.48753841818577637 -380000,1.101163719559635 -390000,1.0815960310326593 -400000,1.8759907310813513 -410000,2.808719751095203 -420000,1.9297111033478138 -430000,2.2998941066106537 -440000,0.4292852594044029 -450000,1.4415827591779566 -460000,0.8601674577975686 -470000,0.561813266667069 -480000,1.4003273387021036 -490000,2.3206254538320903 -500000,2.1042514722134626 -510000,0.8082630629388363 -520000,1.0479953988883415 -530000,1.1499126604995942 -540000,0.39415384132791587 -550000,1.6383282086526196 -560000,0.8094961277817075 -570000,0.5554161657218876 -580000,0.68839798008728 -590000,2.5498760244572667 -600000,3.572925597300493 -610000,1.7887180575231216 -620000,0.7748283582484163 -630000,0.9379265530667779 -640000,1.4248822473860188 -650000,1.0718601954744167 -660000,1.0288239309930938 -670000,0.31448489359832626 -680000,0.810613108922732 -690000,1.1232716661081685 -700000,0.40304625335039057 -710000,1.0015833235073708 -720000,1.3067434990335844 -730000,1.2603359708040431 -740000,0.7667158712219829 -750000,1.0773451647813277 -760000,1.6760441824193968 -770000,1.4799169290941936 -780000,0.7426163504199691 -790000,0.6968985741452361 -800000,1.7096988599513956 -810000,1.2185592787708692 -820000,1.6095703225856586 -830000,0.7152221122204471 -840000,0.9845536881432029 -850000,0.38114803179383305 -860000,0.8257213045456494 -870000,0.8190423905070181 -880000,0.6826125463016381 -890000,0.46883053595242635 -900000,0.6297401368224722 -910000,0.3464036476111858 -920000,0.45623590347484777 -930000,1.149635540129525 -940000,1.2880306355452136 -950000,0.7628065252776237 -960000,0.38965252235748615 -970000,2.8850238367025076 -980000,0.3934324428654209 -990000,0.6746816215807238 -1000000,0.7007826870856606 +10000,12.047117606498855 +20000,6.300138696986365 +30000,8.876858262906698 +40000,3.0536571756250876 +50000,6.086280028904969 +60000,7.044686924639045 +70000,8.150028537399594 +80000,9.292313774611006 +90000,2.221750242610168 +100000,4.564115120407193 +110000,5.2713946038062405 +120000,2.064136648375571 +130000,1.3005812581031224 +140000,2.446605502922279 +150000,1.540732184189387 +160000,1.8276273683076412 +170000,0.7171394203748936 +180000,0.5560502109191903 +190000,0.3363817549011233 +200000,0.6044702681004847 +210000,1.616368609020198 +220000,1.2763392812430756 +230000,0.5420132481156993 +240000,0.5779235105724145 +250000,0.4871454252800258 +260000,0.5535767437149439 +270000,0.8337983644253109 +280000,0.5667577006504585 +290000,0.35577132866159367 +300000,0.45821232311038296 +310000,0.5170678449545016 +320000,0.41314349054515154 +330000,0.280183773687405 +340000,0.2810844612242574 +350000,0.5661162192172636 +360000,0.8495789545775363 +370000,0.34751886362403817 +380000,0.2649565343531467 +390000,0.23624013248991535 +400000,0.15914068100722778 +410000,0.19037737438146973 +420000,0.8784447871880113 +430000,0.3090511915775869 +440000,0.12261016720405551 +450000,0.2553786986355162 +460000,0.3102874978610061 +470000,0.2045466326198308 +480000,0.28085716930550114 +490000,0.6203328452254768 +500000,0.3726874402748229 +510000,0.28031536621688036 +520000,0.1776275474254745 +530000,0.15189080531770113 +540000,0.1847729953132116 +550000,0.15413356716789464 +560000,0.22650532561017278 +570000,0.20989167991442534 +580000,0.11580545561488545 +590000,0.3669132899880078 +600000,0.22573082669700292 +610000,0.1548364320896619 +620000,0.2016862445404819 +630000,0.28728497465867586 +640000,0.07695473485792031 +650000,0.1342465854647777 +660000,0.1702776403028441 +670000,0.1115525408565681 +680000,0.13305749456812044 +690000,0.20878713326660936 +700000,0.40781153505473233 +710000,0.16721064426002846 +720000,0.21787194018958048 +730000,0.38698438760730436 +740000,0.37458452158116406 +750000,0.1974762751557158 +760000,0.2513522155118605 +770000,0.3364239438277265 +780000,0.12537022829699448 +790000,0.08963208760678125 +800000,0.2965953629466153 +810000,0.1689597521591112 +820000,0.2706458902992216 +830000,0.29862284669185113 +840000,0.19664560747396548 +850000,0.25336291938907324 +860000,0.18984647853133202 +870000,0.5237170499822843 +880000,0.26633371321790855 +890000,0.09795433746389884 +900000,0.11351414925917205 +910000,0.16995014158752236 +920000,0.39265665598839644 +930000,0.21653501925921032 +940000,0.4400899126018095 +950000,0.33210742558963025 +960000,0.13770009278264075 +970000,0.24896647280972664 +980000,0.4211978601225426 +990000,0.3573415478245825 +1000000,0.12116401242590069 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat/constraint_violation.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat/constraint_violation.log index 2dd801ff9..dc36e2b72 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat/constraint_violation.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat/constraint_violation.log @@ -1,101 +1,101 @@ step,stat/constraint_violation -10000,10 -20000,10 -30000,12 -40000,18 -50000,26 -60000,37 -70000,45 -80000,51 -90000,57 -100000,66 -110000,75 -120000,82 -130000,87 -140000,91 -150000,98 -160000,104 -170000,109 -180000,111 -190000,118 -200000,123 -210000,131 -220000,140 -230000,149 -240000,156 -250000,160 -260000,168 -270000,175 -280000,180 -290000,189 -300000,198 -310000,202 -320000,209 -330000,212 -340000,218 -350000,226 -360000,231 -370000,235 -380000,243 -390000,246 -400000,251 -410000,260 -420000,266 -430000,270 -440000,280 -450000,286 -460000,292 -470000,296 -480000,304 -490000,310 -500000,321 -510000,328 -520000,334 -530000,340 -540000,346 -550000,353 -560000,358 -570000,365 -580000,369 -590000,375 -600000,387 -610000,393 -620000,395 -630000,401 -640000,409 -650000,415 -660000,420 -670000,428 -680000,433 -690000,433 -700000,437 -710000,445 -720000,447 -730000,452 -740000,457 -750000,464 -760000,472 -770000,477 -780000,482 -790000,484 -800000,492 -810000,498 -820000,508 -830000,511 -840000,516 -850000,520 -860000,534 -870000,540 -880000,544 -890000,548 -900000,554 -910000,559 -920000,563 -930000,571 -940000,576 -950000,585 -960000,589 -970000,600 -980000,603 -990000,609 -1000000,619 +10000,0 +20000,0 +30000,0 +40000,0 +50000,0 +60000,0 +70000,0 +80000,0 +90000,0 +100000,0 +110000,0 +120000,0 +130000,0 +140000,0 +150000,0 +160000,0 +170000,0 +180000,0 +190000,0 +200000,0 +210000,1 +220000,1 +230000,1 +240000,2 +250000,2 +260000,2 +270000,2 +280000,2 +290000,2 +300000,2 +310000,2 +320000,2 +330000,2 +340000,2 +350000,2 +360000,2 +370000,3 +380000,3 +390000,3 +400000,3 +410000,3 +420000,3 +430000,3 +440000,3 +450000,4 +460000,4 +470000,4 +480000,4 +490000,4 +500000,4 +510000,4 +520000,4 +530000,4 +540000,4 +550000,4 +560000,4 +570000,4 +580000,4 +590000,4 +600000,4 +610000,4 +620000,4 +630000,4 +640000,4 +650000,4 +660000,4 +670000,4 +680000,4 +690000,4 +700000,4 +710000,4 +720000,4 +730000,4 +740000,4 +750000,4 +760000,4 +770000,4 +780000,4 +790000,4 +800000,4 +810000,4 +820000,4 +830000,4 +840000,4 +850000,4 +860000,5 +870000,5 +880000,5 +890000,5 +900000,5 +910000,5 +920000,5 +930000,5 +940000,6 +950000,6 +960000,6 +970000,6 +980000,6 +990000,6 +1000000,6 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat/ep_constraint_violation.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat/ep_constraint_violation.log index 5a2475947..11b891828 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat/ep_constraint_violation.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat/ep_constraint_violation.log @@ -1,101 +1,101 @@ step,stat/ep_constraint_violation -10000,0.1 +10000,0.0 20000,0.0 -30000,0.1 -40000,0.2 -50000,0.1 -60000,0.3 -70000,0.2 +30000,0.0 +40000,0.0 +50000,0.0 +60000,0.0 +70000,0.0 80000,0.0 -90000,0.1 -100000,0.2 -110000,0.2 +90000,0.0 +100000,0.0 +110000,0.0 120000,0.0 130000,0.0 -140000,0.1 +140000,0.0 150000,0.0 -160000,0.2 -170000,0.2 +160000,0.0 +170000,0.0 180000,0.0 -190000,0.1 +190000,0.0 200000,0.0 -210000,0.2 -220000,0.1 -230000,0.2 -240000,0.1 -250000,0.2 -260000,0.2 -270000,0.1 -280000,0.2 -290000,0.1 +210000,0.1 +220000,0.0 +230000,0.0 +240000,0.0 +250000,0.0 +260000,0.0 +270000,0.0 +280000,0.0 +290000,0.0 300000,0.0 310000,0.0 -320000,0.1 -330000,0.1 -340000,0.3 -350000,0.2 -360000,0.2 -370000,0.0 -380000,0.1 -390000,0.1 -400000,0.1 -410000,0.3 +320000,0.0 +330000,0.0 +340000,0.0 +350000,0.0 +360000,0.0 +370000,0.1 +380000,0.0 +390000,0.0 +400000,0.0 +410000,0.0 420000,0.0 -430000,0.2 -440000,0.4 -450000,0.2 -460000,0.1 +430000,0.0 +440000,0.0 +450000,0.0 +460000,0.0 470000,0.0 -480000,0.4 -490000,0.1 -500000,0.1 -510000,0.1 -520000,0.3 -530000,0.2 -540000,0.1 -550000,0.1 -560000,0.1 -570000,0.1 -580000,0.2 -590000,0.4 -600000,0.2 +480000,0.0 +490000,0.0 +500000,0.0 +510000,0.0 +520000,0.0 +530000,0.0 +540000,0.0 +550000,0.0 +560000,0.0 +570000,0.0 +580000,0.0 +590000,0.0 +600000,0.0 610000,0.0 -620000,0.1 -630000,0.2 -640000,0.2 -650000,0.2 -660000,0.1 -670000,0.2 -680000,0.1 +620000,0.0 +630000,0.0 +640000,0.0 +650000,0.0 +660000,0.0 +670000,0.0 +680000,0.0 690000,0.0 -700000,0.2 -710000,0.1 +700000,0.0 +710000,0.0 720000,0.0 -730000,0.2 -740000,0.2 -750000,0.2 -760000,0.1 -770000,0.1 +730000,0.0 +740000,0.0 +750000,0.0 +760000,0.0 +770000,0.0 780000,0.0 790000,0.0 -800000,0.3 +800000,0.0 810000,0.0 -820000,0.2 -830000,0.1 +820000,0.0 +830000,0.0 840000,0.0 -850000,0.1 -860000,0.1 -870000,0.1 +850000,0.0 +860000,0.0 +870000,0.0 880000,0.0 890000,0.0 -900000,0.1 +900000,0.0 910000,0.0 -920000,0.1 -930000,0.3 -940000,0.1 -950000,0.1 -960000,0.1 -970000,0.5 -980000,0.2 +920000,0.0 +930000,0.0 +940000,0.0 +950000,0.0 +960000,0.0 +970000,0.0 +980000,0.0 990000,0.0 -1000000,0.1 +1000000,0.0 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat/ep_length.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat/ep_length.log index a9430c783..8e326694b 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat/ep_length.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat/ep_length.log @@ -1,101 +1,101 @@ step,stat/ep_length -10000,226.4 +10000,250.0 20000,250.0 -30000,225.8 -40000,202.7 -50000,225.1 -60000,175.6 -70000,200.8 +30000,250.0 +40000,250.0 +50000,250.0 +60000,250.0 +70000,250.0 80000,250.0 -90000,227.2 -100000,200.2 -110000,201.0 +90000,250.0 +100000,250.0 +110000,250.0 120000,250.0 130000,250.0 -140000,225.1 +140000,250.0 150000,250.0 -160000,200.2 -170000,200.2 +160000,250.0 +170000,250.0 180000,250.0 -190000,225.1 +190000,250.0 200000,250.0 -210000,200.2 -220000,225.6 -230000,200.2 -240000,225.5 -250000,202.0 -260000,200.6 -270000,225.4 -280000,200.4 -290000,225.3 +210000,225.1 +220000,250.0 +230000,250.0 +240000,250.0 +250000,250.0 +260000,250.0 +270000,250.0 +280000,250.0 +290000,250.0 300000,250.0 310000,250.0 -320000,225.1 -330000,225.7 -340000,176.6 -350000,201.6 -360000,200.7 -370000,250.0 -380000,225.7 -390000,225.4 -400000,226.7 -410000,175.5 +320000,250.0 +330000,250.0 +340000,250.0 +350000,250.0 +360000,250.0 +370000,225.1 +380000,250.0 +390000,250.0 +400000,250.0 +410000,250.0 420000,250.0 -430000,200.4 -440000,151.7 -450000,203.1 -460000,226.0 +430000,250.0 +440000,250.0 +450000,250.0 +460000,250.0 470000,250.0 -480000,151.5 -490000,225.4 -500000,225.1 -510000,225.1 -520000,175.5 -530000,201.5 -540000,225.1 -550000,225.1 -560000,225.1 -570000,225.1 -580000,200.8 -590000,153.2 -600000,201.0 +480000,250.0 +490000,250.0 +500000,250.0 +510000,250.0 +520000,250.0 +530000,250.0 +540000,250.0 +550000,250.0 +560000,250.0 +570000,250.0 +580000,250.0 +590000,250.0 +600000,250.0 610000,250.0 -620000,225.1 -630000,200.4 -640000,200.7 -650000,200.7 -660000,225.2 -670000,202.2 -680000,225.1 +620000,250.0 +630000,250.0 +640000,250.0 +650000,250.0 +660000,250.0 +670000,250.0 +680000,250.0 690000,250.0 -700000,200.2 -710000,225.3 +700000,250.0 +710000,250.0 720000,250.0 -730000,202.0 -740000,200.5 -750000,200.7 -760000,225.1 -770000,225.6 +730000,250.0 +740000,250.0 +750000,250.0 +760000,250.0 +770000,250.0 780000,250.0 790000,250.0 -800000,175.8 +800000,250.0 810000,250.0 -820000,200.8 -830000,226.0 +820000,250.0 +830000,250.0 840000,250.0 -850000,225.6 -860000,225.1 -870000,225.1 +850000,250.0 +860000,250.0 +870000,250.0 880000,250.0 890000,250.0 -900000,225.1 +900000,250.0 910000,250.0 -920000,225.8 -930000,176.2 -940000,225.1 -950000,225.5 -960000,225.1 -970000,128.4 -980000,200.7 +920000,250.0 +930000,250.0 +940000,250.0 +950000,250.0 +960000,250.0 +970000,250.0 +980000,250.0 990000,250.0 -1000000,225.1 +1000000,250.0 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat/ep_return.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat/ep_return.log index 4cde1c3ec..9cf815e3a 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat/ep_return.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat/ep_return.log @@ -1,101 +1,101 @@ step,stat/ep_return -10000,52.3867160232943 -20000,77.89706092767716 -30000,82.24817881168899 -40000,66.17245571082901 -50000,97.34772615953037 -60000,50.104081017215044 -70000,91.38816093253672 -80000,96.1666589857727 -90000,70.25578634003466 -100000,70.94020155592074 -110000,86.08229940031724 -120000,95.70147767449023 -130000,98.58479191315514 -140000,100.67580094827242 -150000,114.31825941926154 -160000,98.32825490898774 -170000,103.69813337210508 -180000,120.0073887187805 -190000,107.04017589656925 -200000,116.72292246314957 -210000,119.77479718060249 -220000,116.92987321778598 -230000,102.00630567925732 -240000,124.79953509033942 -250000,109.2562738253747 -260000,107.35578832953777 -270000,138.1066852162108 -280000,124.34014092235654 -290000,126.29630184631128 -300000,150.35577097998237 -310000,161.7805121447281 -320000,138.06329189806075 -330000,130.77323033881484 -340000,95.80869656661109 -350000,115.74526978214371 -360000,112.61115357758376 -370000,153.48472729445587 -380000,143.1948098840456 -390000,132.62952711731648 -400000,141.482400167358 -410000,98.11271646886084 -420000,159.9137260653159 -430000,116.8806908043681 -440000,98.50793347242987 -450000,118.53547981994348 -460000,132.2220614093783 -470000,140.35084078694962 -480000,88.15719590062074 -490000,140.757390868566 -500000,147.09842280699522 -510000,151.66512388632526 -520000,103.54208824252468 -530000,123.57954435419781 -540000,155.26235459694925 -550000,143.2896669302614 -560000,141.09712567340895 -570000,143.90811247016785 -580000,129.0148229489619 -590000,103.01145871593897 -600000,126.17186624581265 -610000,162.1492447787984 -620000,142.74355468185132 -630000,128.99090399971223 -640000,124.83208148696554 -650000,125.46626135717459 -660000,131.57713785165726 -670000,124.41026394960595 -680000,132.0395856932352 -690000,156.50812725901838 -700000,114.24963241941607 -710000,146.33486803555465 -720000,159.33248908010648 -730000,126.02907460494112 -740000,133.31153346823982 -750000,128.7139169218645 -760000,141.23451773446857 -770000,140.17616473247375 -780000,151.8687416468388 -790000,132.47758239404985 -800000,102.36360652006363 -810000,160.1395986908972 -820000,124.6932784220705 -830000,142.29138501083057 -840000,149.7036685494894 -850000,153.33104687390295 -860000,141.45265475624825 -870000,138.91470900847352 -880000,155.95396536718914 -890000,143.63797288966265 -900000,135.13570678542357 -910000,146.57846403726086 -920000,128.01907523080777 -930000,91.6290002129721 -940000,138.06557499094234 -950000,128.7072224764328 -960000,131.44095613179647 -970000,72.75265905227943 -980000,132.089193759102 -990000,160.67940411692626 -1000000,140.8861688836015 +10000,92.13903846870046 +20000,79.86288638665448 +30000,102.78493497556642 +40000,118.26616384410156 +50000,108.47840178185811 +60000,111.9860517364087 +70000,141.04612053891873 +80000,113.2395247082987 +90000,110.79902377526155 +100000,134.66004764675057 +110000,148.1931105677437 +120000,142.45368134029985 +130000,138.81134812515216 +140000,155.3037286878625 +150000,165.68341891296515 +160000,180.707610707275 +170000,172.38839446885305 +180000,163.26080817319385 +190000,170.63906404258418 +200000,179.86300102163221 +210000,160.50072008956337 +220000,170.91041664574436 +230000,183.57795621598325 +240000,179.08382707173013 +250000,183.68825453594096 +260000,173.29361007909114 +270000,175.34316003254227 +280000,184.74626797406793 +290000,175.80080764861103 +300000,188.67619358212602 +310000,178.5907172051837 +320000,191.58348657328398 +330000,184.95214088761497 +340000,183.65686328042352 +350000,183.186523493813 +360000,186.5453938036864 +370000,173.35287987433531 +380000,180.2488071489531 +390000,190.5444765993802 +400000,187.96906378319986 +410000,185.10283713536103 +420000,183.07911456631143 +430000,190.83660305026964 +440000,186.38631233354144 +450000,194.60590565094114 +460000,186.54373578412196 +470000,190.95304554951116 +480000,185.60794648494138 +490000,174.738737511565 +500000,180.57109595460946 +510000,181.75806866952956 +520000,186.00326560102934 +530000,173.48181083482052 +540000,181.7864341702355 +550000,179.95789549394308 +560000,181.35693426376096 +570000,184.62841394410555 +580000,180.34031671619923 +590000,193.09792567069084 +600000,179.24279919095613 +610000,182.21378366140124 +620000,179.63946942494923 +630000,178.76366438372295 +640000,194.16006109283367 +650000,182.13311075788508 +660000,183.62249302308825 +670000,189.52020011965894 +680000,185.0191306233777 +690000,189.93398551989 +700000,181.9827479262189 +710000,193.3732144939973 +720000,179.32183109076996 +730000,183.00427641895072 +740000,177.3899881617969 +750000,190.00969690670053 +760000,180.9246557908239 +770000,190.51693730710656 +780000,189.6985898484557 +790000,194.86858124576855 +800000,183.2442833755811 +810000,196.1648066922662 +820000,199.959784784175 +830000,178.61868923620426 +840000,179.73373301551547 +850000,193.47775960691757 +860000,183.62996911791532 +870000,191.0356783177258 +880000,189.66400810231795 +890000,186.90023029974023 +900000,184.19904282782628 +910000,185.50484210551565 +920000,184.0281269304613 +930000,185.1259860862845 +940000,182.2150677068634 +950000,195.16355003925923 +960000,172.83698018023293 +970000,187.68602786540006 +980000,194.298420857895 +990000,183.77998928172025 +1000000,196.63082620132062 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat/ep_reward.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat/ep_reward.log index 40560a519..aa98d2b87 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat/ep_reward.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat/ep_reward.log @@ -1,101 +1,101 @@ step,stat/ep_reward -10000,0.20994113129254974 -20000,0.31158824371070865 -30000,0.3290071319659767 -40000,0.26472959618971253 -50000,0.46108726207808975 -60000,0.20529908823733883 -70000,0.36779769701928544 -80000,0.3846666359430908 -90000,0.28651151791905904 -100000,0.28455114654770464 -110000,0.34556867548273884 -120000,0.382805910697961 -130000,0.39433916765262056 -140000,0.4033665720994275 -150000,0.4572730376770461 -160000,0.406344098518188 -170000,0.42071311994778887 -180000,0.48002955487512206 -190000,0.4675107554794873 -200000,0.4668916898525984 -210000,0.4812291369932792 -220000,0.47079083951272904 -230000,0.484423309556525 -240000,0.5002533733694365 -250000,0.43919558667827713 -260000,0.4621351311590365 -270000,0.5913679595038356 -280000,0.49919844015891207 -290000,0.5052799263814194 -300000,0.6014230839199295 -310000,0.6471220485789125 -320000,0.5751349291164278 -330000,0.5281521932199796 -340000,0.3866650997020468 -350000,0.49149272816983897 -360000,0.4575876939761597 -370000,0.6139389091778236 -380000,0.5864215622432635 -390000,0.5311196280343742 -400000,0.566073940128452 -410000,0.4739476256902476 -420000,0.6396549042612636 -430000,0.48741287156761387 -440000,0.39991984836666006 -450000,0.47563099989344887 -460000,0.5289170217860788 -470000,0.5614033631477985 -480000,0.382166793734492 -490000,0.5934184480966618 -500000,0.5926236285481432 -510000,0.6067345053080211 -520000,0.41878312323806466 -530000,0.4956800710849759 -540000,0.6217992832210248 -550000,0.5768653639192544 -560000,0.5649482797716322 -570000,0.5766972187761851 -580000,0.5171887821129445 -590000,0.42550857931611297 -600000,0.6229721426688501 -610000,0.6485969791151935 -620000,0.5710571360832898 -630000,0.5168785604563824 -640000,0.5019861932371704 -650000,0.5026197753653805 -660000,0.5284914048799002 -670000,0.5038175666243172 -680000,0.5422099401183768 -690000,0.6260325090360734 -700000,0.4695708814328558 -710000,0.5853502226756071 -720000,0.637329956320426 -730000,0.5659368159033316 -740000,0.5333787470105159 -750000,0.5155705724172268 -760000,0.5723692639725424 -770000,0.561692059610243 -780000,0.6074749665873551 -790000,0.5299103295761994 -800000,0.4738582501067829 -810000,0.6405583947635888 -820000,0.5105755689590162 -830000,0.5722061569442227 -840000,0.5988146741979576 -850000,0.6157420699388043 -860000,0.5660236888346291 -870000,0.5588650760884676 -880000,0.6238158614687567 -890000,0.5745518915586504 -900000,0.542300218340469 -910000,0.5863138561490433 -920000,0.513355584729245 -930000,0.36870777231113705 -940000,0.5550690106401194 -950000,0.5156793223693318 -960000,0.5259753451740806 -970000,0.3429258740830834 -980000,0.5313788221066116 -990000,0.6427176164677049 -1000000,0.5637030475887115 +10000,0.36855615387480184 +20000,0.3194515455466179 +30000,0.4111397399022657 +40000,0.47306465537640624 +50000,0.4339136071274325 +60000,0.4479442069456348 +70000,0.5641844821556747 +80000,0.4529580988331948 +90000,0.4431960951010462 +100000,0.5386401905870023 +110000,0.5927724422709747 +120000,0.5698147253611994 +130000,0.5552453925006088 +140000,0.6212149147514501 +150000,0.6627336756518606 +160000,0.7228304428291 +170000,0.689553577875412 +180000,0.6530432326927755 +190000,0.6825562561703368 +200000,0.7194520040865289 +210000,0.6875842976881004 +220000,0.6836416665829774 +230000,0.7343118248639329 +240000,0.7163353082869206 +250000,0.7347530181437638 +260000,0.6931744403163648 +270000,0.7013726401301692 +280000,0.7389850718962718 +290000,0.703203230594444 +300000,0.7547047743285039 +310000,0.7143628688207346 +320000,0.766333946293136 +330000,0.7398085635504599 +340000,0.7346274531216941 +350000,0.732746093975252 +360000,0.7461815752147456 +370000,0.7399280079076569 +380000,0.7209952285958126 +390000,0.7621779063975209 +400000,0.7518762551327994 +410000,0.7404113485414442 +420000,0.7323164582652457 +430000,0.7633464122010786 +440000,0.7455452493341659 +450000,0.7784236226037647 +460000,0.7461749431364879 +470000,0.7638121821980446 +480000,0.7424317859397658 +490000,0.6989549500462602 +500000,0.7222843838184378 +510000,0.7270322746781183 +520000,0.7440130624041175 +530000,0.6939272433392821 +540000,0.7271457366809421 +550000,0.7198315819757722 +560000,0.7254277370550438 +570000,0.7385136557764223 +580000,0.721361266864797 +590000,0.7723917026827634 +600000,0.7169711967638246 +610000,0.7288551346456049 +620000,0.7185578776997967 +630000,0.7150546575348918 +640000,0.7766402443713346 +650000,0.7285324430315404 +660000,0.734489972092353 +670000,0.7580808004786357 +680000,0.7400765224935107 +690000,0.7597359420795601 +700000,0.7279309917048756 +710000,0.7734928579759892 +720000,0.7172873243630798 +730000,0.7320171056758029 +740000,0.7095599526471876 +750000,0.7600387876268022 +760000,0.7236986231632954 +770000,0.7620677492284262 +780000,0.7587943593938228 +790000,0.7794743249830743 +800000,0.7329771335023243 +810000,0.7846592267690647 +820000,0.7998391391367001 +830000,0.7144747569448171 +840000,0.7189349320620619 +850000,0.7739110384276704 +860000,0.7345198764716613 +870000,0.7641427132709033 +880000,0.7586560324092717 +890000,0.7476009211989609 +900000,0.736796171311305 +910000,0.7420193684220626 +920000,0.7361125077218453 +930000,0.740503944345138 +940000,0.7288602708274536 +950000,0.7806542001570369 +960000,0.6913479207209317 +970000,0.7507441114616002 +980000,0.77719368343158 +990000,0.735119957126881 +1000000,0.7865233048052823 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat_eval/constraint_violation.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat_eval/constraint_violation.log index 7ada0605c..a0f4f02d4 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat_eval/constraint_violation.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat_eval/constraint_violation.log @@ -1,101 +1,101 @@ step,stat_eval/constraint_violation -10000,0.0 -20000,0.0 -30000,0.0 -40000,0.2 -50000,0.0 -60000,0.1 +10000,0.3 +20000,0.2 +30000,0.1 +40000,0.1 +50000,0.1 +60000,0.0 70000,0.2 -80000,0.0 -90000,0.3 +80000,0.1 +90000,0.1 100000,0.0 110000,0.2 -120000,0.1 -130000,0.0 -140000,0.2 -150000,0.2 -160000,0.0 -170000,0.0 -180000,0.0 -190000,0.3 -200000,0.1 -210000,0.0 +120000,0.0 +130000,0.3 +140000,0.0 +150000,0.1 +160000,0.1 +170000,0.1 +180000,0.2 +190000,0.1 +200000,0.2 +210000,0.3 220000,0.2 230000,0.0 -240000,0.1 -250000,0.0 -260000,0.1 -270000,0.1 -280000,0.3 +240000,0.2 +250000,0.2 +260000,0.0 +270000,0.3 +280000,0.1 290000,0.2 300000,0.1 -310000,0.1 +310000,0.0 320000,0.0 -330000,0.1 -340000,0.1 +330000,0.2 +340000,0.2 350000,0.0 -360000,0.4 -370000,0.1 +360000,0.1 +370000,0.2 380000,0.2 390000,0.2 -400000,0.0 -410000,0.1 -420000,0.0 -430000,0.2 +400000,0.1 +410000,0.0 +420000,0.3 +430000,0.3 440000,0.2 -450000,0.3 +450000,0.0 460000,0.1 -470000,0.0 -480000,0.0 +470000,0.1 +480000,0.3 490000,0.1 -500000,0.1 -510000,0.0 -520000,0.4 +500000,0.0 +510000,0.2 +520000,0.3 530000,0.0 540000,0.3 -550000,0.0 -560000,0.0 -570000,0.1 -580000,0.2 -590000,0.0 -600000,0.1 +550000,0.1 +560000,0.2 +570000,0.0 +580000,0.3 +590000,0.2 +600000,0.0 610000,0.0 620000,0.1 -630000,0.0 -640000,0.0 -650000,0.3 -660000,0.3 -670000,0.1 +630000,0.1 +640000,0.1 +650000,0.0 +660000,0.2 +670000,0.2 680000,0.0 -690000,0.2 -700000,0.1 +690000,0.1 +700000,0.0 710000,0.1 -720000,0.3 -730000,0.2 +720000,0.2 +730000,0.1 740000,0.0 750000,0.1 -760000,0.3 -770000,0.0 -780000,0.1 +760000,0.1 +770000,0.2 +780000,0.2 790000,0.1 -800000,0.3 +800000,0.0 810000,0.1 -820000,0.2 -830000,0.3 -840000,0.1 -850000,0.3 +820000,0.1 +830000,0.0 +840000,0.0 +850000,0.4 860000,0.0 -870000,0.2 +870000,0.3 880000,0.2 890000,0.1 -900000,0.1 +900000,0.2 910000,0.1 -920000,0.1 -930000,0.2 +920000,0.3 +930000,0.0 940000,0.0 -950000,0.0 -960000,0.0 -970000,0.1 -980000,0.2 -990000,0.1 -1000000,0.2 +950000,0.1 +960000,0.1 +970000,0.3 +980000,0.0 +990000,0.2 +1000000,0.0 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat_eval/ep_length.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat_eval/ep_length.log index 4379b7c53..407ea02dc 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat_eval/ep_length.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat_eval/ep_length.log @@ -1,101 +1,101 @@ step,stat_eval/ep_length -10000,250.0 -20000,250.0 -30000,250.0 -40000,200.2 -50000,250.0 -60000,225.2 -70000,200.9 -80000,250.0 -90000,176.0 +10000,176.1 +20000,200.5 +30000,225.3 +40000,225.3 +50000,225.1 +60000,250.0 +70000,200.5 +80000,225.1 +90000,225.1 100000,250.0 -110000,200.3 -120000,225.1 -130000,250.0 -140000,200.3 -150000,201.4 -160000,250.0 -170000,250.0 -180000,250.0 -190000,178.2 -200000,225.1 -210000,250.0 -220000,200.7 +110000,201.3 +120000,250.0 +130000,176.2 +140000,250.0 +150000,225.2 +160000,225.1 +170000,225.8 +180000,200.9 +190000,225.1 +200000,200.2 +210000,176.6 +220000,201.6 230000,250.0 -240000,225.8 -250000,250.0 -260000,225.2 -270000,225.1 -280000,177.0 -290000,202.8 -300000,225.1 -310000,225.1 +240000,200.2 +250000,201.9 +260000,250.0 +270000,175.3 +280000,225.2 +290000,201.4 +300000,225.4 +310000,250.0 320000,250.0 -330000,225.7 -340000,225.1 +330000,202.0 +340000,200.5 350000,250.0 -360000,151.1 -370000,226.6 -380000,200.8 +360000,225.1 +370000,202.3 +380000,201.0 390000,200.3 -400000,250.0 -410000,225.3 -420000,250.0 -430000,200.8 -440000,201.6 -450000,176.4 +400000,225.6 +410000,250.0 +420000,176.7 +430000,175.7 +440000,201.8 +450000,250.0 460000,225.1 -470000,250.0 -480000,250.0 -490000,225.1 -500000,226.8 -510000,250.0 -520000,153.7 +470000,226.2 +480000,176.3 +490000,225.7 +500000,250.0 +510000,200.5 +520000,175.9 530000,250.0 -540000,176.4 -550000,250.0 -560000,250.0 -570000,226.7 -580000,200.2 -590000,250.0 -600000,225.1 +540000,175.6 +550000,225.3 +560000,201.6 +570000,250.0 +580000,178.3 +590000,200.3 +600000,250.0 610000,250.0 -620000,225.2 -630000,250.0 -640000,250.0 -650000,176.8 -660000,176.3 -670000,225.7 +620000,225.1 +630000,225.5 +640000,227.6 +650000,250.0 +660000,200.4 +670000,200.6 680000,250.0 -690000,201.9 -700000,227.5 -710000,226.2 -720000,175.8 -730000,202.6 +690000,225.7 +700000,250.0 +710000,225.2 +720000,200.8 +730000,226.7 740000,250.0 -750000,225.4 -760000,176.9 -770000,250.0 -780000,225.1 -790000,225.1 -800000,176.1 -810000,225.7 -820000,200.6 -830000,175.5 -840000,225.4 -850000,176.4 +750000,225.2 +760000,225.6 +770000,201.3 +780000,200.3 +790000,225.7 +800000,250.0 +810000,225.2 +820000,225.2 +830000,250.0 +840000,250.0 +850000,151.6 860000,250.0 -870000,201.1 -880000,200.9 -890000,226.6 -900000,225.1 -910000,225.2 -920000,225.1 -930000,202.2 +870000,176.8 +880000,201.6 +890000,226.1 +900000,201.0 +910000,226.6 +920000,175.6 +930000,250.0 940000,250.0 -950000,250.0 -960000,250.0 -970000,225.4 -980000,200.3 -990000,225.4 -1000000,200.8 +950000,227.5 +960000,225.5 +970000,175.5 +980000,250.0 +990000,202.4 +1000000,250.0 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat_eval/ep_return.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat_eval/ep_return.log index 9d2899d04..5ecd76e3b 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat_eval/ep_return.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat_eval/ep_return.log @@ -1,101 +1,101 @@ step,stat_eval/ep_return -10000,52.66164388041132 -20000,61.19403578981203 -30000,65.52594834429688 -40000,60.11532712694226 -50000,96.19131715195172 -60000,84.5629144084921 -70000,63.29704250239367 -80000,87.136944089671 -90000,59.080291601971794 -100000,73.26950947712668 -110000,88.38115940688144 -120000,89.69522424704508 -130000,98.13420221183688 -140000,88.13871026426145 -150000,89.52950482741407 -160000,122.55111657931764 -170000,126.79107078542765 -180000,137.84286422179377 -190000,84.62998253074929 -200000,108.9813946324499 -210000,115.55413865618212 -220000,105.04810582733683 -230000,136.72817073152106 -240000,117.60776277862885 -250000,131.11739628134768 -260000,136.73047337443103 -270000,118.57702949343522 -280000,95.7748428723195 -290000,100.78974624158364 -300000,115.92345325316194 -310000,132.6560786674852 -320000,133.8456880310186 -330000,118.93550060186865 -340000,122.03397010953226 -350000,156.43295305405258 -360000,89.79476643786174 -370000,126.50175222973414 -380000,120.39393374947437 -390000,118.35108428764013 -400000,137.0093314303836 -410000,119.74542620107222 -420000,145.67004021715348 -430000,97.69329401700796 -440000,111.68281449982605 -450000,99.00630184836473 -460000,136.8821773397515 -470000,139.93126098385534 -480000,135.75035791320806 -490000,133.88326953782092 -500000,131.70415578305375 -510000,137.71689026769297 -520000,93.43625993583638 -530000,138.86207482519927 -540000,110.96078204685196 -550000,152.7992516114462 -560000,151.16509926019947 -570000,136.7578836697533 -580000,129.36142719319545 -590000,153.9930805981028 -600000,141.29551326292193 -610000,145.1875649674414 -620000,120.61738235850298 -630000,149.2574956450861 -640000,164.28740631485863 -650000,102.16083201875733 -660000,103.95216923672089 -670000,140.05582376958176 -680000,155.7172366161862 -690000,118.28525289923785 -700000,137.75619610759765 -710000,152.70378212891086 -720000,92.03437123008659 -730000,114.23239595971373 -740000,146.8780089392809 -750000,130.81214574221363 -760000,99.8270361194304 -770000,144.97525615724604 -780000,126.61965307910118 -790000,111.52484342071614 -800000,111.68894541171612 -810000,122.03390608814561 -820000,126.7011824773507 -830000,109.89831119716465 -840000,127.87017911384937 -850000,107.0022286671086 -860000,163.76080956350086 -870000,120.60609088227133 -880000,123.72234521770422 -890000,125.43286120186522 -900000,129.92140231059116 -910000,139.4831085389049 -920000,127.79977118300471 -930000,111.75532485776009 -940000,140.2682286971507 -950000,147.56134973911975 -960000,157.37960641955465 -970000,145.90522377441067 -980000,115.79291033958584 -990000,146.90947074869052 -1000000,108.78976011092873 +10000,31.000323929124285 +20000,46.773822653181746 +30000,49.88878557632966 +40000,64.83505216909633 +50000,69.20989087032227 +60000,68.75717352474501 +70000,62.14094855655426 +80000,87.2245231032908 +90000,74.11806974540744 +100000,104.3086247026744 +110000,67.64327306348291 +120000,96.94820782853108 +130000,72.99132982469207 +140000,123.09988603865335 +150000,126.60261564355218 +160000,121.53411127217046 +170000,123.84902917519969 +180000,110.17561752718584 +190000,135.92326569207435 +200000,115.83431873355603 +210000,97.29142574034455 +220000,121.19514675584817 +230000,147.13718201616868 +240000,107.00459804640869 +250000,123.5403333761329 +260000,147.99900026810278 +270000,103.96698329039509 +280000,155.12764387202301 +290000,123.74942983694339 +300000,118.4832561010899 +310000,135.65123257710695 +320000,146.77961729028323 +330000,120.73608203796675 +340000,119.3577030126974 +350000,149.13018398471542 +360000,134.9380065267615 +370000,125.65713090713794 +380000,117.14698802658206 +390000,113.06244206872172 +400000,130.82263045192434 +410000,145.76021083954907 +420000,107.41158303446096 +430000,102.96068462310109 +440000,121.96301239303826 +450000,158.86334597816366 +460000,151.85572377660554 +470000,125.77235780587121 +480000,108.85427179121712 +490000,122.98005981054624 +500000,141.1350259422239 +510000,124.89114983365182 +520000,116.36492088764619 +530000,146.94030200114722 +540000,110.05305919234078 +550000,131.32434895936382 +560000,137.7215389299178 +570000,139.24736965759314 +580000,92.28926990187128 +590000,113.90404528205352 +600000,142.27574374780437 +610000,152.7665281890351 +620000,133.9348505578398 +630000,137.13752521425607 +640000,137.0353058304301 +650000,151.53235482755355 +660000,116.72834351191632 +670000,120.37856330736481 +680000,153.29030882167527 +690000,152.20744100384726 +700000,148.85849282482224 +710000,139.0129425535562 +720000,118.33110040018535 +730000,145.24914393031477 +740000,138.93288877353072 +750000,131.6679493640901 +760000,130.04519682108264 +770000,122.39330547239656 +780000,128.52068941058445 +790000,136.37469319797225 +800000,143.42871737886995 +810000,136.55357157762455 +820000,148.74367796543055 +830000,161.59691961421015 +840000,160.23893959224182 +850000,85.94628862876179 +860000,142.10468679409593 +870000,111.63841819057154 +880000,123.99724034792939 +890000,145.5040412938969 +900000,107.51730056570545 +910000,133.14916002614993 +920000,126.02099161310575 +930000,156.28092836164572 +940000,146.43337862450863 +950000,130.0709194516187 +960000,147.30119215857667 +970000,98.68638606654667 +980000,153.59537237632418 +990000,132.83934774343342 +1000000,139.13728250312377 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat_eval/ep_reward.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat_eval/ep_reward.log index 551091052..a8d79857f 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat_eval/ep_reward.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat_eval/ep_reward.log @@ -1,101 +1,101 @@ step,stat_eval/ep_reward -10000,0.21064657552164526 -20000,0.24477614315924806 -30000,0.26210379337718753 -40000,0.2404613112338953 -50000,0.38476526860780685 -60000,0.33825165763676285 -70000,0.25318817097124957 -80000,0.34854777635868406 -90000,0.23632116768705452 -100000,0.2930780379085067 -110000,0.35352463762941094 -120000,0.3587808970527693 -130000,0.39253680884734754 -140000,0.35255484264106485 -150000,0.3581180355007452 -160000,0.4902044663172706 -170000,0.5071642831417107 -180000,0.551371456887175 -190000,0.3385256097927868 -200000,0.4359255826111664 -210000,0.46221655462472844 -220000,0.4201924242151204 -230000,0.5469126829260844 -240000,0.4704310511330266 -250000,0.5244695851253909 -260000,0.5476218876812078 -270000,0.4743081180263711 -280000,0.38310646666016035 -290000,0.4031603065888804 -300000,0.46369381313531255 -310000,0.5306243331277601 -320000,0.5353827521240743 -330000,0.4757420026969596 -340000,0.4881358806143298 -350000,0.6257318122162104 -360000,0.3591982895326413 -370000,0.5060070112237269 -380000,0.4815757351924437 -390000,0.47340479554680026 -400000,0.5480373257215345 -410000,0.47898191888351427 -420000,0.582680160868614 -430000,0.39077321675086757 -440000,0.4467312785282146 -450000,0.39602537919794356 -460000,0.5475288751691183 -470000,0.5597250439354214 -480000,0.5430014316528322 -490000,0.5355330781578858 -500000,0.5268167187604094 -510000,0.550867561070772 -520000,0.37374550535595985 -530000,0.5554482993007971 -540000,0.4438495282114121 -550000,0.6111970064457848 -560000,0.6046603970407978 -570000,0.5470328617431474 -580000,0.5174457099288843 -590000,0.6159723223924113 -600000,0.565182054626362 -610000,0.5807502598697656 -620000,0.48246954630595573 -630000,0.5970299825803445 -640000,0.6571496252594347 -650000,0.4086433927769617 -660000,0.41582068227287783 -670000,0.5602235120256063 -680000,0.6228689464647449 -690000,0.47314112403517977 -700000,0.5510247874515415 -710000,0.6108151330551752 -720000,0.36817893560096465 -730000,0.4569333349902582 -740000,0.5875120357571235 -750000,0.5232486281436873 -760000,0.39930815835550215 -770000,0.579901024628984 -780000,0.5064786123164297 -790000,0.44609937567180846 -800000,0.44675849680373414 -810000,0.4881356615506216 -820000,0.5068047299097168 -830000,0.43959324743964984 -840000,0.5114807164791755 -850000,0.4280414426930584 -860000,0.6550432382540035 -870000,0.48242438762894047 -880000,0.4948893831191817 -890000,0.5017314448507099 -900000,0.519685609242589 -910000,0.55793243649025 -920000,0.5111990847389322 -930000,0.44702130839899834 -940000,0.561072914788603 -950000,0.590245398956479 -960000,0.6295184256782187 -970000,0.5836214533698296 -980000,0.46317164144964396 -990000,0.5876378829947628 -1000000,0.4351590407447201 +10000,0.12400129701427756 +20000,0.187095357494693 +30000,0.19955514230691568 +40000,0.2593402086798025 +50000,0.2768395638272674 +60000,0.27502869409898 +70000,0.2485637943289866 +80000,0.3488980924131633 +90000,0.29647238468213477 +100000,0.41723449881069763 +110000,0.27057318768283783 +120000,0.3877928313141243 +130000,0.29196841210212937 +140000,0.49239954415461346 +150000,0.5064104633560953 +160000,0.4861381079741302 +170000,0.49539611672775746 +180000,0.4407025996824041 +190000,0.5436930629643845 +200000,0.4633396013525209 +210000,0.389167095843304 +220000,0.48478059220008046 +230000,0.5885487280646746 +240000,0.4280183963954999 +250000,0.4941613490861944 +260000,0.5919960010724111 +270000,0.4158679331615803 +280000,0.6205105817837318 +290000,0.4949977203934295 +300000,0.4739330244049321 +310000,0.5426049303084278 +320000,0.587118469161133 +330000,0.48294436727307294 +340000,0.47743084395545987 +350000,0.5965207359388617 +360000,0.5397520261070652 +370000,0.5026304939454803 +380000,0.4685882897351482 +390000,0.45225208015399543 +400000,0.523290522282881 +410000,0.5830408433581964 +420000,0.4296463360046414 +430000,0.41184286109479373 +440000,0.48785215833852824 +450000,0.6354533839126547 +460000,0.6074228951064222 +470000,0.5030894381226647 +480000,0.4354170882614034 +490000,0.49192023924278294 +500000,0.5645401037688956 +510000,0.49972069904528366 +520000,0.46546869049590284 +530000,0.5877612080045889 +540000,0.4402123450766962 +550000,0.5252990333951197 +560000,0.5508861661017793 +570000,0.5569894786303725 +580000,0.3691571935436414 +590000,0.4556161953197164 +600000,0.5691029749912176 +610000,0.6110661127561404 +620000,0.535739866419433 +630000,0.5485501008570379 +640000,0.5481412641970744 +650000,0.6061294193102142 +660000,0.46691455867596765 +670000,0.4815149459833429 +680000,0.6131612352867012 +690000,0.6088310723415135 +700000,0.595433971299289 +710000,0.5560776321027112 +720000,0.47332441021197164 +730000,0.5809965830205275 +740000,0.555731555094123 +750000,0.5266761893477494 +760000,0.5201807926344599 +770000,0.4895732548921389 +780000,0.5140830593838988 +790000,0.5455108448398917 +800000,0.5737148695154798 +810000,0.546214286393058 +820000,0.5949747122460451 +830000,0.6463876784568406 +840000,0.6409557583689673 +850000,0.3437928799107224 +860000,0.5684187471763837 +870000,0.446553924340831 +880000,0.4959904541533053 +890000,0.5820161909466621 +900000,0.4300692025867317 +910000,0.532596718110625 +920000,0.5040840246689747 +930000,0.6251237134465828 +940000,0.5857335144980346 +950000,0.5202836819768126 +960000,0.5892047822186517 +970000,0.3947455509156767 +980000,0.6143814895052967 +990000,0.5313579876683706 +1000000,0.5565491300124952 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat_eval/mse.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat_eval/mse.log index 890619839..03f236873 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat_eval/mse.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/logs/stat_eval/mse.log @@ -1,101 +1,101 @@ step,stat_eval/mse -10000,437.77647101119936 -20000,337.81709161854735 -30000,367.93419252786 -40000,267.94282839823137 -50000,281.49132443212966 -60000,342.3956603292638 -70000,336.95997262961674 -80000,367.83568933920424 -90000,236.13817444434864 -100000,424.5981728594849 -110000,224.94942291871763 -120000,309.3880717274104 -130000,346.55000105530905 -140000,189.28200546102164 -150000,271.64568927529274 -160000,299.83850938611056 -170000,260.25243587833245 -180000,234.07034471315723 -190000,176.77118022790188 -200000,333.06100176952566 -210000,297.6010674526682 -220000,229.42056871573772 -230000,205.20958776709375 -240000,231.84152587576082 -250000,337.02135488572094 -260000,200.85958100884358 -270000,277.91948485301015 -280000,218.34774961419262 -290000,343.4802990278099 -300000,324.9548455712349 -310000,262.62238443388935 -320000,337.2016380677841 -330000,271.52957220604003 -340000,356.3809463349361 -350000,176.27254078132123 -360000,119.11151627447862 -370000,220.3108579798547 -380000,218.19965629070037 -390000,177.73242399818065 -400000,297.0897175990995 -410000,218.07428923573462 -420000,257.1347546998945 -430000,231.47502060164874 -440000,273.5589465893686 -450000,177.9691533198474 -460000,164.23426598745033 -470000,213.61973761926325 -480000,305.2295410392395 -490000,198.15220699614747 -500000,231.76358987438053 -510000,283.5407708337998 -520000,122.7484176336305 -530000,291.4887178315733 -540000,120.3755635452306 -550000,253.21021256456902 -560000,224.63430017233117 -570000,236.03718597586766 -580000,133.60460290655067 -590000,215.8338824389764 -600000,186.9835873650488 -610000,295.2529448413883 -620000,233.72090340491408 -630000,226.54319187572264 -640000,127.54610888757206 -650000,166.36083211558537 -660000,160.90866121380895 -670000,209.797528886787 -680000,197.44088497875936 -690000,212.1123166484218 -700000,204.78152171936603 -710000,124.9502785801047 -720000,176.50258770651433 -730000,257.6411159065692 -740000,267.86664591095166 -750000,264.09828828416755 -760000,232.62988733180387 -770000,222.04162306683892 -780000,266.84275509821225 -790000,335.6545083867726 -800000,138.19928211915374 -810000,234.69567409688761 -820000,139.09082925141874 -830000,124.56394843229978 -840000,235.81636350501526 -850000,146.18882038395628 -860000,161.8494992515046 -870000,234.58693363421403 -880000,213.67259947859884 -890000,249.83354017825422 -900000,215.70379734256176 -910000,219.03894770429588 -920000,209.79291343484246 -930000,251.62559876678725 -940000,298.5342078656227 -950000,303.9510295367211 -960000,229.73679932723115 -970000,159.54227957986575 -980000,193.1320835582763 -990000,140.71515180809973 -1000000,229.76549176357238 +10000,271.5525097836586 +20000,309.6573509330246 +30000,392.360250999865 +40000,332.1087272478677 +50000,317.0173192214744 +60000,381.102757652623 +70000,368.220055783724 +80000,203.0437451361925 +90000,265.2860992557419 +100000,327.11898480751773 +110000,388.3599199076326 +120000,340.5200371917273 +130000,211.1946572761476 +140000,282.4028525277295 +150000,223.11903957607078 +160000,272.6792272066585 +170000,220.84201048686728 +180000,225.75111113707294 +190000,206.1816146458597 +200000,220.11257267477077 +210000,205.16204761224458 +220000,174.14482296051648 +230000,260.1854343890634 +240000,239.38788746652426 +250000,224.0814313786963 +260000,247.4365205627154 +270000,171.5264370141103 +280000,122.81195330529347 +290000,160.04373999217376 +300000,297.22231366868283 +310000,292.69076044538417 +320000,303.4973029885241 +330000,196.2850917120985 +340000,212.38834224954047 +350000,324.84881736058543 +360000,227.3982514115931 +370000,173.2917966087584 +380000,190.47391679888878 +390000,269.88802081070025 +400000,232.71985002896767 +410000,241.24003094450282 +420000,140.18456807375028 +430000,173.00303312012383 +440000,210.40173883698307 +450000,200.4325958733079 +460000,99.98833698684768 +470000,241.78217190433347 +480000,158.57832891367232 +490000,326.5312400916977 +500000,261.687794928783 +510000,138.0653266406181 +520000,119.68523872255655 +530000,291.8461100301375 +540000,166.64129858668485 +550000,268.61157956443907 +560000,117.56315829512485 +570000,290.6526334472529 +580000,295.68149212210994 +590000,263.4237149053823 +600000,289.89215893322614 +610000,254.05020336751204 +620000,222.35269856597475 +630000,212.09166646640818 +640000,184.23944627297135 +650000,304.800755450733 +660000,191.47975831543027 +670000,222.23803412609715 +680000,296.30478609083934 +690000,136.3751840724775 +700000,238.11529426209853 +710000,239.8753285458866 +720000,190.67077566381155 +730000,175.95772877395365 +740000,263.6152123799819 +750000,236.12189217266123 +760000,250.55367895006128 +770000,146.55705378276224 +780000,200.19549378128875 +790000,200.56696358337348 +800000,304.8946964842652 +810000,190.7420156072433 +820000,138.69591612059406 +830000,218.9140466522406 +840000,212.44202585121016 +850000,117.672601816147 +860000,346.09279443868024 +870000,181.93996133928664 +880000,202.5579613997815 +890000,215.03428163762956 +900000,233.06223648113723 +910000,232.87017022999544 +920000,56.94347151044915 +930000,174.84411518358087 +940000,257.71049283048774 +950000,270.68794191279176 +960000,150.14962332912896 +970000,216.79954525958965 +980000,180.28598348647793 +990000,107.15447729119217 +1000000,363.46868318062826 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/model_best.pt b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/model_best.pt index 50b13319e..41548b379 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/model_best.pt and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/model_best.pt differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/model_latest.pt b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/model_latest.pt index 787716aab..65696af39 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/model_latest.pt and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/model_latest.pt differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-loss-approx_kl.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-loss-approx_kl.jpg index 7e00aa3fd..c17b33676 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-loss-approx_kl.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-loss-approx_kl.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-loss-entropy_loss.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-loss-entropy_loss.jpg index bd4d12300..1d71fc5d7 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-loss-entropy_loss.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-loss-entropy_loss.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-loss-policy_loss.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-loss-policy_loss.jpg index a5bf7597c..b36367a8d 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-loss-policy_loss.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-loss-policy_loss.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-loss-value_loss.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-loss-value_loss.jpg index 586af9f63..baf400346 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-loss-value_loss.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-loss-value_loss.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat-constraint_violation.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat-constraint_violation.jpg index 091cd46cf..2b7e91283 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat-constraint_violation.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat-constraint_violation.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat-ep_constraint_violation.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat-ep_constraint_violation.jpg index 2f49fdbf6..efcb80e79 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat-ep_constraint_violation.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat-ep_constraint_violation.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat-ep_length.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat-ep_length.jpg index 2d8af3a32..db3cff411 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat-ep_length.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat-ep_length.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat-ep_return.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat-ep_return.jpg index 95183f038..6042ebc26 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat-ep_return.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat-ep_return.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat-ep_reward.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat-ep_reward.jpg index 08e1d70c1..6e14fcc82 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat-ep_reward.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat-ep_reward.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat_eval-constraint_violation.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat_eval-constraint_violation.jpg index 3d3a86965..a6b68b04f 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat_eval-constraint_violation.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat_eval-constraint_violation.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat_eval-ep_length.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat_eval-ep_length.jpg index 82834a191..8364d6fb1 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat_eval-ep_length.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat_eval-ep_length.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat_eval-ep_return.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat_eval-ep_return.jpg index f4a12885b..7d8e7a918 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat_eval-ep_return.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat_eval-ep_return.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat_eval-ep_reward.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat_eval-ep_reward.jpg index d4e1a409d..5dd3aa1c4 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat_eval-ep_reward.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat_eval-ep_reward.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat_eval-mse.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat_eval-mse.jpg index 00f62e42e..c81e7b849 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat_eval-mse.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/plots/-stat_eval-mse.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/std_out.txt b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/std_out.txt index 4c0e199a0..a0822bb0b 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/std_out.txt +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/std_out.txt @@ -1,2601 +1,2601 @@ -2023-10-19 14:53:14,499 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 52.662 +/- 22.240 -2023-10-19 14:53:14,515 : +2023-10-27 16:43:40,202 : Eval | ep_lengths 176.10 +/- 112.90 | ep_return 31.000 +/- 23.819 +2023-10-27 16:43:40,227 : -------------------------------------- | loss/ | | -| approx_kl | 0.0283 | -| entropy_loss | -3.7 | -| policy_loss | -0.00654 | -| value_loss | 9.62 | +| approx_kl | 0.0262 | +| entropy_loss | -3.73 | +| policy_loss | -0.014 | +| value_loss | 12 | | stat/ | | -| constraint_violation | 10 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 52.4 | -| ep_reward | 0.21 | -| stat_eval/ | | | constraint_violation | 0 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 52.7 | -| ep_reward | 0.211 | -| mse | 438 | +| ep_return | 92.1 | +| ep_reward | 0.369 | +| stat_eval/ | | +| constraint_violation | 0.3 | +| ep_length | 176 | +| ep_return | 31 | +| ep_reward | 0.124 | +| mse | 272 | | time/ | | | progress | 0.01 | | step | 1e+04 | -| step_time | 11.5 | +| step_time | 12.7 | -------------------------------------- -2023-10-19 14:55:32,432 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 61.194 +/- 20.943 -2023-10-19 14:55:32,440 : +2023-10-27 16:46:23,813 : Eval | ep_lengths 200.50 +/- 99.00 | ep_return 46.774 +/- 34.448 +2023-10-27 16:46:23,842 : -------------------------------------- | loss/ | | -| approx_kl | 0.02 | -| entropy_loss | -3.7 | -| policy_loss | -0.00411 | -| value_loss | 3.74 | +| approx_kl | 0.0329 | +| entropy_loss | -3.77 | +| policy_loss | -0.0086 | +| value_loss | 6.3 | | stat/ | | -| constraint_violation | 10 | +| constraint_violation | 0 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 77.9 | -| ep_reward | 0.312 | +| ep_return | 79.9 | +| ep_reward | 0.319 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 61.2 | -| ep_reward | 0.245 | -| mse | 338 | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 46.8 | +| ep_reward | 0.187 | +| mse | 310 | | time/ | | | progress | 0.02 | | step | 2e+04 | -| step_time | 11.4 | +| step_time | 12.9 | -------------------------------------- -2023-10-19 14:57:49,091 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 65.526 +/- 15.102 -2023-10-19 14:57:49,099 : +2023-10-27 16:49:07,508 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 49.889 +/- 32.071 +2023-10-27 16:49:07,522 : -------------------------------------- | loss/ | | -| approx_kl | 0.0227 | -| entropy_loss | -3.69 | -| policy_loss | -0.0132 | -| value_loss | 4.22 | +| approx_kl | 0.0214 | +| entropy_loss | -3.73 | +| policy_loss | -0.0146 | +| value_loss | 8.88 | | stat/ | | -| constraint_violation | 12 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 82.2 | -| ep_reward | 0.329 | -| stat_eval/ | | | constraint_violation | 0 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 65.5 | -| ep_reward | 0.262 | -| mse | 368 | +| ep_return | 103 | +| ep_reward | 0.411 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 49.9 | +| ep_reward | 0.2 | +| mse | 392 | | time/ | | | progress | 0.03 | | step | 3e+04 | -| step_time | 11 | +| step_time | 13.1 | -------------------------------------- -2023-10-19 14:59:59,919 : Eval | ep_lengths 200.20 +/- 99.60 | ep_return 60.115 +/- 32.740 -2023-10-19 14:59:59,921 : +2023-10-27 16:51:45,344 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 64.835 +/- 38.617 +2023-10-27 16:51:45,360 : -------------------------------------- | loss/ | | -| approx_kl | 0.0253 | -| entropy_loss | -3.7 | -| policy_loss | -0.00919 | -| value_loss | 5.75 | +| approx_kl | 0.0251 | +| entropy_loss | -3.73 | +| policy_loss | -0.0207 | +| value_loss | 3.05 | | stat/ | | -| constraint_violation | 18 | -| ep_constraint_vio... | 0.2 | -| ep_length | 203 | -| ep_return | 66.2 | -| ep_reward | 0.265 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 118 | +| ep_reward | 0.473 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 200 | -| ep_return | 60.1 | -| ep_reward | 0.24 | -| mse | 268 | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 64.8 | +| ep_reward | 0.259 | +| mse | 332 | | time/ | | | progress | 0.04 | | step | 4e+04 | -| step_time | 11.1 | +| step_time | 13 | -------------------------------------- -2023-10-19 15:02:14,445 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 96.191 +/- 23.143 -2023-10-19 15:02:14,455 : +2023-10-27 16:54:21,811 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 69.210 +/- 29.492 +2023-10-27 16:54:21,835 : -------------------------------------- | loss/ | | -| approx_kl | 0.0252 | -| entropy_loss | -3.68 | -| policy_loss | -0.00774 | -| value_loss | 4.72 | +| approx_kl | 0.0181 | +| entropy_loss | -3.79 | +| policy_loss | -0.0124 | +| value_loss | 6.09 | | stat/ | | -| constraint_violation | 26 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 97.3 | -| ep_reward | 0.461 | -| stat_eval/ | | | constraint_violation | 0 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 96.2 | -| ep_reward | 0.385 | -| mse | 281 | +| ep_return | 108 | +| ep_reward | 0.434 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 69.2 | +| ep_reward | 0.277 | +| mse | 317 | | time/ | | | progress | 0.05 | | step | 5e+04 | -| step_time | 11 | +| step_time | 12.1 | -------------------------------------- -2023-10-19 15:04:25,333 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 84.563 +/- 40.502 -2023-10-19 15:04:25,334 : +2023-10-27 16:57:02,630 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 68.757 +/- 26.810 +2023-10-27 16:57:02,658 : -------------------------------------- | loss/ | | -| approx_kl | 0.0344 | -| entropy_loss | -3.72 | -| policy_loss | -0.0169 | -| value_loss | 3.58 | +| approx_kl | 0.014 | +| entropy_loss | -3.73 | +| policy_loss | -0.0116 | +| value_loss | 7.04 | | stat/ | | -| constraint_violation | 37 | -| ep_constraint_vio... | 0.3 | -| ep_length | 176 | -| ep_return | 50.1 | -| ep_reward | 0.205 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 112 | +| ep_reward | 0.448 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 84.6 | -| ep_reward | 0.338 | -| mse | 342 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 68.8 | +| ep_reward | 0.275 | +| mse | 381 | | time/ | | | progress | 0.06 | | step | 6e+04 | -| step_time | 11.1 | +| step_time | 15.2 | -------------------------------------- -2023-10-19 15:06:33,065 : Eval | ep_lengths 200.90 +/- 98.20 | ep_return 63.297 +/- 38.404 -2023-10-19 15:06:33,066 : +2023-10-27 16:59:37,987 : Eval | ep_lengths 200.50 +/- 99.00 | ep_return 62.141 +/- 36.333 +2023-10-27 16:59:38,006 : -------------------------------------- | loss/ | | -| approx_kl | 0.0233 | -| entropy_loss | -3.65 | -| policy_loss | -0.0142 | -| value_loss | 4.92 | +| approx_kl | 0.0181 | +| entropy_loss | -3.7 | +| policy_loss | -0.0126 | +| value_loss | 8.15 | | stat/ | | -| constraint_violation | 45 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 91.4 | -| ep_reward | 0.368 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 141 | +| ep_reward | 0.564 | | stat_eval/ | | | constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 63.3 | -| ep_reward | 0.253 | -| mse | 337 | +| ep_length | 200 | +| ep_return | 62.1 | +| ep_reward | 0.249 | +| mse | 368 | | time/ | | | progress | 0.07 | | step | 7e+04 | -| step_time | 10.6 | +| step_time | 14.2 | -------------------------------------- -2023-10-19 15:08:44,766 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 87.137 +/- 22.593 -2023-10-19 15:08:44,768 : +2023-10-27 17:02:17,519 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 87.225 +/- 34.884 +2023-10-27 17:02:17,544 : -------------------------------------- | loss/ | | -| approx_kl | 0.0165 | -| entropy_loss | -3.58 | -| policy_loss | -0.0133 | -| value_loss | 7.48 | +| approx_kl | 0.0209 | +| entropy_loss | -3.71 | +| policy_loss | -0.0224 | +| value_loss | 9.29 | | stat/ | | -| constraint_violation | 51 | +| constraint_violation | 0 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 96.2 | -| ep_reward | 0.385 | +| ep_return | 113 | +| ep_reward | 0.453 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 87.1 | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 87.2 | | ep_reward | 0.349 | -| mse | 368 | +| mse | 203 | | time/ | | | progress | 0.08 | | step | 8e+04 | -| step_time | 10.7 | +| step_time | 14.2 | -------------------------------------- -2023-10-19 15:10:52,192 : Eval | ep_lengths 176.00 +/- 113.04 | ep_return 59.080 +/- 45.057 -2023-10-19 15:10:52,194 : +2023-10-27 17:04:57,504 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 74.118 +/- 32.959 +2023-10-27 17:04:57,505 : -------------------------------------- | loss/ | | -| approx_kl | 0.0259 | -| entropy_loss | -3.6 | -| policy_loss | -0.013 | -| value_loss | 2.74 | +| approx_kl | 0.0267 | +| entropy_loss | -3.73 | +| policy_loss | -0.0126 | +| value_loss | 2.22 | | stat/ | | -| constraint_violation | 57 | -| ep_constraint_vio... | 0.1 | -| ep_length | 227 | -| ep_return | 70.3 | -| ep_reward | 0.287 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 111 | +| ep_reward | 0.443 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 176 | -| ep_return | 59.1 | -| ep_reward | 0.236 | -| mse | 236 | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 74.1 | +| ep_reward | 0.296 | +| mse | 265 | | time/ | | | progress | 0.09 | | step | 9e+04 | -| step_time | 11.1 | +| step_time | 15 | -------------------------------------- -2023-10-19 15:13:05,592 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 73.270 +/- 22.782 -2023-10-19 15:13:05,594 : +2023-10-27 17:07:35,959 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 104.309 +/- 30.477 +2023-10-27 17:07:35,971 : -------------------------------------- | loss/ | | -| approx_kl | 0.0293 | -| entropy_loss | -3.55 | -| policy_loss | -0.0177 | -| value_loss | 3.66 | +| approx_kl | 0.0232 | +| entropy_loss | -3.78 | +| policy_loss | -0.0172 | +| value_loss | 4.56 | | stat/ | | -| constraint_violation | 66 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 70.9 | -| ep_reward | 0.285 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 135 | +| ep_reward | 0.539 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 73.3 | -| ep_reward | 0.293 | -| mse | 425 | +| ep_return | 104 | +| ep_reward | 0.417 | +| mse | 327 | | time/ | | | progress | 0.1 | | step | 1e+05 | -| step_time | 10.9 | +| step_time | 12.1 | -------------------------------------- -2023-10-19 15:15:11,964 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 88.381 +/- 50.042 -2023-10-19 15:15:11,965 : +2023-10-27 17:10:04,462 : Eval | ep_lengths 201.30 +/- 97.42 | ep_return 67.643 +/- 38.588 +2023-10-27 17:10:04,464 : -------------------------------------- | loss/ | | -| approx_kl | 0.0237 | -| entropy_loss | -3.64 | -| policy_loss | -0.0167 | -| value_loss | 9.73 | +| approx_kl | 0.0251 | +| entropy_loss | -3.81 | +| policy_loss | -0.0158 | +| value_loss | 5.27 | | stat/ | | -| constraint_violation | 75 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 86.1 | -| ep_reward | 0.346 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 148 | +| ep_reward | 0.593 | | stat_eval/ | | | constraint_violation | 0.2 | -| ep_length | 200 | -| ep_return | 88.4 | -| ep_reward | 0.354 | -| mse | 225 | +| ep_length | 201 | +| ep_return | 67.6 | +| ep_reward | 0.271 | +| mse | 388 | | time/ | | | progress | 0.11 | | step | 1.1e+05 | -| step_time | 10.4 | +| step_time | 11.5 | -------------------------------------- -2023-10-19 15:17:15,937 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 89.695 +/- 47.630 -2023-10-19 15:17:15,959 : +2023-10-27 17:12:40,488 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 96.948 +/- 28.108 +2023-10-27 17:12:40,502 : -------------------------------------- | loss/ | | -| approx_kl | 0.0178 | -| entropy_loss | -3.66 | -| policy_loss | -0.017 | -| value_loss | 8.66 | +| approx_kl | 0.0275 | +| entropy_loss | -3.8 | +| policy_loss | -0.0065 | +| value_loss | 2.06 | | stat/ | | -| constraint_violation | 82 | +| constraint_violation | 0 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 95.7 | -| ep_reward | 0.383 | +| ep_return | 142 | +| ep_reward | 0.57 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 89.7 | -| ep_reward | 0.359 | -| mse | 309 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 96.9 | +| ep_reward | 0.388 | +| mse | 341 | | time/ | | | progress | 0.12 | | step | 1.2e+05 | -| step_time | 10.3 | +| step_time | 13 | -------------------------------------- -2023-10-19 15:19:23,827 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 98.134 +/- 23.739 -2023-10-19 15:19:23,836 : +2023-10-27 17:15:02,289 : Eval | ep_lengths 176.20 +/- 112.74 | ep_return 72.991 +/- 55.454 +2023-10-27 17:15:02,310 : -------------------------------------- | loss/ | | -| approx_kl | 0.0301 | -| entropy_loss | -3.72 | -| policy_loss | -0.0175 | -| value_loss | 3.3 | +| approx_kl | 0.0311 | +| entropy_loss | -3.84 | +| policy_loss | -0.0142 | +| value_loss | 1.3 | | stat/ | | -| constraint_violation | 87 | +| constraint_violation | 0 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 98.6 | -| ep_reward | 0.394 | +| ep_return | 139 | +| ep_reward | 0.555 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 98.1 | -| ep_reward | 0.393 | -| mse | 347 | +| constraint_violation | 0.3 | +| ep_length | 176 | +| ep_return | 73 | +| ep_reward | 0.292 | +| mse | 211 | | time/ | | | progress | 0.13 | | step | 1.3e+05 | -| step_time | 10.3 | +| step_time | 11.3 | -------------------------------------- -2023-10-19 15:21:26,312 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 88.139 +/- 52.769 -2023-10-19 15:21:26,313 : +2023-10-27 17:17:33,791 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 123.100 +/- 20.371 +2023-10-27 17:17:33,802 : -------------------------------------- | loss/ | | -| approx_kl | 0.0231 | -| entropy_loss | -3.74 | -| policy_loss | -0.00573 | -| value_loss | 5.08 | +| approx_kl | 0.0325 | +| entropy_loss | -3.85 | +| policy_loss | -0.00259 | +| value_loss | 2.45 | | stat/ | | -| constraint_violation | 91 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 101 | -| ep_reward | 0.403 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 155 | +| ep_reward | 0.621 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 200 | -| ep_return | 88.1 | -| ep_reward | 0.353 | -| mse | 189 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 123 | +| ep_reward | 0.492 | +| mse | 282 | | time/ | | | progress | 0.14 | | step | 1.4e+05 | -| step_time | 10.7 | +| step_time | 11.7 | -------------------------------------- -2023-10-19 15:23:28,769 : Eval | ep_lengths 201.40 +/- 97.21 | ep_return 89.530 +/- 56.335 -2023-10-19 15:23:28,771 : +2023-10-27 17:20:02,088 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 126.603 +/- 47.336 +2023-10-27 17:20:02,108 : -------------------------------------- | loss/ | | -| approx_kl | 0.0255 | -| entropy_loss | -3.72 | -| policy_loss | -0.00517 | -| value_loss | 2.62 | +| approx_kl | 0.033 | +| entropy_loss | -3.85 | +| policy_loss | -0.00283 | +| value_loss | 1.54 | | stat/ | | -| constraint_violation | 98 | +| constraint_violation | 0 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 114 | -| ep_reward | 0.457 | +| ep_return | 166 | +| ep_reward | 0.663 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 89.5 | -| ep_reward | 0.358 | -| mse | 272 | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 127 | +| ep_reward | 0.506 | +| mse | 223 | | time/ | | | progress | 0.15 | | step | 1.5e+05 | -| step_time | 10.4 | +| step_time | 11.6 | -------------------------------------- -2023-10-19 15:25:34,260 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 122.551 +/- 31.037 -2023-10-19 15:25:34,268 : --------------------------------------- -| loss/ | | -| approx_kl | 0.027 | -| entropy_loss | -3.72 | -| policy_loss | -0.0259 | -| value_loss | 1.6 | -| stat/ | | -| constraint_violation | 104 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 98.3 | -| ep_reward | 0.406 | -| stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 123 | -| ep_reward | 0.49 | -| mse | 300 | -| time/ | | -| progress | 0.16 | -| step | 1.6e+05 | -| step_time | 10.3 | --------------------------------------- +2023-10-27 17:22:25,768 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 121.534 +/- 41.076 +2023-10-27 17:22:25,770 : +--------------------------------------- +| loss/ | | +| approx_kl | 0.0358 | +| entropy_loss | -3.78 | +| policy_loss | -0.000457 | +| value_loss | 1.83 | +| stat/ | | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 181 | +| ep_reward | 0.723 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 122 | +| ep_reward | 0.486 | +| mse | 273 | +| time/ | | +| progress | 0.16 | +| step | 1.6e+05 | +| step_time | 12.3 | +--------------------------------------- -2023-10-19 15:27:35,049 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 126.791 +/- 27.924 -2023-10-19 15:27:35,057 : +2023-10-27 17:24:54,024 : Eval | ep_lengths 225.80 +/- 72.60 | ep_return 123.849 +/- 47.635 +2023-10-27 17:24:54,025 : -------------------------------------- | loss/ | | -| approx_kl | 0.027 | +| approx_kl | 0.042 | | entropy_loss | -3.75 | -| policy_loss | -0.0149 | -| value_loss | 1.58 | +| policy_loss | -0.024 | +| value_loss | 0.717 | | stat/ | | -| constraint_violation | 109 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 104 | -| ep_reward | 0.421 | -| stat_eval/ | | | constraint_violation | 0 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 127 | -| ep_reward | 0.507 | -| mse | 260 | +| ep_return | 172 | +| ep_reward | 0.69 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 226 | +| ep_return | 124 | +| ep_reward | 0.495 | +| mse | 221 | | time/ | | | progress | 0.17 | | step | 1.7e+05 | -| step_time | 10 | +| step_time | 12 | -------------------------------------- -2023-10-19 15:29:35,657 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 137.843 +/- 36.440 -2023-10-19 15:29:35,686 : +2023-10-27 17:27:22,392 : Eval | ep_lengths 200.90 +/- 98.20 | ep_return 110.176 +/- 59.976 +2023-10-27 17:27:22,394 : -------------------------------------- | loss/ | | -| approx_kl | 0.0301 | -| entropy_loss | -3.69 | -| policy_loss | -0.0087 | -| value_loss | 2.07 | +| approx_kl | 0.0207 | +| entropy_loss | -3.74 | +| policy_loss | -0.014 | +| value_loss | 0.556 | | stat/ | | -| constraint_violation | 111 | +| constraint_violation | 0 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 120 | -| ep_reward | 0.48 | +| ep_return | 163 | +| ep_reward | 0.653 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 138 | -| ep_reward | 0.551 | -| mse | 234 | +| constraint_violation | 0.2 | +| ep_length | 201 | +| ep_return | 110 | +| ep_reward | 0.441 | +| mse | 226 | | time/ | | | progress | 0.18 | | step | 1.8e+05 | -| step_time | 9.92 | +| step_time | 13.2 | -------------------------------------- -2023-10-19 15:31:30,166 : Eval | ep_lengths 178.20 +/- 109.68 | ep_return 84.630 +/- 59.513 -2023-10-19 15:31:30,167 : +2023-10-27 17:29:48,472 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 135.923 +/- 49.482 +2023-10-27 17:29:48,482 : -------------------------------------- | loss/ | | -| approx_kl | 0.0364 | -| entropy_loss | -3.7 | -| policy_loss | -0.00692 | -| value_loss | 3.21 | +| approx_kl | 0.0323 | +| entropy_loss | -3.74 | +| policy_loss | -0.00633 | +| value_loss | 0.336 | | stat/ | | -| constraint_violation | 118 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 107 | -| ep_reward | 0.468 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 171 | +| ep_reward | 0.683 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 178 | -| ep_return | 84.6 | -| ep_reward | 0.339 | -| mse | 177 | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 136 | +| ep_reward | 0.544 | +| mse | 206 | | time/ | | | progress | 0.19 | | step | 1.9e+05 | -| step_time | 9.75 | +| step_time | 11.9 | -------------------------------------- -2023-10-19 15:33:26,390 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 108.981 +/- 40.046 -2023-10-19 15:33:26,391 : +2023-10-27 17:32:14,568 : Eval | ep_lengths 200.20 +/- 99.60 | ep_return 115.834 +/- 62.659 +2023-10-27 17:32:14,570 : -------------------------------------- | loss/ | | -| approx_kl | 0.0317 | -| entropy_loss | -3.73 | -| policy_loss | -0.00646 | -| value_loss | 0.887 | +| approx_kl | 0.0327 | +| entropy_loss | -3.72 | +| policy_loss | -0.00155 | +| value_loss | 0.604 | | stat/ | | -| constraint_violation | 123 | +| constraint_violation | 0 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 117 | -| ep_reward | 0.467 | +| ep_return | 180 | +| ep_reward | 0.719 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 109 | -| ep_reward | 0.436 | -| mse | 333 | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 116 | +| ep_reward | 0.463 | +| mse | 220 | | time/ | | | progress | 0.2 | | step | 2e+05 | -| step_time | 9.71 | +| step_time | 14.4 | -------------------------------------- -2023-10-19 15:35:24,119 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 115.554 +/- 16.667 -2023-10-19 15:35:24,120 : +2023-10-27 17:34:40,144 : Eval | ep_lengths 176.60 +/- 112.16 | ep_return 97.291 +/- 68.071 +2023-10-27 17:34:40,145 : -------------------------------------- | loss/ | | -| approx_kl | 0.00857 | -| entropy_loss | -3.77 | -| policy_loss | -0.0148 | -| value_loss | 2.28 | +| approx_kl | 0.0316 | +| entropy_loss | -3.7 | +| policy_loss | -0.0104 | +| value_loss | 1.62 | | stat/ | | -| constraint_violation | 131 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 120 | -| ep_reward | 0.481 | +| constraint_violation | 1 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 161 | +| ep_reward | 0.688 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 116 | -| ep_reward | 0.462 | -| mse | 298 | +| constraint_violation | 0.3 | +| ep_length | 177 | +| ep_return | 97.3 | +| ep_reward | 0.389 | +| mse | 205 | | time/ | | | progress | 0.21 | | step | 2.1e+05 | -| step_time | 9.85 | +| step_time | 14.8 | -------------------------------------- -2023-10-19 15:37:18,078 : Eval | ep_lengths 200.70 +/- 98.60 | ep_return 105.048 +/- 55.470 -2023-10-19 15:37:18,100 : ---------------------------------------- -| loss/ | | -| approx_kl | 0.0257 | -| entropy_loss | -3.8 | -| policy_loss | -0.000974 | -| value_loss | 1.85 | -| stat/ | | -| constraint_violation | 140 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 117 | -| ep_reward | 0.471 | -| stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 105 | -| ep_reward | 0.42 | -| mse | 229 | -| time/ | | -| progress | 0.22 | -| step | 2.2e+05 | -| step_time | 9.61 | ---------------------------------------- +2023-10-27 17:37:04,393 : Eval | ep_lengths 201.60 +/- 96.83 | ep_return 121.195 +/- 62.148 +2023-10-27 17:37:04,394 : +-------------------------------------- +| loss/ | | +| approx_kl | 0.0348 | +| entropy_loss | -3.66 | +| policy_loss | -0.0086 | +| value_loss | 1.28 | +| stat/ | | +| constraint_violation | 1 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 171 | +| ep_reward | 0.684 | +| stat_eval/ | | +| constraint_violation | 0.2 | +| ep_length | 202 | +| ep_return | 121 | +| ep_reward | 0.485 | +| mse | 174 | +| time/ | | +| progress | 0.22 | +| step | 2.2e+05 | +| step_time | 11.3 | +-------------------------------------- -2023-10-19 15:39:15,776 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 136.728 +/- 29.810 -2023-10-19 15:39:15,778 : +2023-10-27 17:39:38,207 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 147.137 +/- 23.180 +2023-10-27 17:39:38,225 : -------------------------------------- | loss/ | | -| approx_kl | 0.0287 | -| entropy_loss | -3.83 | -| policy_loss | -0.0163 | -| value_loss | 1.54 | +| approx_kl | 0.0436 | +| entropy_loss | -3.67 | +| policy_loss | -0.0117 | +| value_loss | 0.542 | | stat/ | | -| constraint_violation | 149 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 102 | -| ep_reward | 0.484 | +| constraint_violation | 1 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 184 | +| ep_reward | 0.734 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 137 | -| ep_reward | 0.547 | -| mse | 205 | +| ep_return | 147 | +| ep_reward | 0.589 | +| mse | 260 | | time/ | | | progress | 0.23 | | step | 2.3e+05 | -| step_time | 10.1 | +| step_time | 13.3 | -------------------------------------- -2023-10-19 15:41:10,364 : Eval | ep_lengths 225.80 +/- 72.60 | ep_return 117.608 +/- 44.439 -2023-10-19 15:41:10,366 : +2023-10-27 17:42:04,119 : Eval | ep_lengths 200.20 +/- 99.60 | ep_return 107.005 +/- 58.584 +2023-10-27 17:42:04,121 : -------------------------------------- | loss/ | | -| approx_kl | 0.0223 | -| entropy_loss | -3.77 | -| policy_loss | -0.0103 | -| value_loss | 0.811 | +| approx_kl | 0.0312 | +| entropy_loss | -3.64 | +| policy_loss | -0.0064 | +| value_loss | 0.578 | | stat/ | | -| constraint_violation | 156 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 125 | -| ep_reward | 0.5 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 179 | +| ep_reward | 0.716 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 226 | -| ep_return | 118 | -| ep_reward | 0.47 | -| mse | 232 | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 107 | +| ep_reward | 0.428 | +| mse | 239 | | time/ | | | progress | 0.24 | | step | 2.4e+05 | -| step_time | 9.71 | +| step_time | 12.4 | -------------------------------------- -2023-10-19 15:43:06,594 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 131.117 +/- 28.611 -2023-10-19 15:43:06,595 : +2023-10-27 17:44:31,796 : Eval | ep_lengths 201.90 +/- 96.26 | ep_return 123.540 +/- 65.610 +2023-10-27 17:44:31,798 : +--------------------------------------- +| loss/ | | +| approx_kl | 0.0211 | +| entropy_loss | -3.63 | +| policy_loss | -0.000292 | +| value_loss | 0.487 | +| stat/ | | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 184 | +| ep_reward | 0.735 | +| stat_eval/ | | +| constraint_violation | 0.2 | +| ep_length | 202 | +| ep_return | 124 | +| ep_reward | 0.494 | +| mse | 224 | +| time/ | | +| progress | 0.25 | +| step | 2.5e+05 | +| step_time | 11.6 | +--------------------------------------- + +2023-10-27 17:47:05,115 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 147.999 +/- 21.289 +2023-10-27 17:47:05,126 : -------------------------------------- | loss/ | | -| approx_kl | 0.0167 | -| entropy_loss | -3.74 | -| policy_loss | -0.0126 | -| value_loss | 1.63 | +| approx_kl | 0.0179 | +| entropy_loss | -3.62 | +| policy_loss | -0.0194 | +| value_loss | 0.554 | | stat/ | | -| constraint_violation | 160 | -| ep_constraint_vio... | 0.2 | -| ep_length | 202 | -| ep_return | 109 | -| ep_reward | 0.439 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 173 | +| ep_reward | 0.693 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 131 | -| ep_reward | 0.524 | -| mse | 337 | -| time/ | | -| progress | 0.25 | -| step | 2.5e+05 | -| step_time | 9.98 | --------------------------------------- - -2023-10-19 15:45:07,911 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 136.730 +/- 54.696 -2023-10-19 15:45:07,913 : --------------------------------------- -| loss/ | | -| approx_kl | 0.0198 | -| entropy_loss | -3.77 | -| policy_loss | -0.017 | -| value_loss | 0.975 | -| stat/ | | -| constraint_violation | 168 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 107 | -| ep_reward | 0.462 | -| stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 137 | -| ep_reward | 0.548 | -| mse | 201 | +| ep_return | 148 | +| ep_reward | 0.592 | +| mse | 247 | | time/ | | | progress | 0.26 | | step | 2.6e+05 | -| step_time | 10.3 | +| step_time | 11.8 | -------------------------------------- -2023-10-19 15:47:14,447 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 118.577 +/- 40.882 -2023-10-19 15:47:14,449 : +2023-10-27 17:49:29,355 : Eval | ep_lengths 175.30 +/- 114.11 | ep_return 103.967 +/- 71.443 +2023-10-27 17:49:29,356 : -------------------------------------- | loss/ | | -| approx_kl | 0.0295 | -| entropy_loss | -3.81 | -| policy_loss | -0.00602 | -| value_loss | 2.07 | +| approx_kl | 0.0256 | +| entropy_loss | -3.55 | +| policy_loss | 0.0041 | +| value_loss | 0.834 | | stat/ | | -| constraint_violation | 175 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 138 | -| ep_reward | 0.591 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 175 | +| ep_reward | 0.701 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 119 | -| ep_reward | 0.474 | -| mse | 278 | +| constraint_violation | 0.3 | +| ep_length | 175 | +| ep_return | 104 | +| ep_reward | 0.416 | +| mse | 172 | | time/ | | | progress | 0.27 | | step | 2.7e+05 | -| step_time | 10.3 | +| step_time | 12.2 | -------------------------------------- -2023-10-19 15:49:14,734 : Eval | ep_lengths 177.00 +/- 111.52 | ep_return 95.775 +/- 66.466 -2023-10-19 15:49:14,735 : +2023-10-27 17:52:03,574 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 155.128 +/- 54.311 +2023-10-27 17:52:03,586 : -------------------------------------- | loss/ | | -| approx_kl | 0.0277 | -| entropy_loss | -3.84 | -| policy_loss | -0.00335 | -| value_loss | 1.67 | +| approx_kl | 0.045 | +| entropy_loss | -3.58 | +| policy_loss | 0.00624 | +| value_loss | 0.567 | | stat/ | | -| constraint_violation | 180 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 124 | -| ep_reward | 0.499 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 185 | +| ep_reward | 0.739 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 177 | -| ep_return | 95.8 | -| ep_reward | 0.383 | -| mse | 218 | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 155 | +| ep_reward | 0.621 | +| mse | 123 | | time/ | | | progress | 0.28 | | step | 2.8e+05 | -| step_time | 10.4 | +| step_time | 12.9 | -------------------------------------- -2023-10-19 15:51:17,581 : Eval | ep_lengths 202.80 +/- 94.43 | ep_return 100.790 +/- 51.291 -2023-10-19 15:51:17,583 : +2023-10-27 17:54:25,735 : Eval | ep_lengths 201.40 +/- 97.21 | ep_return 123.749 +/- 66.818 +2023-10-27 17:54:25,737 : -------------------------------------- | loss/ | | -| approx_kl | 0.031 | -| entropy_loss | -3.83 | -| policy_loss | -0.00196 | -| value_loss | 0.995 | +| approx_kl | 0.0325 | +| entropy_loss | -3.53 | +| policy_loss | -0.00984 | +| value_loss | 0.356 | | stat/ | | -| constraint_violation | 189 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 126 | -| ep_reward | 0.505 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 176 | +| ep_reward | 0.703 | | stat_eval/ | | | constraint_violation | 0.2 | -| ep_length | 203 | -| ep_return | 101 | -| ep_reward | 0.403 | -| mse | 343 | +| ep_length | 201 | +| ep_return | 124 | +| ep_reward | 0.495 | +| mse | 160 | | time/ | | | progress | 0.29 | | step | 2.9e+05 | -| step_time | 10.3 | +| step_time | 12.8 | -------------------------------------- -2023-10-19 15:53:18,099 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 115.923 +/- 41.676 -2023-10-19 15:53:18,101 : +2023-10-27 17:56:51,028 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 118.483 +/- 41.952 +2023-10-27 17:56:51,029 : -------------------------------------- | loss/ | | -| approx_kl | 0.0253 | -| entropy_loss | -3.8 | -| policy_loss | -0.009 | -| value_loss | 0.702 | +| approx_kl | 0.032 | +| entropy_loss | -3.51 | +| policy_loss | 0.00361 | +| value_loss | 0.458 | | stat/ | | -| constraint_violation | 198 | +| constraint_violation | 2 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 150 | -| ep_reward | 0.601 | +| ep_return | 189 | +| ep_reward | 0.755 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 116 | -| ep_reward | 0.464 | -| mse | 325 | +| ep_return | 118 | +| ep_reward | 0.474 | +| mse | 297 | | time/ | | | progress | 0.3 | | step | 3e+05 | -| step_time | 10 | +| step_time | 11.4 | -------------------------------------- -2023-10-19 15:55:14,501 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 132.656 +/- 51.071 -2023-10-19 15:55:14,502 : +2023-10-27 17:59:26,689 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 135.651 +/- 16.958 +2023-10-27 17:59:26,691 : -------------------------------------- | loss/ | | -| approx_kl | 0.0264 | -| entropy_loss | -3.8 | -| policy_loss | -0.0119 | -| value_loss | 0.798 | +| approx_kl | 0.0313 | +| entropy_loss | -3.55 | +| policy_loss | -0.0072 | +| value_loss | 0.517 | | stat/ | | -| constraint_violation | 202 | +| constraint_violation | 2 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 162 | -| ep_reward | 0.647 | +| ep_return | 179 | +| ep_reward | 0.714 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 133 | -| ep_reward | 0.531 | -| mse | 263 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 136 | +| ep_reward | 0.543 | +| mse | 293 | | time/ | | | progress | 0.31 | | step | 3.1e+05 | -| step_time | 9.68 | +| step_time | 11.5 | -------------------------------------- -2023-10-19 15:57:13,215 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 133.846 +/- 23.206 -2023-10-19 15:57:13,216 : +2023-10-27 18:01:56,418 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 146.780 +/- 24.985 +2023-10-27 18:01:56,419 : -------------------------------------- | loss/ | | -| approx_kl | 0.0363 | -| entropy_loss | -3.84 | -| policy_loss | -0.0144 | -| value_loss | 0.732 | +| approx_kl | 0.0339 | +| entropy_loss | -3.5 | +| policy_loss | -0.0116 | +| value_loss | 0.413 | | stat/ | | -| constraint_violation | 209 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 138 | -| ep_reward | 0.575 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 192 | +| ep_reward | 0.766 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 134 | -| ep_reward | 0.535 | -| mse | 337 | +| ep_return | 147 | +| ep_reward | 0.587 | +| mse | 303 | | time/ | | | progress | 0.32 | | step | 3.2e+05 | -| step_time | 9.75 | +| step_time | 12.7 | -------------------------------------- -2023-10-19 15:59:07,348 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 118.936 +/- 45.809 -2023-10-19 15:59:07,350 : +2023-10-27 18:04:20,758 : Eval | ep_lengths 202.00 +/- 96.08 | ep_return 120.736 +/- 65.375 +2023-10-27 18:04:20,759 : -------------------------------------- | loss/ | | -| approx_kl | 0.0297 | -| entropy_loss | -3.84 | -| policy_loss | -0.0129 | -| value_loss | 0.705 | +| approx_kl | 0.0299 | +| entropy_loss | -3.47 | +| policy_loss | -0.00852 | +| value_loss | 0.28 | | stat/ | | -| constraint_violation | 212 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 131 | -| ep_reward | 0.528 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 185 | +| ep_reward | 0.74 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 226 | -| ep_return | 119 | -| ep_reward | 0.476 | -| mse | 272 | +| constraint_violation | 0.2 | +| ep_length | 202 | +| ep_return | 121 | +| ep_reward | 0.483 | +| mse | 196 | | time/ | | | progress | 0.33 | | step | 3.3e+05 | -| step_time | 9.26 | +| step_time | 11.9 | -------------------------------------- -2023-10-19 16:01:01,546 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 122.034 +/- 44.782 -2023-10-19 16:01:01,547 : +2023-10-27 18:06:47,532 : Eval | ep_lengths 200.50 +/- 99.00 | ep_return 119.358 +/- 62.047 +2023-10-27 18:06:47,534 : -------------------------------------- | loss/ | | -| approx_kl | 0.041 | -| entropy_loss | -3.82 | -| policy_loss | -0.00211 | -| value_loss | 1.26 | +| approx_kl | 0.0279 | +| entropy_loss | -3.4 | +| policy_loss | -0.0173 | +| value_loss | 0.281 | | stat/ | | -| constraint_violation | 218 | -| ep_constraint_vio... | 0.3 | -| ep_length | 177 | -| ep_return | 95.8 | -| ep_reward | 0.387 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 184 | +| ep_reward | 0.735 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 122 | -| ep_reward | 0.488 | -| mse | 356 | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 119 | +| ep_reward | 0.477 | +| mse | 212 | | time/ | | | progress | 0.34 | | step | 3.4e+05 | -| step_time | 9.96 | +| step_time | 12.1 | -------------------------------------- -2023-10-19 16:02:57,695 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 156.433 +/- 29.947 -2023-10-19 16:02:57,704 : +2023-10-27 18:09:16,961 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 149.130 +/- 36.094 +2023-10-27 18:09:16,963 : -------------------------------------- | loss/ | | -| approx_kl | 0.024 | -| entropy_loss | -3.8 | -| policy_loss | -0.00926 | -| value_loss | 0.697 | +| approx_kl | 0.034 | +| entropy_loss | -3.36 | +| policy_loss | -0.00307 | +| value_loss | 0.566 | | stat/ | | -| constraint_violation | 226 | -| ep_constraint_vio... | 0.2 | -| ep_length | 202 | -| ep_return | 116 | -| ep_reward | 0.491 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 183 | +| ep_reward | 0.733 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 156 | -| ep_reward | 0.626 | -| mse | 176 | +| ep_return | 149 | +| ep_reward | 0.597 | +| mse | 325 | | time/ | | | progress | 0.35 | | step | 3.5e+05 | -| step_time | 9.51 | +| step_time | 12.3 | -------------------------------------- -2023-10-19 16:04:47,816 : Eval | ep_lengths 151.10 +/- 121.14 | ep_return 89.795 +/- 74.344 -2023-10-19 16:04:47,817 : +2023-10-27 18:11:44,600 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 134.938 +/- 51.511 +2023-10-27 18:11:44,602 : -------------------------------------- | loss/ | | -| approx_kl | 0.0311 | -| entropy_loss | -3.75 | -| policy_loss | -0.00161 | -| value_loss | 1.97 | +| approx_kl | 0.0348 | +| entropy_loss | -3.39 | +| policy_loss | -0.0021 | +| value_loss | 0.85 | | stat/ | | -| constraint_violation | 231 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 113 | -| ep_reward | 0.458 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 187 | +| ep_reward | 0.746 | | stat_eval/ | | -| constraint_violation | 0.4 | -| ep_length | 151 | -| ep_return | 89.8 | -| ep_reward | 0.359 | -| mse | 119 | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 135 | +| ep_reward | 0.54 | +| mse | 227 | | time/ | | | progress | 0.36 | | step | 3.6e+05 | -| step_time | 9.53 | +| step_time | 12.3 | -------------------------------------- -2023-10-19 16:06:41,510 : Eval | ep_lengths 226.60 +/- 70.20 | ep_return 126.502 +/- 45.855 -2023-10-19 16:06:41,511 : +2023-10-27 18:14:10,098 : Eval | ep_lengths 202.30 +/- 95.41 | ep_return 125.657 +/- 65.734 +2023-10-27 18:14:10,099 : -------------------------------------- | loss/ | | -| approx_kl | 0.026 | -| entropy_loss | -3.79 | -| policy_loss | -0.00152 | -| value_loss | 0.488 | +| approx_kl | 0.0288 | +| entropy_loss | -3.4 | +| policy_loss | -0.0129 | +| value_loss | 0.348 | | stat/ | | -| constraint_violation | 235 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 153 | -| ep_reward | 0.614 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0.1 | +| ep_length | 225 | +| ep_return | 173 | +| ep_reward | 0.74 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 227 | -| ep_return | 127 | -| ep_reward | 0.506 | -| mse | 220 | +| constraint_violation | 0.2 | +| ep_length | 202 | +| ep_return | 126 | +| ep_reward | 0.503 | +| mse | 173 | | time/ | | | progress | 0.37 | | step | 3.7e+05 | -| step_time | 9.72 | +| step_time | 12.8 | -------------------------------------- -2023-10-19 16:08:33,239 : Eval | ep_lengths 200.80 +/- 98.40 | ep_return 120.394 +/- 64.318 -2023-10-19 16:08:33,240 : +2023-10-27 18:16:36,296 : Eval | ep_lengths 201.00 +/- 98.02 | ep_return 117.147 +/- 63.794 +2023-10-27 18:16:36,297 : -------------------------------------- | loss/ | | -| approx_kl | 0.0287 | -| entropy_loss | -3.71 | -| policy_loss | -0.0087 | -| value_loss | 1.1 | +| approx_kl | 0.0351 | +| entropy_loss | -3.38 | +| policy_loss | -0.00459 | +| value_loss | 0.265 | | stat/ | | -| constraint_violation | 243 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 143 | -| ep_reward | 0.586 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 180 | +| ep_reward | 0.721 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 201 | -| ep_return | 120 | -| ep_reward | 0.482 | -| mse | 218 | +| ep_return | 117 | +| ep_reward | 0.469 | +| mse | 190 | | time/ | | | progress | 0.38 | | step | 3.8e+05 | -| step_time | 9.43 | +| step_time | 12.7 | -------------------------------------- -2023-10-19 16:10:22,602 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 118.351 +/- 61.109 -2023-10-19 16:10:22,627 : +2023-10-27 18:19:07,768 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 113.062 +/- 58.817 +2023-10-27 18:19:07,769 : -------------------------------------- | loss/ | | -| approx_kl | 0.0248 | -| entropy_loss | -3.64 | -| policy_loss | -0.00668 | -| value_loss | 1.08 | +| approx_kl | 0.0383 | +| entropy_loss | -3.39 | +| policy_loss | 0.00365 | +| value_loss | 0.236 | | stat/ | | -| constraint_violation | 246 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 133 | -| ep_reward | 0.531 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 191 | +| ep_reward | 0.762 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 200 | -| ep_return | 118 | -| ep_reward | 0.473 | -| mse | 178 | +| ep_return | 113 | +| ep_reward | 0.452 | +| mse | 270 | | time/ | | | progress | 0.39 | | step | 3.9e+05 | -| step_time | 9.19 | +| step_time | 11.2 | -------------------------------------- -2023-10-19 16:12:17,801 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 137.009 +/- 19.354 -2023-10-19 16:12:17,834 : +2023-10-27 18:21:31,060 : Eval | ep_lengths 225.60 +/- 73.20 | ep_return 130.823 +/- 50.931 +2023-10-27 18:21:31,061 : -------------------------------------- | loss/ | | -| approx_kl | 0.0223 | -| entropy_loss | -3.65 | -| policy_loss | -0.0151 | -| value_loss | 1.88 | +| approx_kl | 0.0218 | +| entropy_loss | -3.38 | +| policy_loss | 0.000803 | +| value_loss | 0.159 | | stat/ | | -| constraint_violation | 251 | -| ep_constraint_vio... | 0.1 | -| ep_length | 227 | -| ep_return | 141 | -| ep_reward | 0.566 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 137 | -| ep_reward | 0.548 | -| mse | 297 | -| time/ | | -| progress | 0.4 | -| step | 4e+05 | -| step_time | 9.46 | --------------------------------------- - -2023-10-19 16:14:12,858 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 119.745 +/- 44.475 -2023-10-19 16:14:12,860 : --------------------------------------- -| loss/ | | -| approx_kl | 0.0274 | -| entropy_loss | -3.67 | -| policy_loss | -0.0168 | -| value_loss | 2.81 | -| stat/ | | -| constraint_violation | 260 | -| ep_constraint_vio... | 0.3 | -| ep_length | 176 | -| ep_return | 98.1 | -| ep_reward | 0.474 | +| ep_return | 188 | +| ep_reward | 0.752 | | stat_eval/ | | | constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 120 | -| ep_reward | 0.479 | -| mse | 218 | +| ep_length | 226 | +| ep_return | 131 | +| ep_reward | 0.523 | +| mse | 233 | | time/ | | -| progress | 0.41 | -| step | 4.1e+05 | -| step_time | 9.75 | +| progress | 0.4 | +| step | 4e+05 | +| step_time | 11.8 | -------------------------------------- -2023-10-19 16:16:08,742 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 145.670 +/- 21.933 -2023-10-19 16:16:08,744 : +2023-10-27 18:23:59,104 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 145.760 +/- 23.351 +2023-10-27 18:23:59,106 : -------------------------------------- | loss/ | | -| approx_kl | 0.0297 | -| entropy_loss | -3.7 | -| policy_loss | -0.0128 | -| value_loss | 1.93 | +| approx_kl | 0.0283 | +| entropy_loss | -3.34 | +| policy_loss | 0.0057 | +| value_loss | 0.19 | | stat/ | | -| constraint_violation | 266 | +| constraint_violation | 3 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 160 | -| ep_reward | 0.64 | +| ep_return | 185 | +| ep_reward | 0.74 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | | ep_return | 146 | | ep_reward | 0.583 | -| mse | 257 | +| mse | 241 | | time/ | | -| progress | 0.42 | -| step | 4.2e+05 | -| step_time | 9.31 | +| progress | 0.41 | +| step | 4.1e+05 | +| step_time | 13.9 | -------------------------------------- -2023-10-19 16:18:00,615 : Eval | ep_lengths 200.80 +/- 98.40 | ep_return 97.693 +/- 52.694 -2023-10-19 16:18:00,616 : +2023-10-27 18:26:20,899 : Eval | ep_lengths 176.70 +/- 111.97 | ep_return 107.412 +/- 72.228 +2023-10-27 18:26:20,901 : +--------------------------------------- +| loss/ | | +| approx_kl | 0.0411 | +| entropy_loss | -3.38 | +| policy_loss | -0.000266 | +| value_loss | 0.878 | +| stat/ | | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 183 | +| ep_reward | 0.732 | +| stat_eval/ | | +| constraint_violation | 0.3 | +| ep_length | 177 | +| ep_return | 107 | +| ep_reward | 0.43 | +| mse | 140 | +| time/ | | +| progress | 0.42 | +| step | 4.2e+05 | +| step_time | 11.9 | +--------------------------------------- + +2023-10-27 18:28:48,775 : Eval | ep_lengths 175.70 +/- 113.50 | ep_return 102.961 +/- 68.464 +2023-10-27 18:28:48,776 : -------------------------------------- | loss/ | | -| approx_kl | 0.0266 | -| entropy_loss | -3.7 | -| policy_loss | -0.0073 | -| value_loss | 2.3 | +| approx_kl | 0.0258 | +| entropy_loss | -3.42 | +| policy_loss | 0.00198 | +| value_loss | 0.309 | | stat/ | | -| constraint_violation | 270 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 117 | -| ep_reward | 0.487 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 191 | +| ep_reward | 0.763 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 97.7 | -| ep_reward | 0.391 | -| mse | 231 | +| constraint_violation | 0.3 | +| ep_length | 176 | +| ep_return | 103 | +| ep_reward | 0.412 | +| mse | 173 | | time/ | | | progress | 0.43 | | step | 4.3e+05 | -| step_time | 9.43 | +| step_time | 12.4 | -------------------------------------- -2023-10-19 16:19:52,125 : Eval | ep_lengths 201.60 +/- 96.82 | ep_return 111.683 +/- 57.507 -2023-10-19 16:19:52,126 : +2023-10-27 18:31:14,571 : Eval | ep_lengths 201.80 +/- 96.45 | ep_return 121.963 +/- 63.385 +2023-10-27 18:31:14,573 : -------------------------------------- | loss/ | | -| approx_kl | 0.0228 | -| entropy_loss | -3.67 | -| policy_loss | -0.0116 | -| value_loss | 0.429 | +| approx_kl | 0.0358 | +| entropy_loss | -3.33 | +| policy_loss | -0.0183 | +| value_loss | 0.123 | | stat/ | | -| constraint_violation | 280 | -| ep_constraint_vio... | 0.4 | -| ep_length | 152 | -| ep_return | 98.5 | -| ep_reward | 0.4 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 186 | +| ep_reward | 0.746 | | stat_eval/ | | | constraint_violation | 0.2 | | ep_length | 202 | -| ep_return | 112 | -| ep_reward | 0.447 | -| mse | 274 | +| ep_return | 122 | +| ep_reward | 0.488 | +| mse | 210 | | time/ | | | progress | 0.44 | | step | 4.4e+05 | -| step_time | 9.55 | +| step_time | 12.4 | -------------------------------------- -2023-10-19 16:21:43,161 : Eval | ep_lengths 176.40 +/- 112.45 | ep_return 99.006 +/- 66.628 -2023-10-19 16:21:43,163 : +2023-10-27 18:33:43,979 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 158.863 +/- 23.155 +2023-10-27 18:33:43,994 : -------------------------------------- | loss/ | | -| approx_kl | 0.0287 | -| entropy_loss | -3.68 | -| policy_loss | -0.00998 | -| value_loss | 1.44 | +| approx_kl | 0.0381 | +| entropy_loss | -3.34 | +| policy_loss | 0.00256 | +| value_loss | 0.255 | | stat/ | | -| constraint_violation | 286 | -| ep_constraint_vio... | 0.2 | -| ep_length | 203 | -| ep_return | 119 | -| ep_reward | 0.476 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 195 | +| ep_reward | 0.778 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 176 | -| ep_return | 99 | -| ep_reward | 0.396 | -| mse | 178 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 159 | +| ep_reward | 0.635 | +| mse | 200 | | time/ | | | progress | 0.45 | | step | 4.5e+05 | -| step_time | 9.38 | +| step_time | 11.2 | -------------------------------------- -2023-10-19 16:23:37,504 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 136.882 +/- 49.319 -2023-10-19 16:23:37,542 : +2023-10-27 18:36:16,075 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 151.856 +/- 55.495 +2023-10-27 18:36:16,077 : -------------------------------------- | loss/ | | -| approx_kl | 0.0156 | -| entropy_loss | -3.68 | -| policy_loss | -0.0303 | -| value_loss | 0.86 | +| approx_kl | 0.0253 | +| entropy_loss | -3.27 | +| policy_loss | -0.00812 | +| value_loss | 0.31 | | stat/ | | -| constraint_violation | 292 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 132 | -| ep_reward | 0.529 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 187 | +| ep_reward | 0.746 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 137 | -| ep_reward | 0.548 | -| mse | 164 | +| ep_return | 152 | +| ep_reward | 0.607 | +| mse | 100 | | time/ | | | progress | 0.46 | | step | 4.6e+05 | -| step_time | 9.65 | +| step_time | 12.8 | -------------------------------------- -2023-10-19 16:25:34,085 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 139.931 +/- 20.317 -2023-10-19 16:25:34,086 : --------------------------------------- -| loss/ | | -| approx_kl | 0.031 | -| entropy_loss | -3.64 | -| policy_loss | -0.0128 | -| value_loss | 0.562 | -| stat/ | | -| constraint_violation | 296 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 140 | -| ep_reward | 0.561 | -| stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 140 | -| ep_reward | 0.56 | -| mse | 214 | -| time/ | | -| progress | 0.47 | -| step | 4.7e+05 | -| step_time | 9.66 | --------------------------------------- +2023-10-27 18:38:40,578 : Eval | ep_lengths 226.20 +/- 71.40 | ep_return 125.772 +/- 44.797 +2023-10-27 18:38:40,579 : +--------------------------------------- +| loss/ | | +| approx_kl | 0.0354 | +| entropy_loss | -3.26 | +| policy_loss | -0.000706 | +| value_loss | 0.205 | +| stat/ | | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 191 | +| ep_reward | 0.764 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 226 | +| ep_return | 126 | +| ep_reward | 0.503 | +| mse | 242 | +| time/ | | +| progress | 0.47 | +| step | 4.7e+05 | +| step_time | 12.1 | +--------------------------------------- -2023-10-19 16:27:30,390 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 135.750 +/- 17.961 -2023-10-19 16:27:30,391 : --------------------------------------- -| loss/ | | -| approx_kl | 0.0225 | -| entropy_loss | -3.7 | -| policy_loss | -0.0119 | -| value_loss | 1.4 | -| stat/ | | -| constraint_violation | 304 | -| ep_constraint_vio... | 0.4 | -| ep_length | 152 | -| ep_return | 88.2 | -| ep_reward | 0.382 | -| stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 136 | -| ep_reward | 0.543 | -| mse | 305 | -| time/ | | -| progress | 0.48 | -| step | 4.8e+05 | -| step_time | 9.7 | --------------------------------------- +2023-10-27 18:40:59,520 : Eval | ep_lengths 176.30 +/- 112.59 | ep_return 108.854 +/- 73.426 +2023-10-27 18:40:59,522 : +--------------------------------------- +| loss/ | | +| approx_kl | 0.0293 | +| entropy_loss | -3.28 | +| policy_loss | -0.000908 | +| value_loss | 0.281 | +| stat/ | | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 186 | +| ep_reward | 0.742 | +| stat_eval/ | | +| constraint_violation | 0.3 | +| ep_length | 176 | +| ep_return | 109 | +| ep_reward | 0.435 | +| mse | 159 | +| time/ | | +| progress | 0.48 | +| step | 4.8e+05 | +| step_time | 13.6 | +--------------------------------------- -2023-10-19 16:29:23,687 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 133.883 +/- 47.129 -2023-10-19 16:29:23,688 : +2023-10-27 18:43:31,713 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 122.980 +/- 50.625 +2023-10-27 18:43:31,714 : -------------------------------------- | loss/ | | -| approx_kl | 0.0301 | -| entropy_loss | -3.72 | -| policy_loss | -0.0154 | -| value_loss | 2.32 | +| approx_kl | 0.0268 | +| entropy_loss | -3.3 | +| policy_loss | -0.00473 | +| value_loss | 0.62 | | stat/ | | -| constraint_violation | 310 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 141 | -| ep_reward | 0.593 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 175 | +| ep_reward | 0.699 | | stat_eval/ | | | constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 134 | -| ep_reward | 0.536 | -| mse | 198 | +| ep_length | 226 | +| ep_return | 123 | +| ep_reward | 0.492 | +| mse | 327 | | time/ | | | progress | 0.49 | | step | 4.9e+05 | -| step_time | 9.64 | +| step_time | 12.7 | -------------------------------------- -2023-10-19 16:31:17,711 : Eval | ep_lengths 226.80 +/- 69.60 | ep_return 131.704 +/- 48.849 -2023-10-19 16:31:17,713 : +2023-10-27 18:46:07,343 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 141.135 +/- 12.514 +2023-10-27 18:46:07,345 : -------------------------------------- | loss/ | | -| approx_kl | 0.0347 | -| entropy_loss | -3.69 | -| policy_loss | -0.00405 | -| value_loss | 2.1 | +| approx_kl | 0.0312 | +| entropy_loss | -3.28 | +| policy_loss | -0.0144 | +| value_loss | 0.373 | | stat/ | | -| constraint_violation | 321 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 147 | -| ep_reward | 0.593 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 181 | +| ep_reward | 0.722 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 227 | -| ep_return | 132 | -| ep_reward | 0.527 | -| mse | 232 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 141 | +| ep_reward | 0.565 | +| mse | 262 | | time/ | | | progress | 0.5 | | step | 5e+05 | -| step_time | 9.27 | +| step_time | 14.8 | -------------------------------------- -2023-10-19 16:33:13,412 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 137.717 +/- 11.520 -2023-10-19 16:33:13,414 : +2023-10-27 18:48:36,609 : Eval | ep_lengths 200.50 +/- 99.00 | ep_return 124.891 +/- 66.395 +2023-10-27 18:48:36,611 : -------------------------------------- | loss/ | | -| approx_kl | 0.0397 | -| entropy_loss | -3.65 | -| policy_loss | 0.0158 | -| value_loss | 0.808 | +| approx_kl | 0.0302 | +| entropy_loss | -3.24 | +| policy_loss | -0.013 | +| value_loss | 0.28 | | stat/ | | -| constraint_violation | 328 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 152 | -| ep_reward | 0.607 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 138 | -| ep_reward | 0.551 | -| mse | 284 | +| ep_return | 182 | +| ep_reward | 0.727 | +| stat_eval/ | | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 125 | +| ep_reward | 0.5 | +| mse | 138 | | time/ | | | progress | 0.51 | | step | 5.1e+05 | -| step_time | 9.45 | +| step_time | 12.8 | -------------------------------------- -2023-10-19 16:34:55,179 : Eval | ep_lengths 153.70 +/- 117.96 | ep_return 93.436 +/- 77.554 -2023-10-19 16:34:55,180 : +2023-10-27 18:51:02,882 : Eval | ep_lengths 175.90 +/- 113.20 | ep_return 116.365 +/- 78.891 +2023-10-27 18:51:02,883 : -------------------------------------- | loss/ | | -| approx_kl | 0.0194 | -| entropy_loss | -3.67 | -| policy_loss | -0.0135 | -| value_loss | 1.05 | +| approx_kl | 0.0306 | +| entropy_loss | -3.2 | +| policy_loss | -0.00734 | +| value_loss | 0.178 | | stat/ | | -| constraint_violation | 334 | -| ep_constraint_vio... | 0.3 | -| ep_length | 176 | -| ep_return | 104 | -| ep_reward | 0.419 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 186 | +| ep_reward | 0.744 | | stat_eval/ | | -| constraint_violation | 0.4 | -| ep_length | 154 | -| ep_return | 93.4 | -| ep_reward | 0.374 | -| mse | 123 | +| constraint_violation | 0.3 | +| ep_length | 176 | +| ep_return | 116 | +| ep_reward | 0.465 | +| mse | 120 | | time/ | | | progress | 0.52 | | step | 5.2e+05 | -| step_time | 8.86 | +| step_time | 15 | -------------------------------------- -2023-10-19 16:36:42,948 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 138.862 +/- 12.722 -2023-10-19 16:36:42,949 : +2023-10-27 18:53:36,775 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 146.940 +/- 23.832 +2023-10-27 18:53:36,776 : -------------------------------------- | loss/ | | -| approx_kl | 0.0311 | -| entropy_loss | -3.6 | -| policy_loss | -0.00561 | -| value_loss | 1.15 | +| approx_kl | 0.0341 | +| entropy_loss | -3.17 | +| policy_loss | -0.00446 | +| value_loss | 0.152 | | stat/ | | -| constraint_violation | 340 | -| ep_constraint_vio... | 0.2 | -| ep_length | 202 | -| ep_return | 124 | -| ep_reward | 0.496 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 173 | +| ep_reward | 0.694 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 139 | -| ep_reward | 0.555 | -| mse | 291 | +| ep_return | 147 | +| ep_reward | 0.588 | +| mse | 292 | | time/ | | | progress | 0.53 | | step | 5.3e+05 | -| step_time | 8.84 | +| step_time | 12.4 | -------------------------------------- -2023-10-19 16:38:24,581 : Eval | ep_lengths 176.40 +/- 112.44 | ep_return 110.961 +/- 75.030 -2023-10-19 16:38:24,582 : +2023-10-27 18:55:57,968 : Eval | ep_lengths 175.60 +/- 113.65 | ep_return 110.053 +/- 74.568 +2023-10-27 18:55:57,970 : -------------------------------------- | loss/ | | -| approx_kl | 0.0338 | -| entropy_loss | -3.67 | -| policy_loss | -0.00669 | -| value_loss | 0.394 | +| approx_kl | 0.0345 | +| entropy_loss | -3.18 | +| policy_loss | 0.00436 | +| value_loss | 0.185 | | stat/ | | -| constraint_violation | 346 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 155 | -| ep_reward | 0.622 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 182 | +| ep_reward | 0.727 | | stat_eval/ | | | constraint_violation | 0.3 | | ep_length | 176 | -| ep_return | 111 | -| ep_reward | 0.444 | -| mse | 120 | +| ep_return | 110 | +| ep_reward | 0.44 | +| mse | 167 | | time/ | | | progress | 0.54 | | step | 5.4e+05 | -| step_time | 8.65 | +| step_time | 13.8 | -------------------------------------- -2023-10-19 16:40:11,302 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 152.799 +/- 23.217 -2023-10-19 16:40:11,304 : +2023-10-27 18:58:25,228 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 131.324 +/- 49.274 +2023-10-27 18:58:25,229 : -------------------------------------- | loss/ | | -| approx_kl | 0.0298 | -| entropy_loss | -3.68 | -| policy_loss | -0.0113 | -| value_loss | 1.64 | +| approx_kl | 0.0296 | +| entropy_loss | -3.2 | +| policy_loss | -0.0139 | +| value_loss | 0.154 | | stat/ | | -| constraint_violation | 353 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 143 | -| ep_reward | 0.577 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 153 | -| ep_reward | 0.611 | -| mse | 253 | +| ep_return | 180 | +| ep_reward | 0.72 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 131 | +| ep_reward | 0.525 | +| mse | 269 | | time/ | | | progress | 0.55 | | step | 5.5e+05 | -| step_time | 8.65 | +| step_time | 13.2 | -------------------------------------- -2023-10-19 16:41:54,361 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 151.165 +/- 16.086 -2023-10-19 16:41:54,362 : +2023-10-27 19:00:50,313 : Eval | ep_lengths 201.60 +/- 96.80 | ep_return 137.722 +/- 75.220 +2023-10-27 19:00:50,315 : -------------------------------------- | loss/ | | -| approx_kl | 0.0263 | -| entropy_loss | -3.68 | -| policy_loss | -0.00715 | -| value_loss | 0.809 | +| approx_kl | 0.0295 | +| entropy_loss | -3.21 | +| policy_loss | 0.00345 | +| value_loss | 0.227 | | stat/ | | -| constraint_violation | 358 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 141 | -| ep_reward | 0.565 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 151 | -| ep_reward | 0.605 | -| mse | 225 | +| ep_return | 181 | +| ep_reward | 0.725 | +| stat_eval/ | | +| constraint_violation | 0.2 | +| ep_length | 202 | +| ep_return | 138 | +| ep_reward | 0.551 | +| mse | 118 | | time/ | | | progress | 0.56 | | step | 5.6e+05 | -| step_time | 8.49 | +| step_time | 11.9 | -------------------------------------- -2023-10-19 16:43:35,775 : Eval | ep_lengths 226.70 +/- 69.90 | ep_return 136.758 +/- 48.887 -2023-10-19 16:43:35,776 : +2023-10-27 19:03:22,336 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 139.247 +/- 16.222 +2023-10-27 19:03:22,337 : -------------------------------------- | loss/ | | -| approx_kl | 0.0231 | -| entropy_loss | -3.7 | -| policy_loss | -0.00861 | -| value_loss | 0.555 | +| approx_kl | 0.0377 | +| entropy_loss | -3.21 | +| policy_loss | -0.0151 | +| value_loss | 0.21 | | stat/ | | -| constraint_violation | 365 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 144 | -| ep_reward | 0.577 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 185 | +| ep_reward | 0.739 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 227 | -| ep_return | 137 | -| ep_reward | 0.547 | -| mse | 236 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 139 | +| ep_reward | 0.557 | +| mse | 291 | | time/ | | | progress | 0.57 | | step | 5.7e+05 | -| step_time | 8.46 | +| step_time | 14.3 | -------------------------------------- -2023-10-19 16:45:14,627 : Eval | ep_lengths 200.20 +/- 99.60 | ep_return 129.361 +/- 67.914 -2023-10-19 16:45:14,628 : +2023-10-27 19:05:45,845 : Eval | ep_lengths 178.30 +/- 109.54 | ep_return 92.289 +/- 63.370 +2023-10-27 19:05:45,846 : -------------------------------------- | loss/ | | -| approx_kl | 0.0307 | -| entropy_loss | -3.64 | -| policy_loss | -0.00969 | -| value_loss | 0.688 | +| approx_kl | 0.0257 | +| entropy_loss | -3.24 | +| policy_loss | -0.00639 | +| value_loss | 0.116 | | stat/ | | -| constraint_violation | 369 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 129 | -| ep_reward | 0.517 | -| stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 200 | -| ep_return | 129 | -| ep_reward | 0.517 | -| mse | 134 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 180 | +| ep_reward | 0.721 | +| stat_eval/ | | +| constraint_violation | 0.3 | +| ep_length | 178 | +| ep_return | 92.3 | +| ep_reward | 0.369 | +| mse | 296 | | time/ | | | progress | 0.58 | | step | 5.8e+05 | -| step_time | 8.44 | +| step_time | 12.6 | -------------------------------------- -2023-10-19 16:46:57,485 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 153.993 +/- 20.213 -2023-10-19 16:46:57,486 : +2023-10-27 19:08:09,225 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 113.904 +/- 60.092 +2023-10-27 19:08:09,227 : -------------------------------------- | loss/ | | -| approx_kl | 0.0308 | -| entropy_loss | -3.62 | -| policy_loss | -0.0166 | -| value_loss | 2.55 | +| approx_kl | 0.0368 | +| entropy_loss | -3.24 | +| policy_loss | -0.00604 | +| value_loss | 0.367 | | stat/ | | -| constraint_violation | 375 | -| ep_constraint_vio... | 0.4 | -| ep_length | 153 | -| ep_return | 103 | -| ep_reward | 0.426 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 154 | -| ep_reward | 0.616 | -| mse | 216 | +| ep_return | 193 | +| ep_reward | 0.772 | +| stat_eval/ | | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 114 | +| ep_reward | 0.456 | +| mse | 263 | | time/ | | | progress | 0.59 | | step | 5.9e+05 | -| step_time | 8.53 | +| step_time | 13 | -------------------------------------- -2023-10-19 16:48:40,369 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 141.296 +/- 50.121 -2023-10-19 16:48:40,370 : +2023-10-27 19:10:36,493 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 142.276 +/- 30.625 +2023-10-27 19:10:36,495 : -------------------------------------- | loss/ | | -| approx_kl | 0.0404 | -| entropy_loss | -3.69 | -| policy_loss | -0.00499 | -| value_loss | 3.57 | +| approx_kl | 0.0301 | +| entropy_loss | -3.24 | +| policy_loss | -0.0163 | +| value_loss | 0.226 | | stat/ | | -| constraint_violation | 387 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 126 | -| ep_reward | 0.623 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 179 | +| ep_reward | 0.717 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 141 | -| ep_reward | 0.565 | -| mse | 187 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 142 | +| ep_reward | 0.569 | +| mse | 290 | | time/ | | | progress | 0.6 | | step | 6e+05 | -| step_time | 8.41 | +| step_time | 12.3 | -------------------------------------- -2023-10-19 16:50:24,806 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 145.188 +/- 26.861 -2023-10-19 16:50:24,807 : +2023-10-27 19:13:03,967 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 152.767 +/- 21.417 +2023-10-27 19:13:03,969 : -------------------------------------- | loss/ | | -| approx_kl | 0.0263 | -| entropy_loss | -3.65 | -| policy_loss | -0.00863 | -| value_loss | 1.79 | +| approx_kl | 0.0308 | +| entropy_loss | -3.25 | +| policy_loss | -0.0135 | +| value_loss | 0.155 | | stat/ | | -| constraint_violation | 393 | +| constraint_violation | 4 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 162 | -| ep_reward | 0.649 | +| ep_return | 182 | +| ep_reward | 0.729 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 145 | -| ep_reward | 0.581 | -| mse | 295 | +| ep_return | 153 | +| ep_reward | 0.611 | +| mse | 254 | | time/ | | | progress | 0.61 | | step | 6.1e+05 | -| step_time | 8.47 | +| step_time | 11.5 | -------------------------------------- -2023-10-19 16:52:07,360 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 120.617 +/- 43.454 -2023-10-19 16:52:07,361 : +2023-10-27 19:15:28,938 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 133.935 +/- 48.418 +2023-10-27 19:15:28,939 : -------------------------------------- | loss/ | | -| approx_kl | 0.0246 | -| entropy_loss | -3.63 | -| policy_loss | -0.0122 | -| value_loss | 0.775 | +| approx_kl | 0.033 | +| entropy_loss | -3.21 | +| policy_loss | -0.00102 | +| value_loss | 0.202 | | stat/ | | -| constraint_violation | 395 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 143 | -| ep_reward | 0.571 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 180 | +| ep_reward | 0.719 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 121 | -| ep_reward | 0.482 | -| mse | 234 | +| ep_return | 134 | +| ep_reward | 0.536 | +| mse | 222 | | time/ | | | progress | 0.62 | | step | 6.2e+05 | -| step_time | 8.49 | +| step_time | 13.4 | -------------------------------------- -2023-10-19 16:53:51,333 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 149.257 +/- 21.570 -2023-10-19 16:53:51,334 : +2023-10-27 19:17:56,267 : Eval | ep_lengths 225.50 +/- 73.50 | ep_return 137.138 +/- 48.393 +2023-10-27 19:17:56,269 : -------------------------------------- | loss/ | | -| approx_kl | 0.0333 | -| entropy_loss | -3.61 | -| policy_loss | -0.0124 | -| value_loss | 0.938 | +| approx_kl | 0.0289 | +| entropy_loss | -3.17 | +| policy_loss | -0.00862 | +| value_loss | 0.287 | | stat/ | | -| constraint_violation | 401 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 129 | -| ep_reward | 0.517 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 149 | -| ep_reward | 0.597 | -| mse | 227 | +| ep_return | 179 | +| ep_reward | 0.715 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 226 | +| ep_return | 137 | +| ep_reward | 0.549 | +| mse | 212 | | time/ | | | progress | 0.63 | | step | 6.3e+05 | -| step_time | 8.92 | +| step_time | 13.8 | -------------------------------------- -2023-10-19 16:55:35,653 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 164.287 +/- 21.302 -2023-10-19 16:55:35,664 : +2023-10-27 19:20:25,487 : Eval | ep_lengths 227.60 +/- 67.20 | ep_return 137.035 +/- 51.549 +2023-10-27 19:20:25,488 : -------------------------------------- | loss/ | | -| approx_kl | 0.032 | -| entropy_loss | -3.58 | -| policy_loss | 0.00215 | -| value_loss | 1.42 | +| approx_kl | 0.0314 | +| entropy_loss | -3.17 | +| policy_loss | -0.00581 | +| value_loss | 0.077 | | stat/ | | -| constraint_violation | 409 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 125 | -| ep_reward | 0.502 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 164 | -| ep_reward | 0.657 | -| mse | 128 | +| ep_return | 194 | +| ep_reward | 0.777 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 228 | +| ep_return | 137 | +| ep_reward | 0.548 | +| mse | 184 | | time/ | | | progress | 0.64 | | step | 6.4e+05 | -| step_time | 8.58 | +| step_time | 14.5 | -------------------------------------- -2023-10-19 16:57:11,752 : Eval | ep_lengths 176.80 +/- 111.88 | ep_return 102.161 +/- 68.474 -2023-10-19 16:57:11,753 : +2023-10-27 19:22:57,682 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 151.532 +/- 27.194 +2023-10-27 19:22:57,684 : -------------------------------------- | loss/ | | -| approx_kl | 0.0401 | -| entropy_loss | -3.59 | -| policy_loss | 0.00402 | -| value_loss | 1.07 | +| approx_kl | 0.028 | +| entropy_loss | -3.15 | +| policy_loss | -0.0104 | +| value_loss | 0.134 | | stat/ | | -| constraint_violation | 415 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 125 | -| ep_reward | 0.503 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 182 | +| ep_reward | 0.729 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 177 | -| ep_return | 102 | -| ep_reward | 0.409 | -| mse | 166 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 152 | +| ep_reward | 0.606 | +| mse | 305 | | time/ | | | progress | 0.65 | | step | 6.5e+05 | -| step_time | 8.5 | +| step_time | 13.7 | -------------------------------------- -2023-10-19 16:58:49,901 : Eval | ep_lengths 176.30 +/- 112.59 | ep_return 103.952 +/- 72.293 -2023-10-19 16:58:49,902 : +2023-10-27 19:25:22,613 : Eval | ep_lengths 200.40 +/- 99.20 | ep_return 116.728 +/- 60.942 +2023-10-27 19:25:22,614 : -------------------------------------- | loss/ | | -| approx_kl | 0.0336 | -| entropy_loss | -3.59 | -| policy_loss | -0.00259 | -| value_loss | 1.03 | +| approx_kl | 0.027 | +| entropy_loss | -3.07 | +| policy_loss | -0.0119 | +| value_loss | 0.17 | | stat/ | | -| constraint_violation | 420 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 132 | -| ep_reward | 0.528 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 184 | +| ep_reward | 0.734 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 176 | -| ep_return | 104 | -| ep_reward | 0.416 | -| mse | 161 | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 117 | +| ep_reward | 0.467 | +| mse | 191 | | time/ | | | progress | 0.66 | | step | 6.6e+05 | -| step_time | 8.58 | +| step_time | 11.9 | -------------------------------------- -2023-10-19 17:00:29,417 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 140.056 +/- 50.724 -2023-10-19 17:00:29,418 : +2023-10-27 19:27:46,628 : Eval | ep_lengths 200.60 +/- 98.80 | ep_return 120.379 +/- 62.062 +2023-10-27 19:27:46,629 : -------------------------------------- | loss/ | | -| approx_kl | 0.0347 | -| entropy_loss | -3.61 | -| policy_loss | 0.0011 | -| value_loss | 0.314 | +| approx_kl | 0.0288 | +| entropy_loss | -3.12 | +| policy_loss | -0.00871 | +| value_loss | 0.112 | | stat/ | | -| constraint_violation | 428 | -| ep_constraint_vio... | 0.2 | -| ep_length | 202 | -| ep_return | 124 | -| ep_reward | 0.504 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 190 | +| ep_reward | 0.758 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 226 | -| ep_return | 140 | -| ep_reward | 0.56 | -| mse | 210 | +| constraint_violation | 0.2 | +| ep_length | 201 | +| ep_return | 120 | +| ep_reward | 0.482 | +| mse | 222 | | time/ | | | progress | 0.67 | | step | 6.7e+05 | -| step_time | 8.25 | +| step_time | 11.4 | -------------------------------------- -2023-10-19 17:02:11,722 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 155.717 +/- 18.971 -2023-10-19 17:02:11,723 : +2023-10-27 19:30:20,372 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 153.290 +/- 24.931 +2023-10-27 19:30:20,373 : -------------------------------------- | loss/ | | -| approx_kl | 0.0343 | -| entropy_loss | -3.58 | -| policy_loss | -0.00436 | -| value_loss | 0.811 | +| approx_kl | 0.0201 | +| entropy_loss | -3.1 | +| policy_loss | -0.0234 | +| value_loss | 0.133 | | stat/ | | -| constraint_violation | 433 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 132 | -| ep_reward | 0.542 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 185 | +| ep_reward | 0.74 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 156 | -| ep_reward | 0.623 | -| mse | 197 | +| ep_return | 153 | +| ep_reward | 0.613 | +| mse | 296 | | time/ | | | progress | 0.68 | | step | 6.8e+05 | -| step_time | 8.58 | +| step_time | 11.2 | -------------------------------------- -2023-10-19 17:03:50,019 : Eval | ep_lengths 201.90 +/- 96.21 | ep_return 118.285 +/- 61.814 -2023-10-19 17:03:50,020 : +2023-10-27 19:32:51,161 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 152.207 +/- 58.839 +2023-10-27 19:32:51,162 : +--------------------------------------- +| loss/ | | +| approx_kl | 0.0298 | +| entropy_loss | -3.08 | +| policy_loss | -0.000741 | +| value_loss | 0.209 | +| stat/ | | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 190 | +| ep_reward | 0.76 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 226 | +| ep_return | 152 | +| ep_reward | 0.609 | +| mse | 136 | +| time/ | | +| progress | 0.69 | +| step | 6.9e+05 | +| step_time | 13.3 | +--------------------------------------- + +2023-10-27 19:35:23,416 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 148.858 +/- 22.650 +2023-10-27 19:35:23,418 : -------------------------------------- | loss/ | | -| approx_kl | 0.031 | -| entropy_loss | -3.57 | -| policy_loss | -0.00483 | -| value_loss | 1.12 | +| approx_kl | 0.0284 | +| entropy_loss | -3.1 | +| policy_loss | -0.0124 | +| value_loss | 0.408 | | stat/ | | -| constraint_violation | 433 | +| constraint_violation | 4 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 157 | -| ep_reward | 0.626 | +| ep_return | 182 | +| ep_reward | 0.728 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 202 | -| ep_return | 118 | -| ep_reward | 0.473 | -| mse | 212 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 149 | +| ep_reward | 0.595 | +| mse | 238 | | time/ | | -| progress | 0.69 | -| step | 6.9e+05 | -| step_time | 8.47 | +| progress | 0.7 | +| step | 7e+05 | +| step_time | 13.9 | -------------------------------------- -2023-10-19 17:05:30,139 : Eval | ep_lengths 227.50 +/- 67.50 | ep_return 137.756 +/- 48.341 -2023-10-19 17:05:30,140 : +2023-10-27 19:37:47,158 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 139.013 +/- 53.978 +2023-10-27 19:37:47,160 : -------------------------------------- | loss/ | | -| approx_kl | 0.0163 | -| entropy_loss | -3.53 | -| policy_loss | -0.00745 | -| value_loss | 0.403 | +| approx_kl | 0.0368 | +| entropy_loss | -3.13 | +| policy_loss | -0.00737 | +| value_loss | 0.167 | | stat/ | | -| constraint_violation | 437 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 114 | -| ep_reward | 0.47 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 193 | +| ep_reward | 0.773 | | stat_eval/ | | | constraint_violation | 0.1 | -| ep_length | 228 | -| ep_return | 138 | -| ep_reward | 0.551 | -| mse | 205 | +| ep_length | 225 | +| ep_return | 139 | +| ep_reward | 0.556 | +| mse | 240 | | time/ | | -| progress | 0.7 | -| step | 7e+05 | -| step_time | 8.25 | +| progress | 0.71 | +| step | 7.1e+05 | +| step_time | 11.9 | -------------------------------------- -2023-10-19 17:07:09,232 : Eval | ep_lengths 226.20 +/- 71.40 | ep_return 152.704 +/- 53.553 -2023-10-19 17:07:09,233 : +2023-10-27 19:40:13,029 : Eval | ep_lengths 200.80 +/- 98.41 | ep_return 118.331 +/- 61.897 +2023-10-27 19:40:13,031 : -------------------------------------- | loss/ | | -| approx_kl | 0.0338 | -| entropy_loss | -3.48 | -| policy_loss | -0.00148 | -| value_loss | 1 | +| approx_kl | 0.0212 | +| entropy_loss | -3.09 | +| policy_loss | -0.00399 | +| value_loss | 0.218 | | stat/ | | -| constraint_violation | 445 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 146 | -| ep_reward | 0.585 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 179 | +| ep_reward | 0.717 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 226 | -| ep_return | 153 | -| ep_reward | 0.611 | -| mse | 125 | +| constraint_violation | 0.2 | +| ep_length | 201 | +| ep_return | 118 | +| ep_reward | 0.473 | +| mse | 191 | | time/ | | -| progress | 0.71 | -| step | 7.1e+05 | -| step_time | 8.28 | +| progress | 0.72 | +| step | 7.2e+05 | +| step_time | 12.2 | -------------------------------------- -2023-10-19 17:08:45,153 : Eval | ep_lengths 175.80 +/- 113.35 | ep_return 92.034 +/- 62.563 -2023-10-19 17:08:45,154 : ---------------------------------------- -| loss/ | | -| approx_kl | 0.0372 | -| entropy_loss | -3.47 | -| policy_loss | -0.000493 | -| value_loss | 1.31 | -| stat/ | | -| constraint_violation | 447 | -| ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 159 | -| ep_reward | 0.637 | -| stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 176 | -| ep_return | 92 | -| ep_reward | 0.368 | -| mse | 177 | -| time/ | | -| progress | 0.72 | -| step | 7.2e+05 | -| step_time | 8.28 | ---------------------------------------- - -2023-10-19 17:10:23,391 : Eval | ep_lengths 202.60 +/- 94.84 | ep_return 114.232 +/- 59.751 -2023-10-19 17:10:23,392 : +2023-10-27 19:42:41,406 : Eval | ep_lengths 226.70 +/- 69.90 | ep_return 145.249 +/- 55.640 +2023-10-27 19:42:41,407 : -------------------------------------- | loss/ | | -| approx_kl | 0.0367 | -| entropy_loss | -3.49 | -| policy_loss | 0.00187 | -| value_loss | 1.26 | +| approx_kl | 0.037 | +| entropy_loss | -3.09 | +| policy_loss | -0.00609 | +| value_loss | 0.387 | | stat/ | | -| constraint_violation | 452 | -| ep_constraint_vio... | 0.2 | -| ep_length | 202 | -| ep_return | 126 | -| ep_reward | 0.566 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 183 | +| ep_reward | 0.732 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 203 | -| ep_return | 114 | -| ep_reward | 0.457 | -| mse | 258 | +| constraint_violation | 0.1 | +| ep_length | 227 | +| ep_return | 145 | +| ep_reward | 0.581 | +| mse | 176 | | time/ | | | progress | 0.73 | | step | 7.3e+05 | -| step_time | 8.29 | +| step_time | 13 | -------------------------------------- -2023-10-19 17:12:03,959 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 146.878 +/- 20.996 -2023-10-19 17:12:03,960 : +2023-10-27 19:45:11,503 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 138.933 +/- 27.125 +2023-10-27 19:45:11,505 : -------------------------------------- | loss/ | | -| approx_kl | 0.0361 | -| entropy_loss | -3.51 | -| policy_loss | 0.00208 | -| value_loss | 0.767 | +| approx_kl | 0.0232 | +| entropy_loss | -3.06 | +| policy_loss | -0.0108 | +| value_loss | 0.375 | | stat/ | | -| constraint_violation | 457 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 133 | -| ep_reward | 0.533 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 177 | +| ep_reward | 0.71 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 147 | -| ep_reward | 0.588 | -| mse | 268 | +| ep_return | 139 | +| ep_reward | 0.556 | +| mse | 264 | | time/ | | | progress | 0.74 | | step | 7.4e+05 | -| step_time | 8.19 | +| step_time | 13.9 | -------------------------------------- -2023-10-19 17:13:43,675 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 130.812 +/- 45.457 -2023-10-19 17:13:43,676 : +2023-10-27 19:47:38,502 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 131.668 +/- 46.994 +2023-10-27 19:47:38,504 : -------------------------------------- | loss/ | | -| approx_kl | 0.0338 | -| entropy_loss | -3.5 | -| policy_loss | -0.00812 | -| value_loss | 1.08 | +| approx_kl | 0.0352 | +| entropy_loss | -3.07 | +| policy_loss | -0.0202 | +| value_loss | 0.197 | | stat/ | | -| constraint_violation | 464 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 129 | -| ep_reward | 0.516 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 190 | +| ep_reward | 0.76 | | stat_eval/ | | | constraint_violation | 0.1 | | ep_length | 225 | -| ep_return | 131 | -| ep_reward | 0.523 | -| mse | 264 | +| ep_return | 132 | +| ep_reward | 0.527 | +| mse | 236 | | time/ | | | progress | 0.75 | | step | 7.5e+05 | -| step_time | 8.3 | +| step_time | 11.2 | -------------------------------------- -2023-10-19 17:15:20,771 : Eval | ep_lengths 176.90 +/- 111.73 | ep_return 99.827 +/- 66.307 -2023-10-19 17:15:20,772 : +2023-10-27 19:50:08,139 : Eval | ep_lengths 225.60 +/- 73.20 | ep_return 130.045 +/- 52.546 +2023-10-27 19:50:08,141 : -------------------------------------- | loss/ | | -| approx_kl | 0.0297 | -| entropy_loss | -3.52 | -| policy_loss | -0.00995 | -| value_loss | 1.68 | +| approx_kl | 0.0383 | +| entropy_loss | -3.08 | +| policy_loss | 0.01 | +| value_loss | 0.251 | | stat/ | | -| constraint_violation | 472 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 141 | -| ep_reward | 0.572 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 181 | +| ep_reward | 0.724 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 177 | -| ep_return | 99.8 | -| ep_reward | 0.399 | -| mse | 233 | +| constraint_violation | 0.1 | +| ep_length | 226 | +| ep_return | 130 | +| ep_reward | 0.52 | +| mse | 251 | | time/ | | | progress | 0.76 | | step | 7.6e+05 | -| step_time | 8.47 | +| step_time | 12.9 | -------------------------------------- -2023-10-19 17:17:02,357 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 144.975 +/- 17.459 -2023-10-19 17:17:02,358 : +2023-10-27 19:52:37,312 : Eval | ep_lengths 201.30 +/- 97.41 | ep_return 122.393 +/- 62.941 +2023-10-27 19:52:37,314 : -------------------------------------- | loss/ | | -| approx_kl | 0.0371 | -| entropy_loss | -3.51 | -| policy_loss | -0.00205 | -| value_loss | 1.48 | +| approx_kl | 0.0279 | +| entropy_loss | -3.11 | +| policy_loss | -0.0103 | +| value_loss | 0.336 | | stat/ | | -| constraint_violation | 477 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 140 | -| ep_reward | 0.562 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 145 | -| ep_reward | 0.58 | -| mse | 222 | +| ep_return | 191 | +| ep_reward | 0.762 | +| stat_eval/ | | +| constraint_violation | 0.2 | +| ep_length | 201 | +| ep_return | 122 | +| ep_reward | 0.49 | +| mse | 147 | | time/ | | | progress | 0.77 | | step | 7.7e+05 | -| step_time | 8.38 | +| step_time | 13.9 | -------------------------------------- -2023-10-19 17:18:42,535 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 126.620 +/- 45.909 -2023-10-19 17:18:42,537 : +2023-10-27 19:55:04,017 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 128.521 +/- 68.175 +2023-10-27 19:55:04,018 : -------------------------------------- | loss/ | | -| approx_kl | 0.0171 | -| entropy_loss | -3.47 | -| policy_loss | -0.00563 | -| value_loss | 0.743 | +| approx_kl | 0.0277 | +| entropy_loss | -3.11 | +| policy_loss | -0.00807 | +| value_loss | 0.125 | | stat/ | | -| constraint_violation | 482 | +| constraint_violation | 4 | | ep_constraint_vio... | 0 | -| ep_length | 250 | -| ep_return | 152 | -| ep_reward | 0.607 | +| ep_length | 250 | +| ep_return | 190 | +| ep_reward | 0.759 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 127 | -| ep_reward | 0.506 | -| mse | 267 | +| constraint_violation | 0.2 | +| ep_length | 200 | +| ep_return | 129 | +| ep_reward | 0.514 | +| mse | 200 | | time/ | | | progress | 0.78 | | step | 7.8e+05 | -| step_time | 8.18 | +| step_time | 12.8 | -------------------------------------- -2023-10-19 17:20:20,774 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 111.525 +/- 42.538 -2023-10-19 17:20:20,775 : +2023-10-27 19:57:33,761 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 136.375 +/- 50.689 +2023-10-27 19:57:33,763 : -------------------------------------- | loss/ | | -| approx_kl | 0.0263 | -| entropy_loss | -3.4 | -| policy_loss | -0.0105 | -| value_loss | 0.697 | +| approx_kl | 0.038 | +| entropy_loss | -3.13 | +| policy_loss | -0.00239 | +| value_loss | 0.0896 | | stat/ | | -| constraint_violation | 484 | +| constraint_violation | 4 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 132 | -| ep_reward | 0.53 | +| ep_return | 195 | +| ep_reward | 0.779 | | stat_eval/ | | | constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 112 | -| ep_reward | 0.446 | -| mse | 336 | +| ep_length | 226 | +| ep_return | 136 | +| ep_reward | 0.546 | +| mse | 201 | | time/ | | | progress | 0.79 | | step | 7.9e+05 | -| step_time | 8.27 | +| step_time | 13.7 | -------------------------------------- -2023-10-19 17:21:55,188 : Eval | ep_lengths 176.10 +/- 112.90 | ep_return 111.689 +/- 76.830 -2023-10-19 17:21:55,189 : +2023-10-27 20:00:07,669 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 143.429 +/- 14.648 +2023-10-27 20:00:07,671 : -------------------------------------- | loss/ | | -| approx_kl | 0.0392 | -| entropy_loss | -3.4 | -| policy_loss | -0.01 | -| value_loss | 1.71 | +| approx_kl | 0.0296 | +| entropy_loss | -3.14 | +| policy_loss | -0.00581 | +| value_loss | 0.297 | | stat/ | | -| constraint_violation | 492 | -| ep_constraint_vio... | 0.3 | -| ep_length | 176 | -| ep_return | 102 | -| ep_reward | 0.474 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 183 | +| ep_reward | 0.733 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 176 | -| ep_return | 112 | -| ep_reward | 0.447 | -| mse | 138 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 143 | +| ep_reward | 0.574 | +| mse | 305 | | time/ | | | progress | 0.8 | | step | 8e+05 | -| step_time | 8.33 | +| step_time | 12.9 | -------------------------------------- -2023-10-19 17:23:38,374 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 122.034 +/- 47.269 -2023-10-19 17:23:38,375 : +2023-10-27 20:02:34,042 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 136.554 +/- 51.142 +2023-10-27 20:02:34,043 : -------------------------------------- | loss/ | | -| approx_kl | 0.0314 | -| entropy_loss | -3.39 | -| policy_loss | -0.00682 | -| value_loss | 1.22 | +| approx_kl | 0.0325 | +| entropy_loss | -3.06 | +| policy_loss | -0.00879 | +| value_loss | 0.169 | | stat/ | | -| constraint_violation | 498 | +| constraint_violation | 4 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 160 | -| ep_reward | 0.641 | +| ep_return | 196 | +| ep_reward | 0.785 | | stat_eval/ | | | constraint_violation | 0.1 | -| ep_length | 226 | -| ep_return | 122 | -| ep_reward | 0.488 | -| mse | 235 | +| ep_length | 225 | +| ep_return | 137 | +| ep_reward | 0.546 | +| mse | 191 | | time/ | | | progress | 0.81 | | step | 8.1e+05 | -| step_time | 8.52 | +| step_time | 11.9 | -------------------------------------- -2023-10-19 17:25:27,506 : Eval | ep_lengths 200.60 +/- 98.80 | ep_return 126.701 +/- 66.232 -2023-10-19 17:25:27,507 : +2023-10-27 20:05:02,956 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 148.744 +/- 53.248 +2023-10-27 20:05:02,958 : -------------------------------------- | loss/ | | -| approx_kl | 0.0292 | -| entropy_loss | -3.39 | -| policy_loss | -0.00589 | -| value_loss | 1.61 | +| approx_kl | 0.0368 | +| entropy_loss | -3.08 | +| policy_loss | -0.00167 | +| value_loss | 0.271 | | stat/ | | -| constraint_violation | 508 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 125 | -| ep_reward | 0.511 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 200 | +| ep_reward | 0.8 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 127 | -| ep_reward | 0.507 | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 149 | +| ep_reward | 0.595 | | mse | 139 | | time/ | | | progress | 0.82 | | step | 8.2e+05 | -| step_time | 9.21 | +| step_time | 15.6 | -------------------------------------- -2023-10-19 17:27:16,754 : Eval | ep_lengths 175.50 +/- 113.80 | ep_return 109.898 +/- 75.043 -2023-10-19 17:27:16,755 : +2023-10-27 20:07:35,276 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 161.597 +/- 29.541 +2023-10-27 20:07:35,289 : -------------------------------------- | loss/ | | -| approx_kl | 0.0311 | -| entropy_loss | -3.34 | -| policy_loss | -0.00796 | -| value_loss | 0.715 | +| approx_kl | 0.0211 | +| entropy_loss | -3.08 | +| policy_loss | -0.0127 | +| value_loss | 0.299 | | stat/ | | -| constraint_violation | 511 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 142 | -| ep_reward | 0.572 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 179 | +| ep_reward | 0.714 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 176 | -| ep_return | 110 | -| ep_reward | 0.44 | -| mse | 125 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 162 | +| ep_reward | 0.646 | +| mse | 219 | | time/ | | | progress | 0.83 | | step | 8.3e+05 | -| step_time | 9.06 | +| step_time | 14.2 | -------------------------------------- -2023-10-19 17:29:09,303 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 127.870 +/- 46.523 -2023-10-19 17:29:09,304 : +2023-10-27 20:09:59,680 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 160.239 +/- 25.285 +2023-10-27 20:09:59,682 : -------------------------------------- | loss/ | | -| approx_kl | 0.0361 | -| entropy_loss | -3.39 | -| policy_loss | -0.0181 | -| value_loss | 0.985 | +| approx_kl | 0.0291 | +| entropy_loss | -3.09 | +| policy_loss | -0.00465 | +| value_loss | 0.197 | | stat/ | | -| constraint_violation | 516 | +| constraint_violation | 4 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 150 | -| ep_reward | 0.599 | +| ep_return | 180 | +| ep_reward | 0.719 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 128 | -| ep_reward | 0.511 | -| mse | 236 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 160 | +| ep_reward | 0.641 | +| mse | 212 | | time/ | | | progress | 0.84 | | step | 8.4e+05 | -| step_time | 9.61 | +| step_time | 11.6 | -------------------------------------- -2023-10-19 17:30:58,305 : Eval | ep_lengths 176.40 +/- 112.45 | ep_return 107.002 +/- 72.128 -2023-10-19 17:30:58,306 : +2023-10-27 20:12:22,413 : Eval | ep_lengths 151.60 +/- 120.52 | ep_return 85.946 +/- 72.539 +2023-10-27 20:12:22,415 : -------------------------------------- | loss/ | | -| approx_kl | 0.0236 | -| entropy_loss | -3.42 | -| policy_loss | -0.0134 | -| value_loss | 0.381 | +| approx_kl | 0.0323 | +| entropy_loss | -3.04 | +| policy_loss | -0.00568 | +| value_loss | 0.253 | | stat/ | | -| constraint_violation | 520 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 153 | -| ep_reward | 0.616 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 193 | +| ep_reward | 0.774 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 176 | -| ep_return | 107 | -| ep_reward | 0.428 | -| mse | 146 | +| constraint_violation | 0.4 | +| ep_length | 152 | +| ep_return | 85.9 | +| ep_reward | 0.344 | +| mse | 118 | | time/ | | | progress | 0.85 | | step | 8.5e+05 | -| step_time | 9 | +| step_time | 12.9 | -------------------------------------- -2023-10-19 17:32:50,415 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 163.761 +/- 19.419 -2023-10-19 17:32:50,416 : +2023-10-27 20:14:52,931 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 142.105 +/- 32.305 +2023-10-27 20:14:52,933 : -------------------------------------- | loss/ | | -| approx_kl | 0.0347 | -| entropy_loss | -3.46 | -| policy_loss | -0.0128 | -| value_loss | 0.826 | +| approx_kl | 0.0272 | +| entropy_loss | -2.98 | +| policy_loss | -0.00188 | +| value_loss | 0.19 | | stat/ | | -| constraint_violation | 534 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 141 | -| ep_reward | 0.566 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 184 | +| ep_reward | 0.735 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 164 | -| ep_reward | 0.655 | -| mse | 162 | +| ep_return | 142 | +| ep_reward | 0.568 | +| mse | 346 | | time/ | | | progress | 0.86 | | step | 8.6e+05 | -| step_time | 9.18 | +| step_time | 14.5 | -------------------------------------- -2023-10-19 17:34:38,681 : Eval | ep_lengths 201.10 +/- 97.82 | ep_return 120.606 +/- 64.928 -2023-10-19 17:34:38,682 : +2023-10-27 20:17:15,834 : Eval | ep_lengths 176.80 +/- 111.84 | ep_return 111.638 +/- 75.813 +2023-10-27 20:17:15,835 : -------------------------------------- | loss/ | | -| approx_kl | 0.0298 | -| entropy_loss | -3.45 | -| policy_loss | -0.0151 | -| value_loss | 0.819 | +| approx_kl | 0.0289 | +| entropy_loss | -2.97 | +| policy_loss | -0.00569 | +| value_loss | 0.524 | | stat/ | | -| constraint_violation | 540 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 139 | -| ep_reward | 0.559 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 191 | +| ep_reward | 0.764 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 121 | -| ep_reward | 0.482 | -| mse | 235 | +| constraint_violation | 0.3 | +| ep_length | 177 | +| ep_return | 112 | +| ep_reward | 0.447 | +| mse | 182 | | time/ | | | progress | 0.87 | | step | 8.7e+05 | -| step_time | 9.11 | +| step_time | 12.8 | -------------------------------------- -2023-10-19 17:36:23,618 : Eval | ep_lengths 200.90 +/- 98.21 | ep_return 123.722 +/- 66.301 -2023-10-19 17:36:23,619 : +2023-10-27 20:19:43,010 : Eval | ep_lengths 201.60 +/- 96.82 | ep_return 123.997 +/- 64.986 +2023-10-27 20:19:43,011 : -------------------------------------- | loss/ | | -| approx_kl | 0.0215 | -| entropy_loss | -3.47 | -| policy_loss | -0.00895 | -| value_loss | 0.683 | +| approx_kl | 0.0187 | +| entropy_loss | -2.95 | +| policy_loss | -0.0156 | +| value_loss | 0.266 | | stat/ | | -| constraint_violation | 544 | +| constraint_violation | 5 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 156 | -| ep_reward | 0.624 | +| ep_return | 190 | +| ep_reward | 0.759 | | stat_eval/ | | | constraint_violation | 0.2 | -| ep_length | 201 | +| ep_length | 202 | | ep_return | 124 | -| ep_reward | 0.495 | -| mse | 214 | +| ep_reward | 0.496 | +| mse | 203 | | time/ | | | progress | 0.88 | | step | 8.8e+05 | -| step_time | 8.3 | +| step_time | 12.5 | -------------------------------------- -2023-10-19 17:38:12,012 : Eval | ep_lengths 226.60 +/- 70.20 | ep_return 125.433 +/- 49.928 -2023-10-19 17:38:12,013 : +2023-10-27 20:22:06,800 : Eval | ep_lengths 226.10 +/- 71.70 | ep_return 145.504 +/- 55.244 +2023-10-27 20:22:06,802 : -------------------------------------- | loss/ | | -| approx_kl | 0.0146 | -| entropy_loss | -3.5 | -| policy_loss | -0.0184 | -| value_loss | 0.469 | +| approx_kl | 0.0358 | +| entropy_loss | -2.99 | +| policy_loss | -0.00924 | +| value_loss | 0.098 | | stat/ | | -| constraint_violation | 548 | +| constraint_violation | 5 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 144 | -| ep_reward | 0.575 | +| ep_return | 187 | +| ep_reward | 0.748 | | stat_eval/ | | | constraint_violation | 0.1 | -| ep_length | 227 | -| ep_return | 125 | -| ep_reward | 0.502 | -| mse | 250 | +| ep_length | 226 | +| ep_return | 146 | +| ep_reward | 0.582 | +| mse | 215 | | time/ | | | progress | 0.89 | | step | 8.9e+05 | -| step_time | 9.39 | +| step_time | 12.5 | -------------------------------------- -2023-10-19 17:40:00,088 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 129.921 +/- 46.231 -2023-10-19 17:40:00,089 : +2023-10-27 20:24:35,279 : Eval | ep_lengths 201.00 +/- 98.02 | ep_return 107.517 +/- 58.468 +2023-10-27 20:24:35,281 : -------------------------------------- | loss/ | | -| approx_kl | 0.0246 | -| entropy_loss | -3.51 | -| policy_loss | -0.0158 | -| value_loss | 0.63 | +| approx_kl | 0.0354 | +| entropy_loss | -3.02 | +| policy_loss | -0.00956 | +| value_loss | 0.114 | | stat/ | | -| constraint_violation | 554 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 135 | -| ep_reward | 0.542 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 184 | +| ep_reward | 0.737 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 130 | -| ep_reward | 0.52 | -| mse | 216 | +| constraint_violation | 0.2 | +| ep_length | 201 | +| ep_return | 108 | +| ep_reward | 0.43 | +| mse | 233 | | time/ | | | progress | 0.9 | | step | 9e+05 | -| step_time | 8.8 | +| step_time | 12.4 | -------------------------------------- -2023-10-19 17:41:46,940 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 139.483 +/- 51.745 -2023-10-19 17:41:46,941 : +2023-10-27 20:27:05,313 : Eval | ep_lengths 226.60 +/- 70.20 | ep_return 133.149 +/- 50.028 +2023-10-27 20:27:05,314 : -------------------------------------- | loss/ | | -| approx_kl | 0.0232 | -| entropy_loss | -3.55 | -| policy_loss | -0.0115 | -| value_loss | 0.346 | +| approx_kl | 0.0304 | +| entropy_loss | -2.95 | +| policy_loss | -0.00548 | +| value_loss | 0.17 | | stat/ | | -| constraint_violation | 559 | +| constraint_violation | 5 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 147 | -| ep_reward | 0.586 | +| ep_return | 186 | +| ep_reward | 0.742 | | stat_eval/ | | | constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 139 | -| ep_reward | 0.558 | -| mse | 219 | +| ep_length | 227 | +| ep_return | 133 | +| ep_reward | 0.533 | +| mse | 233 | | time/ | | | progress | 0.91 | | step | 9.1e+05 | -| step_time | 9.29 | +| step_time | 11.7 | -------------------------------------- -2023-10-19 17:43:34,819 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 127.800 +/- 45.217 -2023-10-19 17:43:34,820 : +2023-10-27 20:29:33,307 : Eval | ep_lengths 175.60 +/- 113.65 | ep_return 126.021 +/- 83.164 +2023-10-27 20:29:33,309 : -------------------------------------- | loss/ | | -| approx_kl | 0.0293 | -| entropy_loss | -3.51 | -| policy_loss | 0.00282 | -| value_loss | 0.456 | +| approx_kl | 0.034 | +| entropy_loss | -2.95 | +| policy_loss | -0.00859 | +| value_loss | 0.393 | | stat/ | | -| constraint_violation | 563 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 128 | -| ep_reward | 0.513 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 184 | +| ep_reward | 0.736 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 128 | -| ep_reward | 0.511 | -| mse | 210 | +| constraint_violation | 0.3 | +| ep_length | 176 | +| ep_return | 126 | +| ep_reward | 0.504 | +| mse | 56.9 | | time/ | | | progress | 0.92 | | step | 9.2e+05 | -| step_time | 8.95 | +| step_time | 14.8 | -------------------------------------- -2023-10-19 17:45:21,235 : Eval | ep_lengths 202.20 +/- 95.67 | ep_return 111.755 +/- 61.994 -2023-10-19 17:45:21,236 : +2023-10-27 20:32:06,803 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 156.281 +/- 17.768 +2023-10-27 20:32:06,804 : -------------------------------------- | loss/ | | -| approx_kl | 0.0207 | -| entropy_loss | -3.5 | -| policy_loss | -0.0164 | -| value_loss | 1.15 | +| approx_kl | 0.0388 | +| entropy_loss | -2.97 | +| policy_loss | 0.00299 | +| value_loss | 0.217 | | stat/ | | -| constraint_violation | 571 | -| ep_constraint_vio... | 0.3 | -| ep_length | 176 | -| ep_return | 91.6 | -| ep_reward | 0.369 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 185 | +| ep_reward | 0.741 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 202 | -| ep_return | 112 | -| ep_reward | 0.447 | -| mse | 252 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 156 | +| ep_reward | 0.625 | +| mse | 175 | | time/ | | | progress | 0.93 | | step | 9.3e+05 | -| step_time | 8.97 | +| step_time | 13.3 | -------------------------------------- -2023-10-19 17:47:09,653 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 140.268 +/- 24.149 -2023-10-19 17:47:09,654 : +2023-10-27 20:34:39,119 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 146.433 +/- 27.599 +2023-10-27 20:34:39,121 : -------------------------------------- | loss/ | | -| approx_kl | 0.0306 | -| entropy_loss | -3.48 | -| policy_loss | -0.00643 | -| value_loss | 1.29 | +| approx_kl | 0.0215 | +| entropy_loss | -2.95 | +| policy_loss | -0.00247 | +| value_loss | 0.44 | | stat/ | | -| constraint_violation | 576 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 138 | -| ep_reward | 0.555 | +| constraint_violation | 6 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 182 | +| ep_reward | 0.729 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 140 | -| ep_reward | 0.561 | -| mse | 299 | +| ep_return | 146 | +| ep_reward | 0.586 | +| mse | 258 | | time/ | | | progress | 0.94 | | step | 9.4e+05 | -| step_time | 9.48 | +| step_time | 11.8 | -------------------------------------- -2023-10-19 17:48:59,597 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 147.561 +/- 25.967 -2023-10-19 17:48:59,598 : +2023-10-27 20:37:06,479 : Eval | ep_lengths 227.50 +/- 67.50 | ep_return 130.071 +/- 48.441 +2023-10-27 20:37:06,480 : -------------------------------------- | loss/ | | -| approx_kl | 0.0291 | -| entropy_loss | -3.48 | -| policy_loss | -0.00387 | -| value_loss | 0.763 | +| approx_kl | 0.026 | +| entropy_loss | -2.94 | +| policy_loss | -0.0128 | +| value_loss | 0.332 | | stat/ | | -| constraint_violation | 585 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 129 | -| ep_reward | 0.516 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 6 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 148 | -| ep_reward | 0.59 | -| mse | 304 | +| ep_return | 195 | +| ep_reward | 0.781 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 228 | +| ep_return | 130 | +| ep_reward | 0.52 | +| mse | 271 | | time/ | | | progress | 0.95 | | step | 9.5e+05 | -| step_time | 9.28 | +| step_time | 15.7 | -------------------------------------- -2023-10-19 17:50:46,638 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 157.380 +/- 28.094 -2023-10-19 17:50:46,639 : +2023-10-27 20:39:34,976 : Eval | ep_lengths 225.50 +/- 73.50 | ep_return 147.301 +/- 51.571 +2023-10-27 20:39:34,978 : -------------------------------------- | loss/ | | -| approx_kl | 0.0279 | -| entropy_loss | -3.48 | -| policy_loss | -0.00285 | -| value_loss | 0.39 | +| approx_kl | 0.0109 | +| entropy_loss | -2.96 | +| policy_loss | -0.0149 | +| value_loss | 0.138 | | stat/ | | -| constraint_violation | 589 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 131 | -| ep_reward | 0.526 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 6 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 157 | -| ep_reward | 0.63 | -| mse | 230 | +| ep_return | 173 | +| ep_reward | 0.691 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 226 | +| ep_return | 147 | +| ep_reward | 0.589 | +| mse | 150 | | time/ | | | progress | 0.96 | | step | 9.6e+05 | -| step_time | 8.69 | +| step_time | 11.8 | -------------------------------------- -2023-10-19 17:52:35,057 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 145.905 +/- 56.619 -2023-10-19 17:52:35,058 : +2023-10-27 20:41:58,213 : Eval | ep_lengths 175.50 +/- 113.80 | ep_return 98.686 +/- 67.391 +2023-10-27 20:41:58,215 : -------------------------------------- | loss/ | | -| approx_kl | 0.0296 | -| entropy_loss | -3.5 | -| policy_loss | 0.00717 | -| value_loss | 2.89 | +| approx_kl | 0.0338 | +| entropy_loss | -2.87 | +| policy_loss | -0.0166 | +| value_loss | 0.249 | | stat/ | | -| constraint_violation | 600 | -| ep_constraint_vio... | 0.5 | -| ep_length | 128 | -| ep_return | 72.8 | -| ep_reward | 0.343 | +| constraint_violation | 6 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 188 | +| ep_reward | 0.751 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 146 | -| ep_reward | 0.584 | -| mse | 160 | +| constraint_violation | 0.3 | +| ep_length | 176 | +| ep_return | 98.7 | +| ep_reward | 0.395 | +| mse | 217 | | time/ | | | progress | 0.97 | | step | 9.7e+05 | -| step_time | 9.16 | +| step_time | 11.5 | -------------------------------------- -2023-10-19 17:54:19,127 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 115.793 +/- 59.378 -2023-10-19 17:54:19,129 : +2023-10-27 20:44:24,060 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 153.595 +/- 28.082 +2023-10-27 20:44:24,061 : -------------------------------------- | loss/ | | | approx_kl | 0.0297 | -| entropy_loss | -3.43 | -| policy_loss | -0.00876 | -| value_loss | 0.393 | +| entropy_loss | -2.88 | +| policy_loss | 0.00306 | +| value_loss | 0.421 | | stat/ | | -| constraint_violation | 603 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 132 | -| ep_reward | 0.531 | +| constraint_violation | 6 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 194 | +| ep_reward | 0.777 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 200 | -| ep_return | 116 | -| ep_reward | 0.463 | -| mse | 193 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 154 | +| ep_reward | 0.614 | +| mse | 180 | | time/ | | | progress | 0.98 | | step | 9.8e+05 | -| step_time | 8.83 | +| step_time | 11.8 | -------------------------------------- -2023-10-19 17:56:04,243 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 146.909 +/- 52.488 -2023-10-19 17:56:04,244 : +2023-10-27 20:46:47,923 : Eval | ep_lengths 202.40 +/- 95.24 | ep_return 132.839 +/- 68.662 +2023-10-27 20:46:47,924 : -------------------------------------- | loss/ | | -| approx_kl | 0.0213 | -| entropy_loss | -3.48 | -| policy_loss | -0.00738 | -| value_loss | 0.675 | +| approx_kl | 0.0278 | +| entropy_loss | -2.88 | +| policy_loss | -0.00731 | +| value_loss | 0.357 | | stat/ | | -| constraint_violation | 609 | +| constraint_violation | 6 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 161 | -| ep_reward | 0.643 | +| ep_return | 184 | +| ep_reward | 0.735 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 147 | -| ep_reward | 0.588 | -| mse | 141 | +| constraint_violation | 0.2 | +| ep_length | 202 | +| ep_return | 133 | +| ep_reward | 0.531 | +| mse | 107 | | time/ | | | progress | 0.99 | | step | 9.9e+05 | -| step_time | 8.57 | +| step_time | 12.3 | -------------------------------------- -2023-10-19 17:57:33,542 : Checkpoint | ./models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/model_latest.pt -2023-10-19 17:57:48,828 : Eval | ep_lengths 200.80 +/- 98.40 | ep_return 108.790 +/- 56.712 -2023-10-19 17:57:48,829 : +2023-10-27 20:48:53,567 : Checkpoint | ./models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_es_pen/model_latest.pt +2023-10-27 20:49:18,280 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 139.137 +/- 35.046 +2023-10-27 20:49:18,282 : -------------------------------------- | loss/ | | -| approx_kl | 0.0303 | -| entropy_loss | -3.48 | -| policy_loss | -0.00433 | -| value_loss | 0.701 | +| approx_kl | 0.037 | +| entropy_loss | -2.86 | +| policy_loss | -0.00167 | +| value_loss | 0.121 | | stat/ | | -| constraint_violation | 619 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 141 | -| ep_reward | 0.564 | +| constraint_violation | 6 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 197 | +| ep_reward | 0.787 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 109 | -| ep_reward | 0.435 | -| mse | 230 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 139 | +| ep_reward | 0.557 | +| mse | 363 | | time/ | | | progress | 1 | | step | 1e+06 | -| step_time | 9.35 | +| step_time | 11.9 | -------------------------------------- diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/loss/approx_kl.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/loss/approx_kl.log index 44f5c7a39..b31c67e33 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/loss/approx_kl.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/loss/approx_kl.log @@ -1,101 +1,101 @@ step,loss/approx_kl -10000,0.03150584173854441 -20000,0.019036924163810903 -30000,0.013138524582609534 -40000,0.030590052374949057 -50000,0.020587278258365888 -60000,0.022227781716113292 -70000,0.030848590478611486 -80000,0.019910760503262282 -90000,0.027950594752716516 -100000,0.03331055624100069 -110000,0.03566807773895562 -120000,0.0316290759248659 -130000,0.023233317583799363 -140000,0.03287895043225338 -150000,0.020458563145560518 -160000,0.02766079579014331 -170000,0.02699751210554192 -180000,0.03500963349360973 -190000,0.01903398547631999 -200000,0.0275072092966487 -210000,0.033015602943487474 -220000,0.01915737491411467 -230000,0.028675647242926056 -240000,0.01641256914784511 -250000,0.028633514923664432 -260000,0.026818697131238876 -270000,0.025035999435931443 -280000,0.02849053070725252 -290000,0.026995940661678708 -300000,0.027831149753183126 -310000,0.03566265808573613 -320000,0.026839422116366522 -330000,0.03481548275643339 -340000,0.00986280411792298 -350000,0.021819725298943615 -360000,0.030763261004661518 -370000,0.024465544056147336 -380000,0.010864127171225844 -390000,0.03831655689670395 -400000,0.03512231298567107 -410000,0.023047639375242093 -420000,0.03080991860479117 -430000,0.022522515042995413 -440000,0.034916622641806803 -450000,0.03634891266313691 -460000,0.020721733399356405 -470000,0.029077752345862497 -480000,0.0162474484105284 -490000,0.032057591380241016 -500000,0.016522815381176767 -510000,0.03212187467919042 -520000,0.032325454607295495 -530000,0.020537930307909846 -540000,0.02707614273919413 -550000,0.030618359132980312 -560000,0.03043451275055607 -570000,0.023263319333394365 -580000,0.03327698361439009 -590000,0.029410923745793606 -600000,0.023750026058405635 -610000,0.03160399413512398 -620000,0.03146546095764886 -630000,0.026165516325272625 -640000,0.03611664802301675 -650000,0.027235906121010577 -660000,0.030227805774969363 -670000,0.012950898621541756 -680000,0.027220205613411964 -690000,0.03253773629354934 -700000,0.03579879565319667 -710000,0.03512034591597815 -720000,0.036465144685159136 -730000,0.029085224121809007 -740000,0.029856936827612424 -750000,0.03340058577402184 -760000,0.03283762799110264 -770000,0.024421226070262494 -780000,0.02405477579062184 -790000,0.02909014061248551 -800000,0.030001919856294985 -810000,0.031031954412659008 -820000,0.03271344718523323 -830000,0.039678339823149146 -840000,0.03953241605001192 -850000,0.026005806929121418 -860000,0.03466618643918386 -870000,0.032004844133431715 -880000,0.036607767928702135 -890000,0.02933514036703854 -900000,0.024361893480333192 -910000,0.030288214453806483 -920000,0.034286751147980486 -930000,0.039043496704349916 -940000,0.01999659587163478 -950000,0.0365903373186787 -960000,0.026887551609737177 -970000,0.03166653622562686 -980000,0.038266137793349725 -990000,0.026510904310271144 -1000000,0.027674465843786793 +10000,0.02623263170632223 +20000,0.015332822509420415 +30000,0.019290378130972386 +40000,0.032674306002445516 +50000,0.019483269168995322 +60000,0.025662755159040297 +70000,0.03346162697610756 +80000,0.03400001426537832 +90000,0.027793961685771744 +100000,0.017702427545251944 +110000,0.018864109778466327 +120000,0.021306668532391392 +130000,0.028996302567732835 +140000,0.03085930490245421 +150000,0.030585603384921944 +160000,0.015972093469463287 +170000,0.028114246934031438 +180000,0.030298997787758707 +190000,0.028242000914178788 +200000,0.022269600521152216 +210000,0.033799930157450336 +220000,0.033414911377864585 +230000,0.028994106245227157 +240000,0.02343093353168418 +250000,0.029055837425403297 +260000,0.030233087049176294 +270000,0.023328311989704767 +280000,0.024165730853565038 +290000,0.025341237809819485 +300000,0.030353388974132638 +310000,0.03255268339999021 +320000,0.019209718331694606 +330000,0.031169823960711558 +340000,0.028072810677501066 +350000,0.03132931458142897 +360000,0.025734486524015665 +370000,0.022927640153405572 +380000,0.03609029843937606 +390000,0.020056612704259652 +400000,0.035980918025597934 +410000,0.026183723836826784 +420000,0.015043787518516183 +430000,0.04307021004303048 +440000,0.035986372634458044 +450000,0.024629724475865568 +460000,0.030845645163208247 +470000,0.02466766112484038 +480000,0.030344337395702803 +490000,0.03294622974159817 +500000,0.03767229695028315 +510000,0.039613006815003855 +520000,0.029853354984273512 +530000,0.034543202227602404 +540000,0.02889725711817543 +550000,0.023008489903683465 +560000,0.024354357412084936 +570000,0.02952615830581635 +580000,0.024029843097863095 +590000,0.030186475813388826 +600000,0.02306691055030872 +610000,0.03098982217876861 +620000,0.02788007107252876 +630000,0.029009502551828825 +640000,0.03282647439433882 +650000,0.025834951417831088 +660000,0.029352048745689297 +670000,0.03355389224986235 +680000,0.027167334384284914 +690000,0.027024308095375697 +700000,0.021329679999810953 +710000,0.029597745067439972 +720000,0.03250967623510709 +730000,0.03142151312592129 +740000,0.030522335522497696 +750000,0.020982156732740505 +760000,0.02322644690672557 +770000,0.020119377225637437 +780000,0.03127514827841272 +790000,0.03278950341045857 +800000,0.0350030400014172 +810000,0.037059249457282326 +820000,0.037371122092008595 +830000,0.02884279303252697 +840000,0.04518606805553038 +850000,0.029385902367842692 +860000,0.030403308702322347 +870000,0.03248170866475751 +880000,0.03295162136976918 +890000,0.02685878467746079 +900000,0.030082635534927248 +910000,0.03153628213719155 +920000,0.026545664109289647 +930000,0.05195139296508085 +940000,0.016731751838233323 +950000,0.030367692327126855 +960000,0.03724668656165401 +970000,0.026144152685689426 +980000,0.02508556566511591 +990000,0.034015668494006 +1000000,0.030018516622173287 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/loss/entropy_loss.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/loss/entropy_loss.log index 60590e6ee..6c150c4c1 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/loss/entropy_loss.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/loss/entropy_loss.log @@ -1,101 +1,101 @@ step,loss/entropy_loss -10000,-3.65323349237442 -20000,-3.701035443941752 -30000,-3.7037034114201868 -40000,-3.7328696489334106 -50000,-3.7435733358065284 -60000,-3.7796807686487837 -70000,-3.751226981480916 -80000,-3.7054781556129455 -90000,-3.6343184113502502 -100000,-3.6104675094286605 -110000,-3.660032184918721 -120000,-3.685955957571666 -130000,-3.690832805633545 -140000,-3.757290395100911 -150000,-3.7415078600247704 -160000,-3.719420909881592 -170000,-3.697119947274526 -180000,-3.7132487336794533 -190000,-3.7352526903152468 -200000,-3.8188056667645776 -210000,-3.8286393523216247 -220000,-3.824943141142527 -230000,-3.793622362613678 -240000,-3.7813159068425493 -250000,-3.780515638987223 -260000,-3.7703279534975684 -270000,-3.702743637561798 -280000,-3.68235315879186 -290000,-3.7359907031059265 -300000,-3.6950501243273424 -310000,-3.713383881251018 -320000,-3.7290507674217226 -330000,-3.709498850504557 -340000,-3.7993091662724807 -350000,-3.821525851885478 -360000,-3.7672860423723855 -370000,-3.7618537108103434 -380000,-3.791294789314269 -390000,-3.7900597969690955 -400000,-3.780118258794149 -410000,-3.7745363434155785 -420000,-3.7287167827288306 -430000,-3.7775597612063097 -440000,-3.790697530905406 -450000,-3.809211746851603 -460000,-3.716394321123759 -470000,-3.6671969056129456 -480000,-3.7107267697652175 -490000,-3.7354627927144364 -500000,-3.7195999463399247 -510000,-3.671659664312999 -520000,-3.637708977858226 -530000,-3.639653428395589 -540000,-3.621651486555735 -550000,-3.613808683554332 -560000,-3.6394702553749085 -570000,-3.687994273503621 -580000,-3.679357035954793 -590000,-3.5949381987253823 -600000,-3.6077461798985793 -610000,-3.6098275542259217 -620000,-3.670778544743856 -630000,-3.6838690638542175 -640000,-3.6611075679461167 -650000,-3.6868650197982786 -660000,-3.712090865770976 -670000,-3.7426234761873873 -680000,-3.7676641027132676 -690000,-3.772209807236989 -700000,-3.755551679929097 -710000,-3.7450237353642786 -720000,-3.7667943437894182 -730000,-3.722085162003835 -740000,-3.7029874682426445 -750000,-3.637842297554016 -760000,-3.6468852718671156 -770000,-3.6233539024988817 -780000,-3.584011419614156 -790000,-3.606018968423208 -800000,-3.619081509113312 -810000,-3.5967085361480713 -820000,-3.6182836651802064 -830000,-3.585476096471151 -840000,-3.5539286613464354 -850000,-3.575441376368205 -860000,-3.5664189020792647 -870000,-3.5669690688451134 -880000,-3.6076539874076845 -890000,-3.6207243601481123 -900000,-3.5757952531178794 -910000,-3.5935214519500733 -920000,-3.6095368425051375 -930000,-3.6211774587631225 -940000,-3.6227912425994875 -950000,-3.6405066569646203 -960000,-3.6673398653666176 -970000,-3.636090727647146 -980000,-3.635312759876251 -990000,-3.6536144733428957 -1000000,-3.6737474123636877 +10000,-3.7276356418927508 +20000,-3.7272743900616967 +30000,-3.6780459880828857 +40000,-3.7456607937812807 +50000,-3.774801476796468 +60000,-3.7906172792116806 +70000,-3.8138393243153885 +80000,-3.8537108421325685 +90000,-3.8236866275469468 +100000,-3.784382609526317 +110000,-3.739926183223724 +120000,-3.737906702359517 +130000,-3.61975855032603 +140000,-3.6335356950759894 +150000,-3.6377027114232385 +160000,-3.6403376102447518 +170000,-3.5226148327191673 +180000,-3.4656345725059508 +190000,-3.452164228757222 +200000,-3.420408479372661 +210000,-3.411036745707194 +220000,-3.434283379713695 +230000,-3.4316326101620986 +240000,-3.4638495564460756 +250000,-3.4793599406878153 +260000,-3.4802823781967165 +270000,-3.413220365842183 +280000,-3.401370743910472 +290000,-3.367477997144063 +300000,-3.3939187049865724 +310000,-3.3685872991879777 +320000,-3.4379008054733275 +330000,-3.4461647033691407 +340000,-3.405877892176311 +350000,-3.2805729269981385 +360000,-3.30343474149704 +370000,-3.310897592703501 +380000,-3.2908093412717188 +390000,-3.243448007106781 +400000,-3.2578505754470823 +410000,-3.2514792044957472 +420000,-3.205496219793956 +430000,-3.158421452840169 +440000,-3.166487165292104 +450000,-3.174847106138865 +460000,-3.1496751983960474 +470000,-3.1620203932126363 +480000,-3.181210235754649 +490000,-3.158882188796997 +500000,-3.136229141553243 +510000,-3.126676865418752 +520000,-3.0990888913472494 +530000,-3.15313435792923 +540000,-3.19796910683314 +550000,-3.185325006643931 +560000,-3.224289484818777 +570000,-3.2659319003423057 +580000,-3.267202786604563 +590000,-3.2083145459493005 +600000,-3.1860716382662457 +610000,-3.1799826661745705 +620000,-3.1807813167572023 +630000,-3.1489359895388285 +640000,-3.1464182972908024 +650000,-3.2076072573661802 +660000,-3.2001526991526292 +670000,-3.2189209500948586 +680000,-3.2585032661755875 +690000,-3.203987717628479 +700000,-3.1995318373044332 +710000,-3.15983939965566 +720000,-3.140552699565887 +730000,-3.1541006247202557 +740000,-3.210055319468181 +750000,-3.2053531448046373 +760000,-3.1707695762316384 +770000,-3.1855959693590803 +780000,-3.1804768562316896 +790000,-3.1823206822077434 +800000,-3.1698323647181192 +810000,-3.125033863385519 +820000,-3.146137674649556 +830000,-3.140541938940684 +840000,-3.149668335914612 +850000,-3.163917116324107 +860000,-3.138861032327016 +870000,-3.103338309129079 +880000,-3.0847053488095604 +890000,-3.034223747253418 +900000,-2.9998667081197103 +910000,-2.9138245026270546 +920000,-2.907511885960897 +930000,-2.914987293879191 +940000,-2.9523264884948732 +950000,-2.967261159420013 +960000,-2.9523579398790996 +970000,-3.0159169236818952 +980000,-3.0062341570854185 +990000,-2.9935204108556115 +1000000,-3.0082746624946597 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/loss/policy_loss.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/loss/policy_loss.log index 17e0223ac..347705c44 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/loss/policy_loss.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/loss/policy_loss.log @@ -1,101 +1,101 @@ step,loss/policy_loss -10000,-0.005262403761332045 -20000,-0.014989961537165124 -30000,-0.023305918529548754 -40000,-0.01377693616956166 -50000,-0.01107592406343339 -60000,-0.0037486833628912982 -70000,-0.01674792837441475 -80000,-0.01665436644049473 -90000,-0.012040735516968173 -100000,-0.0012216286899402208 -110000,-0.010830996871883911 -120000,-0.002289710751791924 -130000,-0.013567517592485043 -140000,-0.025998918849581908 -150000,-0.009081075675333566 -160000,0.0009973001882231688 -170000,-0.012225912727864776 -180000,-0.007764597487586414 -190000,-0.0168376434512921 -200000,-0.004838904354158906 -210000,-0.011220839679138974 -220000,-0.01355766649629398 -230000,-0.013610465998316609 -240000,-0.014687599654385606 -250000,-0.0018918242053381517 -260000,-0.00301410676712391 -270000,-0.008039823898967331 -280000,0.0008779722988582811 -290000,-0.00823064379734465 -300000,-0.011487825513335636 -310000,0.0036751324955185912 -320000,-0.00967692664897572 -330000,-0.0011430216945013546 -340000,-0.021366963966905482 -350000,-0.003412910953337776 -360000,-0.00659419972872152 -370000,-0.011899132496587691 -380000,-0.013113268658015159 -390000,0.002192588414927179 -400000,-0.008962763310970592 -410000,-0.016720651478529282 -420000,-0.011000355938999052 -430000,-0.017302193233160183 -440000,-0.0030318995681206497 -450000,-0.00787804150924975 -460000,-0.003875370008339861 -470000,-0.018404520572705644 -480000,-0.016679143072840012 -490000,-0.0029407844226237772 -500000,-0.008548460645110221 -510000,-0.013601356900188103 -520000,-0.012841617825892705 -530000,0.0003462953717207281 -540000,0.0018515063526147956 -550000,-0.014714355088994279 -560000,0.001521063695799183 -570000,-0.0049485422014333355 -580000,0.00014819148101699272 -590000,-0.010271121392793146 -600000,-0.006970460188446851 -610000,-0.0012404576229509182 -620000,-0.00502797962161559 -630000,-0.02261616506511825 -640000,-0.008171174795014443 -650000,-0.014038358946819188 -660000,-0.015027707953919157 -670000,-0.017438546020064796 -680000,-0.00524529939091488 -690000,-0.007196999558062664 -700000,-0.007326584657605185 -710000,-0.004882357575850472 -720000,-0.013158736993242543 -730000,-0.015820545805284067 -740000,0.00021922920545127742 -750000,-0.011363219807002703 -760000,-0.014330272580665007 -770000,-0.00903830279497775 -780000,-0.009705644970062521 -790000,-0.017096772983582746 -800000,0.0011325453965332865 -810000,-0.004292737879147341 -820000,-0.013643665334432491 -830000,-0.0005606684571545046 -840000,5.7698605618394534e-05 -850000,-0.007690785564014262 -860000,-0.0022481840725609705 -870000,0.0015328457522053006 -880000,-0.009085528621300399 -890000,-0.007167461581913212 -900000,-0.010283859153299308 -910000,-0.015951191475678382 -920000,-0.013075119074685759 -930000,-0.011330717617305187 -940000,-0.01646439504455499 -950000,-0.012320343836875977 -960000,-0.01341271045814188 -970000,-0.01824910904582395 -980000,0.004863843100624365 -990000,-0.009324108958398625 -1000000,-0.004213831168301989 +10000,-0.013990977677090424 +20000,-0.014319922513232816 +30000,-0.01143928398686164 +40000,-0.01186953033462782 +50000,-0.018579161508588345 +60000,-0.017855208643503595 +70000,-0.0026368262459899204 +80000,0.001673521038562315 +90000,-0.015067172751922691 +100000,-0.009904255390093316 +110000,-0.014964266588655828 +120000,-0.012819866886645492 +130000,-0.015585776377666893 +140000,-0.013376684576791448 +150000,-0.007577284127397646 +160000,-0.009040601673705895 +170000,0.008516277036549221 +180000,-0.009028941650067342 +190000,-0.020681947866834825 +200000,-0.015413839046500876 +210000,-0.012410067711464542 +220000,-0.009755359972379122 +230000,-0.008019777485812335 +240000,-0.00556852434373626 +250000,-0.009996558873780101 +260000,-0.012365201758709301 +270000,-0.009221064005730712 +280000,-0.011280650867597468 +290000,-0.009906824372783583 +300000,-0.004895004196389134 +310000,-0.01112401549390939 +320000,-0.004457673531159151 +330000,-0.010197658479293863 +340000,-0.010809330630302975 +350000,-0.006445090610601273 +360000,-0.009885206609692766 +370000,-0.009187793018264567 +380000,0.0021290857117386543 +390000,-0.0026403583391755533 +400000,-0.005109598660571743 +410000,-0.008947223448928874 +420000,-0.010611383733576344 +430000,-0.009704635204247117 +440000,-0.01325508599732351 +450000,-0.00900707784459468 +460000,-0.01808543642729332 +470000,-0.012812851147902487 +480000,-0.009194451746745661 +490000,-0.01235080046555282 +500000,-0.006245564072277618 +510000,-0.009533781439971729 +520000,-0.004834192962587971 +530000,-0.005849504272574913 +540000,-0.014926336671088595 +550000,0.0010970897588260303 +560000,-0.006337021333903447 +570000,-0.018141034889462414 +580000,-0.010333252563118098 +590000,-0.005401580556325596 +600000,-0.011346575991116834 +610000,-0.014454140871602936 +620000,-0.007764788560354316 +630000,-0.0076428511262752186 +640000,-0.012402390226100379 +650000,-0.007398681242842628 +660000,-0.00991922462142392 +670000,-0.007658343604075864 +680000,-0.007829968788905704 +690000,0.00505084795108649 +700000,-0.006250643135288156 +710000,-0.007079926332247181 +720000,-0.011654458033036705 +730000,-0.015863202782990315 +740000,-0.007204520495137067 +750000,-0.009519079328596147 +760000,-0.0064348594550971 +770000,-0.0038951622242565263 +780000,-0.011165914853579834 +790000,-0.010028238303005584 +800000,-0.01053194914496421 +810000,-0.012933499880768765 +820000,0.000751859869406259 +830000,-0.011608547529319248 +840000,0.0038847653933860907 +850000,0.0006508627136076407 +860000,-0.0021245201125261745 +870000,-0.004896501427695122 +880000,-0.008640639912790505 +890000,-0.003415964642402917 +900000,-0.00582755254124469 +910000,-0.0135560748115293 +920000,-0.012818738014968106 +930000,0.0027894488131476325 +940000,-0.006387374261198777 +950000,-0.008587114301793163 +960000,-0.01730580929997894 +970000,-0.015587210724722004 +980000,-0.01498362489952657 +990000,-0.013972049249967245 +1000000,-0.0074273604860648585 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/loss/value_loss.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/loss/value_loss.log index 8604e1af3..d3ff9318d 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/loss/value_loss.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/loss/value_loss.log @@ -1,101 +1,101 @@ step,loss/value_loss -10000,6.6137541046006 -20000,9.480209772459713 -30000,3.373387548975632 -40000,4.123201007596725 -50000,2.9901716059387433 -60000,1.8635364517123392 -70000,3.869318444024652 -80000,3.608207070164543 -90000,4.714634525509333 -100000,2.979916584479951 -110000,3.9510637945504805 -120000,4.606341112220596 -130000,1.8354685359251903 -140000,7.58861086748888 -150000,1.5883396744015121 -160000,3.4333991237438197 -170000,1.2281715429551063 -180000,1.0877628314069228 -190000,0.5812959645217839 -200000,2.519268428121759 -210000,0.8100157387747039 -220000,1.2459359784341337 -230000,0.7032159485801746 -240000,2.1375138540325813 -250000,2.1656716945606034 -260000,0.8135467777248285 -270000,0.40956765625961394 -280000,0.9781904735882636 -290000,1.690510140469506 -300000,0.5723600199025733 -310000,0.3254891142221412 -320000,1.0134867289242655 -330000,1.4375357015139323 -340000,1.5620539933047417 -350000,0.7355494646805936 -360000,2.25473976807064 -370000,1.1497803097112471 -380000,0.7418764048940771 -390000,0.955627236759183 -400000,0.9638724768342446 -410000,0.6151945340091431 -420000,2.4576672723250446 -430000,3.120194414681233 -440000,3.3107318509096126 -450000,3.3871075625001636 -460000,1.218639895308002 -470000,0.5636677253414208 -480000,1.7349285247453063 -490000,0.6643507196497851 -500000,0.828129593212745 -510000,0.6624678897444232 -520000,0.6289282296341465 -530000,2.627829062592058 -540000,1.9548405819701482 -550000,0.48318474933028516 -560000,0.545913481795366 -570000,0.23878377959810346 -580000,0.7706937062915593 -590000,1.5762803271352142 -600000,0.46439913444047304 -610000,0.44684111200753895 -620000,0.5036754221456378 -630000,0.8785530967402883 -640000,0.5175639076947263 -650000,0.49042085507873123 -660000,1.0474509096694677 -670000,0.42243355765843066 -680000,0.41695649441824256 -690000,0.7958507748037927 -700000,0.4410802190243811 -710000,0.26056586969922 -720000,0.7279619311440777 -730000,0.4477770261788976 -740000,0.6537128654301172 -750000,0.3980070612249119 -760000,0.35616373215994995 -770000,0.8230752754223124 -780000,0.4641903045078579 -790000,0.46581280372036804 -800000,0.5730554816260014 -810000,0.7092320240651273 -820000,0.6505275449576647 -830000,0.8958985166145259 -840000,1.8044519360804734 -850000,0.310529552818239 -860000,0.8413853984174041 -870000,2.704047425724238 -880000,0.5025295721734809 -890000,0.7730570596558751 -900000,0.909218716509798 -910000,1.3894941041524993 -920000,0.4474488803155666 -930000,0.5438776745696983 -940000,0.5252890732573815 -950000,0.8286304142592653 -960000,0.5611205397653487 -970000,0.9341773481129769 -980000,0.27561956638264723 -990000,0.9858879405639414 -1000000,0.22354986743895594 +10000,12.047117606498855 +20000,9.040062027155448 +30000,5.950276959945148 +40000,5.0288502194552365 +50000,6.553581631733311 +60000,12.961123124971945 +70000,2.7016522594706394 +80000,8.522352354785495 +90000,2.720099493736041 +100000,4.494552425265949 +110000,2.4808777120646637 +120000,3.017723037980185 +130000,1.4107785338127126 +140000,1.6871734568798409 +150000,2.0534939645934633 +160000,0.5943249409932354 +170000,0.6837072874083285 +180000,1.0956791682971283 +190000,0.6611311451417892 +200000,0.31465484449331 +210000,0.6945935187116797 +220000,0.5553287582091524 +230000,0.8107423159367629 +240000,0.5954214053834627 +250000,1.1882136461680584 +260000,0.6068577538508497 +270000,0.4114835497144408 +280000,0.6882273962020304 +290000,0.35636308601973576 +300000,0.5201733521802832 +310000,0.3775747126211421 +320000,0.8957572279183232 +330000,0.7451876337328996 +340000,0.2900522835814114 +350000,0.3639569014181409 +360000,0.5471808864863082 +370000,0.29671596867855465 +380000,0.41861011380111857 +390000,0.22681863013078285 +400000,0.7874664108576253 +410000,0.4030869576157691 +420000,0.3038561277708988 +430000,0.18091315326601515 +440000,0.1730587311430482 +450000,0.16996705778701032 +460000,0.2990524662381354 +470000,1.3921073597654727 +480000,0.6940603564860212 +490000,0.168037377113631 +500000,0.21319867344403837 +510000,0.38794674888705866 +520000,0.34011447743289186 +530000,0.27203228274480173 +540000,0.266875672149907 +550000,0.19301607114268132 +560000,0.39184262075774845 +570000,0.18855907374977893 +580000,0.24604436766574192 +590000,0.49151068483524163 +600000,0.5384985522951511 +610000,0.2455110149080431 +620000,0.16635817582641826 +630000,0.8289920046377558 +640000,0.2187658899125456 +650000,0.7948563547089409 +660000,0.16563887085228748 +670000,0.6156967120505099 +680000,0.20228231089428314 +690000,0.49238818495532416 +700000,0.7487687135216687 +710000,0.6962456021844288 +720000,0.3260138260667903 +730000,0.48534889200778863 +740000,0.2756822104316994 +750000,0.3492062419436682 +760000,0.3583200314549139 +770000,0.5466189992049915 +780000,0.24152846256204433 +790000,0.19647980561831285 +800000,0.16803486968948173 +810000,0.2176788058304587 +820000,0.3131811292836907 +830000,0.2942513755384812 +840000,0.15208467922547095 +850000,0.20367449873166374 +860000,0.2888900622620546 +870000,0.6110605851746681 +880000,0.2387080674973754 +890000,0.2970851864226167 +900000,0.15219925178394758 +910000,0.280392951465629 +920000,0.2555525042150677 +930000,1.3488251827674347 +940000,0.22229327376764899 +950000,0.48190249590530243 +960000,0.5549847544858408 +970000,0.3990598696154417 +980000,0.6069988354430885 +990000,0.7732500676733306 +1000000,0.1298318236497018 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat/constraint_violation.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat/constraint_violation.log index fe8cda4c7..f6236acaf 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat/constraint_violation.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat/constraint_violation.log @@ -1,101 +1,101 @@ step,stat/constraint_violation -10000,45 -20000,45 -30000,51 -40000,73 -50000,112 -60000,148 -70000,211 -80000,249 -90000,283 -100000,307 -110000,349 -120000,365 -130000,384 -140000,409 -150000,442 -160000,478 -170000,518 -180000,524 -190000,549 -200000,568 -210000,621 -220000,632 -230000,673 -240000,696 -250000,741 -260000,754 -270000,758 -280000,815 -290000,855 -300000,882 -310000,898 -320000,931 -330000,936 -340000,953 -350000,969 -360000,985 -370000,1031 -380000,1048 -390000,1068 -400000,1079 -410000,1100 -420000,1104 -430000,1115 -440000,1154 -450000,1155 -460000,1185 -470000,1212 -480000,1233 -490000,1254 -500000,1259 -510000,1288 -520000,1296 -530000,1370 -540000,1386 -550000,1404 -560000,1415 -570000,1425 -580000,1468 -590000,1500 -600000,1530 -610000,1565 -620000,1571 -630000,1608 -640000,1635 -650000,1640 -660000,1645 -670000,1661 -680000,1698 -690000,1720 -700000,1726 -710000,1751 -720000,1760 -730000,1765 -740000,1767 -750000,1806 -760000,1810 -770000,1821 -780000,1827 -790000,1840 -800000,1886 -810000,1897 -820000,1911 -830000,1946 -840000,1947 -850000,1961 -860000,1992 -870000,2027 -880000,2033 -890000,2049 -900000,2053 -910000,2078 -920000,2094 -930000,2110 -940000,2126 -950000,2144 -960000,2179 -970000,2185 -980000,2214 -990000,2237 -1000000,2268 +10000,0 +20000,0 +30000,0 +40000,0 +50000,0 +60000,0 +70000,0 +80000,0 +90000,0 +100000,0 +110000,0 +120000,0 +130000,0 +140000,0 +150000,0 +160000,0 +170000,0 +180000,0 +190000,0 +200000,0 +210000,1 +220000,1 +230000,1 +240000,2 +250000,2 +260000,2 +270000,2 +280000,2 +290000,2 +300000,2 +310000,2 +320000,2 +330000,2 +340000,2 +350000,2 +360000,2 +370000,2 +380000,2 +390000,2 +400000,2 +410000,2 +420000,2 +430000,2 +440000,2 +450000,3 +460000,3 +470000,3 +480000,3 +490000,3 +500000,3 +510000,3 +520000,3 +530000,3 +540000,3 +550000,3 +560000,3 +570000,3 +580000,3 +590000,3 +600000,3 +610000,3 +620000,3 +630000,3 +640000,3 +650000,3 +660000,3 +670000,3 +680000,3 +690000,3 +700000,3 +710000,3 +720000,3 +730000,3 +740000,3 +750000,3 +760000,3 +770000,3 +780000,3 +790000,3 +800000,3 +810000,3 +820000,3 +830000,3 +840000,3 +850000,3 +860000,4 +870000,4 +880000,4 +890000,4 +900000,4 +910000,4 +920000,4 +930000,4 +940000,5 +950000,5 +960000,5 +970000,5 +980000,5 +990000,5 +1000000,5 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat/ep_constraint_violation.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat/ep_constraint_violation.log index 0867b28d6..bf59146a8 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat/ep_constraint_violation.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat/ep_constraint_violation.log @@ -1,101 +1,101 @@ step,stat/ep_constraint_violation -10000,1.4 +10000,0.0 20000,0.0 -30000,0.1 -40000,0.7 +30000,0.0 +40000,0.0 50000,0.0 -60000,0.3 -70000,1.4 +60000,0.0 +70000,0.0 80000,0.0 -90000,1.5 -100000,0.7 -110000,1.4 -120000,0.1 +90000,0.0 +100000,0.0 +110000,0.0 +120000,0.0 130000,0.0 140000,0.0 -150000,1.3 +150000,0.0 160000,0.0 -170000,3.4 -180000,0.5 +170000,0.0 +180000,0.0 190000,0.0 -200000,1.0 -210000,0.6 -220000,0.1 -230000,1.6 -240000,1.9 -250000,2.3 -260000,0.1 +200000,0.0 +210000,0.1 +220000,0.0 +230000,0.0 +240000,0.0 +250000,0.0 +260000,0.0 270000,0.0 -280000,1.2 -290000,1.1 -300000,2.5 -310000,0.1 -320000,0.5 -330000,0.1 -340000,0.1 -350000,0.1 -360000,0.1 -370000,1.4 -380000,0.2 -390000,0.1 -400000,0.9 +280000,0.0 +290000,0.0 +300000,0.0 +310000,0.0 +320000,0.0 +330000,0.0 +340000,0.0 +350000,0.0 +360000,0.0 +370000,0.0 +380000,0.0 +390000,0.0 +400000,0.0 410000,0.0 -420000,0.2 -430000,0.6 -440000,0.2 +420000,0.0 +430000,0.0 +440000,0.0 450000,0.0 -460000,0.8 -470000,0.1 -480000,0.1 -490000,0.1 -500000,0.1 -510000,1.4 +460000,0.0 +470000,0.0 +480000,0.0 +490000,0.0 +500000,0.0 +510000,0.0 520000,0.0 -530000,2.4 -540000,0.6 -550000,0.2 -560000,0.4 +530000,0.0 +540000,0.0 +550000,0.0 +560000,0.0 570000,0.0 -580000,0.1 -590000,0.8 -600000,2.7 -610000,0.1 +580000,0.0 +590000,0.0 +600000,0.0 +610000,0.0 620000,0.0 -630000,2.0 -640000,1.0 +630000,0.0 +640000,0.0 650000,0.0 660000,0.0 -670000,0.8 -680000,1.0 +670000,0.0 +680000,0.0 690000,0.0 700000,0.0 -710000,0.2 -720000,0.4 +710000,0.0 +720000,0.0 730000,0.0 740000,0.0 -750000,1.4 -760000,0.1 +750000,0.0 +760000,0.0 770000,0.0 -780000,0.1 -790000,0.8 -800000,1.7 -810000,0.1 -820000,0.1 -830000,1.1 -840000,0.1 -850000,0.9 +780000,0.0 +790000,0.0 +800000,0.0 +810000,0.0 +820000,0.0 +830000,0.0 +840000,0.0 +850000,0.0 860000,0.0 -870000,1.8 +870000,0.0 880000,0.0 -890000,1.1 -900000,0.1 +890000,0.0 +900000,0.0 910000,0.0 -920000,0.1 -930000,0.1 +920000,0.0 +930000,0.0 940000,0.0 -950000,0.1 -960000,0.2 +950000,0.0 +960000,0.0 970000,0.0 -980000,0.2 -990000,0.5 -1000000,1.5 +980000,0.0 +990000,0.0 +1000000,0.0 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat/ep_length.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat/ep_length.log index 0d5f8b8cb..0a4e50739 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat/ep_length.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat/ep_length.log @@ -1,67 +1,67 @@ step,stat/ep_length -10000,226.4 +10000,250.0 20000,250.0 -30000,225.8 -40000,202.7 +30000,250.0 +40000,250.0 50000,250.0 -60000,200.5 -70000,226.4 +60000,250.0 +70000,250.0 80000,250.0 90000,250.0 -100000,225.9 -110000,225.6 -120000,225.9 +100000,250.0 +110000,250.0 +120000,250.0 130000,250.0 140000,250.0 -150000,226.8 +150000,250.0 160000,250.0 170000,250.0 -180000,225.3 +180000,250.0 190000,250.0 200000,250.0 -210000,225.9 -220000,226.8 -230000,201.8 -240000,205.1 -250000,227.3 -260000,225.2 +210000,250.0 +220000,250.0 +230000,250.0 +240000,250.0 +250000,250.0 +260000,250.0 270000,250.0 280000,250.0 290000,250.0 300000,250.0 -310000,225.3 -320000,225.3 -330000,225.2 -340000,226.3 -350000,225.5 -360000,225.6 +310000,250.0 +320000,250.0 +330000,250.0 +340000,250.0 +350000,250.0 +360000,250.0 370000,250.0 -380000,201.2 -390000,225.1 -400000,200.9 +380000,250.0 +390000,250.0 +400000,250.0 410000,250.0 -420000,201.1 -430000,203.5 -440000,200.6 +420000,250.0 +430000,250.0 +440000,250.0 450000,250.0 -460000,226.2 -470000,226.0 -480000,226.4 -490000,226.0 +460000,250.0 +470000,250.0 +480000,250.0 +490000,250.0 500000,250.0 -510000,225.8 +510000,250.0 520000,250.0 -530000,201.9 -540000,201.3 -550000,201.2 -560000,225.8 +530000,250.0 +540000,250.0 +550000,250.0 +560000,250.0 570000,250.0 -580000,225.6 -590000,225.8 -600000,227.0 -610000,225.5 +580000,250.0 +590000,250.0 +600000,250.0 +610000,250.0 620000,250.0 -630000,154.1 +630000,250.0 640000,250.0 650000,250.0 660000,250.0 @@ -69,33 +69,33 @@ step,stat/ep_length 680000,250.0 690000,250.0 700000,250.0 -710000,200.9 +710000,250.0 720000,250.0 730000,250.0 740000,250.0 -750000,225.6 -760000,225.1 +750000,250.0 +760000,250.0 770000,250.0 -780000,225.1 -790000,225.4 -800000,200.8 -810000,225.1 -820000,225.6 -830000,226.3 -840000,227.1 -850000,225.6 +780000,250.0 +790000,250.0 +800000,250.0 +810000,250.0 +820000,250.0 +830000,250.0 +840000,250.0 +850000,250.0 860000,250.0 -870000,200.8 +870000,250.0 880000,250.0 890000,250.0 -900000,225.6 +900000,250.0 910000,250.0 -920000,225.2 +920000,250.0 930000,250.0 940000,250.0 -950000,226.0 -960000,225.1 +950000,250.0 +960000,250.0 970000,250.0 -980000,200.4 +980000,250.0 990000,250.0 1000000,250.0 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat/ep_return.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat/ep_return.log index e13518462..00946fe55 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat/ep_return.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat/ep_return.log @@ -1,101 +1,101 @@ step,stat/ep_return -10000,58.587320151672714 -20000,57.85869956571639 -30000,59.1395959977046 -40000,68.61438511777928 -50000,100.24324527521442 -60000,86.40114259097408 -70000,110.78203596688203 -80000,98.6879339484661 -90000,99.83454496844176 -100000,91.61950583556694 -110000,105.60245364399577 -120000,111.19554031920367 -130000,123.0625041645341 -140000,122.91413889001481 -150000,130.4477752509356 -160000,144.22063126364282 -170000,133.27343090229846 -180000,135.1242849070673 -190000,129.78394715019826 -200000,148.0690120548213 -210000,117.67000956863615 -220000,126.30476736984829 -230000,113.76599919629864 -240000,122.43222786921562 -250000,134.84016894937332 -260000,132.7329802785992 -270000,162.4913936635611 -280000,155.8880357866051 -290000,161.51595475180613 -300000,160.96851064736057 -310000,142.1685789728188 -320000,143.68569094565933 -330000,144.9386769260426 -340000,154.33455479467892 -350000,135.17878780117695 -360000,151.91246397143348 -370000,149.30181016567593 -380000,125.70504306566932 -390000,149.33047383188142 -400000,114.94360622244876 -410000,156.93457616883762 -420000,134.339421071985 -430000,107.41574810869147 -440000,124.09452344711704 -450000,157.11240143886698 -460000,140.18195116020888 -470000,137.60504583069974 -480000,156.07498476984784 -490000,137.02206289266962 -500000,150.46626661836626 -510000,130.7024490415152 -520000,175.69570637032461 -530000,127.54791682168798 -540000,134.3783691862696 -550000,130.27972130522534 -560000,145.4694186515098 -570000,153.32146614692564 -580000,149.442597769889 -590000,134.7291723791532 -600000,144.42365166173477 -610000,141.89214233766845 -620000,145.32268764047862 -630000,90.57550593421307 -640000,146.39266576055746 -650000,163.98653230626087 -660000,151.9810047872453 -670000,167.24965627090327 -680000,155.00476478974787 -690000,157.52774060207847 -700000,146.7472757261579 -710000,116.14273760129649 -720000,150.99189405648244 -730000,159.3346855844789 -740000,156.3758085998031 -750000,138.95644884925275 -760000,148.5901918076382 -770000,151.2731844448598 -780000,157.44583829222918 -790000,150.53839873981195 -800000,125.78581252841518 -810000,133.78551555774308 -820000,144.01312959765738 -830000,153.3491544523156 -840000,142.13622995098805 -850000,138.42565887370864 -860000,172.07063836363352 -870000,129.73893710680701 -880000,156.97217320201887 -890000,152.39340449913664 -900000,155.51773798431054 -910000,175.6022097891117 -920000,161.3583694734427 -930000,168.2458226805829 -940000,160.02289704735148 -950000,142.35546541258364 -960000,148.94474536831453 -970000,168.2140569120166 -980000,125.98744242500182 -990000,162.6806953774681 -1000000,158.34067103515304 +10000,92.13903846870046 +20000,99.3658037909754 +30000,93.44037082099129 +40000,97.9135048023768 +50000,104.31228588366764 +60000,126.81044764672215 +70000,141.11365156000454 +80000,150.69790639100097 +90000,149.83231985658495 +100000,140.58682829078566 +110000,153.07459224455044 +120000,152.52090438212375 +130000,162.45209063908803 +140000,167.80095683092682 +150000,165.70459738945198 +160000,169.65585289742643 +170000,165.9170039611904 +180000,163.91727554518113 +190000,166.89922048672253 +200000,179.25563262930592 +210000,183.11666061910614 +220000,176.87668257647533 +230000,178.7796222770416 +240000,189.30948636064505 +250000,184.69586095751552 +260000,174.2625758855869 +270000,169.88474956983555 +280000,175.91429004926636 +290000,173.07363865758333 +300000,174.76814546421 +310000,176.44959270734302 +320000,176.91743609824405 +330000,178.93189771570093 +340000,183.7757493534363 +350000,180.10759531852608 +360000,174.99298605137602 +370000,177.20693789026245 +380000,177.74271188186222 +390000,178.474078457351 +400000,174.24662967520456 +410000,185.90399966032828 +420000,175.49034779710195 +430000,181.87638240526135 +440000,183.25215457145293 +450000,197.65292056262638 +460000,196.61700757997664 +470000,173.99735751307898 +480000,173.58747827570295 +490000,175.03880524031166 +500000,186.87811387735732 +510000,178.58602233163398 +520000,191.3954154922154 +530000,179.08679649177296 +540000,181.2873858623803 +550000,184.95095015550436 +560000,186.70378077684472 +570000,184.66977261387223 +580000,187.08789899815127 +590000,186.44666475589787 +600000,191.72792651927566 +610000,182.67131934722357 +620000,190.25307837854024 +630000,192.60500242041874 +640000,192.13186044776978 +650000,182.35262992754411 +660000,175.84636745194507 +670000,177.56783227906945 +680000,180.89509703737644 +690000,198.05210539151392 +700000,170.20508102156592 +710000,182.23969967529152 +720000,175.16591595614517 +730000,177.1368336754515 +740000,186.48166156471603 +750000,180.10837553572918 +760000,177.9680149531696 +770000,173.1847877166403 +780000,181.00049144992266 +790000,177.90102149517796 +800000,185.2580730185404 +810000,189.0437758034146 +820000,184.36732226541267 +830000,186.81332314958243 +840000,190.4233347779438 +850000,192.10581872634435 +860000,183.12786590581464 +870000,198.039944144769 +880000,180.92006833871972 +890000,179.28714047726544 +900000,185.41674776492295 +910000,184.42468246145015 +920000,187.8893305683533 +930000,184.66350740978118 +940000,179.6941669950496 +950000,184.21307060645788 +960000,178.84669974233228 +970000,190.78835213576164 +980000,183.82533787510107 +990000,196.6860681513738 +1000000,187.08390563063284 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat/ep_reward.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat/ep_reward.log index 29f5025b5..d9f2efead 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat/ep_reward.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat/ep_reward.log @@ -1,101 +1,101 @@ step,stat/ep_reward -10000,0.2347437267302598 -20000,0.23143479826286556 -30000,0.23657267889335382 -40000,0.2744973199821232 -50000,0.40097298110085766 -60000,0.34826389310618494 -70000,0.4437130554765907 -80000,0.39475173579386447 -90000,0.3993381798737671 -100000,0.3666445449570087 -110000,0.4228425705295476 -120000,0.4484224893491568 -130000,0.4922500166581364 -140000,0.49165655556005927 -150000,0.5224385043508115 -160000,0.5768825250545714 -170000,0.5330937236091939 -180000,0.5413263205956498 -190000,0.5191357886007929 -200000,0.592276048219285 -210000,0.4707677980443938 -220000,0.5053490769266731 -230000,0.45568628765502506 -240000,0.5025434437186156 -250000,0.539520033410652 -260000,0.5349837635782622 -270000,0.6499655746542443 -280000,0.6235521431464204 -290000,0.6460638190072246 -300000,0.6438740425894423 -310000,0.5687689604953137 -320000,0.5751582020801045 -330000,0.5837109208066551 -340000,0.6173458949663305 -350000,0.5411828801958503 -360000,0.6124726269089346 -370000,0.5972072406627037 -380000,0.5034868013667816 -390000,0.5973293560717158 -400000,0.4643044753971181 -410000,0.6277383046753505 -420000,0.5392329843525634 -430000,0.4319945455667673 -440000,0.5125747467039797 -450000,0.628449605755468 -460000,0.5620741881762598 -470000,0.5518178498006077 -480000,0.6258056588922571 -490000,0.5481170277230583 -500000,0.6018650664734649 -510000,0.5231713097919388 -520000,0.7027828254812984 -530000,0.5113764143468007 -540000,0.5379500251417488 -550000,0.523359768758459 -560000,0.5822722732056215 -570000,0.6132858645877026 -580000,0.599889936213833 -590000,0.5400610557947256 -600000,0.5777247018037711 -610000,0.5681566747793655 -620000,0.5812907505619145 -630000,0.36815911943402374 -640000,0.5855706630422299 -650000,0.6559461292250435 -660000,0.6079240191489813 -670000,0.6689986250836133 -680000,0.6200190591589917 -690000,0.6301109624083139 -700000,0.5869891029046316 -710000,0.4647682129589633 -720000,0.6039675762259298 -730000,0.6373387423379155 -740000,0.6255032343992125 -750000,0.555982073003929 -760000,0.5978407871104101 -770000,0.6050927377794392 -780000,0.6302925356733438 -790000,0.6021596804864862 -800000,0.5060272187870227 -810000,0.5388400893778106 -820000,0.5770392766695599 -830000,0.6136569872237146 -840000,0.5688432780014809 -850000,0.5537349768883794 -860000,0.688282553454534 -870000,0.5307588243694138 -880000,0.6278886928080756 -890000,0.6095736179965465 -900000,0.6244898380703215 -910000,0.7024088391564468 -920000,0.6474236324011975 -930000,0.6729832907223317 -940000,0.6400915881894059 -950000,0.5722194731879143 -960000,0.5961017023256232 -970000,0.6728562276480663 -980000,0.5042665621555791 -990000,0.6507227815098726 -1000000,0.6333626841406123 +10000,0.36855615387480184 +20000,0.39746321516390165 +30000,0.37376148328396513 +40000,0.3916540192095072 +50000,0.4172491435346705 +60000,0.5072417905868886 +70000,0.5644546062400182 +80000,0.602791625564004 +90000,0.59932927942634 +100000,0.5623473131631427 +110000,0.6122983689782017 +120000,0.610083617528495 +130000,0.6498083625563521 +140000,0.6712038273237072 +150000,0.6628183895578078 +160000,0.6786234115897057 +170000,0.6636680158447618 +180000,0.6556691021807245 +190000,0.6675968819468902 +200000,0.7170225305172238 +210000,0.7324666424764246 +220000,0.7075067303059013 +230000,0.7151184891081664 +240000,0.7572379454425803 +250000,0.738783443830062 +260000,0.6970503035423475 +270000,0.6795389982793422 +280000,0.7036571601970655 +290000,0.6922945546303334 +300000,0.69907258185684 +310000,0.705798370829372 +320000,0.7076697443929763 +330000,0.7157275908628036 +340000,0.7351029974137452 +350000,0.7204303812741044 +360000,0.6999719442055042 +370000,0.7088277515610497 +380000,0.710970847527449 +390000,0.7138963138294041 +400000,0.6969865187008183 +410000,0.743615998641313 +420000,0.7019613911884078 +430000,0.7275055296210453 +440000,0.7330086182858117 +450000,0.7906116822505056 +460000,0.7864680303199065 +470000,0.6959894300523158 +480000,0.6943499131028118 +490000,0.7001552209612467 +500000,0.7475124555094291 +510000,0.7143440893265359 +520000,0.7655816619688616 +530000,0.7163471859670918 +540000,0.7251495434495212 +550000,0.7398038006220174 +560000,0.7468151231073789 +570000,0.738679090455489 +580000,0.748351595992605 +590000,0.7457866590235915 +600000,0.7669117060771026 +610000,0.7306852773888942 +620000,0.7610123135141608 +630000,0.7704200096816749 +640000,0.7685274417910792 +650000,0.7294105197101766 +660000,0.7033854698077802 +670000,0.7102713291162778 +680000,0.7235803881495058 +690000,0.7922084215660556 +700000,0.6808203240862636 +710000,0.728958798701166 +720000,0.7006636638245807 +730000,0.7085473347018059 +740000,0.7459266462588642 +750000,0.7204335021429167 +760000,0.7118720598126785 +770000,0.6927391508665612 +780000,0.7240019657996908 +790000,0.7116040859807116 +800000,0.7410322920741615 +810000,0.7561751032136586 +820000,0.7374692890616505 +830000,0.7472532925983297 +840000,0.7616933391117751 +850000,0.7684232749053773 +860000,0.7325114636232585 +870000,0.7921597765790762 +880000,0.7236802733548788 +890000,0.717148561909062 +900000,0.7416669910596918 +910000,0.7376987298458005 +920000,0.7515573222734131 +930000,0.7386540296391246 +940000,0.7187766679801983 +950000,0.7368522824258316 +960000,0.7153867989693291 +970000,0.7631534085430466 +980000,0.7353013515004042 +990000,0.7867442726054953 +1000000,0.7483356225225315 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat_eval/constraint_violation.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat_eval/constraint_violation.log index f4c8de919..8999aebe0 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat_eval/constraint_violation.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat_eval/constraint_violation.log @@ -1,101 +1,101 @@ step,stat_eval/constraint_violation -10000,0.0 -20000,0.0 -30000,0.0 -40000,2.1 -50000,0.0 -60000,0.2 -70000,1.0 -80000,0.0 -90000,0.3 +10000,0.4 +20000,0.2 +30000,0.1 +40000,0.1 +50000,0.6 +60000,0.0 +70000,1.3 +80000,1.0 +90000,1.0 100000,0.0 110000,0.2 -120000,0.1 -130000,0.0 -140000,1.8 -150000,0.2 +120000,0.0 +130000,1.2 +140000,0.0 +150000,1.2 160000,0.0 -170000,0.0 -180000,0.0 -190000,0.3 -200000,0.1 -210000,0.0 -220000,0.9 +170000,0.1 +180000,0.2 +190000,1.7 +200000,0.2 +210000,0.3 +220000,0.3 230000,0.0 -240000,0.2 -250000,0.0 -260000,0.6 -270000,0.1 -280000,0.3 -290000,0.2 -300000,0.3 -310000,0.1 +240000,0.9 +250000,0.2 +260000,0.0 +270000,3.1 +280000,0.0 +290000,0.3 +300000,0.2 +310000,0.0 320000,0.0 -330000,0.2 -340000,0.4 -350000,0.1 +330000,0.3 +340000,0.1 +350000,0.0 360000,1.4 -370000,0.1 +370000,0.2 380000,0.2 -390000,1.3 -400000,0.0 -410000,0.1 -420000,0.0 -430000,0.2 -440000,0.2 -450000,1.5 -460000,0.7 -470000,0.0 -480000,0.0 -490000,1.5 -500000,0.1 -510000,0.0 -520000,0.5 +390000,1.5 +400000,0.2 +410000,0.0 +420000,1.4 +430000,2.2 +440000,0.1 +450000,0.0 +460000,0.9 +470000,0.5 +480000,1.6 +490000,0.2 +500000,0.0 +510000,0.6 +520000,0.4 530000,0.0 -540000,2.0 -550000,0.0 -560000,0.0 -570000,0.1 -580000,1.3 -590000,0.0 -600000,0.3 -610000,0.0 -620000,0.3 -630000,0.0 -640000,0.0 -650000,0.6 -660000,0.3 -670000,0.1 +540000,2.1 +550000,0.1 +560000,0.2 +570000,0.0 +580000,0.5 +590000,0.6 +600000,0.0 +610000,0.6 +620000,0.1 +630000,0.8 +640000,0.2 +650000,0.0 +660000,2.2 +670000,0.8 680000,0.0 -690000,0.2 -700000,0.1 -710000,0.1 -720000,2.1 -730000,0.2 +690000,1.1 +700000,0.0 +710000,1.3 +720000,1.4 +730000,0.1 740000,0.0 -750000,0.4 -760000,0.3 -770000,0.0 -780000,0.3 -790000,1.4 -800000,0.4 +750000,0.1 +760000,0.1 +770000,0.2 +780000,1.3 +790000,0.1 +800000,0.0 810000,0.1 -820000,1.7 -830000,0.5 -840000,0.1 -850000,1.7 +820000,0.7 +830000,0.0 +840000,0.0 +850000,0.4 860000,0.0 -870000,0.3 -880000,0.6 -890000,0.1 -900000,0.4 +870000,1.3 +880000,0.2 +890000,0.3 +900000,1.4 910000,0.1 -920000,0.2 -930000,0.2 +920000,2.0 +930000,0.0 940000,0.0 -950000,0.0 -960000,0.0 -970000,1.0 -980000,0.2 -990000,0.9 -1000000,0.7 +950000,0.1 +960000,0.2 +970000,1.7 +980000,0.0 +990000,0.2 +1000000,0.0 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat_eval/ep_length.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat_eval/ep_length.log index e60ddf1ad..e708c3662 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat_eval/ep_length.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat_eval/ep_length.log @@ -1,101 +1,101 @@ step,stat_eval/ep_length -10000,250.0 -20000,250.0 -30000,250.0 +10000,225.9 +20000,201.9 +30000,225.3 40000,250.0 50000,250.0 -60000,225.2 -70000,201.9 +60000,250.0 +70000,225.4 80000,250.0 -90000,200.9 +90000,250.0 100000,250.0 -110000,200.3 -120000,225.1 -130000,250.0 +110000,201.3 +120000,250.0 +130000,177.6 140000,250.0 -150000,201.4 +150000,250.0 160000,250.0 -170000,250.0 -180000,250.0 -190000,178.2 +170000,225.8 +180000,200.9 +190000,250.0 200000,250.0 -210000,250.0 -220000,225.2 +210000,176.6 +220000,226.3 230000,250.0 -240000,225.8 -250000,250.0 +240000,225.1 +250000,202.0 260000,250.0 -270000,226.3 -280000,177.0 -290000,202.8 -300000,225.3 -310000,225.1 +270000,250.0 +280000,250.0 +290000,201.4 +300000,225.4 +310000,250.0 320000,250.0 -330000,250.0 +330000,201.9 340000,250.0 350000,250.0 -360000,225.7 -370000,226.7 -380000,200.8 -390000,225.1 -400000,250.0 +360000,250.0 +370000,202.3 +380000,225.9 +390000,250.0 +400000,225.6 410000,250.0 -420000,250.0 -430000,200.8 -440000,201.6 -450000,226.1 +420000,201.4 +430000,201.8 +440000,226.5 +450000,250.0 460000,250.0 -470000,250.0 -480000,250.0 +470000,226.2 +480000,201.2 490000,250.0 -500000,226.8 -510000,250.0 -520000,153.7 +500000,250.0 +510000,225.3 +520000,225.7 530000,250.0 -540000,226.0 -550000,250.0 -560000,250.0 -570000,226.7 -580000,250.0 -590000,250.0 -600000,225.3 +540000,226.3 +550000,225.3 +560000,201.6 +570000,250.0 +580000,202.6 +590000,225.3 +600000,250.0 610000,250.0 -620000,250.0 +620000,225.1 630000,250.0 -640000,250.0 -650000,201.7 -660000,225.9 -670000,225.7 +640000,204.9 +650000,250.0 +660000,250.0 +670000,250.0 680000,250.0 -690000,201.9 -700000,227.2 -710000,226.2 +690000,250.0 +700000,250.0 +710000,250.0 720000,225.7 -730000,202.6 +730000,226.7 740000,250.0 -750000,250.0 -760000,176.9 -770000,250.0 -780000,250.0 -790000,250.0 -800000,225.8 -810000,225.7 -820000,225.5 -830000,200.4 -840000,225.4 -850000,226.1 +750000,225.2 +760000,225.6 +770000,201.3 +780000,225.1 +790000,225.7 +800000,250.0 +810000,250.0 +820000,250.0 +830000,250.0 +840000,250.0 +850000,151.6 860000,250.0 -870000,226.0 -880000,225.8 -890000,226.6 -900000,226.9 -910000,225.2 -920000,226.7 -930000,202.2 +870000,201.7 +880000,201.6 +890000,226.1 +900000,225.9 +910000,226.6 +920000,225.2 +930000,250.0 940000,250.0 -950000,250.0 +950000,227.3 960000,250.0 -970000,250.0 -980000,225.2 -990000,250.0 -1000000,225.4 +970000,225.2 +980000,250.0 +990000,226.8 +1000000,250.0 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat_eval/ep_return.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat_eval/ep_return.log index e29e3d1dd..76e57a86a 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat_eval/ep_return.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat_eval/ep_return.log @@ -1,101 +1,101 @@ step,stat_eval/ep_return -10000,40.58563172153616 -20000,40.077870979748 -30000,58.33962419131095 -40000,69.6018620689766 -50000,89.5239233961458 -60000,78.72800326651989 -70000,68.38682474877359 -80000,96.52147250006854 -90000,84.89845613227354 -100000,95.71187710166278 -110000,89.05674795523527 -120000,109.35817601878009 -130000,114.47088138905085 -140000,134.28165106978093 -150000,103.9635816262225 -160000,128.24854963095203 -170000,135.53930838425953 -180000,146.68021983556187 -190000,91.57603932606237 -200000,133.107595830338 -210000,134.4817733461073 -220000,126.24879562236262 -230000,145.40624073946742 -240000,127.86006844985761 -250000,141.00633814742508 -260000,151.6900432510212 -270000,134.2652608865435 -280000,99.04028183326317 -290000,107.06905667061149 -300000,119.49727602602513 -310000,139.6939997104274 -320000,139.35529021045295 -330000,147.74083199126818 -340000,141.26314156737206 -350000,162.14469298373734 -360000,141.92736059350392 -370000,133.2293770495425 -380000,121.43624361879208 -390000,138.59188456308144 -400000,139.80993456301596 -410000,152.3691442612411 -420000,158.1179104757925 -430000,105.81318286414924 -440000,112.80556828911688 -450000,131.72978075359737 -460000,161.13430154395675 -470000,157.64542122893772 -480000,144.67971793369202 -490000,153.276688117327 -500000,132.7607739392939 -510000,144.20793659404316 -520000,95.9600556323694 -530000,141.59797985769944 -540000,149.97598852318487 -550000,153.6087975261567 -560000,152.3813930504772 -570000,136.1814970620321 -580000,160.1952019320707 -590000,151.82011695471687 -600000,135.3435865262278 -610000,139.764734278255 -620000,131.4250511493494 -630000,142.25266955970454 -640000,154.160628187462 -650000,119.254089017296 -660000,132.22487529446377 -670000,139.95899229500924 -680000,154.69577434985726 -690000,116.83154561324906 -700000,138.54874949254742 -710000,157.2543853529662 -720000,125.24551245887338 -730000,114.66478755031645 -740000,157.01909482001037 -750000,149.22598692054854 -760000,103.23066155455237 -770000,154.33278932160238 -780000,149.50220505744673 -790000,134.44922772568748 -800000,150.90480797073175 -810000,129.03667220542496 -820000,141.19365005221974 -830000,126.31493410514642 -840000,131.37513721791453 -850000,138.45452521157551 -860000,170.31728881632029 -870000,132.8425827506831 -880000,142.7670123795223 -890000,137.3374766060867 -900000,139.07477502168248 -910000,139.57044332573813 -920000,130.41077906641118 -930000,114.53745691217702 -940000,143.53646021660944 -950000,150.7037205391579 -960000,153.79509542155137 -970000,157.9988125321411 -980000,128.72593659901085 -990000,165.89675349103806 -1000000,131.18744671768744 +10000,39.483361562074286 +20000,52.17109604044441 +30000,44.34184155811015 +40000,50.7869951161402 +50000,70.99633857818802 +60000,102.39467739564573 +70000,70.20396654301348 +80000,94.93642146183932 +90000,114.82648804187902 +100000,109.4422142718832 +110000,86.05706764003513 +120000,112.09435961817641 +130000,95.79175456457557 +140000,131.96587993412703 +150000,139.48606614920453 +160000,131.73895952219792 +170000,124.71840451886767 +180000,113.10612544005343 +190000,142.21787956760375 +200000,146.3060130227071 +210000,105.49536610413028 +220000,127.68396584096035 +230000,139.00210991357181 +240000,124.39381449618001 +250000,110.22668071279904 +260000,142.22193017798818 +270000,138.76528011911918 +280000,151.64086702527388 +290000,109.14743500469918 +300000,102.43152678427516 +310000,117.58969293964113 +320000,133.52960209686063 +330000,110.0976246669355 +340000,133.7667875037393 +350000,135.75754461623478 +360000,144.39678377164145 +370000,121.03971613798107 +380000,125.07553058792037 +390000,139.51342726039414 +400000,125.02013353300485 +410000,144.39932714687936 +420000,120.81720636932293 +430000,119.50343135429526 +440000,130.631269997216 +450000,150.90741528415734 +460000,157.84826530691245 +470000,122.4418538245473 +480000,118.11646163001285 +490000,131.89675609683718 +500000,138.69621609828323 +510000,134.91495947395165 +520000,137.59494469459176 +530000,135.76373535083357 +540000,129.86311770440184 +550000,123.07847986813519 +560000,142.51765388029307 +570000,139.24478512083152 +580000,97.94616653445621 +590000,130.5116038595081 +600000,140.13074285349748 +610000,152.9864703441937 +620000,133.19475309222852 +630000,145.356174410903 +640000,125.58905442460284 +650000,140.44938159487722 +660000,127.44102383661307 +670000,143.23857402723388 +680000,140.17065504220307 +690000,158.72685705988403 +700000,139.12457394788225 +710000,146.9682162697838 +720000,114.65131073036382 +730000,135.4363761548867 +740000,133.05041538564086 +750000,115.59996992122558 +760000,117.0935567036054 +770000,108.81404087497549 +780000,131.0975805796942 +790000,127.93980537203768 +800000,137.28779313513385 +810000,149.65366232197496 +820000,148.08227373323825 +830000,147.82598696703397 +840000,156.5325005969515 +850000,82.56040677473872 +860000,141.1945727375518 +870000,122.84568542669345 +880000,122.04705958200543 +890000,140.92375586909623 +900000,122.98126212177654 +910000,128.5836750366258 +920000,142.9452979485689 +930000,150.91038381318805 +940000,146.34137531750326 +950000,119.49525572805128 +960000,149.23776051330125 +970000,126.02607912605515 +980000,153.87941879817018 +990000,146.86728280432692 +1000000,146.2827728538557 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat_eval/ep_reward.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat_eval/ep_reward.log index ea7f3336d..eed18a11b 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat_eval/ep_reward.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat_eval/ep_reward.log @@ -1,101 +1,101 @@ step,stat_eval/ep_reward -10000,0.16234252688614464 -20000,0.16031148391899194 -30000,0.23335849676524384 -40000,0.2784074482759064 -50000,0.35809569358458326 -60000,0.3149120130690273 -70000,0.27354730286010953 -80000,0.38608589000027427 -90000,0.3395938245464612 -100000,0.3828475084066511 -110000,0.35622699182466666 -120000,0.4374327041256527 -130000,0.45788352555620343 -140000,0.5371266042791237 -150000,0.41585434159157286 -160000,0.512994198523808 -170000,0.5421572335370382 -180000,0.5867208793422475 -190000,0.3663056395932093 -200000,0.5324303833213521 -210000,0.5379270933844291 -220000,0.5049951835978332 -230000,0.5816249629578697 -240000,0.5114402738441857 -250000,0.5640253525897003 -260000,0.6067601730040849 -270000,0.5370610443395556 -280000,0.3961611318906411 -290000,0.42827695894711876 -300000,0.47798910412127615 -310000,0.5587760296061146 -320000,0.5574211608418118 -330000,0.5909633279650727 -340000,0.5650525662694882 -350000,0.6485787719349495 -360000,0.5677094423740745 -370000,0.5329175093309542 -380000,0.48574498589061815 -390000,0.5543675447022641 -400000,0.5592397382520639 -410000,0.6094765770449644 -420000,0.63247164190317 -430000,0.4232527575448143 -440000,0.4512228975465587 -450000,0.5269191306507995 -460000,0.644537206175827 -470000,0.6305816849157508 -480000,0.5787188717347682 -490000,0.613106752469308 -500000,0.5310431895639337 -510000,0.5768317463761727 -520000,0.3838407839019617 -530000,0.5663919194307977 -540000,0.5999039541361928 -550000,0.6144351901046269 -560000,0.6095255722019088 -570000,0.544725995960157 -580000,0.6407808077282826 -590000,0.6072804678188676 -600000,0.5413743461808324 -610000,0.5590589371130199 -620000,0.5257002045973977 -630000,0.5690106782388182 -640000,0.6166425127498483 -650000,0.4770163640240551 -660000,0.5288995342210765 -670000,0.559862592998061 -680000,0.618783097399429 -690000,0.46732626680910466 -700000,0.5541949993947155 -710000,0.6290175943297689 -720000,0.5009820498354938 -730000,0.45865944114361434 -740000,0.6280763792800415 -750000,0.596903947682194 -760000,0.41292446350816175 -770000,0.6173311572864095 -780000,0.598008820229787 -790000,0.53779691090275 -800000,0.6036192318912927 -810000,0.5161467384631729 -820000,0.5647746002150436 -830000,0.5052607855193781 -840000,0.5255005489103702 -850000,0.5538181112487535 -860000,0.6812691552652812 -870000,0.531371470087336 -880000,0.5710680506224025 -890000,0.5493499064793398 -900000,0.5562991001313894 -910000,0.5582817733118205 -920000,0.5216444127909293 -930000,0.45814982835524176 -940000,0.5741458408664377 -950000,0.6028148821566316 -960000,0.6151803816862055 -970000,0.6319952501285644 -980000,0.51490374642143 -990000,0.6635870139641522 -1000000,0.5247497868789577 +10000,0.15793344630235956 +20000,0.2087156404328267 +30000,0.17736736662964964 +40000,0.20314798046456078 +50000,0.28398535431275207 +60000,0.40957870958258286 +70000,0.2808158751425403 +80000,0.37974568584735724 +90000,0.459305952167516 +100000,0.43776885708753277 +110000,0.34422828271532896 +120000,0.4483774384727056 +130000,0.3831694854651114 +140000,0.5278635197365082 +150000,0.5579442645968182 +160000,0.5269558380887917 +170000,0.4988736184721169 +180000,0.45242450929029604 +190000,0.5688715182704149 +200000,0.5852240520908284 +210000,0.421988354549645 +220000,0.5107358637923285 +230000,0.5560084396542871 +240000,0.4975752586223578 +250000,0.4409067391045002 +260000,0.5688877207119527 +270000,0.5550611204764768 +280000,0.6065634681010956 +290000,0.43658975031910485 +300000,0.40972610745993715 +310000,0.4703587717585645 +320000,0.5341184083874425 +330000,0.44039221628683184 +340000,0.5350671500149573 +350000,0.5430301784649393 +360000,0.577587135086566 +370000,0.48416293505434693 +380000,0.5003029582959353 +390000,0.5580537090415765 +400000,0.5000805357547886 +410000,0.5775973085875175 +420000,0.4832688795634284 +430000,0.4780143576079558 +440000,0.5225251307347683 +450000,0.6036296611366293 +460000,0.6313930612276498 +470000,0.48976764188622984 +480000,0.4724658510470638 +490000,0.5275870243873486 +500000,0.5547848643931329 +510000,0.5396602697703952 +520000,0.5503798662275259 +530000,0.5430549414033343 +540000,0.5194525251384882 +550000,0.49231391947421477 +560000,0.570070649294099 +570000,0.5569791404833261 +580000,0.39178510660246996 +590000,0.522046521891839 +600000,0.5605229714139899 +610000,0.6119458813767749 +620000,0.5327790138218227 +630000,0.581424697643612 +640000,0.502356495027465 +650000,0.5617975263795089 +660000,0.5097640953464524 +670000,0.5729542961089356 +680000,0.5606826201688122 +690000,0.6349074282395363 +700000,0.5564982957915291 +710000,0.5878728650791352 +720000,0.4586056708320685 +730000,0.5417455473921237 +740000,0.5322016615425633 +750000,0.4623998801958547 +760000,0.4683742368233311 +770000,0.43525619721254694 +780000,0.5243903226171674 +790000,0.511770945432367 +800000,0.5491511725405354 +810000,0.5986146492878998 +820000,0.592329094932953 +830000,0.5913039478681359 +840000,0.6261300023878059 +850000,0.33024416886961083 +860000,0.5647782909502073 +870000,0.4913827759439576 +880000,0.4881884984738896 +890000,0.563695024464869 +900000,0.4919250487911987 +910000,0.5143353078569135 +920000,0.5717812032953903 +930000,0.6036415352527522 +940000,0.585365501270013 +950000,0.47798102576499024 +960000,0.596951042053205 +970000,0.5041043165053638 +980000,0.6155176751926807 +990000,0.58747289192488 +1000000,0.5851310914154229 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat_eval/mse.log b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat_eval/mse.log index b7db2cfd1..3af80db68 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat_eval/mse.log +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/logs/stat_eval/mse.log @@ -1,101 +1,101 @@ step,stat_eval/mse -10000,485.2302758952551 -20000,462.7261694094162 -30000,441.1005630816061 -40000,385.2889619938677 -50000,320.2220266413998 -60000,362.7634079967613 -70000,322.2775438984253 -80000,353.3222429671633 -90000,256.3007514486262 -100000,349.57083105490926 -110000,213.12211505270798 -120000,252.92898356720212 -130000,318.3589524243365 -140000,282.77382555040265 -150000,256.5563257796589 -160000,287.7386881083972 -170000,248.79273678178816 -180000,219.77155167199604 -190000,165.39522143890082 -200000,335.35867858619235 -210000,253.6468264088826 -220000,220.68682897797345 -230000,178.71919304390426 -240000,204.81384748415877 -250000,303.0808534670631 -260000,209.96397479527454 -270000,257.2438916512975 -280000,208.96572457826815 -290000,326.3633678300779 -300000,309.93169962553804 -310000,242.55386682968623 -320000,320.52112727964305 -330000,260.58106588813905 -340000,361.68765492937854 -350000,158.65232897975974 -360000,156.8778251482563 -370000,193.8894388845094 -380000,201.78860614053315 -390000,168.10780655643276 -400000,288.6485340841677 -410000,210.06617014004865 -420000,235.52746167428222 -430000,213.93188273806442 -440000,269.45465801161663 -450000,237.36426765177356 -460000,153.4004471541008 -470000,187.86027114676833 -480000,291.0724582145282 -490000,201.36139099329858 -500000,210.69542296000358 -510000,260.6152627812461 -520000,109.4475724773566 -530000,279.5783024688575 -540000,147.89999999680418 -550000,247.99731347747593 -560000,221.18446928723225 -570000,233.0285242361524 -580000,216.00519874500023 -590000,213.87343572234437 -600000,187.41804134719277 -610000,299.517817790008 -620000,268.0591365519462 -630000,235.71998991618625 -640000,138.22524497752482 -650000,177.0229624694437 -660000,223.2980109302543 -670000,210.69695435505187 -680000,199.15487781921613 -690000,214.83187199885825 -700000,201.3909402610459 -710000,119.07146201034656 -720000,215.1339908277349 -730000,252.75400144317845 -740000,257.0008214640928 -750000,275.1919635893579 -760000,231.66194620634437 -770000,212.63479550165601 -780000,277.87775984281245 -790000,347.3524949796692 -800000,203.80794014775222 -810000,228.53958169142146 -820000,202.28903252921992 -830000,141.75687522880668 -840000,227.8372274786217 -850000,199.18187654548467 -860000,147.73544298736687 -870000,251.40434480902553 -880000,227.9416226885538 -890000,223.59745724675457 -900000,220.1742356023655 -910000,206.7247008146171 -920000,206.66670304086543 -930000,241.4678177162797 -940000,282.18503500550463 -950000,292.91782412344435 -960000,224.45324654955525 -970000,161.79914310224135 -980000,217.52264299449342 -990000,147.22016508507667 -1000000,230.19231968582693 +10000,399.6176377093038 +20000,276.81352706105315 +30000,384.10249890921193 +40000,407.8719562046013 +50000,346.6746375171962 +60000,341.37363493910686 +70000,396.6774902950893 +80000,247.34688990684413 +90000,244.9005896430952 +100000,310.0343537884871 +110000,358.0510916944546 +120000,305.8432845191843 +130000,165.17922296240485 +140000,274.1635650216925 +150000,230.59375699225475 +160000,292.947394312987 +170000,219.71215855102122 +180000,221.53084896749974 +190000,263.97427372750616 +200000,258.93376985975607 +210000,195.13064534800992 +220000,238.5799333075575 +230000,261.4988813840164 +240000,247.31457456523339 +250000,244.79666317250076 +260000,258.47209463864385 +270000,264.4071440366707 +280000,231.868324054896 +290000,182.96694766365894 +300000,315.35300463166044 +310000,335.05459438431546 +320000,328.97485511030686 +330000,222.16111410286885 +340000,306.3098765888496 +350000,359.37190562205984 +360000,251.58479111247553 +370000,176.55420422202883 +380000,230.62199144809 +390000,313.281086280819 +400000,245.92115753219974 +410000,259.49071372851324 +420000,156.91258297795588 +430000,189.4746002176981 +440000,283.70779268305125 +450000,211.04073309424658 +460000,138.54423243192394 +470000,251.6472658527 +480000,190.57368480976774 +490000,396.2227199548767 +500000,276.36450825409275 +510000,154.70857007419175 +520000,221.30321667856575 +530000,300.466819745278 +540000,262.14864803094235 +550000,288.35439011876775 +560000,112.11088696651143 +570000,297.3498663814083 +580000,325.78520686145566 +590000,272.8055583010585 +600000,297.38233778784496 +610000,257.18503085407303 +620000,228.66097134664886 +630000,243.20338010017903 +640000,168.39932765580983 +650000,317.8536500242353 +660000,283.54124805832726 +670000,255.5422394484774 +680000,306.35547266442546 +690000,176.82328032656147 +700000,251.49542932142398 +710000,253.90362420360034 +720000,228.4542535685886 +730000,184.6307279691104 +740000,274.6053571357479 +750000,247.10629024541862 +760000,264.2808788606579 +770000,162.63693042447744 +780000,221.40066037787088 +790000,211.0142876732304 +800000,304.7199170999576 +810000,206.12986663758488 +820000,158.54471873876892 +830000,230.8446633051748 +840000,210.33887956955715 +850000,118.97529889378613 +860000,350.7968979898222 +870000,217.3313853928242 +880000,204.63556445410978 +890000,219.6088593719431 +900000,243.1022189530035 +910000,237.5553470779818 +920000,118.7175257741955 +930000,186.8493836196389 +940000,253.0424074763292 +950000,286.00644502268267 +960000,210.23177434280515 +970000,262.76064735611305 +980000,182.68475204362701 +990000,114.51940872335305 +1000000,350.75954035818256 diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/model_best.pt b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/model_best.pt index 796e4e5a7..a1d453320 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/model_best.pt and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/model_best.pt differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/model_latest.pt b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/model_latest.pt index 3ea996a19..349e798c8 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/model_latest.pt and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/model_latest.pt differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-loss-approx_kl.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-loss-approx_kl.jpg index 4c7f4ec2d..ecbbcb3a1 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-loss-approx_kl.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-loss-approx_kl.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-loss-entropy_loss.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-loss-entropy_loss.jpg index fd90148b4..e79b6d30d 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-loss-entropy_loss.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-loss-entropy_loss.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-loss-policy_loss.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-loss-policy_loss.jpg index ea3f8896c..5168b0182 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-loss-policy_loss.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-loss-policy_loss.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-loss-value_loss.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-loss-value_loss.jpg index ebdbbbc04..a97954417 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-loss-value_loss.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-loss-value_loss.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat-constraint_violation.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat-constraint_violation.jpg index b470fad37..2ea7f878f 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat-constraint_violation.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat-constraint_violation.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat-ep_constraint_violation.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat-ep_constraint_violation.jpg index 7c4265ece..d94dcd268 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat-ep_constraint_violation.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat-ep_constraint_violation.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat-ep_length.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat-ep_length.jpg index 157ca0db8..d51a72069 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat-ep_length.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat-ep_length.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat-ep_return.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat-ep_return.jpg index 465c69c24..17f5e9257 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat-ep_return.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat-ep_return.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat-ep_reward.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat-ep_reward.jpg index 7a154c579..2cb0ad9eb 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat-ep_reward.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat-ep_reward.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat_eval-constraint_violation.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat_eval-constraint_violation.jpg index 510146a6b..f0e13b1b7 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat_eval-constraint_violation.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat_eval-constraint_violation.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat_eval-ep_length.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat_eval-ep_length.jpg index 788222cee..0834eac5c 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat_eval-ep_length.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat_eval-ep_length.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat_eval-ep_return.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat_eval-ep_return.jpg index 9bbbd29e9..fed3a0b84 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat_eval-ep_return.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat_eval-ep_return.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat_eval-ep_reward.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat_eval-ep_reward.jpg index ba57260cb..34a0e280a 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat_eval-ep_reward.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat_eval-ep_reward.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat_eval-mse.jpg b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat_eval-mse.jpg index 451f5e37d..539983511 100644 Binary files a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat_eval-mse.jpg and b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/plots/-stat_eval-mse.jpg differ diff --git a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/std_out.txt b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/std_out.txt index 8ab21b178..89ba9a603 100644 --- a/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/std_out.txt +++ b/experiments/mpsc/models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/std_out.txt @@ -1,2601 +1,2601 @@ -2023-10-19 14:52:03,946 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 40.586 +/- 12.723 -2023-10-19 14:52:03,961 : +2023-10-27 16:43:49,079 : Eval | ep_lengths 225.90 +/- 72.30 | ep_return 39.483 +/- 18.411 +2023-10-27 16:43:49,092 : -------------------------------------- | loss/ | | -| approx_kl | 0.0315 | -| entropy_loss | -3.65 | -| policy_loss | -0.00526 | -| value_loss | 6.61 | +| approx_kl | 0.0262 | +| entropy_loss | -3.73 | +| policy_loss | -0.014 | +| value_loss | 12 | | stat/ | | -| constraint_violation | 45 | -| ep_constraint_vio... | 1.4 | -| ep_length | 226 | -| ep_return | 58.6 | -| ep_reward | 0.235 | -| stat_eval/ | | | constraint_violation | 0 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 40.6 | -| ep_reward | 0.162 | -| mse | 485 | +| ep_return | 92.1 | +| ep_reward | 0.369 | +| stat_eval/ | | +| constraint_violation | 0.4 | +| ep_length | 226 | +| ep_return | 39.5 | +| ep_reward | 0.158 | +| mse | 400 | | time/ | | | progress | 0.01 | | step | 1e+04 | -| step_time | 11.7 | +| step_time | 12.9 | -------------------------------------- -2023-10-19 14:54:21,279 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 40.078 +/- 12.268 -2023-10-19 14:54:21,299 : +2023-10-27 16:46:34,735 : Eval | ep_lengths 201.90 +/- 96.23 | ep_return 52.171 +/- 29.923 +2023-10-27 16:46:34,754 : -------------------------------------- | loss/ | | -| approx_kl | 0.019 | -| entropy_loss | -3.7 | -| policy_loss | -0.015 | -| value_loss | 9.48 | +| approx_kl | 0.0153 | +| entropy_loss | -3.73 | +| policy_loss | -0.0143 | +| value_loss | 9.04 | | stat/ | | -| constraint_violation | 45 | +| constraint_violation | 0 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 57.9 | -| ep_reward | 0.231 | +| ep_return | 99.4 | +| ep_reward | 0.397 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 40.1 | -| ep_reward | 0.16 | -| mse | 463 | +| constraint_violation | 0.2 | +| ep_length | 202 | +| ep_return | 52.2 | +| ep_reward | 0.209 | +| mse | 277 | | time/ | | | progress | 0.02 | | step | 2e+04 | -| step_time | 11.5 | +| step_time | 13.1 | -------------------------------------- -2023-10-19 14:56:39,144 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 58.340 +/- 30.075 -2023-10-19 14:56:39,153 : +2023-10-27 16:49:19,369 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 44.342 +/- 19.976 +2023-10-27 16:49:19,370 : -------------------------------------- | loss/ | | -| approx_kl | 0.0131 | -| entropy_loss | -3.7 | -| policy_loss | -0.0233 | -| value_loss | 3.37 | +| approx_kl | 0.0193 | +| entropy_loss | -3.68 | +| policy_loss | -0.0114 | +| value_loss | 5.95 | | stat/ | | -| constraint_violation | 51 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 59.1 | -| ep_reward | 0.237 | -| stat_eval/ | | | constraint_violation | 0 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 58.3 | -| ep_reward | 0.233 | -| mse | 441 | +| ep_return | 93.4 | +| ep_reward | 0.374 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 44.3 | +| ep_reward | 0.177 | +| mse | 384 | | time/ | | | progress | 0.03 | | step | 3e+04 | -| step_time | 11.6 | +| step_time | 14.4 | -------------------------------------- -2023-10-19 14:58:55,651 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 69.602 +/- 18.129 -2023-10-19 14:58:55,660 : +2023-10-27 16:52:02,308 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 50.787 +/- 23.505 +2023-10-27 16:52:02,309 : -------------------------------------- | loss/ | | -| approx_kl | 0.0306 | -| entropy_loss | -3.73 | -| policy_loss | -0.0138 | -| value_loss | 4.12 | +| approx_kl | 0.0327 | +| entropy_loss | -3.75 | +| policy_loss | -0.0119 | +| value_loss | 5.03 | | stat/ | | -| constraint_violation | 73 | -| ep_constraint_vio... | 0.7 | -| ep_length | 203 | -| ep_return | 68.6 | -| ep_reward | 0.274 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 97.9 | +| ep_reward | 0.392 | | stat_eval/ | | -| constraint_violation | 2.1 | +| constraint_violation | 0.1 | | ep_length | 250 | -| ep_return | 69.6 | -| ep_reward | 0.278 | -| mse | 385 | +| ep_return | 50.8 | +| ep_reward | 0.203 | +| mse | 408 | | time/ | | | progress | 0.04 | | step | 4e+04 | -| step_time | 11.2 | +| step_time | 12.5 | -------------------------------------- -2023-10-19 15:01:11,042 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 89.524 +/- 16.146 -2023-10-19 15:01:11,051 : +2023-10-27 16:54:44,651 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 70.996 +/- 28.161 +2023-10-27 16:54:44,670 : -------------------------------------- | loss/ | | -| approx_kl | 0.0206 | -| entropy_loss | -3.74 | -| policy_loss | -0.0111 | -| value_loss | 2.99 | +| approx_kl | 0.0195 | +| entropy_loss | -3.77 | +| policy_loss | -0.0186 | +| value_loss | 6.55 | | stat/ | | -| constraint_violation | 112 | +| constraint_violation | 0 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 100 | -| ep_reward | 0.401 | +| ep_return | 104 | +| ep_reward | 0.417 | | stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 0.6 | | ep_length | 250 | -| ep_return | 89.5 | -| ep_reward | 0.358 | -| mse | 320 | +| ep_return | 71 | +| ep_reward | 0.284 | +| mse | 347 | | time/ | | | progress | 0.05 | | step | 5e+04 | -| step_time | 11.1 | +| step_time | 16.3 | -------------------------------------- -2023-10-19 15:03:23,422 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 78.728 +/- 30.879 -2023-10-19 15:03:23,423 : +2023-10-27 16:57:21,595 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 102.395 +/- 39.716 +2023-10-27 16:57:21,614 : -------------------------------------- | loss/ | | -| approx_kl | 0.0222 | -| entropy_loss | -3.78 | -| policy_loss | -0.00375 | -| value_loss | 1.86 | +| approx_kl | 0.0257 | +| entropy_loss | -3.79 | +| policy_loss | -0.0179 | +| value_loss | 13 | | stat/ | | -| constraint_violation | 148 | -| ep_constraint_vio... | 0.3 | -| ep_length | 200 | -| ep_return | 86.4 | -| ep_reward | 0.348 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 127 | +| ep_reward | 0.507 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 225 | -| ep_return | 78.7 | -| ep_reward | 0.315 | -| mse | 363 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 102 | +| ep_reward | 0.41 | +| mse | 341 | | time/ | | | progress | 0.06 | | step | 6e+04 | -| step_time | 11 | +| step_time | 12.3 | -------------------------------------- -2023-10-19 15:05:34,461 : Eval | ep_lengths 201.90 +/- 96.23 | ep_return 68.387 +/- 34.961 -2023-10-19 15:05:34,463 : +2023-10-27 17:00:00,668 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 70.204 +/- 34.306 +2023-10-27 17:00:00,670 : -------------------------------------- | loss/ | | -| approx_kl | 0.0308 | -| entropy_loss | -3.75 | -| policy_loss | -0.0167 | -| value_loss | 3.87 | +| approx_kl | 0.0335 | +| entropy_loss | -3.81 | +| policy_loss | -0.00264 | +| value_loss | 2.7 | | stat/ | | -| constraint_violation | 211 | -| ep_constraint_vio... | 1.4 | -| ep_length | 226 | -| ep_return | 111 | -| ep_reward | 0.444 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 141 | +| ep_reward | 0.564 | | stat_eval/ | | -| constraint_violation | 1 | -| ep_length | 202 | -| ep_return | 68.4 | -| ep_reward | 0.274 | -| mse | 322 | +| constraint_violation | 1.3 | +| ep_length | 225 | +| ep_return | 70.2 | +| ep_reward | 0.281 | +| mse | 397 | | time/ | | | progress | 0.07 | | step | 7e+04 | -| step_time | 11.2 | +| step_time | 11.9 | -------------------------------------- -2023-10-19 15:07:47,331 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 96.521 +/- 14.640 -2023-10-19 15:07:47,340 : +2023-10-27 17:02:42,462 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 94.936 +/- 26.421 +2023-10-27 17:02:42,475 : -------------------------------------- | loss/ | | -| approx_kl | 0.0199 | -| entropy_loss | -3.71 | -| policy_loss | -0.0167 | -| value_loss | 3.61 | +| approx_kl | 0.034 | +| entropy_loss | -3.85 | +| policy_loss | 0.00167 | +| value_loss | 8.52 | | stat/ | | -| constraint_violation | 249 | +| constraint_violation | 0 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 98.7 | -| ep_reward | 0.395 | +| ep_return | 151 | +| ep_reward | 0.603 | | stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 1 | | ep_length | 250 | -| ep_return | 96.5 | -| ep_reward | 0.386 | -| mse | 353 | +| ep_return | 94.9 | +| ep_reward | 0.38 | +| mse | 247 | | time/ | | | progress | 0.08 | | step | 8e+04 | -| step_time | 11 | +| step_time | 14.6 | -------------------------------------- -2023-10-19 15:09:55,074 : Eval | ep_lengths 200.90 +/- 98.20 | ep_return 84.898 +/- 46.405 -2023-10-19 15:09:55,077 : +2023-10-27 17:05:22,004 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 114.826 +/- 19.941 +2023-10-27 17:05:22,012 : -------------------------------------- | loss/ | | -| approx_kl | 0.028 | -| entropy_loss | -3.63 | -| policy_loss | -0.012 | -| value_loss | 4.71 | +| approx_kl | 0.0278 | +| entropy_loss | -3.82 | +| policy_loss | -0.0151 | +| value_loss | 2.72 | | stat/ | | -| constraint_violation | 283 | -| ep_constraint_vio... | 1.5 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 99.8 | -| ep_reward | 0.399 | +| ep_return | 150 | +| ep_reward | 0.599 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 201 | -| ep_return | 84.9 | -| ep_reward | 0.34 | -| mse | 256 | +| constraint_violation | 1 | +| ep_length | 250 | +| ep_return | 115 | +| ep_reward | 0.459 | +| mse | 245 | | time/ | | | progress | 0.09 | | step | 9e+04 | -| step_time | 10.8 | +| step_time | 13.4 | -------------------------------------- -2023-10-19 15:12:06,355 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 95.712 +/- 22.921 -2023-10-19 15:12:06,356 : +2023-10-27 17:08:02,217 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 109.442 +/- 34.188 +2023-10-27 17:08:02,219 : -------------------------------------- | loss/ | | -| approx_kl | 0.0333 | -| entropy_loss | -3.61 | -| policy_loss | -0.00122 | -| value_loss | 2.98 | +| approx_kl | 0.0177 | +| entropy_loss | -3.78 | +| policy_loss | -0.0099 | +| value_loss | 4.49 | | stat/ | | -| constraint_violation | 307 | -| ep_constraint_vio... | 0.7 | -| ep_length | 226 | -| ep_return | 91.6 | -| ep_reward | 0.367 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 141 | +| ep_reward | 0.562 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 95.7 | -| ep_reward | 0.383 | -| mse | 350 | +| ep_return | 109 | +| ep_reward | 0.438 | +| mse | 310 | | time/ | | | progress | 0.1 | | step | 1e+05 | -| step_time | 10.9 | +| step_time | 13.8 | -------------------------------------- -2023-10-19 15:14:13,769 : Eval | ep_lengths 200.30 +/- 99.40 | ep_return 89.057 +/- 46.435 -2023-10-19 15:14:13,770 : +2023-10-27 17:10:31,777 : Eval | ep_lengths 201.30 +/- 97.42 | ep_return 86.057 +/- 44.772 +2023-10-27 17:10:31,789 : -------------------------------------- | loss/ | | -| approx_kl | 0.0357 | -| entropy_loss | -3.66 | -| policy_loss | -0.0108 | -| value_loss | 3.95 | +| approx_kl | 0.0189 | +| entropy_loss | -3.74 | +| policy_loss | -0.015 | +| value_loss | 2.48 | | stat/ | | -| constraint_violation | 349 | -| ep_constraint_vio... | 1.4 | -| ep_length | 226 | -| ep_return | 106 | -| ep_reward | 0.423 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 153 | +| ep_reward | 0.612 | | stat_eval/ | | | constraint_violation | 0.2 | -| ep_length | 200 | -| ep_return | 89.1 | -| ep_reward | 0.356 | -| mse | 213 | +| ep_length | 201 | +| ep_return | 86.1 | +| ep_reward | 0.344 | +| mse | 358 | | time/ | | | progress | 0.11 | | step | 1.1e+05 | -| step_time | 10.8 | +| step_time | 12.7 | -------------------------------------- -2023-10-19 15:16:20,064 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 109.358 +/- 45.422 -2023-10-19 15:16:20,094 : +2023-10-27 17:13:04,785 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 112.094 +/- 31.037 +2023-10-27 17:13:04,786 : -------------------------------------- | loss/ | | -| approx_kl | 0.0316 | -| entropy_loss | -3.69 | -| policy_loss | -0.00229 | -| value_loss | 4.61 | +| approx_kl | 0.0213 | +| entropy_loss | -3.74 | +| policy_loss | -0.0128 | +| value_loss | 3.02 | | stat/ | | -| constraint_violation | 365 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 111 | -| ep_reward | 0.448 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 153 | +| ep_reward | 0.61 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 109 | -| ep_reward | 0.437 | -| mse | 253 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 112 | +| ep_reward | 0.448 | +| mse | 306 | | time/ | | | progress | 0.12 | | step | 1.2e+05 | -| step_time | 10.5 | +| step_time | 13 | -------------------------------------- -2023-10-19 15:18:27,203 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 114.471 +/- 17.481 -2023-10-19 15:18:27,214 : +2023-10-27 17:15:29,579 : Eval | ep_lengths 177.60 +/- 110.62 | ep_return 95.792 +/- 65.515 +2023-10-27 17:15:29,592 : -------------------------------------- | loss/ | | -| approx_kl | 0.0232 | -| entropy_loss | -3.69 | -| policy_loss | -0.0136 | -| value_loss | 1.84 | +| approx_kl | 0.029 | +| entropy_loss | -3.62 | +| policy_loss | -0.0156 | +| value_loss | 1.41 | | stat/ | | -| constraint_violation | 384 | +| constraint_violation | 0 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 123 | -| ep_reward | 0.492 | +| ep_return | 162 | +| ep_reward | 0.65 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 114 | -| ep_reward | 0.458 | -| mse | 318 | +| constraint_violation | 1.2 | +| ep_length | 178 | +| ep_return | 95.8 | +| ep_reward | 0.383 | +| mse | 165 | | time/ | | | progress | 0.13 | | step | 1.3e+05 | -| step_time | 10.5 | +| step_time | 12.1 | -------------------------------------- -2023-10-19 15:20:32,930 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 134.282 +/- 21.376 -2023-10-19 15:20:32,939 : +2023-10-27 17:18:01,091 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 131.966 +/- 19.564 +2023-10-27 17:18:01,099 : -------------------------------------- | loss/ | | -| approx_kl | 0.0329 | -| entropy_loss | -3.76 | -| policy_loss | -0.026 | -| value_loss | 7.59 | +| approx_kl | 0.0309 | +| entropy_loss | -3.63 | +| policy_loss | -0.0134 | +| value_loss | 1.69 | | stat/ | | -| constraint_violation | 409 | +| constraint_violation | 0 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 123 | -| ep_reward | 0.492 | +| ep_return | 168 | +| ep_reward | 0.671 | | stat_eval/ | | -| constraint_violation | 1.8 | +| constraint_violation | 0 | | ep_length | 250 | -| ep_return | 134 | -| ep_reward | 0.537 | -| mse | 283 | +| ep_return | 132 | +| ep_reward | 0.528 | +| mse | 274 | | time/ | | | progress | 0.14 | | step | 1.4e+05 | -| step_time | 10.2 | +| step_time | 11.7 | -------------------------------------- -2023-10-19 15:22:34,679 : Eval | ep_lengths 201.40 +/- 97.21 | ep_return 103.964 +/- 56.067 -2023-10-19 15:22:34,681 : +2023-10-27 17:20:32,535 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 139.486 +/- 14.881 +2023-10-27 17:20:32,543 : -------------------------------------- | loss/ | | -| approx_kl | 0.0205 | -| entropy_loss | -3.74 | -| policy_loss | -0.00908 | -| value_loss | 1.59 | +| approx_kl | 0.0306 | +| entropy_loss | -3.64 | +| policy_loss | -0.00758 | +| value_loss | 2.05 | | stat/ | | -| constraint_violation | 442 | -| ep_constraint_vio... | 1.3 | -| ep_length | 227 | -| ep_return | 130 | -| ep_reward | 0.522 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 166 | +| ep_reward | 0.663 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 104 | -| ep_reward | 0.416 | -| mse | 257 | +| constraint_violation | 1.2 | +| ep_length | 250 | +| ep_return | 139 | +| ep_reward | 0.558 | +| mse | 231 | | time/ | | | progress | 0.15 | | step | 1.5e+05 | -| step_time | 10.4 | +| step_time | 14.6 | -------------------------------------- -2023-10-19 15:24:39,726 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 128.249 +/- 18.809 -2023-10-19 15:24:39,728 : +2023-10-27 17:22:57,730 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 131.739 +/- 13.309 +2023-10-27 17:22:57,732 : -------------------------------------- | loss/ | | -| approx_kl | 0.0277 | -| entropy_loss | -3.72 | -| policy_loss | 0.000997 | -| value_loss | 3.43 | +| approx_kl | 0.016 | +| entropy_loss | -3.64 | +| policy_loss | -0.00904 | +| value_loss | 0.594 | | stat/ | | -| constraint_violation | 478 | +| constraint_violation | 0 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 144 | -| ep_reward | 0.577 | +| ep_return | 170 | +| ep_reward | 0.679 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 128 | -| ep_reward | 0.513 | -| mse | 288 | +| ep_return | 132 | +| ep_reward | 0.527 | +| mse | 293 | | time/ | | | progress | 0.16 | | step | 1.6e+05 | -| step_time | 10.2 | +| step_time | 13.1 | -------------------------------------- -2023-10-19 15:26:43,083 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 135.539 +/- 19.221 -2023-10-19 15:26:43,093 : +2023-10-27 17:25:26,421 : Eval | ep_lengths 225.80 +/- 72.60 | ep_return 124.718 +/- 46.716 +2023-10-27 17:25:26,423 : -------------------------------------- | loss/ | | -| approx_kl | 0.027 | -| entropy_loss | -3.7 | -| policy_loss | -0.0122 | -| value_loss | 1.23 | +| approx_kl | 0.0281 | +| entropy_loss | -3.52 | +| policy_loss | 0.00852 | +| value_loss | 0.684 | | stat/ | | -| constraint_violation | 518 | -| ep_constraint_vio... | 3.4 | -| ep_length | 250 | -| ep_return | 133 | -| ep_reward | 0.533 | -| stat_eval/ | | | constraint_violation | 0 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 136 | -| ep_reward | 0.542 | -| mse | 249 | +| ep_return | 166 | +| ep_reward | 0.664 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 226 | +| ep_return | 125 | +| ep_reward | 0.499 | +| mse | 220 | | time/ | | | progress | 0.17 | | step | 1.7e+05 | -| step_time | 9.75 | +| step_time | 12.5 | -------------------------------------- -2023-10-19 15:28:41,606 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 146.680 +/- 25.870 -2023-10-19 15:28:41,637 : +2023-10-27 17:27:52,488 : Eval | ep_lengths 200.90 +/- 98.20 | ep_return 113.106 +/- 57.803 +2023-10-27 17:27:52,489 : -------------------------------------- | loss/ | | -| approx_kl | 0.035 | -| entropy_loss | -3.71 | -| policy_loss | -0.00776 | -| value_loss | 1.09 | +| approx_kl | 0.0303 | +| entropy_loss | -3.47 | +| policy_loss | -0.00903 | +| value_loss | 1.1 | | stat/ | | -| constraint_violation | 524 | -| ep_constraint_vio... | 0.5 | -| ep_length | 225 | -| ep_return | 135 | -| ep_reward | 0.541 | -| stat_eval/ | | | constraint_violation | 0 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 147 | -| ep_reward | 0.587 | -| mse | 220 | +| ep_return | 164 | +| ep_reward | 0.656 | +| stat_eval/ | | +| constraint_violation | 0.2 | +| ep_length | 201 | +| ep_return | 113 | +| ep_reward | 0.452 | +| mse | 222 | | time/ | | | progress | 0.18 | | step | 1.8e+05 | -| step_time | 9.45 | +| step_time | 13.3 | -------------------------------------- -2023-10-19 15:30:36,144 : Eval | ep_lengths 178.20 +/- 109.68 | ep_return 91.576 +/- 62.081 -2023-10-19 15:30:36,146 : +2023-10-27 17:30:20,516 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 142.218 +/- 27.935 +2023-10-27 17:30:20,525 : -------------------------------------- | loss/ | | -| approx_kl | 0.019 | -| entropy_loss | -3.74 | -| policy_loss | -0.0168 | -| value_loss | 0.581 | +| approx_kl | 0.0282 | +| entropy_loss | -3.45 | +| policy_loss | -0.0207 | +| value_loss | 0.661 | | stat/ | | -| constraint_violation | 549 | +| constraint_violation | 0 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 130 | -| ep_reward | 0.519 | +| ep_return | 167 | +| ep_reward | 0.668 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 178 | -| ep_return | 91.6 | -| ep_reward | 0.366 | -| mse | 165 | +| constraint_violation | 1.7 | +| ep_length | 250 | +| ep_return | 142 | +| ep_reward | 0.569 | +| mse | 264 | | time/ | | | progress | 0.19 | | step | 1.9e+05 | -| step_time | 9.67 | +| step_time | 13.6 | -------------------------------------- -2023-10-19 15:32:36,392 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 133.108 +/- 15.044 -2023-10-19 15:32:36,393 : +2023-10-27 17:32:49,760 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 146.306 +/- 25.865 +2023-10-27 17:32:49,768 : -------------------------------------- | loss/ | | -| approx_kl | 0.0275 | -| entropy_loss | -3.82 | -| policy_loss | -0.00484 | -| value_loss | 2.52 | +| approx_kl | 0.0223 | +| entropy_loss | -3.42 | +| policy_loss | -0.0154 | +| value_loss | 0.315 | | stat/ | | -| constraint_violation | 568 | -| ep_constraint_vio... | 1 | +| constraint_violation | 0 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 148 | -| ep_reward | 0.592 | +| ep_return | 179 | +| ep_reward | 0.717 | | stat_eval/ | | -| constraint_violation | 0.1 | +| constraint_violation | 0.2 | | ep_length | 250 | -| ep_return | 133 | -| ep_reward | 0.532 | -| mse | 335 | +| ep_return | 146 | +| ep_reward | 0.585 | +| mse | 259 | | time/ | | | progress | 0.2 | | step | 2e+05 | -| step_time | 9.7 | +| step_time | 12.9 | -------------------------------------- -2023-10-19 15:34:34,174 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 134.482 +/- 18.884 -2023-10-19 15:34:34,175 : +2023-10-27 17:35:13,200 : Eval | ep_lengths 176.60 +/- 112.16 | ep_return 105.495 +/- 73.105 +2023-10-27 17:35:13,201 : -------------------------------------- | loss/ | | -| approx_kl | 0.033 | -| entropy_loss | -3.83 | -| policy_loss | -0.0112 | -| value_loss | 0.81 | +| approx_kl | 0.0338 | +| entropy_loss | -3.41 | +| policy_loss | -0.0124 | +| value_loss | 0.695 | | stat/ | | -| constraint_violation | 621 | -| ep_constraint_vio... | 0.6 | -| ep_length | 226 | -| ep_return | 118 | -| ep_reward | 0.471 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 1 | +| ep_constraint_vio... | 0.1 | | ep_length | 250 | -| ep_return | 134 | -| ep_reward | 0.538 | -| mse | 254 | +| ep_return | 183 | +| ep_reward | 0.732 | +| stat_eval/ | | +| constraint_violation | 0.3 | +| ep_length | 177 | +| ep_return | 105 | +| ep_reward | 0.422 | +| mse | 195 | | time/ | | | progress | 0.21 | | step | 2.1e+05 | -| step_time | 9.97 | +| step_time | 13 | -------------------------------------- -2023-10-19 15:36:29,332 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 126.249 +/- 45.293 -2023-10-19 15:36:29,333 : +2023-10-27 17:37:42,503 : Eval | ep_lengths 226.30 +/- 71.10 | ep_return 127.684 +/- 45.962 +2023-10-27 17:37:42,505 : -------------------------------------- | loss/ | | -| approx_kl | 0.0192 | -| entropy_loss | -3.82 | -| policy_loss | -0.0136 | -| value_loss | 1.25 | +| approx_kl | 0.0334 | +| entropy_loss | -3.43 | +| policy_loss | -0.00976 | +| value_loss | 0.555 | | stat/ | | -| constraint_violation | 632 | -| ep_constraint_vio... | 0.1 | -| ep_length | 227 | -| ep_return | 126 | -| ep_reward | 0.505 | +| constraint_violation | 1 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 177 | +| ep_reward | 0.708 | | stat_eval/ | | -| constraint_violation | 0.9 | -| ep_length | 225 | -| ep_return | 126 | -| ep_reward | 0.505 | -| mse | 221 | +| constraint_violation | 0.3 | +| ep_length | 226 | +| ep_return | 128 | +| ep_reward | 0.511 | +| mse | 239 | | time/ | | | progress | 0.22 | | step | 2.2e+05 | -| step_time | 9.37 | +| step_time | 13 | -------------------------------------- -2023-10-19 15:38:26,459 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 145.406 +/- 18.142 -2023-10-19 15:38:26,482 : +2023-10-27 17:40:14,895 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 139.002 +/- 27.664 +2023-10-27 17:40:14,897 : -------------------------------------- | loss/ | | -| approx_kl | 0.0287 | -| entropy_loss | -3.79 | -| policy_loss | -0.0136 | -| value_loss | 0.703 | +| approx_kl | 0.029 | +| entropy_loss | -3.43 | +| policy_loss | -0.00802 | +| value_loss | 0.811 | | stat/ | | -| constraint_violation | 673 | -| ep_constraint_vio... | 1.6 | -| ep_length | 202 | -| ep_return | 114 | -| ep_reward | 0.456 | +| constraint_violation | 1 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 179 | +| ep_reward | 0.715 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 145 | -| ep_reward | 0.582 | -| mse | 179 | +| ep_return | 139 | +| ep_reward | 0.556 | +| mse | 261 | | time/ | | | progress | 0.23 | | step | 2.3e+05 | -| step_time | 9.54 | +| step_time | 13.6 | -------------------------------------- -2023-10-19 15:40:22,237 : Eval | ep_lengths 225.80 +/- 72.60 | ep_return 127.860 +/- 47.926 -2023-10-19 15:40:22,239 : +2023-10-27 17:42:45,080 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 124.394 +/- 47.815 +2023-10-27 17:42:45,082 : -------------------------------------- | loss/ | | -| approx_kl | 0.0164 | -| entropy_loss | -3.78 | -| policy_loss | -0.0147 | -| value_loss | 2.14 | +| approx_kl | 0.0234 | +| entropy_loss | -3.46 | +| policy_loss | -0.00557 | +| value_loss | 0.595 | | stat/ | | -| constraint_violation | 696 | -| ep_constraint_vio... | 1.9 | -| ep_length | 205 | -| ep_return | 122 | -| ep_reward | 0.503 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 189 | +| ep_reward | 0.757 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 226 | -| ep_return | 128 | -| ep_reward | 0.511 | -| mse | 205 | +| constraint_violation | 0.9 | +| ep_length | 225 | +| ep_return | 124 | +| ep_reward | 0.498 | +| mse | 247 | | time/ | | | progress | 0.24 | | step | 2.4e+05 | -| step_time | 9.91 | +| step_time | 12 | -------------------------------------- -2023-10-19 15:42:19,673 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 141.006 +/- 19.220 -2023-10-19 15:42:19,675 : +2023-10-27 17:45:13,709 : Eval | ep_lengths 202.00 +/- 96.07 | ep_return 110.227 +/- 59.980 +2023-10-27 17:45:13,710 : -------------------------------------- | loss/ | | -| approx_kl | 0.0286 | -| entropy_loss | -3.78 | -| policy_loss | -0.00189 | -| value_loss | 2.17 | +| approx_kl | 0.0291 | +| entropy_loss | -3.48 | +| policy_loss | -0.01 | +| value_loss | 1.19 | | stat/ | | -| constraint_violation | 741 | -| ep_constraint_vio... | 2.3 | -| ep_length | 227 | -| ep_return | 135 | -| ep_reward | 0.54 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 141 | -| ep_reward | 0.564 | -| mse | 303 | +| ep_return | 185 | +| ep_reward | 0.739 | +| stat_eval/ | | +| constraint_violation | 0.2 | +| ep_length | 202 | +| ep_return | 110 | +| ep_reward | 0.441 | +| mse | 245 | | time/ | | | progress | 0.25 | | step | 2.5e+05 | -| step_time | 9.59 | +| step_time | 13.4 | -------------------------------------- -2023-10-19 15:44:18,351 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 151.690 +/- 21.718 -2023-10-19 15:44:18,361 : +2023-10-27 17:47:46,847 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 142.222 +/- 27.417 +2023-10-27 17:47:46,849 : -------------------------------------- | loss/ | | -| approx_kl | 0.0268 | -| entropy_loss | -3.77 | -| policy_loss | -0.00301 | -| value_loss | 0.814 | +| approx_kl | 0.0302 | +| entropy_loss | -3.48 | +| policy_loss | -0.0124 | +| value_loss | 0.607 | | stat/ | | -| constraint_violation | 754 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 133 | -| ep_reward | 0.535 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 174 | +| ep_reward | 0.697 | | stat_eval/ | | -| constraint_violation | 0.6 | +| constraint_violation | 0 | | ep_length | 250 | -| ep_return | 152 | -| ep_reward | 0.607 | -| mse | 210 | +| ep_return | 142 | +| ep_reward | 0.569 | +| mse | 258 | | time/ | | | progress | 0.26 | | step | 2.6e+05 | -| step_time | 9.47 | +| step_time | 13.5 | -------------------------------------- -2023-10-19 15:46:22,664 : Eval | ep_lengths 226.30 +/- 71.10 | ep_return 134.265 +/- 47.858 -2023-10-19 15:46:22,666 : +2023-10-27 17:50:18,575 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 138.765 +/- 32.370 +2023-10-27 17:50:18,577 : -------------------------------------- | loss/ | | -| approx_kl | 0.025 | -| entropy_loss | -3.7 | -| policy_loss | -0.00804 | -| value_loss | 0.41 | +| approx_kl | 0.0233 | +| entropy_loss | -3.41 | +| policy_loss | -0.00922 | +| value_loss | 0.411 | | stat/ | | -| constraint_violation | 758 | +| constraint_violation | 2 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 162 | -| ep_reward | 0.65 | +| ep_return | 170 | +| ep_reward | 0.68 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 226 | -| ep_return | 134 | -| ep_reward | 0.537 | -| mse | 257 | +| constraint_violation | 3.1 | +| ep_length | 250 | +| ep_return | 139 | +| ep_reward | 0.555 | +| mse | 264 | | time/ | | | progress | 0.27 | | step | 2.7e+05 | -| step_time | 10.3 | +| step_time | 12.3 | -------------------------------------- -2023-10-19 15:48:22,707 : Eval | ep_lengths 177.00 +/- 111.52 | ep_return 99.040 +/- 68.244 -2023-10-19 15:48:22,709 : +2023-10-27 17:52:54,372 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 151.641 +/- 18.016 +2023-10-27 17:52:54,381 : -------------------------------------- | loss/ | | -| approx_kl | 0.0285 | -| entropy_loss | -3.68 | -| policy_loss | 0.000878 | -| value_loss | 0.978 | +| approx_kl | 0.0242 | +| entropy_loss | -3.4 | +| policy_loss | -0.0113 | +| value_loss | 0.688 | | stat/ | | -| constraint_violation | 815 | -| ep_constraint_vio... | 1.2 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 156 | -| ep_reward | 0.624 | +| ep_return | 176 | +| ep_reward | 0.704 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 177 | -| ep_return | 99 | -| ep_reward | 0.396 | -| mse | 209 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 152 | +| ep_reward | 0.607 | +| mse | 232 | | time/ | | | progress | 0.28 | | step | 2.8e+05 | -| step_time | 10.5 | +| step_time | 11.9 | -------------------------------------- -2023-10-19 15:50:24,518 : Eval | ep_lengths 202.80 +/- 94.43 | ep_return 107.069 +/- 55.041 -2023-10-19 15:50:24,520 : +2023-10-27 17:55:18,437 : Eval | ep_lengths 201.40 +/- 97.21 | ep_return 109.147 +/- 65.627 +2023-10-27 17:55:18,438 : -------------------------------------- | loss/ | | -| approx_kl | 0.027 | -| entropy_loss | -3.74 | -| policy_loss | -0.00823 | -| value_loss | 1.69 | +| approx_kl | 0.0253 | +| entropy_loss | -3.37 | +| policy_loss | -0.00991 | +| value_loss | 0.356 | | stat/ | | -| constraint_violation | 855 | -| ep_constraint_vio... | 1.1 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 162 | -| ep_reward | 0.646 | +| ep_return | 173 | +| ep_reward | 0.692 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 203 | -| ep_return | 107 | -| ep_reward | 0.428 | -| mse | 326 | +| constraint_violation | 0.3 | +| ep_length | 201 | +| ep_return | 109 | +| ep_reward | 0.437 | +| mse | 183 | | time/ | | | progress | 0.29 | | step | 2.9e+05 | -| step_time | 10 | +| step_time | 13.1 | -------------------------------------- -2023-10-19 15:52:25,754 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 119.497 +/- 45.021 -2023-10-19 15:52:25,756 : +2023-10-27 17:57:45,293 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 102.432 +/- 36.565 +2023-10-27 17:57:45,295 : -------------------------------------- | loss/ | | -| approx_kl | 0.0278 | -| entropy_loss | -3.7 | -| policy_loss | -0.0115 | -| value_loss | 0.572 | +| approx_kl | 0.0304 | +| entropy_loss | -3.39 | +| policy_loss | -0.0049 | +| value_loss | 0.52 | | stat/ | | -| constraint_violation | 882 | -| ep_constraint_vio... | 2.5 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 161 | -| ep_reward | 0.644 | +| ep_return | 175 | +| ep_reward | 0.699 | | stat_eval/ | | -| constraint_violation | 0.3 | +| constraint_violation | 0.2 | | ep_length | 225 | -| ep_return | 119 | -| ep_reward | 0.478 | -| mse | 310 | +| ep_return | 102 | +| ep_reward | 0.41 | +| mse | 315 | | time/ | | | progress | 0.3 | | step | 3e+05 | -| step_time | 9.74 | +| step_time | 11.7 | -------------------------------------- -2023-10-19 15:54:24,954 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 139.694 +/- 54.969 -2023-10-19 15:54:24,956 : +2023-10-27 18:00:19,154 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 117.590 +/- 37.651 +2023-10-27 18:00:19,156 : -------------------------------------- | loss/ | | -| approx_kl | 0.0357 | -| entropy_loss | -3.71 | -| policy_loss | 0.00368 | -| value_loss | 0.325 | +| approx_kl | 0.0326 | +| entropy_loss | -3.37 | +| policy_loss | -0.0111 | +| value_loss | 0.378 | | stat/ | | -| constraint_violation | 898 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 142 | -| ep_reward | 0.569 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 176 | +| ep_reward | 0.706 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 140 | -| ep_reward | 0.559 | -| mse | 243 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 118 | +| ep_reward | 0.47 | +| mse | 335 | | time/ | | | progress | 0.31 | | step | 3.1e+05 | -| step_time | 10.1 | +| step_time | 12.9 | -------------------------------------- -2023-10-19 15:56:26,186 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 139.355 +/- 30.000 -2023-10-19 15:56:26,187 : +2023-10-27 18:02:49,187 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 133.530 +/- 27.140 +2023-10-27 18:02:49,188 : -------------------------------------- | loss/ | | -| approx_kl | 0.0268 | -| entropy_loss | -3.73 | -| policy_loss | -0.00968 | -| value_loss | 1.01 | +| approx_kl | 0.0192 | +| entropy_loss | -3.44 | +| policy_loss | -0.00446 | +| value_loss | 0.896 | | stat/ | | -| constraint_violation | 931 | -| ep_constraint_vio... | 0.5 | -| ep_length | 225 | -| ep_return | 144 | -| ep_reward | 0.575 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 177 | +| ep_reward | 0.708 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 139 | -| ep_reward | 0.557 | -| mse | 321 | +| ep_return | 134 | +| ep_reward | 0.534 | +| mse | 329 | | time/ | | | progress | 0.32 | | step | 3.2e+05 | -| step_time | 9.85 | +| step_time | 12.7 | -------------------------------------- -2023-10-19 15:58:24,760 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 147.741 +/- 23.556 -2023-10-19 15:58:24,761 : +2023-10-27 18:05:14,529 : Eval | ep_lengths 201.90 +/- 96.28 | ep_return 110.098 +/- 66.394 +2023-10-27 18:05:14,530 : -------------------------------------- | loss/ | | -| approx_kl | 0.0348 | -| entropy_loss | -3.71 | -| policy_loss | -0.00114 | -| value_loss | 1.44 | +| approx_kl | 0.0312 | +| entropy_loss | -3.45 | +| policy_loss | -0.0102 | +| value_loss | 0.745 | | stat/ | | -| constraint_violation | 936 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 145 | -| ep_reward | 0.584 | -| stat_eval/ | | -| constraint_violation | 0.2 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 148 | -| ep_reward | 0.591 | -| mse | 261 | +| ep_return | 179 | +| ep_reward | 0.716 | +| stat_eval/ | | +| constraint_violation | 0.3 | +| ep_length | 202 | +| ep_return | 110 | +| ep_reward | 0.44 | +| mse | 222 | | time/ | | | progress | 0.33 | | step | 3.3e+05 | -| step_time | 9.83 | +| step_time | 12.1 | -------------------------------------- -2023-10-19 16:00:22,170 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 141.263 +/- 22.832 -2023-10-19 16:00:22,172 : +2023-10-27 18:07:44,615 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 133.767 +/- 19.114 +2023-10-27 18:07:44,617 : -------------------------------------- | loss/ | | -| approx_kl | 0.00986 | -| entropy_loss | -3.8 | -| policy_loss | -0.0214 | -| value_loss | 1.56 | +| approx_kl | 0.0281 | +| entropy_loss | -3.41 | +| policy_loss | -0.0108 | +| value_loss | 0.29 | | stat/ | | -| constraint_violation | 953 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 154 | -| ep_reward | 0.617 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 184 | +| ep_reward | 0.735 | | stat_eval/ | | -| constraint_violation | 0.4 | +| constraint_violation | 0.1 | | ep_length | 250 | -| ep_return | 141 | -| ep_reward | 0.565 | -| mse | 362 | +| ep_return | 134 | +| ep_reward | 0.535 | +| mse | 306 | | time/ | | | progress | 0.34 | | step | 3.4e+05 | -| step_time | 9.71 | +| step_time | 12 | -------------------------------------- -2023-10-19 16:02:18,992 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 162.145 +/- 28.287 -2023-10-19 16:02:19,002 : +2023-10-27 18:10:14,306 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 135.758 +/- 45.736 +2023-10-27 18:10:14,307 : -------------------------------------- | loss/ | | -| approx_kl | 0.0218 | -| entropy_loss | -3.82 | -| policy_loss | -0.00341 | -| value_loss | 0.736 | +| approx_kl | 0.0313 | +| entropy_loss | -3.28 | +| policy_loss | -0.00645 | +| value_loss | 0.364 | | stat/ | | -| constraint_violation | 969 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 135 | -| ep_reward | 0.541 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 180 | +| ep_reward | 0.72 | | stat_eval/ | | -| constraint_violation | 0.1 | +| constraint_violation | 0 | | ep_length | 250 | -| ep_return | 162 | -| ep_reward | 0.649 | -| mse | 159 | +| ep_return | 136 | +| ep_reward | 0.543 | +| mse | 359 | | time/ | | | progress | 0.35 | | step | 3.5e+05 | -| step_time | 9.58 | +| step_time | 12.9 | -------------------------------------- -2023-10-19 16:04:13,115 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 141.927 +/- 51.474 -2023-10-19 16:04:13,116 : +2023-10-27 18:12:44,242 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 144.397 +/- 29.645 +2023-10-27 18:12:44,243 : -------------------------------------- | loss/ | | -| approx_kl | 0.0308 | -| entropy_loss | -3.77 | -| policy_loss | -0.00659 | -| value_loss | 2.25 | +| approx_kl | 0.0257 | +| entropy_loss | -3.3 | +| policy_loss | -0.00989 | +| value_loss | 0.547 | | stat/ | | -| constraint_violation | 985 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 152 | -| ep_reward | 0.612 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 175 | +| ep_reward | 0.7 | | stat_eval/ | | | constraint_violation | 1.4 | -| ep_length | 226 | -| ep_return | 142 | -| ep_reward | 0.568 | -| mse | 157 | +| ep_length | 250 | +| ep_return | 144 | +| ep_reward | 0.578 | +| mse | 252 | | time/ | | | progress | 0.36 | | step | 3.6e+05 | -| step_time | 9.69 | +| step_time | 12.8 | -------------------------------------- -2023-10-19 16:06:08,334 : Eval | ep_lengths 226.70 +/- 69.90 | ep_return 133.229 +/- 46.682 -2023-10-19 16:06:08,336 : +2023-10-27 18:15:06,498 : Eval | ep_lengths 202.30 +/- 95.41 | ep_return 121.040 +/- 65.380 +2023-10-27 18:15:06,500 : -------------------------------------- | loss/ | | -| approx_kl | 0.0245 | -| entropy_loss | -3.76 | -| policy_loss | -0.0119 | -| value_loss | 1.15 | +| approx_kl | 0.0229 | +| entropy_loss | -3.31 | +| policy_loss | -0.00919 | +| value_loss | 0.297 | | stat/ | | -| constraint_violation | 1.03e+03 | -| ep_constraint_vio... | 1.4 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 149 | -| ep_reward | 0.597 | +| ep_return | 177 | +| ep_reward | 0.709 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 227 | -| ep_return | 133 | -| ep_reward | 0.533 | -| mse | 194 | +| constraint_violation | 0.2 | +| ep_length | 202 | +| ep_return | 121 | +| ep_reward | 0.484 | +| mse | 177 | | time/ | | | progress | 0.37 | | step | 3.7e+05 | -| step_time | 9.76 | +| step_time | 11.9 | -------------------------------------- -2023-10-19 16:08:00,071 : Eval | ep_lengths 200.80 +/- 98.40 | ep_return 121.436 +/- 62.776 -2023-10-19 16:08:00,073 : +2023-10-27 18:17:33,103 : Eval | ep_lengths 225.90 +/- 72.30 | ep_return 125.076 +/- 53.103 +2023-10-27 18:17:33,104 : -------------------------------------- | loss/ | | -| approx_kl | 0.0109 | -| entropy_loss | -3.79 | -| policy_loss | -0.0131 | -| value_loss | 0.742 | +| approx_kl | 0.0361 | +| entropy_loss | -3.29 | +| policy_loss | 0.00213 | +| value_loss | 0.419 | | stat/ | | -| constraint_violation | 1.05e+03 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 126 | -| ep_reward | 0.503 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 178 | +| ep_reward | 0.711 | | stat_eval/ | | | constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 121 | -| ep_reward | 0.486 | -| mse | 202 | +| ep_length | 226 | +| ep_return | 125 | +| ep_reward | 0.5 | +| mse | 231 | | time/ | | | progress | 0.38 | | step | 3.8e+05 | -| step_time | 9.45 | +| step_time | 13 | -------------------------------------- -2023-10-19 16:09:54,201 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 138.592 +/- 48.325 -2023-10-19 16:09:54,220 : +2023-10-27 18:20:11,993 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 139.513 +/- 24.377 +2023-10-27 18:20:11,995 : -------------------------------------- | loss/ | | -| approx_kl | 0.0383 | -| entropy_loss | -3.79 | -| policy_loss | 0.00219 | -| value_loss | 0.956 | +| approx_kl | 0.0201 | +| entropy_loss | -3.24 | +| policy_loss | -0.00264 | +| value_loss | 0.227 | | stat/ | | -| constraint_violation | 1.07e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 149 | -| ep_reward | 0.597 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 178 | +| ep_reward | 0.714 | | stat_eval/ | | -| constraint_violation | 1.3 | -| ep_length | 225 | -| ep_return | 139 | -| ep_reward | 0.554 | -| mse | 168 | +| constraint_violation | 1.5 | +| ep_length | 250 | +| ep_return | 140 | +| ep_reward | 0.558 | +| mse | 313 | | time/ | | | progress | 0.39 | | step | 3.9e+05 | -| step_time | 9.24 | +| step_time | 12.6 | -------------------------------------- -2023-10-19 16:11:51,282 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 139.810 +/- 20.752 -2023-10-19 16:11:51,302 : +2023-10-27 18:22:33,044 : Eval | ep_lengths 225.60 +/- 73.20 | ep_return 125.020 +/- 49.891 +2023-10-27 18:22:33,045 : -------------------------------------- | loss/ | | -| approx_kl | 0.0351 | -| entropy_loss | -3.78 | -| policy_loss | -0.00896 | -| value_loss | 0.964 | +| approx_kl | 0.036 | +| entropy_loss | -3.26 | +| policy_loss | -0.00511 | +| value_loss | 0.787 | | stat/ | | -| constraint_violation | 1.08e+03 | -| ep_constraint_vio... | 0.9 | -| ep_length | 201 | -| ep_return | 115 | -| ep_reward | 0.464 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 140 | -| ep_reward | 0.559 | -| mse | 289 | +| ep_return | 174 | +| ep_reward | 0.697 | +| stat_eval/ | | +| constraint_violation | 0.2 | +| ep_length | 226 | +| ep_return | 125 | +| ep_reward | 0.5 | +| mse | 246 | | time/ | | | progress | 0.4 | | step | 4e+05 | -| step_time | 9.75 | +| step_time | 12.1 | -------------------------------------- -2023-10-19 16:13:46,755 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 152.369 +/- 22.662 -2023-10-19 16:13:46,758 : +2023-10-27 18:24:59,305 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 144.399 +/- 25.648 +2023-10-27 18:24:59,306 : -------------------------------------- | loss/ | | -| approx_kl | 0.023 | -| entropy_loss | -3.77 | -| policy_loss | -0.0167 | -| value_loss | 0.615 | +| approx_kl | 0.0262 | +| entropy_loss | -3.25 | +| policy_loss | -0.00895 | +| value_loss | 0.403 | | stat/ | | -| constraint_violation | 1.1e+03 | +| constraint_violation | 2 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 157 | -| ep_reward | 0.628 | +| ep_return | 186 | +| ep_reward | 0.744 | | stat_eval/ | | -| constraint_violation | 0.1 | +| constraint_violation | 0 | | ep_length | 250 | -| ep_return | 152 | -| ep_reward | 0.609 | -| mse | 210 | +| ep_return | 144 | +| ep_reward | 0.578 | +| mse | 259 | | time/ | | | progress | 0.41 | | step | 4.1e+05 | -| step_time | 9.37 | +| step_time | 12.4 | -------------------------------------- -2023-10-19 16:15:42,940 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 158.118 +/- 22.389 -2023-10-19 16:15:42,941 : +2023-10-27 18:27:22,378 : Eval | ep_lengths 201.40 +/- 97.20 | ep_return 120.817 +/- 64.652 +2023-10-27 18:27:22,380 : -------------------------------------- | loss/ | | -| approx_kl | 0.0308 | -| entropy_loss | -3.73 | -| policy_loss | -0.011 | -| value_loss | 2.46 | +| approx_kl | 0.015 | +| entropy_loss | -3.21 | +| policy_loss | -0.0106 | +| value_loss | 0.304 | | stat/ | | -| constraint_violation | 1.1e+03 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 134 | -| ep_reward | 0.539 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 158 | -| ep_reward | 0.632 | -| mse | 236 | +| ep_return | 175 | +| ep_reward | 0.702 | +| stat_eval/ | | +| constraint_violation | 1.4 | +| ep_length | 201 | +| ep_return | 121 | +| ep_reward | 0.483 | +| mse | 157 | | time/ | | | progress | 0.42 | | step | 4.2e+05 | -| step_time | 9.96 | +| step_time | 11 | -------------------------------------- -2023-10-19 16:17:35,573 : Eval | ep_lengths 200.80 +/- 98.40 | ep_return 105.813 +/- 56.409 -2023-10-19 16:17:35,575 : +2023-10-27 18:29:52,757 : Eval | ep_lengths 201.80 +/- 96.45 | ep_return 119.503 +/- 62.083 +2023-10-27 18:29:52,758 : -------------------------------------- | loss/ | | -| approx_kl | 0.0225 | -| entropy_loss | -3.78 | -| policy_loss | -0.0173 | -| value_loss | 3.12 | +| approx_kl | 0.0431 | +| entropy_loss | -3.16 | +| policy_loss | -0.0097 | +| value_loss | 0.181 | | stat/ | | -| constraint_violation | 1.12e+03 | -| ep_constraint_vio... | 0.6 | -| ep_length | 204 | -| ep_return | 107 | -| ep_reward | 0.432 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 182 | +| ep_reward | 0.728 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 201 | -| ep_return | 106 | -| ep_reward | 0.423 | -| mse | 214 | +| constraint_violation | 2.2 | +| ep_length | 202 | +| ep_return | 120 | +| ep_reward | 0.478 | +| mse | 189 | | time/ | | | progress | 0.43 | | step | 4.3e+05 | -| step_time | 9.82 | +| step_time | 13.1 | -------------------------------------- -2023-10-19 16:19:28,562 : Eval | ep_lengths 201.60 +/- 96.82 | ep_return 112.806 +/- 59.194 -2023-10-19 16:19:28,564 : +2023-10-27 18:32:21,592 : Eval | ep_lengths 226.50 +/- 70.50 | ep_return 130.631 +/- 49.742 +2023-10-27 18:32:21,594 : -------------------------------------- | loss/ | | -| approx_kl | 0.0349 | -| entropy_loss | -3.79 | -| policy_loss | -0.00303 | -| value_loss | 3.31 | +| approx_kl | 0.036 | +| entropy_loss | -3.17 | +| policy_loss | -0.0133 | +| value_loss | 0.173 | | stat/ | | -| constraint_violation | 1.15e+03 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 124 | -| ep_reward | 0.513 | +| constraint_violation | 2 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 183 | +| ep_reward | 0.733 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 202 | -| ep_return | 113 | -| ep_reward | 0.451 | -| mse | 269 | +| constraint_violation | 0.1 | +| ep_length | 226 | +| ep_return | 131 | +| ep_reward | 0.523 | +| mse | 284 | | time/ | | | progress | 0.44 | | step | 4.4e+05 | -| step_time | 9.48 | +| step_time | 13.5 | -------------------------------------- -2023-10-19 16:21:22,946 : Eval | ep_lengths 226.10 +/- 71.70 | ep_return 131.730 +/- 47.330 -2023-10-19 16:21:22,948 : +2023-10-27 18:34:50,838 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 150.907 +/- 24.971 +2023-10-27 18:34:50,840 : -------------------------------------- | loss/ | | -| approx_kl | 0.0363 | -| entropy_loss | -3.81 | -| policy_loss | -0.00788 | -| value_loss | 3.39 | +| approx_kl | 0.0246 | +| entropy_loss | -3.17 | +| policy_loss | -0.00901 | +| value_loss | 0.17 | | stat/ | | -| constraint_violation | 1.16e+03 | +| constraint_violation | 3 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 157 | -| ep_reward | 0.628 | +| ep_return | 198 | +| ep_reward | 0.791 | | stat_eval/ | | -| constraint_violation | 1.5 | -| ep_length | 226 | -| ep_return | 132 | -| ep_reward | 0.527 | -| mse | 237 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 151 | +| ep_reward | 0.604 | +| mse | 211 | | time/ | | | progress | 0.45 | | step | 4.5e+05 | -| step_time | 9.25 | +| step_time | 13.2 | -------------------------------------- -2023-10-19 16:23:18,147 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 161.134 +/- 14.402 -2023-10-19 16:23:18,170 : +2023-10-27 18:37:22,726 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 157.848 +/- 25.011 +2023-10-27 18:37:22,736 : -------------------------------------- | loss/ | | -| approx_kl | 0.0207 | -| entropy_loss | -3.72 | -| policy_loss | -0.00388 | -| value_loss | 1.22 | +| approx_kl | 0.0308 | +| entropy_loss | -3.15 | +| policy_loss | -0.0181 | +| value_loss | 0.299 | | stat/ | | -| constraint_violation | 1.18e+03 | -| ep_constraint_vio... | 0.8 | -| ep_length | 226 | -| ep_return | 140 | -| ep_reward | 0.562 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 197 | +| ep_reward | 0.786 | | stat_eval/ | | -| constraint_violation | 0.7 | +| constraint_violation | 0.9 | | ep_length | 250 | -| ep_return | 161 | -| ep_reward | 0.645 | -| mse | 153 | +| ep_return | 158 | +| ep_reward | 0.631 | +| mse | 139 | | time/ | | | progress | 0.46 | | step | 4.6e+05 | -| step_time | 9.29 | +| step_time | 15.4 | -------------------------------------- -2023-10-19 16:25:13,732 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 157.645 +/- 17.083 -2023-10-19 16:25:13,733 : +2023-10-27 18:39:47,768 : Eval | ep_lengths 226.20 +/- 71.40 | ep_return 122.442 +/- 44.312 +2023-10-27 18:39:47,769 : -------------------------------------- | loss/ | | -| approx_kl | 0.0291 | -| entropy_loss | -3.67 | -| policy_loss | -0.0184 | -| value_loss | 0.564 | +| approx_kl | 0.0247 | +| entropy_loss | -3.16 | +| policy_loss | -0.0128 | +| value_loss | 1.39 | | stat/ | | -| constraint_violation | 1.21e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 138 | -| ep_reward | 0.552 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 158 | -| ep_reward | 0.631 | -| mse | 188 | +| ep_return | 174 | +| ep_reward | 0.696 | +| stat_eval/ | | +| constraint_violation | 0.5 | +| ep_length | 226 | +| ep_return | 122 | +| ep_reward | 0.49 | +| mse | 252 | | time/ | | | progress | 0.47 | | step | 4.7e+05 | -| step_time | 9.64 | +| step_time | 12.1 | -------------------------------------- -2023-10-19 16:27:09,590 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 144.680 +/- 16.995 -2023-10-19 16:27:09,591 : +2023-10-27 18:42:05,675 : Eval | ep_lengths 201.20 +/- 97.60 | ep_return 118.116 +/- 61.863 +2023-10-27 18:42:05,677 : -------------------------------------- | loss/ | | -| approx_kl | 0.0162 | -| entropy_loss | -3.71 | -| policy_loss | -0.0167 | -| value_loss | 1.73 | +| approx_kl | 0.0303 | +| entropy_loss | -3.18 | +| policy_loss | -0.00919 | +| value_loss | 0.694 | | stat/ | | -| constraint_violation | 1.23e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 156 | -| ep_reward | 0.626 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 145 | -| ep_reward | 0.579 | -| mse | 291 | +| ep_return | 174 | +| ep_reward | 0.694 | +| stat_eval/ | | +| constraint_violation | 1.6 | +| ep_length | 201 | +| ep_return | 118 | +| ep_reward | 0.472 | +| mse | 191 | | time/ | | | progress | 0.48 | | step | 4.8e+05 | -| step_time | 9.58 | +| step_time | 11.4 | -------------------------------------- -2023-10-19 16:29:06,519 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 153.277 +/- 17.265 -2023-10-19 16:29:06,520 : +2023-10-27 18:44:38,669 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 131.897 +/- 34.881 +2023-10-27 18:44:38,670 : -------------------------------------- | loss/ | | -| approx_kl | 0.0321 | -| entropy_loss | -3.74 | -| policy_loss | -0.00294 | -| value_loss | 0.664 | +| approx_kl | 0.0329 | +| entropy_loss | -3.16 | +| policy_loss | -0.0124 | +| value_loss | 0.168 | | stat/ | | -| constraint_violation | 1.25e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 137 | -| ep_reward | 0.548 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 175 | +| ep_reward | 0.7 | | stat_eval/ | | -| constraint_violation | 1.5 | +| constraint_violation | 0.2 | | ep_length | 250 | -| ep_return | 153 | -| ep_reward | 0.613 | -| mse | 201 | +| ep_return | 132 | +| ep_reward | 0.528 | +| mse | 396 | | time/ | | | progress | 0.49 | | step | 4.9e+05 | -| step_time | 9.69 | +| step_time | 12.9 | -------------------------------------- -2023-10-19 16:31:00,392 : Eval | ep_lengths 226.80 +/- 69.60 | ep_return 132.761 +/- 46.974 -2023-10-19 16:31:00,394 : +2023-10-27 18:47:08,977 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 138.696 +/- 14.098 +2023-10-27 18:47:08,978 : -------------------------------------- | loss/ | | -| approx_kl | 0.0165 | -| entropy_loss | -3.72 | -| policy_loss | -0.00855 | -| value_loss | 0.828 | +| approx_kl | 0.0377 | +| entropy_loss | -3.14 | +| policy_loss | -0.00625 | +| value_loss | 0.213 | | stat/ | | -| constraint_violation | 1.26e+03 | -| ep_constraint_vio... | 0.1 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 150 | -| ep_reward | 0.602 | +| ep_return | 187 | +| ep_reward | 0.748 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 227 | -| ep_return | 133 | -| ep_reward | 0.531 | -| mse | 211 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 139 | +| ep_reward | 0.555 | +| mse | 276 | | time/ | | | progress | 0.5 | | step | 5e+05 | -| step_time | 9.54 | +| step_time | 12.4 | -------------------------------------- -2023-10-19 16:32:57,358 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 144.208 +/- 16.823 -2023-10-19 16:32:57,359 : +2023-10-27 18:49:44,060 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 134.915 +/- 51.737 +2023-10-27 18:49:44,061 : -------------------------------------- | loss/ | | -| approx_kl | 0.0321 | -| entropy_loss | -3.67 | -| policy_loss | -0.0136 | -| value_loss | 0.662 | +| approx_kl | 0.0396 | +| entropy_loss | -3.13 | +| policy_loss | -0.00953 | +| value_loss | 0.388 | | stat/ | | -| constraint_violation | 1.29e+03 | -| ep_constraint_vio... | 1.4 | -| ep_length | 226 | -| ep_return | 131 | -| ep_reward | 0.523 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 144 | -| ep_reward | 0.577 | -| mse | 261 | +| ep_return | 179 | +| ep_reward | 0.714 | +| stat_eval/ | | +| constraint_violation | 0.6 | +| ep_length | 225 | +| ep_return | 135 | +| ep_reward | 0.54 | +| mse | 155 | | time/ | | | progress | 0.51 | | step | 5.1e+05 | -| step_time | 9.58 | +| step_time | 12.4 | -------------------------------------- -2023-10-19 16:34:40,676 : Eval | ep_lengths 153.70 +/- 117.96 | ep_return 95.960 +/- 78.673 -2023-10-19 16:34:40,677 : +2023-10-27 18:52:13,761 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 137.595 +/- 54.890 +2023-10-27 18:52:13,762 : -------------------------------------- | loss/ | | -| approx_kl | 0.0323 | -| entropy_loss | -3.64 | -| policy_loss | -0.0128 | -| value_loss | 0.629 | +| approx_kl | 0.0299 | +| entropy_loss | -3.1 | +| policy_loss | -0.00483 | +| value_loss | 0.34 | | stat/ | | -| constraint_violation | 1.3e+03 | +| constraint_violation | 3 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 176 | -| ep_reward | 0.703 | +| ep_return | 191 | +| ep_reward | 0.766 | | stat_eval/ | | -| constraint_violation | 0.5 | -| ep_length | 154 | -| ep_return | 96 | -| ep_reward | 0.384 | -| mse | 109 | +| constraint_violation | 0.4 | +| ep_length | 226 | +| ep_return | 138 | +| ep_reward | 0.55 | +| mse | 221 | | time/ | | | progress | 0.52 | | step | 5.2e+05 | -| step_time | 8.69 | +| step_time | 12.9 | -------------------------------------- -2023-10-19 16:36:28,577 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 141.598 +/- 17.437 -2023-10-19 16:36:28,578 : +2023-10-27 18:54:51,063 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 135.764 +/- 26.534 +2023-10-27 18:54:51,064 : -------------------------------------- | loss/ | | -| approx_kl | 0.0205 | -| entropy_loss | -3.64 | -| policy_loss | 0.000346 | -| value_loss | 2.63 | +| approx_kl | 0.0345 | +| entropy_loss | -3.15 | +| policy_loss | -0.00585 | +| value_loss | 0.272 | | stat/ | | -| constraint_violation | 1.37e+03 | -| ep_constraint_vio... | 2.4 | -| ep_length | 202 | -| ep_return | 128 | -| ep_reward | 0.511 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 179 | +| ep_reward | 0.716 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 142 | -| ep_reward | 0.566 | -| mse | 280 | +| ep_return | 136 | +| ep_reward | 0.543 | +| mse | 300 | | time/ | | | progress | 0.53 | | step | 5.3e+05 | -| step_time | 9.05 | +| step_time | 12.5 | -------------------------------------- -2023-10-19 16:38:13,662 : Eval | ep_lengths 226.00 +/- 72.00 | ep_return 149.976 +/- 55.232 -2023-10-19 16:38:13,664 : +2023-10-27 18:57:16,378 : Eval | ep_lengths 226.30 +/- 71.10 | ep_return 129.863 +/- 48.761 +2023-10-27 18:57:16,379 : -------------------------------------- | loss/ | | -| approx_kl | 0.0271 | -| entropy_loss | -3.62 | -| policy_loss | 0.00185 | -| value_loss | 1.95 | +| approx_kl | 0.0289 | +| entropy_loss | -3.2 | +| policy_loss | -0.0149 | +| value_loss | 0.267 | | stat/ | | -| constraint_violation | 1.39e+03 | -| ep_constraint_vio... | 0.6 | -| ep_length | 201 | -| ep_return | 134 | -| ep_reward | 0.538 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 181 | +| ep_reward | 0.725 | | stat_eval/ | | -| constraint_violation | 2 | +| constraint_violation | 2.1 | | ep_length | 226 | -| ep_return | 150 | -| ep_reward | 0.6 | -| mse | 148 | +| ep_return | 130 | +| ep_reward | 0.519 | +| mse | 262 | | time/ | | | progress | 0.54 | | step | 5.4e+05 | -| step_time | 8.68 | +| step_time | 16.1 | -------------------------------------- -2023-10-19 16:40:00,881 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 153.609 +/- 22.024 -2023-10-19 16:40:00,882 : +2023-10-27 18:59:44,218 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 123.078 +/- 48.524 +2023-10-27 18:59:44,220 : -------------------------------------- | loss/ | | -| approx_kl | 0.0306 | -| entropy_loss | -3.61 | -| policy_loss | -0.0147 | -| value_loss | 0.483 | +| approx_kl | 0.023 | +| entropy_loss | -3.19 | +| policy_loss | 0.0011 | +| value_loss | 0.193 | | stat/ | | -| constraint_violation | 1.4e+03 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 130 | -| ep_reward | 0.523 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 154 | -| ep_reward | 0.614 | -| mse | 248 | +| ep_return | 185 | +| ep_reward | 0.74 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 123 | +| ep_reward | 0.492 | +| mse | 288 | | time/ | | | progress | 0.55 | | step | 5.5e+05 | -| step_time | 8.8 | +| step_time | 13.2 | -------------------------------------- -2023-10-19 16:41:45,294 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 152.381 +/- 15.927 -2023-10-19 16:41:45,295 : +2023-10-27 19:02:11,400 : Eval | ep_lengths 201.60 +/- 96.80 | ep_return 142.518 +/- 78.298 +2023-10-27 19:02:11,402 : -------------------------------------- | loss/ | | -| approx_kl | 0.0304 | -| entropy_loss | -3.64 | -| policy_loss | 0.00152 | -| value_loss | 0.546 | +| approx_kl | 0.0244 | +| entropy_loss | -3.22 | +| policy_loss | -0.00634 | +| value_loss | 0.392 | | stat/ | | -| constraint_violation | 1.42e+03 | -| ep_constraint_vio... | 0.4 | -| ep_length | 226 | -| ep_return | 145 | -| ep_reward | 0.582 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 152 | -| ep_reward | 0.61 | -| mse | 221 | +| ep_return | 187 | +| ep_reward | 0.747 | +| stat_eval/ | | +| constraint_violation | 0.2 | +| ep_length | 202 | +| ep_return | 143 | +| ep_reward | 0.57 | +| mse | 112 | | time/ | | | progress | 0.56 | | step | 5.6e+05 | -| step_time | 8.69 | +| step_time | 14 | -------------------------------------- -2023-10-19 16:43:28,897 : Eval | ep_lengths 226.70 +/- 69.90 | ep_return 136.181 +/- 49.142 -2023-10-19 16:43:28,898 : +2023-10-27 19:04:42,462 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 139.245 +/- 19.428 +2023-10-27 19:04:42,464 : -------------------------------------- | loss/ | | -| approx_kl | 0.0233 | -| entropy_loss | -3.69 | -| policy_loss | -0.00495 | -| value_loss | 0.239 | +| approx_kl | 0.0295 | +| entropy_loss | -3.27 | +| policy_loss | -0.0181 | +| value_loss | 0.189 | | stat/ | | -| constraint_violation | 1.42e+03 | +| constraint_violation | 3 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 153 | -| ep_reward | 0.613 | +| ep_return | 185 | +| ep_reward | 0.739 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 227 | -| ep_return | 136 | -| ep_reward | 0.545 | -| mse | 233 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 139 | +| ep_reward | 0.557 | +| mse | 297 | | time/ | | | progress | 0.57 | | step | 5.7e+05 | -| step_time | 8.69 | +| step_time | 12.6 | -------------------------------------- -2023-10-19 16:45:13,726 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 160.195 +/- 22.748 -2023-10-19 16:45:13,727 : +2023-10-27 19:07:10,830 : Eval | ep_lengths 202.60 +/- 94.82 | ep_return 97.946 +/- 54.251 +2023-10-27 19:07:10,831 : -------------------------------------- | loss/ | | -| approx_kl | 0.0333 | -| entropy_loss | -3.68 | -| policy_loss | 0.000148 | -| value_loss | 0.771 | +| approx_kl | 0.024 | +| entropy_loss | -3.27 | +| policy_loss | -0.0103 | +| value_loss | 0.246 | | stat/ | | -| constraint_violation | 1.47e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 149 | -| ep_reward | 0.6 | -| stat_eval/ | | -| constraint_violation | 1.3 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 160 | -| ep_reward | 0.641 | -| mse | 216 | +| ep_return | 187 | +| ep_reward | 0.748 | +| stat_eval/ | | +| constraint_violation | 0.5 | +| ep_length | 203 | +| ep_return | 97.9 | +| ep_reward | 0.392 | +| mse | 326 | | time/ | | | progress | 0.58 | | step | 5.8e+05 | -| step_time | 8.69 | +| step_time | 12.4 | -------------------------------------- -2023-10-19 16:46:58,272 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 151.820 +/- 17.250 -2023-10-19 16:46:58,274 : +2023-10-27 19:09:35,693 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 130.512 +/- 50.402 +2023-10-27 19:09:35,695 : -------------------------------------- | loss/ | | -| approx_kl | 0.0294 | -| entropy_loss | -3.59 | -| policy_loss | -0.0103 | -| value_loss | 1.58 | +| approx_kl | 0.0302 | +| entropy_loss | -3.21 | +| policy_loss | -0.0054 | +| value_loss | 0.492 | | stat/ | | -| constraint_violation | 1.5e+03 | -| ep_constraint_vio... | 0.8 | -| ep_length | 226 | -| ep_return | 135 | -| ep_reward | 0.54 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 152 | -| ep_reward | 0.607 | -| mse | 214 | +| ep_return | 186 | +| ep_reward | 0.746 | +| stat_eval/ | | +| constraint_violation | 0.6 | +| ep_length | 225 | +| ep_return | 131 | +| ep_reward | 0.522 | +| mse | 273 | | time/ | | | progress | 0.59 | | step | 5.9e+05 | -| step_time | 8.68 | +| step_time | 11.3 | -------------------------------------- -2023-10-19 16:48:40,909 : Eval | ep_lengths 225.30 +/- 74.10 | ep_return 135.344 +/- 46.589 -2023-10-19 16:48:40,910 : +2023-10-27 19:12:05,881 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 140.131 +/- 34.299 +2023-10-27 19:12:05,882 : -------------------------------------- | loss/ | | -| approx_kl | 0.0238 | -| entropy_loss | -3.61 | -| policy_loss | -0.00697 | -| value_loss | 0.464 | +| approx_kl | 0.0231 | +| entropy_loss | -3.19 | +| policy_loss | -0.0113 | +| value_loss | 0.538 | | stat/ | | -| constraint_violation | 1.53e+03 | -| ep_constraint_vio... | 2.7 | -| ep_length | 227 | -| ep_return | 144 | -| ep_reward | 0.578 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 192 | +| ep_reward | 0.767 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 225 | -| ep_return | 135 | -| ep_reward | 0.541 | -| mse | 187 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 140 | +| ep_reward | 0.561 | +| mse | 297 | | time/ | | | progress | 0.6 | | step | 6e+05 | -| step_time | 8.52 | +| step_time | 12.5 | -------------------------------------- -2023-10-19 16:50:26,231 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 139.765 +/- 18.833 -2023-10-19 16:50:26,232 : +2023-10-27 19:14:34,992 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 152.986 +/- 30.723 +2023-10-27 19:14:34,993 : -------------------------------------- | loss/ | | -| approx_kl | 0.0316 | -| entropy_loss | -3.61 | -| policy_loss | -0.00124 | -| value_loss | 0.447 | +| approx_kl | 0.031 | +| entropy_loss | -3.18 | +| policy_loss | -0.0145 | +| value_loss | 0.246 | | stat/ | | -| constraint_violation | 1.56e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 142 | -| ep_reward | 0.568 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 183 | +| ep_reward | 0.731 | | stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 0.6 | | ep_length | 250 | -| ep_return | 140 | -| ep_reward | 0.559 | -| mse | 300 | +| ep_return | 153 | +| ep_reward | 0.612 | +| mse | 257 | | time/ | | | progress | 0.61 | | step | 6.1e+05 | -| step_time | 8.62 | +| step_time | 12.3 | -------------------------------------- -2023-10-19 16:52:11,210 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 131.425 +/- 12.315 -2023-10-19 16:52:11,211 : +2023-10-27 19:17:01,165 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 133.195 +/- 50.716 +2023-10-27 19:17:01,166 : -------------------------------------- | loss/ | | -| approx_kl | 0.0315 | -| entropy_loss | -3.67 | -| policy_loss | -0.00503 | -| value_loss | 0.504 | +| approx_kl | 0.0279 | +| entropy_loss | -3.18 | +| policy_loss | -0.00776 | +| value_loss | 0.166 | | stat/ | | -| constraint_violation | 1.57e+03 | +| constraint_violation | 3 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 145 | -| ep_reward | 0.581 | +| ep_return | 190 | +| ep_reward | 0.761 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 250 | -| ep_return | 131 | -| ep_reward | 0.526 | -| mse | 268 | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 133 | +| ep_reward | 0.533 | +| mse | 229 | | time/ | | | progress | 0.62 | | step | 6.2e+05 | -| step_time | 8.62 | +| step_time | 11.7 | -------------------------------------- -2023-10-19 16:53:55,756 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 142.253 +/- 17.978 -2023-10-19 16:53:55,757 : +2023-10-27 19:19:30,285 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 145.356 +/- 16.855 +2023-10-27 19:19:30,287 : -------------------------------------- | loss/ | | -| approx_kl | 0.0262 | -| entropy_loss | -3.68 | -| policy_loss | -0.0226 | -| value_loss | 0.879 | +| approx_kl | 0.029 | +| entropy_loss | -3.15 | +| policy_loss | -0.00764 | +| value_loss | 0.829 | | stat/ | | -| constraint_violation | 1.61e+03 | -| ep_constraint_vio... | 2 | -| ep_length | 154 | -| ep_return | 90.6 | -| ep_reward | 0.368 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 193 | +| ep_reward | 0.77 | | stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 0.8 | | ep_length | 250 | -| ep_return | 142 | -| ep_reward | 0.569 | -| mse | 236 | +| ep_return | 145 | +| ep_reward | 0.581 | +| mse | 243 | | time/ | | | progress | 0.63 | | step | 6.3e+05 | -| step_time | 8.89 | +| step_time | 12.3 | -------------------------------------- -2023-10-19 16:55:40,940 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 154.161 +/- 12.254 -2023-10-19 16:55:40,942 : +2023-10-27 19:21:57,529 : Eval | ep_lengths 204.90 +/- 90.20 | ep_return 125.589 +/- 63.936 +2023-10-27 19:21:57,530 : -------------------------------------- | loss/ | | -| approx_kl | 0.0361 | -| entropy_loss | -3.66 | -| policy_loss | -0.00817 | -| value_loss | 0.518 | +| approx_kl | 0.0328 | +| entropy_loss | -3.15 | +| policy_loss | -0.0124 | +| value_loss | 0.219 | | stat/ | | -| constraint_violation | 1.64e+03 | -| ep_constraint_vio... | 1 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 146 | -| ep_reward | 0.586 | +| ep_return | 192 | +| ep_reward | 0.769 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 154 | -| ep_reward | 0.617 | -| mse | 138 | +| constraint_violation | 0.2 | +| ep_length | 205 | +| ep_return | 126 | +| ep_reward | 0.502 | +| mse | 168 | | time/ | | | progress | 0.64 | | step | 6.4e+05 | -| step_time | 8.47 | +| step_time | 11.9 | -------------------------------------- -2023-10-19 16:57:20,459 : Eval | ep_lengths 201.70 +/- 96.66 | ep_return 119.254 +/- 60.600 -2023-10-19 16:57:20,460 : +2023-10-27 19:24:31,481 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 140.449 +/- 27.524 +2023-10-27 19:24:31,482 : -------------------------------------- | loss/ | | -| approx_kl | 0.0272 | -| entropy_loss | -3.69 | -| policy_loss | -0.014 | -| value_loss | 0.49 | +| approx_kl | 0.0258 | +| entropy_loss | -3.21 | +| policy_loss | -0.0074 | +| value_loss | 0.795 | | stat/ | | -| constraint_violation | 1.64e+03 | +| constraint_violation | 3 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 164 | -| ep_reward | 0.656 | +| ep_return | 182 | +| ep_reward | 0.729 | | stat_eval/ | | -| constraint_violation | 0.6 | -| ep_length | 202 | -| ep_return | 119 | -| ep_reward | 0.477 | -| mse | 177 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 140 | +| ep_reward | 0.562 | +| mse | 318 | | time/ | | | progress | 0.65 | | step | 6.5e+05 | -| step_time | 8.18 | +| step_time | 12.4 | -------------------------------------- -2023-10-19 16:59:01,814 : Eval | ep_lengths 225.90 +/- 72.30 | ep_return 132.225 +/- 49.855 -2023-10-19 16:59:01,816 : +2023-10-27 19:27:03,377 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 127.441 +/- 29.203 +2023-10-27 19:27:03,379 : -------------------------------------- | loss/ | | -| approx_kl | 0.0302 | -| entropy_loss | -3.71 | -| policy_loss | -0.015 | -| value_loss | 1.05 | +| approx_kl | 0.0294 | +| entropy_loss | -3.2 | +| policy_loss | -0.00992 | +| value_loss | 0.166 | | stat/ | | -| constraint_violation | 1.64e+03 | +| constraint_violation | 3 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 152 | -| ep_reward | 0.608 | +| ep_return | 176 | +| ep_reward | 0.703 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 226 | -| ep_return | 132 | -| ep_reward | 0.529 | -| mse | 223 | +| constraint_violation | 2.2 | +| ep_length | 250 | +| ep_return | 127 | +| ep_reward | 0.51 | +| mse | 284 | | time/ | | | progress | 0.66 | | step | 6.6e+05 | -| step_time | 8.4 | +| step_time | 13.3 | -------------------------------------- -2023-10-19 17:00:42,652 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 139.959 +/- 51.294 -2023-10-19 17:00:42,653 : +2023-10-27 19:29:32,826 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 143.239 +/- 20.334 +2023-10-27 19:29:32,828 : -------------------------------------- | loss/ | | -| approx_kl | 0.013 | -| entropy_loss | -3.74 | -| policy_loss | -0.0174 | -| value_loss | 0.422 | +| approx_kl | 0.0336 | +| entropy_loss | -3.22 | +| policy_loss | -0.00766 | +| value_loss | 0.616 | | stat/ | | -| constraint_violation | 1.66e+03 | -| ep_constraint_vio... | 0.8 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 167 | -| ep_reward | 0.669 | +| ep_return | 178 | +| ep_reward | 0.71 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 226 | -| ep_return | 140 | -| ep_reward | 0.56 | -| mse | 211 | +| constraint_violation | 0.8 | +| ep_length | 250 | +| ep_return | 143 | +| ep_reward | 0.573 | +| mse | 256 | | time/ | | | progress | 0.67 | | step | 6.7e+05 | -| step_time | 8.58 | +| step_time | 12.3 | -------------------------------------- -2023-10-19 17:02:25,653 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 154.696 +/- 18.982 -2023-10-19 17:02:25,655 : +2023-10-27 19:32:06,634 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 140.171 +/- 34.486 +2023-10-27 19:32:06,636 : -------------------------------------- | loss/ | | | approx_kl | 0.0272 | -| entropy_loss | -3.77 | -| policy_loss | -0.00525 | -| value_loss | 0.417 | +| entropy_loss | -3.26 | +| policy_loss | -0.00783 | +| value_loss | 0.202 | | stat/ | | -| constraint_violation | 1.7e+03 | -| ep_constraint_vio... | 1 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 155 | -| ep_reward | 0.62 | +| ep_return | 181 | +| ep_reward | 0.724 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 155 | -| ep_reward | 0.619 | -| mse | 199 | +| ep_return | 140 | +| ep_reward | 0.561 | +| mse | 306 | | time/ | | | progress | 0.68 | | step | 6.8e+05 | -| step_time | 8.4 | +| step_time | 12.8 | -------------------------------------- -2023-10-19 17:04:05,009 : Eval | ep_lengths 201.90 +/- 96.21 | ep_return 116.832 +/- 61.489 -2023-10-19 17:04:05,010 : +2023-10-27 19:34:38,839 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 158.727 +/- 33.843 +2023-10-27 19:34:38,849 : -------------------------------------- | loss/ | | -| approx_kl | 0.0325 | -| entropy_loss | -3.77 | -| policy_loss | -0.0072 | -| value_loss | 0.796 | +| approx_kl | 0.027 | +| entropy_loss | -3.2 | +| policy_loss | 0.00505 | +| value_loss | 0.492 | | stat/ | | -| constraint_violation | 1.72e+03 | +| constraint_violation | 3 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 158 | -| ep_reward | 0.63 | +| ep_return | 198 | +| ep_reward | 0.792 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 202 | -| ep_return | 117 | -| ep_reward | 0.467 | -| mse | 215 | +| constraint_violation | 1.1 | +| ep_length | 250 | +| ep_return | 159 | +| ep_reward | 0.635 | +| mse | 177 | | time/ | | | progress | 0.69 | | step | 6.9e+05 | -| step_time | 8.54 | +| step_time | 12 | -------------------------------------- -2023-10-19 17:05:46,256 : Eval | ep_lengths 227.20 +/- 68.40 | ep_return 138.549 +/- 49.431 -2023-10-19 17:05:46,257 : +2023-10-27 19:37:11,074 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 139.125 +/- 23.153 +2023-10-27 19:37:11,075 : -------------------------------------- | loss/ | | -| approx_kl | 0.0358 | -| entropy_loss | -3.76 | -| policy_loss | -0.00733 | -| value_loss | 0.441 | +| approx_kl | 0.0213 | +| entropy_loss | -3.2 | +| policy_loss | -0.00625 | +| value_loss | 0.749 | | stat/ | | -| constraint_violation | 1.73e+03 | +| constraint_violation | 3 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 147 | -| ep_reward | 0.587 | +| ep_return | 170 | +| ep_reward | 0.681 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 227 | +| constraint_violation | 0 | +| ep_length | 250 | | ep_return | 139 | -| ep_reward | 0.554 | -| mse | 201 | +| ep_reward | 0.556 | +| mse | 251 | | time/ | | | progress | 0.7 | | step | 7e+05 | -| step_time | 8.45 | +| step_time | 13 | -------------------------------------- -2023-10-19 17:07:25,511 : Eval | ep_lengths 226.20 +/- 71.40 | ep_return 157.254 +/- 55.726 -2023-10-19 17:07:25,512 : +2023-10-27 19:39:43,144 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 146.968 +/- 31.142 +2023-10-27 19:39:43,146 : -------------------------------------- | loss/ | | -| approx_kl | 0.0351 | -| entropy_loss | -3.75 | -| policy_loss | -0.00488 | -| value_loss | 0.261 | +| approx_kl | 0.0296 | +| entropy_loss | -3.16 | +| policy_loss | -0.00708 | +| value_loss | 0.696 | | stat/ | | -| constraint_violation | 1.75e+03 | -| ep_constraint_vio... | 0.2 | -| ep_length | 201 | -| ep_return | 116 | -| ep_reward | 0.465 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 182 | +| ep_reward | 0.729 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 226 | -| ep_return | 157 | -| ep_reward | 0.629 | -| mse | 119 | +| constraint_violation | 1.3 | +| ep_length | 250 | +| ep_return | 147 | +| ep_reward | 0.588 | +| mse | 254 | | time/ | | | progress | 0.71 | | step | 7.1e+05 | -| step_time | 8.17 | +| step_time | 12.3 | -------------------------------------- -2023-10-19 17:09:06,419 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 125.246 +/- 47.724 -2023-10-19 17:09:06,420 : +2023-10-27 19:42:12,433 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 114.651 +/- 44.037 +2023-10-27 19:42:12,435 : -------------------------------------- | loss/ | | -| approx_kl | 0.0365 | -| entropy_loss | -3.77 | -| policy_loss | -0.0132 | -| value_loss | 0.728 | +| approx_kl | 0.0325 | +| entropy_loss | -3.14 | +| policy_loss | -0.0117 | +| value_loss | 0.326 | | stat/ | | -| constraint_violation | 1.76e+03 | -| ep_constraint_vio... | 0.4 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 151 | -| ep_reward | 0.604 | +| ep_return | 175 | +| ep_reward | 0.701 | | stat_eval/ | | -| constraint_violation | 2.1 | +| constraint_violation | 1.4 | | ep_length | 226 | -| ep_return | 125 | -| ep_reward | 0.501 | -| mse | 215 | +| ep_return | 115 | +| ep_reward | 0.459 | +| mse | 228 | | time/ | | | progress | 0.72 | | step | 7.2e+05 | -| step_time | 8.51 | +| step_time | 13.3 | -------------------------------------- -2023-10-19 17:10:44,849 : Eval | ep_lengths 202.60 +/- 94.84 | ep_return 114.665 +/- 61.155 -2023-10-19 17:10:44,851 : +2023-10-27 19:44:40,978 : Eval | ep_lengths 226.70 +/- 69.90 | ep_return 135.436 +/- 53.887 +2023-10-27 19:44:40,979 : -------------------------------------- | loss/ | | -| approx_kl | 0.0291 | -| entropy_loss | -3.72 | -| policy_loss | -0.0158 | -| value_loss | 0.448 | +| approx_kl | 0.0314 | +| entropy_loss | -3.15 | +| policy_loss | -0.0159 | +| value_loss | 0.485 | | stat/ | | -| constraint_violation | 1.76e+03 | +| constraint_violation | 3 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 159 | -| ep_reward | 0.637 | +| ep_return | 177 | +| ep_reward | 0.709 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 203 | -| ep_return | 115 | -| ep_reward | 0.459 | -| mse | 253 | +| constraint_violation | 0.1 | +| ep_length | 227 | +| ep_return | 135 | +| ep_reward | 0.542 | +| mse | 185 | | time/ | | | progress | 0.73 | | step | 7.3e+05 | -| step_time | 8.5 | +| step_time | 12.1 | -------------------------------------- -2023-10-19 17:12:26,012 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 157.019 +/- 28.248 -2023-10-19 17:12:26,013 : +2023-10-27 19:47:12,186 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 133.050 +/- 26.505 +2023-10-27 19:47:12,188 : -------------------------------------- | loss/ | | -| approx_kl | 0.0299 | -| entropy_loss | -3.7 | -| policy_loss | 0.000219 | -| value_loss | 0.654 | +| approx_kl | 0.0305 | +| entropy_loss | -3.21 | +| policy_loss | -0.0072 | +| value_loss | 0.276 | | stat/ | | -| constraint_violation | 1.77e+03 | +| constraint_violation | 3 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 156 | -| ep_reward | 0.626 | +| ep_return | 186 | +| ep_reward | 0.746 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 157 | -| ep_reward | 0.628 | -| mse | 257 | +| ep_return | 133 | +| ep_reward | 0.532 | +| mse | 275 | | time/ | | | progress | 0.74 | | step | 7.4e+05 | -| step_time | 8.37 | +| step_time | 11.4 | -------------------------------------- -2023-10-19 17:14:07,533 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 149.226 +/- 24.406 -2023-10-19 17:14:07,534 : +2023-10-27 19:49:42,162 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 115.600 +/- 41.048 +2023-10-27 19:49:42,163 : -------------------------------------- | loss/ | | -| approx_kl | 0.0334 | -| entropy_loss | -3.64 | -| policy_loss | -0.0114 | -| value_loss | 0.398 | +| approx_kl | 0.021 | +| entropy_loss | -3.21 | +| policy_loss | -0.00952 | +| value_loss | 0.349 | | stat/ | | -| constraint_violation | 1.81e+03 | -| ep_constraint_vio... | 1.4 | -| ep_length | 226 | -| ep_return | 139 | -| ep_reward | 0.556 | -| stat_eval/ | | -| constraint_violation | 0.4 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 149 | -| ep_reward | 0.597 | -| mse | 275 | +| ep_return | 180 | +| ep_reward | 0.72 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 225 | +| ep_return | 116 | +| ep_reward | 0.462 | +| mse | 247 | | time/ | | | progress | 0.75 | | step | 7.5e+05 | -| step_time | 8.04 | +| step_time | 11.3 | -------------------------------------- -2023-10-19 17:15:43,462 : Eval | ep_lengths 176.90 +/- 111.73 | ep_return 103.231 +/- 68.977 -2023-10-19 17:15:43,463 : +2023-10-27 19:52:11,423 : Eval | ep_lengths 225.60 +/- 73.20 | ep_return 117.094 +/- 49.438 +2023-10-27 19:52:11,424 : -------------------------------------- | loss/ | | -| approx_kl | 0.0328 | -| entropy_loss | -3.65 | -| policy_loss | -0.0143 | -| value_loss | 0.356 | +| approx_kl | 0.0232 | +| entropy_loss | -3.17 | +| policy_loss | -0.00643 | +| value_loss | 0.358 | | stat/ | | -| constraint_violation | 1.81e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 149 | -| ep_reward | 0.598 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 178 | +| ep_reward | 0.712 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 177 | -| ep_return | 103 | -| ep_reward | 0.413 | -| mse | 232 | +| constraint_violation | 0.1 | +| ep_length | 226 | +| ep_return | 117 | +| ep_reward | 0.468 | +| mse | 264 | | time/ | | | progress | 0.76 | | step | 7.6e+05 | -| step_time | 8.22 | +| step_time | 14 | -------------------------------------- -2023-10-19 17:17:24,608 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 154.333 +/- 17.998 -2023-10-19 17:17:24,609 : +2023-10-27 19:54:41,377 : Eval | ep_lengths 201.30 +/- 97.41 | ep_return 108.814 +/- 58.286 +2023-10-27 19:54:41,378 : -------------------------------------- | loss/ | | -| approx_kl | 0.0244 | -| entropy_loss | -3.62 | -| policy_loss | -0.00904 | -| value_loss | 0.823 | +| approx_kl | 0.0201 | +| entropy_loss | -3.19 | +| policy_loss | -0.0039 | +| value_loss | 0.547 | | stat/ | | -| constraint_violation | 1.82e+03 | +| constraint_violation | 3 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 151 | -| ep_reward | 0.605 | +| ep_return | 173 | +| ep_reward | 0.693 | | stat_eval/ | | -| constraint_violation | 0 | -| ep_length | 250 | -| ep_return | 154 | -| ep_reward | 0.617 | -| mse | 213 | +| constraint_violation | 0.2 | +| ep_length | 201 | +| ep_return | 109 | +| ep_reward | 0.435 | +| mse | 163 | | time/ | | | progress | 0.77 | | step | 7.7e+05 | -| step_time | 8.18 | +| step_time | 14.6 | -------------------------------------- -2023-10-19 17:19:06,053 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 149.502 +/- 18.116 -2023-10-19 17:19:06,054 : +2023-10-27 19:57:12,993 : Eval | ep_lengths 225.10 +/- 74.70 | ep_return 131.098 +/- 51.976 +2023-10-27 19:57:12,994 : -------------------------------------- | loss/ | | -| approx_kl | 0.0241 | -| entropy_loss | -3.58 | -| policy_loss | -0.00971 | -| value_loss | 0.464 | +| approx_kl | 0.0313 | +| entropy_loss | -3.18 | +| policy_loss | -0.0112 | +| value_loss | 0.242 | | stat/ | | -| constraint_violation | 1.83e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 157 | -| ep_reward | 0.63 | -| stat_eval/ | | -| constraint_violation | 0.3 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 150 | -| ep_reward | 0.598 | -| mse | 278 | +| ep_return | 181 | +| ep_reward | 0.724 | +| stat_eval/ | | +| constraint_violation | 1.3 | +| ep_length | 225 | +| ep_return | 131 | +| ep_reward | 0.524 | +| mse | 221 | | time/ | | | progress | 0.78 | | step | 7.8e+05 | -| step_time | 8.31 | +| step_time | 12.1 | -------------------------------------- -2023-10-19 17:20:45,647 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 134.449 +/- 19.523 -2023-10-19 17:20:45,648 : +2023-10-27 19:59:42,655 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 127.940 +/- 47.203 +2023-10-27 19:59:42,656 : -------------------------------------- | loss/ | | -| approx_kl | 0.0291 | -| entropy_loss | -3.61 | -| policy_loss | -0.0171 | -| value_loss | 0.466 | +| approx_kl | 0.0328 | +| entropy_loss | -3.18 | +| policy_loss | -0.01 | +| value_loss | 0.196 | | stat/ | | -| constraint_violation | 1.84e+03 | -| ep_constraint_vio... | 0.8 | -| ep_length | 225 | -| ep_return | 151 | -| ep_reward | 0.602 | -| stat_eval/ | | -| constraint_violation | 1.4 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 134 | -| ep_reward | 0.538 | -| mse | 347 | +| ep_return | 178 | +| ep_reward | 0.712 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 226 | +| ep_return | 128 | +| ep_reward | 0.512 | +| mse | 211 | | time/ | | | progress | 0.79 | | step | 7.9e+05 | -| step_time | 7.98 | +| step_time | 12.2 | -------------------------------------- -2023-10-19 17:22:23,006 : Eval | ep_lengths 225.80 +/- 72.60 | ep_return 150.905 +/- 58.904 -2023-10-19 17:22:23,007 : +2023-10-27 20:02:18,045 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 137.288 +/- 13.843 +2023-10-27 20:02:18,046 : -------------------------------------- | loss/ | | -| approx_kl | 0.03 | -| entropy_loss | -3.62 | -| policy_loss | 0.00113 | -| value_loss | 0.573 | +| approx_kl | 0.035 | +| entropy_loss | -3.17 | +| policy_loss | -0.0105 | +| value_loss | 0.168 | | stat/ | | -| constraint_violation | 1.89e+03 | -| ep_constraint_vio... | 1.7 | -| ep_length | 201 | -| ep_return | 126 | -| ep_reward | 0.506 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 185 | +| ep_reward | 0.741 | | stat_eval/ | | -| constraint_violation | 0.4 | -| ep_length | 226 | -| ep_return | 151 | -| ep_reward | 0.604 | -| mse | 204 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 137 | +| ep_reward | 0.549 | +| mse | 305 | | time/ | | | progress | 0.8 | | step | 8e+05 | -| step_time | 7.97 | +| step_time | 12.7 | -------------------------------------- -2023-10-19 17:24:06,873 : Eval | ep_lengths 225.70 +/- 72.90 | ep_return 129.037 +/- 50.851 -2023-10-19 17:24:06,874 : +2023-10-27 20:04:51,102 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 149.654 +/- 22.539 +2023-10-27 20:04:51,104 : -------------------------------------- | loss/ | | -| approx_kl | 0.031 | -| entropy_loss | -3.6 | -| policy_loss | -0.00429 | -| value_loss | 0.709 | +| approx_kl | 0.0371 | +| entropy_loss | -3.13 | +| policy_loss | -0.0129 | +| value_loss | 0.218 | | stat/ | | -| constraint_violation | 1.9e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 134 | -| ep_reward | 0.539 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 189 | +| ep_reward | 0.756 | | stat_eval/ | | | constraint_violation | 0.1 | -| ep_length | 226 | -| ep_return | 129 | -| ep_reward | 0.516 | -| mse | 229 | +| ep_length | 250 | +| ep_return | 150 | +| ep_reward | 0.599 | +| mse | 206 | | time/ | | | progress | 0.81 | | step | 8.1e+05 | -| step_time | 8.63 | +| step_time | 11.3 | -------------------------------------- -2023-10-19 17:25:59,163 : Eval | ep_lengths 225.50 +/- 73.50 | ep_return 141.194 +/- 53.567 -2023-10-19 17:25:59,164 : +2023-10-27 20:07:20,583 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 148.082 +/- 23.374 +2023-10-27 20:07:20,585 : -------------------------------------- | loss/ | | -| approx_kl | 0.0327 | -| entropy_loss | -3.62 | -| policy_loss | -0.0136 | -| value_loss | 0.651 | +| approx_kl | 0.0374 | +| entropy_loss | -3.15 | +| policy_loss | 0.000752 | +| value_loss | 0.313 | | stat/ | | -| constraint_violation | 1.91e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 144 | -| ep_reward | 0.577 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 184 | +| ep_reward | 0.737 | | stat_eval/ | | -| constraint_violation | 1.7 | -| ep_length | 226 | -| ep_return | 141 | -| ep_reward | 0.565 | -| mse | 202 | +| constraint_violation | 0.7 | +| ep_length | 250 | +| ep_return | 148 | +| ep_reward | 0.592 | +| mse | 159 | | time/ | | | progress | 0.82 | | step | 8.2e+05 | -| step_time | 9.74 | --------------------------------------- - -2023-10-19 17:27:49,897 : Eval | ep_lengths 200.40 +/- 99.20 | ep_return 126.315 +/- 68.337 -2023-10-19 17:27:49,898 : ---------------------------------------- -| loss/ | | -| approx_kl | 0.0397 | -| entropy_loss | -3.59 | -| policy_loss | -0.000561 | -| value_loss | 0.896 | -| stat/ | | -| constraint_violation | 1.95e+03 | -| ep_constraint_vio... | 1.1 | -| ep_length | 226 | -| ep_return | 153 | -| ep_reward | 0.614 | -| stat_eval/ | | -| constraint_violation | 0.5 | -| ep_length | 200 | -| ep_return | 126 | -| ep_reward | 0.505 | -| mse | 142 | -| time/ | | -| progress | 0.83 | -| step | 8.3e+05 | -| step_time | 9 | ---------------------------------------- - -2023-10-19 17:29:42,694 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 131.375 +/- 48.024 -2023-10-19 17:29:42,695 : --------------------------------------- -| loss/ | | -| approx_kl | 0.0395 | -| entropy_loss | -3.55 | -| policy_loss | 5.77e-05 | -| value_loss | 1.8 | -| stat/ | | -| constraint_violation | 1.95e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 227 | -| ep_return | 142 | -| ep_reward | 0.569 | +| step_time | 13 | +-------------------------------------- + +2023-10-27 20:09:55,486 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 147.826 +/- 27.348 +2023-10-27 20:09:55,487 : +-------------------------------------- +| loss/ | | +| approx_kl | 0.0288 | +| entropy_loss | -3.14 | +| policy_loss | -0.0116 | +| value_loss | 0.294 | +| stat/ | | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 187 | +| ep_reward | 0.747 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 131 | -| ep_reward | 0.526 | -| mse | 228 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 148 | +| ep_reward | 0.591 | +| mse | 231 | +| time/ | | +| progress | 0.83 | +| step | 8.3e+05 | +| step_time | 11.9 | +-------------------------------------- + +2023-10-27 20:12:24,551 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 156.533 +/- 20.077 +2023-10-27 20:12:24,552 : +-------------------------------------- +| loss/ | | +| approx_kl | 0.0452 | +| entropy_loss | -3.15 | +| policy_loss | 0.00388 | +| value_loss | 0.152 | +| stat/ | | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 190 | +| ep_reward | 0.762 | +| stat_eval/ | | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 157 | +| ep_reward | 0.626 | +| mse | 210 | | time/ | | | progress | 0.84 | | step | 8.4e+05 | -| step_time | 9.21 | +| step_time | 11.4 | -------------------------------------- -2023-10-19 17:31:35,606 : Eval | ep_lengths 226.10 +/- 71.70 | ep_return 138.455 +/- 50.180 -2023-10-19 17:31:35,607 : +2023-10-27 20:14:43,778 : Eval | ep_lengths 151.60 +/- 120.52 | ep_return 82.560 +/- 69.752 +2023-10-27 20:14:43,779 : -------------------------------------- | loss/ | | -| approx_kl | 0.026 | -| entropy_loss | -3.58 | -| policy_loss | -0.00769 | -| value_loss | 0.311 | +| approx_kl | 0.0294 | +| entropy_loss | -3.16 | +| policy_loss | 0.000651 | +| value_loss | 0.204 | | stat/ | | -| constraint_violation | 1.96e+03 | -| ep_constraint_vio... | 0.9 | -| ep_length | 226 | -| ep_return | 138 | -| ep_reward | 0.554 | +| constraint_violation | 3 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 192 | +| ep_reward | 0.768 | | stat_eval/ | | -| constraint_violation | 1.7 | -| ep_length | 226 | -| ep_return | 138 | -| ep_reward | 0.554 | -| mse | 199 | +| constraint_violation | 0.4 | +| ep_length | 152 | +| ep_return | 82.6 | +| ep_reward | 0.33 | +| mse | 119 | | time/ | | | progress | 0.85 | | step | 8.5e+05 | -| step_time | 9.61 | +| step_time | 13 | -------------------------------------- -2023-10-19 17:33:28,779 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 170.317 +/- 22.861 -2023-10-19 17:33:28,789 : +2023-10-27 20:17:19,382 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 141.195 +/- 34.582 +2023-10-27 20:17:19,383 : -------------------------------------- | loss/ | | -| approx_kl | 0.0347 | -| entropy_loss | -3.57 | -| policy_loss | -0.00225 | -| value_loss | 0.841 | +| approx_kl | 0.0304 | +| entropy_loss | -3.14 | +| policy_loss | -0.00212 | +| value_loss | 0.289 | | stat/ | | -| constraint_violation | 1.99e+03 | +| constraint_violation | 4 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 172 | -| ep_reward | 0.688 | +| ep_return | 183 | +| ep_reward | 0.733 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 170 | -| ep_reward | 0.681 | -| mse | 148 | +| ep_return | 141 | +| ep_reward | 0.565 | +| mse | 351 | | time/ | | | progress | 0.86 | | step | 8.6e+05 | -| step_time | 9.61 | +| step_time | 14.1 | -------------------------------------- -2023-10-19 17:35:18,649 : Eval | ep_lengths 226.00 +/- 72.00 | ep_return 132.843 +/- 54.713 -2023-10-19 17:35:18,650 : +2023-10-27 20:19:46,358 : Eval | ep_lengths 201.70 +/- 96.61 | ep_return 122.846 +/- 65.707 +2023-10-27 20:19:46,359 : -------------------------------------- | loss/ | | -| approx_kl | 0.032 | -| entropy_loss | -3.57 | -| policy_loss | 0.00153 | -| value_loss | 2.7 | +| approx_kl | 0.0325 | +| entropy_loss | -3.1 | +| policy_loss | -0.0049 | +| value_loss | 0.611 | | stat/ | | -| constraint_violation | 2.03e+03 | -| ep_constraint_vio... | 1.8 | -| ep_length | 201 | -| ep_return | 130 | -| ep_reward | 0.531 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 198 | +| ep_reward | 0.792 | | stat_eval/ | | -| constraint_violation | 0.3 | -| ep_length | 226 | -| ep_return | 133 | -| ep_reward | 0.531 | -| mse | 251 | +| constraint_violation | 1.3 | +| ep_length | 202 | +| ep_return | 123 | +| ep_reward | 0.491 | +| mse | 217 | | time/ | | | progress | 0.87 | | step | 8.7e+05 | -| step_time | 9.47 | +| step_time | 14.2 | -------------------------------------- -2023-10-19 17:37:06,314 : Eval | ep_lengths 225.80 +/- 72.60 | ep_return 142.767 +/- 54.363 -2023-10-19 17:37:06,315 : +2023-10-27 20:22:10,591 : Eval | ep_lengths 201.60 +/- 96.82 | ep_return 122.047 +/- 63.426 +2023-10-27 20:22:10,593 : -------------------------------------- | loss/ | | -| approx_kl | 0.0366 | -| entropy_loss | -3.61 | -| policy_loss | -0.00909 | -| value_loss | 0.503 | +| approx_kl | 0.033 | +| entropy_loss | -3.08 | +| policy_loss | -0.00864 | +| value_loss | 0.239 | | stat/ | | -| constraint_violation | 2.03e+03 | +| constraint_violation | 4 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 157 | -| ep_reward | 0.628 | +| ep_return | 181 | +| ep_reward | 0.724 | | stat_eval/ | | -| constraint_violation | 0.6 | -| ep_length | 226 | -| ep_return | 143 | -| ep_reward | 0.571 | -| mse | 228 | +| constraint_violation | 0.2 | +| ep_length | 202 | +| ep_return | 122 | +| ep_reward | 0.488 | +| mse | 205 | | time/ | | | progress | 0.88 | | step | 8.8e+05 | -| step_time | 9.25 | +| step_time | 13.5 | -------------------------------------- -2023-10-19 17:38:54,647 : Eval | ep_lengths 226.60 +/- 70.20 | ep_return 137.337 +/- 54.643 -2023-10-19 17:38:54,648 : +2023-10-27 20:24:40,324 : Eval | ep_lengths 226.10 +/- 71.70 | ep_return 140.924 +/- 53.436 +2023-10-27 20:24:40,325 : -------------------------------------- | loss/ | | -| approx_kl | 0.0293 | -| entropy_loss | -3.62 | -| policy_loss | -0.00717 | -| value_loss | 0.773 | +| approx_kl | 0.0269 | +| entropy_loss | -3.03 | +| policy_loss | -0.00342 | +| value_loss | 0.297 | | stat/ | | -| constraint_violation | 2.05e+03 | -| ep_constraint_vio... | 1.1 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 152 | -| ep_reward | 0.61 | +| ep_return | 179 | +| ep_reward | 0.717 | | stat_eval/ | | -| constraint_violation | 0.1 | -| ep_length | 227 | -| ep_return | 137 | -| ep_reward | 0.549 | -| mse | 224 | +| constraint_violation | 0.3 | +| ep_length | 226 | +| ep_return | 141 | +| ep_reward | 0.564 | +| mse | 220 | | time/ | | | progress | 0.89 | | step | 8.9e+05 | -| step_time | 8.73 | +| step_time | 12.5 | -------------------------------------- -2023-10-19 17:40:42,230 : Eval | ep_lengths 226.90 +/- 69.30 | ep_return 139.075 +/- 52.323 -2023-10-19 17:40:42,231 : +2023-10-27 20:27:09,547 : Eval | ep_lengths 225.90 +/- 72.30 | ep_return 122.981 +/- 49.120 +2023-10-27 20:27:09,548 : -------------------------------------- | loss/ | | -| approx_kl | 0.0244 | -| entropy_loss | -3.58 | -| policy_loss | -0.0103 | -| value_loss | 0.909 | +| approx_kl | 0.0301 | +| entropy_loss | -3 | +| policy_loss | -0.00583 | +| value_loss | 0.152 | | stat/ | | -| constraint_violation | 2.05e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 156 | -| ep_reward | 0.624 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 185 | +| ep_reward | 0.742 | | stat_eval/ | | -| constraint_violation | 0.4 | -| ep_length | 227 | -| ep_return | 139 | -| ep_reward | 0.556 | -| mse | 220 | +| constraint_violation | 1.4 | +| ep_length | 226 | +| ep_return | 123 | +| ep_reward | 0.492 | +| mse | 243 | | time/ | | | progress | 0.9 | | step | 9e+05 | -| step_time | 8.64 | +| step_time | 11.5 | -------------------------------------- -2023-10-19 17:42:30,346 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 139.570 +/- 51.765 -2023-10-19 17:42:30,347 : +2023-10-27 20:29:40,119 : Eval | ep_lengths 226.60 +/- 70.20 | ep_return 128.584 +/- 48.233 +2023-10-27 20:29:40,120 : -------------------------------------- | loss/ | | -| approx_kl | 0.0303 | -| entropy_loss | -3.59 | -| policy_loss | -0.016 | -| value_loss | 1.39 | +| approx_kl | 0.0315 | +| entropy_loss | -2.91 | +| policy_loss | -0.0136 | +| value_loss | 0.28 | | stat/ | | -| constraint_violation | 2.08e+03 | +| constraint_violation | 4 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 176 | -| ep_reward | 0.702 | +| ep_return | 184 | +| ep_reward | 0.738 | | stat_eval/ | | | constraint_violation | 0.1 | -| ep_length | 225 | -| ep_return | 140 | -| ep_reward | 0.558 | -| mse | 207 | +| ep_length | 227 | +| ep_return | 129 | +| ep_reward | 0.514 | +| mse | 238 | | time/ | | | progress | 0.91 | | step | 9.1e+05 | -| step_time | 8.1 | +| step_time | 13 | -------------------------------------- -2023-10-19 17:44:17,070 : Eval | ep_lengths 226.70 +/- 69.90 | ep_return 130.411 +/- 45.139 -2023-10-19 17:44:17,071 : +2023-10-27 20:32:10,766 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 142.945 +/- 49.984 +2023-10-27 20:32:10,768 : -------------------------------------- | loss/ | | -| approx_kl | 0.0343 | -| entropy_loss | -3.61 | -| policy_loss | -0.0131 | -| value_loss | 0.447 | +| approx_kl | 0.0265 | +| entropy_loss | -2.91 | +| policy_loss | -0.0128 | +| value_loss | 0.256 | | stat/ | | -| constraint_violation | 2.09e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 225 | -| ep_return | 161 | -| ep_reward | 0.647 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 188 | +| ep_reward | 0.752 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 227 | -| ep_return | 130 | -| ep_reward | 0.522 | -| mse | 207 | +| constraint_violation | 2 | +| ep_length | 225 | +| ep_return | 143 | +| ep_reward | 0.572 | +| mse | 119 | | time/ | | | progress | 0.92 | | step | 9.2e+05 | -| step_time | 9.12 | +| step_time | 12.4 | -------------------------------------- -2023-10-19 17:46:02,220 : Eval | ep_lengths 202.20 +/- 95.67 | ep_return 114.537 +/- 61.446 -2023-10-19 17:46:02,221 : +2023-10-27 20:34:43,090 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 150.910 +/- 20.319 +2023-10-27 20:34:43,092 : -------------------------------------- | loss/ | | -| approx_kl | 0.039 | -| entropy_loss | -3.62 | -| policy_loss | -0.0113 | -| value_loss | 0.544 | +| approx_kl | 0.052 | +| entropy_loss | -2.91 | +| policy_loss | 0.00279 | +| value_loss | 1.35 | | stat/ | | -| constraint_violation | 2.11e+03 | -| ep_constraint_vio... | 0.1 | +| constraint_violation | 4 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 168 | -| ep_reward | 0.673 | +| ep_return | 185 | +| ep_reward | 0.739 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 202 | -| ep_return | 115 | -| ep_reward | 0.458 | -| mse | 241 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 151 | +| ep_reward | 0.604 | +| mse | 187 | | time/ | | | progress | 0.93 | | step | 9.3e+05 | -| step_time | 8.86 | +| step_time | 12 | -------------------------------------- -2023-10-19 17:47:52,436 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 143.536 +/- 24.040 -2023-10-19 17:47:52,437 : +2023-10-27 20:37:15,676 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 146.341 +/- 29.702 +2023-10-27 20:37:15,677 : -------------------------------------- | loss/ | | -| approx_kl | 0.02 | -| entropy_loss | -3.62 | -| policy_loss | -0.0165 | -| value_loss | 0.525 | +| approx_kl | 0.0167 | +| entropy_loss | -2.95 | +| policy_loss | -0.00639 | +| value_loss | 0.222 | | stat/ | | -| constraint_violation | 2.13e+03 | +| constraint_violation | 5 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 160 | -| ep_reward | 0.64 | +| ep_return | 180 | +| ep_reward | 0.719 | | stat_eval/ | | | constraint_violation | 0 | | ep_length | 250 | -| ep_return | 144 | -| ep_reward | 0.574 | -| mse | 282 | +| ep_return | 146 | +| ep_reward | 0.585 | +| mse | 253 | | time/ | | | progress | 0.94 | | step | 9.4e+05 | -| step_time | 8.74 | +| step_time | 13.7 | -------------------------------------- -2023-10-19 17:49:42,065 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 150.704 +/- 25.734 -2023-10-19 17:49:42,066 : +2023-10-27 20:39:41,953 : Eval | ep_lengths 227.30 +/- 68.10 | ep_return 119.495 +/- 44.206 +2023-10-27 20:39:41,954 : -------------------------------------- | loss/ | | -| approx_kl | 0.0366 | -| entropy_loss | -3.64 | -| policy_loss | -0.0123 | -| value_loss | 0.829 | +| approx_kl | 0.0304 | +| entropy_loss | -2.97 | +| policy_loss | -0.00859 | +| value_loss | 0.482 | | stat/ | | -| constraint_violation | 2.14e+03 | -| ep_constraint_vio... | 0.1 | -| ep_length | 226 | -| ep_return | 142 | -| ep_reward | 0.572 | -| stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 151 | -| ep_reward | 0.603 | -| mse | 293 | +| ep_return | 184 | +| ep_reward | 0.737 | +| stat_eval/ | | +| constraint_violation | 0.1 | +| ep_length | 227 | +| ep_return | 119 | +| ep_reward | 0.478 | +| mse | 286 | | time/ | | | progress | 0.95 | | step | 9.5e+05 | -| step_time | 9.38 | +| step_time | 11.8 | -------------------------------------- -2023-10-19 17:51:31,858 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 153.795 +/- 24.313 -2023-10-19 17:51:31,859 : +2023-10-27 20:42:18,644 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 149.238 +/- 16.614 +2023-10-27 20:42:18,646 : -------------------------------------- | loss/ | | -| approx_kl | 0.0269 | -| entropy_loss | -3.67 | -| policy_loss | -0.0134 | -| value_loss | 0.561 | +| approx_kl | 0.0372 | +| entropy_loss | -2.95 | +| policy_loss | -0.0173 | +| value_loss | 0.555 | | stat/ | | -| constraint_violation | 2.18e+03 | -| ep_constraint_vio... | 0.2 | -| ep_length | 225 | -| ep_return | 149 | -| ep_reward | 0.596 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 179 | +| ep_reward | 0.715 | | stat_eval/ | | -| constraint_violation | 0 | +| constraint_violation | 0.2 | | ep_length | 250 | -| ep_return | 154 | -| ep_reward | 0.615 | -| mse | 224 | +| ep_return | 149 | +| ep_reward | 0.597 | +| mse | 210 | | time/ | | | progress | 0.96 | | step | 9.6e+05 | -| step_time | 8.87 | +| step_time | 12.7 | -------------------------------------- -2023-10-19 17:53:19,967 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 157.999 +/- 24.304 -2023-10-19 17:53:19,968 : +2023-10-27 20:44:46,160 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 126.026 +/- 46.216 +2023-10-27 20:44:46,162 : -------------------------------------- | loss/ | | -| approx_kl | 0.0317 | -| entropy_loss | -3.64 | -| policy_loss | -0.0182 | -| value_loss | 0.934 | +| approx_kl | 0.0261 | +| entropy_loss | -3.02 | +| policy_loss | -0.0156 | +| value_loss | 0.399 | | stat/ | | -| constraint_violation | 2.18e+03 | +| constraint_violation | 5 | | ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 168 | -| ep_reward | 0.673 | +| ep_return | 191 | +| ep_reward | 0.763 | | stat_eval/ | | -| constraint_violation | 1 | -| ep_length | 250 | -| ep_return | 158 | -| ep_reward | 0.632 | -| mse | 162 | +| constraint_violation | 1.7 | +| ep_length | 225 | +| ep_return | 126 | +| ep_reward | 0.504 | +| mse | 263 | | time/ | | | progress | 0.97 | | step | 9.7e+05 | -| step_time | 8.73 | +| step_time | 11.8 | -------------------------------------- -2023-10-19 17:55:06,324 : Eval | ep_lengths 225.20 +/- 74.40 | ep_return 128.726 +/- 46.190 -2023-10-19 17:55:06,325 : +2023-10-27 20:47:16,205 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 153.879 +/- 29.640 +2023-10-27 20:47:16,207 : -------------------------------------- | loss/ | | -| approx_kl | 0.0383 | -| entropy_loss | -3.64 | -| policy_loss | 0.00486 | -| value_loss | 0.276 | +| approx_kl | 0.0251 | +| entropy_loss | -3.01 | +| policy_loss | -0.015 | +| value_loss | 0.607 | | stat/ | | -| constraint_violation | 2.21e+03 | -| ep_constraint_vio... | 0.2 | -| ep_length | 200 | -| ep_return | 126 | -| ep_reward | 0.504 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | +| ep_length | 250 | +| ep_return | 184 | +| ep_reward | 0.735 | | stat_eval/ | | -| constraint_violation | 0.2 | -| ep_length | 225 | -| ep_return | 129 | -| ep_reward | 0.515 | -| mse | 218 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 154 | +| ep_reward | 0.616 | +| mse | 183 | | time/ | | | progress | 0.98 | | step | 9.8e+05 | -| step_time | 8.9 | +| step_time | 12.7 | -------------------------------------- -2023-10-19 17:56:53,794 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 165.897 +/- 20.721 -2023-10-19 17:56:53,795 : +2023-10-27 20:49:40,540 : Eval | ep_lengths 226.80 +/- 69.60 | ep_return 146.867 +/- 52.068 +2023-10-27 20:49:40,541 : -------------------------------------- | loss/ | | -| approx_kl | 0.0265 | -| entropy_loss | -3.65 | -| policy_loss | -0.00932 | -| value_loss | 0.986 | +| approx_kl | 0.034 | +| entropy_loss | -2.99 | +| policy_loss | -0.014 | +| value_loss | 0.773 | | stat/ | | -| constraint_violation | 2.24e+03 | -| ep_constraint_vio... | 0.5 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 163 | -| ep_reward | 0.651 | +| ep_return | 197 | +| ep_reward | 0.787 | | stat_eval/ | | -| constraint_violation | 0.9 | -| ep_length | 250 | -| ep_return | 166 | -| ep_reward | 0.664 | -| mse | 147 | +| constraint_violation | 0.2 | +| ep_length | 227 | +| ep_return | 147 | +| ep_reward | 0.587 | +| mse | 115 | | time/ | | | progress | 0.99 | | step | 9.9e+05 | -| step_time | 8.34 | +| step_time | 14 | -------------------------------------- -2023-10-19 17:58:20,538 : Checkpoint | ./models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/model_latest.pt -2023-10-19 17:58:38,010 : Eval | ep_lengths 225.40 +/- 73.80 | ep_return 131.187 +/- 48.805 -2023-10-19 17:58:38,011 : +2023-10-27 20:51:50,471 : Checkpoint | ./models/rl_models/quadrotor_3D/track/ppo/mpsf_sr_pen/model_latest.pt +2023-10-27 20:52:14,901 : Eval | ep_lengths 250.00 +/- 0.00 | ep_return 146.283 +/- 30.870 +2023-10-27 20:52:14,902 : -------------------------------------- | loss/ | | -| approx_kl | 0.0277 | -| entropy_loss | -3.67 | -| policy_loss | -0.00421 | -| value_loss | 0.224 | +| approx_kl | 0.03 | +| entropy_loss | -3.01 | +| policy_loss | -0.00743 | +| value_loss | 0.13 | | stat/ | | -| constraint_violation | 2.27e+03 | -| ep_constraint_vio... | 1.5 | +| constraint_violation | 5 | +| ep_constraint_vio... | 0 | | ep_length | 250 | -| ep_return | 158 | -| ep_reward | 0.633 | +| ep_return | 187 | +| ep_reward | 0.748 | | stat_eval/ | | -| constraint_violation | 0.7 | -| ep_length | 225 | -| ep_return | 131 | -| ep_reward | 0.525 | -| mse | 230 | +| constraint_violation | 0 | +| ep_length | 250 | +| ep_return | 146 | +| ep_reward | 0.585 | +| mse | 351 | | time/ | | | progress | 1 | | step | 1e+06 | -| step_time | 9.3 | +| step_time | 12 | -------------------------------------- diff --git a/experiments/mpsc/mpsc_experiment.sh b/experiments/mpsc/mpsc_experiment.sh index 447afe653..d93274f07 100755 --- a/experiments/mpsc/mpsc_experiment.sh +++ b/experiments/mpsc/mpsc_experiment.sh @@ -26,11 +26,11 @@ fi # SAFETY_FILTER='linear_mpsc' SAFETY_FILTER='nl_mpsc' -MPSC_COST='one_step_cost' +# MPSC_COST='one_step_cost' # MPSC_COST='constant_cost' # MPSC_COST='regularized_cost' # MPSC_COST='lqr_cost' -# MPSC_COST='precomputed_cost' +MPSC_COST='precomputed_cost' # MPSC_COST='learned_cost' MPSC_COST_HORIZON=2 diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_constraint_violations_uncert.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_constraint_violations_uncert.png index 831b67a34..db0549826 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_constraint_violations_uncert.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_constraint_violations_uncert.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_failed_uncert.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_failed_uncert.png index 3f6e43c0e..54836b785 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_failed_uncert.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_failed_uncert.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_length_uncert.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_length_uncert.png index 43ae46e9c..b4726ed0b 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_length_uncert.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_length_uncert.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_loss__approx_kl.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_loss__approx_kl.png index 3f965f914..ea1676fb6 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_loss__approx_kl.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_loss__approx_kl.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_loss__entropy_loss.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_loss__entropy_loss.png index 16cf18fbd..0d754cdf1 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_loss__entropy_loss.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_loss__entropy_loss.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_loss__policy_loss.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_loss__policy_loss.png index d9aaad632..d4d9ff7c8 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_loss__policy_loss.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_loss__policy_loss.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_loss__value_loss.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_loss__value_loss.png index 47de3a153..59bbb4ce4 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_loss__value_loss.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_loss__value_loss.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_magnitude_of_corrections.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_magnitude_of_corrections.png index d0f88b321..43986eaa7 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_magnitude_of_corrections.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_magnitude_of_corrections.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_max_correction.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_max_correction.png index a7b6eca27..830abd5a4 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_max_correction.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_max_correction.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_number_of_corrections.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_number_of_corrections.png index 3dc417a2a..9f372ed96 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_number_of_corrections.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_number_of_corrections.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_percent_magnitude_of_corrections.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_percent_magnitude_of_corrections.png index 3ef87bb04..b4d43f5e4 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_percent_magnitude_of_corrections.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_percent_magnitude_of_corrections.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_percent_max_correction.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_percent_max_correction.png index d29cb149e..c9cc16c92 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_percent_max_correction.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_percent_max_correction.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_reward_cert.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_reward_cert.png index 68c12e253..94dd76811 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_reward_cert.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_reward_cert.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_reward_uncert.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_reward_uncert.png index c6faaace6..cd031f90d 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_reward_uncert.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_reward_uncert.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_rmse_cert.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_rmse_cert.png index 7dea40bbb..1fe69c933 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_rmse_cert.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_rmse_cert.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_rmse_uncert.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_rmse_uncert.png index 906ec0074..33fc8836d 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_rmse_uncert.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_rmse_uncert.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_roc_cert.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_roc_cert.png index f3ee48a47..50cbdc877 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_roc_cert.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_roc_cert.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_roc_uncert.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_roc_uncert.png index d67c6edd9..95fa0fd91 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_roc_uncert.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_roc_uncert.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat__constraint_violation.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat__constraint_violation.png index 865b6f1e5..030cce9d2 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat__constraint_violation.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat__constraint_violation.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat__ep_constraint_violation.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat__ep_constraint_violation.png index d31a94338..8328326c0 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat__ep_constraint_violation.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat__ep_constraint_violation.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat__ep_length.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat__ep_length.png index f25ea923b..63084e5e7 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat__ep_length.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat__ep_length.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat__ep_return.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat__ep_return.png index f2467f1df..f2123d13f 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat__ep_return.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat__ep_return.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat__ep_reward.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat__ep_reward.png index 711856da5..22ec69811 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat__ep_reward.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat__ep_reward.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat_eval__constraint_violation.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat_eval__constraint_violation.png index cf07932c7..63098d8c4 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat_eval__constraint_violation.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat_eval__constraint_violation.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat_eval__ep_length.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat_eval__ep_length.png index 686850635..87497829c 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat_eval__ep_length.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat_eval__ep_length.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat_eval__ep_return.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat_eval__ep_return.png index b3b668cbd..904bca225 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat_eval__ep_return.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat_eval__ep_return.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat_eval__ep_reward.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat_eval__ep_reward.png index ac5048e6a..6ae2f243b 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat_eval__ep_reward.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat_eval__ep_reward.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat_eval__mse.png b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat_eval__mse.png index 7f188f274..b40c8c4e1 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat_eval__mse.png and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/graphs/quadrotor_3D_track_stat_eval__mse.png differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf.pkl b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf.pkl index 0b8e9d3d4..365802a37 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf.pkl and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf.pkl differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_es.pkl b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_es.pkl index ca4e6a38f..a796c041a 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_es.pkl and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_es.pkl differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_es_pen.pkl b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_es_pen.pkl index 0fc218360..de42cf85f 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_es_pen.pkl and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_es_pen.pkl differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_pen.pkl b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_pen.pkl index 75f491f7d..b94c922be 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_pen.pkl and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_pen.pkl differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_sr.pkl b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_sr.pkl index 3c07e5cff..ccee32d7d 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_sr.pkl and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_sr.pkl differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_sr_es.pkl b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_sr_es.pkl index a4d16d2d1..bd9ca5523 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_sr_es.pkl and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_sr_es.pkl differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_sr_es_pen.pkl b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_sr_es_pen.pkl index 9f88436e1..93db7a07e 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_sr_es_pen.pkl and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_sr_es_pen.pkl differ diff --git a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_sr_pen.pkl b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_sr_pen.pkl index ef7132d20..67f6efaf6 100644 Binary files a/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_sr_pen.pkl and b/experiments/mpsc/results_mpsc/quadrotor_3D/track/ppo/results_quadrotor_3D_track_ppo_mpsf_sr_pen.pkl differ diff --git a/experiments/mpsc/train_all_models.sh b/experiments/mpsc/train_all_models.sh index 37a7b9f78..476a3ec9d 100755 --- a/experiments/mpsc/train_all_models.sh +++ b/experiments/mpsc/train_all_models.sh @@ -1,8 +1,8 @@ #!/bin/bash -for SYS in cartpole quadrotor_2D quadrotor_3D; do - for ALGO in ppo sac; do - for TASK in stab track; do - for SF in none mpsf; do +for SYS in quadrotor_3D; do + for ALGO in ppo; do + for TASK in track; do + for SF in mpsf; do for SAFE_RESET in True False; do for EARLY_STOP in True False; do for PENALIZE_SF in True False; do diff --git a/safe_control_gym/controllers/ppo/ppo.py b/safe_control_gym/controllers/ppo/ppo.py index db4955da0..7595c55a1 100644 --- a/safe_control_gym/controllers/ppo/ppo.py +++ b/safe_control_gym/controllers/ppo/ppo.py @@ -226,7 +226,7 @@ def run(self, env.add_tracker('constraint_values', 0, mode='queue') env.add_tracker('mse', 0, mode='queue') - obs, info = self.env_reset(env) + obs, info = env.reset() true_obs = obs obs = self.obs_normalizer(obs) ep_returns, ep_lengths = [], [] @@ -243,11 +243,6 @@ def run(self, certified_action, success = self.safety_filter.certify_action(unextended_obs, physical_action, info) if success and self.filter_train_actions is True: action = env.normalize_action(certified_action) - elif not success: - self.safety_filter.setup_optimizer() - certified_action, success = self.safety_filter.certify_action(unextended_obs, physical_action, info) - if success and self.filter_train_actions is True: - action = env.normalize_action(certified_action) action = np.atleast_2d(np.squeeze([action])) obs, rew, done, info = env.step(action) @@ -266,7 +261,7 @@ def run(self, assert 'episode' in info ep_returns.append(total_return) ep_lengths.append(info['episode']['l']) - obs, info = self.env_reset(env) + obs, info = env.reset() total_return = 0 true_obs = obs obs = self.obs_normalizer(obs) @@ -304,11 +299,6 @@ def train_step(self): certified_action, success = self.safety_filter.certify_action(unextended_obs, physical_action, info) if success and self.filter_train_actions is True: action = self.env.envs[0].normalize_action(certified_action) - # elif not success: - # self.safety_filter.setup_optimizer() - # certified_action, success = self.safety_filter.certify_action(unextended_obs, physical_action, info) - # if success and self.filter_train_actions is True: - # action = self.env.envs[0].normalize_action(certified_action) action = np.atleast_2d(np.squeeze([action])) next_obs, rew, done, info = self.env.step(action) @@ -434,16 +424,12 @@ def env_reset(self, env): self.safety_filter.reset_before_run() if self.use_safe_reset is True and self.safety_filter is not None: - - while success is not True: # or np.any(self.safety_filter.slack_prev > 10e-6): + while success is not True or np.any(self.safety_filter.slack_prev > 1e-4): obs, info = env.reset() info['current_step'] = 1 physical_action = self.env.envs[0].denormalize_action(act) unextended_obs = np.squeeze(obs)[:self.env.envs[0].symbolic.nx] self.safety_filter.reset_before_run() _, success = self.safety_filter.certify_action(unextended_obs, physical_action, info) - # if not success: - # self.safety_filter.setup_optimizer() - # _, success = self.safety_filter.certify_action(unextended_obs, physical_action, info) return obs, info diff --git a/safe_control_gym/controllers/sac/sac.py b/safe_control_gym/controllers/sac/sac.py index 48150d2c7..07bf4338a 100644 --- a/safe_control_gym/controllers/sac/sac.py +++ b/safe_control_gym/controllers/sac/sac.py @@ -234,7 +234,7 @@ def run(self, env=None, render=False, n_episodes=10, verbose=False, **kwargs): env.add_tracker('constraint_values', 0, mode='queue') env.add_tracker('mse', 0, mode='queue') - obs, info = self.env_reset(env) + obs, info = env.reset() true_obs = obs obs = self.obs_normalizer(obs) ep_returns, ep_lengths = [], [] @@ -254,11 +254,6 @@ def run(self, env=None, render=False, n_episodes=10, verbose=False, **kwargs): certified_action, success = self.safety_filter.certify_action(unextended_obs, physical_action, info) if success and self.filter_train_actions is True: applied_action = env.normalize_action(certified_action) - elif not success: - self.safety_filter.setup_optimizer() - certified_action, success = self.safety_filter.certify_action(unextended_obs, physical_action, info) - if success and self.filter_train_actions is True: - applied_action = env.normalize_action(certified_action) action = np.atleast_2d(np.squeeze([applied_action])) obs, rew, done, info = env.step(action) @@ -278,7 +273,7 @@ def run(self, env=None, render=False, n_episodes=10, verbose=False, **kwargs): assert 'episode' in info ep_returns.append(total_return) ep_lengths.append(info['episode']['l']) - obs, info = self.env_reset(env) + obs, info = env.reset() total_return = 0 true_obs = obs obs = self.obs_normalizer(obs) @@ -321,11 +316,6 @@ def train_step(self, **kwargs): certified_action, success = self.safety_filter.certify_action(unextended_obs, physical_action, info) if success and self.filter_train_actions is True: applied_action = self.env.envs[0].normalize_action(certified_action) - elif not success: - self.safety_filter.setup_optimizer() - certified_action, success = self.safety_filter.certify_action(unextended_obs, physical_action, info) - if success and self.filter_train_actions is True: - applied_action = self.env.envs[0].normalize_action(certified_action) action = np.atleast_2d(np.squeeze([applied_action])) next_obs, rew, done, info = self.env.step(action) @@ -486,16 +476,12 @@ def env_reset(self, env): self.safety_filter.reset_before_run() if self.use_safe_reset is True and self.safety_filter is not None: - - while success is not True or np.any(self.safety_filter.slack_prev > 10e-6): + while success is not True or np.any(self.safety_filter.slack_prev > 1e-4): obs, info = env.reset() info['current_step'] = 1 physical_action = self.env.envs[0].denormalize_action(act) unextended_obs = np.squeeze(obs)[:self.env.envs[0].symbolic.nx] self.safety_filter.reset_before_run() _, success = self.safety_filter.certify_action(unextended_obs, physical_action, info) - if not success: - self.safety_filter.setup_optimizer() - _, success = self.safety_filter.certify_action(unextended_obs, physical_action, info) return obs, info diff --git a/safe_control_gym/safety_filters/mpsc/mpsc.py b/safe_control_gym/safety_filters/mpsc/mpsc.py index c891cd3c3..ee7c8521b 100644 --- a/safe_control_gym/safety_filters/mpsc/mpsc.py +++ b/safe_control_gym/safety_filters/mpsc/mpsc.py @@ -259,10 +259,11 @@ def solve_acados_optimization(self, try: action = ocp_solver.solve_for_x0(x0_bar=obs) self.cost_prev = ocp_solver.get_cost() - self.slack_prev = ocp_solver.get(0, 'su') + self.slack_prev = np.zeros((self.horizon, self.p)) x_val = np.zeros((self.horizon + 1, self.model.nx)) u_val = np.zeros((self.horizon, self.model.nu)) for i in range(self.horizon): + self.slack_prev[i, :] = ocp_solver.get(i, 'su') x_val[i, :] = ocp_solver.get(i, 'x') u_val[i, :] = ocp_solver.get(i, 'u') x_val[self.horizon, :] = ocp_solver.get(self.horizon, 'x') diff --git a/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/precomputed_cost.py b/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/precomputed_cost.py index 35e6fc65b..48dc984c9 100644 --- a/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/precomputed_cost.py +++ b/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/precomputed_cost.py @@ -110,10 +110,8 @@ def calculate_unsafe_path(self, obs, uncertified_action, iteration): 'current_step': next_step, 'constraint_values': np.concatenate([self.get_constraint_value(con, obs) for con in self.env.constraints.state_constraints]) } - if self.uncertified_controller.training: - action = self.uncertified_controller.select_action(obs=extended_obs, info=info, training=self.uncertified_controller.training) - else: - action = self.uncertified_controller.select_action(obs=extended_obs, info=info) + + action = self.uncertified_controller.select_action(obs=extended_obs, info=info) if uncert_env.NORMALIZED_RL_ACTION_SPACE: if self.env.NAME == Environment.CARTPOLE: diff --git a/safe_control_gym/safety_filters/mpsc/nl_mpsc.py b/safe_control_gym/safety_filters/mpsc/nl_mpsc.py index 4e6208008..28a43126b 100644 --- a/safe_control_gym/safety_filters/mpsc/nl_mpsc.py +++ b/safe_control_gym/safety_filters/mpsc/nl_mpsc.py @@ -1033,13 +1033,12 @@ def setup_acados_optimizer(self): ocp.solver_options.hpipm_mode = 'BALANCE' ocp.solver_options.integrator_type = 'ERK' ocp.solver_options.nlp_solver_type = 'SQP_RTI' - ocp.solver_options.nlp_solver_max_iter = 10 # set prediction horizon ocp.solver_options.tf = self.dt * self.horizon solver_json = 'acados_ocp_mpsf.json' - ocp_solver = AcadosOcpSolver(ocp, json_file=solver_json, generate=True, build=True) + ocp_solver = AcadosOcpSolver(ocp, json_file=solver_json, generate=False, build=False) for stage in range(self.mpsc_cost_horizon): ocp_solver.cost_set(stage, 'W', (self.cost_function.decay_factor**stage) * ocp.cost.W)