
Without tf.function
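The label means the run below executes the PILCO policy and planning code eagerly, i.e. without wrapping it in tf.function; in TF2 the results are tf.Tensor values either way, and the difference is whether the Python is traced into a graph once and then replayed. The plain "[0.169...]" actions in the first episodes are presumably NumPy arrays from the initial random data-gathering rollouts, while the later "tf.Tensor([...], shape=(1,), dtype=float64)" actions come from the learned controller. A minimal sketch of the toggle with a toy stand-in policy (illustrative only, not the example's actual code):

import tensorflow as tf

def policy(state):
    # toy linear feedback standing in for the PILCO controller
    w = tf.constant([[0.05], [0.0], [-0.05], [0.0]], dtype=tf.float64)
    return tf.squeeze(tf.matmul(state[None, :], w), axis=0)

policy_graph = tf.function(policy)          # the "with tf.function" variant

state = tf.constant([-5.44, 1.10, -4.44, 1.07], dtype=tf.float64)
print(policy(state))        # eager call: prints a tf.Tensor([...], shape=(1,), dtype=float64)
print(policy_graph(state))  # traced call: same value, compiled into a graph on first use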

/Users/ikkamens/open_source/PILCO/venv/bin/python /Users/ikkamens/open_source/PILCO/examples/safe_cars_run.py
Action: [0.16978966]
State : [-5.44107607 1.09639354 -4.43702446 1.0672268 ]
Return so far: -1
Action: [-0.32732755]
State : [-4.8928793 0.93218157 -3.90341106 1.0672268 ]
Return so far: -2
Action: [-0.03697518]
State : [-4.42678851 0.91322789 -3.36979767 1.0672268 ]
Return so far: -3
Action: [0.38949335]
State : [-3.97017457 1.10751795 -2.83618427 1.0672268 ]
Return so far: -4
Action: [0.3099153]
State : [-3.4164156 1.26192184 -2.30257087 1.0672268 ]
Return so far: -5
Action: [-0.29881603]
State : [-2.78545468 1.11188287 -1.76895747 1.0672268 ]
Return so far: -6
Action: [0.39068902]
State : [-2.22951324 1.30667143 -1.23534407 1.0672268 ]
Return so far: -7
Action: [-0.3262917]
State : [-1.57617753 1.14287224 -0.70173068 1.0672268 ]
Return so far: -8
Action: [0.28691477]
State : [-1.0047414 1.28575819 -0.16811728 1.0672268 ]
Return so far: -9
Action: [0.30868325]
State : [-0.36186231 1.43945693 0.36549612 1.0672268 ]
Return so far: -10
Action: [-0.31188625]
State : [0.35786616 1.28279408 0.89910952 1.0672268 ]
Return so far: -9
Action: [-0.3906475]
State : [0.9992632 1.08682893 1.43272292 1.0672268 ]
Return so far: -8
Action: [-0.27364543]
State : [1.54267766 0.9494628 1.96633631 1.0672268 ]
Return so far: -7
Action: [-0.04592199]
State : [2.01740907 0.92602708 2.49994971 1.0672268 ]
Return so far: -6
Action: [0.07318889]
State : [2.48042261 0.96215851 3.03356311 1.0672268 ]
Return so far: -5
Action: [-0.30441505]
State : [2.96150186 0.80946991 3.56717651 1.0672268 ]
Return so far: -4
Action: [-0.24525076]
State : [3.36623681 0.68643979 4.10078991 1.0672268 ]
Return so far: -3
Action: [0.38208273]
State : [3.70945671 0.87713794 4.6344033 1.0672268 ]
Return so far: -2
Action: [0.31640488]
State : [4.14802567 1.03490181 5.1680167 1.0672268 ]
Return so far: -1
Action: [0.18868062]
State : [4.66547658 1.12872466 5.7016301 1.0672268 ]
Return so far: 0
Action: [0.35749334]
State : [5.22983891 1.30690697 6.2352435 1.0672268 ]
Return so far: 1
Action: [0.22143494]
State : [5.8832924 1.41697099 6.7688569 1.0672268 ]
Return so far: 2
Action: [0.35260844]
State : [6.59177789 1.59256672 7.30247029 1.0672268 ]
Return so far: 3
Action: [0.23553482]
State : [7.38806125 1.70953785 7.83608369 1.0672268 ]
Return so far: 4
Action: [0.02151741]
State : [8.24283017 1.71944178 8.36969709 1.0672268 ]
Return so far: 5
Action: [-0.37917536]
State : [-5.45863243 0.78060864 -4.47376771 0.99545928]
Return so far: -1
Action: [-0.04698742]
State : [-5.06832811 0.75672463 -3.97603806 0.99545928]
Return so far: -2
Action: [0.37792987]
State : [-4.68996579 0.9453112 -3.47830842 0.99545928]
Return so far: -3
Action: [0.364157]
State : [-4.2173102 1.12691704 -2.98057878 0.99545928]
Return so far: -4
Action: [-0.28006983]
State : [-3.65385168 0.98631867 -2.48284914 0.99545928]
Return so far: -5
Action: [0.28791967]
State : [-3.16069234 1.12978534 -1.9851195 0.99545928]
Return so far: -6
Action: [0.14078782]
State : [-2.59579967 1.19961436 -1.48738985 0.99545928]
Return so far: -7
Action: [0.2722653]
State : [-1.99599249 1.3351472 -0.98966021 0.99545928]
Return so far: -8
Action: [-0.20404719]
State : [-1.3284189 1.23245603 -0.49193057 0.99545928]
Return so far: -9
Action: [-0.3595193]
State : [-0.71219088 1.05208015 0.00579907 0.99545928]
Return so far: -10
Action: [0.32158285]
State : [-0.18615081 1.21234554 0.50352871 0.99545928]
Return so far: -11
Action: [-0.12946753]
State : [0.42002196 1.1470056 1.00125836 0.99545928]
Return so far: -10
Action: [0.3124291]
State : [0.99352476 1.30264664 1.498988 0.99545928]
Return so far: -9
Action: [0.398115]
State : [1.64484808 1.50105283 1.99671764 0.99545928]
Return so far: -8
Action: [-0.34246263]
State : [2.3953745 1.32907098 2.49444728 0.99545928]
Return so far: -7
Action: [0.12567042]
State : [3.05990999 1.39124166 2.99217692 0.99545928]
Return so far: -6
Action: [-0.2914413]
State : [3.75553082 1.24482539 3.48990656 0.99545928]
Return so far: -5
Action: [-0.27293462]
State : [4.37794351 1.10773567 3.98763621 0.99545928]
Return so far: -4
Action: [0.26431122]
State : [4.93181135 1.23933742 4.48536585 0.99545928]
Return so far: -3
Action: [-0.34714705]
State : [5.55148006 1.06514422 4.98309549 0.99545928]
Return so far: -2
Action: [0.117353]
State : [6.08405217 1.12328815 5.48082513 0.99545928]
Return so far: -1
Action: [-0.16324952]
State : [6.64569624 1.04110175 5.97855477 0.99545928]
Return so far: 0
Action: [-0.03313335]
State : [7.16624712 1.02401452 6.47628442 0.99545928]
Return so far: 1
Action: [0.31958938]
State : [7.67825438 1.1832972 6.97401406 0.99545928]
Return so far: 2
Action: [0.24306479]
State : [8.26990298 1.30423795 7.4717437 0.99545928]
Return so far: 3
Action: [-0.33507168]
State : [-5.49693759 0.84427595 -4.47386459 1.04362821]
Return so far: -1
Action: [0.07581214]
State : [-5.07479961 0.88175989 -3.95205049 1.04362821]
Return so far: -2
Action: [-0.2610984]
State : [-4.63391967 0.7507698 -3.43023639 1.04362821]
Return so far: -3
Action: [0.20965807]
State : [-4.25853477 0.85522345 -2.90842228 1.04362821]
Return so far: -4
Action: [-0.03166769]
State : [-3.83092304 0.83896199 -2.38660818 1.04362821]
Return so far: -5
Action: [0.25956038]
State : [-3.41144205 0.9683227 -1.86479408 1.04362821]
Return so far: -6
Action: [-0.00426573]
State : [-2.9272807 0.96570567 -1.34297997 1.04362821]
Return so far: -7
Action: [-0.37344757]
State : [-2.44442786 0.77849904 -0.82116587 1.04362821]
Return so far: -8
Action: [0.37824446]
State : [-2.05517834 0.96723202 -0.29935177 1.04362821]
Return so far: -9
Action: [0.14484335]
State : [-1.57156233 1.03917008 0.22246233 1.04362821]
Return so far: -10
Action: [-0.15065719]
State : [-1.05197729 0.9633219 0.74427644 1.04362821]
Return so far: -11
Action: [0.3383978]
State : [-0.57031635 1.13203914 1.26609054 1.04362821]
Return so far: -12
Action: [0.31274092]
State : [-0.00429678 1.28784358 1.78790464 1.04362821]
Return so far: -13
Action: [-0.01455236]
State : [0.63962501 1.27992348 2.30971874 1.04362821]
Return so far: -12
Action: [-0.11367872]
State : [1.27958675 1.22244415 2.83153285 1.04362821]
Return so far: -11
Action: [0.1253925]
State : [1.89080883 1.28452918 3.35334695 1.04362821]
Return so far: -10
Action: [-0.02800918]
State : [2.53307342 1.26988233 3.87516105 1.04362821]
Return so far: -9
Action: [0.2884482]
State : [3.16801458 1.41347149 4.39697515 1.04362821]
Return so far: -8
Action: [0.26610497]
State : [3.87475033 1.54581724 4.91878926 1.04362821]
Return so far: -7
Action: [-0.24789731]
State : [4.64765895 1.42109567 5.44060336 1.04362821]
Return so far: -6
Action: [0.01786019]
State : [5.35820678 1.42931522 5.96241746 1.04362821]
Return so far: -5
Action: [-0.29922113]
State : [6.07286439 1.27899 6.48423156 1.04362821]
Return so far: -4
Action: [-0.21909468]
State : [6.71235939 1.16880316 7.00604567 1.04362821]
Return so far: -3
Action: [0.23009826]
State : [7.29676097 1.28326789 7.52785977 1.04362821]
Return so far: -2
Action: [0.12533645]
State : [7.93839492 1.34529449 8.04967387 1.04362821]
Return so far: -1
Action: [0.31978258]
State : [-5.47534374 1.16303972 -4.48167899 1.01001023]
Return so far: -1
Action: [-0.24382353]
State : [-4.89382388 1.04054643 -3.97667387 1.01001023]
Return so far: -2
Action: [-0.05849313]
State : [-4.37355067 1.0107796 -3.47166876 1.01001023]
Return so far: -3
Action: [-0.3824783]
State : [-3.86816087 0.81903506 -2.96666364 1.01001023]
Return so far: -4
Action: [0.10156181]
State : [-3.45864334 0.86940645 -2.46165853 1.01001023]
Return so far: -5
Action: [-0.21993998]
State : [-3.02394012 0.75900175 -1.95665341 1.01001023]
Return so far: -6
Action: [-0.05590066]
State : [-2.64443924 0.73067192 -1.4516483 1.01001023]
Return so far: -7
Action: [0.30733195]
State : [-2.27910328 0.88397256 -0.94664318 1.01001023]
Return so far: -8
Action: [0.29326895]
State : [-1.837117 1.03016505 -0.44163807 1.01001023]
Return so far: -9
Action: [-0.38632226]
State : [-1.32203447 0.83648884 0.06336705 1.01001023]
Return so far: -10
Action: [0.15140791]
State : [-0.90379005 0.91177455 0.56837216 1.01001023]
Return so far: -11
Action: [0.05601744]
State : [-0.44790278 0.93932738 1.07337728 1.01001023]
Return so far: -12
Action: [0.39610255]
State : [0.02176091 1.13690899 1.57838239 1.01001023]
Return so far: -11
Action: [-0.34607416]
State : [0.59021541 0.96330346 2.08338751 1.01001023]
Return so far: -10
Action: [0.25149474]
State : [1.07186714 1.08856917 2.58839262 1.01001023]
Return so far: -9
Action: [-0.37375024]
State : [1.61615173 0.90114977 3.09339774 1.01001023]
Return so far: -8
Action: [-0.3353527]
State : [2.06672661 0.73302285 3.59840285 1.01001023]
Return so far: -7
Action: [0.3083121]
State : [2.43323804 0.88681238 4.10340797 1.01001023]
Return so far: -6
Action: [-0.23268889]
State : [2.87664423 0.77002453 4.60841308 1.01001023]
Return so far: -5
Action: [-0.3956669]
State : [3.26165649 0.57180607 5.1134182 1.01001023]
Return so far: -4
Action: [-0.1333231]
State : [3.54755953 0.50485862 5.61842331 1.01001023]
Return so far: -3
Action: [-0.36290553]
State : [3.79998884 0.32315342 6.12342843 1.01001023]
Return so far: -2
Action: [-0.10030607]
State : [3.96156555 0.27283881 6.62843354 1.01001023]
Return so far: -1
Action: [-0.34542993]
State : [4.09798495 0.09998743 7.13343865 1.01001023]
Return so far: 0
Action: [-0.01827997]
State : [4.14797866 0.09079745 7.63844377 1.01001023]
Return so far: 1
Action: [0.39955]
State : [-5.458255 1.19312332 -4.50341941 0.97437713]
Return so far: -1
Action: [0.33795866]
State : [-4.86169334 1.3615061 -4.01623084 0.97437713]
Return so far: -2
Action: [-0.23655854]
State : [-4.18094029 1.24254607 -3.52904228 0.97437713]
Return so far: -3
Action: [0.13976718]
State : [-3.55966726 1.31180839 -3.04185371 0.97437713]
Return so far: -4
Action: [-0.02221542]
State : [-2.90376306 1.30004478 -2.55466515 0.97437713]
Return so far: -5
Action: [-0.05541071]
State : [-2.25374067 1.2716894 -2.06747659 0.97437713]
Return so far: -6
Action: [-0.28469554]
State : [-1.61789597 1.12870578 -1.58028802 0.97437713]
Return so far: -7
Action: [-0.35522583]
State : [-1.05354308 0.95052852 -1.09309946 0.97437713]
Return so far: -8
Action: [-0.05899723]
State : [-0.57827882 0.92055464 -0.60591089 0.97437713]
Return so far: -9
Action: [-0.12175526]
State : [-0.1180015 0.85921673 -0.11872233 0.97437713]
Return so far: -10
Action: [-0.29502803]
State : [0.31160686 0.71127311 0.36846623 0.97437713]
Return so far: -9
Action: [0.1923095]
State : [0.66724342 0.80707222 0.8556548 0.97437713]
Return so far: -8
Action: [0.12750022]
State : [1.07077953 0.8704188 1.34284336 0.97437713]
Return so far: -7
Action: [0.26456177]
State : [1.50598893 1.00226447 1.83003193 0.97437713]
Return so far: -6
Action: [0.1560931]
State : [2.00712116 1.07980989 2.31722049 0.97437713]
Return so far: -5
Action: [-0.1139823]
State : [2.54702611 1.02227884 2.80440905 0.97437713]
Return so far: -4
Action: [0.36753428]
State : [3.05816553 1.20553484 3.29159762 0.97437713]
Return so far: -3
Action: [-0.20801109]
State : [3.66093295 1.10092653 3.77878618 0.97437713]
Return so far: -2
Action: [-0.09037643]
State : [4.21139622 1.05518785 4.26597475 0.97437713]
Return so far: -1
Action: [-0.13093174]
State : [4.73899014 0.98919439 4.75316331 0.97437713]
Return so far: 0
Action: [0.20768489]
State : [5.23358734 1.09254224 5.24035187 0.97437713]
Return so far: 1
Action: [-0.04807502]
State : [5.77985845 1.06795845 5.72754044 0.97437713]
Return so far: 2
Action: [-0.28807107]
State : [6.31383768 0.92338894 6.214729 0.97437713]
Return so far: 3
Action: [0.34967896]
State : [6.77553215 1.09776673 6.70191756 0.97437713]
Return so far: 4
Action: [-0.3693259]
State : [7.32441552 0.91255489 7.18910613 0.97437713]
Return so far: 5
Action: [0.15532446]
State : [-1.63736512 0.15290285 -1.66331284 -1.21223435]
Return so far: -1
Action: [-0.36733046]
State : [-1.49652283 -0.5222019 -1.53044884 -1.21223435]
Return so far: -2
Action: [0.17023967]
State : [-1.37933663 -0.21192747 -1.39758485 -1.21223435]
Return so far: -3
Action: [-0.33033356]
State : [-1.25127822 -0.81905681 -1.26472086 -1.21223435]
Return so far: -4
Action: [0.01316239]
State : [-1.14449399 -0.79646182 -1.13185687 -1.21223435]
Return so far: -5
Action: [0.24359164]
State : [-1.03691802 -0.35164038 -0.99899288 -1.21223435]
Return so far: -6
Action: [0.34431538]
State : [-0.91375523 0.27752448 -0.86612889 -1.21223435]
Return so far: -7
Action: [-0.07955284]
State : [-0.76854613 0.12968012 -0.7332649 -1.21223435]
Return so far: -8
Action: [0.33439016]
State : [-0.62851758 0.74041739 -0.6004009 -1.21223435]
Return so far: -9
Action: [0.24260269]
State : [-0.46708843 1.18265825 -0.46753691 -1.21223435]
Return so far: -10
Action: [-0.0061861]
State : [-0.29016289 1.16879829 -0.33467292 -1.21223435]
Return so far: -11
Action: [-0.15758629]
State : [-0.11372301 0.87752007 -0.20180893 -1.21223435]
Return so far: -10
Action: [0.19995451]
State : [ 0.05251031 1.241544 -0.06894494 -1.21223435]
Return so far: -9
Action: [-0.2927601]
State : [ 0.23149924 0.7025374 0.06391905 -1.21223435]
Return so far: -8
Action: [-0.12659125]
State : [ 0.39160105 0.46828752 0.19678304 -1.21223435]
Return so far: -7
Action: [-0.04396389]
State : [ 0.54349461 0.38556084 0.32964703 -1.21223435]
Return so far: -6
Action: [0.3753784]
State : [ 0.69248937 1.07127687 0.46251103 -1.21223435]
Return so far: -5
Action: [0.02462499]
State : [ 0.86551204 1.11393073 0.59537502 -1.21223435]
Return so far: -4
Action: [0.3196191]
State : [ 1.04002933 1.69710941 0.72823901 -1.21223435]
Return so far: -3
Action: [0.1530164]
State : [ 1.23498155 1.97471432 0.861103 -1.21223435]
Return so far: -2
Action: [-0.04876944]
State : [ 1.43966122 1.88242877 0.99396699 -1.21223435]
Return so far: -1
Action: [-0.38356417]
State : [ 1.64110715 1.17671266 1.12683098 -1.21223435]
Return so far: 0
Action: [-0.17304412]
State : [ 1.81782435 0.85710562 1.25969497 -1.21223435]
Return so far: 1
Action: [-0.13150941]
State : [ 1.98334233 0.61376644 1.39255897 -1.21223435]
Return so far: 2
Action: [0.099989]
State : [ 2.14033356 0.7947458 1.52542296 -1.21223435]
Return so far: 3
Action: [0.21870165]
State : [-1.60832691 0.03739109 -1.66772534 -0.71271999]
Return so far: -1
Action: [0.09816758]
State : [-1.47153222 0.21532106 -1.53259886 -0.71271999]
Return so far: -2
Action: [0.14037769]
State : [-1.32850276 0.47050773 -1.39747238 -0.71271999]
Return so far: -3
Action: [0.321962]
State : [-1.17653141 1.05830126 -1.26234591 -0.71271999]
Return so far: -4
Action: [-0.35387337]
State : [-1.00396341 0.40740254 -1.12721943 -0.71271999]
Return so far: -5
Action: [-0.36387357]
State : [-0.8542033 -0.26149507 -0.99209296 -0.71271999]
Return so far: -6
Action: [-0.16547798]
State : [-0.72788177 -0.56651883 -0.85696648 -0.71271999]
Return so far: -7
Action: [-0.2880368]
State : [-0.61224846 -1.09596643 -0.72184001 -0.71271999]
Return so far: -8
Action: [0.0976164]
State : [-0.51516731 -0.91847975 -0.58671353 -0.71271999]
Return so far: -9
Action: [0.35806093]
State : [-0.41186692 -0.26384418 -0.45158705 -0.71271999]
Return so far: -10
Action: [-0.24678051]
State : [-0.2856277 -0.71784526 -0.31646058 -0.71271999]
Return so far: -11
Action: [0.00810653]
State : [-0.17529696 -0.7045652 -0.1813341 -0.71271999]
Return so far: -10
Action: [0.07385172]
State : [-0.06450087 -0.57082053 -0.04620763 -0.71271999]
Return so far: -9
Action: [0.3405498]
State : [ 0.0509817 0.05155388 0.08891885 -0.71271999]
Return so far: -8
Action: [0.05365571]
State : [ 0.18827266 0.14791339 0.22404533 -0.71271999]
Return so far: -7
Action: [-0.04120659]
State : [ 0.32894011 0.07039937 0.3591718 -0.71271999]
Return so far: -6
Action: [0.13638723]
State : [ 0.46689143 0.3183464 0.49429828 -0.71271999]
Return so far: -5
Action: [-0.09391204]
State : [ 0.61353096 0.14416989 0.62942475 -0.71271999]
Return so far: -4
Action: [-0.2496078]
State : [ 0.75406724 -0.3152159 0.76455123 -0.71271999]
Return so far: -3
Action: [-0.18839872]
State : [ 0.87850636 -0.66221269 0.89967771 -0.71271999]
Return so far: -2
Action: [-0.1972431]
State : [ 0.9907865 -1.02524239 1.03480418 -0.71271999]
Return so far: -1
Action: [-0.37619245]
State : [ 1.09034586 -1.71599674 1.16993066 -0.71271999]
Return so far: 0
Action: [0.17561625]
State : [ 1.16570077 -1.39527339 1.30505713 -0.71271999]
Return so far: 1
Action: [-0.15261354]
State : [ 1.25229402 -1.67615751 1.44018361 -0.71271999]
Return so far: 2
Action: [-0.39385542]
State : [ 1.32904492 -2.39895198 1.57531008 -0.71271999]
Return so far: 3
Action: [0.38362503]
State : [-1.60274166 0.66088074 -1.66453034 -0.20387637]
Return so far: -1
Action: [0.2963137]
State : [-1.44409952 1.20158124 -1.52709912 -0.20387637]
Return so far: -2
Action: [-0.07526021]
State : [-1.2665109 1.06114066 -1.38966791 -0.20387637]
Return so far: -3
Action: [0.3770923]
State : [-1.09384341 1.74965945 -1.25223669 -0.20387637]
Return so far: -4
Action: [0.11959574]
State : [-0.8970498 1.96599817 -1.11480547 -0.20387637]
Return so far: -5
Action: [-0.11727013]
State : [-0.69267556 1.74819656 -0.97737426 -0.20387637]
Return so far: -6
Action: [0.01785371]
State : [-0.49593321 1.77810428 -0.83994304 -0.20387637]
Return so far: -7
Action: [-0.09437908]
State : [-0.29814288 1.60234207 -0.70251183 -0.20387637]
Return so far: -8
Action: [0.03454421]
State : [-0.10651136 1.66290635 -0.56508061 -0.20387637]
Return so far: -7
Action: [0.2980813]
State : [ 0.08724237 2.20634479 -0.42764939 -0.20387637]
Return so far: -6
Action: [0.187005]
State : [ 0.30003851 2.54597567 -0.29021818 -0.20387637]
Return so far: -5
Action: [-0.09287379]
State : [ 0.52473552 2.37258783 -0.15278696 -0.20387637]
Return so far: -4
Action: [0.2681425]
State : [ 0.74335692 2.86081165 -0.01535575 -0.20387637]
Return so far: -3
Action: [0.36438942]
State : [ 0.97908597 3.52515388 0.12207547 -0.20387637]
Return so far: -2
Action: [-0.27348343]
State : [ 1.23809398 3.02032799 0.25950669 -0.20387637]
Return so far: -1
Action: [0.25104436]
State : [ 1.47941259 3.47689738 0.3969379 -0.20387637]
Return so far: 0
Action: [0.14789124]
State : [ 1.73672966 3.74422107 0.53436912 -0.20387637]
Return so far: 1
Action: [0.29845938]
State : [ 2.00341392 4.28731163 0.67180033 -0.20387637]
Return so far: 2
Action: [0.3175524]
State : [ 2.2891284 4.8651166 0.80923155 -0.20387637]
Return so far: 3
Action: [0.2235866]
State : [ 2.59508952 5.2704501 0.94666277 -0.20387637]
Return so far: 4
Action: [-0.02358235]
State : [ 2.91525377 5.22266941 1.08409398 -0.20387637]
Return so far: 5
Action: [0.32727626]
State : [ 3.23374376 5.81782469 1.2215252 -0.20387637]
Return so far: 6
Action: [-0.04291228]
State : [ 3.57308835 5.73435022 1.35895641 -0.20387637]
Return so far: 7
Action: [-0.11622697]
State : [ 3.90950794 5.51657591 1.49638763 -0.20387637]
Return so far: 8
Action: [-0.05390113]
State : [ 4.23829659 5.41311612 1.63381885 -0.20387637]
Return so far: 9
Action: [0.27784795]
State : [-1.63468381 0.0879217 -1.6695346 -0.40352107]
Return so far: -1
Action: [0.17804173]
State : [-1.4961185 0.41218752 -1.53300765 -0.40352107]
Return so far: -2
Action: [-0.23327751]
State : [-1.34619072 -0.01740872 -1.3964807 -0.40352107]
Return so far: -3
Action: [0.13849293]
State : [-1.21131626 0.23444069 -1.25995375 -0.40352107]
Return so far: -4
Action: [-0.30330217]
State : [-1.06761684 -0.32337961 -1.1234268 -0.40352107]
Return so far: -5
Action: [0.02439052]
State : [-0.94346378 -0.28045806 -0.98689985 -0.40352107]
Return so far: -6
Action: [0.16374399]
State : [-0.81780672 0.01779283 -0.8503729 -0.40352107]
Return so far: -7
Action: [-0.36025095]
State : [-0.68169877 -0.64427189 -0.71384594 -0.40352107]
Return so far: -8
Action: [0.2005186]
State : [-0.56878997 -0.27845344 -0.57731899 -0.40352107]
Return so far: -9
Action: [0.1940921]
State : [-0.44306267 0.07540621 -0.44079204 -0.40352107]
Return so far: -10
Action: [-0.20139907]
State : [-0.30493592 -0.2956077 -0.30426509 -0.40352107]
Return so far: -11
Action: [-0.3576123]
State : [-0.17980971 -0.95268069 -0.16773814 -0.40352107]
Return so far: -10
Action: [0.02216555]
State : [-0.07770774 -0.91352152 -0.03121119 -0.40352107]
Return so far: -9
Action: [0.28716576]
State : [ 0.02576638 -0.38879648 0.10531577 -0.40352107]
Return so far: -8
Action: [-0.04489672]
State : [ 0.1476272 -0.47280393 0.24184272 -0.40352107]
Return so far: -7
Action: [-0.11070743]
State : [ 0.26654434 -0.67736068 0.37836967 -0.40352107]
Return so far: -6
Action: [0.09426664]
State : [ 0.37829369 -0.5062214 0.51489662 -0.40352107]
Return so far: -5
Action: [0.13020816]
State : [ 0.49603986 -0.26930855 0.65142357 -0.40352107]
Return so far: -4
Action: [-0.00125299]
State : [ 0.6220876 -0.27340312 0.78795052 -0.40352107]
Return so far: -3
Action: [-0.29345807]
State : [ 0.74799187 -0.81293121 0.92447748 -0.40352107]
Return so far: -2
Action: [-0.18800065]
State : [ 0.85499074 -1.15894972 1.06100443 -0.40352107]
Return so far: -1
Action: [0.19100107]
State : [ 0.94986492 -0.8103138 1.19753138 -0.40352107]
Return so far: 0
Action: [-0.3179093]
State : [ 1.05695551 -1.39437778 1.33405833 -0.40352107]
Return so far: 1
Action: [0.0782686]
State : [ 1.14358014 -1.25219474 1.47058528 -0.40352107]
Return so far: 2
Action: [0.03033041]
State : [ 1.23518695 -1.19792455 1.60711223 -0.40352107]
Return so far: 3
Action: [0.0046816]
State : [-1.60610087 -0.06283187 -1.67172575 -0.8163335 ]
Return so far: -1
Action: [-0.14874409]
State : [-1.47281805 -0.33729185 -1.53706857 -0.8163335 ]
Return so far: -2
Action: [0.14083466]
State : [-1.34915249 -0.08099152 -1.4024114 -0.8163335 ]
Return so far: -3
Action: [0.20516555]
State : [-1.216506 0.29306036 -1.26775423 -0.8163335 ]
Return so far: -4
Action: [0.18089199]
State : [-1.07075251 0.62244643 -1.13309706 -0.8163335 ]
Return so far: -5
Action: [0.12500432]
State : [-0.91345713 0.84925943 -0.99843989 -0.8163335 ]
Return so far: -6
Action: [-0.2727934]
State : [-0.74821409 0.34703586 -0.86378271 -0.8163335 ]
Return so far: -7
Action: [-0.14421515]
State : [-0.60056926 0.08066976 -0.72912554 -0.8163335 ]
Return so far: -8
Action: [-0.30733392]
State : [-0.46225807 -0.48446141 -0.59446837 -0.8163335 ]
Return so far: -9
Action: [0.04264325]
State : [-0.34374941 -0.4080131 -0.4598112 -0.8163335 ]
Return so far: -10
Action: [0.25794438]
State : [-0.22256196 0.06291402 -0.32515403 -0.8163335 ]
Return so far: -11
Action: [0.30807397]
State : [-0.08487294 0.62546297 -0.19049685 -0.8163335 ]
Return so far: -10
Action: [0.03608084]
State : [ 0.07252814 0.68933141 -0.05583968 -0.8163335 ]
Return so far: -9
Action: [-0.12853599]
State : [ 0.23216721 0.4515246 0.07881749 -0.8163335 ]
Return so far: -8
Action: [-0.35188615]
State : [ 0.38347338 -0.19542936 0.21347466 -0.8163335 ]
Return so far: -7
Action: [-0.10432225]
State : [ 0.5121099 -0.38842463 0.34813184 -0.8163335 ]
Return so far: -6
Action: [-0.19768164]
State : [ 0.63398374 -0.7523948 0.48278901 -0.8163335 ]
Return so far: -5
Action: [0.14583874]
State : [ 0.74310385 -0.48671746 0.61744618 -0.8163335 ]
Return so far: -4
Action: [0.24021602]
State : [ 0.86153345 -0.04823637 0.75210335 -0.8163335 ]
Return so far: -3
Action: [-0.32595435]
State : [ 0.99532769 -0.6474231 0.88676052 -0.8163335 ]
Return so far: -2
Action: [-0.27125475]
State : [ 1.10812607 -1.14607889 1.0214177 -0.8163335 ]
Return so far: -1
Action: [0.2864519]
State : [ 1.20345125 -0.62254563 1.15607487 -0.8163335 ]
Return so far: 0
Action: [-0.32617003]
State : [ 1.31712135 -1.22184042 1.29073204 -0.8163335 ]
Return so far: 1
Action: [-0.13127407]
State : [ 1.4097918 -1.4637089 1.42538921 -0.8163335 ]
Return so far: 2
Action: [-0.07469901]
State : [ 1.49398702 -1.6017885 1.56004638 -0.8163335 ]
Return so far: 3
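Each of the episodes above follows the same Action / State / Return-so-far pattern: every step prints the chosen action, the next state, and the running return, while the (state, action) -> next-state data is collected for fitting the dynamics GPs. A sketch of the kind of Gym-style rollout helper that would produce this output, assuming the classic env.step() 4-tuple API and a policy returning a 1-D action (hypothetical; the example's own utility may differ in detail):

import numpy as np

def rollout(env, policy, timesteps, verbose=True):
    X, Y = [], []                            # training inputs / targets for the GPs
    x = env.reset()
    ep_return = 0
    for _ in range(timesteps):
        u = policy(x)                        # e.g. a random action in the first episodes
        x_new, r, done, _ = env.step(u)
        ep_return += r
        if verbose:
            print("Action: ", u)
            print("State : ", x_new)
            print("Return so far: ", ep_return)
        X.append(np.hstack((x, u)))          # input: state concatenated with action
        Y.append(x_new - x)                  # target: state difference
        x = x_new
        if done:
            break
    return np.stack(X), np.stack(Y), ep_return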
2021-02-18 17:49:17.468140: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-02-18 17:49:17.471759: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-02-18 17:49:17.541817: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
***ITERATION**** 0
2021-02-18 17:49:19.157661: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
-----Learned models------
---Lengthscales---
GP0 GP1 GP2 GP3
0 12.726 23.027 11.034 1.143
1 8.011 28.299 14.342 1.202
2 11.247 20.587 9.589 1.140
3 9.564 16.732 6.768 1.078
4 8.605 1.316 6.083 1.056
---Variances---
GP0 GP1 GP2 GP3
0 0.028 0.66 0.006 9.244e-06
---Noises---
GP0 GP1 GP2 GP3
0 0.001 0.001 0.001 0.001
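In these tables each column is one of the four dynamics GPs (one per state dimension) and, for the lengthscales, each row is one of the five model inputs (presumably the four state dimensions plus the action); the variances are the kernels' signal variances and the noises are the likelihood noise variances. Assuming the GPs are GPflow models, as in the open-source PILCO implementations, a table of this form can be assembled roughly as follows (a toy single-output sketch, not the script's actual printing code):

import numpy as np
import pandas as pd
import gpflow

# toy data shaped like one dynamics GP: 5 inputs (4 state dims + action), 1 output
X = np.random.randn(50, 5)
Y = np.random.randn(50, 1)
model = gpflow.models.GPR(
    data=(X, Y),
    kernel=gpflow.kernels.SquaredExponential(lengthscales=np.ones(5)),
)

# collecting one such column per GP would reproduce the tables above
print(pd.DataFrame({"GP0": model.kernel.lengthscales.numpy()}))
print("signal variance:", model.kernel.variance.numpy())
print("noise variance :", model.likelihood.variance.numpy())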
Controller's optimization: done in 13.4 seconds with reward=-79.350.
Randomising controller
Controller's optimization: done in 14.3 seconds with reward=-79.780.
Predicted episode's return: -77.76698596425105
Overall risk 0.005275873055015423
Mu is -300.0
bound1 0.25683314501336063 bound1 0.27177913168067597
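These header numbers fit together: to the precision shown, the optimizer's reward equals the predicted episode return plus Mu times the overall risk, i.e. the expected return penalized by the estimated probability of violating the safety constraint. Here -77.767 + (-300.0) * 0.0052759 ≈ -79.350, exactly the better of the two optimization rewards above, and the same identity holds for every later iteration in this log. A quick check against the values printed in each iteration's header (taking the better of its two optimization rewards):

# reward ≈ predicted_return + mu * risk, using the numbers logged in this trace
rows = [
    (-77.76698596425105,  -300.0,    0.005275873055015423,  -79.350),  # iteration 0
    (-62.89329063441288,  -225.0,    0.019233731816750144,  -67.221),  # iteration 1
    ( 86.06920437973156,  -168.75,   0.7939746650563976,    -47.914),  # iteration 2
    ( 87.19588203185518,  -253.125,  0.7919213336459869,   -113.259),  # iteration 3
    ( 20.856059398710173, -379.6875, 0.6117761532811135,   -211.428),  # iteration 4
]
for ret, mu, risk, logged in rows:
    print(round(ret + mu * risk, 3), logged)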
Action: tf.Tensor([0.00790349], shape=(1,), dtype=float64)
State : [-1.63687737 -0.16142087 -1.68597504 -1.02978357]
Return so far: -1
Action: tf.Tensor([-0.04807604], shape=(1,), dtype=float64)
State : [-1.50704918 -0.25136778 -1.55228466 -1.02978357]
Return so far: -2
Action: tf.Tensor([-0.09054136], shape=(1,), dtype=float64)
State : [-1.38037278 -0.41908302 -1.41859428 -1.02978357]
Return so far: -3
Action: tf.Tensor([-0.12090372], shape=(1,), dtype=float64)
State : [-1.25957323 -0.64235027 -1.2849039 -1.02978357]
Return so far: -4
Action: tf.Tensor([-0.14247572], shape=(1,), dtype=float64)
State : [-1.1465971 -0.90503435 -1.15121353 -1.02978357]
Return so far: -5
Action: tf.Tensor([-0.15817598], shape=(1,), dtype=float64)
State : [-1.04282557 -1.1963562 -1.01752315 -1.02978357]
Return so far: -6
Action: tf.Tensor([-0.17023271], shape=(1,), dtype=float64)
State : [-0.94926214 -1.50962509 -0.88383277 -1.02978357]
Return so far: -7
Action: tf.Tensor([-0.18031117], shape=(1,), dtype=float64)
State : [-0.86667585 -1.84120508 -0.75014239 -1.02978357]
Return so far: -8
Action: tf.Tensor([-0.1896052], shape=(1,), dtype=float64)
State : [-0.79570831 -2.18964963 -0.61645201 -1.02978357]
Return so far: -9
Action: tf.Tensor([-0.19761819], shape=(1,), dtype=float64)
State : [-0.73695049 -2.55260293 -0.48276163 -1.02978357]
Return so far: -10
Action: tf.Tensor([-0.19930406], shape=(1,), dtype=float64)
State : [-0.69091077 -2.91846394 -0.34907126 -1.02978357]
Return so far: -11
Action: tf.Tensor([-0.18571128], shape=(1,), dtype=float64)
State : [-0.65769104 -3.25923465 -0.21538088 -1.02978357]
Return so far: -12
Action: tf.Tensor([-0.15563892], shape=(1,), dtype=float64)
State : [-0.63641213 -3.5447305 -0.0816905 -1.02978357]
Return so far: -13
Action: tf.Tensor([-0.12122412], shape=(1,), dtype=float64)
State : [-0.62513716 -3.76702204 0.05199988 -1.02978357]
Return so far: -14
Action: tf.Tensor([-0.0926808], shape=(1,), dtype=float64)
State : [-0.62165142 -3.93689976 0.18569026 -1.02978357]
Return so far: -15
Action: tf.Tensor([-0.07169906], shape=(1,), dtype=float64)
State : [-0.62411831 -4.06824566 0.31938063 -1.02978357]
Return so far: -16
Action: tf.Tensor([-0.05659517], shape=(1,), dtype=float64)
State : [-0.63118763 -4.17184958 0.45307101 -1.02978357]
Return so far: -17
Action: tf.Tensor([-0.04556208], shape=(1,), dtype=float64)
State : [-0.64188729 -4.25518469 0.58676139 -1.02978357]
Return so far: -18
Action: tf.Tensor([-0.03731055], shape=(1,), dtype=float64)
State : [-0.65550707 -4.32335806 0.72045177 -1.02978357]
Return so far: -19
Action: tf.Tensor([-0.03099266], shape=(1,), dtype=float64)
State : [-0.67151569 -4.37992047 0.85414215 -1.02978357]
Return so far: -20
Action: tf.Tensor([-0.0260528], shape=(1,), dtype=float64)
State : [-0.68950629 -4.42740282 0.98783253 -1.02978357]
Return so far: -21
Action: tf.Tensor([-0.02211952], shape=(1,), dtype=float64)
State : [-0.70916069 -4.4676541 1.1215229 -1.02978357]
Return so far: -22
Action: tf.Tensor([-0.0189377], shape=(1,), dtype=float64)
State : [-0.73022553 -4.50205489 1.25521328 -1.02978357]
Return so far: -23
Action: tf.Tensor([-0.01632731], shape=(1,), dtype=float64)
State : [-0.75249579 -4.53165522 1.38890366 -1.02978357]
Return so far: -24
Action: tf.Tensor([-0.01415818], shape=(1,), dtype=float64)
State : [-0.77580327 -4.55726603 1.52259404 -1.02978357]
Return so far: -25
***ITERATION**** 1
-----Learned models------
---Lengthscales---
GP0 GP1 GP2 GP3
0 13.362 24.129 11.598 1.108
1 9.599 32.375 17.531 1.169
2 11.876 21.392 10.052 1.111
3 9.770 16.877 6.965 1.061
4 8.778 1.298 6.034 1.042
---Variances---
GP0 GP1 GP2 GP3
0 0.031 0.617 0.006 7.497e-06
---Noises---
GP0 GP1 GP2 GP3
0 0.001 0.001 0.001 0.001
Controller's optimization: done in 16.6 seconds with reward=-67.221.
Randomising controller
Controller's optimization: done in 15.0 seconds with reward=-95.612.
Predicted episode's return: -62.89329063441288
Overall risk 0.019233731816750144
Mu is -225.0
bound1 0.25683314501336063 bound1 0.27177913168067597
Action: tf.Tensor([-0.05123159], shape=(1,), dtype=float64)
State : [-1.63600928 -0.43044025 -1.66930315 0.44099232]
Return so far: -1
Action: tf.Tensor([-0.11305505], shape=(1,), dtype=float64)
State : [-1.51560769 -0.63931995 -1.52895109 0.44099232]
Return so far: -2
Action: tf.Tensor([-0.15835163], shape=(1,), dtype=float64)
State : [-1.40252537 -0.93109651 -1.38859902 0.44099232]
Return so far: -3
Action: tf.Tensor([-0.18499381], shape=(1,), dtype=float64)
State : [-1.29966708 -1.27154623 -1.24824696 0.44099232]
Return so far: -4
Action: tf.Tensor([-0.19724705], shape=(1,), dtype=float64)
State : [-1.20873836 -1.63427852 -1.10789489 0.44099232]
Return so far: -5
Action: tf.Tensor([-0.19966009], shape=(1,), dtype=float64)
State : [-1.13051999 -2.00125107 -0.96754283 0.44099232]
Return so far: -6
Action: tf.Tensor([-0.18834854], shape=(1,), dtype=float64)
State : [-1.06516057 -2.3473129 -0.82719077 0.44099232]
Return so far: -7
Action: tf.Tensor([-0.152435], shape=(1,), dtype=float64)
State : [-1.01192736 -2.62739386 -0.6868387 0.44099232]
Return so far: -8
Action: tf.Tensor([-0.09386146], shape=(1,), dtype=float64)
State : [-0.96850836 -2.80000483 -0.54648664 0.44099232]
Return so far: -9
Action: tf.Tensor([-0.03252482], shape=(1,), dtype=float64)
State : [-0.93113775 -2.86013644 -0.40613458 0.44099232]
Return so far: -10
Action: tf.Tensor([0.01989857], shape=(1,), dtype=float64)
State : [-0.8958742 -2.82417755 -0.26578251 0.44099232]
Return so far: -11
Action: tf.Tensor([0.06358879], shape=(1,), dtype=float64)
State : [-0.85935062 -2.70817884 -0.12543045 0.44099232]
Return so far: -12
Action: tf.Tensor([0.10020823], shape=(1,), dtype=float64)
State : [-0.81876237 -2.52513679 0.01492162 0.44099232]
Return so far: -13
Action: tf.Tensor([0.12914875], shape=(1,), dtype=float64)
State : [-0.77176022 -2.28915575 0.15527368 0.44099232]
Return so far: -14
Action: tf.Tensor([0.14813741], shape=(1,), dtype=float64)
State : [-0.71648915 -2.01849795 0.29562574 0.44099232]
Return so far: -15
Action: tf.Tensor([0.15499934], shape=(1,), dtype=float64)
State : [-0.65173407 -1.73540171 0.43597781 0.44099232]
Return so far: -16
Action: tf.Tensor([0.14794795], shape=(1,), dtype=float64)
State : [-0.57705912 -1.46536795 0.57632987 0.44099232]
Return so far: -17
Action: tf.Tensor([0.1257258], shape=(1,), dtype=float64)
State : [-0.49292203 -1.236189 0.71668194 0.44099232]
Return so far: -18
Action: tf.Tensor([0.09241026], shape=(1,), dtype=float64)
State : [-0.40075436 -1.06817193 0.857034 0.44099232]
Return so far: -19
Action: tf.Tensor([0.05902788], shape=(1,), dtype=float64)
State : [-0.30269928 -0.96140861 0.99738606 0.44099232]
Return so far: -20
Action: tf.Tensor([0.03459132], shape=(1,), dtype=float64)
State : [-0.20090314 -0.89947614 1.13773813 0.44099232]
Return so far: -19
Action: tf.Tensor([0.02126374], shape=(1,), dtype=float64)
State : [-0.09693686 -0.86199605 1.27809019 0.44099232]
Return so far: -18
Action: tf.Tensor([0.01733885], shape=(1,), dtype=float64)
State : [ 0.00834276 -0.83172664 1.41844225 0.44099232]
Return so far: -17
Action: tf.Tensor([0.02021606], shape=(1,), dtype=float64)
State : [ 0.11468303 -0.7962002 1.55879432 0.44099232]
Return so far: -16
Action: tf.Tensor([0.02762372], shape=(1,), dtype=float64)
State : [ 0.22226817 -0.74711774 1.69914638 0.44099232]
Return so far: -15
***ITERATION**** 2
-----Learned models------
---Lengthscales---
GP0 GP1 GP2 GP3
0 14.240 24.614 11.929 1.087
1 9.594 33.380 17.787 1.141
2 13.059 22.445 10.545 1.094
3 11.719 19.015 8.207 1.070
4 8.769 1.289 6.057 1.032
---Variances---
GP0 GP1 GP2 GP3
0 0.028 0.59 0.005 6.283e-06
---Noises---
GP0 GP1 GP2 GP3
0 0.001 0.001 0.001 0.001
Controller's optimization: done in 16.7 seconds with reward=-64.378.
Randomising controller
Controller's optimization: done in 17.0 seconds with reward=-47.914.
Predicted episode's return: 86.06920437973156
Overall risk 0.7939746650563976
Mu is -168.75
bound1 0.25683314501336063 bound1 0.27177913168067597
Action: tf.Tensor([0.19889271], shape=(1,), dtype=float64)
State : [-1.62314578 0.00947583 -1.67330459 -1.30055272]
Return so far: -1
Action: tf.Tensor([0.19835347], shape=(1,), dtype=float64)
State : [-1.48732927 0.37100005 -1.54084063 -1.30055272]
Return so far: -2
Action: tf.Tensor([0.19827583], shape=(1,), dtype=float64)
State : [-1.33884472 0.73220124 -1.40837666 -1.30055272]
Return so far: -3
Action: tf.Tensor([0.1982743], shape=(1,), dtype=float64)
State : [-1.17770347 1.09321903 -1.2759127 -1.30055272]
Return so far: -4
Action: tf.Tensor([0.19801018], shape=(1,), dtype=float64)
State : [-1.00391193 1.45357232 -1.14344873 -1.30055272]
Return so far: -5
Action: tf.Tensor([0.19742676], shape=(1,), dtype=float64)
State : [-0.8174934 1.81267638 -1.01098477 -1.30055272]
Return so far: -6
Action: tf.Tensor([0.19698179], shape=(1,), dtype=float64)
State : [-0.61849164 2.17078552 -0.8785208 -1.30055272]
Return so far: -7
Action: tf.Tensor([0.19756666], shape=(1,), dtype=float64)
State : [-0.40694152 2.52978733 -0.74605684 -1.30055272]
Return so far: -8
Action: tf.Tensor([0.19921165], shape=(1,), dtype=float64)
State : [-0.18281176 2.89162391 -0.61359287 -1.30055272]
Return so far: -7
Action: tf.Tensor([0.1999579], shape=(1,), dtype=float64)
State : [ 0.05399698 3.254647 -0.48112891 -1.30055272]
Return so far: -6
Action: tf.Tensor([0.19834029], shape=(1,), dtype=float64)
State : [ 0.30352627 3.61452449 -0.34866494 -1.30055272]
Return so far: -5
Action: tf.Tensor([0.19702121], shape=(1,), dtype=float64)
State : [ 0.56566588 3.97180495 -0.21620098 -1.30055272]
Return so far: -4
Action: tf.Tensor([0.19890302], shape=(1,), dtype=float64)
State : [ 0.84032481 4.33235499 -0.08373701 -1.30055272]
Return so far: -3
Action: tf.Tensor([0.19955875], shape=(1,), dtype=float64)
State : [ 1.12761764 4.69392631 0.04872695 -1.30055272]
Return so far: -2
Action: tf.Tensor([0.19318347], shape=(1,), dtype=float64)
State : [ 1.42758015 5.04363481 0.18119092 -1.30055272]
Return so far: -1
Action: tf.Tensor([0.18623761], shape=(1,), dtype=float64)
State : [ 1.73979666 5.38044089 0.31365488 -1.30055272]
Return so far: 0
Action: tf.Tensor([0.18992765], shape=(1,), dtype=float64)
State : [ 2.06381505 5.72384018 0.44611885 -1.30055272]
Return so far: 1
Action: tf.Tensor([0.19954803], shape=(1,), dtype=float64)
State : [ 2.39986637 6.08469611 0.57858281 -1.30055272]
Return so far: 2
Action: tf.Tensor([0.18922201], shape=(1,), dtype=float64)
State : [ 2.74856229 6.42645026 0.71104678 -1.30055272]
Return so far: 3
Action: tf.Tensor([0.14809342], shape=(1,), dtype=float64)
State : [ 3.10923349 6.69266964 0.84351074 -1.30055272]
Return so far: 4
Action: tf.Tensor([0.09931791], shape=(1,), dtype=float64)
State : [ 3.47923318 6.86937985 0.97597471 -1.30055272]
Return so far: 5
Action: tf.Tensor([0.06058098], shape=(1,), dtype=float64)
State : [ 3.8554249 6.97502028 1.10843867 -1.30055272]
Return so far: 6
Action: tf.Tensor([0.03429926], shape=(1,), dtype=float64)
State : [ 4.23531833 7.03244937 1.24090264 -1.30055272]
Return so far: 7
Action: tf.Tensor([0.01798735], shape=(1,), dtype=float64)
State : [ 4.6172241 7.05995985 1.3733666 -1.30055272]
Return so far: 8
Action: tf.Tensor([0.0086677], shape=(1,), dtype=float64)
State : [ 5.00009387 7.07037928 1.50583057 -1.30055272]
Return so far: 9
[-0.02204646 -0.01689299 -0.01183448 -0.0067131 -0.00144185 0.00401888
0.00967856 0.01551847 0.02146438 0.02732706 0.03278341 0.03748834
0.04118981 0.04357969 0.04405526 0.04194797 0.03697892 0.02900285
0.01728906 0.00052015 -0.02185851 -0.04881571 -0.07875963 -0.11040037
-0.1429146 ]
[ 0.00959173 0.00820641 0.00722458 0.00659827 0.00627613 0.00620311
0.00632015 0.00656382 0.00686598 0.00715363 0.00734923 0.00737089
0.00713187 0.00654055 0.00550313 0.0039264 0.00171429 -0.00124011
-0.00505148 -0.00981548 -0.01557768 -0.02233377 -0.0300625 -0.03875794
-0.04844045]
*********CHANGING***********
tf.Tensor([[-47.91402035]], shape=(1, 1), dtype=float64)
tf.Tensor([[-114.90563271]], shape=(1, 1), dtype=float64)
***ITERATION**** 3
Controller's optimization: done in 16.8 seconds with reward=-113.259.
Randomising controller
Controller's optimization: done in 19.6 seconds with reward=-115.424.
Predicted episode's return: 87.19588203185518
Overall risk 0.7919213336459869
Mu is -253.125
bound1 0.25683314501336063 bound1 0.27177913168067597
Action: tf.Tensor([0.19999819], shape=(1,), dtype=float64)
State : [-1.63652793 0.02309755 -1.65972677 -1.59835445]
Return so far: -1
Action: tf.Tensor([0.19994488], shape=(1,), dtype=float64)
State : [-1.5002341 0.38753105 -1.52861166 -1.59835445]
Return so far: -2
Action: tf.Tensor([0.19991694], shape=(1,), dtype=float64)
State : [-1.3511703 0.75173114 -1.39749655 -1.59835445]
Return so far: -3
Action: tf.Tensor([0.1999565], shape=(1,), dtype=float64)
State : [-1.18934471 1.11582161 -1.26638144 -1.59835445]
Return so far: -4
Action: tf.Tensor([0.19999129], shape=(1,), dtype=float64)
State : [-1.01476116 1.47979379 -1.13526633 -1.59835445]
Return so far: -5
Action: tf.Tensor([0.19999962], shape=(1,), dtype=float64)
State : [-0.82742381 1.84359924 -1.00415122 -1.59835445]
Return so far: -6
Action: tf.Tensor([0.19999981], shape=(1,), dtype=float64)
State : [-0.6273385 2.20722313 -0.87303611 -1.59835445]
Return so far: -7
Action: tf.Tensor([0.19999734], shape=(1,), dtype=float64)
State : [-0.41451158 2.57066069 -0.741921 -1.59835445]
Return so far: -8
Action: tf.Tensor([0.19999117], shape=(1,), dtype=float64)
State : [-0.18894959 2.93390523 -0.61080588 -1.59835445]
Return so far: -7
Action: tf.Tensor([0.19998771], shape=(1,), dtype=float64)
State : [ 0.04934071 3.2969618 -0.47969077 -1.59835445]
Return so far: -6
Action: tf.Tensor([0.19998223], shape=(1,), dtype=float64)
State : [ 0.30035273 3.6598268 -0.34857566 -1.59835445]
Return so far: -5
Action: tf.Tensor([0.1999357], shape=(1,), dtype=float64)
State : [ 0.56407977 4.02242511 -0.21746055 -1.59835445]
Return so far: -4
Action: tf.Tensor([0.19967374], shape=(1,), dtype=float64)
State : [ 0.84051246 4.38436211 -0.08634544 -1.59835445]
Return so far: -3
Action: tf.Tensor([0.19879396], shape=(1,), dtype=float64)
State : [ 1.12962765 4.74450603 0.04476967 -1.59835445]
Return so far: -2
Action: tf.Tensor([0.19717026], shape=(1,), dtype=float64)
State : [ 1.4313625 5.10149463 0.17588478 -1.59835445]
Return so far: -1
Action: tf.Tensor([0.1959583], shape=(1,), dtype=float64)
State : [ 1.74560645 5.45608394 0.30699989 -1.59835445]
Return so far: 0
Action: tf.Tensor([0.19709779], shape=(1,), dtype=float64)
State : [ 2.07227543 5.81258395 0.43811501 -1.59835445]
Return so far: 1
Action: tf.Tensor([0.19978947], shape=(1,), dtype=float64)
State : [ 2.41143638 6.17383792 0.56923012 -1.59835445]
Return so far: 2
Action: tf.Tensor([0.19689341], shape=(1,), dtype=float64)
State : [ 2.76325589 6.52960455 0.70034523 -1.59835445]
Return so far: 3
Action: tf.Tensor([0.17912208], shape=(1,), dtype=float64)
State : [ 3.12754168 6.85262917 0.83146034 -1.59835445]
Return so far: 4
Action: tf.Tensor([0.1466329], shape=(1,), dtype=float64)
State : [ 3.50314644 7.11595922 0.96257545 -1.59835445]
Return so far: 5
Action: tf.Tensor([0.10935117], shape=(1,), dtype=float64)
State : [ 3.88797846 7.3108427 1.09369056 -1.59835445]
Return so far: 6
Action: tf.Tensor([0.07593569], shape=(1,), dtype=float64)
State : [ 4.2796393 7.44439832 1.22480567 -1.59835445]
Return so far: 7
Action: tf.Tensor([0.04968338], shape=(1,), dtype=float64)
State : [ 4.67598002 7.52978253 1.35592078 -1.59835445]
Return so far: 8
Action: tf.Tensor([0.03068352], shape=(1,), dtype=float64)
State : [ 5.07531265 7.58030877 1.4870359 -1.59835445]
Return so far: 9
[-0.00825778 -0.00351119 0.00113978 0.00582903 0.01064715 0.0156374
0.02079663 0.02607406 0.03136776 0.03652291 0.04133159 0.04552979
0.0487918 0.05072895 0.05090104 0.04884288 0.04407789 0.03605893
0.02403939 0.00705523 -0.01571951 -0.0443834 -0.07817261 -0.11590991
-0.15645742]
[-0.00533494 -0.00537205 -0.00500604 -0.00428533 -0.00326177 -0.00199102
-0.00053284 0.00104853 0.00268414 0.00430002 0.00581698 0.00715055
0.0082111 0.0089041 0.0091304 0.00878653 0.00776496 0.00595407
0.00324097 -0.00047863 -0.00528416 -0.01121882 -0.018294 -0.02650663
-0.03585726]
*********CHANGING***********
tf.Tensor([[-113.25920555]], shape=(1, 1), dtype=float64)
tf.Tensor([[-213.48674934]], shape=(1, 1), dtype=float64)
***ITERATION**** 4
Controller's optimization: done in 17.8 seconds with reward=-213.229.
Randomising controller
Controller's optimization: done in 20.8 seconds with reward=-211.428.
Predicted episode's return: 20.856059398710173
Overall risk 0.6117761532811135
Mu is -379.6875
bound1 0.25683314501336063 bound1 0.27177913168067597
Action: tf.Tensor([-0.19142316], shape=(1,), dtype=float64)
State : [-1.61192694 -0.5006789 -1.66405917 1.83454728]
Return so far: -1
Action: tf.Tensor([-0.19442802], shape=(1,), dtype=float64)
State : [-1.49398656 -0.85863103 -1.51739518 1.83454728]
Return so far: -2
Action: tf.Tensor([-0.19666994], shape=(1,), dtype=float64)
State : [-1.38858903 -1.22051228 -1.37073119 1.83454728]
Return so far: -3
Action: tf.Tensor([-0.19795531], shape=(1,), dtype=float64)
State : [-1.29587205 -1.58456788 -1.2240672 1.83454728]
Return so far: -4
Action: tf.Tensor([-0.19837413], shape=(1,), dtype=float64)
State : [-1.21591179 -1.94920891 -1.07740322 1.83454728]
Return so far: -5
Action: tf.Tensor([-0.19796179], shape=(1,), dtype=float64)
State : [-1.14872878 -2.31291205 -0.93073923 1.83454728]
Return so far: -6
Action: tf.Tensor([-0.19633255], shape=(1,), dtype=float64)
State : [-1.09429015 -2.67344792 -0.78407524 1.83454728]
Return so far: -7
Action: tf.Tensor([-0.19258179], shape=(1,), dtype=float64)
State : [-1.05248491 -3.02693064 -0.63741125 1.83454728]
Return so far: -8
Action: tf.Tensor([-0.18558407], shape=(1,), dtype=float64)
State : [-1.02306592 -3.36741402 -0.49074727 1.83454728]
Return so far: -9
Action: tf.Tensor([-0.17462162], shape=(1,), dtype=float64)
State : [-1.00557767 -3.68763961 -0.34408328 1.83454728]
Return so far: -10
Action: tf.Tensor([-0.15997121], shape=(1,), dtype=float64)
State : [-0.99931033 -3.98085971 -0.19741929 1.83454728]
Return so far: -11
Action: tf.Tensor([-0.14292613], shape=(1,), dtype=float64)
State : [-1.0033176 -4.24269987 -0.05075531 1.83454728]
Return so far: -12
Action: tf.Tensor([-0.12518453], shape=(1,), dtype=float64)
State : [-1.0164999 -4.47189946 0.09590868 1.83454728]
Return so far: -13
Action: tf.Tensor([-0.10815196], shape=(1,), dtype=float64)
State : [-1.0377135 -4.66977402 0.24257267 1.83454728]
Return so far: -14
Action: tf.Tensor([-0.09263888], shape=(1,), dtype=float64)
State : [-1.06586074 -4.83912354 0.38923666 1.83454728]
Return so far: -15
Action: tf.Tensor([-0.078934], shape=(1,), dtype=float64)
State : [-1.09994209 -4.98327561 0.53590064 1.83454728]
Return so far: -16
Action: tf.Tensor([-0.06700752], shape=(1,), dtype=float64)
State : [-1.13907462 -5.10550156 0.68256463 1.83454728]
Return so far: -17
Action: tf.Tensor([-0.05668349], shape=(1,), dtype=float64)
State : [-1.18249001 -5.20874868 0.82922862 1.83454728]
Return so far: -18
Action: tf.Tensor([-0.04774335], shape=(1,), dtype=float64)
State : [-1.22952326 -5.29556231 0.97589261 1.83454728]
Return so far: -19
Action: tf.Tensor([-0.03997608], shape=(1,), dtype=float64)
State : [-1.2795985 -5.3680998 1.12255659 1.83454728]
Return so far: -20
Action: tf.Tensor([-0.03319752], shape=(1,), dtype=float64)
State : [-1.3322155 -5.42818001 1.26922058 1.83454728]
Return so far: -21
Action: tf.Tensor([-0.02725466], shape=(1,), dtype=float64)
State : [-1.38693775 -5.47734052 1.41588457 1.83454728]
Return so far: -22
Action: tf.Tensor([-0.02202352], shape=(1,), dtype=float64)
State : [-1.44338261 -5.51689092 1.56254856 1.83454728]
Return so far: -23
Action: tf.Tensor([-0.01740478], shape=(1,), dtype=float64)
State : [-1.50121334 -5.54795818 1.70921254 1.83454728]
Return so far: -24
Action: tf.Tensor([-0.01331924], shape=(1,), dtype=float64)
State : [-1.56013269 -5.57152359 1.85587653 1.83454728]
Return so far: -25
[-2.60763273e-02 -2.80999811e-02 -1.95042311e-02 1.49453135e-03
3.66626233e-02 8.77193062e-02 1.56317268e-01 2.43945772e-01
3.51784833e-01 4.80570377e-01 6.30508138e-01 8.01260833e-01
9.92024628e-01 1.20167819e+00 1.42894998e+00 1.67254851e+00
1.93123244e+00 2.20382947e+00 2.48922520e+00 2.78634154e+00
3.09411553e+00 3.41148227e+00 3.73736231e+00 4.07065457e+00
4.41023651e+00]
[ 0.01454633 -0.00102784 -0.01617797 -0.03095253 -0.04539611 -0.05954854
-0.0734454 -0.08712007 -0.10060699 -0.11394466 -0.12717714 -0.14035394
-0.15352904 -0.16675991 -0.18010699 -0.19363335 -0.20740427 -0.22148622
-0.23594527 -0.25084531 -0.26624653 -0.2822047 -0.29877137 -0.3159947
-0.33392082]
*********CHANGING***********
tf.Tensor([[-211.4276988]], shape=(1, 1), dtype=float64)
tf.Tensor([[-327.5695779]], shape=(1, 1), dtype=float64)
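Reading the Mu and Overall-risk lines across the iterations, the penalty coefficient appears to follow a simple multiplicative schedule: it shrinks by a factor of 0.75 after an iteration whose risk estimate stays low (-300 -> -225 -> -168.75) and grows by a factor of 1.5 once the risk estimate jumps, which is when the *********CHANGING*********** blocks appear and the previous objective is re-evaluated under the heavier penalty (-168.75 -> -253.125 -> -379.6875). The last CHANGING block is consistent with one more 1.5x step: 20.856 + (-379.6875 * 1.5) * 0.61178 ≈ -327.57, the final tensor printed. The exact switching threshold is not logged; the bound1 values (~0.257 / 0.272) are plausible candidates, but that is a guess. The arithmetic from the logged numbers:

# reconstructing the Mu schedule from the "Overall risk" lines above;
# the 0.25 threshold is a guess (anything between ~0.02 and ~0.6 reproduces this log)
mu = -300.0
risks = [0.005275873055015423,   # iteration 0
         0.019233731816750144,   # iteration 1
         0.7939746650563976,     # iteration 2 -> CHANGING
         0.7919213336459869]     # iteration 3 -> CHANGING
for risk in risks:
    mu *= 1.5 if risk > 0.25 else 0.75
    print(mu)                    # -225.0, -168.75, -253.125, -379.6875 (Mu of iterations 1-4)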

Process finished with exit code 0