<?xml version="1.0" encoding="utf-8"?>
<?xml-stylesheet type="text/xsl" href="style/detail_T.xsl"?>
<bibitem type="V">   <ARLID>0495875</ARLID> <utime>20240103220824.9</utime><mtime>20181105235959.9</mtime>              <title language="eng" primary="1">Balancing Exploitation and Exploration via Fully Probabilistic Design of Decision Policies</title>  <publisher> <place>Praha</place> <name>ÚTIA AV ČR, v.v.i</name> <pub_time>2018</pub_time> </publisher> <specification> <page_count>13 s.</page_count> <media_type>P</media_type> </specification> <edition> <name>Research Report</name> <volume_id>2376</volume_id> </edition>    <keyword>Exploitation</keyword>   <keyword>Exploration</keyword>   <keyword>Bayesian estimation</keyword>   <keyword>Adaptive systems</keyword>   <keyword>Fully probabilistic design</keyword>   <keyword>Kullback-Leibler divergence</keyword>   <keyword>Decision policy</keyword>   <keyword>Markov decision process</keyword>    <author primary="1"> <ARLID>cav_un_auth*0101124</ARLID> <name1>Kárný</name1> <name2>Miroslav</name2> <full_dept language="cz">Adaptivní systémy</full_dept> <full_dept language="eng">Department of Adaptive Systems</full_dept> <department language="cz">AS</department> <department language="eng">AS</department> <institution>UTIA-B</institution> <full_dept>Department of Adaptive Systems</full_dept> <fullinstit>Ústav teorie informace a automatizace AV ČR, v. v. i.</fullinstit> </author> <author primary="0"> <ARLID>cav_un_auth*0333671</ARLID> <name1>Hůla</name1> <name2>František</name2> <full_dept language="cz">Adaptivní systémy</full_dept> <full_dept>Department of Adaptive Systems</full_dept> <department language="cz">AS</department> <department>AS</department> <institution>UTIA-B</institution> <full_dept>Department of Adaptive Systems</full_dept> <country>CZ</country> <fullinstit>Ústav teorie informace a automatizace AV ČR, v. v. i.</fullinstit> </author>   <source> <url>http://library.utia.cas.cz/separaty/2018/AS/karny-0495875.pdf</url> </source>        <cas_special> <project> <ARLID>cav_un_auth*0331019</ARLID> <project_id>GA16-09848S</project_id> <agency>GA ČR</agency> </project> <project> <ARLID>cav_un_auth*0362986</ARLID> <project_id>GA18-15970S</project_id> <agency>GA ČR</agency> <country>CZ</country> </project>  <abstract language="eng" primary="1">Adaptive decision making learns an environment model serving a design of a decision policy. The policy-generated actions influence both the acquired reward and the future knowledge. The optimal policy properly balances exploitation with exploration. The inherent dimensionality curse of decision making under incomplete knowledge prevents the realisation of the optimal design.</abstract>     <RIV>BC</RIV> <FORD0>10000</FORD0> <FORD1>10200</FORD1> <FORD2>10201</FORD2>   <reportyear>2019</reportyear>       <num_of_auth>2</num_of_auth>  <unknown tag="mrcbC52"> 4 O 4o 20231122143532.6 </unknown> <inst_support> RVO:67985556 </inst_support>  <permalink>http://hdl.handle.net/11104/0288947</permalink>   <confidential>S</confidential>        <arlyear>2018</arlyear>    <unknown tag="mrcbTft">  Soubory v repozitáři: 0495875.pdf </unknown>    <unknown tag="mrcbU10"> 2018 </unknown> <unknown tag="mrcbU10"> Praha ÚTIA AV ČR, v.v.i </unknown> </cas_special> </bibitem>