Contents

1 Introduction
2 Background
  2.1 Reinforcement Learning
  2.2 Kalman Temporal Differences - KTD
3 Computing Uncertainty over Values
4 A Form of Active Learning
  4.1 Principle
  4.2 Experiment
5 Exploration/Exploitation Dilemma
  5.1 ε-greedy Policy
  5.2 Confident-greedy Policy
  5.3 Bonus-greedy Policy
  5.4 Thompson Policy
  5.5 Experiment
6 Dialogue management application
7 Conclusion