algorithmsbooks / decisionmakingproblems.jl Goto Github PK
View Code? Open in Web Editor NEWProblems from Algorithms for Decision Making
License: MIT License
Problems from Algorithms for Decision Making
License: MIT License
Hey and thank you for making your stuff public!
There are various algorithms in the book that are hard to copy/paste from PDF.
Is there a chance they can be made available in this repo?
E.g. my attempt to copy/paste some of chap 7:
"""
    MDP(γ, 𝒮, 𝒜, T, R, TR)

Container for a Markov decision process. The fields are untyped; the
algorithms that consume it call `T(s, a, s′)` and `R(s, a)` as functions and
iterate over `𝒮` and `𝒜`.
"""
struct MDP
    γ  # discount factor
    𝒮  # state space
    𝒜  # action space
    T  # transition function
    R  # reward function
    TR # sample transition and reward
end
#
"""
    lookahead(𝒫::MDP, U, s, a)

Return the one-step lookahead value of taking action `a` in state `s`: the
immediate reward `R(s, a)` plus the discounted sum, over every `s′` in `𝒫.𝒮`,
of `T(s, a, s′) * U(s′)`. In this method `U` is called as a function of the
successor state.
"""
function lookahead(𝒫::MDP, U, s, a)
    𝒮, T, R, γ = 𝒫.𝒮, 𝒫.T, 𝒫.R, 𝒫.γ
    return R(s, a) + γ * sum(T(s, a, s′) * U(s′) for s′ in 𝒮)
end
#
"""
    lookahead(𝒫::MDP, U::Vector, s, a)

Return the one-step lookahead value of taking action `a` in state `s` when the
utility is stored as a vector: `U[i]` is used for the `i`-th state produced by
iterating `𝒫.𝒮`.
"""
function lookahead(𝒫::MDP, U::Vector, s, a)
    𝒮, T, R, γ = 𝒫.𝒮, 𝒫.T, 𝒫.R, 𝒫.γ
    # `enumerate` pairs each successor state with its position in `U`.
    return R(s, a) + γ * sum(T(s, a, s′) * U[i] for (i, s′) in enumerate(𝒮))
end
# Algorithm 7.3: iterate k_max times, without a max over actions
"""
    iterative_policy_evaluation(𝒫::MDP, π, k_max)

Approximate the utility of following policy `π` by starting from an all-zero
utility vector (one entry per state in `𝒫.𝒮`) and applying `k_max` sweeps of
`lookahead` with the action `π(s)` in every state. Returns the final vector.
"""
function iterative_policy_evaluation(𝒫::MDP, π, k_max)
    𝒮 = 𝒫.𝒮
    U = [0.0 for s in 𝒮]
    for k in 1:k_max
        # Each sweep builds a fresh vector from the previous sweep's values.
        U = [lookahead(𝒫, U, s, π(s)) for s in 𝒮]
    end
    return U
end
# 7.4
"""
    policy_evaluation(𝒫::MDP, π)

Evaluate policy `π` exactly by solving the linear system `(I - γ*T′) U = R′`,
where `R′[i] = R(s, π(s))` and `T′[i, j] = T(s, π(s), s′)` over the states of
`𝒫.𝒮`. Returns the solution vector `U`.

`I` is the identity from `LinearAlgebra`, which must be loaded by the caller.
"""
function policy_evaluation(𝒫::MDP, π)
    𝒮, T, R, γ = 𝒫.𝒮, 𝒫.T, 𝒫.R, 𝒫.γ
    R′ = [R(s, π(s)) for s in 𝒮]
    # Two-variable comprehension: rows index `s`, columns index `s′`.
    T′ = [T(s, π(s), s′) for s in 𝒮, s′ in 𝒮]
    return (I - γ * T′) \ R′
end
# 7.5
"""
    ValueFunctionPolicy(𝒫, U)

Policy defined implicitly by a problem `𝒫` and a utility function `U`.
"""
struct ValueFunctionPolicy
    𝒫 # problem
    U # utility function
end
"""
    greedy(𝒫::MDP, U, s)

Return a named tuple `(a=…, u=…)` holding the action from `𝒫.𝒜` with the
largest `lookahead` value in state `s`, and that value.
"""
function greedy(𝒫::MDP, U, s)
    # Base's `findmax(f, domain)` yields the *index* into `domain`, not the
    # element, so map it back to the action. For action spaces of the form
    # `1:n` the index and the action coincide.
    u, i = findmax(a -> lookahead(𝒫, U, s, a), 𝒫.𝒜)
    return (a=𝒫.𝒜[i], u=u)
end
# Calling a `ValueFunctionPolicy` on a state returns the greedy action for it.
(π::ValueFunctionPolicy)(s) = greedy(π.𝒫, π.U, s).a
# 7.6
"""
    PolicyIteration(π, k_max)

Settings for policy iteration: the starting policy and an iteration cap.
"""
struct PolicyIteration
    π     # initial policy
    k_max # maximum number of iterations
end

"""
    solve(M::PolicyIteration, 𝒫::MDP)

Run policy iteration on `𝒫` starting from `M.π`. Each round evaluates the
current policy with `policy_evaluation`, builds the `ValueFunctionPolicy` for
the resulting utility, and stops early once that policy selects the same
action as the current one in every state of `𝒫.𝒮`. At most `M.k_max` rounds
are performed. Returns the last policy held when the loop ends.
"""
function solve(M::PolicyIteration, 𝒫::MDP)
    π, 𝒮 = M.π, 𝒫.𝒮
    for k in 1:M.k_max
        U = policy_evaluation(𝒫, π)
        π′ = ValueFunctionPolicy(𝒫, U)
        # Converged: the improved policy agrees with the current one everywhere.
        if all(π(s) == π′(s) for s in 𝒮)
            break
        end
        π = π′
    end
    return π
end
# Algorithm 7.7. The backup procedure applied to an MDP
# Algorithm 7.8. Value iteration
# Algorithm 7.9. Asynchronous value iteration
# Algorithm 7.10. Solving a discrete MDP using a linear program formulation
# Algorithm 7.11. LinearQuadraticProblem
# Example 7.4. An example solving a finite horizon MDP
I ran the following code:
using DecisionMakingProblems
using Test
using Random
using LinearAlgebra
using GridInterpolations
# Short alias for the DecisionMakingProblems module.
const p = DecisionMakingProblems
# Build the HexWorld problem with its default constructor arguments.
m = HexWorld()
# Convert the problem into an `MDP`; the stack trace further down shows its
# transition function is a closure over a `DecisionMakingProblems.DiscreteMDP`.
mdp = MDP(m)
"""
    lookahead(𝒫::MDP, U, s, a)

Return the one-step lookahead value of taking action `a` in state `s`: the
immediate reward `R(s, a)` plus the discounted sum, over every `s′` in `𝒫.𝒮`,
of `T(s, a, s′) * U(s′)`. In this method `U` is called as a function of the
successor state.
"""
function lookahead(𝒫::MDP, U, s, a)
    𝒮, T, R, γ = 𝒫.𝒮, 𝒫.T, 𝒫.R, 𝒫.γ
    # Explicit `*` between the transition probability and the utility.
    return R(s, a) + γ * sum(T(s, a, s′) * U(s′) for s′ in 𝒮)
end
"""
    lookahead(𝒫::MDP, U::Vector, s, a)

Return the one-step lookahead value of taking action `a` in state `s` when the
utility is stored as a vector: `U[i]` is used for the `i`-th state produced by
iterating `𝒫.𝒮`.
"""
function lookahead(𝒫::MDP, U::Vector, s, a)
    𝒮, T, R, γ = 𝒫.𝒮, 𝒫.T, 𝒫.R, 𝒫.γ
    # `γ * sum(...)`: without the `*`, `γsum` would be read as one identifier.
    return R(s, a) + γ * sum(T(s, a, s′) * U[i] for (i, s′) in enumerate(𝒮))
end
# Constant policy: selects action 1 in every state, ignoring `s`.
# NOTE(review): the name `ฯ` looks like a mis-encoded `π`; if it is renamed, the
# call site that passes it to `iterative_policy_evaluation` must change with it.
ฯ(s) = 1
"""
    iterative_policy_evaluation(𝒫::MDP, π, k_max)

Approximate the utility of following policy `π` by starting from an all-zero
utility vector (one entry per state in `𝒫.𝒮`) and applying `k_max` sweeps of
`lookahead` with the action `π(s)` in every state. Returns the final vector.
"""
function iterative_policy_evaluation(𝒫::MDP, π, k_max)
    𝒮 = 𝒫.𝒮
    U = [0.0 for s in 𝒮]
    for k in 1:k_max
        # Each sweep builds a fresh vector from the previous sweep's values.
        U = [lookahead(𝒫, U, s, π(s)) for s in 𝒮]
    end
    return U
end
# Number of evaluation sweeps to run.
k_max = 4
# Evaluate the constant policy on `mdp`; per the stack trace below, this
# top-level call is where the reported MethodError originates.
u = iterative_policy_evaluation(mdp, ฯ, k_max)
and got the following error:
ERROR: LoadError: MethodError: no method matching pdf(::Distributions.Categorical{Float64, Vector{Float64}}, ::Int64)
Stacktrace:
[1] (::DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP})(s::Int64, a::Int64, s′::Int64)
@ DecisionMakingProblems ~/code/decision_making/DecisionMakingProblems.jl/src/mdp/discrete_mdp.jl:39
[2] (::var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}})(::Tuple{Int64, Int64})
@ Main ./none:0
[3] MappingRF
@ ./reduce.jl:95 [inlined]
[4] _foldl_impl
@ ./reduce.jl:58 [inlined]
[5] foldl_impl(op::Base.MappingRF{var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}, Base.BottomRF{typeof(Base.add_sum)}}, nt::Base._InitialValue, itr::Base.Iterators.Enumerate{Vector{Int64}})
@ Base ./reduce.jl:48
[6] mapfoldl_impl(f::typeof(identity), op::typeof(Base.add_sum), nt::Base._InitialValue, itr::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}})
@ Base ./reduce.jl:44
[7] mapfoldl(f::Function, op::Function, itr::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}}; init::Base._InitialValue)
@ Base ./reduce.jl:162
[8] mapfoldl(f::Function, op::Function, itr::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}})
@ Base ./reduce.jl:162
[9] mapreduce(f::Function, op::Function, itr::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}}; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ Base ./reduce.jl:289
[10] mapreduce(f::Function, op::Function, itr::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}})
@ Base ./reduce.jl:289
[11] sum(f::Function, a::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}}; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ Base ./reduce.jl:503
[12] sum(f::Function, a::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}})
@ Base ./reduce.jl:503
[13] sum(a::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}}; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ Base ./reduce.jl:532
[14] sum(a::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}})
@ Base ./reduce.jl:532
[15] lookahead(𝒫::MDP, U::Vector{Float64}, s::Int64, a::Int64)
@ Main ~/code/decision_making/DecisionMakingProblems.jl/test/runtests_discrete_mdp.jl:20
[16] (::var"#10#12"{MDP, typeof(ฯ)})(s::Int64)
@ Main ./none:0
[17] iterate
@ ./generator.jl:47 [inlined]
[18] collect(itr::Base.Generator{Vector{Int64}, var"#10#12"{MDP, typeof(ฯ)}})
@ Base ./array.jl:724
[19] iterative_policy_evaluation(๐ซ::MDP, ฯ::typeof(ฯ), k_max::Int64)
@ Main ~/code/decision_making/DecisionMakingProblems.jl/test/runtests_discrete_mdp.jl:29
[20] top-level scope
@ ~/code/decision_making/DecisionMakingProblems.jl/test/runtests_discrete_mdp.jl:35
Hello folks. I really appreciate the work that went into writing the ALGORITHMS FOR DECISION MAKING book. It is great that the authors included Julia code as well as the algorithms for policy iteration, value iteration, SARSA, etc.
My question is whether there is a place to find some plotting routines for these environments?
The biggest issue that I encounter in trying to run and understand these algorithms is that I have not found any suitable way to plot the policies that emerge from the learning process. So if I run some algorithm on a Gridworld problem, I would like to plot the policy on a Gridworld plot and then see the arrows indicating the policy. That would really help to understand whether the algorithm is converging and if the algorithm is implemented correctly.
Is there anything like that available either in this repository--where the problems are defined--or in a different repo? Thanks.
A declarative, efficient, and flexible JavaScript library for building user interfaces.
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Something interesting about the web. A new door to the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Something interesting about visualization: using data as art.
Something interesting about games: make everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google ❤️ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China tencent open source team.