decisionmakingproblems.jl's People

Contributors

mykelk, sidhartk, tawheeler

decisionmakingproblems.jl's Issues

Code for the algorithms in the book "Algorithms for Decision Making"?

Hey and thank you for making your stuff public!

There are various algorithms in the book that are hard to copy/paste from the PDF.
Is there a chance they can be made available in this repo?

E.g., my attempt to copy/paste some of chapter 7:

struct MDP
    γ  # discount factor
    𝒮  # state space
    𝒜  # action space
    T  # transition function
    R  # reward function
    TR # sample transition and reward
end

# lookahead with a value function given as a callable U
function lookahead(𝒫::MDP, U, s, a)
    𝒮, T, R, γ = 𝒫.𝒮, 𝒫.T, 𝒫.R, 𝒫.γ
    return R(s,a) + γ*sum(T(s,a,s′)*U(s′) for s′ in 𝒮)
end

# lookahead with a value function given as a vector U indexed like 𝒮
function lookahead(𝒫::MDP, U::Vector, s, a)
    𝒮, T, R, γ = 𝒫.𝒮, 𝒫.T, 𝒫.R, 𝒫.γ
    return R(s,a) + γ*sum(T(s,a,s′)*U[i] for (i,s′) in enumerate(𝒮))
end
# 7.3 Iterative policy evaluation (iterate k_max times, no max over actions)
function iterative_policy_evaluation(𝒫::MDP, π, k_max)
    𝒮, T, R, γ = 𝒫.𝒮, 𝒫.T, 𝒫.R, 𝒫.γ
    U = [0.0 for s in 𝒮]
    for k in 1:k_max
        U = [lookahead(𝒫, U, s, π(s)) for s in 𝒮]
    end
    return U
end
# 7.4 Exact policy evaluation (requires `using LinearAlgebra` for I)
function policy_evaluation(𝒫::MDP, π)
    𝒮, T, R, γ = 𝒫.𝒮, 𝒫.T, 𝒫.R, 𝒫.γ
    R′ = [R(s, π(s)) for s in 𝒮]
    T′ = [T(s, π(s), s′) for s in 𝒮, s′ in 𝒮]
    return (I - γ*T′) \ R′
end
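For what it's worth, here is a tiny toy MDP I used to sanity-check the pasted code above (my own example, not from the book): two states, action 1 stays put, action 2 flips the state, and state 2 yields reward 1.

using LinearAlgebra  # provides the identity matrix I used by policy_evaluation

𝒮 = [1, 2]
𝒜 = [1, 2]
T(s, a, s′) = a == 1 ? float(s′ == s) : float(s′ != s)
R(s, a) = s == 2 ? 1.0 : 0.0
𝒫 = MDP(0.9, 𝒮, 𝒜, T, R, nothing)  # TR is unused here, so pass nothing

policy(s) = 2  # always flip the state
U_iter  = iterative_policy_evaluation(𝒫, policy, 100)
U_exact = policy_evaluation(𝒫, policy)  # ≈ [4.74, 5.26]; U_iter should be close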

# 7.5
struct ValueFunctionPolicy
    𝒫 # problem
    U # utility function
end

function greedy(𝒫::MDP, U, s)
    u, a = findmax(a->lookahead(𝒫, U, s, a), 𝒫.𝒜)
    return (a=a, u=u)
end

(π::ValueFunctionPolicy)(s) = greedy(π.𝒫, π.U, s).a

# 7.6
struct PolicyIteration
    π     # initial policy
    k_max # maximum number of iterations
end

function solve(M::PolicyIteration, 𝒫::MDP)
    π, 𝒮 = M.π, 𝒫.𝒮
    for k in 1:M.k_max
        U = policy_evaluation(𝒫, π)
        π′ = ValueFunctionPolicy(𝒫, U)
        if all(π(s) == π′(s) for s in 𝒮)
            break
        end
        π = π′
    end
    return π
end
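And a hedged usage sketch, continuing the toy MDP above (again my own, not from the book): start from the "always stay put" policy and run up to 10 policy-iteration steps.

π_init(s) = 1
π_opt = solve(PolicyIteration(π_init, 10), 𝒫)
π_opt(1)  # greedy action in state 1; should be 2 (flip toward the rewarding state)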

# Algorithm 7.7. The backup procedure applied to an MDP
# Algorithm 7.8. Value iteration
# Algorithm 7.9. Asynchronous value iteration
# Algorithm 7.10. Solving a discrete MDP using a linear program formulation
# Algorithm 7.11. LinearQuadraticProblem
# Example 7.4. An example solving a finite horizon MDP
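For 7.7 and 7.8, here is my rough reconstruction in the same style (typed out by hand, so it may not match the book's listings exactly):

# backup: Bellman update for a single state
function backup(𝒫::MDP, U, s)
    return maximum(lookahead(𝒫, U, s, a) for a in 𝒫.𝒜)
end

# value iteration: apply the backup k_max times, then return the greedy policy
struct ValueIteration
    k_max # maximum number of iterations
end

function solve(M::ValueIteration, 𝒫::MDP)
    U = [0.0 for s in 𝒫.𝒮]
    for k in 1:M.k_max
        U = [backup(𝒫, U, s) for s in 𝒫.𝒮]
    end
    return ValueFunctionPolicy(𝒫, U)
end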

Error when testing the hex world problem

I ran the following code:

using DecisionMakingProblems
using Test
using Random
using LinearAlgebra
using GridInterpolations

const p = DecisionMakingProblems

m = HexWorld()
mdp = MDP(m)

function lookahead(𝒫::MDP, U, s, a)
    𝒮, T, R, γ = 𝒫.𝒮, 𝒫.T, 𝒫.R, 𝒫.γ
    return R(s,a) + γ*sum(T(s,a,s′)*U(s′) for s′ in 𝒮)
end

function lookahead(𝒫::MDP, U::Vector, s, a)
    𝒮, T, R, γ = 𝒫.𝒮, 𝒫.T, 𝒫.R, 𝒫.γ
    return R(s,a) + γ*sum(T(s,a,s′)*U[i] for (i,s′) in enumerate(𝒮))
end

function π(s)
    return 1
end

function iterative_policy_evaluation(𝒫::MDP, π, k_max)
    𝒮, T, R, γ = 𝒫.𝒮, 𝒫.T, 𝒫.R, 𝒫.γ
    U = [0.0 for s in 𝒮]
    for k in 1:k_max
        U = [lookahead(𝒫, U, s, π(s)) for s in 𝒮]
    end
    return U
end

k_max = 4
u = iterative_policy_evaluation(mdp, π, k_max)

and got the following error:

ERROR: LoadError: MethodError: no method matching pdf(::Distributions.Categorical{Float64, Vector{Float64}}, ::Int64)
Stacktrace:
[1] (::DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP})(s::Int64, a::Int64, s′::Int64)
@ DecisionMakingProblems ~/code/decision_making/DecisionMakingProblems.jl/src/mdp/discrete_mdp.jl:39
[2] (::var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}})(::Tuple{Int64, Int64})
@ Main ./none:0
[3] MappingRF
@ ./reduce.jl:95 [inlined]
[4] _foldl_impl
@ ./reduce.jl:58 [inlined]
[5] foldl_impl(op::Base.MappingRF{var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}, Base.BottomRF{typeof(Base.add_sum)}}, nt::Base._InitialValue, itr::Base.Iterators.Enumerate{Vector{Int64}})
@ Base ./reduce.jl:48
[6] mapfoldl_impl(f::typeof(identity), op::typeof(Base.add_sum), nt::Base._InitialValue, itr::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}})
@ Base ./reduce.jl:44
[7] mapfoldl(f::Function, op::Function, itr::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}}; init::Base._InitialValue)
@ Base ./reduce.jl:162
[8] mapfoldl(f::Function, op::Function, itr::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}})
@ Base ./reduce.jl:162
[9] mapreduce(f::Function, op::Function, itr::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}}; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ Base ./reduce.jl:289
[10] mapreduce(f::Function, op::Function, itr::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}})
@ Base ./reduce.jl:289
[11] sum(f::Function, a::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}}; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ Base ./reduce.jl:503
[12] sum(f::Function, a::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}})
@ Base ./reduce.jl:503
[13] sum(a::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}}; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ Base ./reduce.jl:532
[14] sum(a::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}})
@ Base ./reduce.jl:532
[15] lookahead(𝒫::MDP, U::Vector{Float64}, s::Int64, a::Int64)
@ Main ~/code/decision_making/DecisionMakingProblems.jl/test/runtests_discrete_mdp.jl:20
[16] (::var"#10#12"{MDP, typeof(π)})(s::Int64)
@ Main ./none:0
[17] iterate
@ ./generator.jl:47 [inlined]
[18] collect(itr::Base.Generator{Vector{Int64}, var"#10#12"{MDP, typeof(ฯ€)}})
@ Base ./array.jl:724
[19] iterative_policy_evaluation(𝒫::MDP, π::typeof(π), k_max::Int64)
@ Main ~/code/decision_making/DecisionMakingProblems.jl/test/runtests_discrete_mdp.jl:29
[20] top-level scope
@ ~/code/decision_making/DecisionMakingProblems.jl/test/runtests_discrete_mdp.jl:35

Plotting routines for DecisionMaking problems

Hello folks. I really appreciate the work that went into writing the Algorithms for Decision Making book. It is great that the authors included Julia code along with the algorithms for policy iteration, value iteration, SARSA, etc.

My question is whether there is a place to find plotting routines for these environments.

The biggest issue I encounter in trying to run and understand these algorithms is that I have not found a suitable way to plot the policies that emerge from the learning process. For example, if I run an algorithm on a Gridworld problem, I would like to plot the policy on a Gridworld plot with arrows indicating the chosen action in each cell. That would really help in checking whether the algorithm is converging and whether it is implemented correctly.

Is there anything like that available, either in this repository, where the problems are defined, or in a different repo? Thanks.
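In case it helps clarify what I am after, here is a minimal sketch with Plots.jl's quiver. The grid size, the linear state indexing, and the four-action encoding are my own assumptions and almost certainly do not match how this package indexes GridWorld states and actions:

using Plots

# Assumed encoding (hypothetical, not the package's): states 1:nx*ny laid out
# column by column, actions 1:4 meaning east, north, west, south.
function plot_policy(policy; nx=10, ny=10)
    dirs = [(1, 0), (0, 1), (-1, 0), (0, -1)]  # unit step for each assumed action
    xs, ys, us, vs = Float64[], Float64[], Float64[], Float64[]
    for s in 1:nx*ny
        x = (s - 1) ÷ ny + 1   # column of state s
        y = (s - 1) % ny + 1   # row of state s
        dx, dy = dirs[policy(s)]
        push!(xs, x); push!(ys, y)
        push!(us, 0.4dx); push!(vs, 0.4dy)  # short arrows so cells stay readable
    end
    quiver(xs, ys, quiver=(us, vs), aspect_ratio=1, legend=false)
end

# e.g. with a learned value function U: plot_policy(s -> greedy(𝒫, U, s).a)
plot_policy(s -> rand(1:4))  # random policy, just to show the layout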
