decisionmakingproblems.jl's People

Contributors

mykelk, sidhartk, tawheeler

decisionmakingproblems.jl's Issues

Code for the algorithms in the book "Algorithms for Decision Making"?

Hey and thank you for making your stuff public!

There are various algorithms in the book that are hard to copy/paste from the PDF.
Is there a chance they can be made available in this repo?

E.g., my attempt to copy/paste some of chapter 7:

struct MDP
    γ  # discount factor
    𝒮  # state space
    𝒜  # action space
    T  # transition function
    R  # reward function
    TR # sample transition and reward
end

# lookahead with a value function given as a callable U
function lookahead(𝒫::MDP, U, s, a)
    𝒮, T, R, γ = 𝒫.𝒮, 𝒫.T, 𝒫.R, 𝒫.γ
    return R(s,a) + γ*sum(T(s,a,s′)*U(s′) for s′ in 𝒮)
end

# lookahead with a value function given as a vector U indexed like 𝒮
function lookahead(𝒫::MDP, U::Vector, s, a)
    𝒮, T, R, γ = 𝒫.𝒮, 𝒫.T, 𝒫.R, 𝒫.γ
    return R(s,a) + γ*sum(T(s,a,s′)*U[i] for (i,s′) in enumerate(𝒮))
end
# 7.3 Iterative policy evaluation (iterate k_max times, no max over actions)
function iterative_policy_evaluation(𝒫::MDP, π, k_max)
    𝒮, T, R, γ = 𝒫.𝒮, 𝒫.T, 𝒫.R, 𝒫.γ
    U = [0.0 for s in 𝒮]
    for k in 1:k_max
        U = [lookahead(𝒫, U, s, π(s)) for s in 𝒮]
    end
    return U
end
# 7.4 Exact policy evaluation (requires `using LinearAlgebra` for I)
function policy_evaluation(𝒫::MDP, π)
    𝒮, T, R, γ = 𝒫.𝒮, 𝒫.T, 𝒫.R, 𝒫.γ
    R′ = [R(s, π(s)) for s in 𝒮]
    T′ = [T(s, π(s), s′) for s in 𝒮, s′ in 𝒮]
    return (I - γ*T′) \ R′
end
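For what it's worth, here is a tiny toy MDP I used to sanity-check the pasted code above (my own example, not from the book): two states, action 1 stays put, action 2 flips the state, and state 2 yields reward 1.

using LinearAlgebra  # provides the identity matrix I used by policy_evaluation

𝒮 = [1, 2]
𝒜 = [1, 2]
T(s, a, s′) = a == 1 ? float(s′ == s) : float(s′ != s)
R(s, a) = s == 2 ? 1.0 : 0.0
𝒫 = MDP(0.9, 𝒮, 𝒜, T, R, nothing)  # TR is unused here, so pass nothing

policy(s) = 2  # always flip the state
U_iter  = iterative_policy_evaluation(𝒫, policy, 100)
U_exact = policy_evaluation(𝒫, policy)  # ≈ [4.74, 5.26]; U_iter should be close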

# 7.5
struct ValueFunctionPolicy
    𝒫 # problem
    U # utility function
end

function greedy(𝒫::MDP, U, s)
    u, a = findmax(a->lookahead(𝒫, U, s, a), 𝒫.𝒜)
    return (a=a, u=u)
end

(π::ValueFunctionPolicy)(s) = greedy(π.𝒫, π.U, s).a

# 7.6
struct PolicyIteration
    π     # initial policy
    k_max # maximum number of iterations
end

function solve(M::PolicyIteration, 𝒫::MDP)
    π, 𝒮 = M.π, 𝒫.𝒮
    for k in 1:M.k_max
        U = policy_evaluation(𝒫, π)
        π′ = ValueFunctionPolicy(𝒫, U)
        if all(π(s) == π′(s) for s in 𝒮)
            break
        end
        π = π′
    end
    return π
end
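And a hedged usage sketch, continuing the toy MDP above (again my own, not from the book): start from the "always stay put" policy and run up to 10 policy-iteration steps.

π_init(s) = 1
π_opt = solve(PolicyIteration(π_init, 10), 𝒫)
π_opt(1)  # greedy action in state 1; should be 2 (flip toward the rewarding state)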

# Algorithm 7.7. The backup procedure applied to an MDP
# Algorithm 7.8. Value iteration
# Algorithm 7.9. Asynchronous value iteration
# Algorithm 7.10. Solving a discrete MDP using a linear program formulation
# Algorithm 7.11. LinearQuadraticProblem
# Example 7.4. An example solving a finite horizon MDP
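For 7.7 and 7.8, here is my rough reconstruction in the same style (typed out by hand, so it may not match the book's listings exactly):

# backup: Bellman update for a single state
function backup(𝒫::MDP, U, s)
    return maximum(lookahead(𝒫, U, s, a) for a in 𝒫.𝒜)
end

# value iteration: apply the backup k_max times, then return the greedy policy
struct ValueIteration
    k_max # maximum number of iterations
end

function solve(M::ValueIteration, 𝒫::MDP)
    U = [0.0 for s in 𝒫.𝒮]
    for k in 1:M.k_max
        U = [backup(𝒫, U, s) for s in 𝒫.𝒮]
    end
    return ValueFunctionPolicy(𝒫, U)
end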

Error when testing the hex world problem

I ran the following code:

using DecisionMakingProblems
using Test
using Random
using LinearAlgebra
using GridInterpolations

const p = DecisionMakingProblems

m = HexWorld()
mdp = MDP(m)

function lookahead(𝒫::MDP, U, s, a)
    𝒮, T, R, γ = 𝒫.𝒮, 𝒫.T, 𝒫.R, 𝒫.γ
    return R(s,a) + γ*sum(T(s,a,s′)*U(s′) for s′ in 𝒮)
end

function lookahead(𝒫::MDP, U::Vector, s, a)
    𝒮, T, R, γ = 𝒫.𝒮, 𝒫.T, 𝒫.R, 𝒫.γ
    return R(s,a) + γ*sum(T(s,a,s′)*U[i] for (i,s′) in enumerate(𝒮))
end

function π(s)
    return 1
end

function iterative_policy_evaluation(𝒫::MDP, π, k_max)
    𝒮, T, R, γ = 𝒫.𝒮, 𝒫.T, 𝒫.R, 𝒫.γ
    U = [0.0 for s in 𝒮]
    for k in 1:k_max
        U = [lookahead(𝒫, U, s, π(s)) for s in 𝒮]
    end
    return U
end

k_max = 4
u = iterative_policy_evaluation(mdp, π, k_max)

and got the following error:

ERROR: LoadError: MethodError: no method matching pdf(::Distributions.Categorical{Float64, Vector{Float64}}, ::Int64)
Stacktrace:
[1] (::DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP})(s::Int64, a::Int64, s′::Int64)
@ DecisionMakingProblems ~/code/decision_making/DecisionMakingProblems.jl/src/mdp/discrete_mdp.jl:39
[2] (::var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}})(::Tuple{Int64, Int64})
@ Main ./none:0
[3] MappingRF
@ ./reduce.jl:95 [inlined]
[4] _foldl_impl
@ ./reduce.jl:58 [inlined]
[5] foldl_impl(op::Base.MappingRF{var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}, Base.BottomRF{typeof(Base.add_sum)}}, nt::Base._InitialValue, itr::Base.Iterators.Enumerate{Vector{Int64}})
@ Base ./reduce.jl:48
[6] mapfoldl_impl(f::typeof(identity), op::typeof(Base.add_sum), nt::Base._InitialValue, itr::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}})
@ Base ./reduce.jl:44
[7] mapfoldl(f::Function, op::Function, itr::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}}; init::Base._InitialValue)
@ Base ./reduce.jl:162
[8] mapfoldl(f::Function, op::Function, itr::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}})
@ Base ./reduce.jl:162
[9] mapreduce(f::Function, op::Function, itr::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}}; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ Base ./reduce.jl:289
[10] mapreduce(f::Function, op::Function, itr::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}})
@ Base ./reduce.jl:289
[11] sum(f::Function, a::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}}; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ Base ./reduce.jl:503
[12] sum(f::Function, a::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}})
@ Base ./reduce.jl:503
[13] sum(a::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}}; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ Base ./reduce.jl:532
[14] sum(a::Base.Generator{Base.Iterators.Enumerate{Vector{Int64}}, var"#7#8"{Vector{Float64}, Int64, Int64, DecisionMakingProblems.var"#17#20"{DecisionMakingProblems.DiscreteMDP}}})
@ Base ./reduce.jl:532
[15] lookahead(𝒫::MDP, U::Vector{Float64}, s::Int64, a::Int64)
@ Main ~/code/decision_making/DecisionMakingProblems.jl/test/runtests_discrete_mdp.jl:20
[16] (::var"#10#12"{MDP, typeof(π)})(s::Int64)
@ Main ./none:0
[17] iterate
@ ./generator.jl:47 [inlined]
[18] collect(itr::Base.Generator{Vector{Int64}, var"#10#12"{MDP, typeof(ฯ€)}})
@ Base ./array.jl:724
[19] iterative_policy_evaluation(𝒫::MDP, π::typeof(π), k_max::Int64)
@ Main ~/code/decision_making/DecisionMakingProblems.jl/test/runtests_discrete_mdp.jl:29
[20] top-level scope
@ ~/code/decision_making/DecisionMakingProblems.jl/test/runtests_discrete_mdp.jl:35

Plotting routines for DecisionMaking problems

Hello folks. I really appreciate the work that went into writing the Algorithms for Decision Making book. It is great that the authors included Julia code along with the algorithms for policy iteration, value iteration, SARSA, etc.

My question is whether there is a place to find plotting routines for these environments.

The biggest issue I encounter in trying to run and understand these algorithms is that I have not found a suitable way to plot the policies that emerge from the learning process. For example, if I run an algorithm on a Gridworld problem, I would like to plot the policy on a Gridworld plot with arrows indicating the chosen action in each cell. That would really help in checking whether the algorithm is converging and whether it is implemented correctly.

Is there anything like that available, either in this repository, where the problems are defined, or in a different repo? Thanks.
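In case it helps clarify what I am after, here is a minimal sketch with Plots.jl's quiver. The grid size, the linear state indexing, and the four-action encoding are my own assumptions and almost certainly do not match how this package indexes GridWorld states and actions:

using Plots

# Assumed encoding (hypothetical, not the package's): states 1:nx*ny laid out
# column by column, actions 1:4 meaning east, north, west, south.
function plot_policy(policy; nx=10, ny=10)
    dirs = [(1, 0), (0, 1), (-1, 0), (0, -1)]  # unit step for each assumed action
    xs, ys, us, vs = Float64[], Float64[], Float64[], Float64[]
    for s in 1:nx*ny
        x = (s - 1) ÷ ny + 1   # column of state s
        y = (s - 1) % ny + 1   # row of state s
        dx, dy = dirs[policy(s)]
        push!(xs, x); push!(ys, y)
        push!(us, 0.4dx); push!(vs, 0.4dy)  # short arrows so cells stay readable
    end
    quiver(xs, ys, quiver=(us, vs), aspect_ratio=1, legend=false)
end

# e.g. with a learned value function U: plot_policy(s -> greedy(𝒫, U, s).a)
plot_policy(s -> rand(1:4))  # random policy, just to show the layout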
