izuzak / noam Goto Github PK
View Code? Open in Web Editor NEWJavaScript library for working with automata and grammars for regular and context-free languages
Home Page: http://ivanzuzak.info/noam/
License: Other
JavaScript library for working with automata and grammars for regular and context-free languages
Home Page: http://ivanzuzak.info/noam/
License: Other
The Regex Gym won't simplify (a+ba*)*
any further, but I believe it should simplify to just (a+b)*
The wildcard . matches any character. For example, a.b matches any string that contains an "a", then any other character and then "b", a.*b matches any string that contains an "a", and then the character "b" at some later point.
https://en.wikipedia.org/wiki/Regular_expression
I tried a.*b, and the system generates a strange DFA and NFA. Neither of them works correctly.
ivanzuzak.info/:1 Mixed Content: The page at 'https://ivanzuzak.info/noam/webapps/fsm_simulator/' was loaded over HTTPS, but requested an insecure script 'http://mdaines.github.io/viz.js/bower_components/viz.js/viz.js'. This request has been blocked; the content must be served over HTTPS.
It'd be pretty helpful.
These 2 expressions are same but give different output in the Regular Expression Gym
Input : b+ab*a+(b+$+ab*a)(b+ab*a)*(b+$+ab*a)
Gives: (b+ab*a)*
but Input b+ab*a+(b+ab*a)(b+ab*a)*(b+ab*a)
does not simplify at all.
I believe output 1 is the correct answer but I wanted to double-check this
Why isn't this repo on npm anymore?
Hi! I am getting an unexpected outcome when converting from eNFA to DFA (CodeSandbox):
const enfa = {
"states": [
"s0",
"s1",
"s2",
"s3"
],
"alphabet": [
"a",
"b",
"c"
],
"initialState": "s0",
"acceptingStates": [
"s2"
],
"transitions": [
{
"fromState": "s0",
"symbol": "a",
"toStates": [
"s0"
]
},
{
"fromState": "s0",
"symbol": "b",
"toStates": [
"s0",
"s2"
]
},
{
"fromState": "s0",
"symbol": "c",
"toStates": [
"s0",
"s2"
]
},
{
"fromState": "s0",
"symbol": "δ",
"toStates": [
"s1",
"s2"
]
},
{
"fromState": "s1",
"symbol": "a",
"toStates": [
"s3"
]
},
{
"fromState": "s1",
"symbol": "c",
"toStates": [
"s2"
]
},
{
"fromState": "s2",
"symbol": "c",
"toStates": [
"s2",
"s3"
]
},
{
"fromState": "s2",
"symbol": "δ",
"toStates": [
"s3"
]
},
{
"fromState": "s3",
"symbol": "δ",
"toStates": [
"s0",
"s3"
]
}
]
};
const nfa = noam.fsm.convertEnfaToNfa(enfa);
// {
// "states": [
// "s0",
// "s1",
// "s2",
// "s3"
// ],
// "alphabet": [
// "a",
// "b",
// "c"
// ],
// "initialState": "s0",
// "acceptingStates": [
// "s2"
// ],
// "transitions": [
// {
// "fromState": "s0",
// "symbol": "a",
// "toStates": [
// "s0"
// ]
// },
// {
// "fromState": "s0",
// "symbol": "b",
// "toStates": [
// "s0",
// "s2"
// ]
// },
// {
// "fromState": "s0",
// "symbol": "c",
// "toStates": [
// "s0",
// "s2"
// ]
// },
// {
// "fromState": "s0",
// "symbol": "δ",
// "toStates": [
// "s1",
// "s2"
// ]
// },
// {
// "fromState": "s1",
// "symbol": "a",
// "toStates": [
// "s3"
// ]
// },
// {
// "fromState": "s1",
// "symbol": "c",
// "toStates": [
// "s2"
// ]
// },
// {
// "fromState": "s2",
// "symbol": "c",
// "toStates": [
// "s2",
// "s3"
// ]
// },
// {
// "fromState": "s2",
// "symbol": "δ",
// "toStates": [
// "s3"
// ]
// },
// {
// "fromState": "s3",
// "symbol": "δ",
// "toStates": [
// "s0",
// "s3"
// ]
// }
// ]
// }
const dfa = noam.fsm.convertNfaToDfa(nfa);
// {
// "alphabet": [
// "a",
// "b",
// "c"
// ],
// "states": [
// [
// "s0"
// ],
// [
// "s1"
// ],
// [
// "s2"
// ],
// [
// "s3"
// ],
// [
// "s0",
// "s3"
// ],
// [
// "s2",
// "s3"
// ],
// [
// "s1",
// "s2"
// ],
// [
// "s0",
// "s2"
// ],
// [
// "s0",
// "s2",
// "s3"
// ],
// [
// "ERROR"
// ]
// ],
// "acceptingStates": [
// [
// "s2"
// ],
// [
// "s2",
// "s3"
// ],
// [
// "s1",
// "s2"
// ],
// [
// "s0",
// "s2"
// ],
// [
// "s0",
// "s2",
// "s3"
// ]
// ],
// "initialState": [
// "s0"
// ],
// "transitions": [
// {
// "fromState": [
// "s0"
// ],
// "symbol": "a",
// "toStates": [
// [
// "s0"
// ]
// ]
// },
// {
// "fromState": [
// "s0"
// ],
// "symbol": "b",
// "toStates": [
// [
// "s0",
// "s2"
// ]
// ]
// },
// {
// "fromState": [
// "s0"
// ],
// "symbol": "c",
// "toStates": [
// [
// "s0",
// "s2"
// ]
// ]
// },
// {
// "fromState": [
// "s0"
// ],
// "symbol": "δ",
// "toStates": [
// [
// "s1",
// "s2"
// ]
// ]
// },
// {
// "fromState": [
// "s1"
// ],
// "symbol": "a",
// "toStates": [
// [
// "s3"
// ]
// ]
// },
// {
// "fromState": [
// "s1"
// ],
// "symbol": "c",
// "toStates": [
// [
// "s2"
// ]
// ]
// },
// {
// "fromState": [
// "s2"
// ],
// "symbol": "c",
// "toStates": [
// [
// "s2",
// "s3"
// ]
// ]
// },
// {
// "fromState": [
// "s2"
// ],
// "symbol": "δ",
// "toStates": [
// [
// "s3"
// ]
// ]
// },
// {
// "fromState": [
// "s3"
// ],
// "symbol": "δ",
// "toStates": [
// [
// "s0",
// "s3"
// ]
// ]
// },
// {
// "fromState": [
// "s0",
// "s3"
// ],
// "symbol": "a",
// "toStates": [
// [
// "s0"
// ]
// ]
// },
// {
// "fromState": [
// "s0",
// "s3"
// ],
// "symbol": "b",
// "toStates": [
// [
// "s0",
// "s2"
// ]
// ]
// },
// {
// "fromState": [
// "s0",
// "s3"
// ],
// "symbol": "c",
// "toStates": [
// [
// "s0",
// "s2"
// ]
// ]
// },
// {
// "fromState": [
// "s2",
// "s3"
// ],
// "symbol": "c",
// "toStates": [
// [
// "s2",
// "s3"
// ]
// ]
// },
// {
// "fromState": [
// "s1",
// "s2"
// ],
// "symbol": "a",
// "toStates": [
// [
// "s3"
// ]
// ]
// },
// {
// "fromState": [
// "s1",
// "s2"
// ],
// "symbol": "c",
// "toStates": [
// [
// "s2",
// "s3"
// ]
// ]
// },
// {
// "fromState": [
// "s0",
// "s2"
// ],
// "symbol": "a",
// "toStates": [
// [
// "s0"
// ]
// ]
// },
// {
// "fromState": [
// "s0",
// "s2"
// ],
// "symbol": "b",
// "toStates": [
// [
// "s0",
// "s2"
// ]
// ]
// },
// {
// "fromState": [
// "s0",
// "s2"
// ],
// "symbol": "c",
// "toStates": [
// [
// "s0",
// "s2",
// "s3"
// ]
// ]
// },
// {
// "fromState": [
// "s0",
// "s2",
// "s3"
// ],
// "symbol": "a",
// "toStates": [
// [
// "s0"
// ]
// ]
// },
// {
// "fromState": [
// "s0",
// "s2",
// "s3"
// ],
// "symbol": "b",
// "toStates": [
// [
// "s0",
// "s2"
// ]
// ]
// },
// {
// "fromState": [
// "s0",
// "s2",
// "s3"
// ],
// "symbol": "c",
// "toStates": [
// [
// "s0",
// "s2",
// "s3"
// ]
// ]
// },
// {
// "fromState": [
// "s1"
// ],
// "symbol": "b",
// "toStates": [
// [
// "ERROR"
// ]
// ]
// },
// {
// "fromState": [
// "s2"
// ],
// "symbol": "a",
// "toStates": [
// [
// "ERROR"
// ]
// ]
// },
// {
// "fromState": [
// "s2"
// ],
// "symbol": "b",
// "toStates": [
// [
// "ERROR"
// ]
// ]
// },
// {
// "fromState": [
// "s3"
// ],
// "symbol": "a",
// "toStates": [
// [
// "ERROR"
// ]
// ]
// },
// {
// "fromState": [
// "s3"
// ],
// "symbol": "b",
// "toStates": [
// [
// "ERROR"
// ]
// ]
// },
// {
// "fromState": [
// "s3"
// ],
// "symbol": "c",
// "toStates": [
// [
// "ERROR"
// ]
// ]
// },
// {
// "fromState": [
// "s2",
// "s3"
// ],
// "symbol": "a",
// "toStates": [
// [
// "ERROR"
// ]
// ]
// },
// {
// "fromState": [
// "s2",
// "s3"
// ],
// "symbol": "b",
// "toStates": [
// [
// "ERROR"
// ]
// ]
// },
// {
// "fromState": [
// "s1",
// "s2"
// ],
// "symbol": "b",
// "toStates": [
// [
// "ERROR"
// ]
// ]
// },
// {
// "fromState": [
// "ERROR"
// ],
// "symbol": "a",
// "toStates": [
// [
// "ERROR"
// ]
// ]
// },
// {
// "fromState": [
// "ERROR"
// ],
// "symbol": "b",
// "toStates": [
// [
// "ERROR"
// ]
// ]
// },
// {
// "fromState": [
// "ERROR"
// ],
// "symbol": "c",
// "toStates": [
// [
// "ERROR"
// ]
// ]
// }
// ]
// }
Hello, first of all, thank you very much for this incredible tool. I wonder if it supports expressions like
0+(λ+1)(λ+1)*0
Hi,
I notice that the Create Automaton button is missing from the page thus I cannot convert my FSM to regex. Could you please take a look at it? I also wrote you an email about the same problem in case you missed this one.
I expect the regex gym to simplify b+aa*b
to a*b
, but in fact it has no simplifications available for it.
Hi! I am just wondering if this library will have updates that are friendly to Typescript users?
FYI, there is a major flaw in this regex simplifier's logic. $
does not represent the empty string; it represents the end of a string (or, with the /m
modifier, the end of a line). So, $+
is meaningless, and $a
can never match anything.
For example, foo$
matches foo
but not foobar
.
foo$
I ran and reran the same regular expression a few times, only to get a different DFA each time; but if I keep running the website long enough with the same regular expression, the DFA returned is eventually the one that seems to match the regular expression.
Can someone show me a non-deterministic machine for (aa)* ?
Consider the regex (b+a*c)*aa*b
. When I put it in FSM2Regex, I get this horrible monstrosity:
#states
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#initial
0
#accepting
1
#alphabet
b
a
c
#transitions
6:b>7
4:$>6,8
7:$>5
10:a>11
8:$>9,10
11:$>10,9
12:c>13
9:$>12
13:$>5
2:$>3,4
5:$>4,3
14:a>15
3:$>14
18:a>19
16:$>17,18
19:$>18,17
15:$>16
20:b>21
17:$>20
0:$>2
21:$>1
Why not something much simpler, like this?
#states
s0
s1
s2
#initial
s0
#accepting
s2
#alphabet
a
b
c
#transitions
s0:a>s1
s0:b>s0
s0:c>s0
s1:a>s1
s1:b>s2
s1:c>s0
It generates the NFA and DFA, but when you click "Create automaton", it does nothing.
EDIT: I just discovered that you use + like I use bar (|), i.e., as the choice operator. I think that clears things up. Never mind!
Hello,
Great website. Love the concept, and it's generally very easy to use. However, I think I'm getting some wonky behavior on the FSM2Regex functionality. I put in the following FSM:
#states
Q0
Q1
Q2
Q3
#initial
Q0
#accepting
Q3
#alphabet
0
1
#transitions
Q0:0>Q1
Q0:1>Q2
Q1:0>Q3
Q1:1>Q1
Q2:0>Q2
Q2:1>Q3
Q3:0>Q1
Q3:1>Q2
And it gave me this Regex:
1(0+11)*1+(0+1(0+11)*10)(1+0(0+1(0+11)10))(0+01(0+11)*1)
Which....is not right. I can see right off the bat that this regex requires a 1 at the beginning, which the FSM does not (00 is perfectly acceptable in the FSM).
Unless I'm missing something super basic? Any idea what's going on?
Thanks!
Russell
Hello!
Thanks for these web tools, I noticed the link out to the Grammophone is dead...it should now point to:
http://mdaines.github.io/grammophone (.io vs .com)
a(a|aa) incorrectly simplifies to aa|aa
Hi there, I've just noticed that when
(a+b)(b+$)*(b+$)+a+b
is fed into the Regular Expressions Gym, it simplifies to
(a+b)b*(b+$)
However I'm pretty sure the fully simplified regular expression should be
(a+b)b*
Since if you expand the (a+b)b*
onto (b+$)
you get (a+b)b*b+(a+b)b*
which can be simplified to (a+b)b*
?
Uncaught TypeError: Cannot call a class as a function
classCallCheck https://cdnjs.cloudflare.com/ajax/libs/viz.js/2.1.2/viz.js:36
Viz https://cdnjs.cloudflare.com/ajax/libs/viz.js/2.1.2/viz.js:244
drawGraph https://ivanzuzak.info/noam/webapps/fsm_simulator/scripts/fsm_simulator.js:58
https://ivanzuzak.info/noam/webapps/fsm_simulator/scripts/fsm_simulator.js:281
jQuery
2
I needed to create a regex for binary numbers divisible by 15.
DFA for this task has:
#states
s0
s1
s2
s3
s4
s5
s6
s7
s8
s9
s10
s11
s12
s13
s14
#initial
s0
#accepting
s0
#alphabet
0
1
#transitions
s0:0>s0
s0:1>s1
s1:0>s2
s1:1>s3
s2:0>s4
s2:1>s5
s3:0>s6
s3:1>s7
s4:0>s8
s4:1>s9
s5:0>s10
s5:1>s11
s6:0>s12
s6:1>s13
s7:0>s14
s7:1>s0
s8:0>s1
s8:1>s2
s9:0>s3
s9:1>s4
s10:0>s5
s10:1>s6
s11:0>s7
s11:1>s8
s12:0>s9
s12:1>s10
s13:0>s11
s13:1>s12
s14:0>s13
s14:1>s14
Starting from 16 transitions, I noticed a major slowdown in the construction of the graph and/or regex.
With 26 transitions it takes minutes to construct.
Could you do something about it?
The DFA generator does not create the correct automaton for certain regular expressions.
To reproduce, generate DFA for b*(d+a*)d
.
It will create a state machine that rejects the inputs bdad
and bddd
.
The former was hand written as the obvious acceptable string, and the latter was generated with the "Unacceptable string" button after a few tries. They are both acceptable, and both are rejected by the DFA.
Is there any installable software version of the tool that lets you "visually simulate your DFAs, NFAs and ε-NFAs"?
I want to install it on my PC.
When I generate a random DFA, the $ (empty string) character doesn't appear, but when I write a regular expression and then generate a DFA from it, the $ character does appear. How can I generate a DFA without $ from a regular expression?
I tried out the tools at http://ivanzuzak.info/noam/webapps/fsm2regex/ and http://ivanzuzak.info/noam/webapps/regex_simplifier/ and found some correctness bugs with both. I'll try to show the issues I found as a jumping-off point for some investigations, I didn't look too deeply.
It's also possible I'm just completely misinterpreting the syntax of the regex, which isn't like any I've seen before.
To start with I made a simple FSM in FSM2Regex corresponding to the Regex (b*a)*
and the resulting Regex from the tool looks like it cannot possibly be correct.
The code for this FSM is as follows:
#states
0
1
#initial
0
#accepting
0
#alphabet
b
a
#transitions
0:a>0
0:b>1
1:a>0
1:b>1
The tool gave me the resulting Regex: a+$+bb*a+(a+$+bb*a)(a+bb*a)*(a+$+bb*a)
which cannot possibly be correct. As you can see the initial state of the FSM is accepting, yet this regex ends with a group (a+$+bb*a)
which at the very least requires a trailing a
character. So it can't possibly accept the empty string. I didn't dig any further into this bug, but there must be some issue with how it is generating this Regex.
I then plugged this regex a+$+bb*a+(a+$+bb*a)(a+bb*a)*(a+$+bb*a)
into the regex simplifier, which in addition to producing an incorrect simplified expression appears to accept different languages at various steps of simplification. I've highlighted one of the rule applications that must be incorrect, though I didn't look for any more bugs.
The end result of the tool was (a+bb*a)*
which now does accept the empty string, but because it looks for at least two characters in its group, one of which must be a trailing a
it won't accept other valid strings like a
or aaa
.
The simplification from R5 to R6 looks especially suspect. It's tried to use the rule, (L1+$)(L2)* => (L2)* if L1 is subset of L2
but in this case the deleted expression is (a+$+bb*a)
which is not in the form (L1+$)
at all.
I hope these reports can help uncover some issues in the code. Thanks!
.
For some reason, every time I write down my automaton description it gets replaced with some random automaton if I try to Ctrl+C the corresponding regular expression. It's frustrating.
I am using Google Chrome on Ubuntu 16, if that helps.
Give the regex E((AT*S?)|(ST*A?))?T*Q
to the Regex Gym and it reports the following simplification as the first step:
R1: E((AT*S?)|(ST*A?))?T*Q
Rule: ab(cd) => abcd
R2: E(AT*S?)|(ST*A?)?T*Q
I can't accurately represent the color coding it does on the page, but the colored parenthesis indicate it's trying to remove one of the first innermost left parenthesis and the outermost right parenthesis.
Does online version support DFA visualization from regex? I've found no such options in GUI.
As you can see at http://madebyevan.com/fsm/, it is possible to export to PNG, SVG or LaTeX; that could save a lot of time.
When I try to use the greater than sign (>) I get the Error: The specified object is not an alphabet symbol of the FSM.
Example:
#states
s15
s16
#initial
s15
#accepting
s16
#alphabet
#transitions
s15:>>s16
Error exception
fsm_simulator.js:58 Uncaught ReferenceError: Viz is not defined
at drawGraph (fsm_simulator.js:58)
at HTMLButtonElement.<anonymous> (fsm_simulator.js:281)
at HTMLButtonElement.dispatch (jquery.min.js:3)
at HTMLButtonElement.r.handle (jquery.min.js:3)
I was trying to find the dfa accepting strings containing at least three occurrences of three consecutive 1's on alphabet Σ={0,1} with overlapping permitted.
I come up with following long regex:
(
(0+1)*111(0+1)*111(0+1)*111(0+1)*
+(0+1)*111(0+1)*1111(0+1)*
+(0+1)*1111(0+1)*111(0+1)*
+(0+1)*11111(0+1)*
)
First line for strings containing non overlapping three occurrences of three consecutive 1's
Second line for strings containing first two occurrences overlapping (that is four consecutive 1's)
Third line for strings containing last two occurrences overlapping (that is four consecutive 1's)
Last line for all occurrences overlapping (that is five consecutive 1's)
I fed above regex to FSM simulator and it kept on processing. I can see the CPU utilization in both chrome and windows task managers. So I tried to feed the regex to regex gym. It also showed same behaviour.
Interestingly, when I omit last line (for all overlapping occurrences) in the regex, it returned proper FSM:
(
(0+1)*111(0+1)*111(0+1)*111(0+1)*
+(0+1)*111(0+1)*1111(0+1)*
+(0+1)*1111(0+1)*111(0+1)*
)
So whats going on here? Is it that the original regex is excessively complex?
<([:A-Za-z][-.:A-Za-z0-9](\s+[:A-Za-z][-.:A-Za-z0-9]\s_=\s_("[^"]"|'[^']'))|/[:A-Za-z][-.:A-Za-z0-9])\s*|/?>
The following transformation is incorrect
R5
1.02.(6[8-9]|[7-9][0-9])|(1.(0[3-9]).([0-9][0-9][0-9]?))
Rule
ab(cd) => abcd
R6
1.02.6[8-9]|[7-9][0-9]|(1.(0[3-9]).([0-9][0-9][0-9]?))
For example, by removing the parentheses around the alternation, the expression now permits a match with the bare value "76" instead of needing at least "1.02.76" to match.
The ability to export the resulting automaton to a PNG file (latex would be neat but probably too much).
Draggable Nodes.
Even though FSM Simulator can generate neat diagrams, it would be nice to be able to drag around state nodes. That way if you write down your own FSM on paper but the online FSM does not have the same geometric node positions then you can drag them around to make it easier to compare it to the paper version.
Thanks for the FSM Simulator. It's very helpful for learning about Finite State Machines.
If you wanted to produce a DFA that can decide the language bba|ab*, could your software do that?
The language that this library parses uses a different set of symbols as the operators than the type commonly used by regex libraries in most programming languages: using +
instead of |
for alternation, and no +
(one or more), {n}
(exactly n copies), ?
(optional), or other operators (although they can all be expanded into expressions involving only the alternation and kleene operators). How can I use this library with the more common regex dialect instead?
The Apache license link in the README returns 404 Not Found.
Greetings.
What algorithms does noam use to convert an FSM to a regex and vice versa?
I tried to do the same thing in Java (apparently there is no such thing available), but the only thing I found was a research paper which proved that for any FSM there is a regex, and gave an algorithm on the side. The generated regex's were enormous and not really of any use.
Can you tell me which algorithms you use (or do you even know of a ready-to-use Java solution)?
Btw. I like your little, handy web app very much!
Cheers!
Hi!
Great tool.
Are there simplification rules to cover non-capturing groups, lookahead, and lookbehind cases?
Here's what I'm running into…
(?# ?: ≡ non-capturing group)
pre-REG === post-REG
([A-G](?:[m♭♯]{1,2})?\(add9\)) ≠ [A-G]?:[m♭♯]{1,2}?\(add9\)
34 matches ≠ no match
(?# Negative|Positive Lookbehind|Lookahead)
pre-REG === post-REG
Negative Lookbehind: (?<!\S)\S+\(add9\) ≠ Error: RegexError is not defined
Negative Lookahead : \S+\(add9\)(?!\S) ≠ Error: RegexError is not defined
Positive Lookahead : \S+(?=\(add9\))\S+ ≠ Error: RegexError is not defined
Positive Lookbehind: \S+(?<=\(add9\)) ≠ Error: RegexError is not defined
34 matches ≠ Error: RegexError is not defined
☛ regex flavour = PCRE (PHP)
A declarative, efficient, and flexible JavaScript library for building user interfaces.
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Some thing interesting about web. New door for the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Some thing interesting about visualization, use data art
Some thing interesting about game, make everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google ❤️ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China tencent open source team.