📅  最后修改于: 2023-12-03 15:36:01.444000             🧑  作者: Mango
本程序使用 Python 语言,基于“正则表达式 → NFA → DFA”的转换算法,为正则表达式 $C(A+B)^{+}$ 构建对应的 DFA。
首先,程序根据正则表达式构建出对应的 NFA,然后通过子集构造算法把 NFA 转换为 DFA。
import itertools
class State:
    """One DFA state: a display label plus the NFA states it stands for.

    Args:
        id: Value used to form the label; it is stringified and prefixed
            with ``'Q'`` (``3`` becomes ``'Q3'``).
        states: The subset of NFA states represented by this DFA state.
            It is stored as given, without copying.
    """

    def __init__(self, id, states):
        # Label of the state, e.g. 'Q0', 'Q1', ...
        self.id = f'Q{id!s}'
        # Subset of NFA states behind this DFA state.
        self.states = states
class DFA:
    """Plain container describing a deterministic finite automaton.

    Args:
        states: Collection of all DFA states.
        inputs: Collection of all input symbols.
        trans: Transition function of the DFA.
        start: Start state of the DFA.
        accepts: Accepting states of the DFA.

    Every argument is stored unchanged on the attribute of the same name.
    """

    def __init__(self, states, inputs, trans, start, accepts):
        # Assigned left to right, in the same order as the parameters.
        (self.states, self.inputs, self.trans,
         self.start, self.accepts) = states, inputs, trans, start, accepts
def _parse_regexp(regexp):
    """Parse ``regexp`` into a syntax tree and collect its alphabet.

    Supported syntax (whitespace is ignored):

    * any character other than ``+ * . ( )`` is a literal symbol;
    * ``X + Y``  -- union (choice);
    * ``X . Y`` or plain juxtaposition ``XY`` -- concatenation;
    * ``X*``     -- Kleene star;
    * ``X+`` with no operand after the ``+`` (end of input, ``)`` or another
      operator follows) -- positive closure, one or more repetitions;
    * ``( X )``  -- grouping.

    Precedence, lowest to highest: union, concatenation, postfix operators.

    Returns:
        A ``(tree, alphabet)`` pair. ``tree`` is either a one-character
        string or a list ``[kind, operand, ...]`` with ``kind`` in
        ``'choices'``, ``'sequence'``, ``'closure'``, ``'plus'``.
        ``alphabet`` is the set of literal symbols that occur.

    Raises:
        ValueError: If the expression is empty or malformed.
    """
    text = ''.join(regexp.split())  # whitespace carries no meaning
    alphabet = set()
    pos = 0

    def fail():
        return ValueError(f'Invalid syntax: {regexp}')

    def peek():
        return text[pos] if pos < len(text) else None

    def operand_follows(index):
        # True when the character at ``index`` can start an operand, which
        # is what distinguishes infix '+' (union) from postfix '+'.
        return index < len(text) and text[index] not in '+*.)'

    def parse_atom():
        nonlocal pos
        ch = peek()
        if ch is None or ch in '+*.)':
            raise fail()
        pos += 1
        if ch == '(':
            node = parse_choices()
            if peek() != ')':
                raise fail()
            pos += 1
            return node
        alphabet.add(ch)
        return ch

    def parse_postfix():
        nonlocal pos
        node = parse_atom()
        while True:
            ch = peek()
            if ch == '*':
                node = ['closure', node]
            elif ch == '+' and not operand_follows(pos + 1):
                node = ['plus', node]
            else:
                return node
            pos += 1

    def parse_sequence():
        nonlocal pos
        items = [parse_postfix()]
        while True:
            ch = peek()
            if ch == '.':
                pos += 1  # explicit concatenation operator
            elif ch is None or ch in '+)':
                break
            # Anything else starts another operand: implicit concatenation.
            items.append(parse_postfix())
        return items[0] if len(items) == 1 else ['sequence', *items]

    def parse_choices():
        nonlocal pos
        branches = [parse_sequence()]
        while peek() == '+':
            pos += 1
            branches.append(parse_sequence())
        return branches[0] if len(branches) == 1 else ['choices', *branches]

    tree = parse_choices()
    if pos != len(text):  # leftover input, e.g. an unmatched ')'
        raise fail()
    return tree, alphabet


def _build_nfa(tree):
    """Turn a syntax tree from ``_parse_regexp`` into an epsilon-NFA.

    Uses Thompson-style fragments, each with one start and one accept state.

    Returns:
        ``(transitions, start, accept)`` where ``transitions`` is a set of
        ``(source, label, target)`` triples (``label`` is ``None`` for an
        epsilon move) and ``start`` / ``accept`` belong to the whole NFA.
    """
    transitions = set()
    ids = itertools.count(1)

    def link(src, dst):
        transitions.add((src, None, dst))

    def build(node):
        if isinstance(node, str):
            start, accept = next(ids), next(ids)
            transitions.add((start, node, accept))
            return start, accept

        kind, *operands = node
        if kind == 'sequence':
            start, accept = build(operands[0])
            for operand in operands[1:]:
                next_start, next_accept = build(operand)
                link(accept, next_start)
                accept = next_accept
            return start, accept

        start, accept = next(ids), next(ids)
        if kind == 'choices':
            for operand in operands:
                branch_start, branch_accept = build(operand)
                link(start, branch_start)
                link(branch_accept, accept)
        elif kind in ('closure', 'plus'):
            inner_start, inner_accept = build(operands[0])
            link(start, inner_start)
            link(inner_accept, accept)
            link(inner_accept, inner_start)  # repeat the operand
            if kind == 'closure':
                link(start, accept)  # zero repetitions allowed
        else:
            raise ValueError(f'Unknown expression node: {kind}')
        return start, accept

    start, accept = build(tree)
    return transitions, start, accept


def compile(regexp):
    """Build a DFA that recognises the regular expression ``regexp``.

    The expression is parsed, converted to an epsilon-NFA, and then made
    deterministic with the subset construction. See ``_parse_regexp`` for
    the accepted syntax; for example ``'C(A + B)+'`` means ``C`` followed
    by one or more ``A``/``B``.

    Only the start and accept state of the complete NFA are used for the
    DFA's start and accepting sets; the states of inner fragments are not.

    Args:
        regexp: The regular expression as a string.

    Returns:
        A ``DFA`` whose ``states`` is the list of state labels in discovery
        order (``'Q0'`` is the start state), ``inputs`` the sorted list of
        symbols, ``trans`` a dict mapping ``(state_label, symbol)`` to the
        target state label, ``start`` the start label and ``accepts`` the
        list of accepting labels. Missing ``trans`` entries mean the input
        is rejected.

    Raises:
        ValueError: If ``regexp`` is empty or malformed.
    """
    tree, alphabet = _parse_regexp(regexp)
    transitions, nfa_start, nfa_accept = _build_nfa(tree)

    # Index the transitions once so lookups below do not rescan the set.
    eps_edges = {}
    sym_edges = {}
    for src, label, dst in transitions:
        if label is None:
            eps_edges.setdefault(src, set()).add(dst)
        else:
            sym_edges.setdefault((src, label), set()).add(dst)

    def e_closure(states):
        """Return the epsilon closure of ``states`` as a frozenset."""
        closure = set(states)
        pending = list(closure)
        while pending:
            for target in eps_edges.get(pending.pop(), ()):
                if target not in closure:
                    closure.add(target)
                    pending.append(target)
        return frozenset(closure)

    # Sorted so that state numbering is reproducible between runs.
    symbols = sorted(alphabet)

    start_subset = e_closure({nfa_start})
    dfa_start = State(0, start_subset)
    dfa_states = [dfa_start]
    by_subset = {start_subset: dfa_start}
    dfa_trans = {}

    # Breadth-first exploration: the list doubles as the work queue.
    cursor = 0
    while cursor < len(dfa_states):
        state = dfa_states[cursor]
        cursor += 1
        for symbol in symbols:
            moved = set()
            for nfa_state in state.states:
                moved.update(sym_edges.get((nfa_state, symbol), ()))
            if not moved:
                continue  # no transition on this symbol: dead, left implicit
            subset = e_closure(moved)
            target = by_subset.get(subset)
            if target is None:
                target = State(len(dfa_states), subset)
                dfa_states.append(target)
                by_subset[subset] = target
            dfa_trans[(state.id, symbol)] = target.id

    dfa_accepts = [s.id for s in dfa_states if nfa_accept in s.states]
    return DFA([s.id for s in dfa_states],
               symbols,
               dfa_trans,
               dfa_start.id,
               dfa_accepts)
我们以 $C(A+B)^{+}$ 为例,构建 DFA,并用 graphviz 将其绘制出来。
from graphviz import Digraph
def draw(dfa):
    """Build a left-to-right Graphviz diagram of ``dfa`` and return it.

    Accepting states are declared with a double-circle shape and the start
    state is declared as a plain node. An edge labelled with the symbol is
    added for every ``(state, symbol)`` pair present in ``dfa.trans``;
    pairs without an entry are skipped.

    Args:
        dfa: Object exposing ``states``, ``inputs``, ``trans``, ``start``
            and ``accepts`` as set by the ``DFA`` class.

    Returns:
        The populated ``Digraph``.
    """
    graph = Digraph(comment='DFA')
    graph.attr(rankdir='LR')

    for accepting in dfa.accepts:
        graph.node(accepting, shape='doublecircle')
    graph.node(dfa.start)

    for source in dfa.states:
        for symbol in dfa.inputs:
            key = (source, symbol)
            if key not in dfa.trans:
                continue  # no transition defined for this pair
            graph.edge(source, dfa.trans[key], label=symbol)
    return graph
# Build the DFA for the example expression with the compile() function
# defined in this file (it shadows the built-in compile in this module).
dfa = compile('C(A + B)+')
# Build the Graphviz diagram. The returned Digraph is not assigned here;
# presumably this was a notebook cell whose last expression gets displayed
# (TODO confirm) -- in a plain script the result is discarded.
draw(dfa)