Fixed common generation errors

- Loops are now bounded to at most max-loop-iterations iterations
- Names now get removed from the list of possible names when used
- Overflow errors in arithmetic are handled gracefully

Took 1 hour 13 minutes
This commit is contained in:
ayrton 2023-11-22 13:50:56 -07:00
parent 88ef999640
commit 1da53dba48
7 changed files with 258 additions and 178 deletions

View file

@ -4,8 +4,11 @@ import xml.etree.ElementTree as ET
from english_words import get_english_words_set
from ast_generator.utils import Variable, Argument, Routine, Scope, build_xml_element
from constants import *
import keyword
class AstGenerator:
"""
@ -33,13 +36,14 @@ class AstGenerator:
"""
self.settings = settings
self.symbol_table = [] # TODO this should be a list of scopes
self.symbol_table = []
global_scope = Scope(None, None)
self.symbol_table.append(global_scope) # NOTE for debug
self.current_scope = global_scope
names = get_english_words_set(['web2'], alpha=True)
possible_names = filter(lambda x: self.settings['properties']['id-length']['max'] <= len(x) <= self.settings['properties']['id-length']['max'], names)
possible_names = filter(lambda x: self.settings['properties']['id-length']['max'] <= len(x) <=
self.settings['properties']['id-length']['max'] and not keyword.iskeyword(x), names)
var_name_list = list(possible_names)
var_name_len = len(var_name_list)
@ -61,7 +65,8 @@ class AstGenerator:
self.bool_op_options, self.bool_op_cutoffs, self.bool_op_numline = (
self.get_numberlines('expression-weights',
['brackets', 'comparison', 'logical', 'unary'],
excluded_values=[[], ['less-than-or-equal', 'greater-than-or-equal', 'less-than', 'greater-than'], [], ['noop', 'negation']]))
excluded_values=[[], ['less-than-or-equal', 'greater-than-or-equal', 'less-than',
'greater-than'], [], ['noop', 'negation']]))
self.bool_unary = ['not']
self.float_op_options, self.float_op_cutoffs, self.float_op_numline = (
@ -166,7 +171,10 @@ class AstGenerator:
self.pop_scope()
self.current_ast_element = parent
def generate_block(self, tag=None, return_stmt=False, return_value=None, return_type=None):
def generate_block(self, tag=None, return_stmt=False, return_value=None, return_type=None, block_type=None,
loop_var=None):
# TODO this should be broken into many functions depending on the block requirements
if tag is None:
tag = []
parent = self.current_ast_element
@ -174,6 +182,12 @@ class AstGenerator:
element = build_xml_element(tag, name=GAZ_BLOCK_TAG)
self.current_ast_element.append(element)
self.current_ast_element = element
# Generate the loop condition increment if we are in a loop
if block_type == GAZ_LOOP_TAG:
self.generate_loop_condition_check(loop_var)
self.generate_loop_condition_increment(loop_var)
self.generate_statements()
if return_stmt:
self.generate_return(return_type=return_type, return_value=return_value)
@ -182,6 +196,81 @@ class AstGenerator:
self.pop_scope()
self.current_ast_element = parent
def generate_loop_condition_check(self, loop_var: Variable):
    """
    @brief generates the loop condition check

    Ensures that the loop does not iterate more than max-loop-iterations times
    by appending, to the current AST element, an if-element shaped like:

        if
          operator(op=">=")
            lhs -> variable(name, type of loop_var)
            rhs -> literal built by make_literal from max-loop-iterations
          block
            break

    self.current_ast_element is moved around while building and is restored
    to its value on entry before returning.

    @param loop_var: the loop counter variable; its type must be GAZ_INT_KEY
    @return: None
    """
    # loop var is always an int
    assert loop_var.type == GAZ_INT_KEY

    # create a conditional xml tag
    if_stmt = build_xml_element([], name=GAZ_IF_TAG)
    self.current_ast_element.append(if_stmt)
    # remember where we were so the cursor can be restored at the end
    parent = self.current_ast_element
    self.current_ast_element = if_stmt

    # add the check 'if loop_var >= self.settings['generation-options']['max-loop-iterations']: break'
    operation = build_xml_element([("op", ">=")], name=GAZ_OPERATOR_TAG)
    self.current_ast_element.append(operation)
    self.current_ast_element = operation

    # left operand: a fresh variable element referring to the loop counter by name and type
    lhs = build_xml_element([], name=GAZ_LHS_TAG)
    operation.append(lhs)
    var = build_xml_element([("name", loop_var.name), ("type", loop_var.type)], name=GAZ_VAR_TAG)
    lhs.append(var)

    # right operand: the configured iteration limit
    rhs = build_xml_element([], name=GAZ_RHS_TAG)
    operation.append(rhs)
    # NOTE(review): the limit is passed wrapped in single quotes, and make_literal is not
    # defined in the visible part of this file - confirm it returns an element and expects
    # a quoted value
    rhs.append(self.make_literal(GAZ_INT_KEY, "'" + str(self.settings['generation-options']['max-loop-iterations']) + "'"))

    # body of the if: a block containing only a break
    true_block = build_xml_element([], name=GAZ_BLOCK_TAG)
    if_stmt.append(true_block)
    self.current_ast_element = true_block
    break_stmt = build_xml_element([], name=GAZ_BREAK_TAG)
    true_block.append(break_stmt)

    # return everything to normalcy
    self.current_ast_element = parent
def generate_loop_condition_increment(self, loop_var: Variable):
    """
    @brief generates the loop counter increment

    Appends, to the current AST element, an assignment-element shaped like:

        assignment
          <loop_var.xml>
          rhs
            operator(op="+")
              lhs -> variable(name, type of loop_var)
              rhs -> literal built by make_literal(GAZ_INT_KEY, '1')

    self.current_ast_element is moved around while building and is restored
    to its value on entry before returning.

    @param loop_var: the loop counter variable; its type must be GAZ_INT_KEY
    @return: None
    """
    # loop var is always an int
    assert loop_var.type == GAZ_INT_KEY

    # remember where we were so the cursor can be restored at the end
    parent = self.current_ast_element
    assignment = build_xml_element([], name=GAZ_ASSIGNMENT_TAG)
    self.current_ast_element.append(assignment)
    self.current_ast_element = assignment

    # append the variable
    # (this is the element object stored on the Variable itself, not a copy)
    self.current_ast_element.append(loop_var.xml)

    # add the increment 'loop_var += 1'
    assn_rhs = build_xml_element([], name=GAZ_RHS_TAG)
    self.current_ast_element.append(assn_rhs)
    self.current_ast_element = assn_rhs

    operation = build_xml_element([("op", "+")], name=GAZ_OPERATOR_TAG)
    self.current_ast_element.append(operation)
    self.current_ast_element = operation

    # left operand: a fresh variable element referring to the loop counter by name and type
    lhs = build_xml_element([], name=GAZ_LHS_TAG)
    operation.append(lhs)
    var = build_xml_element([("name", loop_var.name), ("type", loop_var.type)], name=GAZ_VAR_TAG)
    lhs.append(var)

    # right operand: the constant step of one
    # NOTE(review): make_literal is not defined in the visible part of this file - confirm
    # it returns an element rather than appending to self.current_ast_element
    rhs = build_xml_element([], name=GAZ_RHS_TAG)
    operation.append(rhs)
    rhs.append(self.make_literal(GAZ_INT_KEY, '1'))

    # return everything to normalcy
    self.current_ast_element = parent
def generate_return(self, return_type=None, return_value=None):
if return_type is None or return_type == GAZ_VOID_TYPE:
self.current_ast_element.append(build_xml_element([], name=GAZ_RETURN_TAG))
@ -242,7 +331,7 @@ class AstGenerator:
def generate_statements(self):
# Number line
number_line = 180 #TODO fix the numberline stuff to reflect the settings
number_line = 180 # TODO fix the numberline stuff to reflect the settings
cutoffs = [10, 30, 50, 80, 100, 140, 180]
options = {
0: self.generate_declaration,
@ -390,13 +479,13 @@ class AstGenerator:
self.current_ast_element = parent
def generate_xhs(self, handedness, op_type):
def generate_xhs(self, handedness, op_type, is_zero=False):
element = build_xml_element([], name=handedness)
parent = self.current_ast_element
self.current_ast_element.append(element)
self.current_ast_element = element
self.generate_expression(op_type)
self.generate_expression(op_type, is_zero=is_zero)
self.current_ast_element = parent
@ -421,7 +510,8 @@ class AstGenerator:
if self.current_control_flow_nesting_depth >= self.settings['generation-options']['max-nesting-depth']:
return
if self.current_control_flow_nesting_depth > 0 and random.random() < self.settings['block-termination-probability']:
if self.current_control_flow_nesting_depth > 0 and random.random() < self.settings[
'block-termination-probability']:
return
element = build_xml_element([], name=GAZ_IF_TAG)
@ -441,13 +531,16 @@ class AstGenerator:
self.pop_scope()
self.current_ast_element = parent
def generate_loop(self): #fixme generation of infinite loops happens too often...
def generate_loop(self): # fixme generation of infinite loops happens too often...
# FIXME make sure that loop conditions are evaluated at least once (assert true or make a config param)
if self.current_control_flow_nesting_depth >= self.settings['generation-options']['max-nesting-depth']:
return
if self.current_control_flow_nesting_depth > 0 and random.random() < self.settings['block-termination-probability']:
if self.current_control_flow_nesting_depth > 0 and random.random() < self.settings[
'block-termination-probability']:
return
init_var = self.generate_zero_declaration()
parent = self.current_ast_element
element = build_xml_element([], name=GAZ_LOOP_TAG)
self.current_ast_element.append(element)
@ -456,10 +549,27 @@ class AstGenerator:
self.current_control_flow_nesting_depth += 1
self.push_scope()
self.generate_expression(GAZ_BOOL_KEY)
self.generate_block()
self.generate_block(block_type=GAZ_LOOP_TAG,
loop_var=init_var) # append a variable increment and prepend a break statement if var is > max loop iterations
self.pop_scope()
self.current_ast_element = parent
def generate_zero_declaration(self):
    """
    @brief generates the declaration of an int variable whose right-hand side is requested as zero

    Appends a declaration element to the current AST element, creates an int
    variable with qualifier 'var' via generate_variable, registers it in the
    current scope, and generates the right-hand side with is_zero=True.
    self.current_ast_element is restored to its value on entry before returning.

    @return: the Variable that was declared
    """
    # remember where we were so the cursor can be restored at the end
    parent = self.current_ast_element
    element = build_xml_element([], name=GAZ_DECLARATION_TAG)
    self.current_ast_element.append(element)
    self.current_ast_element = element

    # the declared variable is always an int with the 'var' qualifier
    variable = self.generate_variable(GAZ_INT_KEY, 'var')
    self.current_ast_element.append(variable.xml)
    # make the variable visible to later statements generated in this scope
    self.current_scope.append(variable.name, variable)

    # is_zero=True is forwarded to generate_expression by generate_xhs
    self.generate_xhs(GAZ_RHS_TAG, variable.type, is_zero=True)

    self.current_ast_element = parent
    return variable
def generate_assignment(self):
possible_vars = self.current_scope.get_all_defined_mutable_vars()
if len(possible_vars) == 0:
@ -504,11 +614,14 @@ class AstGenerator:
else:
return Variable(self.get_name(GAZ_VAR_TAG), var_type, mut)
def generate_literal(self, var_type: str):
def generate_literal(self, var_type: str, value=None):
if value is None:
value = self.get_value(var_type)
else:
value = value
args = [
("type", var_type),
("value", str(self.get_value(var_type))),
("value", str(value)),
]
element = build_xml_element(args, name=GAZ_LIT_TAG)
self.current_ast_element.append(element)
@ -533,8 +646,11 @@ class AstGenerator:
self.current_scope = current_scope
self.current_ast_element = current_element
def generate_expression(self, expr_type: str):
if expr_type == GAZ_INT_KEY or expr_type == GAZ_FLOAT_KEY:
def generate_expression(self, expr_type: str, is_zero=False):
if is_zero:
self.generate_literal(expr_type, value=0)
return
elif expr_type == GAZ_INT_KEY or expr_type == GAZ_FLOAT_KEY:
self.generate_int_expr()
elif expr_type == GAZ_BOOL_KEY:
if random.random() < 0.5:
@ -577,7 +693,7 @@ class AstGenerator:
@return a qualifier as a string
"""
number_line = (self.settings["misc-weights"]["type-qualifier-weights"]["const"] +
self.settings["misc-weights"]["type-qualifier-weights"]["var"] -1 )
self.settings["misc-weights"]["type-qualifier-weights"]["var"] - 1)
res = random.randint(0, number_line)
if res in range(0, self.settings["misc-weights"]["type-qualifier-weights"]["const"]):
@ -631,23 +747,14 @@ class AstGenerator:
name = ''.join(random.choices(string.ascii_letters, k=length))
return name
else:
return random.choice(self.variable_names)
def get_op(self, type):
if type == GAZ_INT_KEY:
cutoffs = []
values = []
ops = []
for key, value in self.settings["expression-weights"]["arithmetic"]:
cutoffs.append(value + sum(cutoffs))
values.append(value)
ops.append(get_op(key))
res = random.randint(0, sum(values))
for i in range(len(cutoffs)):
if res < cutoffs[i]:
return ops[i]
if name_type == GAZ_VAR_TAG:
choice = random.choice(self.variable_names)
self.variable_names.remove(choice)
return choice
else:
choice = random.choice(self.routine_names)
self.routine_names.remove(choice)
return choice
def get_type(self, tag): # TODO Add support for composite types
return 'int' # TODO Add support for all types
@ -666,139 +773,3 @@ class AstGenerator:
for i in range(len(cutoffs)):
if res < cutoffs[i]:
return types[i]
class Variable:
    """A declared variable tracked by the generator, together with its XML node.

    The XML node is built once in ``__init__`` from the name, type and
    qualifier given at construction time.
    """

    def __init__(self, name: str, type: str, qualifier: str, value: object = None):
        """Store the variable's description and build its XML element.

        @param name: identifier of the variable
        @param type: type name, written to the ``type`` XML attribute
        @param qualifier: qualifier, written to the ``mut`` XML attribute
        @param value: optional value kept on the instance; not written to the XML
        """
        self.name = name
        self.type = type
        self.value = value
        self.qualifier = qualifier
        self.xml = self._build_xml()

    def _build_xml(self):
        """Return a GAZ_VAR_TAG element carrying the name, type and qualifier."""
        attributes = [
            ('name', self.name),
            ('type', self.type),
            ('mut', self.qualifier),
        ]
        return build_xml_element(attributes, name=GAZ_VAR_TAG)
class Argument:
    """A routine argument: a name/type pair together with its XML node."""

    def __init__(self, name: str, type: str):
        """Store the argument's name and type and build its XML element.

        @param name: identifier of the argument
        @param type: type name, written to the ``type`` XML attribute
        """
        self.name, self.type = name, type
        self.xml = self._build_xml()

    def __str__(self):
        """Return the argument rendered as ``<type> <name>``."""
        return " ".join((self.type, self.name))

    def _build_xml(self):
        """Return a GAZ_ARG_TAG element carrying the name and type."""
        return build_xml_element(
            [('name', self.name), ('type', self.type)],
            name=GAZ_ARG_TAG,
        )
class Routine:
    """Description of a generated routine: its name, kind, return type and arguments."""

    def __init__(self, name: str, type: str, return_type: str, args: "list[Argument]",
                 xml: "ET.Element | None" = None):
        """Store the routine's description.

        @param name: identifier of the routine
        @param type: kind of routine, stored as given
        @param return_type: return type name, stored as given
        @param args: the routine's arguments, stored as ``self.arguments``
        @param xml: optional XML element associated with the routine
        """
        self.name = name
        self.type = type
        self.return_type = return_type
        self.arguments = args
        # assigned once; the original repeated this statement
        self.xml = xml
class Scope:
    """One level of the symbol table: a name-to-symbol mapping linked to its enclosing scope."""

    def __init__(self, enclosing_scope, child_scope=None,
                 associated_xml: "ET.Element | None" = None):
        """Create an empty scope.

        @param enclosing_scope: the parent scope, or None for the outermost scope
        @param child_scope: optional child scope, stored as given
        @param associated_xml: optional XML element associated with this scope
        """
        self.symbols = {}
        self.enclosing_scope = enclosing_scope
        self.child_scope = child_scope
        self.xml = associated_xml

    def resolve(self, name) -> "Variable | Argument | Routine | ET.Element | None":
        """Return the symbol stored under name in this scope, or None.

        Only this scope is searched; enclosing scopes are not consulted.
        """
        return self.symbols.get(name)

    def append(self, name, item: "Variable | Argument | Routine"):
        """Store item under name, replacing any existing entry."""
        self.symbols[name] = item

    def append_element(self, name, value: "ET.Element"):
        """Store the XML element value under name, replacing any existing entry."""
        self.symbols[name] = value

    def set(self, name, value: "ET.Element"):
        """Store value under name, replacing any existing entry."""
        self.symbols[name] = value

    def get_all_defined_mutable_vars(self) -> "list[Variable]":
        """Return the mutable variables of every enclosing scope followed by this scope's own."""
        if self.enclosing_scope is None:
            return self._get_mutable_vars()
        return self.enclosing_scope.get_all_defined_mutable_vars() + self._get_mutable_vars()

    def _get_mutable_vars(self) -> "list[Variable]":
        """Return this scope's Variable symbols whose qualifier is not 'const'."""
        return [
            symbol for symbol in self.symbols.values()
            if isinstance(symbol, Variable) and symbol.qualifier != 'const'
        ]

    def get_top_scope(self):
        """Return the outermost scope reachable through enclosing_scope links."""
        if self.enclosing_scope is None:
            return self
        return self.enclosing_scope.get_top_scope()
def build_xml_element(*keys, name):
    """Create an XML element called name and set attributes on it.

    Only the first positional argument is used: an iterable of
    (attribute, value) pairs. When no positional argument is given the
    element is created without attributes instead of raising IndexError.

    @param keys: at most one iterable of (attribute, value) pairs is read
    @param name: tag of the element to create (keyword-only)
    @return: the new ET.Element
    """
    elem = ET.Element(name)
    attributes = keys[0] if keys else ()
    for pair in attributes:
        elem.set(pair[0], pair[1])
    return elem
# Operator name (as used in the settings) -> symbol emitted for it.
_OPERATOR_SYMBOLS = {
    'addition': '+',
    'noop': '+',
    'subtraction': '-',
    'multiplication': '*',
    'division': '/',
    'modulo': '%',
    'power': '^',
    'or': 'or',
    'and': 'and',
    'equality': '==',
    'inequality': '!=',
    'less-than': '<',
    'less-than-or-equal': '<=',
    'greater-than': '>',
    'greater-than-or-equal': '>=',
    'negation': '-',
    'not': 'not',
    'concatenation': '||',
}


def get_op(op):
    """Translate an operator name into its symbol.

    The previous first test, ``op == 'addition' or 'noop'``, was always
    truthy, so every name was translated to '+'. A table lookup replaces
    the if/elif chain and fixes that.

    @param op: operator name, e.g. 'addition' or 'less-than'
    @return: the operator symbol as a string
    @raise Exception: if op is not a known operator name
    """
    try:
        return _OPERATOR_SYMBOLS[op]
    except KeyError:
        raise Exception("Unknown operator: " + op) from None

View file

@ -1,12 +1,11 @@
import unittest
import xml
import xml.etree.ElementTree as ET
import xml.dom.minidom
import yaml
from ast_generator.ast_generator import *
from ast_generator.gazprea_ast_grammar import *
from ast_generator.utils import Variable
def reachable_return(block):

99
ast_generator/utils.py Normal file
View file

@ -0,0 +1,99 @@
from xml.etree import ElementTree as ET
from constants import GAZ_VAR_TAG, GAZ_ARG_TAG
class Variable:
    """A declared variable tracked by the generator, together with its XML node.

    The XML node is built once in ``__init__`` from the name, type and
    qualifier given at construction time.
    """

    def __init__(self, name: str, type: str, qualifier: str, value: object = None):
        """Store the variable's description and build its XML element.

        @param name: identifier of the variable
        @param type: type name, written to the ``type`` XML attribute
        @param qualifier: qualifier, written to the ``mut`` XML attribute
        @param value: optional value kept on the instance; not written to the XML
        """
        self.name = name
        self.type = type
        self.value = value
        self.qualifier = qualifier
        self.xml = self._build_xml()

    def _build_xml(self):
        """Return a GAZ_VAR_TAG element carrying the name, type and qualifier."""
        attributes = [
            ('name', self.name),
            ('type', self.type),
            ('mut', self.qualifier),
        ]
        return build_xml_element(attributes, name=GAZ_VAR_TAG)
class Argument:
    """A routine argument: a name/type pair together with its XML node."""

    def __init__(self, name: str, type: str):
        """Store the argument's name and type and build its XML element.

        @param name: identifier of the argument
        @param type: type name, written to the ``type`` XML attribute
        """
        self.name, self.type = name, type
        self.xml = self._build_xml()

    def __str__(self):
        """Return the argument rendered as ``<type> <name>``."""
        return " ".join((self.type, self.name))

    def _build_xml(self):
        """Return a GAZ_ARG_TAG element carrying the name and type."""
        return build_xml_element(
            [('name', self.name), ('type', self.type)],
            name=GAZ_ARG_TAG,
        )
class Routine:
    """Description of a generated routine: its name, kind, return type and arguments."""

    def __init__(self, name: str, type: str, return_type: str, args: "list[Argument]",
                 xml: "ET.Element | None" = None):
        """Store the routine's description.

        @param name: identifier of the routine
        @param type: kind of routine, stored as given
        @param return_type: return type name, stored as given
        @param args: the routine's arguments, stored as ``self.arguments``
        @param xml: optional XML element associated with the routine
        """
        self.name = name
        self.type = type
        self.return_type = return_type
        self.arguments = args
        # assigned once; the original repeated this statement
        self.xml = xml
class Scope:
    """One level of the symbol table: a name-to-symbol mapping linked to its enclosing scope."""

    def __init__(self, enclosing_scope, child_scope=None,
                 associated_xml: "ET.Element | None" = None):
        """Create an empty scope.

        @param enclosing_scope: the parent scope, or None for the outermost scope
        @param child_scope: optional child scope, stored as given
        @param associated_xml: optional XML element associated with this scope
        """
        self.symbols = {}
        self.enclosing_scope = enclosing_scope
        self.child_scope = child_scope
        self.xml = associated_xml

    def resolve(self, name) -> "Variable | Argument | Routine | ET.Element | None":
        """Return the symbol stored under name in this scope, or None.

        Only this scope is searched; enclosing scopes are not consulted.
        """
        return self.symbols.get(name)

    def append(self, name, item: "Variable | Argument | Routine"):
        """Store item under name, replacing any existing entry."""
        self.symbols[name] = item

    def append_element(self, name, value: "ET.Element"):
        """Store the XML element value under name, replacing any existing entry."""
        self.symbols[name] = value

    def set(self, name, value: "ET.Element"):
        """Store value under name, replacing any existing entry."""
        self.symbols[name] = value

    def get_all_defined_mutable_vars(self) -> "list[Variable]":
        """Return the mutable variables of every enclosing scope followed by this scope's own."""
        if self.enclosing_scope is None:
            return self._get_mutable_vars()
        return self.enclosing_scope.get_all_defined_mutable_vars() + self._get_mutable_vars()

    def _get_mutable_vars(self) -> "list[Variable]":
        """Return this scope's Variable symbols whose qualifier is not 'const'."""
        return [
            symbol for symbol in self.symbols.values()
            if isinstance(symbol, Variable) and symbol.qualifier != 'const'
        ]

    def get_top_scope(self):
        """Return the outermost scope reachable through enclosing_scope links."""
        if self.enclosing_scope is None:
            return self
        return self.enclosing_scope.get_top_scope()
def build_xml_element(*keys, name):
    """Create an XML element called name and set attributes on it.

    Only the first positional argument is used: an iterable of
    (attribute, value) pairs. When no positional argument is given the
    element is created without attributes instead of raising IndexError.

    @param keys: at most one iterable of (attribute, value) pairs is read
    @param name: tag of the element to create (keyword-only)
    @return: the new ET.Element
    """
    elem = ET.Element(name)
    attributes = keys[0] if keys else ()
    for pair in attributes:
        elem.set(pair[0], pair[1])
    return elem

View file

@ -105,6 +105,8 @@ class GeneralUnparser:
self.unparse_loop(node)
elif node.tag == GAZ_BRACKET_TAG:
self.unparse_brackets(node)
elif node.tag == GAZ_BREAK_TAG:
self.unparse_break(node)
else:
raise Exception("Unknown tag: " + node.tag)
@ -284,6 +286,9 @@ class GeneralUnparser:
self.unparse_xhs(element_in.find(GAZ_RHS_TAG))
self.source += ")"
def unparse_break(self, element_in: ET.Element):
self.source += "break" + self.endline
def unparse_single_arg(self, param):
return self.format_single_arg(self.translate_type(param.get(GAZ_TY_KEY)), param.get(GAZ_NAME_KEY))

View file

@ -69,7 +69,7 @@ class PythonUnparser(GeneralUnparser):
conditional_else_delimiter="else:",
conditional_end_delimiter=":",
block_start_delimiter="",
block_end_delimiter="",
block_end_delimiter="", # TODO can this contain the pass?
strip_conditionals=True)
def format_variable(self, mut, ty, name, declaration: bool = False):

View file

@ -46,3 +46,4 @@ GAZ_ARG_TAG = "argument"
GAZ_STRING_KEY = "string"
GAZ_CHAR_KEY = "char"
GAZ_BRACKET_TAG = "brackets"
GAZ_BREAK_TAG = "break"

View file

@ -33,16 +33,21 @@ class Fuzzer():
self.fuzzer.fuzz()
dom = xml.dom.minidom.parseString(ET.tostring(self.fuzzer.ast).decode('utf-8'))
pretty: str = dom.toprettyxml()
with open("fuzzer/input/{}_{}.in".format(self.file_name, i), 'w') as f:
f.write(self.fuzzer.source)
with open("fuzzer/debug/{}_{}.out".format(self.file_name, i), 'w') as f:
f.write(pretty)
with open("fuzzer/ground_truth/{}_{}.py".format(self.file_name, i), 'w') as f:
f.write(self.fuzzer.ground_truth)
with open("fuzzer/ground_truth/{}_{}.py".format(self.file_name, i), 'r') as f:
with open("fuzzer/outputs/{}_{}.out".format(self.file_name, i), 'w') as y:
with redirect_stdout(y): # Workaround for fuzzer.py:49
exec(f.read(), globals(), locals())
try:
exec(f.read(), globals(), locals())
except OverflowError:
os.system("rm -f fuzzer/ground_truth/{}_{}.py".format(self.file_name, i))
continue
with open("fuzzer/input/{}_{}.in".format(self.file_name, i), 'w') as f:
f.write(self.fuzzer.source)
with open("fuzzer/debug/{}_{}.out".format(self.file_name, i), 'w') as f:
f.write(pretty)
# y.write(self.fuzzer.out)
# with open("fuzzer/instream/{}.in".format(i), 'w') as f:
# f.write(self.fuzzer.source)