From 1da53dba48fbd24efd09c21c9823d7bf1aa85fc1 Mon Sep 17 00:00:00 2001 From: ayrton Date: Wed, 22 Nov 2023 13:50:56 -0700 Subject: [PATCH] Fixed common generation errors - Loops are now bound on the number of iterations by max-loop-iterations - Names now get removed from the list of possible names when used - Overflow errors in arithmetic are handled gracefully Took 1 hour 13 minutes --- ast_generator/ast_generator.py | 311 ++++++++++------------- ast_generator/test/test_ast_generator.py | 3 +- ast_generator/utils.py | 99 ++++++++ ast_parser/general_unparser.py | 5 + ast_parser/python_unparser.py | 2 +- constants.py | 1 + gazprea_fuzzer.py | 15 +- 7 files changed, 258 insertions(+), 178 deletions(-) create mode 100644 ast_generator/utils.py diff --git a/ast_generator/ast_generator.py b/ast_generator/ast_generator.py index 1684719..27c8d83 100644 --- a/ast_generator/ast_generator.py +++ b/ast_generator/ast_generator.py @@ -4,8 +4,11 @@ import xml.etree.ElementTree as ET from english_words import get_english_words_set +from ast_generator.utils import Variable, Argument, Routine, Scope, build_xml_element from constants import * +import keyword + class AstGenerator: """ @@ -33,13 +36,14 @@ class AstGenerator: """ self.settings = settings - self.symbol_table = [] # TODO this should be a list of scopes + self.symbol_table = [] global_scope = Scope(None, None) self.symbol_table.append(global_scope) # NOTE for debug self.current_scope = global_scope names = get_english_words_set(['web2'], alpha=True) - possible_names = filter(lambda x: self.settings['properties']['id-length']['max'] <= len(x) <= self.settings['properties']['id-length']['max'], names) + possible_names = filter(lambda x: self.settings['properties']['id-length']['max'] <= len(x) <= + self.settings['properties']['id-length']['max'] and not keyword.iskeyword(x), names) var_name_list = list(possible_names) var_name_len = len(var_name_list) @@ -61,7 +65,8 @@ class AstGenerator: self.bool_op_options, self.bool_op_cutoffs, self.bool_op_numline = ( self.get_numberlines('expression-weights', ['brackets', 'comparison', 'logical', 'unary'], - excluded_values=[[], ['less-than-or-equal', 'greater-than-or-equal', 'less-than', 'greater-than'], [], ['noop', 'negation']])) + excluded_values=[[], ['less-than-or-equal', 'greater-than-or-equal', 'less-than', + 'greater-than'], [], ['noop', 'negation']])) self.bool_unary = ['not'] self.float_op_options, self.float_op_cutoffs, self.float_op_numline = ( @@ -166,7 +171,10 @@ class AstGenerator: self.pop_scope() self.current_ast_element = parent - def generate_block(self, tag=None, return_stmt=False, return_value=None, return_type=None): + def generate_block(self, tag=None, return_stmt=False, return_value=None, return_type=None, block_type=None, + loop_var=None): + # TODO this should be broken into many functions depending on the block requirements + if tag is None: tag = [] parent = self.current_ast_element @@ -174,6 +182,12 @@ class AstGenerator: element = build_xml_element(tag, name=GAZ_BLOCK_TAG) self.current_ast_element.append(element) self.current_ast_element = element + + # Generate the loop condition increment if we are in a loop + if block_type == GAZ_LOOP_TAG: + self.generate_loop_condition_check(loop_var) + self.generate_loop_condition_increment(loop_var) + self.generate_statements() if return_stmt: self.generate_return(return_type=return_type, return_value=return_value) @@ -182,6 +196,81 @@ class AstGenerator: self.pop_scope() self.current_ast_element = parent + def generate_loop_condition_check(self, loop_var: Variable): + """ + @brief generates the loop condition check + + Ensures that the loop does not iterate more than max-loop-iterations times + + @param loop_var: + @return: + """ + # loop var is always an int + assert loop_var.type == GAZ_INT_KEY + + # create a conditional xml tag + if_stmt = build_xml_element([], name=GAZ_IF_TAG) + self.current_ast_element.append(if_stmt) + parent = self.current_ast_element + self.current_ast_element = if_stmt + + # add the check 'if loop_var >= self.settings['generation_options']['max-loop-iterations']: break' + operation = build_xml_element([("op", ">=")], name=GAZ_OPERATOR_TAG) + self.current_ast_element.append(operation) + self.current_ast_element = operation + + lhs = build_xml_element([], name=GAZ_LHS_TAG) + operation.append(lhs) + + var = build_xml_element([("name", loop_var.name), ("type", loop_var.type)], name=GAZ_VAR_TAG) + lhs.append(var) + + rhs = build_xml_element([], name=GAZ_RHS_TAG) + operation.append(rhs) + rhs.append(self.make_literal(GAZ_INT_KEY, "'" + str(self.settings['generation-options']['max-loop-iterations']) + "'")) + + true_block = build_xml_element([], name=GAZ_BLOCK_TAG) + if_stmt.append(true_block) + self.current_ast_element = true_block + break_stmt = build_xml_element([], name=GAZ_BREAK_TAG) + true_block.append(break_stmt) + + # return everything to normalcy + self.current_ast_element = parent + + def generate_loop_condition_increment(self, loop_var): + assert loop_var.type == GAZ_INT_KEY + + parent = self.current_ast_element + assignment = build_xml_element([], name=GAZ_ASSIGNMENT_TAG) + self.current_ast_element.append(assignment) + self.current_ast_element = assignment + + # append the variable + self.current_ast_element.append(loop_var.xml) + + # add the increment 'loop_var += 1' + assn_rhs = build_xml_element([], name=GAZ_RHS_TAG) + self.current_ast_element.append(assn_rhs) + self.current_ast_element = assn_rhs + + operation = build_xml_element([("op", "+")], name=GAZ_OPERATOR_TAG) + self.current_ast_element.append(operation) + self.current_ast_element = operation + + lhs = build_xml_element([], name=GAZ_LHS_TAG) + operation.append(lhs) + + var = build_xml_element([("name", loop_var.name), ("type", loop_var.type)], name=GAZ_VAR_TAG) + lhs.append(var) + + rhs = build_xml_element([], name=GAZ_RHS_TAG) + operation.append(rhs) + rhs.append(self.make_literal(GAZ_INT_KEY, '1')) + + # return everything to normalcy + self.current_ast_element = parent + def generate_return(self, return_type=None, return_value=None): if return_type is None or return_type == GAZ_VOID_TYPE: self.current_ast_element.append(build_xml_element([], name=GAZ_RETURN_TAG)) @@ -242,7 +331,7 @@ class AstGenerator: def generate_statements(self): # Number line - number_line = 180 #TODO fix the numberline stuff to reflect the settings + number_line = 180 # TODO fix the numberline stuff to reflect the settings cutoffs = [10, 30, 50, 80, 100, 140, 180] options = { 0: self.generate_declaration, @@ -390,13 +479,13 @@ class AstGenerator: self.current_ast_element = parent - def generate_xhs(self, handedness, op_type): + def generate_xhs(self, handedness, op_type, is_zero=False): element = build_xml_element([], name=handedness) parent = self.current_ast_element self.current_ast_element.append(element) self.current_ast_element = element - self.generate_expression(op_type) + self.generate_expression(op_type, is_zero=is_zero) self.current_ast_element = parent @@ -421,7 +510,8 @@ class AstGenerator: if self.current_control_flow_nesting_depth >= self.settings['generation-options']['max-nesting-depth']: return - if self.current_control_flow_nesting_depth > 0 and random.random() < self.settings['block-termination-probability']: + if self.current_control_flow_nesting_depth > 0 and random.random() < self.settings[ + 'block-termination-probability']: return element = build_xml_element([], name=GAZ_IF_TAG) @@ -441,13 +531,16 @@ class AstGenerator: self.pop_scope() self.current_ast_element = parent - def generate_loop(self): #fixme generation of infinite loops happens too often... + def generate_loop(self): # fixme generation of infinite loops happens too often... + # FIXME make sure that loop conditions are evaluated at least once (assert true or make a config param) if self.current_control_flow_nesting_depth >= self.settings['generation-options']['max-nesting-depth']: return - if self.current_control_flow_nesting_depth > 0 and random.random() < self.settings['block-termination-probability']: + if self.current_control_flow_nesting_depth > 0 and random.random() < self.settings[ + 'block-termination-probability']: return + init_var = self.generate_zero_declaration() parent = self.current_ast_element element = build_xml_element([], name=GAZ_LOOP_TAG) self.current_ast_element.append(element) @@ -456,10 +549,27 @@ class AstGenerator: self.current_control_flow_nesting_depth += 1 self.push_scope() self.generate_expression(GAZ_BOOL_KEY) - self.generate_block() + self.generate_block(block_type=GAZ_LOOP_TAG, + loop_var=init_var) # append a variable increment and prepend a break statement if var is > max loop iterations self.pop_scope() self.current_ast_element = parent + def generate_zero_declaration(self): + parent = self.current_ast_element + element = build_xml_element([], name=GAZ_DECLARATION_TAG) + + self.current_ast_element.append(element) + self.current_ast_element = element + + variable = self.generate_variable(GAZ_INT_KEY, 'var') + self.current_ast_element.append(variable.xml) + self.current_scope.append(variable.name, variable) + + self.generate_xhs(GAZ_RHS_TAG, variable.type, is_zero=True) + self.current_ast_element = parent + + return variable + def generate_assignment(self): possible_vars = self.current_scope.get_all_defined_mutable_vars() if len(possible_vars) == 0: @@ -504,11 +614,14 @@ class AstGenerator: else: return Variable(self.get_name(GAZ_VAR_TAG), var_type, mut) - - def generate_literal(self, var_type: str): + def generate_literal(self, var_type: str, value=None): + if value is None: + value = self.get_value(var_type) + else: + value = value args = [ ("type", var_type), - ("value", str(self.get_value(var_type))), + ("value", str(value)), ] element = build_xml_element(args, name=GAZ_LIT_TAG) self.current_ast_element.append(element) @@ -533,8 +646,11 @@ class AstGenerator: self.current_scope = current_scope self.current_ast_element = current_element - def generate_expression(self, expr_type: str): - if expr_type == GAZ_INT_KEY or expr_type == GAZ_FLOAT_KEY: + def generate_expression(self, expr_type: str, is_zero=False): + if is_zero: + self.generate_literal(expr_type, value=0) + return + elif expr_type == GAZ_INT_KEY or expr_type == GAZ_FLOAT_KEY: self.generate_int_expr() elif expr_type == GAZ_BOOL_KEY: if random.random() < 0.5: @@ -577,7 +693,7 @@ class AstGenerator: @return a qualifier as a string """ number_line = (self.settings["misc-weights"]["type-qualifier-weights"]["const"] + - self.settings["misc-weights"]["type-qualifier-weights"]["var"] -1 ) + self.settings["misc-weights"]["type-qualifier-weights"]["var"] - 1) res = random.randint(0, number_line) if res in range(0, self.settings["misc-weights"]["type-qualifier-weights"]["const"]): @@ -631,23 +747,14 @@ class AstGenerator: name = ''.join(random.choices(string.ascii_letters, k=length)) return name else: - return random.choice(self.variable_names) - - def get_op(self, type): - - if type == GAZ_INT_KEY: - cutoffs = [] - values = [] - ops = [] - for key, value in self.settings["expression-weights"]["arithmetic"]: - cutoffs.append(value + sum(cutoffs)) - values.append(value) - ops.append(get_op(key)) - - res = random.randint(0, sum(values)) - for i in range(len(cutoffs)): - if res < cutoffs[i]: - return ops[i] + if name_type == GAZ_VAR_TAG: + choice = random.choice(self.variable_names) + self.variable_names.remove(choice) + return choice + else: + choice = random.choice(self.routine_names) + self.routine_names.remove(choice) + return choice def get_type(self, tag): # TODO Add support for composite types return 'int' # TODO Add support for all types @@ -666,139 +773,3 @@ class AstGenerator: for i in range(len(cutoffs)): if res < cutoffs[i]: return types[i] - - -class Variable: - def __init__(self, name: str, type: str, qualifier: str, value: any = None): - self.name = name - self.type = type - self.value = value - self.qualifier = qualifier - self.xml = self._build_xml() - - def _build_xml(self): - args = [ - ('name', self.name), - ('type', self.type), - ('mut', self.qualifier), - ] - return build_xml_element(args, name=GAZ_VAR_TAG) - - -class Argument: - def __init__(self, name: str, type: str): - self.name = name - self.type = type - self.xml = self._build_xml() - - def __str__(self): - return self.type + " " + self.name - - def _build_xml(self): - args = [ - ('name', self.name), - ('type', self.type), - ] - return build_xml_element(args, name=GAZ_ARG_TAG) - - -class Routine: - def __init__(self, name: str, type: str, return_type: str, args: list[Argument], xml: ET.Element = None): - self.name = name - self.type = type - self.return_type = return_type - self.arguments = args - self.xml = xml - self.xml = xml - - -class Scope: - def __init__(self, enclosing_scope, child_scope=None, associated_xml: ET.Element = None): - self.symbols = {} - self.enclosing_scope = enclosing_scope - self.child_scope = child_scope - self.xml = associated_xml - - def resolve(self, name) -> ET.Element or None: - if name in self.symbols: - return self.symbols[name] - else: - return None - - def append(self, name, item: Variable or Argument or Routine): - self.symbols[name] = item - - def append_element(self, name, value: ET.Element): - self.symbols[name] = value - - def set(self, name, value: ET.Element): - self.symbols[name] = value - - def get_all_defined_mutable_vars(self) -> list[Variable]: - if self.enclosing_scope is None: - return self._get_mutable_vars() - else: - return self.enclosing_scope.get_all_defined_mutable_vars() + self._get_mutable_vars() - - def _get_mutable_vars(self) -> list[Variable]: - mutable_vars = [] - - for name, var in self.symbols.items(): - if not isinstance(var, Variable): - continue - if var.qualifier != 'const': - mutable_vars.append(self.symbols[name]) - return mutable_vars - - def get_top_scope(self): - if self.enclosing_scope is None: - return self - else: - return self.enclosing_scope.get_top_scope() - - -def build_xml_element(*keys, name): - elem = ET.Element(name) - for key in list(keys)[0]: # TODO refactor - elem.set(key[0], key[1]) - return elem - - -def get_op(op): - if op == 'addition' or 'noop': - return '+' - elif op == 'subtraction': - return '-' - elif op == 'multiplication': - return '*' - elif op == 'division': - return '/' - elif op == 'modulo': - return '%' - elif op == 'power': - return '^' - elif op == 'or': - return 'or' - elif op == 'and': - return 'and' - elif op == 'equality': - return '==' - elif op == 'inequality': - return '!=' - elif op == 'less-than': - return '<' - elif op == 'less-than-or-equal': - return '<=' - elif op == 'greater-than': - return '>' - elif op == 'greater-than-or-equal': - return '>=' - elif op == 'negation': - return '-' - elif op == 'not': - return 'not' - elif op == 'concatenation': - return '||' - else: - raise Exception("Unknown operator: " + op) - diff --git a/ast_generator/test/test_ast_generator.py b/ast_generator/test/test_ast_generator.py index 8009b47..e559d60 100644 --- a/ast_generator/test/test_ast_generator.py +++ b/ast_generator/test/test_ast_generator.py @@ -1,12 +1,11 @@ import unittest import xml -import xml.etree.ElementTree as ET import xml.dom.minidom import yaml from ast_generator.ast_generator import * -from ast_generator.gazprea_ast_grammar import * +from ast_generator.utils import Variable def reachable_return(block): diff --git a/ast_generator/utils.py b/ast_generator/utils.py new file mode 100644 index 0000000..563996e --- /dev/null +++ b/ast_generator/utils.py @@ -0,0 +1,99 @@ +from xml.etree import ElementTree as ET + +from constants import GAZ_VAR_TAG, GAZ_ARG_TAG + + +class Variable: + def __init__(self, name: str, type: str, qualifier: str, value: any = None): + self.name = name + self.type = type + self.value = value + self.qualifier = qualifier + self.xml = self._build_xml() + + def _build_xml(self): + args = [ + ('name', self.name), + ('type', self.type), + ('mut', self.qualifier), + ] + return build_xml_element(args, name=GAZ_VAR_TAG) + + +class Argument: + def __init__(self, name: str, type: str): + self.name = name + self.type = type + self.xml = self._build_xml() + + def __str__(self): + return self.type + " " + self.name + + def _build_xml(self): + args = [ + ('name', self.name), + ('type', self.type), + ] + return build_xml_element(args, name=GAZ_ARG_TAG) + + +class Routine: + def __init__(self, name: str, type: str, return_type: str, args: list[Argument], xml: ET.Element = None): + self.name = name + self.type = type + self.return_type = return_type + self.arguments = args + self.xml = xml + self.xml = xml + + +class Scope: + def __init__(self, enclosing_scope, child_scope=None, associated_xml: ET.Element = None): + self.symbols = {} + self.enclosing_scope = enclosing_scope + self.child_scope = child_scope + self.xml = associated_xml + + def resolve(self, name) -> ET.Element or None: + if name in self.symbols: + return self.symbols[name] + else: + return None + + def append(self, name, item: Variable or Argument or Routine): + self.symbols[name] = item + + def append_element(self, name, value: ET.Element): + self.symbols[name] = value + + def set(self, name, value: ET.Element): + self.symbols[name] = value + + def get_all_defined_mutable_vars(self) -> list[Variable]: + if self.enclosing_scope is None: + return self._get_mutable_vars() + else: + return self.enclosing_scope.get_all_defined_mutable_vars() + self._get_mutable_vars() + + def _get_mutable_vars(self) -> list[Variable]: + mutable_vars = [] + + for name, var in self.symbols.items(): + if not isinstance(var, Variable): + continue + if var.qualifier != 'const': + mutable_vars.append(self.symbols[name]) + return mutable_vars + + def get_top_scope(self): + if self.enclosing_scope is None: + return self + else: + return self.enclosing_scope.get_top_scope() + + +def build_xml_element(*keys, name): + elem = ET.Element(name) + for key in list(keys)[0]: # TODO refactor + elem.set(key[0], key[1]) + return elem diff --git a/ast_parser/general_unparser.py b/ast_parser/general_unparser.py index 98d59f9..4744db2 100644 --- a/ast_parser/general_unparser.py +++ b/ast_parser/general_unparser.py @@ -105,6 +105,8 @@ class GeneralUnparser: self.unparse_loop(node) elif node.tag == GAZ_BRACKET_TAG: self.unparse_brackets(node) + elif node.tag == GAZ_BREAK_TAG: + self.unparse_break(node) else: raise Exception("Unknown tag: " + node.tag) @@ -284,6 +286,9 @@ class GeneralUnparser: self.unparse_xhs(element_in.find(GAZ_RHS_TAG)) self.source += ")" + def unparse_break(self, element_in: ET.Element): + self.source += "break" + self.endline + def unparse_single_arg(self, param): return self.format_single_arg(self.translate_type(param.get(GAZ_TY_KEY)), param.get(GAZ_NAME_KEY)) diff --git a/ast_parser/python_unparser.py b/ast_parser/python_unparser.py index 1109788..89dff3e 100644 --- a/ast_parser/python_unparser.py +++ b/ast_parser/python_unparser.py @@ -69,7 +69,7 @@ class PythonUnparser(GeneralUnparser): conditional_else_delimiter="else:", conditional_end_delimiter=":", block_start_delimiter="", - block_end_delimiter="", + block_end_delimiter="", # TODO can this contain the pass? strip_conditionals=True) def format_variable(self, mut, ty, name, declaration: bool = False): diff --git a/constants.py b/constants.py index 55fa513..4761d92 100644 --- a/constants.py +++ b/constants.py @@ -46,3 +46,4 @@ GAZ_ARG_TAG = "argument" GAZ_STRING_KEY = "string" GAZ_CHAR_KEY = "char" GAZ_BRACKET_TAG = "brackets" +GAZ_BREAK_TAG = "break" diff --git a/gazprea_fuzzer.py b/gazprea_fuzzer.py index a979c3c..91c7154 100644 --- a/gazprea_fuzzer.py +++ b/gazprea_fuzzer.py @@ -33,16 +33,21 @@ class Fuzzer(): self.fuzzer.fuzz() dom = xml.dom.minidom.parseString(ET.tostring(self.fuzzer.ast).decode('utf-8')) pretty: str = dom.toprettyxml() - with open("fuzzer/input/{}_{}.in".format(self.file_name, i), 'w') as f: - f.write(self.fuzzer.source) - with open("fuzzer/debug/{}_{}.out".format(self.file_name, i), 'w') as f: - f.write(pretty) + with open("fuzzer/ground_truth/{}_{}.py".format(self.file_name, i), 'w') as f: f.write(self.fuzzer.ground_truth) with open("fuzzer/ground_truth/{}_{}.py".format(self.file_name, i), 'r') as f: with open("fuzzer/outputs/{}_{}.out".format(self.file_name, i), 'w') as y: with redirect_stdout(y): # Workaround for fuzzer.py:49 - exec(f.read(), globals(), locals()) + try: + exec(f.read(), globals(), locals()) + except OverflowError: + os.system("rm -f fuzzer/ground_truth/{}_{}.py".format(self.file_name, i)) + continue + with open("fuzzer/input/{}_{}.in".format(self.file_name, i), 'w') as f: + f.write(self.fuzzer.source) + with open("fuzzer/debug/{}_{}.out".format(self.file_name, i), 'w') as f: + f.write(pretty) # y.write(self.fuzzer.out) # with open("fuzzer/instream/{}.in".format(i), 'w') as f: # f.write(self.fuzzer.source)