diff --git a/ast_generator/ast_generator.py b/ast_generator/ast_generator.py index 3bde50a..0fc6cb5 100644 --- a/ast_generator/ast_generator.py +++ b/ast_generator/ast_generator.py @@ -1,10 +1,9 @@ -import random import string -import xml.etree.ElementTree as ET from english_words import get_english_words_set -from ast_generator.utils import Variable, Argument, Routine, Scope, build_xml_element +from ast_generator.utils import * +from ast_generator.utils import filter_options, _choose_option from constants import * import keyword @@ -25,6 +24,7 @@ class AstGenerator: falls into will be selected. """ + ### INITIALIZATION ### def __init__(self, settings: dict): """ This class is designed to get the settings from some wrapper class that @@ -41,136 +41,92 @@ class AstGenerator: self.symbol_table.append(global_scope) # NOTE for debug self.current_scope = global_scope - names = get_english_words_set(['web2'], alpha=True) - possible_names = filter(lambda x: self.settings['properties']['id-length']['max'] <= len(x) <= - self.settings['properties']['id-length']['max'] and not keyword.iskeyword(x), - names) - - var_name_list = list(possible_names) - var_name_len = len(var_name_list) - self.variable_names = var_name_list[0:var_name_len // 2] - self.routine_names = var_name_list[var_name_len // 2:var_name_len] + self._init_names() self.ast: ET.Element or None = None self.current_ast_element: ET.Element or None = None self.current_nesting_depth = 0 self.current_control_flow_nesting_depth = 0 + self._init_numlines() + + def _init_numlines(self): # Numberlines - For computing probabilities self.int_op_options, self.int_op_cutoffs, self.int_op_numline = ( - self.get_numberlines('expression-weights', - ['brackets', 'arithmetic', 'unary'], - [[], [], ['not']])) + get_numberlines('expression-weights', ['brackets', 'arithmetic', 'unary'], [[], [], ['not']], + self.settings)) self.int_unary = ['negation', 'noop'] - self.bool_op_options, self.bool_op_cutoffs, self.bool_op_numline = ( - self.get_numberlines('expression-weights', - ['brackets', 'comparison', 'logical', 'unary'], - excluded_values=[[], ['less-than-or-equal', 'greater-than-or-equal', 'less-than', - 'greater-than'], [], ['noop', 'negation']])) + get_numberlines('expression-weights', ['brackets', 'comparison', 'logical', 'unary'], + excluded_values=[[], ['less-than-or-equal', 'greater-than-or-equal', 'less-than', + 'greater-than'], [], ['noop', 'negation']], + settings=self.settings)) self.bool_unary = ['not'] - self.float_op_options, self.float_op_cutoffs, self.float_op_numline = ( - self.get_numberlines('expression-weights', - ['brackets', 'arithmetic', 'unary'], - [[], [], ['not']])) + get_numberlines('expression-weights', ['brackets', 'arithmetic', 'unary'], [[], [], ['not']], + self.settings)) self.float_unary = ['negation', 'noop'] - self.char_op_options, self.char_op_cutoffs, self.char_op_numline = ( - self.get_numberlines('expression-weights', - ['brackets', 'comparison'], - [[], ['less-than', 'greater-than', 'less-than-or-equal', 'greater-than-or-equal']])) - + get_numberlines('expression-weights', ['brackets', 'comparison'], + [[], ['less-than', 'greater-than', 'less-than-or-equal', 'greater-than-or-equal']], + self.settings)) self.comp_op_options, self.comp_op_cutoffs, self.comp_op_numline = ( - self.get_numberlines('expression-weights', - ['brackets', 'comparison'], - [[], []])) + get_numberlines('expression-weights', ['brackets', 'comparison'], [[], []], self.settings)) - def get_numberlines(self, settings_section: str, subsettings: list[str], excluded_values): - assert len(subsettings) == len(excluded_values) + self.type_options, self.type_cutoffs, self.type_numline = ( + get_numberlines('type-weights', ['composite', 'atomic'], [[], []], self.settings)) - number_line = 0 - cutoffs = [] - cutoff = 0 - options = {} - option = 0 + self.atomic_type_options, self.atomic_type_cutoffs, self.atomic_type_numline = ( + get_numberlines('type-weights', ['atomic-types'], [[]], self.settings)) - settings = [] + self.composite_type_options, self.composite_type_cutoffs, self.composite_type_numline = ( + get_numberlines('type-weights', ['composite-types'], [[]], self.settings)) - for key, value in self.settings[settings_section].items(): - if key in subsettings and key not in excluded_values: # this check needs to be done recursively - if isinstance(value, int): - t = { - key: value - } - settings.append(t) - elif isinstance(value, dict): - settings.append(value) - else: - raise TypeError("invalid setting type. Found " + str(value) + " instead of expected int or dict") - - for v in range(len(settings)): - for i in excluded_values: - for j in i: - if j in settings[v]: - settings[v].pop(j) - - for v in settings: - if isinstance(v, dict): - for key, value in v.items(): - number_line += value - cutoffs.append(cutoff + value) - cutoff += value - options[option] = key - option += 1 - elif isinstance(v, int): - number_line += v - cutoffs.append(cutoff + v) - cutoff += v - options[option] = v - option += 1 - else: - raise TypeError("invalid setting type. Found " + str(v) + " instead of expected int") - - return options, cutoffs, number_line + def _init_names(self): + names = get_english_words_set(['web2'], alpha=True) + possible_names = filter(lambda x: self.settings['properties']['id-length']['max'] <= len(x) <= + self.settings['properties']['id-length']['max'] and not keyword.iskeyword(x), + names) + var_name_list = list(possible_names) + var_name_len = len(var_name_list) + self.variable_names = var_name_list[0:var_name_len // 2] + self.routine_names = var_name_list[var_name_len // 2:var_name_len] + ### GENERATION ### def generate_ast(self): """ @brief generates an AST from a grammar """ self.generate_top_level_block() - def generate_top_level_block(self): # TODO add constant generation into this block - i = 0 - - element = build_xml_element([], name=GAZ_BLOCK_TAG) - self.current_ast_element = element - + def generate_top_level_block(self): + """ + @brief creates the top-level block containing the whole program + """ + element = self.make_element(GAZ_BLOCK_TAG, []) self.ast = element - # TODO generate constants and forward declarations - while i < self.settings['generation-options']['max-number-of-routines']: + for i in range(random.randint(0, self.settings['generation-options']['max-globals'])): + self.generate_global() + for i in range(self.settings['generation-options']['max-number-of-routines']): if random.random() < self.settings['block-termination-probability']: break self.generate_routine() - i += 1 self.generate_main() + pass def generate_main(self): - parent = self.current_ast_element - self.push_scope() main_args = [ # TODO refactor these into constants - ("name", "main"), - ("return_type", GAZ_INT_KEY), - ("args", "()"), + (GAZ_NAME_KEY, "main"), + (GAZ_RETURN_KEY, GAZ_INT_KEY), ] - element = build_xml_element(main_args, name=GAZ_PROCEDURE_TAG) - self.current_ast_element.append(element) - self.current_ast_element = element + + parent = self.make_scoped_element(GAZ_PROCEDURE_TAG, main_args) + self.generate_block(return_stmt=True, return_value="0", return_type=GAZ_INT_KEY, block_type=GAZ_PROCEDURE_TAG) - self.pop_scope() - self.current_ast_element = parent + + self.exit_scoped_element(parent) def generate_block(self, tag=None, return_stmt=False, return_value=None, return_type=None, block_type=None, loop_var=None): @@ -202,136 +158,71 @@ class AstGenerator: self.pop_scope() self.current_ast_element = parent - def generate_loop_condition_check(self, loop_var: Variable): - """ - @brief generates the loop condition check - - Ensures that the loop does not iterate more than max-loop-iterations times - - @param loop_var: - @return: - """ - # loop var is always an int - assert loop_var.type == GAZ_INT_KEY - - # create a conditional xml tag - if_stmt = build_xml_element([], name=GAZ_IF_TAG) - self.current_ast_element.append(if_stmt) - parent = self.current_ast_element - self.current_ast_element = if_stmt - - # add the check 'if loop_var >= self.settings['generation_options']['max-loop-iterations']: break' - operation = build_xml_element([("op", ">=")], name=GAZ_OPERATOR_TAG) - self.current_ast_element.append(operation) - self.current_ast_element = operation - - lhs = build_xml_element([], name=GAZ_LHS_TAG) - operation.append(lhs) - - var = build_xml_element([("name", loop_var.name), ("type", loop_var.type)], name=GAZ_VAR_TAG) - lhs.append(var) - - rhs = build_xml_element([], name=GAZ_RHS_TAG) - operation.append(rhs) - rhs.append( - self.make_literal(GAZ_INT_KEY, "'" + str(self.settings['generation-options']['max-loop-iterations']) + "'")) - - true_block = build_xml_element([], name=GAZ_BLOCK_TAG) - if_stmt.append(true_block) - self.current_ast_element = true_block - break_stmt = build_xml_element([], name=GAZ_BREAK_TAG) - true_block.append(break_stmt) - - # return everything to normalcy - self.current_ast_element = parent - - def generate_loop_condition_increment(self, loop_var): - assert loop_var.type == GAZ_INT_KEY - - parent = self.current_ast_element - assignment = build_xml_element([], name=GAZ_ASSIGNMENT_TAG) - self.current_ast_element.append(assignment) - self.current_ast_element = assignment - - # append the variable - self.current_ast_element.append(loop_var.xml) - - # add the increment 'loop_var += 1' - assn_rhs = build_xml_element([], name=GAZ_RHS_TAG) - self.current_ast_element.append(assn_rhs) - self.current_ast_element = assn_rhs - - operation = build_xml_element([("op", "+")], name=GAZ_OPERATOR_TAG) - self.current_ast_element.append(operation) - self.current_ast_element = operation - - lhs = build_xml_element([], name=GAZ_LHS_TAG) - operation.append(lhs) - - var = build_xml_element([("name", loop_var.name), ("type", loop_var.type)], name=GAZ_VAR_TAG) - lhs.append(var) - - rhs = build_xml_element([], name=GAZ_RHS_TAG) - operation.append(rhs) - rhs.append(self.make_literal(GAZ_INT_KEY, '1')) - - # return everything to normalcy - self.current_ast_element = parent - def generate_return(self, return_type=None, return_value=None): + """ + @brief generates a return statement + + @param return_type: the type to be returned (if None -> any) + @param return_value: value to be returned (if None -> expr[return_type]) + """ if return_type is None or return_type == GAZ_VOID_TYPE: - self.current_ast_element.append(build_xml_element([], name=GAZ_RETURN_TAG)) - return + self.current_ast_element.append(self.make_element(GAZ_RETURN_TAG, [])) else: + # store the parent + parent = self.current_ast_element + + # initialize element + keys = [("type", return_type)] + self.make_element(GAZ_RETURN_TAG, keys) + + # make either a literal or a random expression based on choice if return_value is None: - xml_element = build_xml_element([("type", return_type)], name=GAZ_RETURN_TAG) - self.current_ast_element.append(xml_element) - parent = self.current_ast_element - self.current_ast_element = xml_element self.generate_expression(return_type) - self.current_ast_element = parent - return else: - xml_element = build_xml_element([("type", return_type)], name=GAZ_RETURN_TAG) - self.current_ast_element.append(xml_element) - parent = self.current_ast_element - self.current_ast_element = xml_element - self.current_ast_element.append(self.make_literal(return_type, return_value)) - self.current_ast_element = parent - return + self.generate_literal(return_type, return_value) + + # return to the parent + self.current_ast_element = parent def generate_routine(self, routine_type=None): + """ + @brief generate a random routine + + @param return_type: the type to be returned (if None -> any (including void)) + """ if routine_type is None: - routine_type = self.get_routine_type() + routine_type = self.get_routine_type() # get a random type else: - routine_type = routine_type + pass + # initialize random variables args = self.generate_routine_args() - name = self.get_name(routine_type) return_type = self.get_type(routine_type) + # initialize the routine routine = Routine(name, routine_type, return_type, args) - routine_args = [ ("name", routine.name), ("return_type", routine.return_type), ] - element = build_xml_element(routine_args, name=routine.type) - self.current_ast_element.append(element) + # Generation parent = self.current_ast_element - self.current_ast_element = element - self.push_scope() + self.make_scoped_element(routine.type, routine_args) self.define_args(routine.arguments) self.generate_block(return_stmt=True, return_type=routine.return_type) - self.pop_scope() - self.current_ast_element = parent + self.exit_scoped_element(parent) def define_args(self, args): + """ + @brief Generate the argument tags in a routine + + @param args: a list of arguments + """ for arg in args: self.current_ast_element.append(arg.xml) self.current_scope.append(arg.name, arg) @@ -353,36 +244,371 @@ class AstGenerator: 6: self.generate_in_stream, } - if include is not None and exclude is not None: - raise ValueError("Cannot specify both include and exclude") - elif include is not None and include in opts: - for i in range(len(opts)): - if opts[i] in include: - continue - else: - options.pop(opts.index(opts[i])) - elif exclude is not None and exclude in opts: - options.pop(opts.index(exclude)) - elif include is None and exclude is None: - pass + # Filter unwanted options + filter_options(exclude, include, options, opts) + + # Generate the statements + self._generate_from_options(cutoffs, number_line, options) + + def _generate_expression(self, expr_type: list[str], number_line, + cutoffs, options, unary=None, comparison: bool = False): + """ + @brief Generate an expression + + @param expr_type: a list of types to be used + @param number_line: number line for probability computation + @param cutoffs: cutoffs to be used + @param options: options to be used + @param unary: a list of unary operators in options + """ + if unary is None: + unary = [] + + parent = self.current_ast_element + self.current_nesting_depth += 1 + + # Check the expression depth against settings + if self.current_nesting_depth > self.settings['generation-options']['max-nesting-depth'] or random.random() < \ + self.settings['block-termination-probability']: + self.generate_literal(random.choice(expr_type)) + self.current_nesting_depth -= 1 + return + + # Generate + op = _choose_option(cutoffs, number_line, options) + self._generate_expr(comparison, expr_type, op, unary) + + # Return to parent + self.current_nesting_depth -= 1 + self.current_ast_element = parent + + def generate_declaration(self, mut=None): # TODO change this to a bool + """ + @brief Generate a declaration + + @param mut: the mutability of the variable ('const' or 'var') + """ + # Initialize the variable + parent = self.current_ast_element + decl_type = self.get_type(GAZ_VAR_TAG) + decl_args = [ + ("type", decl_type), + ] + self.make_element(GAZ_DECLARATION_TAG, decl_args) + + # Generate the variable + variable = self.generate_variable(decl_type, mut=mut) + self.current_ast_element.append(variable.xml) + self.current_scope.append(variable.name, variable) # make sure the variable is in scope + + # Generate the initialization of the variable + self.generate_xhs(GAZ_RHS_TAG, decl_type) + + # Return to parent + self.current_ast_element = parent + + def generate_binary(self, op, op_type): + """ + @brief Generate a binary operation + + @param op: the operator + @param op_type: the type of the expression + """ + parent = self.current_ast_element + + # Check if the operator is valid + if op == "": + raise ValueError("op is empty!") + args = [ + ("op", op), + ("type", op_type), + ] + self.make_element(GAZ_OPERATOR_TAG, args) + + # Gnereate lhs and rhs + self.generate_xhs(GAZ_LHS_TAG, op_type) + self.generate_xhs(GAZ_RHS_TAG, op_type) + + # Return to parent + self.current_ast_element = parent + + def generate_bracket(self, op_type): + """ + @brief Generate a bracket operation + + @param op_type: the type of the expression + """ + parent = self.current_ast_element + + args = [("type", op_type)] + self.make_element(GAZ_BRACKET_TAG, args) + + # Generate the expression in the brackets + self.generate_xhs(GAZ_RHS_TAG, op_type) + + # Return to parent + self.current_ast_element = parent + + def generate_xhs(self, handedness, op_type, is_zero=False): + """ + @brief generate a lhs or a rhs depending on handedness + + @param handedness: the handedness + @param op_type: the type of the expression + @param is_zero: if the expression is zero + """ + parent = self.current_ast_element + + self.make_element(handedness, []) + + self.generate_expression(op_type, is_zero=is_zero) + + self.current_ast_element = parent + + def generate_unary(self, op, op_type=ANY_TYPE): + """ + @brief Generate a unary operation + + @param op_type: the type of the expression + """ + parent = self.current_ast_element + args = [ + ("op", op), + ("type", op_type), + ] + self.make_element(GAZ_UNARY_OPERATOR_TAG, args) + + self.generate_xhs(GAZ_RHS_TAG, op_type) + + self.current_ast_element = parent + + def generate_routine_call(self): # we should generate a test case with arbitrary number of args + pass + + def generate_conditional(self): + """ + @brief generate a conditional statement + + @effects: modifies the current_ast_element + + @return: None + """ + if self.current_control_flow_nesting_depth >= self.settings['generation-options']['max-nesting-depth']: + return + + if self.current_control_flow_nesting_depth > 0 and random.random() < self.settings[ + 'block-termination-probability']: + return + + parent = self.current_ast_element + + self.make_scoped_element(GAZ_IF_TAG, []) + self.current_control_flow_nesting_depth += 1 + + self.generate_expression(GAZ_BOOL_KEY) + + self.generate_block(tag=[("type", GAZ_TRUE_BLOCK_TAG)]) # FIXME this inhibits elif blocks + self.generate_block(tag=[("type", GAZ_FALSE_BLOCK_TAG)]) + + self.current_control_flow_nesting_depth -= 1 + self.exit_scoped_element(parent) + + def generate_loop(self): + """ + @brief generate a loop + + @return: None + """ + # FIXME make sure that loop conditions are evaluated at least once (assert true or make a config param) + if self.current_control_flow_nesting_depth >= self.settings['generation-options']['max-nesting-depth']: + return + + if self.current_control_flow_nesting_depth > 0 and random.random() < self.settings[ + 'block-termination-probability']: + return + + init_var = self.generate_zero_declaration() + parent = self.current_ast_element + + self.make_scoped_element(GAZ_LOOP_TAG, []) + self.current_control_flow_nesting_depth += 1 + + self.generate_expression(GAZ_BOOL_KEY) # the loop entry condition #TODO force true + self.generate_block(block_type=GAZ_LOOP_TAG, + loop_var=init_var) # append a variable increment and prepend a break statement if var is > max loop iterations + + self.current_control_flow_nesting_depth -= 1 + self.exit_scoped_element(parent) + + def generate_zero_declaration(self): + """ + @brief generate a declaration int a = 0 for some a + + @return: None + """ + parent = self.current_ast_element + + self.make_element(GAZ_DECLARATION_TAG, []) + + # Initialize variable + variable = self.generate_variable(GAZ_INT_KEY, 'var') + self.current_ast_element.append(variable.xml) + self.current_scope.append(variable.name, variable) + + self.generate_xhs(GAZ_RHS_TAG, variable.type, is_zero=True) + + self.current_ast_element = parent + + return variable + + def generate_assignment(self): + """ + @brief generate an assignment + + @return: None + """ + possible_vars = self.current_scope.get_all_defined_mutable_vars() + if len(possible_vars) == 0: + raise ValueError("No possible variables to assign to!") + + # same structure as a declaration + parent = self.current_ast_element + + self.make_element(GAZ_ASSIGNMENT_TAG, []) + + variable = random.choice(possible_vars) + + self.current_ast_element.append(variable.xml) + self.generate_xhs(GAZ_RHS_TAG, variable.type) + + self.current_ast_element = parent + + def generate_out_stream(self): + self.generate_stream(GAZ_OUT_STREAM) + + def generate_in_stream(self): + self.generate_stream(GAZ_IN_STREAM) + + def generate_stream(self, stream_type): + """ + @brief generate a stream statment from a stream type + + @param stream_type: whether the stream is an input or output + @return: + """ + parent = self.current_ast_element + + args = [ + ("type", stream_type), + ] + self.make_element(GAZ_STREAM_TAG, args) + self.generate_expression(ANY_TYPE) + + self.current_ast_element = parent + + def generate_variable(self, var_type: str, mut=None): + """ + @brief generate a variable + + @param var_type: they type of the variable + @param mut: mutability of the variable + @return: None + """ + if mut is None: + return Variable(self.get_name(GAZ_VAR_TAG), var_type, self.get_qualifier()) else: - raise ValueError("Invalid include/exclude options " + str(include) + " " + str(exclude)) + return Variable(self.get_name(GAZ_VAR_TAG), var_type, mut) - while True: - if random.random() < self.settings['block-termination-probability']: - break + def generate_literal(self, var_type: str, value=None): + """ + @brief generate a literal - a = random.randint(0, number_line) - i = 0 - for i in range(len(cutoffs) - 1): - if cutoffs[i] < a < cutoffs[i + 1]: - try: - options[i]() - except KeyError: - continue - except ValueError: - break - break + @param var_type: Type of the literal + @param value: optional value of the literal + @return: None + """ + if value is None: + value = self.get_value(var_type) + else: + value = value + + args = [ + ("type", var_type), + ("value", str(value)), + ] + element = build_xml_element(args, name=GAZ_LIT_TAG) + self.current_ast_element.append(element) + + def make_literal(self, type, value): # TODO eliminate this function + args = [ + ("type", type), + ("value", value), + ] + element = build_xml_element(args, name=GAZ_LIT_TAG) + return element + + def generate_global(self): + """ + @brief generate a global const declaration + + @return: None + """ + current_scope = self.current_scope + current_element = self.current_ast_element + + self.current_scope = self.current_scope.get_top_scope() + self.current_ast_element = self.ast + + self.generate_declaration(mut='const') + + self.current_scope = current_scope + self.current_ast_element = current_element + + def generate_expression(self, expr_type: str, is_zero=False): + """ + @brief generate an expression + + @param expr_type: the type of the expression + @param is_zero: if the expression should eval to 0 + @return: None + """ + if is_zero: + self.generate_literal(expr_type, value=0) + return + elif expr_type == GAZ_INT_KEY or expr_type == GAZ_FLOAT_KEY: + self.generate_int_expr() + elif expr_type == GAZ_BOOL_KEY: + if random.random() < 0.5: + self.generate_bool_expr() + else: + self.generate_comp_expr() + elif expr_type == GAZ_CHAR_KEY: + self.generate_char_expr() + elif expr_type == GAZ_FLOAT_KEY: + self.generate_float_expr() + elif expr_type == ANY_TYPE: # TODO implement the choice of any type + ty = self.get_type(GAZ_RHS_TAG) + self.generate_expression(ty) + else: + raise NotImplementedError(f"Expression type {expr_type} not implemented") + + def generate_routine_args(self) -> list[Argument]: + """ + @brief generate a list of arguments for a routine + + @return: a list of arguments + """ + number = random.randint(self.settings['properties']['number-of-arguments']['min'], + self.settings['properties']['number-of-arguments']['max']) + args = [] + for i in range(number): + arg = self.generate_arg() + args.append(arg) + self.current_scope.append(arg.name, arg) + return args + + def generate_arg(self): + return Argument(self.get_name(GAZ_VAR_TAG), self.get_type(GAZ_VAR_TAG)) def generate_int_expr(self): self._generate_expression([GAZ_INT_KEY], @@ -418,292 +644,6 @@ class AstGenerator: self.comp_op_options, comparison=True) - def _generate_expression(self, expr_type: list[str], number_line, - cutoffs, options, unary=None, comparison: bool = False): - if unary is None: - unary = [] - - parent = self.current_ast_element - self.current_nesting_depth += 1 - - if self.current_nesting_depth > self.settings['generation-options']['max-nesting-depth'] or random.random() < \ - self.settings['block-termination-probability']: - self.generate_literal(random.choice(expr_type)) - self.current_nesting_depth -= 1 - return - - op = "" - a = random.randint(0, number_line - 1) - i = 0 - for i in range(len(cutoffs) - 1): - if i == 0: - if a < cutoffs[i]: - op = options[i] - break - if cutoffs[i] <= a < cutoffs[i + 1]: - op = options[i] - break - - if op in unary: - self.generate_unary(op, random.choice(expr_type)) - elif op == GAZ_BRACKET_TAG: - self.generate_bracket(random.choice(expr_type)) - elif comparison: - if op in ['equality', 'inequality']: - self.generate_binary(op, random.choice([GAZ_INT_KEY, GAZ_FLOAT_KEY, GAZ_CHAR_KEY])) - else: - self.generate_binary(op, random.choice([GAZ_INT_KEY, GAZ_FLOAT_KEY])) - else: - self.generate_binary(op, random.choice(expr_type)) - - self.current_nesting_depth -= 1 - self.current_ast_element = parent - - def generate_declaration(self, mut=None): - parent = self.current_ast_element - decl_type = self.get_type(GAZ_VAR_TAG) - decl_args = [ - ("type", decl_type), - ] - element = build_xml_element(decl_args, name=GAZ_DECLARATION_TAG) - self.current_ast_element.append(element) - self.current_ast_element = element - - variable = self.generate_variable(decl_type, mut=mut) - self.current_ast_element.append(variable.xml) - self.current_scope.append(variable.name, variable) - - self.generate_xhs(GAZ_RHS_TAG, decl_type) # TODO add real type (decl_type) - self.current_ast_element = parent - - def generate_binary(self, op, op_type): - parent = self.current_ast_element - if op == "": - raise ValueError("op is empty!") - args = [ - ("op", op), - ("type", op_type), - ] - element = build_xml_element(args, name=GAZ_OPERATOR_TAG) - self.current_ast_element.append(element) - self.current_ast_element = element - - self.generate_xhs(GAZ_LHS_TAG, op_type) - self.generate_xhs(GAZ_RHS_TAG, op_type) - - self.current_ast_element = parent - - def generate_bracket(self, op_type): - parent = self.current_ast_element - args = [ - ("type", op_type), - ] - element = build_xml_element(args, name=GAZ_BRACKET_TAG) - self.current_ast_element.append(element) - self.current_ast_element = element - - self.generate_xhs(GAZ_RHS_TAG, op_type) - - self.current_ast_element = parent - - def generate_xhs(self, handedness, op_type, is_zero=False): - element = build_xml_element([], name=handedness) - parent = self.current_ast_element - self.current_ast_element.append(element) - self.current_ast_element = element - - self.generate_expression(op_type, is_zero=is_zero) - - self.current_ast_element = parent - - def generate_unary(self, op, op_type=ANY_TYPE): - parent = self.current_ast_element - args = [ - ("op", op), - ("type", op_type), - ] - element = build_xml_element(args, name=GAZ_UNARY_OPERATOR_TAG) - self.current_ast_element.append(element) - self.current_ast_element = element - - self.generate_xhs(GAZ_RHS_TAG, op_type) - - self.current_ast_element = parent - - def generate_routine_call(self): - pass - - def generate_conditional(self): - if self.current_control_flow_nesting_depth >= self.settings['generation-options']['max-nesting-depth']: - return - - if self.current_control_flow_nesting_depth > 0 and random.random() < self.settings[ - 'block-termination-probability']: - return - - element = build_xml_element([], name=GAZ_IF_TAG) - self.current_ast_element.append(element) - parent = self.current_ast_element - self.current_ast_element = element - - self.current_control_flow_nesting_depth += 1 - - self.push_scope() - - self.generate_expression(GAZ_BOOL_KEY) - - self.generate_block(tag=[("type", GAZ_TRUE_BLOCK_TAG)]) - self.generate_block(tag=[("type", GAZ_FALSE_BLOCK_TAG)]) - - self.pop_scope() - self.current_ast_element = parent - - def generate_loop(self): # fixme generation of infinite loops happens too often... - # FIXME make sure that loop conditions are evaluated at least once (assert true or make a config param) - if self.current_control_flow_nesting_depth >= self.settings['generation-options']['max-nesting-depth']: - return - - if self.current_control_flow_nesting_depth > 0 and random.random() < self.settings[ - 'block-termination-probability']: - return - - init_var = self.generate_zero_declaration() - parent = self.current_ast_element - element = build_xml_element([], name=GAZ_LOOP_TAG) - self.current_ast_element.append(element) - self.current_ast_element = element - - self.current_control_flow_nesting_depth += 1 - self.push_scope() - self.generate_expression(GAZ_BOOL_KEY) - self.generate_block(block_type=GAZ_LOOP_TAG, - loop_var=init_var) # append a variable increment and prepend a break statement if var is > max loop iterations - self.pop_scope() - self.current_ast_element = parent - - def generate_zero_declaration(self): - parent = self.current_ast_element - element = build_xml_element([], name=GAZ_DECLARATION_TAG) - - self.current_ast_element.append(element) - self.current_ast_element = element - - variable = self.generate_variable(GAZ_INT_KEY, 'var') - self.current_ast_element.append(variable.xml) - self.current_scope.append(variable.name, variable) - - self.generate_xhs(GAZ_RHS_TAG, variable.type, is_zero=True) - self.current_ast_element = parent - - return variable - - def generate_assignment(self): - possible_vars = self.current_scope.get_all_defined_mutable_vars() - if len(possible_vars) == 0: - raise ValueError("No possible variables to assign to!") - - # same structure as a declaration - parent = self.current_ast_element - - element = build_xml_element([], name=GAZ_ASSIGNMENT_TAG) - self.current_ast_element.append(element) - self.current_ast_element = element - - variable = random.choice(possible_vars) - - self.current_ast_element.append(variable.xml) - self.generate_xhs(GAZ_RHS_TAG, variable.type) - - self.current_ast_element = parent - - def generate_out_stream(self): - self.generate_stream(GAZ_OUT_STREAM) - - def generate_in_stream(self): - self.generate_stream(GAZ_IN_STREAM) - - def generate_stream(self, stream_type): - parent = self.current_ast_element - args = [ - ("type", stream_type), - ] - element = build_xml_element(args, name=GAZ_STREAM_TAG) - self.current_ast_element.append(element) - self.current_ast_element = element - - self.generate_expression(ANY_TYPE) - - self.current_ast_element = parent - - def generate_variable(self, var_type: str, mut=None): - if mut is None: - return Variable(self.get_name(GAZ_VAR_TAG), var_type, self.get_qualifier()) - else: - return Variable(self.get_name(GAZ_VAR_TAG), var_type, mut) - - def generate_literal(self, var_type: str, value=None): - if value is None: - value = self.get_value(var_type) - else: - value = value - args = [ - ("type", var_type), - ("value", str(value)), - ] - element = build_xml_element(args, name=GAZ_LIT_TAG) - self.current_ast_element.append(element) - - def make_literal(self, type, value): - args = [ - ("type", type), - ("value", value), - ] - element = build_xml_element(args, name=GAZ_LIT_TAG) - return element - - def generate_global(self): - current_scope = self.current_scope - current_element = self.current_ast_element - - self.current_scope = self.current_scope.get_top_scope() - self.current_ast_element = self.ast - - self.generate_declaration(mut='const') - - self.current_scope = current_scope - self.current_ast_element = current_element - - def generate_expression(self, expr_type: str, is_zero=False): - if is_zero: - self.generate_literal(expr_type, value=0) - return - elif expr_type == GAZ_INT_KEY or expr_type == GAZ_FLOAT_KEY: - self.generate_int_expr() - elif expr_type == GAZ_BOOL_KEY: - if random.random() < 0.5: - self.generate_bool_expr() - else: - self.generate_comp_expr() - elif expr_type == GAZ_CHAR_KEY: - self.generate_char_expr() - elif expr_type == ANY_TYPE: # TODO implement the choice of any type - self.generate_int_expr() - else: - raise NotImplementedError(f"Expression type {expr_type} not implemented") - - def generate_routine_args(self): - number = random.randint(self.settings['properties']['number-of-arguments']['min'], - self.settings['properties']['number-of-arguments']['max']) - args = [] - for i in range(number): - arg = self.generate_arg() - args.append(arg) - self.current_scope.append(arg.name, arg) - return args - - def generate_arg(self): - return Argument(self.get_name(GAZ_VAR_TAG), self.get_type(GAZ_VAR_TAG)) - def push_scope(self, xml_element: ET.Element = None): scope = Scope(self.current_scope) self.symbol_table.append(scope) @@ -774,29 +714,187 @@ class AstGenerator: name = ''.join(random.choices(string.ascii_letters, k=length)) return name else: - if name_type == GAZ_VAR_TAG: - choice = random.choice(self.variable_names) - self.variable_names.remove(choice) + try: + if name_type == GAZ_VAR_TAG: + choice = random.choice(self.variable_names) + self.variable_names.remove(choice) + return choice + else: + choice = random.choice(self.routine_names) + self.routine_names.remove(choice) + return choice + except IndexError: # if we run out of variable names + length = random.randint(self.settings['properties']['id-length']['min'], + self.settings['properties']['id-length']['max']) + name = ''.join(random.choices(string.ascii_letters, k=length)) + return name + + def get_type(self, tag) -> str: # TODO Add support for composite types + """ + @brief get a random type from the list of possible types + + @param tag: + @return: a type as a string + """ + comp_atom = self.get_choice(self.type_options, self.type_numline, self.type_cutoffs) + choice = "" + if comp_atom == GAZ_ATOMIC_TYPE_KEY: + choice = self.get_choice(self.atomic_type_options, self.atomic_type_numline, self.atomic_type_cutoffs) + elif comp_atom == GAZ_COMPOSITE_TYPE_KEY: + choice = self.get_choice(self.composite_type_options, self.composite_type_numline, self.composite_type_cutoffs) + else: + raise NotImplementedError(f"Unimplemented generator for type: {comp_atom}") + + if tag not in [GAZ_PROCEDURE_TAG]: + if choice != GAZ_VOID_TYPE: return choice else: - choice = random.choice(self.routine_names) - self.routine_names.remove(choice) - return choice + return self.get_type(tag) + else: + return choice - def get_type(self, tag): # TODO Add support for composite types - return 'int' # TODO Add support for all types - if tag in [GAZ_PROCEDURE_TAG, GAZ_FUNCTION_TAG, GAZ_VAR_TAG]: - cutoffs = [] - values = [] - types = [] - for key, value in self.settings["type-weights"]["value-types"].items(): - if key == GAZ_VOID_TYPE and tag != GAZ_PROCEDURE_TAG: - continue - cutoffs.append(value + sum(cutoffs)) - values.append(value) - types.append(key) + def get_choice(self, options, numline, cutoffs): + res = random.randint(0, numline - 1) + for i in range(len(cutoffs)): + if res < cutoffs[i]: + try: + return options[i] + except Exception as e: + raise ValueError(str(e) + "Internal Error, please report the stack trace to me") - res = random.randint(0, sum(values)) - for i in range(len(cutoffs)): - if res < cutoffs[i]: - return types[i] + ### LOOP HELPERS ### + + def generate_loop_condition_check(self, loop_var: Variable): + """ + @brief generates the loop condition check + + Ensures that the loop does not iterate more than max-loop-iterations times + + @param loop_var: + @return: + """ + # loop var is always an int + assert loop_var.type == GAZ_INT_KEY + + # create a conditional xml tag + if_stmt = build_xml_element([], name=GAZ_IF_TAG) + self.current_ast_element.append(if_stmt) + parent = self.current_ast_element + self.current_ast_element = if_stmt + + # add the check 'if loop_var >= self.settings['generation_options']['max-loop-iterations']: break' + operation = build_xml_element([("op", ">=")], name=GAZ_OPERATOR_TAG) + rhs = self._loop_heloper(loop_var, operation) + rhs.append( # TODO refactor this to use generate_literal instead of make_literal + self.make_literal(GAZ_INT_KEY, "'" + str(self.settings['generation-options']['max-loop-iterations']) + "'")) + + true_block = build_xml_element([], name=GAZ_BLOCK_TAG) + if_stmt.append(true_block) + self.current_ast_element = true_block + break_stmt = build_xml_element([], name=GAZ_BREAK_TAG) + true_block.append(break_stmt) + + # return everything to normalcy + self.current_ast_element = parent + + def _loop_heloper(self, loop_var, operation): + self.current_ast_element.append(operation) + self.current_ast_element = operation + lhs = build_xml_element([], name=GAZ_LHS_TAG) + operation.append(lhs) + var = build_xml_element([("name", loop_var.name), ("type", loop_var.type)], name=GAZ_VAR_TAG) + lhs.append(var) + rhs = build_xml_element([], name=GAZ_RHS_TAG) + operation.append(rhs) + return rhs + + def generate_loop_condition_increment(self, loop_var): + assert loop_var.type == GAZ_INT_KEY + + parent = self.current_ast_element + assignment = build_xml_element([], name=GAZ_ASSIGNMENT_TAG) + self.current_ast_element.append(assignment) + self.current_ast_element = assignment + + # append the variable + self.current_ast_element.append(loop_var.xml) + + # add the increment 'loop_var += 1' + assn_rhs = build_xml_element([], name=GAZ_RHS_TAG) + self.current_ast_element.append(assn_rhs) + self.current_ast_element = assn_rhs + + operation = build_xml_element([("op", "+")], name=GAZ_OPERATOR_TAG) + rhs = self._loop_heloper(loop_var, operation) + rhs.append(self.make_literal(GAZ_INT_KEY, '1')) # TODO refactor this to use generate_literal instead of make_literal + + # return everything to normalcy + self.current_ast_element = parent + + ### HELPER FUNCTIONS ### + def make_element(self, name: str, keys: list[tuple[str, any]]) -> ET.Element: + """ + @brief make an xml element for the ast + + @effects modifies self.current_ast_element + + @param name: the tag for the element + @param keys: a list of tuple containing keys for the element + """ + element = build_xml_element(keys, name=name) + if self.current_ast_element is not None: + self.current_ast_element.append(element) + self.current_ast_element = element + + return element + + def make_scoped_element(self, name, keys) -> ET.Element: + """ + @brief make an xml element for the ast with a scope + + @param name: the tag for the element + @param keys: a list of tuple containing keys for the element + """ + parent = self.current_ast_element + self.push_scope() + self.make_element(name, keys) + return parent + + def exit_scoped_element(self, parent): + """ + @brief leave the current element and return to parent + + @param parent: the enclosing element to return to + """ + self.pop_scope() + self.current_ast_element = parent + + def _generate_from_options(self, cutoffs, number_line, options): + while True: + if random.random() < self.settings['block-termination-probability']: + break + + a = random.randint(0, number_line) + i = 0 + for i in range(len(cutoffs) - 1): + if cutoffs[i] < a < cutoffs[i + 1]: + try: + options[i]() + except KeyError: # if the key is not in the options (due to exclusion) + continue + except ValueError: + break + break + + def _generate_expr(self, comparison, expr_type, op, unary): + if op in unary: + self.generate_unary(op, random.choice(expr_type)) + elif op == GAZ_BRACKET_TAG: + self.generate_bracket(random.choice(expr_type)) + elif comparison: + if op in ['equality', 'inequality']: + self.generate_binary(op, random.choice([GAZ_INT_KEY, GAZ_FLOAT_KEY, GAZ_CHAR_KEY])) + else: + self.generate_binary(op, random.choice([GAZ_INT_KEY, GAZ_FLOAT_KEY])) + else: + self.generate_binary(op, random.choice(expr_type)) diff --git a/ast_generator/gazprea_ast_grammar.py b/ast_generator/gazprea_ast_grammar.py deleted file mode 100644 index a35b590..0000000 --- a/ast_generator/gazprea_ast_grammar.py +++ /dev/null @@ -1,53 +0,0 @@ -from constants import Grammar - -GAZPREA_TOP_LEVEL: Grammar = { - # Top level elements - '': [''], - '': ['blockblock'], - # TODO constants - - # Routines - '': ['', ''], # TODO forward_declaration - '': [ - 'function name="_NAME_" return_type="_TYPE_" args="_ARGS_"function'], - '': [ - 'procedure name="_NAME_" return_type="_TYPE_" args="_ARGS_"procedure'], - '': [ - 'procedure name="main" return_type="int" args="()"procedure'], - '': ['', ''], - - # Blocks - '': ['blockblock'], - '': ['blockblock'], - '': [ - '', - '', - # '', - # '', # TODO if/else, loop - ], - '': ['', ''], - - # Things that belong on their own lines - '': ['declarationdeclaration'], - '': [''], #, ''], - '': ['returnreturn'], - - '': ['stream type="std_output"stream'], - # '': ['stream type="std_input"stream'], - - # Things that are part of lines - '': ['', '', ''], - '': ['lhslhs'], - '': ['rhsrhs'], - - # Things that have values - '': ['operatoroperator'], - '': ['variable mut="_MODIFIER_" type="_TYPE_" name="_NAME_"'], - '': ['literal type="_TYPE_" value="_VALUE_"'], - - # Helper rules - '': ['<'], - '': ['>'], - '': ['/>'], - '': [' tag or send it to Ayrton and I'll see what I can see" "".format(r)) - with open("fuzzer/debug/ast/{}.xml".format(r), 'w') as f: + with open("fuzzer/debug/ast_err/{}.xml".format(r), 'w') as f: f.write(xml.dom.minidom.parseString(ET.tostring(self.fuzzer.ast).decode('utf-8')).toprettyxml()) continue dom = xml.dom.minidom.parseString(ET.tostring(self.fuzzer.ast).decode('utf-8')) @@ -58,9 +60,16 @@ class Fuzzer(): except (OverflowError, ZeroDivisionError, ValueError): os.system("rm -f fuzzer/ground_truth/{}_{}.py".format(self.file_name, i)) continue + except KeyboardInterrupt: + r = random.randint(0, 1000000) + warnings.warn("Execution halted, result written to debug/ast/{}.xml\n" + "".format(r)) + with open("fuzzer/debug/ast_err/{}.xml".format(r), 'w') as f: + f.write(xml.dom.minidom.parseString(ET.tostring(self.fuzzer.ast).decode('utf-8')).toprettyxml()) + sys.exit(1) with open("fuzzer/input/{}_{}.in".format(self.file_name, i), 'w') as f: f.write(self.fuzzer.source) - with open("fuzzer/debug/{}_{}.out".format(self.file_name, i), 'w') as f: + with open("fuzzer/debug/{}_{}.xml".format(self.file_name, i), 'w') as f: f.write(pretty) # y.write(self.fuzzer.out) # with open("fuzzer/instream/{}.in".format(i), 'w') as f: diff --git a/test/True b/test/True deleted file mode 100644 index e69de29..0000000 diff --git a/tester_config.json b/tester_config.json new file mode 100644 index 0000000..602d1ff --- /dev/null +++ b/tester_config.json @@ -0,0 +1,36 @@ +{ + "inDir": "", + "outDir": "", + "inStrDir": "", + "testedExecutablePaths": { + "": "" + }, + "runtimes": { + "": "" + }, + "toolchains": { + "gazprea": [ + { + "stepName": "gazc", + "executablePath": "$EXE", + "arguments": [ + "$INPUT", + "$OUTPUT" + ], + "output": "gazc.ll", + "allowError": true + }, + { + "stepName": "lli", + "executablePath": "/cshome/cmput415/415-resources/llvm-project/build/bin/lli", + "arguments": [ + "$INPUT" + ], + "output": "-", + "usesRuntime": true, + "usesInStr": true, + "allowError": true + } + ] + } +}