# `random` and `ET` are used throughout this module but were never imported by name, so
# they presumably arrive through the star imports below. They are imported explicitly
# *before* the star imports so that any binding the star imports provide still wins.
import random
import string
import xml.etree.ElementTree as ET
from typing import Any

from english_words import get_english_words_set

from ast_generator.utils import *
from ast_generator.utils import filter_options, _choose_option
from constants import *
import keyword


class AstGenerator:
    """
    Generates an AST from a grammar based on given settings

    Originally the intention was to use the ISLa library to generate the AST, however
    I found that ISLa is like taking a bulldozer to a sledgehammer's job, so I decided
    to write a procedural generator instead.

    The way we select elements is we take all the settings in their category and assign
    them a range on a number line. Then we pick a random number in that range and
    whichever category it falls into will be selected.
    """

    ### INITIALIZATION ###
    def __init__(self, settings: dict):
        """
        This class is designed to get the settings from some wrapper class that
        better defines the precise constraints of the language being generated

        the necessary settings are in the .yaml file and #TODO this is not generalizable yet

        @param settings: settings for weights and probabilities and lengths
        """
        self.settings = settings

        # every scope ever created is kept here; the first entry is the global scope
        self.symbol_table = []
        global_scope = Scope(None, None)
        self.symbol_table.append(global_scope)  # NOTE for debug
        self.current_scope = global_scope

        self._init_names()

        # root of the generated tree and the element new children are attached to
        self.ast: "ET.Element | None" = None
        self.current_ast_element: "ET.Element | None" = None
        self.current_nesting_depth = 0
        self.current_control_flow_nesting_depth = 0

        self._init_numlines()

    def _init_numlines(self):
        """
        @brief build the (options, cutoffs, numline) triples used to pick expression operators

        One triple is stored per expression family (int, bool, float, char, comparison),
        together with the list of operators that must be emitted as unary operators.
        """
        # Numberlines - For computing probabilities
        self.int_op_options, self.int_op_cutoffs, self.int_op_numline = (
            get_numberlines('expression-weights',
                            ['brackets', 'arithmetic', 'unary'],
                            [[], [], ['not']],
                            self.settings))
        self.int_unary = ['negation', 'noop']

        self.bool_op_options, self.bool_op_cutoffs, self.bool_op_numline = (
            get_numberlines('expression-weights',
                            ['brackets', 'comparison', 'logical', 'unary'],
                            excluded_values=[[],
                                             ['less-than-or-equal', 'greater-than-or-equal',
                                              'less-than', 'greater-than'],
                                             [],
                                             ['noop', 'negation']],
                            settings=self.settings))
        self.bool_unary = ['not']

        self.float_op_options, self.float_op_cutoffs, self.float_op_numline = (
            get_numberlines('expression-weights',
                            ['brackets', 'arithmetic', 'unary'],
                            [[], [], ['not']],
                            self.settings))
        self.float_unary = ['negation', 'noop']

        self.char_op_options, self.char_op_cutoffs, self.char_op_numline = (
            get_numberlines('expression-weights',
                            ['brackets', 'comparison'],
                            [[], ['less-than', 'greater-than',
                                  'less-than-or-equal', 'greater-than-or-equal']],
                            self.settings))

        self.comp_op_options, self.comp_op_cutoffs, self.comp_op_numline = (
            get_numberlines('expression-weights',
                            ['brackets', 'comparison'],
                            [[], []],
                            self.settings))

    def _init_names(self):
        """
        @brief build the pools of identifiers handed out by get_name

        Words whose length lies within properties.id-length [min, max] and that are not
        Python keywords are kept; the first half of the resulting list names variables,
        the second half names routines.
        """
        id_length = self.settings['properties']['id-length']
        shortest, longest = id_length['min'], id_length['max']

        names = get_english_words_set(['web2'], alpha=True)
        # the lower bound must be 'min': comparing against 'max' on both sides only
        # ever kept words of exactly the maximum length
        var_name_list = [
            word for word in names
            if shortest <= len(word) <= longest and not keyword.iskeyword(word)
        ]

        half = len(var_name_list) // 2
        self.variable_names = var_name_list[:half]
        self.routine_names = var_name_list[half:]

    ### GENERATION ###
    def generate_ast(self):
        """
        @brief generates an AST from a grammar
        """
        self.generate_top_level_block()

    def generate_top_level_block(self):
        """
        @brief creates the top-level block containing the whole program

        Emits a random number of globals, then up to max-number-of-routines routines
        (stopping early on a termination roll), then the main procedure.
        """
        element = self.make_element(GAZ_BLOCK_TAG, [])
        self.ast = element

        generation_options = self.settings['generation-options']
        for _ in range(random.randint(0, generation_options['max-globals'])):
            self.generate_global()

        for _ in range(generation_options['max-number-of-routines']):
            if random.random() < self.settings['block-termination-probability']:
                break
            self.generate_routine()

        self.generate_main()

    def generate_main(self):
        """
        @brief generates the main procedure, whose body ends with a literal return of "0"
        """
        main_args = [  # TODO refactor these into constants
            (GAZ_NAME_KEY, "main"),
            (GAZ_RETURN_KEY, GAZ_INT_KEY),
        ]
        parent = self.make_scoped_element(GAZ_PROCEDURE_TAG, main_args)
        self.generate_block(return_stmt=True, return_value="0",
                            return_type=GAZ_INT_KEY, block_type=GAZ_PROCEDURE_TAG)
        self.exit_scoped_element(parent)

    def generate_block(self, tag=None, return_stmt=False, return_value=None, return_type=None,
                       block_type=None, loop_var=None):
        # TODO this should be broken into many functions depending on the block requirements
        """
        @brief generates a block element with its own scope and fills it with statements

        @param tag: list of (key, value) attributes for the block element
        @param return_stmt: whether the block ends with a return statement
        @param return_value: literal value to return (if None -> random expression)
        @param return_type: type of the returned value
        @param block_type: tag of the construct owning the block (routine / loop / None)
        @param loop_var: the loop counter, required when block_type is GAZ_LOOP_TAG
        """
        if tag is None:
            tag = []

        parent = self.current_ast_element
        self.push_scope()

        element = build_xml_element(tag, name=GAZ_BLOCK_TAG)
        self.current_ast_element.append(element)
        self.current_ast_element = element

        if block_type in [GAZ_PROCEDURE_TAG, GAZ_FUNCTION_TAG]:
            self.generate_statements()
        else:
            # declarations first, then everything else
            self.generate_statements(include='declaration')
            self.generate_statements(exclude='declaration')

        # Generate the loop condition increment if we are in a loop
        if block_type == GAZ_LOOP_TAG:
            self.generate_loop_condition_check(loop_var)
            self.generate_loop_condition_increment(loop_var)

        if return_stmt:
            self.generate_return(return_type=return_type, return_value=return_value)
            # statements placed after the return are unreachable by construction
            if self.settings['generation-options']['generate-dead-code']:
                self.generate_statements(exclude='declaration')

        self.pop_scope()
        self.current_ast_element = parent

    def generate_return(self, return_type=None, return_value=None):
        """
        @brief generates a return statement

        @param return_type: the type to be returned (if None -> any)
        @param return_value: value to be returned (if None -> expr[return_type])
        """
        # store the parent; make_element moves current_ast_element onto the new element
        parent = self.current_ast_element

        if return_type is None or return_type == GAZ_VOID_TYPE:
            # make_element already attaches the element to the parent, so it must not be
            # appended a second time
            self.make_element(GAZ_RETURN_TAG, [])
        else:
            self.make_element(GAZ_RETURN_TAG, [("type", return_type)])

            # make either a literal or a random expression based on choice
            if return_value is None:
                self.generate_expression(return_type)
            else:
                self.current_ast_element.append(self.make_literal(return_type, return_value))

        # return to the parent
        self.current_ast_element = parent

    def generate_routine(self, routine_type=None):
        """
        @brief generate a random routine

        @param routine_type: the kind of routine to generate (if None -> random kind)
        """
        if routine_type is None:
            routine_type = self.get_routine_type()  # get a random type

        # initialize random variables
        # NOTE(review): generate_routine_args registers every argument in the scope that
        # is current *here* (the enclosing scope), and define_args registers them again
        # in the routine's own scope below - confirm the first registration is intended.
        args = self.generate_routine_args()
        name = self.get_name(routine_type)
        return_type = self.get_type(routine_type)

        # initialize the routine
        routine = Routine(name, routine_type, return_type, args)
        routine_args = [
            ("name", routine.name),
            ("return_type", routine.return_type),
        ]

        # Generation
        parent = self.current_ast_element
        self.make_scoped_element(routine.type, routine_args)
        self.define_args(routine.arguments)
        self.generate_block(return_stmt=True, return_type=routine.return_type)
        self.exit_scoped_element(parent)

    def define_args(self, args):
        """
        @brief Generate the argument tags in a routine

        @param args: a list of arguments
        """
        for arg in args:
            self.current_ast_element.append(arg.xml)
            self.current_scope.append(arg.name, arg)

    def generate_statements(self, include=None, exclude=None):
        """
        @brief generates a random run of statements in the current element

        @param include: passed to filter_options (presumably: only keep this kind)
        @param exclude: passed to filter_options (presumably: drop this kind)
        """
        opts = ['declaration', 'routine_call', 'conditional', 'loop', 'assignment',
                'out_stream', 'in_stream']

        # Number line
        number_line = 180  # TODO fix the numberline stuff to reflect the settings
        cutoffs = [10, 30, 50, 80, 100, 140, 180]
        options = {
            0: self.generate_declaration,
            1: self.generate_routine_call,
            2: self.generate_conditional,
            3: self.generate_loop,
            4: self.generate_assignment,
            5: self.generate_out_stream,
            6: self.generate_in_stream,
        }

        # Filter unwanted options
        filter_options(exclude, include, options, opts)

        # Generate the statements
        self._generate_from_options(cutoffs, number_line, options)

    def _generate_expression(self, expr_type: list[str], number_line, cutoffs, options,
                             unary=None, comparison: bool = False):
        """
        @brief Generate an expression

        @param expr_type: a list of types to be used
        @param number_line: number line for probability computation
        @param cutoffs: cutoffs to be used
        @param options: options to be used
        @param unary: a list of unary operators in options
        @param comparison: whether the expression compares two operands
        """
        if unary is None:
            unary = []

        parent = self.current_ast_element
        self.current_nesting_depth += 1

        # Check the expression depth against settings; fall back to a plain literal when
        # the expression is too deep or the termination roll succeeds
        too_deep = (self.current_nesting_depth
                    > self.settings['generation-options']['max-nesting-depth'])
        if too_deep or random.random() < self.settings['block-termination-probability']:
            self.generate_literal(random.choice(expr_type))
            self.current_nesting_depth -= 1
            return

        # Generate
        op = _choose_option(cutoffs, number_line, options)
        self._generate_expr(comparison, expr_type, op, unary)

        # Return to parent
        self.current_nesting_depth -= 1
        self.current_ast_element = parent

    def generate_declaration(self, mut=None):  # TODO change this to a bool
        """
        @brief Generate a declaration

        @param mut: the mutability of the variable ('const' or 'var')
        """
        # Initialize the variable
        parent = self.current_ast_element
        decl_type = self.get_type(GAZ_VAR_TAG)
        decl_args = [
            ("type", decl_type),
        ]
        self.make_element(GAZ_DECLARATION_TAG, decl_args)

        # Generate the variable
        variable = self.generate_variable(decl_type, mut=mut)
        self.current_ast_element.append(variable.xml)
        self.current_scope.append(variable.name, variable)  # make sure the variable is in scope

        # Generate the initialization of the variable
        self.generate_xhs(GAZ_RHS_TAG, decl_type)

        # Return to parent
        self.current_ast_element = parent

    def generate_binary(self, op, op_type):
        """
        @brief Generate a binary operation

        @param op: the operator
        @param op_type: the type of the expression
        """
        parent = self.current_ast_element

        # Check if the operator is valid
        if op == "":
            raise ValueError("op is empty!")

        args = [
            ("op", op),
            ("type", op_type),
        ]
        self.make_element(GAZ_OPERATOR_TAG, args)

        # Generate lhs and rhs
        self.generate_xhs(GAZ_LHS_TAG, op_type)
        self.generate_xhs(GAZ_RHS_TAG, op_type)

        # Return to parent
        self.current_ast_element = parent

    def generate_bracket(self, op_type):
        """
        @brief Generate a bracket operation

        @param op_type: the type of the expression
        """
        parent = self.current_ast_element
        args = [("type", op_type)]
        self.make_element(GAZ_BRACKET_TAG, args)

        # Generate the expression in the brackets
        self.generate_xhs(GAZ_RHS_TAG, op_type)

        # Return to parent
        self.current_ast_element = parent

    def generate_xhs(self, handedness, op_type, is_zero=False):
        """
        @brief generate a lhs or a rhs depending on handedness

        @param handedness: the handedness
        @param op_type: the type of the expression
        @param is_zero: if the expression is zero
        """
        parent = self.current_ast_element

        self.make_element(handedness, [])
        self.generate_expression(op_type, is_zero=is_zero)

        self.current_ast_element = parent

    def generate_unary(self, op, op_type=ANY_TYPE):
        """
        @brief Generate a unary operation

        @param op: the operator
        @param op_type: the type of the expression
        """
        parent = self.current_ast_element
        args = [
            ("op", op),
            ("type", op_type),
        ]
        element = build_xml_element(args, name=GAZ_UNARY_OPERATOR_TAG)
        self.current_ast_element.append(element)
        self.current_ast_element = element

        self.generate_xhs(GAZ_RHS_TAG, op_type)

        self.current_ast_element = parent

    def generate_routine_call(self):
        """
        @brief placeholder: routine calls are not generated yet
        """
        pass

    def generate_conditional(self):
        """
        @brief generate an if element: a bool condition, a true block and a false block

        Nothing is generated once max-nesting-depth control-flow constructs are open, and
        nested conditionals are additionally skipped on a termination roll.
        """
        if (self.current_control_flow_nesting_depth
                >= self.settings['generation-options']['max-nesting-depth']):
            return

        if (self.current_control_flow_nesting_depth > 0
                and random.random() < self.settings['block-termination-probability']):
            return

        parent = self.current_ast_element
        element = build_xml_element([], name=GAZ_IF_TAG)
        parent.append(element)
        self.current_ast_element = element

        self.current_control_flow_nesting_depth += 1
        self.push_scope()

        self.generate_expression(GAZ_BOOL_KEY)
        self.generate_block(tag=[("type", GAZ_TRUE_BLOCK_TAG)])
        self.generate_block(tag=[("type", GAZ_FALSE_BLOCK_TAG)])

        self.pop_scope()
        # leaving the construct: without this the counter tracks the total number of
        # constructs ever generated instead of the current depth
        self.current_control_flow_nesting_depth -= 1
        self.current_ast_element = parent

    def generate_loop(self):
        # fixme generation of infinite loops happens too often...
        # FIXME make sure that loop conditions are evaluated at least once
        #  (assert true or make a config param)
        """
        @brief generate a loop element guarded by a counter variable

        A zero-initialised int counter is declared just before the loop; the loop body is
        given that counter so it can emit the iteration-limit check and the increment.
        """
        if (self.current_control_flow_nesting_depth
                >= self.settings['generation-options']['max-nesting-depth']):
            return

        if (self.current_control_flow_nesting_depth > 0
                and random.random() < self.settings['block-termination-probability']):
            return

        init_var = self.generate_zero_declaration()

        parent = self.current_ast_element
        element = build_xml_element([], name=GAZ_LOOP_TAG)
        parent.append(element)
        self.current_ast_element = element

        self.current_control_flow_nesting_depth += 1
        self.push_scope()

        self.generate_expression(GAZ_BOOL_KEY)
        # append a variable increment and prepend a break statement
        # if var is > max loop iterations
        self.generate_block(block_type=GAZ_LOOP_TAG, loop_var=init_var)

        self.pop_scope()
        # leaving the construct (see generate_conditional)
        self.current_control_flow_nesting_depth -= 1
        self.current_ast_element = parent

    def generate_zero_declaration(self):
        """
        @brief declare a mutable int variable initialised to zero

        @return the declared variable
        """
        parent = self.current_ast_element
        element = build_xml_element([], name=GAZ_DECLARATION_TAG)
        self.current_ast_element.append(element)
        self.current_ast_element = element

        variable = self.generate_variable(GAZ_INT_KEY, 'var')
        self.current_ast_element.append(variable.xml)
        self.current_scope.append(variable.name, variable)

        self.generate_xhs(GAZ_RHS_TAG, variable.type, is_zero=True)

        self.current_ast_element = parent
        return variable

    def generate_assignment(self):
        """
        @brief assign a random expression to a random mutable variable in scope

        @raise ValueError: when the current scope reports no mutable variable
        """
        possible_vars = self.current_scope.get_all_defined_mutable_vars()
        if len(possible_vars) == 0:
            raise ValueError("No possible variables to assign to!")

        # same structure as a declaration
        parent = self.current_ast_element
        element = build_xml_element([], name=GAZ_ASSIGNMENT_TAG)
        self.current_ast_element.append(element)
        self.current_ast_element = element

        variable = random.choice(possible_vars)
        self.current_ast_element.append(variable.xml)

        self.generate_xhs(GAZ_RHS_TAG, variable.type)

        self.current_ast_element = parent

    def generate_out_stream(self):
        """
        @brief generate an output stream statement
        """
        self.generate_stream(GAZ_OUT_STREAM)

    def generate_in_stream(self):
        """
        @brief generate an input stream statement
        """
        self.generate_stream(GAZ_IN_STREAM)

    def generate_stream(self, stream_type):
        """
        @brief generate a stream element wrapping an expression of any type

        @param stream_type: the value of the element's "type" attribute
        """
        parent = self.current_ast_element
        args = [
            ("type", stream_type),
        ]
        element = build_xml_element(args, name=GAZ_STREAM_TAG)
        self.current_ast_element.append(element)
        self.current_ast_element = element

        self.generate_expression(ANY_TYPE)

        self.current_ast_element = parent

    def generate_variable(self, var_type: str, mut=None):
        """
        @brief create a freshly named variable

        @param var_type: the type of the variable
        @param mut: the qualifier to use (if None -> random qualifier)
        @return the new Variable
        """
        # the name is drawn before the qualifier, matching the original draw order
        name = self.get_name(GAZ_VAR_TAG)
        qualifier = self.get_qualifier() if mut is None else mut
        return Variable(name, var_type, qualifier)

    def generate_literal(self, var_type: str, value=None):
        """
        @brief append a literal element to the current element

        @param var_type: the type of the literal
        @param value: the value of the literal (if None -> random value of var_type)
        """
        if value is None:
            value = self.get_value(var_type)

        args = [
            ("type", var_type),
            ("value", str(value)),
        ]
        element = build_xml_element(args, name=GAZ_LIT_TAG)
        self.current_ast_element.append(element)

    def make_literal(self, type, value):
        """
        @brief build a literal element without attaching it to the tree

        @param type: the type of the literal
        @param value: the value of the literal, stored as given
        @return the literal element
        """
        args = [
            ("type", type),
            ("value", value),
        ]
        element = build_xml_element(args, name=GAZ_LIT_TAG)
        return element

    def generate_global(self):
        """
        @brief generate a const declaration in the top scope, attached to the AST root

        The current scope and element are restored afterwards.
        """
        current_scope = self.current_scope
        current_element = self.current_ast_element

        self.current_scope = self.current_scope.get_top_scope()
        self.current_ast_element = self.ast

        self.generate_declaration(mut='const')

        self.current_scope = current_scope
        self.current_ast_element = current_element

    def generate_expression(self, expr_type: str, is_zero=False):
        """
        @brief generate an expression of the requested type in the current element

        @param expr_type: the type of the expression
        @param is_zero: emit the literal 0 instead of a random expression
        @raise NotImplementedError: for an unknown expr_type
        """
        if is_zero:
            self.generate_literal(expr_type, value=0)
            return
        elif expr_type == GAZ_INT_KEY or expr_type == GAZ_FLOAT_KEY:
            # NOTE(review): float expressions are generated as int expressions and
            # generate_float_expr is never called - confirm whether this is deliberate.
            self.generate_int_expr()
        elif expr_type == GAZ_BOOL_KEY:
            if random.random() < 0.5:
                self.generate_bool_expr()
            else:
                self.generate_comp_expr()
        elif expr_type == GAZ_CHAR_KEY:
            self.generate_char_expr()
        elif expr_type == ANY_TYPE:  # TODO implement the choice of any type
            self.generate_int_expr()
        else:
            raise NotImplementedError(f"Expression type {expr_type} not implemented")

    def generate_routine_args(self):
        """
        @brief generate a random number of arguments within properties.number-of-arguments

        Each argument is also registered in the scope that is current at call time.

        @return the list of generated arguments
        """
        bounds = self.settings['properties']['number-of-arguments']
        number = random.randint(bounds['min'], bounds['max'])
        args = []
        for _ in range(number):
            arg = self.generate_arg()
            args.append(arg)
            self.current_scope.append(arg.name, arg)
        return args

    def generate_arg(self):
        """
        @brief create a single, freshly named argument

        @return the new Argument
        """
        return Argument(self.get_name(GAZ_VAR_TAG), self.get_type(GAZ_VAR_TAG))

    def generate_int_expr(self):
        """
        @brief generate an int expression
        """
        self._generate_expression([GAZ_INT_KEY],
                                  self.int_op_numline,
                                  self.int_op_cutoffs,
                                  self.int_op_options,
                                  self.int_unary)

    def generate_float_expr(self):
        """
        @brief generate a float expression (int operands are allowed as well)
        """
        self._generate_expression([GAZ_FLOAT_KEY, GAZ_INT_KEY],
                                  self.float_op_numline,
                                  self.float_op_cutoffs,
                                  self.float_op_options,
                                  self.float_unary)

    def generate_bool_expr(self):
        """
        @brief generate a bool expression built from bool operands
        """
        self._generate_expression([GAZ_BOOL_KEY],
                                  self.bool_op_numline,
                                  self.bool_op_cutoffs,
                                  self.bool_op_options,
                                  self.bool_unary)

    def generate_char_expr(self):
        """
        @brief generate a char expression
        """
        self._generate_expression([GAZ_CHAR_KEY],
                                  self.char_op_numline,
                                  self.char_op_cutoffs,
                                  self.char_op_options)

    def generate_comp_expr(self):
        """
        @brief generate a bool expression that compares two non-bool operands
        """
        self._generate_expression([GAZ_BOOL_KEY],
                                  self.comp_op_numline,
                                  self.comp_op_cutoffs,
                                  self.comp_op_options,
                                  comparison=True)

    def push_scope(self, xml_element: ET.Element = None):
        """
        @brief open a new scope enclosed by the current one and make it current

        @param xml_element: unused
        """
        scope = Scope(self.current_scope)
        self.symbol_table.append(scope)
        self.current_scope = scope

    def pop_scope(self):
        """
        @brief make the enclosing scope current again
        """
        self.current_scope = self.current_scope.enclosing_scope

    # TODO revamp the random value generations
    @staticmethod
    def _pick_weighted(weighted_keys):
        """
        @brief pick one key with probability proportional to its weight

        Each key owns a half-open range [start, start + weight) on a number line of
        length sum(weights); a roll in [0, sum(weights) - 1] selects the owning key.

        @param weighted_keys: iterable of (key, weight) pairs with integer weights
        @return the selected key
        @raise ValueError: when the weights do not add up to a positive number
        """
        pairs = list(weighted_keys)
        total = sum(weight for _, weight in pairs)

        # randint includes both end points, hence total - 1
        roll = random.randint(0, total - 1)

        upper = 0
        for key, weight in pairs:
            upper += weight
            if roll < upper:
                return key

        raise ValueError("Internal Error, please report the stack trace to me")

    def get_qualifier(self):
        """
        @brief get a random qualifier from the list of possible qualifiers

        @return a qualifier as a string
        """
        weights = self.settings["misc-weights"]["type-qualifier-weights"]
        return self._pick_weighted([('const', weights["const"]), ('var', weights["var"])])

    def get_routine_type(self):
        """
        @brief get a random routine kind, weighted by the routine-weights settings

        @return one of the keys of settings["routine-weights"]
        """
        return self._pick_weighted(self.settings["routine-weights"].items())

    # TODO everything should be fast failed
    def get_value(self, type):
        """
        @brief get a random value of the given type

        @param type: the type of value to generate
        @return an int, a float, a bool, or a single-quoted letter for chars
        @raise TypeError: for a type without a generator
        """
        if type == GAZ_INT_KEY:
            if self.settings["properties"]["generate-max-int"]:
                return random.randint(-2147483648, 2147483647)
            else:
                return random.randint(-1000, 1000)
        elif type == GAZ_FLOAT_KEY:
            return random.uniform(-1000, 1000)
        elif type == GAZ_BOOL_KEY:
            return random.choice([True, False])
        elif type == GAZ_CHAR_KEY:
            return "'" + random.choice(string.ascii_letters) + "'"
        else:
            raise TypeError(f"Unimplemented generator for type: {type}")

    def get_name(self, name_type):
        """
        @brief get a random name

        With use-english-words the name is drawn from the matching word pool and removed
        from it, so it cannot be handed out twice; otherwise it is a random string of
        ASCII letters whose length lies within properties.id-length.

        @param name_type: GAZ_VAR_TAG for a variable name, anything else for a routine name
        @return the name as a string
        """
        properties = self.settings['properties']
        if not properties['use-english-words']:
            length = random.randint(properties['id-length']['min'],
                                    properties['id-length']['max'])
            return ''.join(random.choices(string.ascii_letters, k=length))

        pool = self.variable_names if name_type == GAZ_VAR_TAG else self.routine_names
        choice = random.choice(pool)
        pool.remove(choice)
        return choice

    def get_type(self, tag):  # TODO Add support for composite types
        """
        @brief get a type for the given kind of element

        Currently always 'int'; the weighted selection below is not reached yet.

        @param tag: the kind of element the type is for
        @return the type as a string
        """
        return 'int'  # TODO Add support for all types

        if tag in [GAZ_PROCEDURE_TAG, GAZ_FUNCTION_TAG, GAZ_VAR_TAG]:
            # only procedures may be void
            candidates = [
                (key, weight)
                for key, weight in self.settings["type-weights"]["value-types"].items()
                if not (key == GAZ_VOID_TYPE and tag != GAZ_PROCEDURE_TAG)
            ]
            return self._pick_weighted(candidates)

    ### LOOP HELPERS ###
    def generate_loop_condition_check(self, loop_var: Variable):
        """
        @brief generates the loop condition check

        Ensures that the loop does not iterate more than max-loop-iterations times

        @param loop_var: the int counter of the loop
        """
        # loop var is always an int
        assert loop_var.type == GAZ_INT_KEY

        # create a conditional xml tag
        parent = self.current_ast_element
        if_stmt = build_xml_element([], name=GAZ_IF_TAG)
        parent.append(if_stmt)
        self.current_ast_element = if_stmt

        # add the check
        # 'if loop_var >= self.settings['generation-options']['max-loop-iterations']: break'
        operation = build_xml_element([("op", ">=")], name=GAZ_OPERATOR_TAG)
        rhs = self._loop_heloper(loop_var, operation)
        # an int literal carries the bare number, like every other int literal here
        max_iterations = self.settings['generation-options']['max-loop-iterations']
        rhs.append(self.make_literal(GAZ_INT_KEY, str(max_iterations)))

        true_block = build_xml_element([], name=GAZ_BLOCK_TAG)
        if_stmt.append(true_block)
        self.current_ast_element = true_block
        break_stmt = build_xml_element([], name=GAZ_BREAK_TAG)
        true_block.append(break_stmt)

        # return everything to normalcy
        self.current_ast_element = parent

    def _loop_heloper(self, loop_var, operation):
        """
        @brief attach operation to the current element with loop_var as its lhs

        @effects leaves self.current_ast_element pointing at operation

        @param loop_var: the variable referenced on the left-hand side
        @param operation: the operator element to attach and fill
        @return the empty rhs element of operation, for the caller to fill
        """
        self.current_ast_element.append(operation)
        self.current_ast_element = operation

        lhs = build_xml_element([], name=GAZ_LHS_TAG)
        operation.append(lhs)

        var = build_xml_element([("name", loop_var.name), ("type", loop_var.type)],
                                name=GAZ_VAR_TAG)
        lhs.append(var)

        rhs = build_xml_element([], name=GAZ_RHS_TAG)
        operation.append(rhs)
        return rhs

    def generate_loop_condition_increment(self, loop_var):
        """
        @brief generates the assignment 'loop_var = loop_var + 1'

        @param loop_var: the int counter of the loop
        """
        assert loop_var.type == GAZ_INT_KEY

        parent = self.current_ast_element
        assignment = build_xml_element([], name=GAZ_ASSIGNMENT_TAG)
        parent.append(assignment)
        self.current_ast_element = assignment

        # append the variable
        self.current_ast_element.append(loop_var.xml)

        # add the increment 'loop_var += 1'
        assn_rhs = build_xml_element([], name=GAZ_RHS_TAG)
        self.current_ast_element.append(assn_rhs)
        self.current_ast_element = assn_rhs

        operation = build_xml_element([("op", "+")], name=GAZ_OPERATOR_TAG)
        rhs = self._loop_heloper(loop_var, operation)
        rhs.append(self.make_literal(GAZ_INT_KEY, '1'))

        # return everything to normalcy
        self.current_ast_element = parent

    ### HELPER FUNCTIONS ###
    def make_element(self, name: str, keys: list[tuple[str, Any]]) -> ET.Element:
        """
        @brief make an xml element for the ast

        @effects modifies self.current_ast_element

        @param name: the tag for the element
        @param keys: a list of tuple containing keys for the element
        @return the new element, which is now the current element
        """
        element = build_xml_element(keys, name=name)
        # the very first element has no parent to attach to
        if self.current_ast_element is not None:
            self.current_ast_element.append(element)
        self.current_ast_element = element
        return element

    def make_scoped_element(self, name, keys) -> ET.Element:
        """
        @brief make an xml element for the ast with a scope

        @param name: the tag for the element
        @param keys: a list of tuple containing keys for the element
        @return the element that was current before the call (pass it to exit_scoped_element)
        """
        parent = self.current_ast_element
        self.push_scope()
        self.make_element(name, keys)
        return parent

    def exit_scoped_element(self, parent):
        """
        @brief leave the current element and return to parent

        @param parent: the enclosing element to return to
        """
        self.pop_scope()
        self.current_ast_element = parent

    def _generate_from_options(self, cutoffs, number_line, options):
        """
        @brief run randomly selected generators until a termination roll succeeds

        cutoffs[i] is the exclusive upper bound of option i on the number line, so option
        i owns the half-open range [cutoffs[i - 1], cutoffs[i]) (starting at 0).

        @param cutoffs: ascending cumulative upper bounds, one per option index
        @param number_line: the length of the number line
        @param options: dict of option index -> zero-argument generator; indices may be
                        missing (presumably removed by filter_options)
        """
        while random.random() >= self.settings['block-termination-probability']:
            roll = random.randrange(number_line)

            # first range whose upper bound lies above the roll
            index = next((i for i, upper in enumerate(cutoffs) if roll < upper), None)
            if index is None:
                continue

            try:
                options[index]()
            except (KeyError, ValueError):
                # KeyError: the option is not available in this pass;
                # ValueError: the generator could not produce anything right now.
                # Either way this roll yields no statement and the loop moves on.
                continue

    def _generate_expr(self, comparison, expr_type, op, unary):
        """
        @brief emit the element for the chosen operator

        @param comparison: whether the operands are compared (operand type is then drawn
                           from the comparable types instead of expr_type)
        @param expr_type: a list of candidate types for the expression
        @param op: the chosen operator
        @param unary: the operators that must be emitted as unary operators
        """
        if op in unary:
            self.generate_unary(op, random.choice(expr_type))
        elif op == GAZ_BRACKET_TAG:
            self.generate_bracket(random.choice(expr_type))
        elif comparison:
            if op in ['equality', 'inequality']:
                operand_types = [GAZ_INT_KEY, GAZ_FLOAT_KEY, GAZ_CHAR_KEY]
            else:
                operand_types = [GAZ_INT_KEY, GAZ_FLOAT_KEY]
            self.generate_binary(op, random.choice(operand_types))
        else:
            self.generate_binary(op, random.choice(expr_type))