import bisect
import random
import string
import xml.etree.ElementTree as ET
from typing import Any, Optional, Union

from english_words import get_english_words_set

# The GAZ_* tag/key constants and ANY_TYPE used throughout come from here.
from constants import *


class AstGenerator:
    """
    Generates an AST from a grammar based on given settings.

    Originally the intention was to use the ISLa library to generate the AST,
    however I found that ISLa is like taking a bulldozer to a sledgehammer's
    job, so I decided to write a procedural generator instead.

    The way we select elements is we take all the settings in their category
    and assign them a range on a number line. Then we pick a random number in
    that range and whichever category it falls into will be selected.
    """

    def __init__(self, settings: dict):
        """
        This class is designed to get the settings from some wrapper class
        that better defines the precise constraints of the language being
        generated. The necessary settings are in the .yaml file.

        TODO this is not generalizable yet

        @param settings: settings for weights and probabilities and lengths
        """
        self.settings = settings

        self.symbol_table = []  # TODO this should be a list of scopes
        global_scope = Scope(None, None)
        self.symbol_table.append(global_scope)  # NOTE for debug
        self.current_scope = global_scope

        # Keep only the words whose length lies inside the configured
        # identifier length range (both bounds inclusive).
        id_length = self.settings['properties']['id-length']
        min_length, max_length = id_length['min'], id_length['max']
        names = get_english_words_set(['web2'], alpha=True)
        var_name_list = [name for name in names if min_length <= len(name) <= max_length]

        # First half of the word list names variables, second half routines.
        half = len(var_name_list) // 2
        self.variable_names = var_name_list[:half]
        self.routine_names = var_name_list[half:]

        self.ast: Optional[ET.Element] = None
        self.current_ast_element: Optional[ET.Element] = None
        self.current_nesting_depth = 0
        self.current_control_flow_nesting_depth = 0

        # Numberlines - For computing probabilities
        self.int_op_options, self.int_op_cutoffs, self.int_op_numline = (
            self.get_numberlines('expression-weights',
                                 ['brackets', 'arithmetic', 'unary'],
                                 [[], [], ['not']]))
        self.int_unary = ['negation', 'noop']

        self.bool_op_options, self.bool_op_cutoffs, self.bool_op_numline = (
            self.get_numberlines('expression-weights',
                                 ['brackets', 'comparison', 'logical', 'unary'],
                                 excluded_values=[[],
                                                  ['less-than-or-equal', 'greater-than-or-equal',
                                                   'less-than', 'greater-than'],
                                                  [],
                                                  ['noop', 'negation']]))
        self.bool_unary = ['not']

        self.float_op_options, self.float_op_cutoffs, self.float_op_numline = (
            self.get_numberlines('expression-weights',
                                 ['brackets', 'arithmetic', 'unary'],
                                 [[], [], ['not']]))
        self.float_unary = ['negation', 'noop']

        self.char_op_options, self.char_op_cutoffs, self.char_op_numline = (
            self.get_numberlines('expression-weights',
                                 ['brackets', 'comparison'],
                                 [[], ['less-than', 'greater-than',
                                       'less-than-or-equal', 'greater-than-or-equal']]))

        self.comp_op_options, self.comp_op_cutoffs, self.comp_op_numline = (
            self.get_numberlines('expression-weights',
                                 ['brackets', 'comparison'],
                                 [[], []]))

    def get_numberlines(self, settings_section: str, subsettings: list[str], excluded_values):
        """
        @brief build a weighted number line from one section of the settings

        Every entry of ``self.settings[settings_section]`` whose key is listed
        in ``subsettings`` contributes options: an int entry contributes one
        option named after its key, a dict entry contributes one option per
        item. Names listed anywhere in ``excluded_values`` are left out.

        The settings themselves are never modified, so the same section can be
        used to build several number lines.

        @param settings_section: top level key of the settings to read
        @param subsettings: keys of that section to take options from
        @param excluded_values: one list of excluded option names per subsetting
        @return (options, cutoffs, number_line): ``options`` maps an index to
                an option name, ``cutoffs[i]`` is the exclusive upper bound of
                option ``i`` and ``number_line`` is the total weight
        @raise TypeError: if a selected entry is neither an int nor a dict
        """
        assert len(subsettings) == len(excluded_values)

        excluded = {name for group in excluded_values for name in group}
        options = {}
        cutoffs = []
        number_line = 0

        for key, value in self.settings[settings_section].items():
            if key not in subsettings:
                continue
            if isinstance(value, int):
                weights = {key: value}
            elif isinstance(value, dict):
                weights = value
            else:
                raise TypeError("invalid setting type. Found " + str(value)
                                + " instead of expected int or dict")

            for name, weight in weights.items():
                if name in excluded:
                    continue
                number_line += weight
                cutoffs.append(number_line)
                options[len(options)] = name

        return options, cutoffs, number_line

    def generate_ast(self):
        """
        @brief generates an AST from a grammar
        """
        self.generate_top_level_block()

    def generate_top_level_block(self):
        """
        @brief create the root block, then main and a random number of routines

        The root element is stored in both ``self.ast`` and
        ``self.current_ast_element``.
        """
        # TODO add constant generation into this block
        element = build_xml_element([], name=GAZ_BLOCK_TAG)
        self.current_ast_element = element
        self.ast = element

        # optional constants here too

        self.generate_main()
        max_routines = self.settings['generation-options']['max-number-of-routines']
        for _ in range(max_routines):
            if random.random() < self.settings['block-termination-probability']:
                break
            self.generate_routine()

    def generate_main(self):
        """
        @brief generate the ``main`` procedure, whose block ends in ``return 0``
        """
        parent = self.current_ast_element
        self.push_scope()

        main_args = [  # TODO refactor these into constants
            ("name", "main"),
            ("return_type", GAZ_INT_KEY),
            ("args", "()"),
        ]
        element = build_xml_element(main_args, name=GAZ_PROCEDURE_TAG)
        self.current_ast_element.append(element)
        self.current_ast_element = element

        self.generate_block(return_stmt=True, return_value="0", return_type=GAZ_INT_KEY)

        self.pop_scope()
        self.current_ast_element = parent

    def generate_block(self, tag=None, return_stmt=False, return_value=None, return_type=None):
        """
        @brief generate a block of statements in a fresh scope

        @param tag: list of (attribute, value) pairs to set on the block element
        @param return_stmt: whether the block ends with a return statement
        @param return_value: literal value to return, or None for a generated
                             expression
        @param return_type: type of the returned value
        """
        if tag is None:
            tag = []

        parent = self.current_ast_element
        self.push_scope()

        element = build_xml_element(tag, name=GAZ_BLOCK_TAG)
        self.current_ast_element.append(element)
        self.current_ast_element = element

        self.generate_statements()
        if return_stmt:
            self.generate_return(return_type=return_type, return_value=return_value)
            # Statements are only dead when they follow a return statement.
            if self.settings['generation-options']['generate-dead-code']:
                self.generate_statements()

        self.pop_scope()
        self.current_ast_element = parent

    def generate_return(self, return_type=None, return_value=None):
        """
        @brief append a return statement to the current element

        @param return_type: type of the returned value; None or the void type
                            produce a bare return
        @param return_value: literal value to return; when None an expression
                             of ``return_type`` is generated instead
        """
        if return_type is None or return_type == GAZ_VOID_TYPE:
            self.current_ast_element.append(build_xml_element([], name=GAZ_RETURN_TAG))
            return

        xml_element = build_xml_element([("type", return_type)], name=GAZ_RETURN_TAG)
        self.current_ast_element.append(xml_element)

        if return_value is not None:
            xml_element.append(self.make_literal(return_type, return_value))
            return

        parent = self.current_ast_element
        self.current_ast_element = xml_element
        self.generate_expression(return_type)
        self.current_ast_element = parent

    def generate_routine(self, routine_type=None):
        """
        @brief generate a routine with arguments and a body ending in a return

        @param routine_type: tag of the routine to generate; chosen at random
                             from the routine weights when None
        """
        if routine_type is None:
            routine_type = self.get_routine_type()

        args = self.generate_routine_args()
        name = self.get_name(routine_type)
        return_type = self.get_type(routine_type)
        routine = Routine(name, routine_type, return_type, args)

        routine_args = [
            ("name", routine.name),
            ("return_type", routine.return_type),
        ]
        element = build_xml_element(routine_args, name=routine.type)
        self.current_ast_element.append(element)

        parent = self.current_ast_element
        self.current_ast_element = element

        self.push_scope()
        self.define_args(routine.arguments)
        self.generate_block(return_stmt=True, return_type=routine.return_type)
        self.pop_scope()

        self.current_ast_element = parent

    def define_args(self, args):
        """
        @brief attach the arguments to the current element and current scope

        @param args: the Argument objects to define
        """
        for arg in args:
            self.current_ast_element.append(arg.xml)
            self.current_scope.append(arg.name, arg)

    def generate_statements(self):
        """
        @brief generate statements until the termination probability triggers

        Each statement kind owns a slice of the number line;
        ``cutoffs[i]`` is the exclusive upper bound of ``generators[i]``.
        """
        # TODO fix the numberline stuff to reflect the settings
        cutoffs = [10, 30, 50, 80, 100, 140, 180]
        generators = [
            self.generate_declaration,
            self.generate_routine_call,
            self.generate_conditional,
            self.generate_loop,
            self.generate_assignment,
            self.generate_out_stream,
            self.generate_in_stream,
        ]
        number_line = cutoffs[-1]

        while True:
            if random.random() < self.settings['block-termination-probability']:
                break

            roll = random.randint(0, number_line - 1)
            generate = generators[bisect.bisect_right(cutoffs, roll)]
            try:
                generate()
            except ValueError:
                # The chosen statement could not be generated (for example an
                # assignment with no mutable variable); try another one.
                continue

    def generate_int_expr(self):
        """@brief generate an integer expression"""
        self._generate_expression([GAZ_INT_KEY],
                                  self.int_op_numline,
                                  self.int_op_cutoffs,
                                  self.int_op_options,
                                  self.int_unary)

    def generate_float_expr(self):
        """@brief generate an expression whose parts are floats or integers"""
        self._generate_expression([GAZ_FLOAT_KEY, GAZ_INT_KEY],
                                  self.float_op_numline,
                                  self.float_op_cutoffs,
                                  self.float_op_options,
                                  self.float_unary)

    def generate_bool_expr(self):
        """@brief generate a boolean expression"""
        self._generate_expression([GAZ_BOOL_KEY],
                                  self.bool_op_numline,
                                  self.bool_op_cutoffs,
                                  self.bool_op_options,
                                  self.bool_unary)

    def generate_char_expr(self):
        """@brief generate a character expression"""
        self._generate_expression([GAZ_CHAR_KEY],
                                  self.char_op_numline,
                                  self.char_op_cutoffs,
                                  self.char_op_options)

    def generate_comp_expr(self):
        """@brief generate a comparison, which evaluates to a boolean"""
        self._generate_expression([GAZ_BOOL_KEY],
                                  self.comp_op_numline,
                                  self.comp_op_cutoffs,
                                  self.comp_op_options,
                                  comparison=True)

    def _generate_expression(self, expr_type: list[str], number_line, cutoffs, options,
                             unary=None, comparison: bool = False):
        """
        @brief generate one expression node below the current element

        A literal is generated instead of an operator when the nesting limit
        is exceeded, when the termination probability triggers, or when the
        number line holds no weight at all.

        @param expr_type: types the operands may take; one is picked at random
        @param number_line: total weight of all operators
        @param cutoffs: exclusive upper bound of each operator on the number line
        @param options: maps an operator index to its name
        @param unary: names of the operators that take a single operand
        @param comparison: whether the operands are compared, in which case
                           their type is independent from ``expr_type``
        """
        if unary is None:
            unary = []

        parent = self.current_ast_element
        self.current_nesting_depth += 1

        too_deep = self.current_nesting_depth > self.settings['generation-options']['max-nesting-depth']
        if (too_deep or number_line <= 0
                or random.random() < self.settings['block-termination-probability']):
            self.generate_literal(random.choice(expr_type))
            self.current_nesting_depth -= 1
            return

        # roll < cutoffs[-1], so the index always addresses an existing option.
        roll = random.randint(0, number_line - 1)
        op = options[bisect.bisect_right(cutoffs, roll)]

        if op in unary:
            self.generate_unary(op, random.choice(expr_type))
        elif op == GAZ_BRACKET_TAG:
            self.generate_bracket(random.choice(expr_type))
        elif comparison:
            if op in ['equality', 'inequality']:
                self.generate_binary(op, random.choice([GAZ_INT_KEY, GAZ_FLOAT_KEY, GAZ_CHAR_KEY]))
            else:
                self.generate_binary(op, random.choice([GAZ_INT_KEY, GAZ_FLOAT_KEY]))
        else:
            self.generate_binary(op, random.choice(expr_type))

        self.current_nesting_depth -= 1
        self.current_ast_element = parent

    def generate_declaration(self, mut=None):
        """
        @brief declare a new variable with an initial value in the current scope

        @param mut: qualifier of the variable; chosen at random when None
        """
        parent = self.current_ast_element

        decl_type = self.get_type(GAZ_VAR_TAG)
        decl_args = [
            ("type", decl_type),
        ]
        element = build_xml_element(decl_args, name=GAZ_DECLARATION_TAG)
        self.current_ast_element.append(element)
        self.current_ast_element = element

        variable = self.generate_variable(decl_type, mut=mut)
        self.current_ast_element.append(variable.xml)
        self.current_scope.append(variable.name, variable)

        self.generate_xhs(GAZ_RHS_TAG, decl_type)  # TODO add real type (decl_type)

        self.current_ast_element = parent

    def generate_binary(self, op, op_type):
        """
        @brief generate a binary operator with a left and a right operand

        @param op: name of the operator
        @param op_type: type of both operands
        @raise ValueError: if ``op`` is empty
        """
        parent = self.current_ast_element

        if op == "":
            raise ValueError("op is empty!")

        args = [
            ("op", op),
            ("type", op_type),
        ]
        element = build_xml_element(args, name=GAZ_OPERATOR_TAG)
        self.current_ast_element.append(element)
        self.current_ast_element = element

        self.generate_xhs(GAZ_LHS_TAG, op_type)
        self.generate_xhs(GAZ_RHS_TAG, op_type)

        self.current_ast_element = parent

    def generate_bracket(self, op_type):
        """
        @brief generate a bracketed expression

        @param op_type: type of the expression inside the brackets
        """
        parent = self.current_ast_element

        args = [
            ("type", op_type),
        ]
        element = build_xml_element(args, name=GAZ_BRACKET_TAG)
        self.current_ast_element.append(element)
        self.current_ast_element = element

        self.generate_xhs(GAZ_RHS_TAG, op_type)

        self.current_ast_element = parent

    def generate_xhs(self, handedness, op_type):
        """
        @brief generate one side of an operation and the expression it holds

        @param handedness: tag of the side to generate
        @param op_type: type of the expression generated below it
        """
        element = build_xml_element([], name=handedness)
        parent = self.current_ast_element
        self.current_ast_element.append(element)
        self.current_ast_element = element

        self.generate_expression(op_type)

        self.current_ast_element = parent

    def generate_unary(self, op, op_type=ANY_TYPE):
        """
        @brief generate a unary operator and its operand

        @param op: name of the operator
        @param op_type: type of the operand
        """
        parent = self.current_ast_element

        args = [
            ("op", op),
            ("type", op_type),
        ]
        element = build_xml_element(args, name=GAZ_UNARY_OPERATOR_TAG)
        self.current_ast_element.append(element)
        self.current_ast_element = element

        self.generate_xhs(GAZ_RHS_TAG, op_type)

        self.current_ast_element = parent

    def generate_routine_call(self):
        """@brief placeholder: routine calls are not generated yet"""
        pass

    def generate_conditional(self):
        """
        @brief generate an if statement with a condition and two blocks

        Nothing is generated once the control flow nesting limit is reached;
        nested conditionals are additionally skipped at random.
        """
        if self.current_control_flow_nesting_depth >= self.settings['generation-options']['max-nesting-depth']:
            return
        if (self.current_control_flow_nesting_depth > 0
                and random.random() < self.settings['block-termination-probability']):
            return

        parent = self.current_ast_element
        element = build_xml_element([], name=GAZ_IF_TAG)
        parent.append(element)
        self.current_ast_element = element

        self.current_control_flow_nesting_depth += 1
        self.push_scope()
        try:
            self.generate_expression(GAZ_BOOL_KEY)
            self.generate_block(tag=[("type", GAZ_TRUE_BLOCK_TAG)])
            self.generate_block(tag=[("type", GAZ_FALSE_BLOCK_TAG)])
        finally:
            # Leave the construct again so that the depth counts nesting and
            # not the total number of constructs generated so far.
            self.pop_scope()
            self.current_control_flow_nesting_depth -= 1
            self.current_ast_element = parent

    def generate_loop(self):
        """
        @brief generate a loop with a condition and a body

        Nothing is generated once the control flow nesting limit is reached;
        nested loops are additionally skipped at random.
        """
        # FIXME generation of infinite loops happens too often...
        if self.current_control_flow_nesting_depth >= self.settings['generation-options']['max-nesting-depth']:
            return
        if (self.current_control_flow_nesting_depth > 0
                and random.random() < self.settings['block-termination-probability']):
            return

        parent = self.current_ast_element
        element = build_xml_element([], name=GAZ_LOOP_TAG)
        parent.append(element)
        self.current_ast_element = element

        self.current_control_flow_nesting_depth += 1
        self.push_scope()
        try:
            self.generate_expression(GAZ_BOOL_KEY)
            self.generate_block()
        finally:
            # Leave the construct again so that the depth counts nesting and
            # not the total number of constructs generated so far.
            self.pop_scope()
            self.current_control_flow_nesting_depth -= 1
            self.current_ast_element = parent

    def generate_assignment(self):
        """
        @brief assign a generated expression to a random mutable variable

        @raise ValueError: if no mutable variable is visible from the current
                           scope
        """
        possible_vars = self.current_scope.get_all_defined_mutable_vars()
        if not possible_vars:
            raise ValueError("No possible variables to assign to!")

        # same structure as a declaration
        parent = self.current_ast_element
        element = build_xml_element([], name=GAZ_ASSIGNMENT_TAG)
        self.current_ast_element.append(element)
        self.current_ast_element = element

        variable = random.choice(possible_vars)
        self.current_ast_element.append(variable.xml)

        self.generate_xhs(GAZ_RHS_TAG, variable.type)

        self.current_ast_element = parent

    def generate_out_stream(self):
        """@brief generate an output stream statement"""
        self.generate_stream(GAZ_OUT_STREAM)

    def generate_in_stream(self):
        """@brief generate an input stream statement"""
        self.generate_stream(GAZ_IN_STREAM)

    def generate_stream(self, stream_type):
        """
        @brief generate a stream statement holding an expression of any type

        @param stream_type: value of the ``type`` attribute of the statement
        """
        parent = self.current_ast_element

        args = [
            ("type", stream_type),
        ]
        element = build_xml_element(args, name=GAZ_STREAM_TAG)
        self.current_ast_element.append(element)
        self.current_ast_element = element

        self.generate_expression(ANY_TYPE)

        self.current_ast_element = parent

    def generate_variable(self, var_type: str, mut=None):
        """
        @brief create a variable with a random name

        @param var_type: type of the variable
        @param mut: qualifier of the variable; chosen at random when None
        @return the new Variable
        """
        qualifier = self.get_qualifier() if mut is None else mut
        return Variable(self.get_name(GAZ_VAR_TAG), var_type, qualifier)

    def generate_literal(self, var_type: str):
        """
        @brief append a literal with a random value to the current element

        @param var_type: type of the literal
        """
        args = [
            ("type", var_type),
            ("value", str(self.get_value(var_type))),
        ]
        element = build_xml_element(args, name=GAZ_LIT_TAG)
        self.current_ast_element.append(element)

    def make_literal(self, type, value):
        """
        @brief build a literal element without attaching it to the AST

        @param type: type of the literal
        @param value: value of the literal
        @return the literal element
        """
        args = [
            ("type", type),
            ("value", value),
        ]
        return build_xml_element(args, name=GAZ_LIT_TAG)

    def generate_global(self):
        """
        @brief declare a constant at the root of the AST, in the top scope

        The current scope and element are restored afterwards.
        """
        current_scope = self.current_scope
        current_element = self.current_ast_element

        self.current_scope = self.current_scope.get_top_scope()
        self.current_ast_element = self.ast

        self.generate_declaration(mut='const')

        self.current_scope = current_scope
        self.current_ast_element = current_element

    def generate_expression(self, expr_type: str):
        """
        @brief generate an expression of the requested type

        @param expr_type: type key of the expression, or ANY_TYPE
        @raise NotImplementedError: if the type is not supported
        """
        if expr_type == GAZ_INT_KEY:
            self.generate_int_expr()
        elif expr_type == GAZ_FLOAT_KEY:
            self.generate_float_expr()
        elif expr_type == GAZ_BOOL_KEY:
            # A boolean is either a logical expression or a comparison.
            if random.random() < 0.5:
                self.generate_bool_expr()
            else:
                self.generate_comp_expr()
        elif expr_type == GAZ_CHAR_KEY:
            self.generate_char_expr()
        elif expr_type == ANY_TYPE:  # TODO implement the choice of any type
            self.generate_int_expr()
        else:
            raise NotImplementedError(f"Expression type {expr_type} not implemented")

    def generate_routine_args(self):
        """
        @brief create a random number of arguments for a routine

        The arguments are not added to any scope here; ``define_args`` does
        that once the scope of the routine exists.

        @return the list of generated Argument objects
        """
        argument_count = self.settings['properties']['number-of-arguments']
        number = random.randint(argument_count['min'], argument_count['max'])
        return [self.generate_arg() for _ in range(number)]

    def generate_arg(self):
        """
        @brief create a single argument with a random name and type
        @return the new Argument
        """
        return Argument(self.get_name(GAZ_VAR_TAG), self.get_type(GAZ_VAR_TAG))

    def push_scope(self, xml_element: ET.Element = None):
        """
        @brief enter a new scope enclosed by the current one

        @param xml_element: element to associate with the new scope
        """
        scope = Scope(self.current_scope, associated_xml=xml_element)
        self.symbol_table.append(scope)
        self.current_scope = scope

    def pop_scope(self):
        """@brief leave the current scope and return to the enclosing one"""
        self.current_scope = self.current_scope.enclosing_scope

    # TODO revamp the random value generations
    def get_qualifier(self):
        """
        @brief get a random qualifier from the list of possible qualifiers

        @return a qualifier as a string
        """
        weights = self.settings["misc-weights"]["type-qualifier-weights"]
        const_weight = weights["const"]
        var_weight = weights["var"]

        # [0, const_weight) selects 'const', the rest of the line 'var'.
        res = random.randint(0, const_weight + var_weight - 1)
        return 'const' if res < const_weight else 'var'

    def get_routine_type(self):
        """
        @brief pick a routine type at random, honouring the routine weights

        @return one of the keys of the ``routine-weights`` settings
        """
        weights = self.settings["routine-weights"]
        return random.choices(list(weights), weights=list(weights.values()))[0]

    # TODO everything should be fast failed
    def get_value(self, type):
        """
        @brief get a random value of the given type

        @param type: type key of the value
        @return the generated value
        @raise TypeError: if no generator exists for the type
        """
        if type == GAZ_INT_KEY:
            if self.settings["properties"]["generate-max-int"]:
                return random.randint(-2147483648, 2147483647)
            return random.randint(-1000, 1000)
        if type == GAZ_FLOAT_KEY:
            return random.uniform(-1000, 1000)
        if type == GAZ_BOOL_KEY:
            return random.choice([True, False])
        if type == GAZ_CHAR_KEY:
            return "'" + random.choice(string.ascii_letters) + "'"
        raise TypeError("Unimplemented generator for type: " + type)

    def get_name(self, name_type):
        """
        @brief get a random name

        Without english words the name is a random string of letters.
        Otherwise variables draw from ``variable_names`` and every other kind
        of name from ``routine_names``.

        @param name_type: tag of the entity that is being named
        @return the name as a string
        """
        if not self.settings['properties']['use-english-words']:
            id_length = self.settings['properties']['id-length']
            length = random.randint(id_length['min'], id_length['max'])
            return ''.join(random.choices(string.ascii_letters, k=length))

        pool = self.variable_names if name_type == GAZ_VAR_TAG else self.routine_names
        # A very short word list can leave the routine half empty.
        return random.choice(pool or self.variable_names)

    def get_op(self, type):
        """
        @brief pick an arithmetic operator at random, honouring the weights

        @param type: type key the operator has to work on
        @return the operator symbol, or None for any type other than integers
        """
        if type != GAZ_INT_KEY:
            return None

        weights = self.settings["expression-weights"]["arithmetic"]
        chosen = random.choices(list(weights), weights=list(weights.values()))[0]
        return get_op(chosen)

    def get_type(self, tag):  # TODO Add support for composite types
        """
        @brief get the type for a variable, argument or routine

        @param tag: tag of the entity that needs a type
        @return the type as a string; always 'int' for now
        """
        # TODO Add support for all types, see _pick_weighted_type
        return 'int'

    def _pick_weighted_type(self, tag):
        """
        @brief pick a value type at random, honouring the type weights

        Not used yet: ``get_type`` always answers 'int'.

        @param tag: tag of the entity that needs a type; the void type is only
                    offered to procedures
        @return the type key, or None if the tag is not supported
        """
        if tag not in [GAZ_PROCEDURE_TAG, GAZ_FUNCTION_TAG, GAZ_VAR_TAG]:
            return None

        weights = {
            key: value
            for key, value in self.settings["type-weights"]["value-types"].items()
            if key != GAZ_VOID_TYPE or tag == GAZ_PROCEDURE_TAG
        }
        return random.choices(list(weights), weights=list(weights.values()))[0]


class Variable:
    """A declared variable together with its XML element."""

    def __init__(self, name: str, type: str, qualifier: str, value: Any = None):
        """
        @param name: name of the variable
        @param type: type of the variable
        @param qualifier: qualifier written to the ``mut`` attribute
        @param value: optional value of the variable
        """
        self.name = name
        self.type = type
        self.value = value
        self.qualifier = qualifier
        self.xml = self._build_xml()

    def _build_xml(self):
        """@return the element describing this variable"""
        args = [
            ('name', self.name),
            ('type', self.type),
            ('mut', self.qualifier),
        ]
        return build_xml_element(args, name=GAZ_VAR_TAG)


class Argument:
    """An argument of a routine together with its XML element."""

    def __init__(self, name: str, type: str):
        """
        @param name: name of the argument
        @param type: type of the argument
        """
        self.name = name
        self.type = type
        self.xml = self._build_xml()

    def __str__(self):
        return self.type + " " + self.name

    def _build_xml(self):
        """@return the element describing this argument"""
        args = [
            ('name', self.name),
            ('type', self.type),
        ]
        return build_xml_element(args, name=GAZ_ARG_TAG)


class Routine:
    """A routine: its name, tag, return type and arguments."""

    def __init__(self, name: str, type: str, return_type: str, args: list[Argument],
                 xml: ET.Element = None):
        """
        @param name: name of the routine
        @param type: tag of the routine
        @param return_type: type the routine returns
        @param args: arguments of the routine
        @param xml: optional element describing the routine
        """
        self.name = name
        self.type = type
        self.return_type = return_type
        self.arguments = args
        self.xml = xml


class Scope:
    """A symbol table for one scope, linked to the scope enclosing it."""

    def __init__(self, enclosing_scope, child_scope=None, associated_xml: ET.Element = None):
        """
        @param enclosing_scope: the parent scope, or None for the top scope
        @param child_scope: optional child scope
        @param associated_xml: optional element this scope belongs to
        """
        self.symbols = {}
        self.enclosing_scope = enclosing_scope
        self.child_scope = child_scope
        self.xml = associated_xml

    def resolve(self, name) -> Optional[Union[Variable, Argument, Routine, ET.Element]]:
        """
        @brief look a name up in this scope only

        @param name: the name to look up
        @return the stored item, or None if this scope does not define it
        """
        return self.symbols.get(name)

    def append(self, name, item: Union[Variable, Argument, Routine]):
        """@brief define ``name`` in this scope"""
        self.symbols[name] = item

    def append_element(self, name, value: ET.Element):
        """@brief define ``name`` in this scope as an XML element"""
        self.symbols[name] = value

    def set(self, name, value: ET.Element):
        """@brief store ``value`` under ``name`` in this scope"""
        self.symbols[name] = value

    def get_all_defined_mutable_vars(self) -> list[Variable]:
        """
        @brief collect the mutable variables of this and all enclosing scopes

        @return the variables, outermost scope first
        """
        own_vars = self._get_mutable_vars()
        if self.enclosing_scope is None:
            return own_vars
        return self.enclosing_scope.get_all_defined_mutable_vars() + own_vars

    def _get_mutable_vars(self) -> list[Variable]:
        """@return the variables of this scope that are not 'const'"""
        return [
            symbol for symbol in self.symbols.values()
            if isinstance(symbol, Variable) and symbol.qualifier != 'const'
        ]

    def get_top_scope(self):
        """@return the outermost scope, the one without an enclosing scope"""
        scope = self
        while scope.enclosing_scope is not None:
            scope = scope.enclosing_scope
        return scope


def build_xml_element(*keys, name):
    """
    @brief create an XML element and set its attributes

    @param keys: the first positional argument is an iterable of
                 (attribute, value) pairs; it may be left out
    @param name: tag of the element
    @return the new element
    """
    elem = ET.Element(name)
    attributes = keys[0] if keys else []  # TODO refactor
    for attribute in attributes:
        elem.set(attribute[0], attribute[1])
    return elem


# Symbol of every operator name known to the generator.
_OPERATOR_SYMBOLS = {
    'addition': '+',
    'noop': '+',
    'subtraction': '-',
    'multiplication': '*',
    'division': '/',
    'modulo': '%',
    'power': '^',
    'or': 'or',
    'and': 'and',
    'equality': '==',
    'inequality': '!=',
    'less-than': '<',
    'less-than-or-equal': '<=',
    'greater-than': '>',
    'greater-than-or-equal': '>=',
    'negation': '-',
    'not': 'not',
    'concatenation': '||',
}


def get_op(op):
    """
    @brief translate an operator name into its symbol

    @param op: name of the operator, e.g. 'addition'
    @return the symbol of the operator, e.g. '+'
    @raise Exception: if the operator name is unknown
    """
    try:
        return _OPERATOR_SYMBOLS[op]
    except KeyError:
        raise Exception("Unknown operator: " + op) from None