diff --git a/ast_generator/ast_generator.py b/ast_generator/ast_generator.py index 3bde50a..e839bf2 100644 --- a/ast_generator/ast_generator.py +++ b/ast_generator/ast_generator.py @@ -4,7 +4,7 @@ import xml.etree.ElementTree as ET from english_words import get_english_words_set -from ast_generator.utils import Variable, Argument, Routine, Scope, build_xml_element +from ast_generator.utils import * from constants import * import keyword @@ -41,98 +41,47 @@ class AstGenerator: self.symbol_table.append(global_scope) # NOTE for debug self.current_scope = global_scope - names = get_english_words_set(['web2'], alpha=True) - possible_names = filter(lambda x: self.settings['properties']['id-length']['max'] <= len(x) <= - self.settings['properties']['id-length']['max'] and not keyword.iskeyword(x), - names) - - var_name_list = list(possible_names) - var_name_len = len(var_name_list) - self.variable_names = var_name_list[0:var_name_len // 2] - self.routine_names = var_name_list[var_name_len // 2:var_name_len] + self._init_names() self.ast: ET.Element or None = None self.current_ast_element: ET.Element or None = None self.current_nesting_depth = 0 self.current_control_flow_nesting_depth = 0 + self._init_numlines() + + def _init_numlines(self): # Numberlines - For computing probabilities self.int_op_options, self.int_op_cutoffs, self.int_op_numline = ( - self.get_numberlines('expression-weights', - ['brackets', 'arithmetic', 'unary'], - [[], [], ['not']])) + get_numberlines('expression-weights', ['brackets', 'arithmetic', 'unary'], [[], [], ['not']], + self.settings)) self.int_unary = ['negation', 'noop'] - self.bool_op_options, self.bool_op_cutoffs, self.bool_op_numline = ( - self.get_numberlines('expression-weights', - ['brackets', 'comparison', 'logical', 'unary'], - excluded_values=[[], ['less-than-or-equal', 'greater-than-or-equal', 'less-than', - 'greater-than'], [], ['noop', 'negation']])) + get_numberlines('expression-weights', ['brackets', 'comparison', 'logical', 'unary'], + excluded_values=[[], ['less-than-or-equal', 'greater-than-or-equal', 'less-than', + 'greater-than'], [], ['noop', 'negation']], + settings=self.settings)) self.bool_unary = ['not'] - self.float_op_options, self.float_op_cutoffs, self.float_op_numline = ( - self.get_numberlines('expression-weights', - ['brackets', 'arithmetic', 'unary'], - [[], [], ['not']])) + get_numberlines('expression-weights', ['brackets', 'arithmetic', 'unary'], [[], [], ['not']], + self.settings)) self.float_unary = ['negation', 'noop'] - self.char_op_options, self.char_op_cutoffs, self.char_op_numline = ( - self.get_numberlines('expression-weights', - ['brackets', 'comparison'], - [[], ['less-than', 'greater-than', 'less-than-or-equal', 'greater-than-or-equal']])) - + get_numberlines('expression-weights', ['brackets', 'comparison'], + [[], ['less-than', 'greater-than', 'less-than-or-equal', 'greater-than-or-equal']], + self.settings)) self.comp_op_options, self.comp_op_cutoffs, self.comp_op_numline = ( - self.get_numberlines('expression-weights', - ['brackets', 'comparison'], - [[], []])) + get_numberlines('expression-weights', ['brackets', 'comparison'], [[], []], self.settings)) - def get_numberlines(self, settings_section: str, subsettings: list[str], excluded_values): - assert len(subsettings) == len(excluded_values) - - number_line = 0 - cutoffs = [] - cutoff = 0 - options = {} - option = 0 - - settings = [] - - for key, value in self.settings[settings_section].items(): - if key in subsettings and key not in excluded_values: # this check needs to be done recursively - if isinstance(value, int): - t = { - key: value - } - settings.append(t) - elif isinstance(value, dict): - settings.append(value) - else: - raise TypeError("invalid setting type. Found " + str(value) + " instead of expected int or dict") - - for v in range(len(settings)): - for i in excluded_values: - for j in i: - if j in settings[v]: - settings[v].pop(j) - - for v in settings: - if isinstance(v, dict): - for key, value in v.items(): - number_line += value - cutoffs.append(cutoff + value) - cutoff += value - options[option] = key - option += 1 - elif isinstance(v, int): - number_line += v - cutoffs.append(cutoff + v) - cutoff += v - options[option] = v - option += 1 - else: - raise TypeError("invalid setting type. Found " + str(v) + " instead of expected int") - - return options, cutoffs, number_line + def _init_names(self): + names = get_english_words_set(['web2'], alpha=True) + possible_names = filter(lambda x: self.settings['properties']['id-length']['max'] <= len(x) <= + self.settings['properties']['id-length']['max'] and not keyword.iskeyword(x), + names) + var_name_list = list(possible_names) + var_name_len = len(var_name_list) + self.variable_names = var_name_list[0:var_name_len // 2] + self.routine_names = var_name_list[var_name_len // 2:var_name_len] def generate_ast(self): """ @@ -140,37 +89,71 @@ class AstGenerator: """ self.generate_top_level_block() - def generate_top_level_block(self): # TODO add constant generation into this block - i = 0 + def make_element(self, name: str, keys: list[tuple[str, any]]) -> ET.Element: + """ + @brief make an xml element for the ast - element = build_xml_element([], name=GAZ_BLOCK_TAG) + @effects modifies self.current_ast_element + + @param name: the tag for the element + @param keys: a list of tuple containing keys for the element + """ + element = build_xml_element(keys, name=name) + if self.current_ast_element is not None: + self.current_ast_element.append(element) self.current_ast_element = element + return element + + def make_scoped_element(self, name, keys) -> ET.Element: + """ + @brief make an xml element for the ast with a scope + + @param name: the tag for the element + @param keys: a list of tuple containing keys for the element + """ + parent = self.current_ast_element + self.push_scope() + self.make_element(name, keys) + return parent + + def exit_scoped_element(self, parent): + """ + @brief leave the current element and return to parent + + @param parent: the enclosing element to return to + """ + self.pop_scope() + self.current_ast_element = parent + + def generate_top_level_block(self): + """ + @brief creates the top-level block containing the whole program + """ + element = self.make_element(GAZ_BLOCK_TAG, []) self.ast = element - # TODO generate constants and forward declarations - while i < self.settings['generation-options']['max-number-of-routines']: + for i in range(random.randint(0, self.settings['generation-options']['max-globals'])): + self.generate_global() + for i in range(self.settings['generation-options']['max-number-of-routines']): if random.random() < self.settings['block-termination-probability']: break self.generate_routine() - i += 1 self.generate_main() + pass def generate_main(self): - parent = self.current_ast_element - self.push_scope() main_args = [ # TODO refactor these into constants - ("name", "main"), - ("return_type", GAZ_INT_KEY), - ("args", "()"), + (GAZ_NAME_KEY, "main"), + (GAZ_RETURN_KEY, GAZ_INT_KEY), ] - element = build_xml_element(main_args, name=GAZ_PROCEDURE_TAG) - self.current_ast_element.append(element) - self.current_ast_element = element + + parent = self.make_scoped_element(GAZ_PROCEDURE_TAG, main_args) + self.generate_block(return_stmt=True, return_value="0", return_type=GAZ_INT_KEY, block_type=GAZ_PROCEDURE_TAG) - self.pop_scope() - self.current_ast_element = parent + + self.exit_scoped_element(parent) def generate_block(self, tag=None, return_stmt=False, return_value=None, return_type=None, block_type=None, loop_var=None): diff --git a/ast_generator/gazprea_ast_grammar.py b/ast_generator/gazprea_ast_grammar.py deleted file mode 100644 index a35b590..0000000 --- a/ast_generator/gazprea_ast_grammar.py +++ /dev/null @@ -1,53 +0,0 @@ -from constants import Grammar - -GAZPREA_TOP_LEVEL: Grammar = { - # Top level elements - '': [''], - '': ['blockblock'], - # TODO constants - - # Routines - '': ['', ''], # TODO forward_declaration - '': [ - 'function name="_NAME_" return_type="_TYPE_" args="_ARGS_"function'], - '': [ - 'procedure name="_NAME_" return_type="_TYPE_" args="_ARGS_"procedure'], - '': [ - 'procedure name="main" return_type="int" args="()"procedure'], - '': ['', ''], - - # Blocks - '': ['blockblock'], - '': ['blockblock'], - '': [ - '', - '', - # '', - # '', # TODO if/else, loop - ], - '': ['', ''], - - # Things that belong on their own lines - '': ['declarationdeclaration'], - '': [''], #, ''], - '': ['returnreturn'], - - '': ['stream type="std_output"stream'], - # '': ['stream type="std_input"stream'], - - # Things that are part of lines - '': ['', '', ''], - '': ['lhslhs'], - '': ['rhsrhs'], - - # Things that have values - '': ['operatoroperator'], - '': ['variable mut="_MODIFIER_" type="_TYPE_" name="_NAME_"'], - '': ['literal type="_TYPE_" value="_VALUE_"'], - - # Helper rules - '': ['<'], - '': ['>'], - '': ['/>'], - '': ['