From 839ec97166b6e6da1e2919d0faca6bc4edf92967 Mon Sep 17 00:00:00 2001 From: ayrton Date: Sat, 18 Nov 2023 10:59:00 -0700 Subject: [PATCH] Complete overhaul of the generation methods Took 6 hours 24 minutes --- README.md | 3 +- ast_generator/ast_generator.py | 917 ++++++++++++++++------- ast_generator/constants.py | 45 ++ ast_generator/gazprea_ast_grammar.py | 11 +- ast_generator/test/config.yaml | 94 +++ ast_generator/test/test_ast_generator.py | 353 +++++---- ast_generator/test/test_scope.py | 10 + ast_parser/ast_parser.py | 2 +- config.yaml | 110 +-- fuzzer.py | 16 +- 10 files changed, 1077 insertions(+), 484 deletions(-) create mode 100644 ast_generator/constants.py create mode 100644 ast_generator/test/config.yaml create mode 100644 ast_generator/test/test_scope.py diff --git a/README.md b/README.md index 1a5bc2c..2a3a2f2 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,8 @@ This is a hecking fuzzer. It does the thing. ## Requirements - Python 3.11 -- NumPy +- ISLa Solver (`pip install isla-solver`) +- English Words (`pip install english-words`) (so that you don't have an aneurysm while reading random names) ## Usage diff --git a/ast_generator/ast_generator.py b/ast_generator/ast_generator.py index 6892fdb..64e146a 100644 --- a/ast_generator/ast_generator.py +++ b/ast_generator/ast_generator.py @@ -1,338 +1,701 @@ -import json import random - -from fuzzingbook.Grammars import is_valid_grammar, convert_ebnf_grammar, Grammar -from isla.solver import ISLaSolver - -# from gazprea_ast_grammar import GAZPREA_TOP_LEVEL -# import gazprea_ast_grammar -from ast_parser.ast_parser import AstParser - +import string import xml.etree.ElementTree as ET -GAZ_VOID_TYPE = 'void' +from english_words import get_english_words_set -VOID_TYPE = 'void' -GAZ_BLOCK_TAG = 'block' -GAZ_RHS_TAG = 'lhs' -GAZ_RHS_TAG = 'rhs' -GAZ_RETURN_KEY = "return_type" -VAR_NAMES = ['alsdjf', 'asldfjlks', 'qowiuut', 'qoiur', 'qwioue', 'qoyiyut', 'llkjfg', 'kdjkdjf', 'asdjkfeey', - 'jdhjfjheee'] 
-ROUTINE_NAMES = ['bees', 'beans', 'hell'] -GAZ_INT_OPS = ['*', '+', '-', '/', '%'] -GAZ_TYPES = ['int'] -GAZ_FLOAT_KEY = 'float' -GAZ_INT_KEY = 'int' -GAZ_FUNCTION_TAG = 'function' -GAZ_PROCEDURE_TAG = 'procedure' -GAZ_OPERATOR_TAG = "operator" -GAZ_LIT_TAG = "literal" -GAZ_VAR_TAG = "variable" -GAZ_OP_KEY = "op" -GAZ_NAME_KEY = "name" -GAZ_QUALIFIER_KEY = "mut" -GAZ_VAL_KEY = "value" -GAZ_TY_KEY = "type" -ANY_TYPE = "any" - - -def find_variables(AST): - pass - - -def set_variables(variable_names, variables): - pass - - -def set_routines(routine_names, routines): - pass - - -def type_check(AST, routines, variables): - pass +from ast_generator.constants import * class AstGenerator: + """ + Generates an AST from a grammar based on given settings - def __init__(self, grammar: Grammar, params: json): - self.void_probability = 20 - self.int_low = -2 ** 30 - self.int_high = 2 ** 32 - 1 - self.valid_var_names = VAR_NAMES - self.max_number_of_vars = 10 - self.valid_routine_names = ROUTINE_NAMES - self.max_number_of_routines = 3 - self.qualifier_probability = False - self.var_qualifier_probability = None - self.const_qualifier_probability = None - for key, value in params.items(): - setattr(self, key, value) + Originally the intention was to use the ISLa library to generate + the AST, however I found that ISLa is like taking a bulldozer to + a sledgehammer's job, so I decided to write a procedural generator + instead. - if self.var_qualifier_probability is not None and self.const_qualifier_probability is not None: - self.qualifier_probability = True - self.ast_list = [] - self.functions = [] - assert (is_valid_grammar(grammar)) - self.grammar = grammar + The way we select elements is we take all the settings in their + category and assign them a range on a number line. Then we + pick a random number in that range and whichever category it + falls into will be selected. 
+ """ - def fix_missing_locations(self, AST: str) -> str: - variable_names = self.get_variable_list() - routine_names = self.get_routine_list() + def __init__(self, settings: dict): + """ + This class is designed to get the settings from some wrapper class that + better defines the precise constraints of the language being generated - routines = find_routines(AST) - variables = find_variables(AST) + the necessary settings are in the .yaml file and #TODO this is not generalizable yet - set_variables(variable_names, variables) # insert types and values - set_routines(routine_names, routines) # insert types - type_check(AST, routines, variables) + @param settings: settings for weights and probabilities and lengths + """ + self.settings = settings + self.symbol_table = [] # TODO this should be a list of scopes + global_scope = Scope(None, None) + self.symbol_table.append(global_scope) # NOTE for debug + self.current_scope = global_scope - def test_samples(self, grammar: Grammar, iterations: int = 10, start_symbol=None, log: bool = True): - g = convert_ebnf_grammar(grammar) - solver = ISLaSolver(g, start_symbol=start_symbol, max_number_free_instantiations=iterations) - for i in range(iterations): - tree_str = str(solver.solve()) - print(tree_str) - # tree = eval(tree_str) - # print(tree) - # tree = self.fix_missing_locations(tree) - # ast = AstParser(tree, from_xml=True) - # if log: - # ast.unparse() - # code = ast.input - # print(f'{code:40} # {tree_str}') + names = get_english_words_set(['web2'], lower=True) + possible_names = filter(lambda x: self.settings['properties']['id-length']['min'] < len(x) + < self.settings['properties']['id-length']['max'], names) - def get_variable_list(self): + var_name_len = len(list(possible_names)) + var_name_list = list(possible_names) + self.variable_names = var_name_list[0:var_name_len // 2] + self.routine_names = var_name_list[var_name_len // 2:var_name_len] + + self.ast: ET.Element or None = None + self.current_ast_element: 
ET.Element or None = None + self.current_nesting_depth = 0 + self.current_control_flow_nesting_depth = 0 + + def generate_ast(self): + """ + @brief generates an AST from a grammar + """ + self.generate_top_level_block() + + def generate_top_level_block(self): # TODO add constant generation into this block + i = 0 + + element = build_xml_element([], name=GAZ_BLOCK_TAG) + self.current_ast_element = element + + self.ast = element + + # optional constants here too + + self.generate_main() + while i < self.settings['generation-options']['max-number-of-routines']: + if random.random() < self.settings['block-termination-probability']: + break + self.generate_routine() + i += 1 + + def generate_main(self): + parent = self.current_ast_element + self.push_scope() + main_args = [ # TODO refactor these into constants + ("name", "main"), + ("return_type", GAZ_INT_KEY), + ("args", "()"), + ] + element = build_xml_element(main_args, name=GAZ_PROCEDURE_TAG) + self.current_ast_element.append(element) + self.current_ast_element = element + self.generate_block(return_stmt=True, return_value="0", return_type=GAZ_INT_KEY) + self.pop_scope() + self.current_ast_element = parent + + def generate_block(self, tag=None, return_stmt=False, return_value=None, return_type=None): + if tag is None: + tag = [] + parent = self.current_ast_element + self.push_scope() + element = build_xml_element(tag, name=GAZ_BLOCK_TAG) + self.current_ast_element.append(element) + self.current_ast_element = element + self.generate_statements() + if return_stmt: + self.generate_return(return_type=return_type, return_value=return_value) + if self.settings['generation-options']['generate-dead-code']: + self.generate_statements() + self.pop_scope() + self.current_ast_element = parent + + def generate_return(self, return_type=None, return_value=None): + if return_type is None or return_type == GAZ_VOID_TYPE: + self.current_ast_element.append(build_xml_element([], name=GAZ_RETURN_TAG)) + return + else: + if return_value 
is None: + xml_element = build_xml_element([("type", return_type)], name=GAZ_RETURN_TAG) + self.current_ast_element.append(xml_element) + parent = self.current_ast_element + self.current_ast_element = xml_element + self.generate_expression(return_type) + self.current_ast_element = parent + return + else: + xml_element = build_xml_element([("type", return_type)], name=GAZ_RETURN_TAG) + self.current_ast_element.append(xml_element) + parent = self.current_ast_element + self.current_ast_element = xml_element + self.current_ast_element.append(self.make_literal(return_value, return_type)) + self.current_ast_element = parent + return + + def generate_routine(self, routine_type=None): + if routine_type is None: + routine_type = self.get_routine_type() + else: + routine_type = routine_type + + args = self.generate_routine_args() + + name = self.get_name(routine_type) + return_type = self.get_type(routine_type) + + routine = Routine(name, routine_type, return_type, args) + + routine_args = [ + ("name", routine.name), + ("return_type", routine.return_type), + ] + + element = build_xml_element(routine_args, name=routine.type) + self.current_ast_element.append(element) + parent = self.current_ast_element + self.current_ast_element = element + self.push_scope() + + self.define_args(routine.arguments) + + self.generate_block(return_stmt=True, return_type=routine.return_type) + self.pop_scope() + + self.current_ast_element = parent + + def define_args(self, args): + for arg in args: + self.current_ast_element.append(arg.xml) + self.current_scope.append(arg.name, arg) + + def generate_statements(self): + # Number line + number_line = 100 + cutoffs = [10, 30, 50, 80, 100] + options = { + 0: self.generate_declaration, + 1: self.generate_routine_call, + 2: self.generate_conditional, + 3: self.generate_loop, + 4: self.generate_assignment, + 5: self.generate_out_stream, + 6: self.generate_in_stream, + } + + while True: + if random.random() < 
self.settings['block-termination-probability']: + break + + a = random.randint(0, number_line) + i = 0 + for i in range(len(cutoffs) - 1): + if cutoffs[i] < a < cutoffs[i + 1]: + options[i]() + break + + def generate_int_real_expr(self): + # Number line + number_line = 100 + cutoffs = [10, 30, 50, 80, 100] + options = { #TODO add brackets + 0: "addition", + 1: "subtraction", + 2: "multiplication", + 3: "division", + 4: "modulo", + 5: "power", + 6: "negation", + 7: "noop", + 8: "equality", + 9: "inequality", + 10: "less-than", + 11: "greater-than", + 12: "less-than-or-equal", + 13: "greater-than-or-equal", + } + + unary = ["negation", "noop"] + + self._generate_expression([GAZ_INT_KEY, GAZ_FLOAT_KEY], number_line, cutoffs, options, unary) + + def generate_bool_expr(self): + # Number line + number_line = 100 + cutoffs = [10, 30, 50, 80, 100] + options = { #TODO add brackets # TODO cannot guarantee correctness of comparison since booleans may appear + 0: "equality", + 1: "inequality", + 2: "less-than", + 3: "greater-than", + 4: "less-than-or-equal", + 5: "greater-than-or-equal", + 6: "and", + 7: "or", + 8: "xor", + 9: "not", + } # FIXME sometimes this will return a "" op, need to figure out why + + unary = ["not"] + + self._generate_expression([GAZ_BOOL_KEY], number_line, cutoffs, options, unary) + + def _generate_expression(self, expr_type: list[str], number_line, cutoffs, options, unary=None): + if unary is None: + unary = [] + + parent = self.current_ast_element + self.current_nesting_depth += 1 + + if self.current_nesting_depth > self.settings['generation-options']['max-nesting-depth'] or random.random() < \ + self.settings['block-termination-probability']: + self.generate_literal(random.choice(expr_type)) # TODO add the reals + self.current_nesting_depth -= 1 + return + + op = "" + a = random.randint(0, number_line) + i = 0 + for i in range(len(cutoffs) - 1): + if cutoffs[i] < a < cutoffs[i + 1]: + op = options[i] + break + + if op in unary: + 
self.generate_unary(op, random.choice(expr_type)) + else: + self.generate_binary(op, random.choice(expr_type)) + + self.current_nesting_depth -= 1 + self.current_ast_element = parent + + def generate_declaration(self): + parent = self.current_ast_element + decl_type = self.get_type(GAZ_VAR_TAG) + decl_args = [ + ("type", decl_type), + ] + element = build_xml_element(decl_args, name=GAZ_DECLARATION_TAG) + self.current_ast_element.append(element) + self.current_ast_element = element + + variable = self.generate_variable(decl_type) + self.current_ast_element.append(variable.xml) + self.current_scope.append(variable.name, variable) + + self.generate_xhs(GAZ_RHS_TAG, decl_type) # TODO add real type (decl_type) + self.current_ast_element = parent + + def generate_binary(self, op, op_type): + parent = self.current_ast_element + args = [ + ("op", op), + ("type", op_type), + ] + element = build_xml_element(args, name=GAZ_OPERATOR_TAG) + self.current_ast_element.append(element) + self.current_ast_element = element + + self.generate_xhs(GAZ_LHS_TAG, op_type) + self.generate_xhs(GAZ_RHS_TAG, op_type) + + self.current_ast_element = parent + + def generate_xhs(self, handedness, op_type): + element = build_xml_element([], name=handedness) + parent = self.current_ast_element + self.current_ast_element.append(element) + self.current_ast_element = element + + self.generate_expression(op_type) + + self.current_ast_element = parent + + def generate_unary(self, op, op_type=ANY_TYPE): + parent = self.current_ast_element + args = [ + ("op", op), + ("type", op_type), + ] + element = build_xml_element(args, name=GAZ_UNARY_OPERATOR_TAG) + self.current_ast_element.append(element) + self.current_ast_element = element + + self.generate_xhs(GAZ_RHS_TAG, op_type) + + self.current_ast_element = parent + + def generate_routine_call(self): pass - def populate_operator(self, operator: ET.Element, op, type): - operator.set(GAZ_OP_KEY, op) - operator.set(GAZ_TY_KEY, type) - for node in operator: - 
node = self.populate(node, type) + def generate_conditional(self): + if self.current_control_flow_nesting_depth >= self.settings['generation-options']['max-nesting-depth']: + return - return operator + if self.current_control_flow_nesting_depth > 0 and random.random() < self.settings['block-termination-probability']: + return - def populate_stream(self, stream: ET.Element, type): - stream.set(GAZ_TY_KEY, type) - for node in stream: - node = self.populate(node, ANY_TYPE) + element = build_xml_element([], name=GAZ_IF_TAG) + self.current_ast_element.append(element) + parent = self.current_ast_element + self.current_ast_element = element - return stream + self.current_control_flow_nesting_depth += 1 - def populate_literal(self, literal: ET.Element, type, value): - literal.set(GAZ_TY_KEY, type) - literal.set(GAZ_VAL_KEY, value) - return literal + self.push_scope() - def populate_variable(self, variable: ET.Element, qualifier, type, name): - variable.set(GAZ_QUALIFIER_KEY, qualifier) - variable.set(GAZ_TY_KEY, type) - variable.set(GAZ_NAME_KEY, name) - return variable + self.generate_expression(GAZ_BOOL_KEY) - def populate_routine(self, routine: ET.Element, type, name): - routine.set(GAZ_RETURN_KEY, type) - if routine.get("name") != "main": - routine.set(GAZ_NAME_KEY, name) + self.generate_block(tag=[("type", GAZ_TRUE_BLOCK_TAG)]) + self.generate_block(tag=[("type", GAZ_FALSE_BLOCK_TAG)]) - if routine.tag == GAZ_PROCEDURE_TAG and type != VOID_TYPE: - routine.find("block").append(self.generate_return(type)) + self.pop_scope() + self.current_ast_element = parent - for block in routine: - for node in block: - if node.tag != "return": - node =self.populate(node, ANY_TYPE) - else: - node.set("type", type) + def generate_loop(self): + if self.current_control_flow_nesting_depth >= self.settings['generation-options']['max-nesting-depth']: + return - return routine + if self.current_control_flow_nesting_depth > 0 and random.random() < 
self.settings['block-termination-probability']: + return - def populate_block(self, element): - for node in element: - return self.populate(node, ANY_TYPE) + parent = self.current_ast_element + element = build_xml_element([], name=GAZ_LOOP_TAG) + self.current_ast_element.append(element) + self.current_ast_element = element - def populate_xhs(self, side: ET.Element, type): - for node in side: - return self.populate(node, type) + self.current_control_flow_nesting_depth += 1 + self.push_scope() + self.generate_expression(GAZ_BOOL_KEY) + self.generate_block() + self.pop_scope() + self.current_ast_element = parent - def populate_ast(self, ast: ET.Element): - populated = self.generate_block() - for node in ast: - populated.append(self.populate(node, ANY_TYPE)) - return populated + def generate_assignment(self): + # same structure as a declaration + parent = self.current_ast_element - # def populate_assignment(self, name, type): + element = build_xml_element([], name=GAZ_ASSIGNMENT_TAG) + self.current_ast_element.append(element) + self.current_ast_element = element - def populate(self, element: ET.Element, type: str): - if type == ANY_TYPE: - type = GAZ_TYPES[random.randint(0, len(GAZ_TYPES) - 1)] + possible_vars = self.current_scope.get_all_defined_mutable_vars() + if len(possible_vars) == 0: + self.generate_global() + possible_vars = self.current_scope.get_all_defined_mutable_vars() - if element.tag == GAZ_VAR_TAG: - return self.populate_variable(element, self.get_qualifier(), type, self.get_name(GAZ_VAR_TAG)) - elif element.tag == GAZ_LIT_TAG: - return self.populate_literal(element, type, self.get_value(type)) - elif element.tag == GAZ_OPERATOR_TAG: - return self.populate_operator(element, self.get_op(type), type) - elif element.tag == GAZ_FUNCTION_TAG: - return self.populate_routine(element, type, self.get_name(element.tag)) - elif element.tag == GAZ_PROCEDURE_TAG: - type = self.void(type) - return self.populate_routine(element, type, self.get_name(element.tag)) - 
elif element.tag in [GAZ_RHS_TAG, GAZ_RHS_TAG]: - return self.populate_xhs(element, type) - elif element.tag == GAZ_BLOCK_TAG: - return self.populate_block(element) + assert len(possible_vars) > 0 + variable = random.choice(possible_vars) + self.current_ast_element.append(variable.xml) + self.generate_xhs(GAZ_RHS_TAG, variable.type) + + self.current_ast_element = parent + + def generate_out_stream(self): + self.generate_stream(GAZ_OUT_STREAM) + + def generate_in_stream(self): + self.generate_stream(GAZ_IN_STREAM) + + def generate_stream(self, stream_type): + parent = self.current_ast_element + args = [ + ("type", stream_type), + ] + element = build_xml_element(args, name=GAZ_STREAM_TAG) + self.current_ast_element.append(element) + self.current_ast_element = element + + self.generate_expression(ANY_TYPE) + + self.current_ast_element = parent + + def generate_variable(self, var_type: str): + return Variable(self.get_name(GAZ_VAR_TAG), var_type, self.get_qualifier()) + + def generate_literal(self, var_type: str): + args = [ + ("type", var_type), + ("value", str(self.get_value(var_type))), + ] + element = build_xml_element(args, name=GAZ_LIT_TAG) + self.current_ast_element.append(element) + + def make_literal(self, type, value): + args = [ + ("type", type), + ("value", value), + ] + element = build_xml_element(args, name=GAZ_LIT_TAG) + return element + + def generate_global(self): + current_scope = self.current_scope + current_element = self.current_ast_element + + self.current_scope = self.current_scope.get_top_scope() + self.current_ast_element = self.ast + + self.generate_declaration() + + self.current_scope = current_scope + self.current_ast_element = current_element + + def generate_expression(self, expr_type: str): + if expr_type == GAZ_INT_KEY or expr_type == GAZ_FLOAT_KEY: + self.generate_int_real_expr() + elif expr_type == GAZ_BOOL_KEY: + self.generate_bool_expr() + elif expr_type == ANY_TYPE: # TODO implement the choice of any type + 
self.generate_int_real_expr() + else: + raise NotImplementedError(f"Expression type {expr_type} not implemented") + + def generate_routine_args(self): + number = random.randint(self.settings['properties']['number-of-arguments']['min'], + self.settings['properties']['number-of-arguments']['max']) + args = [] + for i in range(number): + arg = self.generate_arg() + args.append(arg) + self.current_scope.append(arg.name, arg) + return args + + def generate_arg(self): + return Argument(self.get_name(GAZ_VAR_TAG), self.get_type(GAZ_VAR_TAG)) + + def push_scope(self, xml_element: ET.Element = None): + scope = Scope(self.current_scope) + self.symbol_table.append(scope) + self.current_scope = scope + + def pop_scope(self): + self.current_scope = self.current_scope.enclosing_scope + + # TODO revamp the random value generations def get_qualifier(self): - var_weight: int = 80 - const_weight: int = 20 - if self.qualifier_probability: - var_weight = self.var_qualifier_probability - const_weight = self.const_qualifier_probability + """ + @brief get a random qualifier from the list of possible qualifiers - a = random.randint(0, var_weight + const_weight) - if a in range(0, var_weight): - return 'var' - elif a in range(var_weight, var_weight + const_weight): - return 'const' - else: - raise ValueError("Internal Error, please report the stack trace to me") + @return a qualifier as a string + """ + number_line = (self.settings["misc-weights"]["type-qualifier-weights"]["const"] + + self.settings["misc-weights"]["type-qualifier-weights"]["var"] -1 ) + + res = random.randint(0, number_line) + if res in range(0, self.settings["misc-weights"]["type-qualifier-weights"]["const"]): + return 'const' + elif res in range(self.settings["misc-weights"]["type-qualifier-weights"]["const"], + self.settings["misc-weights"]["type-qualifier-weights"]["const"] + + self.settings["misc-weights"]["type-qualifier-weights"]["var"]): + return 'var' + else: + raise ValueError("Internal Error, please report the 
stack trace to me") + + def get_routine_type(self): + cutoffs = [] + values = [] + ops = [] + for key, value in self.settings["routine-weights"].items(): + cutoffs.append(value + sum(cutoffs)) + values.append(value) + ops.append(key) + + res = random.randint(0, sum(values)) + for i in range(len(cutoffs)): + if res < cutoffs[i]: + return ops[i] # TODO everything should be fast faied def get_value(self, type): if type == GAZ_INT_KEY: - return random.randint(self.int_low, self.int_high) + if self.settings["properties"]["generate-max-int"]: + return random.randint(-2147483648, 2147483647) + else: + return random.randint(-1000, 1000) + elif type == GAZ_FLOAT_KEY: + return random.uniform(-1000, 1000) + elif type == GAZ_BOOL_KEY: + return random.choice([True, False]) else: raise TypeError("Unimplemented generator for type: " + type) def get_name(self, name_type): - if name_type == GAZ_VAR_TAG: - return VAR_NAMES[random.randint(0, self.max_number_of_vars - 1)] - elif name_type in [GAZ_PROCEDURE_TAG, GAZ_FUNCTION_TAG]: - r_name = ROUTINE_NAMES[random.randint(0, len(ROUTINE_NAMES) - 1)] - self.functions.append(r_name) - return r_name + """ + @brief get a random name from the list of possible names and add it to the current scope + + @param name_type: + @return: + """ + length = random.randint(self.settings['properties']['id-length']['min'], + self.settings['properties']['id-length']['max']) + name = ''.join(random.choices(string.ascii_letters, k=length)) + return name def get_op(self, type): + if type == GAZ_INT_KEY: - # TODO make this a parameter for peiple to change - return GAZ_INT_OPS[random.randint(0, len(GAZ_INT_OPS) - 1)] - else: - raise TypeError("Unimplemented type: " + type) + cutoffs = [] + values = [] + ops = [] + for key, value in self.settings["expression-weights"]["arithmetic"]: + cutoffs.append(value + sum(cutoffs)) + values.append(value) + ops.append(get_op(key)) - def _generate(self, element: str or None) -> ET.Element: - initial_grammar = 
convert_ebnf_grammar(self.grammar) - solver = ISLaSolver(initial_grammar, start_symbol=element) - ast_str = str(solver.solve()) + res = random.randint(0, sum(values)) + for i in range(len(cutoffs)): + if res < cutoffs[i]: + return ops[i] - print(ast_str) - elem = ET.fromstring(ast_str) - return elem + def get_type(self, tag): # TODO Add support for composite types + return 'int' # TODO Add support for all types + if tag in [GAZ_PROCEDURE_TAG, GAZ_FUNCTION_TAG, GAZ_VAR_TAG]: + cutoffs = [] + values = [] + types = [] + for key, value in self.settings["type-weights"]["value-types"].items(): + if key == GAZ_VOID_TYPE and tag != GAZ_PROCEDURE_TAG: + continue + cutoffs.append(value + sum(cutoffs)) + values.append(value) + types.append(key) - def generate_ast(self) -> ET.Element: - return self._generate(None) + res = random.randint(0, sum(values)) + for i in range(len(cutoffs)): + if res < cutoffs[i]: + return types[i] - def generate_return(self, type) -> ET.Element: - elem = self._generate('') - elem.set(GAZ_TY_KEY, type) - - return elem - - def generate_literal(self) -> ET.Element: - return self._generate('') - - def generate_variable(self) -> ET.Element: - return self._generate('') - - def generate_operator(self) -> ET.Element: - return self._generate('') - - def generate_block(self) -> ET.Element: - return self._generate('') - - def generate_routine(self) -> ET.Element: - return self._generate('') - - def generate_main_routine(self) -> ET.Element: - return self._generate('') - - def generate_declaration(self) -> ET.Element: - return self._generate('') - - def generate_stream(self) -> ET.Element: - return self._generate('') - - def void(self, type): - b = random.randint(0, 100) - if b < self.void_probability: - return GAZ_VOID_TYPE - else: - return type - - - -class AstElement: - - def __init__(self, xml: ET.Element): - pass - - -class RoutineCall(AstElement): - - def __init__(self, xml: ET.Element, dependents=None, type=None): - """ - @brief initialise a routine call 
object - :param xml: - :param dependents: - """ - super().__init__(xml) - if dependents is None: - dependents = [] - else: - self.dependents = dependents - self.xml = xml +class Variable: + def __init__(self, name: str, type: str, qualifier: str, value: any = None): + self.name = name self.type = type + self.value = value + self.qualifier = qualifier + self.xml = self._build_xml() + + def _build_xml(self): + args = [ + ('name', self.name), + ('type', self.type), + ('mut', self.qualifier), + ] + return build_xml_element(args, name=GAZ_VAR_TAG) -class Operator(AstElement): +class Argument: + def __init__(self, name: str, type: str): + self.name = name + self.type = type + self.xml = self._build_xml() - def __init__(self, xml: ET.Element, params: json): - super().__init__(xml) - for key, value in params.items(): - setattr(self, key, value) + def __str__(self): + return self.type + " " + self.name + + def _build_xml(self): + args = [ + ('name', self.name), + ('type', self.type), + ] + return build_xml_element(args, name=GAZ_ARG_TAG) -def find_routines(AST: str): - """ - @brief find all of the routine and call elements in the ast - - @param AST: the AST to analyse - @return the list of routine elements - """ - xml = ET.fromstring(AST) - result = list[RoutineCall] - for node in xml: - if node.tag in [GAZ_PROCEDURE_TAG, GAZ_FUNCTION_TAG]: - routine = RoutineCall(node) - result.append(routine) +class Routine: + def __init__(self, name: str, type: str, return_type: str, args: list[Argument], xml: ET.Element = None): + self.name = name + self.type = type + self.return_type = return_type + self.arguments = args + self.xml = xml + self.xml = xml -if __name__ == '__main__': - pass - # ast_gen = AstGenerator(GAZPREA_TOP_LEVEL, json.loads("{}")) - # out: ET.Element = ast_gen.generate_return("int") - # print(out) +class Scope: + def __init__(self, enclosing_scope, child_scope=None, associated_xml: ET.Element = None): + self.symbols = {} + self.enclosing_scope = enclosing_scope + 
self.child_scope = child_scope + self.xml = associated_xml + + def resolve(self, name) -> ET.Element or None: + if name in self.symbols: + return self.symbols[name] + else: + return None + + def append(self, name, item: Variable or Argument or Routine): + self.symbols[name] = item + + def append_element(self, name, value: ET.Element): + self.symbols[name] = value + + def set(self, name, value: ET.Element): + self.symbols[name] = value + + def get_all_defined_mutable_vars(self) -> list[Variable]: + if self.enclosing_scope is None: + return self._get_mutable_vars() + else: + return self.enclosing_scope.get_all_defined_mutable_vars() + self._get_mutable_vars() + + def _get_mutable_vars(self) -> list[Variable]: + mutable_vars = [] + + for name, var in self.symbols.items(): + if not isinstance(var, Variable): + continue + if var.qualifier != 'const': + mutable_vars.append(self.symbols[name]) + return mutable_vars + + def get_top_scope(self): + if self.enclosing_scope is None: + return self + else: + return self.enclosing_scope.get_top_scope() -# gen = AstGenerator("{}") -# assert is_valid_grammar(gazprea_ast_grammar.GAZPREA_TOP_LEVEL) -# -# gen.test_samples(gazprea_ast_grammar.GAZPREA_TOP_LEVEL, iterations=100) -# -# initial_grammar = convert_ebnf_grammar(gazprea_ast_grammar.GAZPREA_TOP_LEVEL) -# solver = ISLaSolver(initial_grammar) -# constants_tree_str = str(solver.solve()) -# print(constants_tree_str) \ No newline at end of file +def build_xml_element(*keys, name): + elem = ET.Element(name) + for key in list(keys)[0]: # TODO refactor + elem.set(key[0], key[1]) + return elem + + +def get_op(op): + if op == 'addition': + return '+' + elif op == 'subtraction': + return '-' + elif op == 'multiplication': + return '*' + elif op == 'division': + return '/' + elif op == 'modulo': + return '%' + elif op == 'power': + return '^' + elif op == 'or': + return 'or' + elif op == 'and': + return 'and' + elif op == 'equality': + return '==' + elif op == 'inequality': + return '!=' + 
elif op == 'less-than': + return '<' + elif op == 'less-than-or-equal': + return '<=' + elif op == 'greater-than': + return '>' + elif op == 'greater-than-or-equal': + return '>=' + elif op == 'negation': + return '-' + elif op == 'not': + return 'not' + elif op == 'noop': + return '+' + elif op == 'concatenation': + return '||' + else: + raise Exception("Unknown operator: " + op) + diff --git a/ast_generator/constants.py b/ast_generator/constants.py new file mode 100644 index 0000000..72dd7e7 --- /dev/null +++ b/ast_generator/constants.py @@ -0,0 +1,45 @@ +""" +Constants for the ast_generator + +#TODO this could be a dict that can be loaded from a yaml file and thus make this fuzzer more general +""" + +Grammar = dict[str, list[str]] +GAZ_VOID_TYPE = 'void' +VOID_TYPE = 'void' +GAZ_BLOCK_TAG = 'block' +GAZ_LHS_TAG = 'lhs' +GAZ_RHS_TAG = 'rhs' +GAZ_RETURN_KEY = "return_type" +VAR_NAMES = ['alsdjf', 'asldfjlks', 'qowiuut', 'qoiur', 'qwioue', 'qoyiyut', 'llkjfg', 'kdjkdjf', 'asdjkfeey', + 'jdhjfjheee'] +ROUTINE_NAMES = ['bees', 'beans', 'hell'] +GAZ_INT_OPS = ['*', '+', '-', '/', '%'] +GAZ_TYPES = ['int'] +GAZ_FLOAT_KEY = 'float' +GAZ_INT_KEY = 'int' +GAZ_FUNCTION_TAG = 'function' +GAZ_PROCEDURE_TAG = 'procedure' +GAZ_OPERATOR_TAG = "operator" +GAZ_UNARY_OPERATOR_TAG = "unary" +GAZ_LIT_TAG = "literal" +GAZ_VAR_TAG = "variable" +GAZ_OP_KEY = "op" +GAZ_NAME_KEY = "name" +GAZ_QUALIFIER_KEY = "mut" +GAZ_VAL_KEY = "value" +GAZ_TY_KEY = "type" +ANY_TYPE = "any" +GAZ_DECLARATION_TAG = "declaration" +GAZ_IN_STREAM = "std_input" +GAZ_OUT_STREAM = "std_output" +GAZ_STREAM_TAG = "stream" +GAZ_ASSIGNMENT_TAG = "assignment" +GAZ_RETURN_TAG = "return" +GAZ_MAIN = "main" +GAZ_BOOL_KEY = "bool" +GAZ_IF_TAG = "conditional" +GAZ_LOOP_TAG = "loop" +GAZ_TRUE_BLOCK_TAG = "true" +GAZ_FALSE_BLOCK_TAG = "false" +GAZ_ARG_TAG = "argument" diff --git a/ast_generator/gazprea_ast_grammar.py b/ast_generator/gazprea_ast_grammar.py index b48fd91..d714e70 100644 --- 
a/ast_generator/gazprea_ast_grammar.py +++ b/ast_generator/gazprea_ast_grammar.py @@ -1,5 +1,4 @@ -from fuzzingbook.Grammars import Grammar, is_valid_grammar, convert_ebnf_grammar -from isla.solver import ISLaSolver +from ast_generator.constants import Grammar GAZPREA_TOP_LEVEL: Grammar = { # Top level elements @@ -52,11 +51,3 @@ GAZPREA_TOP_LEVEL: Grammar = { '': ['/>'], '': ['