From 2182395310fff79963819e9ac9d7f7b058fb1fdb Mon Sep 17 00:00:00 2001 From: ayrton Date: Fri, 17 Nov 2023 16:57:53 -0700 Subject: [PATCH] Initial Commit --- .gitignore | 242 +++++++++++++ README.md | 29 ++ __init__.py | 0 ast_generator/__init__.py | 0 ast_generator/ast_generator.py | 338 ++++++++++++++++++ ast_generator/gazprea_ast_grammar.py | 62 ++++ ast_generator/test/__init__.py | 0 ast_generator/test/test_ast_generator.py | 193 ++++++++++ .../test/test_generator_integration.py | 54 +++ ast_generator/test/xml/literal_1.xml | 1 + ast_generator/test/xml/operator_1.xml | 8 + ast_generator/test/xml/routine_1.xml | 33 ++ ast_generator/test/xml/routine_2.xml | 33 ++ ast_generator/test/xml/routine_3.xml | 29 ++ ast_generator/test/xml/stream_1.xml | 10 + ast_generator/test/xml/variable_1.xml | 1 + ast_parser/__init__.py | 0 ast_parser/ast_parser.py | 165 +++++++++ ast_parser/ast_solver.py | 13 + ast_parser/params.json | 14 + ast_parser/test/__init__.py | 0 ast_parser/test/input.in | 6 + ast_parser/test/test.xml | 28 ++ ast_parser/test/test_parse_code.py | 107 ++++++ config.yaml | 70 ++++ fuzzer.py | 3 + gazprea_fuzzer.py | 83 +++++ test/__init__.py | 0 28 files changed, 1522 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 __init__.py create mode 100644 ast_generator/__init__.py create mode 100644 ast_generator/ast_generator.py create mode 100644 ast_generator/gazprea_ast_grammar.py create mode 100644 ast_generator/test/__init__.py create mode 100644 ast_generator/test/test_ast_generator.py create mode 100644 ast_generator/test/test_generator_integration.py create mode 100644 ast_generator/test/xml/literal_1.xml create mode 100644 ast_generator/test/xml/operator_1.xml create mode 100644 ast_generator/test/xml/routine_1.xml create mode 100644 ast_generator/test/xml/routine_2.xml create mode 100644 ast_generator/test/xml/routine_3.xml create mode 100644 ast_generator/test/xml/stream_1.xml create mode 100644 ast_generator/test/xml/variable_1.xml create mode 100644 ast_parser/__init__.py create mode 100644 ast_parser/ast_parser.py create mode 100644 ast_parser/ast_solver.py create mode 100644 ast_parser/params.json create mode 100644 ast_parser/test/__init__.py create mode 100644 ast_parser/test/input.in create mode 100644 ast_parser/test/test.xml create mode 100644 ast_parser/test/test_parse_code.py create mode 100644 config.yaml create mode 100644 fuzzer.py create mode 100644 gazprea_fuzzer.py create mode 100644 test/__init__.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4b5f56a --- /dev/null +++ b/.gitignore @@ -0,0 +1,242 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# AWS User-specific +.idea/**/aws.xml + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. +# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# SonarLint plugin +.idea/sonarlint/ + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +ast_parser/test/.tmp \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..1a5bc2c --- /dev/null +++ b/README.md @@ -0,0 +1,29 @@ +# Gazprea Fuzzer + +This is a hecking fuzzer. It does the thing. + +## Requirements +- Python 3.11 +- NumPy + +## Usage + +``` +usage: python -m gazprea_fuzzer.py [-h] [-b SIZE] [--seed SEED] + +Procedurally generate a test case for Gazprea + +positional arguments: + config path to your configuration file + name name of the test case to generate (name.in, name.ins, name.out) + +optional arguments: + -h, --help show the help message and exit + -b, --batch SIZE generate SIZE cases (fuzzer/input/nameX.in, /instream/..., /outputs/...) + --seed SEED rng seed +``` + +## Configuration + +See the [default config file](config.yaml) for all possible options and their default values + diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ast_generator/__init__.py b/ast_generator/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ast_generator/ast_generator.py b/ast_generator/ast_generator.py new file mode 100644 index 0000000..6892fdb --- /dev/null +++ b/ast_generator/ast_generator.py @@ -0,0 +1,338 @@ +import json +import random + +from fuzzingbook.Grammars import is_valid_grammar, convert_ebnf_grammar, Grammar +from isla.solver import ISLaSolver + +# from gazprea_ast_grammar import GAZPREA_TOP_LEVEL +# import gazprea_ast_grammar +from ast_parser.ast_parser import AstParser + +import xml.etree.ElementTree as ET + +GAZ_VOID_TYPE = 'void' + +VOID_TYPE = 'void' +GAZ_BLOCK_TAG = 'block' +GAZ_RHS_TAG = 'lhs' +GAZ_RHS_TAG = 'rhs' +GAZ_RETURN_KEY = "return_type" +VAR_NAMES = ['alsdjf', 'asldfjlks', 'qowiuut', 'qoiur', 'qwioue', 'qoyiyut', 'llkjfg', 'kdjkdjf', 'asdjkfeey', + 'jdhjfjheee'] +ROUTINE_NAMES = ['bees', 'beans', 'hell'] +GAZ_INT_OPS = ['*', '+', '-', '/', '%'] +GAZ_TYPES = ['int'] +GAZ_FLOAT_KEY = 'float' +GAZ_INT_KEY = 'int' +GAZ_FUNCTION_TAG = 'function' +GAZ_PROCEDURE_TAG = 'procedure' +GAZ_OPERATOR_TAG = "operator" +GAZ_LIT_TAG = "literal" +GAZ_VAR_TAG = "variable" +GAZ_OP_KEY = "op" +GAZ_NAME_KEY = "name" +GAZ_QUALIFIER_KEY = "mut" +GAZ_VAL_KEY = "value" +GAZ_TY_KEY = "type" +ANY_TYPE = "any" + + +def find_variables(AST): + pass + + +def set_variables(variable_names, variables): + pass + + +def set_routines(routine_names, routines): + pass + + +def type_check(AST, routines, variables): + pass + + +class AstGenerator: + + def __init__(self, grammar: Grammar, params: json): + self.void_probability = 20 + self.int_low = -2 ** 30 + self.int_high = 2 ** 32 - 1 + self.valid_var_names = VAR_NAMES + self.max_number_of_vars = 10 + self.valid_routine_names = ROUTINE_NAMES + self.max_number_of_routines = 3 + self.qualifier_probability = False + self.var_qualifier_probability = None + self.const_qualifier_probability = None + for key, value in params.items(): + setattr(self, key, value) + + if self.var_qualifier_probability is not None and self.const_qualifier_probability is not None: + self.qualifier_probability = True + self.ast_list = [] + self.functions = [] + assert (is_valid_grammar(grammar)) + self.grammar = grammar + + def fix_missing_locations(self, AST: str) -> str: + variable_names = self.get_variable_list() + routine_names = self.get_routine_list() + + routines = find_routines(AST) + variables = find_variables(AST) + + set_variables(variable_names, variables) # insert types and values + set_routines(routine_names, routines) # insert types + type_check(AST, routines, variables) + + + def test_samples(self, grammar: Grammar, iterations: int = 10, start_symbol=None, log: bool = True): + g = convert_ebnf_grammar(grammar) + solver = ISLaSolver(g, start_symbol=start_symbol, max_number_free_instantiations=iterations) + for i in range(iterations): + tree_str = str(solver.solve()) + print(tree_str) + # tree = eval(tree_str) + # print(tree) + # tree = self.fix_missing_locations(tree) + # ast = AstParser(tree, from_xml=True) + # if log: + # ast.unparse() + # code = ast.input + # print(f'{code:40} # {tree_str}') + + def get_variable_list(self): + pass + + def populate_operator(self, operator: ET.Element, op, type): + operator.set(GAZ_OP_KEY, op) + operator.set(GAZ_TY_KEY, type) + for node in operator: + node = self.populate(node, type) + + return operator + + def populate_stream(self, stream: ET.Element, type): + stream.set(GAZ_TY_KEY, type) + for node in stream: + node = self.populate(node, ANY_TYPE) + + return stream + + def populate_literal(self, literal: ET.Element, type, value): + literal.set(GAZ_TY_KEY, type) + literal.set(GAZ_VAL_KEY, value) + return literal + + def populate_variable(self, variable: ET.Element, qualifier, type, name): + variable.set(GAZ_QUALIFIER_KEY, qualifier) + variable.set(GAZ_TY_KEY, type) + variable.set(GAZ_NAME_KEY, name) + return variable + + def populate_routine(self, routine: ET.Element, type, name): + routine.set(GAZ_RETURN_KEY, type) + if routine.get("name") != "main": + routine.set(GAZ_NAME_KEY, name) + + if routine.tag == GAZ_PROCEDURE_TAG and type != VOID_TYPE: + routine.find("block").append(self.generate_return(type)) + + for block in routine: + for node in block: + if node.tag != "return": + node =self.populate(node, ANY_TYPE) + else: + node.set("type", type) + + return routine + + def populate_block(self, element): + for node in element: + return self.populate(node, ANY_TYPE) + + def populate_xhs(self, side: ET.Element, type): + for node in side: + return self.populate(node, type) + + def populate_ast(self, ast: ET.Element): + populated = self.generate_block() + for node in ast: + populated.append(self.populate(node, ANY_TYPE)) + + return populated + + # def populate_assignment(self, name, type): + + def populate(self, element: ET.Element, type: str): + if type == ANY_TYPE: + type = GAZ_TYPES[random.randint(0, len(GAZ_TYPES) - 1)] + + if element.tag == GAZ_VAR_TAG: + return self.populate_variable(element, self.get_qualifier(), type, self.get_name(GAZ_VAR_TAG)) + elif element.tag == GAZ_LIT_TAG: + return self.populate_literal(element, type, self.get_value(type)) + elif element.tag == GAZ_OPERATOR_TAG: + return self.populate_operator(element, self.get_op(type), type) + elif element.tag == GAZ_FUNCTION_TAG: + return self.populate_routine(element, type, self.get_name(element.tag)) + elif element.tag == GAZ_PROCEDURE_TAG: + type = self.void(type) + return self.populate_routine(element, type, self.get_name(element.tag)) + elif element.tag in [GAZ_RHS_TAG, GAZ_RHS_TAG]: + return self.populate_xhs(element, type) + elif element.tag == GAZ_BLOCK_TAG: + return self.populate_block(element) + + def get_qualifier(self): + var_weight: int = 80 + const_weight: int = 20 + if self.qualifier_probability: + var_weight = self.var_qualifier_probability + const_weight = self.const_qualifier_probability + + a = random.randint(0, var_weight + const_weight) + if a in range(0, var_weight): + return 'var' + elif a in range(var_weight, var_weight + const_weight): + return 'const' + else: + raise ValueError("Internal Error, please report the stack trace to me") + + def get_value(self, type): + if type == GAZ_INT_KEY: + return random.randint(self.int_low, self.int_high) + else: + raise TypeError("Unimplemented generator for type: " + type) + + def get_name(self, name_type): + if name_type == GAZ_VAR_TAG: + return VAR_NAMES[random.randint(0, self.max_number_of_vars - 1)] + elif name_type in [GAZ_PROCEDURE_TAG, GAZ_FUNCTION_TAG]: + r_name = ROUTINE_NAMES[random.randint(0, len(ROUTINE_NAMES) - 1)] + self.functions.append(r_name) + return r_name + + def get_op(self, type): + if type == GAZ_INT_KEY: + # TODO make this a parameter for peiple to change + return GAZ_INT_OPS[random.randint(0, len(GAZ_INT_OPS) - 1)] + else: + raise TypeError("Unimplemented type: " + type) + + def _generate(self, element: str or None) -> ET.Element: + initial_grammar = convert_ebnf_grammar(self.grammar) + solver = ISLaSolver(initial_grammar, start_symbol=element) + ast_str = str(solver.solve()) + + print(ast_str) + elem = ET.fromstring(ast_str) + return elem + + def generate_ast(self) -> ET.Element: + return self._generate(None) + + + def generate_return(self, type) -> ET.Element: + elem = self._generate('') + elem.set(GAZ_TY_KEY, type) + + return elem + + def generate_literal(self) -> ET.Element: + return self._generate('') + + def generate_variable(self) -> ET.Element: + return self._generate('') + + def generate_operator(self) -> ET.Element: + return self._generate('') + + def generate_block(self) -> ET.Element: + return self._generate('') + + def generate_routine(self) -> ET.Element: + return self._generate('') + + def generate_main_routine(self) -> ET.Element: + return self._generate('') + + def generate_declaration(self) -> ET.Element: + return self._generate('') + + def generate_stream(self) -> ET.Element: + return self._generate('') + + def void(self, type): + b = random.randint(0, 100) + if b < self.void_probability: + return GAZ_VOID_TYPE + else: + return type + + + +class AstElement: + + def __init__(self, xml: ET.Element): + pass + + +class RoutineCall(AstElement): + + def __init__(self, xml: ET.Element, dependents=None, type=None): + """ + @brief initialise a routine call object + :param xml: + :param dependents: + """ + super().__init__(xml) + if dependents is None: + dependents = [] + else: + self.dependents = dependents + self.xml = xml + self.type = type + + +class Operator(AstElement): + + def __init__(self, xml: ET.Element, params: json): + super().__init__(xml) + for key, value in params.items(): + setattr(self, key, value) + + +def find_routines(AST: str): + """ + @brief find all of the routine and call elements in the ast + + @param AST: the AST to analyse + @return the list of routine elements + """ + xml = ET.fromstring(AST) + result = list[RoutineCall] + for node in xml: + if node.tag in [GAZ_PROCEDURE_TAG, GAZ_FUNCTION_TAG]: + routine = RoutineCall(node) + result.append(routine) + + +if __name__ == '__main__': + pass + # ast_gen = AstGenerator(GAZPREA_TOP_LEVEL, json.loads("{}")) + # out: ET.Element = ast_gen.generate_return("int") + # print(out) + + +# gen = AstGenerator("{}") +# assert is_valid_grammar(gazprea_ast_grammar.GAZPREA_TOP_LEVEL) +# +# gen.test_samples(gazprea_ast_grammar.GAZPREA_TOP_LEVEL, iterations=100) +# +# initial_grammar = convert_ebnf_grammar(gazprea_ast_grammar.GAZPREA_TOP_LEVEL) +# solver = ISLaSolver(initial_grammar) +# constants_tree_str = str(solver.solve()) +# print(constants_tree_str) \ No newline at end of file diff --git a/ast_generator/gazprea_ast_grammar.py b/ast_generator/gazprea_ast_grammar.py new file mode 100644 index 0000000..b48fd91 --- /dev/null +++ b/ast_generator/gazprea_ast_grammar.py @@ -0,0 +1,62 @@ +from fuzzingbook.Grammars import Grammar, is_valid_grammar, convert_ebnf_grammar +from isla.solver import ISLaSolver + +GAZPREA_TOP_LEVEL: Grammar = { + # Top level elements + '': [''], + '': ['blockblock'], + # TODO constants + + # Routines + '': ['', ''], # TODO forward_declaration + '': [ + 'function name="_NAME_" return_type="_TYPE_" args="_ARGS_"function'], + '': [ + 'procedure name="_NAME_" return_type="_TYPE_" args="_ARGS_"procedure'], + '': [ + 'procedure name="main" return_type="int" args="()"procedure'], + '': ['', ''], + + # Blocks + '': ['blockblock'], + '': ['blockblock'], + '': [ + '', + '', + # '', + # '', # TODO if/else, loop + ], + '': ['', ''], + + # Things that belong on their own lines + '': ['declarationdeclaration'], + '': [''], #, ''], + '': ['returnreturn'], + + '': ['stream type="std_output"stream'], + # '': ['stream type="std_input"stream'], + + # Things that are part of lines + '': ['', '', ''], + '': ['lhslhs'], + '': ['rhsrhs'], + + # Things that have values + '': ['operatoroperator'], + '': ['variable mut="_MODIFIER_" type="_TYPE_" name="_NAME_"'], + '': ['literal type="_TYPE_" value="_VALUE_"'], + + # Helper rules + '': ['<'], + '': ['>'], + '': ['/>'], + '': [' \ No newline at end of file diff --git a/ast_generator/test/xml/operator_1.xml b/ast_generator/test/xml/operator_1.xml new file mode 100644 index 0000000..129f6fc --- /dev/null +++ b/ast_generator/test/xml/operator_1.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/ast_generator/test/xml/routine_1.xml b/ast_generator/test/xml/routine_1.xml new file mode 100644 index 0000000..9875676 --- /dev/null +++ b/ast_generator/test/xml/routine_1.xml @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/ast_generator/test/xml/routine_2.xml b/ast_generator/test/xml/routine_2.xml new file mode 100644 index 0000000..9986a1c --- /dev/null +++ b/ast_generator/test/xml/routine_2.xml @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/ast_generator/test/xml/routine_3.xml b/ast_generator/test/xml/routine_3.xml new file mode 100644 index 0000000..dee2a9c --- /dev/null +++ b/ast_generator/test/xml/routine_3.xml @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/ast_generator/test/xml/stream_1.xml b/ast_generator/test/xml/stream_1.xml new file mode 100644 index 0000000..a884681 --- /dev/null +++ b/ast_generator/test/xml/stream_1.xml @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff --git a/ast_generator/test/xml/variable_1.xml b/ast_generator/test/xml/variable_1.xml new file mode 100644 index 0000000..a61ed5f --- /dev/null +++ b/ast_generator/test/xml/variable_1.xml @@ -0,0 +1 @@ + diff --git a/ast_parser/__init__.py b/ast_parser/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ast_parser/ast_parser.py b/ast_parser/ast_parser.py new file mode 100644 index 0000000..2032591 --- /dev/null +++ b/ast_parser/ast_parser.py @@ -0,0 +1,165 @@ +import os +import shutil +import xml.etree.ElementTree as ET + + +def to_gazprea_type(ty: str): + if ty == "int": + return "integer" + elif ty == "bool": + return "boolean" + elif ty == "string": + return "string" + elif ty == 'void': + return 'void' + else: + raise Exception("Unknown type: " + ty) + + +class AstParser: + def __init__(self, input: str or ET.Element, from_xml: bool = False): + if from_xml: + self.xml = input + self.input = None + else: + self.input = input + self.xml = None + + self.indentation = 0 + + + def parse(self): + if os.path.isdir("/home/stormblessed/Code/gazprea_fuzzer_v0.2/ast_parser/test/.tmp"): + os.system("rm -rf /home/stormblessed/Code/gazprea_fuzzer_v0.2/ast_parser/test/.tmp") + os.mkdir("/home/stormblessed/Code/gazprea_fuzzer_v0.2/ast_parser/test/.tmp") + else: + os.mkdir("/home/stormblessed/Code/gazprea_fuzzer_v0.2/ast_parser/test/.tmp") + with open("/home/stormblessed/Code/gazprea_fuzzer_v0.2/ast_parser/test/.tmp/input.in", "x") as f: + f.write(self.input) + os.system("/home/stormblessed/.local/bin/gazc " + "/home/stormblessed/Code/gazprea_fuzzer_v0.2/ast_parser/test/.tmp/input.in " + "/home/stormblessed/Code/gazprea_fuzzer_v0.2/ast_parser/test/.tmp/output.out " + "/home/stormblessed/Code/gazprea_fuzzer_v0.2/ast_parser/test/.tmp/xml.xml") + self.xml = ET.parse(".tmp/xml.xml") + + def unparse(self): + """ + @brief unparses the xml into valid gazprea code + + :return: a string of valid gazprea code + """ + self.input = "" + for node in self.xml: + self._unparse_node(node) + + def _unparse_node(self, node): + if node.tag not in ["variable", "rhs", "lhs", "literal", "operator"]: + self.input += " " * self.indentation + + if node.tag == "block": + self._block_unparse(node) + elif node.tag == "declaration": + self._declaration_unparse(node) + elif node.tag == "return": + self._return_unparse(node) + elif node.tag == "operator": + self._operator_unparse(node) + elif node.tag == "stream": + self._stream_unparse(node) + elif node.tag == "literal": + self._literal_unparse(node) + elif node.tag == "procedure" or node.tag == "function": + self._routine_unparse(node) + elif node.tag == "variable": + self._variable_unparse(node) + elif node.tag == "rhs" or node.tag == "lhs": + self._xhs_unparse(node) + elif node.tag == "literal": + self._literal_unparse(node) + else: + raise Exception("Unknown tag: " + node.tag) + + def _block_unparse(self, node): + self.input += "{\n" + self.indentation += 4 + for child in node: + self._unparse_node(child) + self.indentation -= 4 + self.input += "}\n\n" + + def _declaration_unparse(self, node): + variable = node.find("variable") + rhs = node.find("rhs") + self._variable_unparse(variable, True) + self.input += "=" + self._unparse_node(rhs) + self.input += ";\n" + + def _variable_unparse(self, node, is_declaration = False): + if is_declaration: + mut = node.get("mut") + type = to_gazprea_type(node.get("type")) + name = node.get("name") + + self.input += "{} {} {} ".format(mut, type, name) + else: + self.input += " {} ".format(node.get("name")) + + def _stream_unparse(self, node): + for child in node: + self._unparse_node(child) + + self.input += "-> {};\n".format(node.get("type")) + + def _literal_unparse(self, node): + self.input += " {} ".format(node.get("value")) + + def _xhs_unparse(self, node): + for child in node: + self._unparse_node(child) + + def _operator_unparse(self, node): + self._xhs_unparse(node.find("lhs")) + self.input += "{}".format(node.get("op")) + self._xhs_unparse(node.find("rhs")) + + def _return_unparse(self, node): + self.input += "return" + for child in node: + self._unparse_node(child) + self.input += ";\n" + + def _routine_unparse(self, node): + return_type = "" + if node.get("return_type") != "": + return_type = "returns " + to_gazprea_type(node.get("return_type")) + + self.input += "{} {}{} {} ".format( + node.tag, + node.get("name"), + node.get("args"), + return_type, + ) + + for child in node: + self._unparse_node(child) + + # self.input += "}\n\n" #blocks are already there + + +if __name__ == '__main__': + input = """ + function art() returns integer { + return 3; + } + procedure main() returns integer { + integer b = art(); + integer a = 1; + a * 42 -> std_output; + return 0; + } + """ + + parser = AstParser(input) + parser.parse() + diff --git a/ast_parser/ast_solver.py b/ast_parser/ast_solver.py new file mode 100644 index 0000000..479ae6e --- /dev/null +++ b/ast_parser/ast_solver.py @@ -0,0 +1,13 @@ +import json + + +class AstSolver: + + def __init__(self, ast: str, params: str): + json.loads(params) + self.ast = ast + for key, value in json.loads(params).items(): + setattr(self, key, value) + + def fix_missing_locations(self, AST): + pass diff --git a/ast_parser/params.json b/ast_parser/params.json new file mode 100644 index 0000000..7ec3d00 --- /dev/null +++ b/ast_parser/params.json @@ -0,0 +1,14 @@ +{ + "num_vars": 10, + "num_globals": 10, + "num_functions": 2, + "num_procedures": 2, + + "arith_ops": 60, + "compare_ops": 30, + "outstream_ops": 50, + "instream_ops": 10, + + "largest_number": 900000, + "smallest_number": -900000 +} \ No newline at end of file diff --git a/ast_parser/test/__init__.py b/ast_parser/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ast_parser/test/input.in b/ast_parser/test/input.in new file mode 100644 index 0000000..558f12a --- /dev/null +++ b/ast_parser/test/input.in @@ -0,0 +1,6 @@ +procedure main() returns integer { + var integer a = 1 ; + a * 42 -> std_output; + return 0 ; +} + diff --git a/ast_parser/test/test.xml b/ast_parser/test/test.xml new file mode 100644 index 0000000..2d1ddb2 --- /dev/null +++ b/ast_parser/test/test.xml @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/ast_parser/test/test_parse_code.py b/ast_parser/test/test_parse_code.py new file mode 100644 index 0000000..0b90547 --- /dev/null +++ b/ast_parser/test/test_parse_code.py @@ -0,0 +1,107 @@ +import unittest + +from ast_parser.ast_parser import AstParser + + +class TestParseCode(unittest.TestCase): + def test_parse_code(self): + input = """ + procedure main() returns integer { + integer a = 1; + a * 42 -> std_output; + return 0; + } + """ + + parser = AstParser(input) + parser.parse() + self.assertIsNotNone(parser.xml) + self.assertEqual(parser.xml.getroot().tag, "block") + + def test_unparse_variable_regular(self): + input = '' + parser = AstParser(input, True) + parser.input = "" + parser._unparse_node(parser.xml) + self.assertIsNotNone(parser.input) + self.assertEqual(" a ", parser.input) + + def test_unparse_variable_declaration(self): + input = '' + parser = AstParser(input, True) + parser.input = "" + parser._variable_unparse(parser.xml, True) + self.assertIsNotNone(parser.input) + self.assertEqual("var integer a ", parser.input) + + + def test_unparse_rhs_single(self): + input = '' + parser = AstParser(input, True) + parser.input = "" + parser._unparse_node(parser.xml) + self.assertIsNotNone(parser.input) + self.assertEqual(" 1 ", parser.input) + + def test_unparse_declaration(self): + input = '' + parser = AstParser(input, True) + parser.input = "" + parser._unparse_node(parser.xml) + self.assertIsNotNone(parser.input) + self.assertEqual("var integer a = 1 ;\n", parser.input) + + def test_unparse_stream(self): + input = ' ' + parser = AstParser(input, True) + parser.input = "" + parser._unparse_node(parser.xml) + self.assertIsNotNone(parser.input) + self.assertEqual(" a * 42 -> std_output;\n", parser.input) + + def test_unparse_block(self): + input = ' ' + parser = AstParser(input, True) + parser.input = "" + parser._unparse_node(parser.xml) + self.assertIsNotNone(parser.input) + self.assertEqual("{\n var integer a = 1 ;\n a * 42 -> std_output;\n return 0 ;\n}\n\n", parser.input) + + def test_unparse_operation_single(self): + input = ' ' + parser = AstParser(input, True) + parser.input = "" + parser._unparse_node(parser.xml) + self.assertIsNotNone(parser.input) + self.assertEqual(" a * 42 ", parser.input) + + def test_unparse_return(self): + input = ' ' + parser = AstParser(input, True) + parser.input = "" + parser._unparse_node(parser.xml) + self.assertIsNotNone(parser.input) + self.assertEqual("return 0 ;\n", parser.input) + + def test_unparse_routine(self): + input = '' + parser = AstParser(input, True) + parser.input = "" + parser._unparse_node(parser.xml) + self.assertIsNotNone(parser.input) + i = ' ' * parser.indentation + self.assertEqual("procedure main() returns integer {\n var integer a = 1 ;\n a * 42 -> std_output;\n return 0 ;\n}\n\n", parser.input) + + def test_unparse_code(self): + with open("test.xml", "r") as input: + parser = AstParser(input.read(), True) + parser.unparse() + self.assertIsNotNone(parser.input) + + with open("input.in", "r") as input: + i = input.read() + self.assertEqual(i, parser.input) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000..da683c6 --- /dev/null +++ b/config.yaml @@ -0,0 +1,70 @@ +# The default configuration for the Gazprea Fuzzer +--- +generation-options: + max-nesting-depth: 5 # maximum nesting depth for statements + max-conditionals-loops: 5 # maximum number of loops/conditionals per routine +properties: + max-range-length: 5 # maximum length of ranges, vectors and tuples, (axa matrices can exist) + id-length: # length of identifiers + min: 1 + max: 10 + function-name-length: # length of function names + min: 1 + max: 10 +expression-weights: # weights for expressions + # the higher a weight, the more likely (0, +inf) + brackets: 10 + + # arithmetic + addition: 80 + subtraction: 80 + multiplication: 30 + division: 10 + modulo: 10 + power: 5 + negation: 20 + + # comparison + equality: 50 + inequality: 50 + less-than: 30 + greater-than: 30 + less-than-or-equal: 10 + greater-than-or-equal: 10 + + # logical + and: 50 + or: 50 + not: 10 + xor: 10 + + # vector/string + generator: 20 + range: 30 + filter: 10 + reverse: 10 + concatenation: 50 + + +statement-weights: # set to 0 for any statements you wish to exclude + variable-declaration: + int-declaration: 50 + float-declaration: 50 + char-declaration: 50 + string-declaration: 50 + bool-declaration: 50 + vector-declaration: 20 + tuple-declaration: 10 + matrix-declaration: 10 + function-call: 20 + conditional: 30 + loop: 20 + assignment: 40 + print: 20 + input: 5 + + return: 5 # probability for a return statement to be placed arbitrarily in the middle of a generated procedure + +block-termination-probability: 0.2 # probability for a block to terminate + + diff --git a/fuzzer.py b/fuzzer.py new file mode 100644 index 0000000..b40de79 --- /dev/null +++ b/fuzzer.py @@ -0,0 +1,3 @@ +class GazpreaFuzzer: + def __init__(self, config: str, ): + pass \ No newline at end of file diff --git a/gazprea_fuzzer.py b/gazprea_fuzzer.py new file mode 100644 index 0000000..fd51e5c --- /dev/null +++ b/gazprea_fuzzer.py @@ -0,0 +1,83 @@ +import json +from xml import etree + +from fuzzingbook import Grammars +from fuzzingbook.Grammars import is_valid_grammar +from isla.solver import ISLaSolver + +import ast_generator.ast_generator +from ast_parser.ast_solver import AstSolver +from ast_parser.ast_parser import AstParser +from ast_generator import gazprea_ast_grammar +import xml.etree.ElementTree as ET +import xml.dom.minidom +import ast_parser + + + +class GazpreaFuzzer(ISLaSolver): + """Produce Gazprea code""" + def __init__(self, + grammar: Grammars, + start_symbol: str = "", + constraint: str = "", + **kwargs) -> None: + """ + @brief initialize a Gazprea code generator + + :param grammar: the grammar from which you would like to generate code + :param start_symbol: the start symbol of the grammar (default "") + :param constraint: any constraints that you would like to impose on the solver + :param kwargs: any extra arguments passed to the ISLaSolver + """ + assert start_symbol in grammar + assert is_valid_grammar(grammar) + + super().__init__(grammar, constraint, start_symbol=start_symbol, **kwargs) + + def fuzz(self) -> str: + """Produce the hecking code""" + AST = AstParser(eval(str(self.solve())), from_xml=True) + AstSolver.fix_missing_locations(AST) + AST.unparse() + + return AST.input + + +if __name__ == '__main__': + gen = ast_generator.ast_generator.AstGenerator(gazprea_ast_grammar.GAZPREA_TOP_LEVEL, json.loads('{}')) + with open("debug/test.xml", 'w') as t: + et = gen.generate_ast() + dom = xml.dom.minidom.parseString(ET.tostring(et).decode('utf-8')) + pretty: str = dom.toprettyxml() + repretty = "" + for line in pretty.split('\n'): + if line.startswith("