# gazprea-fuzzer-python/ast_generator/ast_generator.py
#
# Repository web-view header captured with the file:
#   338 lines, 10 KiB, Python
#   last change: 2023-11-17 16:57:53 -07:00
import json
import random
from fuzzingbook.Grammars import is_valid_grammar, convert_ebnf_grammar, Grammar
from isla.solver import ISLaSolver
# from gazprea_ast_grammar import GAZPREA_TOP_LEVEL
# import gazprea_ast_grammar
from ast_parser.ast_parser import AstParser
import xml.etree.ElementTree as ET
# --- Type names -------------------------------------------------------------
GAZ_VOID_TYPE = 'void'
VOID_TYPE = 'void'
GAZ_TYPES = ['int']
GAZ_FLOAT_KEY = 'float'
GAZ_INT_KEY = 'int'
# Sentinel passed to the populate methods meaning "pick any supported type".
ANY_TYPE = "any"

# --- Element tags -----------------------------------------------------------
GAZ_BLOCK_TAG = 'block'
# The left-hand-side tag was previously assigned to GAZ_RHS_TAG and then
# immediately overwritten by the 'rhs' assignment; it now has its own name.
GAZ_LHS_TAG = 'lhs'
GAZ_RHS_TAG = 'rhs'
GAZ_FUNCTION_TAG = 'function'
GAZ_PROCEDURE_TAG = 'procedure'
GAZ_OPERATOR_TAG = "operator"
GAZ_LIT_TAG = "literal"
GAZ_VAR_TAG = "variable"

# --- Attribute keys ---------------------------------------------------------
GAZ_RETURN_KEY = "return_type"
GAZ_OP_KEY = "op"
GAZ_NAME_KEY = "name"
GAZ_QUALIFIER_KEY = "mut"
GAZ_VAL_KEY = "value"
GAZ_TY_KEY = "type"

# --- Pools the generator draws from -----------------------------------------
VAR_NAMES = [
    'alsdjf', 'asldfjlks', 'qowiuut', 'qoiur', 'qwioue',
    'qoyiyut', 'llkjfg', 'kdjkdjf', 'asdjkfeey', 'jdhjfjheee',
]
ROUTINE_NAMES = ['bees', 'beans', 'hell']
GAZ_INT_OPS = ['*', '+', '-', '/', '%']
def find_variables(AST):
    """Unimplemented placeholder: accepts ``AST``, does nothing, returns ``None``."""
    return None
def set_variables(variable_names, variables):
    """Unimplemented placeholder: ignores both arguments and returns ``None``."""
    return None
def set_routines(routine_names, routines):
    """Unimplemented placeholder: ignores both arguments and returns ``None``."""
    return None
def type_check(AST, routines, variables):
    """Unimplemented placeholder: ignores all arguments and returns ``None``."""
    return None
class AstGenerator:
    """Generate XML AST skeletons from a grammar and fill in their attributes.

    Skeleton elements come from an ISLa solver run over the supplied grammar
    (the ``generate_*`` methods); the ``populate_*`` methods then set type,
    name, operator, qualifier and value attributes on those elements.
    """

    def __init__(self, grammar: "Grammar", params: dict):
        """Set default tuning values, then override them from ``params``.

        :param grammar: grammar to generate from; must satisfy
            ``is_valid_grammar``.
        :param params: mapping of attribute name to value; every entry is
            copied onto the instance with ``setattr``.
        """
        self.void_probability = 20
        self.int_low = -2 ** 30
        self.int_high = 2 ** 32 - 1
        self.valid_var_names = VAR_NAMES
        self.max_number_of_vars = 10
        self.valid_routine_names = ROUTINE_NAMES
        self.max_number_of_routines = 3
        self.qualifier_probability = False
        self.var_qualifier_probability = None
        self.const_qualifier_probability = None

        for key, value in params.items():
            setattr(self, key, value)

        # Custom qualifier weights are only honoured when both are supplied.
        if self.var_qualifier_probability is not None and self.const_qualifier_probability is not None:
            self.qualifier_probability = True

        self.ast_list = []
        self.functions = []

        assert (is_valid_grammar(grammar))
        self.grammar = grammar

    def fix_missing_locations(self, AST: str) -> str:
        """Run the module-level fix-up helpers over ``AST`` and return it.

        Calls ``find_routines``, ``find_variables``, ``set_variables``,
        ``set_routines`` and ``type_check`` in that order. None of those calls
        hands back a replacement tree, so the ``AST`` argument itself is
        returned.
        """
        variable_names = self.get_variable_list()
        routine_names = self.get_routine_list()

        routines = find_routines(AST)
        variables = find_variables(AST)

        set_variables(variable_names, variables)  # insert types and values
        set_routines(routine_names, routines)  # insert types

        type_check(AST, routines, variables)
        return AST

    def test_samples(self, grammar: "Grammar", iterations: int = 10, start_symbol=None, log: bool = True):
        """Print ``iterations`` solutions the ISLa solver produces for ``grammar``.

        :param grammar: EBNF grammar; converted with ``convert_ebnf_grammar``.
        :param iterations: number of solutions to print; also passed to the
            solver as ``max_number_free_instantiations``.
        :param start_symbol: optional start symbol handed to the solver.
        :param log: currently unused.
        """
        g = convert_ebnf_grammar(grammar)
        solver = ISLaSolver(g, start_symbol=start_symbol, max_number_free_instantiations=iterations)
        for _ in range(iterations):
            tree_str = str(solver.solve())
            print(tree_str)
            # TODO: feed each tree through fix_missing_locations and AstParser
            # (from_xml=True), then unparse it when ``log`` is set.

    def get_variable_list(self):
        """Return the variable names the generator may use, as a new list."""
        return list(self.valid_var_names[: self.max_number_of_vars])

    def get_routine_list(self):
        """Return the routine names the generator may use, as a new list."""
        return list(self.valid_routine_names[: self.max_number_of_routines])

    def populate_operator(self, operator: ET.Element, op, type):
        """Set ``op`` and ``type`` on ``operator`` and populate its children with ``type``."""
        operator.set(GAZ_OP_KEY, op)
        operator.set(GAZ_TY_KEY, type)
        for node in operator:
            self.populate(node, type)
        return operator

    def populate_stream(self, stream: ET.Element, type):
        """Set ``type`` on ``stream`` and populate its children with any type."""
        stream.set(GAZ_TY_KEY, type)
        for node in stream:
            self.populate(node, ANY_TYPE)
        return stream

    def populate_literal(self, literal: ET.Element, type, value):
        """Set the type and value attributes of ``literal``.

        The value is stored as a string: ElementTree cannot serialise
        non-string attribute values.
        """
        literal.set(GAZ_TY_KEY, type)
        literal.set(GAZ_VAL_KEY, str(value))
        return literal

    def populate_variable(self, variable: ET.Element, qualifier, type, name):
        """Set the qualifier, type and name attributes of ``variable``."""
        variable.set(GAZ_QUALIFIER_KEY, qualifier)
        variable.set(GAZ_TY_KEY, type)
        variable.set(GAZ_NAME_KEY, name)
        return variable

    def populate_routine(self, routine: ET.Element, type, name):
        """Set the return type and name of ``routine`` and populate its body.

        A routine already named ``main`` keeps its name. A procedure with a
        non-void type gets a generated ``return`` element appended to its
        ``block`` child, when it has one. Every grandchild of ``routine`` is
        then populated, except ``return`` elements, which only receive the
        routine's type.
        """
        routine.set(GAZ_RETURN_KEY, type)
        if routine.get(GAZ_NAME_KEY) != "main":
            routine.set(GAZ_NAME_KEY, name)

        if routine.tag == GAZ_PROCEDURE_TAG and type != VOID_TYPE:
            body = routine.find(GAZ_BLOCK_TAG)
            # find() returns None when the routine has no block child.
            if body is not None:
                body.append(self.generate_return(type))

        for block in routine:
            for node in block:
                if node.tag != "return":
                    self.populate(node, ANY_TYPE)
                else:
                    node.set(GAZ_TY_KEY, type)
        return routine

    def populate_block(self, element):
        """Populate every child of ``element`` with any type; return ``element``."""
        for node in element:
            self.populate(node, ANY_TYPE)
        return element

    def populate_xhs(self, side: ET.Element, type):
        """Populate every child of an lhs/rhs element with ``type``; return ``side``."""
        for node in side:
            self.populate(node, type)
        return side

    def populate_ast(self, ast: ET.Element):
        """Populate each child of ``ast`` and collect them in a freshly generated block."""
        populated = self.generate_block()
        for node in ast:
            populated.append(self.populate(node, ANY_TYPE))
        return populated

    def populate(self, element: ET.Element, type: str):
        """Dispatch ``element`` to the populate method matching its tag.

        :param element: element to fill in.
        :param type: type to assign; ``ANY_TYPE`` picks one at random from
            ``GAZ_TYPES``.
        :return: the populated element. Elements with an unhandled tag are
            returned unchanged so callers can always append the result.
        """
        if type == ANY_TYPE:
            type = random.choice(GAZ_TYPES)

        tag = element.tag
        if tag == GAZ_VAR_TAG:
            return self.populate_variable(element, self.get_qualifier(), type, self.get_name(GAZ_VAR_TAG))
        elif tag == GAZ_LIT_TAG:
            return self.populate_literal(element, type, self.get_value(type))
        elif tag == GAZ_OPERATOR_TAG:
            return self.populate_operator(element, self.get_op(type), type)
        elif tag == GAZ_FUNCTION_TAG:
            return self.populate_routine(element, type, self.get_name(tag))
        elif tag == GAZ_PROCEDURE_TAG:
            type = self.void(type)
            return self.populate_routine(element, type, self.get_name(tag))
        elif tag in (GAZ_RHS_TAG, "lhs"):
            # 'lhs' is spelled literally so this check does not depend on a
            # separate module-level constant for the left-hand side.
            return self.populate_xhs(element, type)
        elif tag == GAZ_BLOCK_TAG:
            return self.populate_block(element)
        return element

    def get_qualifier(self):
        """Return ``'var'`` or ``'const'``, chosen at random by weight.

        Default weights are 80 (var) and 20 (const); the instance's
        ``var_qualifier_probability`` / ``const_qualifier_probability`` are
        used instead when ``qualifier_probability`` is set.

        :raises ValueError: if the two weights do not sum to a positive number.
        """
        var_weight: int = 80
        const_weight: int = 20
        if self.qualifier_probability:
            var_weight = self.var_qualifier_probability
            const_weight = self.const_qualifier_probability

        total = var_weight + const_weight
        if total <= 0:
            raise ValueError("Qualifier weights must sum to a positive number")

        # randrange excludes the upper bound, so every roll maps to a qualifier.
        roll = random.randrange(total)
        return 'var' if roll < var_weight else 'const'

    def get_value(self, type):
        """Return a random value for ``type``.

        Only the int type is supported; it yields an integer in
        ``[int_low, int_high]``.

        :raises TypeError: for any other type.
        """
        if type == GAZ_INT_KEY:
            return random.randint(self.int_low, self.int_high)
        raise TypeError(f"Unimplemented generator for type: {type}")

    def get_name(self, name_type):
        """Return a random name for a variable or routine.

        Names are drawn from the instance's ``valid_var_names`` /
        ``valid_routine_names``, limited to the first ``max_number_of_vars`` /
        ``max_number_of_routines`` entries. Routine names are also recorded in
        ``self.functions``. Any other ``name_type`` yields ``None``.
        """
        if name_type == GAZ_VAR_TAG:
            # Slicing clamps to the list length, so a limit larger than the
            # pool cannot index past its end.
            return random.choice(self.valid_var_names[: self.max_number_of_vars])
        elif name_type in (GAZ_PROCEDURE_TAG, GAZ_FUNCTION_TAG):
            r_name = random.choice(self.valid_routine_names[: self.max_number_of_routines])
            self.functions.append(r_name)
            return r_name
        return None

    def get_op(self, type):
        """Return a random operator symbol for ``type``.

        :raises TypeError: for any type other than int.
        """
        if type == GAZ_INT_KEY:
            # TODO: make the operator pool a parameter people can change
            return random.choice(GAZ_INT_OPS)
        raise TypeError(f"Unimplemented type: {type}")

    def _generate(self, element: "str | None") -> ET.Element:
        """Solve the grammar from start symbol ``element`` and parse the result as XML.

        ``None`` is passed to the solver unchanged as its ``start_symbol``.
        The solved string is printed before it is parsed.
        """
        initial_grammar = convert_ebnf_grammar(self.grammar)
        solver = ISLaSolver(initial_grammar, start_symbol=element)
        ast_str = str(solver.solve())
        print(ast_str)
        return ET.fromstring(ast_str)

    def generate_ast(self) -> ET.Element:
        """Generate an element without specifying a start symbol."""
        return self._generate(None)

    def generate_return(self, type) -> ET.Element:
        """Generate a ``<return>`` element and set its type attribute."""
        elem = self._generate('<return>')
        elem.set(GAZ_TY_KEY, type)
        return elem

    def generate_literal(self) -> ET.Element:
        """Generate an element from the ``<literal>`` start symbol."""
        return self._generate('<literal>')

    def generate_variable(self) -> ET.Element:
        """Generate an element from the ``<variable>`` start symbol."""
        return self._generate('<variable>')

    def generate_operator(self) -> ET.Element:
        """Generate an element from the ``<operator>`` start symbol."""
        return self._generate('<operator>')

    def generate_block(self) -> ET.Element:
        """Generate an element from the ``<block>`` start symbol."""
        return self._generate('<block>')

    def generate_routine(self) -> ET.Element:
        """Generate an element from the ``<routine>`` start symbol."""
        return self._generate('<routine>')

    def generate_main_routine(self) -> ET.Element:
        """Generate an element from the ``<main_routine>`` start symbol."""
        return self._generate('<main_routine>')

    def generate_declaration(self) -> ET.Element:
        """Generate an element from the ``<declaration>`` start symbol."""
        return self._generate('<declaration>')

    def generate_stream(self) -> ET.Element:
        """Generate an element from the ``<stream>`` start symbol."""
        return self._generate('<stream>')

    def void(self, type):
        """Return the void type with ``void_probability`` percent chance, else ``type``."""
        # randrange(100) yields 0..99, so a probability of 100 always fires
        # and a probability of 0 never does.
        if random.randrange(100) < self.void_probability:
            return GAZ_VOID_TYPE
        return type
class AstElement:
    """Common base class for the AST element wrappers in this module.

    The constructor accepts an XML element but records nothing on the
    instance; subclasses store whatever state they need themselves.
    """

    def __init__(self, xml: ET.Element):
        """Accept ``xml`` without storing it."""
        return None
class RoutineCall(AstElement):
    """A routine element together with its dependents and type."""

    def __init__(self, xml: ET.Element, dependents=None, type=None):
        """
        @brief initialise a routine call object

        :param xml: the XML element this object wraps
        :param dependents: objects depending on this routine; a fresh empty
            list is used when omitted
        :param type: type recorded for the routine, ``None`` if not known
        """
        super().__init__(xml)
        # Always set the attribute: the None case used to leave
        # ``self.dependents`` undefined.
        self.dependents = [] if dependents is None else dependents
        self.xml = xml
        self.type = type
class Operator(AstElement):
    """AST element whose attributes are taken from a parameter mapping."""

    def __init__(self, xml: ET.Element, params: json):
        """Initialise the base element, then copy each ``params`` entry onto the instance."""
        super().__init__(xml)
        for attribute, setting in params.items():
            setattr(self, attribute, setting)
def find_routines(AST: str) -> "list[RoutineCall]":
    """
    @brief find the routine elements directly under the root of the AST

    Only direct children of the root element are inspected; procedure and
    function elements are each wrapped in a RoutineCall.

    @param AST: the AST to analyse, as an XML string
    @return the list of routine elements (empty when there are none)
    """
    root = ET.fromstring(AST)
    routine_tags = (GAZ_PROCEDURE_TAG, GAZ_FUNCTION_TAG)
    # Build a real list and return it: the previous code bound the name to
    # the ``list[RoutineCall]`` type alias and never returned anything.
    return [RoutineCall(node) for node in root if node.tag in routine_tags]
if __name__ == '__main__':
    # Running this module as a script currently does nothing. The commented
    # lines below are disabled manual experiments kept for reference.
    pass
    # ast_gen = AstGenerator(GAZPREA_TOP_LEVEL, json.loads("{}"))
    # out: ET.Element = ast_gen.generate_return("int")
    # print(out)
    # gen = AstGenerator("{}")
    # assert is_valid_grammar(gazprea_ast_grammar.GAZPREA_TOP_LEVEL)
    #
    # gen.test_samples(gazprea_ast_grammar.GAZPREA_TOP_LEVEL, iterations=100)
    #
    # initial_grammar = convert_ebnf_grammar(gazprea_ast_grammar.GAZPREA_TOP_LEVEL)
    # solver = ISLaSolver(initial_grammar)
    # constants_tree_str = str(solver.solve())
    # print(constants_tree_str)