# Listing metadata: 338 lines, 10 KiB, Python
import json
|
|
import random
|
|
|
|
from fuzzingbook.Grammars import is_valid_grammar, convert_ebnf_grammar, Grammar
|
|
from isla.solver import ISLaSolver
|
|
|
|
# from gazprea_ast_grammar import GAZPREA_TOP_LEVEL
|
|
# import gazprea_ast_grammar
|
|
from ast_parser.ast_parser import AstParser
|
|
|
|
import xml.etree.ElementTree as ET
|
|
|
|
# --- Type names -------------------------------------------------------------
# Both names hold the same string and both are referenced in this file.
GAZ_VOID_TYPE = 'void'
VOID_TYPE = 'void'

# --- XML tag names ----------------------------------------------------------
GAZ_BLOCK_TAG = 'block'
# 'lhs' gets its own name; it used to be assigned to GAZ_RHS_TAG and was then
# immediately overwritten by 'rhs', leaving no constant for the lhs tag.
GAZ_LHS_TAG = 'lhs'
GAZ_RHS_TAG = 'rhs'

# Attribute key under which a routine's return type is stored.
GAZ_RETURN_KEY = "return_type"

# --- Name pools the generator draws identifiers from -------------------------
VAR_NAMES = [
    'alsdjf', 'asldfjlks', 'qowiuut', 'qoiur', 'qwioue', 'qoyiyut', 'llkjfg',
    'kdjkdjf', 'asdjkfeey', 'jdhjfjheee',
]
ROUTINE_NAMES = ['bees', 'beans', 'hell']

# --- Operators and types available to the generator --------------------------
GAZ_INT_OPS = ['*', '+', '-', '/', '%']
GAZ_TYPES = ['int']
GAZ_FLOAT_KEY = 'float'
GAZ_INT_KEY = 'int'

# --- More XML tag names -------------------------------------------------------
GAZ_FUNCTION_TAG = 'function'
GAZ_PROCEDURE_TAG = 'procedure'
GAZ_OPERATOR_TAG = "operator"
GAZ_LIT_TAG = "literal"
GAZ_VAR_TAG = "variable"

# --- XML attribute keys -------------------------------------------------------
GAZ_OP_KEY = "op"
GAZ_NAME_KEY = "name"
GAZ_QUALIFIER_KEY = "mut"
GAZ_VAL_KEY = "value"
GAZ_TY_KEY = "type"

# Placeholder type meaning "pick any concrete type from GAZ_TYPES".
ANY_TYPE = "any"
|
|
|
|
|
|
def find_variables(AST):
    """Placeholder for collecting the variable elements of an AST.

    Not implemented yet: the argument is ignored and ``None`` is returned.

    :param AST: the AST to analyse (presumably the same XML text that
        ``find_routines`` accepts -- TODO confirm once implemented).
    :return: ``None``.
    """
    return None
|
|
|
|
|
|
def set_variables(variable_names, variables):
    """Placeholder for assigning names, types and values to variables.

    Not implemented yet: both arguments are ignored and ``None`` is returned.

    :param variable_names: candidate variable names.
    :param variables: the variables to fill in (whatever ``find_variables``
        ends up returning -- TODO confirm once implemented).
    :return: ``None``.
    """
    return None
|
|
|
|
|
|
def set_routines(routine_names, routines):
    """Placeholder for assigning names and types to routines.

    Not implemented yet: both arguments are ignored and ``None`` is returned.

    :param routine_names: candidate routine names.
    :param routines: the routines to fill in (whatever ``find_routines``
        returns).
    :return: ``None``.
    """
    return None
|
|
|
|
|
|
def type_check(AST, routines, variables):
    """Placeholder for type-checking a populated AST.

    Not implemented yet: all arguments are ignored and ``None`` is returned.

    :param AST: the AST to check.
    :param routines: the routines found in ``AST``.
    :param variables: the variables found in ``AST``.
    :return: ``None``.
    """
    return None
|
|
|
|
|
|
class AstGenerator:
    """Generate AST skeletons from a grammar and fill them with concrete data.

    Skeletons are produced as XML text by an ``ISLaSolver`` and parsed into
    ``xml.etree.ElementTree`` elements (see ``_generate``). The ``populate*``
    methods then write types, names, operators and literal values, chosen
    with :mod:`random`, onto those elements.
    """

    def __init__(self, grammar: "Grammar", params: dict):
        """Set the default settings, apply ``params`` and store ``grammar``.

        :param grammar: grammar used to generate AST skeletons; it must pass
            ``is_valid_grammar``.
        :param params: mapping of attribute name to value. Every entry is
            copied onto the instance with ``setattr``, so it can override any
            default assigned before the loop below.
        :raises AssertionError: if ``grammar`` is not a valid grammar.
        """
        # Chance, in percent, that ``void`` answers with the void type.
        self.void_probability = 20
        # NOTE(review): the bounds are asymmetric (-2**30 .. 2**32 - 1);
        # confirm they match the integer width of the target language.
        self.int_low = -2 ** 30
        self.int_high = 2 ** 32 - 1
        self.valid_var_names = VAR_NAMES
        self.max_number_of_vars = 10
        self.valid_routine_names = ROUTINE_NAMES
        self.max_number_of_routines = 3
        self.qualifier_probability = False
        self.var_qualifier_probability = None
        self.const_qualifier_probability = None

        for key, value in params.items():
            setattr(self, key, value)

        # Custom qualifier weights only take effect when both are supplied.
        if self.var_qualifier_probability is not None and self.const_qualifier_probability is not None:
            self.qualifier_probability = True

        self.ast_list = []
        self.functions = []
        assert is_valid_grammar(grammar), "grammar failed is_valid_grammar()"
        self.grammar = grammar

    def fix_missing_locations(self, AST: str) -> str:
        """Run the fix-up passes over ``AST`` and hand it back.

        The passes themselves (``find_variables``, ``set_variables``,
        ``set_routines``, ``type_check``) are module-level functions.

        :param AST: the AST as XML text.
        :return: ``AST``, the same object that was passed in.
        """
        variable_names = self.get_variable_list()
        routine_names = self.get_routine_list()

        routines = find_routines(AST)
        variables = find_variables(AST)

        set_variables(variable_names, variables)  # insert types and values
        set_routines(routine_names, routines)  # insert types
        type_check(AST, routines, variables)
        return AST

    def test_samples(self, grammar: "Grammar", iterations: int = 10, start_symbol=None, log: bool = True):
        """Print ``iterations`` solutions of ``grammar`` as produced by ISLa.

        :param grammar: EBNF grammar; converted with ``convert_ebnf_grammar``.
        :param iterations: number of solutions to print; also passed to the
            solver as ``max_number_free_instantiations``.
        :param start_symbol: forwarded to ``ISLaSolver``.
        :param log: currently unused.
        """
        g = convert_ebnf_grammar(grammar)
        solver = ISLaSolver(g, start_symbol=start_symbol, max_number_free_instantiations=iterations)
        for _ in range(iterations):
            tree_str = str(solver.solve())
            print(tree_str)
            # TODO: run each tree through fix_missing_locations, parse it with
            # AstParser(tree, from_xml=True) and, when ``log`` is set, unparse
            # it and print the resulting code next to ``tree_str``.

    def get_variable_list(self):
        """Return the variable names the generator may use.

        :return: a new list with at most ``max_number_of_vars`` entries taken
            from the front of ``valid_var_names``.
        """
        return list(self.valid_var_names[:self.max_number_of_vars])

    def get_routine_list(self):
        """Return the routine names the generator may use.

        :return: a new list with at most ``max_number_of_routines`` entries
            taken from the front of ``valid_routine_names``.
        """
        return list(self.valid_routine_names[:self.max_number_of_routines])

    def populate_operator(self, operator: ET.Element, op, type):
        """Set operator and type on ``operator`` and populate its children.

        :param operator: the operator element; modified in place.
        :param op: value for the ``op`` attribute.
        :param type: value for the ``type`` attribute, also requested from
            every child.
        :return: ``operator``.
        """
        operator.set(GAZ_OP_KEY, op)
        operator.set(GAZ_TY_KEY, type)
        for node in operator:
            self.populate(node, type)
        return operator

    def populate_stream(self, stream: ET.Element, type):
        """Set the type on ``stream`` and populate its children with any type.

        :param stream: the stream element; modified in place.
        :param type: value for the ``type`` attribute.
        :return: ``stream``.
        """
        stream.set(GAZ_TY_KEY, type)
        for node in stream:
            self.populate(node, ANY_TYPE)
        return stream

    def populate_literal(self, literal: ET.Element, type, value):
        """Set type and value on ``literal``.

        :param literal: the literal element; modified in place.
        :param type: value for the ``type`` attribute.
        :param value: the literal's value; stored as ``str(value)`` because
            ElementTree can only serialize string attribute values.
        :return: ``literal``.
        """
        literal.set(GAZ_TY_KEY, type)
        literal.set(GAZ_VAL_KEY, str(value))
        return literal

    def populate_variable(self, variable: ET.Element, qualifier, type, name):
        """Set qualifier, type and name on ``variable``.

        :param variable: the variable element; modified in place.
        :param qualifier: value for the ``mut`` attribute.
        :param type: value for the ``type`` attribute.
        :param name: value for the ``name`` attribute.
        :return: ``variable``.
        """
        variable.set(GAZ_QUALIFIER_KEY, qualifier)
        variable.set(GAZ_TY_KEY, type)
        variable.set(GAZ_NAME_KEY, name)
        return variable

    def populate_routine(self, routine: ET.Element, type, name):
        """Set return type and name on ``routine`` and populate its body.

        A routine already named ``main`` keeps its name. A procedure with a
        non-void type gets a generated return statement appended to its
        block, when it has one.

        :param routine: the function/procedure element; modified in place.
        :param type: the routine's return type.
        :param name: the name to give the routine.
        :return: ``routine``.
        """
        routine.set(GAZ_RETURN_KEY, type)
        if routine.get(GAZ_NAME_KEY) != "main":
            routine.set(GAZ_NAME_KEY, name)

        if routine.tag == GAZ_PROCEDURE_TAG and type != VOID_TYPE:
            body = routine.find(GAZ_BLOCK_TAG)
            if body is not None:
                body.append(self.generate_return(type))

        # Grandchildren of the routine: return statements receive the
        # routine's type, everything else is populated with any type.
        for block in routine:
            for node in block:
                if node.tag != "return":
                    self.populate(node, ANY_TYPE)
                else:
                    node.set(GAZ_TY_KEY, type)

        return routine

    def populate_block(self, element):
        """Populate every child of ``element`` with any type.

        :param element: the block element; its children are modified in place.
        :return: ``element``.
        """
        for node in element:
            self.populate(node, ANY_TYPE)
        return element

    def populate_xhs(self, side: ET.Element, type):
        """Populate every child of an lhs/rhs element with ``type``.

        :param side: the lhs or rhs element; its children are modified in
            place.
        :param type: the type requested from every child.
        :return: ``side``.
        """
        for node in side:
            self.populate(node, type)
        return side

    def populate_ast(self, ast: ET.Element):
        """Populate the children of ``ast`` and collect them in a new block.

        :param ast: element whose direct children are populated.
        :return: a block obtained from ``generate_block`` with the populated
            children appended to it.
        """
        populated = self.generate_block()
        for node in ast:
            populated.append(self.populate(node, ANY_TYPE))
        return populated

    # def populate_assignment(self, name, type):

    def populate(self, element: ET.Element, type: str):
        """Dispatch ``element`` to the ``populate_*`` method for its tag.

        :param element: the element to populate; modified in place.
        :param type: the requested type; ``ANY_TYPE`` is replaced by a random
            entry of ``GAZ_TYPES``.
        :return: the populated element. Elements whose tag is not handled
            here are returned unchanged.
        """
        if type == ANY_TYPE:
            type = random.choice(GAZ_TYPES)

        tag = element.tag
        if tag == GAZ_VAR_TAG:
            return self.populate_variable(element, self.get_qualifier(), type, self.get_name(GAZ_VAR_TAG))
        if tag == GAZ_LIT_TAG:
            return self.populate_literal(element, type, self.get_value(type))
        if tag == GAZ_OPERATOR_TAG:
            return self.populate_operator(element, self.get_op(type), type)
        if tag == GAZ_FUNCTION_TAG:
            return self.populate_routine(element, type, self.get_name(tag))
        if tag == GAZ_PROCEDURE_TAG:
            type = self.void(type)
            return self.populate_routine(element, type, self.get_name(tag))
        # Both sides of an assignment are handled alike; the left-hand tag is
        # spelled out literally so that it is matched as well.
        if tag in ("lhs", GAZ_RHS_TAG):
            return self.populate_xhs(element, type)
        if tag == GAZ_BLOCK_TAG:
            return self.populate_block(element)
        return element

    def get_qualifier(self):
        """Pick ``'var'`` or ``'const'`` at random.

        The default weights are 80 (var) and 20 (const); when both
        ``var_qualifier_probability`` and ``const_qualifier_probability``
        were supplied, those are used as the weights instead.

        :return: ``'var'`` or ``'const'``.
        :raises ValueError: from ``random.choices`` if the weights do not add
            up to a positive number.
        """
        var_weight = 80
        const_weight = 20
        if self.qualifier_probability:
            var_weight = self.var_qualifier_probability
            const_weight = self.const_qualifier_probability

        # A weighted choice cannot fall outside the two buckets, unlike an
        # inclusive randint(0, total) compared against half-open ranges.
        return random.choices(('var', 'const'), weights=(var_weight, const_weight))[0]

    def get_value(self, type):
        """Return a random literal value for ``type``.

        :param type: type name; only ``GAZ_INT_KEY`` is supported.
        :return: an int drawn from ``int_low``..``int_high`` (both inclusive).
        :raises TypeError: for any other type.
        """
        if type == GAZ_INT_KEY:
            return random.randint(self.int_low, self.int_high)
        raise TypeError(f"Unimplemented generator for type: {type}")

    def get_name(self, name_type):
        """Pick a random name for a variable or a routine.

        Names come from ``valid_var_names`` / ``valid_routine_names``, capped
        at ``max_number_of_vars`` / ``max_number_of_routines`` entries.
        Routine names are also recorded in ``self.functions``.

        :param name_type: ``GAZ_VAR_TAG``, ``GAZ_PROCEDURE_TAG`` or
            ``GAZ_FUNCTION_TAG``.
        :return: the chosen name.
        :raises ValueError: if ``name_type`` is none of the above.
        :raises IndexError: if the applicable name list is empty.
        """
        if name_type == GAZ_VAR_TAG:
            return random.choice(self.get_variable_list())
        if name_type in (GAZ_PROCEDURE_TAG, GAZ_FUNCTION_TAG):
            r_name = random.choice(self.get_routine_list())
            self.functions.append(r_name)
            return r_name
        raise ValueError(f"Cannot generate a name for: {name_type}")

    def get_op(self, type):
        """Pick a random operator for ``type``.

        :param type: type name; only ``GAZ_INT_KEY`` is supported.
        :return: one entry of ``GAZ_INT_OPS``.
        :raises TypeError: for any other type.
        """
        if type == GAZ_INT_KEY:
            # TODO make this a parameter for people to change
            return random.choice(GAZ_INT_OPS)
        raise TypeError(f"Unimplemented type: {type}")

    def _generate(self, element: "str | None") -> ET.Element:
        """Solve the grammar from ``element`` and parse the result as XML.

        The solver's output is also printed to stdout.

        :param element: start symbol handed to ``ISLaSolver``; ``None`` is
            passed through unchanged.
        :return: the root element parsed from the solver's output.
        """
        initial_grammar = convert_ebnf_grammar(self.grammar)
        solver = ISLaSolver(initial_grammar, start_symbol=element)
        ast_str = str(solver.solve())

        print(ast_str)
        return ET.fromstring(ast_str)

    def generate_ast(self) -> ET.Element:
        """Generate a whole AST (no explicit start symbol)."""
        return self._generate(None)

    def generate_return(self, type) -> ET.Element:
        """Generate a ``<return>`` element and set its type to ``type``."""
        elem = self._generate('<return>')
        elem.set(GAZ_TY_KEY, type)
        return elem

    def generate_literal(self) -> ET.Element:
        """Generate an element from the ``<literal>`` start symbol."""
        return self._generate('<literal>')

    def generate_variable(self) -> ET.Element:
        """Generate an element from the ``<variable>`` start symbol."""
        return self._generate('<variable>')

    def generate_operator(self) -> ET.Element:
        """Generate an element from the ``<operator>`` start symbol."""
        return self._generate('<operator>')

    def generate_block(self) -> ET.Element:
        """Generate an element from the ``<block>`` start symbol."""
        return self._generate('<block>')

    def generate_routine(self) -> ET.Element:
        """Generate an element from the ``<routine>`` start symbol."""
        return self._generate('<routine>')

    def generate_main_routine(self) -> ET.Element:
        """Generate an element from the ``<main_routine>`` start symbol."""
        return self._generate('<main_routine>')

    def generate_declaration(self) -> ET.Element:
        """Generate an element from the ``<declaration>`` start symbol."""
        return self._generate('<declaration>')

    def generate_stream(self) -> ET.Element:
        """Generate an element from the ``<stream>`` start symbol."""
        return self._generate('<stream>')

    def void(self, type):
        """Replace ``type`` by the void type with ``void_probability`` percent chance.

        :param type: the type to keep when void is not chosen.
        :return: ``GAZ_VOID_TYPE`` or ``type``.
        """
        # randrange(100) yields 0..99, so a probability of 100 always picks
        # void and a probability of 0 never does.
        if random.randrange(100) < self.void_probability:
            return GAZ_VOID_TYPE
        return type
|
|
|
|
|
|
|
|
class AstElement:
    """Base class for objects that wrap one XML element of the AST."""

    def __init__(self, xml: ET.Element):
        """Remember the wrapped element.

        :param xml: the XML element this object represents; kept as
            ``self.xml``.
        """
        self.xml = xml
|
|
|
|
|
|
class RoutineCall(AstElement):
    """A routine element of the AST together with its dependents and type."""

    def __init__(self, xml: ET.Element, dependents=None, type=None):
        """Initialise a routine call object.

        :param xml: the XML element of the routine.
        :param dependents: list of dependents; a fresh empty list is used
            when omitted, so instances never share one.
        :param type: the routine's type, if already known.
        """
        super().__init__(xml)
        # Always assign the attribute; it used to be set only when a list
        # was passed in, leaving it undefined in the default case.
        self.dependents = [] if dependents is None else dependents
        self.xml = xml
        self.type = type
|
|
|
|
|
|
class Operator(AstElement):
    """An operator element of the AST with caller-supplied attributes."""

    def __init__(self, xml: ET.Element, params: dict):
        """Wrap ``xml`` and copy every entry of ``params`` onto the instance.

        :param xml: the XML element of the operator; kept as ``self.xml``.
        :param params: mapping of attribute name to value; each entry is set
            with ``setattr`` and may therefore replace ``xml`` as well.
        """
        super().__init__(xml)
        self.xml = xml
        for key, value in params.items():
            setattr(self, key, value)
|
|
|
|
|
|
def find_routines(AST: str) -> "list[RoutineCall]":
    """Find the routine elements among the root's direct children.

    :param AST: the AST to analyse, as XML text.
    :return: one ``RoutineCall`` per direct child of the root element whose
        tag is the procedure or the function tag, in document order.
    :raises xml.etree.ElementTree.ParseError: if ``AST`` is not well-formed
        XML.
    """
    xml = ET.fromstring(AST)
    # Build a real list and return it; ``list[RoutineCall]`` on its own is
    # only a type expression and cannot be appended to.
    return [
        RoutineCall(node)
        for node in xml
        if node.tag in (GAZ_PROCEDURE_TAG, GAZ_FUNCTION_TAG)
    ]
|
|
|
|
|
|
if __name__ == '__main__':
    # Nothing runs when the module is executed directly; the lines below are
    # disabled scratch code kept for reference.
    pass
    # ast_gen = AstGenerator(GAZPREA_TOP_LEVEL, json.loads("{}"))
    # out: ET.Element = ast_gen.generate_return("int")
    # print(out)

    # gen = AstGenerator("{}")
    # assert is_valid_grammar(gazprea_ast_grammar.GAZPREA_TOP_LEVEL)
    #
    # gen.test_samples(gazprea_ast_grammar.GAZPREA_TOP_LEVEL, iterations=100)
    #
    # initial_grammar = convert_ebnf_grammar(gazprea_ast_grammar.GAZPREA_TOP_LEVEL)
    # solver = ISLaSolver(initial_grammar)
    # constants_tree_str = str(solver.solve())
    # print(constants_tree_str)