Complete overhaul of the generation methods
Took 6 hours 24 minutes
This commit is contained in:
parent
2182395310
commit
839ec97166
|
@ -4,7 +4,8 @@ This is a hecking fuzzer. It does the thing.
|
|||
|
||||
## Requirements
|
||||
- Python 3.11
|
||||
- NumPy
|
||||
- ISLa Solver (`pip install isla-solver`)
|
||||
- English Words (`pip install english-words`) (so that you don't have an aneurysm while reading random names)
|
||||
|
||||
## Usage
|
||||
|
||||
|
|
|
@ -1,338 +1,701 @@
|
|||
import json
|
||||
import random
|
||||
|
||||
from fuzzingbook.Grammars import is_valid_grammar, convert_ebnf_grammar, Grammar
|
||||
from isla.solver import ISLaSolver
|
||||
|
||||
# from gazprea_ast_grammar import GAZPREA_TOP_LEVEL
|
||||
# import gazprea_ast_grammar
|
||||
from ast_parser.ast_parser import AstParser
|
||||
|
||||
import string
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
GAZ_VOID_TYPE = 'void'
|
||||
from english_words import get_english_words_set
|
||||
|
||||
VOID_TYPE = 'void'
|
||||
GAZ_BLOCK_TAG = 'block'
|
||||
GAZ_RHS_TAG = 'lhs'
|
||||
GAZ_RHS_TAG = 'rhs'
|
||||
GAZ_RETURN_KEY = "return_type"
|
||||
VAR_NAMES = ['alsdjf', 'asldfjlks', 'qowiuut', 'qoiur', 'qwioue', 'qoyiyut', 'llkjfg', 'kdjkdjf', 'asdjkfeey',
|
||||
'jdhjfjheee']
|
||||
ROUTINE_NAMES = ['bees', 'beans', 'hell']
|
||||
GAZ_INT_OPS = ['*', '+', '-', '/', '%']
|
||||
GAZ_TYPES = ['int']
|
||||
GAZ_FLOAT_KEY = 'float'
|
||||
GAZ_INT_KEY = 'int'
|
||||
GAZ_FUNCTION_TAG = 'function'
|
||||
GAZ_PROCEDURE_TAG = 'procedure'
|
||||
GAZ_OPERATOR_TAG = "operator"
|
||||
GAZ_LIT_TAG = "literal"
|
||||
GAZ_VAR_TAG = "variable"
|
||||
GAZ_OP_KEY = "op"
|
||||
GAZ_NAME_KEY = "name"
|
||||
GAZ_QUALIFIER_KEY = "mut"
|
||||
GAZ_VAL_KEY = "value"
|
||||
GAZ_TY_KEY = "type"
|
||||
ANY_TYPE = "any"
|
||||
|
||||
|
||||
def find_variables(AST):
|
||||
pass
|
||||
|
||||
|
||||
def set_variables(variable_names, variables):
|
||||
pass
|
||||
|
||||
|
||||
def set_routines(routine_names, routines):
|
||||
pass
|
||||
|
||||
|
||||
def type_check(AST, routines, variables):
|
||||
pass
|
||||
from ast_generator.constants import *
|
||||
|
||||
|
||||
class AstGenerator:
|
||||
"""
|
||||
Generates an AST from a grammar based on given settings
|
||||
|
||||
def __init__(self, grammar: Grammar, params: json):
|
||||
self.void_probability = 20
|
||||
self.int_low = -2 ** 30
|
||||
self.int_high = 2 ** 32 - 1
|
||||
self.valid_var_names = VAR_NAMES
|
||||
self.max_number_of_vars = 10
|
||||
self.valid_routine_names = ROUTINE_NAMES
|
||||
self.max_number_of_routines = 3
|
||||
self.qualifier_probability = False
|
||||
self.var_qualifier_probability = None
|
||||
self.const_qualifier_probability = None
|
||||
for key, value in params.items():
|
||||
setattr(self, key, value)
|
||||
Originally the intention was to use the ISLa library to generate
|
||||
the AST, however I found that ISLa is like taking a bulldozer to
|
||||
a sledgehammer's job, so I decided to write a procedural generator
|
||||
instead.
|
||||
|
||||
if self.var_qualifier_probability is not None and self.const_qualifier_probability is not None:
|
||||
self.qualifier_probability = True
|
||||
self.ast_list = []
|
||||
self.functions = []
|
||||
assert (is_valid_grammar(grammar))
|
||||
self.grammar = grammar
|
||||
The way we select elements is we take all the settings in their
|
||||
category and assign them a range on a number line. Then we
|
||||
pick a random number in that range and whichever category it
|
||||
falls into will be selected.
|
||||
"""
|
||||
|
||||
def fix_missing_locations(self, AST: str) -> str:
|
||||
variable_names = self.get_variable_list()
|
||||
routine_names = self.get_routine_list()
|
||||
def __init__(self, settings: dict):
|
||||
"""
|
||||
This class is designed to get the settings from some wrapper class that
|
||||
better defines the precise constraints of the language being generated
|
||||
|
||||
routines = find_routines(AST)
|
||||
variables = find_variables(AST)
|
||||
the necessary settings are in the .yaml file and #TODO this is not generalizable yet
|
||||
|
||||
set_variables(variable_names, variables) # insert types and values
|
||||
set_routines(routine_names, routines) # insert types
|
||||
type_check(AST, routines, variables)
|
||||
@param settings: settings for weights and probabilities and lengths
|
||||
"""
|
||||
self.settings = settings
|
||||
|
||||
self.symbol_table = [] # TODO this should be a list of scopes
|
||||
global_scope = Scope(None, None)
|
||||
self.symbol_table.append(global_scope) # NOTE for debug
|
||||
self.current_scope = global_scope
|
||||
|
||||
def test_samples(self, grammar: Grammar, iterations: int = 10, start_symbol=None, log: bool = True):
|
||||
g = convert_ebnf_grammar(grammar)
|
||||
solver = ISLaSolver(g, start_symbol=start_symbol, max_number_free_instantiations=iterations)
|
||||
for i in range(iterations):
|
||||
tree_str = str(solver.solve())
|
||||
print(tree_str)
|
||||
# tree = eval(tree_str)
|
||||
# print(tree)
|
||||
# tree = self.fix_missing_locations(tree)
|
||||
# ast = AstParser(tree, from_xml=True)
|
||||
# if log:
|
||||
# ast.unparse()
|
||||
# code = ast.input
|
||||
# print(f'{code:40} # {tree_str}')
|
||||
names = get_english_words_set(['web2'], lower=True)
|
||||
possible_names = filter(lambda x: self.settings['properties']['id-length']['min'] < len(x)
|
||||
< self.settings['properties']['id-length']['max'], names)
|
||||
|
||||
def get_variable_list(self):
|
||||
var_name_len = len(list(possible_names))
|
||||
var_name_list = list(possible_names)
|
||||
self.variable_names = var_name_list[0:var_name_len // 2]
|
||||
self.routine_names = var_name_list[var_name_len // 2:var_name_len]
|
||||
|
||||
self.ast: ET.Element or None = None
|
||||
self.current_ast_element: ET.Element or None = None
|
||||
self.current_nesting_depth = 0
|
||||
self.current_control_flow_nesting_depth = 0
|
||||
|
||||
def generate_ast(self):
    """
    @brief build the AST by generating the top-level block

    All of the work is delegated to generate_top_level_block; nothing is
    returned from here.
    """
    self.generate_top_level_block()
|
||||
|
||||
def generate_top_level_block(self):  # TODO add constant generation into this block
    """
    @brief create the root block, then main, then a random number of routines

    The root block becomes both self.ast and the current element. Routine
    generation stops when 'max-number-of-routines' is reached or when a roll
    falls below 'block-termination-probability'.
    """
    root = build_xml_element([], name=GAZ_BLOCK_TAG)
    self.current_ast_element = root
    self.ast = root

    # optional constants here too

    self.generate_main()

    generated = 0
    # the termination roll is only made while the routine limit is not yet hit
    while (generated < self.settings['generation-options']['max-number-of-routines']
           and not random.random() < self.settings['block-termination-probability']):
        self.generate_routine()
        generated += 1
|
||||
|
||||
def generate_main(self):
    """
    @brief generate the 'main' procedure under the current element

    main takes no arguments, is declared to return an integer, and its block
    always ends with a return of the literal "0". A scope is pushed for the
    duration of the generation and the current element is restored afterwards.
    """
    enclosing_element = self.current_ast_element
    self.push_scope()

    main_element = build_xml_element(
        [  # TODO refactor these into constants
            ("name", "main"),
            ("return_type", GAZ_INT_KEY),
            ("args", "()"),
        ],
        name=GAZ_PROCEDURE_TAG,
    )
    self.current_ast_element.append(main_element)
    self.current_ast_element = main_element

    self.generate_block(return_stmt=True, return_value="0", return_type=GAZ_INT_KEY)

    self.pop_scope()
    self.current_ast_element = enclosing_element
|
||||
|
||||
def generate_block(self, tag=None, return_stmt=False, return_value=None, return_type=None):
    """
    @brief generate a block element filled with random statements

    @param tag: attribute pairs to set on the block element (none by default)
    @param return_stmt: whether a return statement is generated after the statements
    @param return_value: forwarded to generate_return
    @param return_type: forwarded to generate_return
    """
    attributes = [] if tag is None else tag

    outer_element = self.current_ast_element
    self.push_scope()

    block_element = build_xml_element(attributes, name=GAZ_BLOCK_TAG)
    self.current_ast_element.append(block_element)
    self.current_ast_element = block_element

    self.generate_statements()
    if return_stmt:
        self.generate_return(return_type=return_type, return_value=return_value)
        # NOTE(review): indentation is not visible in this view; the dead-code
        # statements are assumed to belong after the return only - confirm.
        if self.settings['generation-options']['generate-dead-code']:
            self.generate_statements()

    self.pop_scope()
    self.current_ast_element = outer_element
|
||||
|
||||
def generate_return(self, return_type=None, return_value=None):
    """
    @brief append a return statement to the current element

    @param return_type: type of the returned value; None or the void type
                        produces a bare return element
    @param return_value: literal value to return; when None a random
                         expression of return_type is generated instead
    """
    if return_type is None or return_type == GAZ_VOID_TYPE:
        self.current_ast_element.append(build_xml_element([], name=GAZ_RETURN_TAG))
        return

    return_element = build_xml_element([("type", return_type)], name=GAZ_RETURN_TAG)
    self.current_ast_element.append(return_element)

    parent = self.current_ast_element
    self.current_ast_element = return_element
    if return_value is None:
        self.generate_expression(return_type)
    else:
        # make_literal is declared as (type, value); passing by keyword keeps
        # the type and the value from being swapped in the emitted literal
        return_element.append(self.make_literal(type=return_type, value=return_value))
    self.current_ast_element = parent
|
||||
|
||||
def generate_routine(self, routine_type=None):
    """
    @brief generate a procedure or function with its arguments and body

    @param routine_type: tag of the routine to generate; picked with
                         get_routine_type when None
    """
    kind = self.get_routine_type() if routine_type is None else routine_type

    # arguments, name and return type are drawn in this order
    arguments = self.generate_routine_args()
    routine_name = self.get_name(kind)
    routine_return_type = self.get_type(kind)
    routine = Routine(routine_name, kind, routine_return_type, arguments)

    routine_element = build_xml_element(
        [
            ("name", routine.name),
            ("return_type", routine.return_type),
        ],
        name=routine.type,
    )
    self.current_ast_element.append(routine_element)

    enclosing_element = self.current_ast_element
    self.current_ast_element = routine_element
    self.push_scope()

    self.define_args(routine.arguments)
    self.generate_block(return_stmt=True, return_type=routine.return_type)

    self.pop_scope()
    self.current_ast_element = enclosing_element
|
||||
|
||||
def define_args(self, args):
    """
    @brief attach each argument to the current element and the current scope

    @param args: arguments to define; each needs a 'name' and an 'xml' attribute
    """
    for argument in args:
        self.current_ast_element.append(argument.xml)
        self.current_scope.append(argument.name, argument)
|
||||
|
||||
def generate_statements(self):
    """
    @brief append a random sequence of statements to the current element

    Every pass first rolls against 'block-termination-probability' to decide
    whether to stop. Otherwise a position is drawn on the number line and the
    generator whose range contains that position is run.
    """
    # Number line
    number_line = 100
    # cumulative upper bounds: generator i owns [cutoffs[i - 1], cutoffs[i])
    cutoffs = [10, 30, 50, 80, 100]
    generators = [
        self.generate_declaration,
        self.generate_routine_call,
        self.generate_conditional,
        self.generate_loop,
        self.generate_assignment,
        self.generate_out_stream,
        self.generate_in_stream,
    ]
    # NOTE(review): there are only five cutoffs, so the two stream generators
    # cannot be drawn until cutoffs for them are added.

    while random.random() >= self.settings['block-termination-probability']:
        # randrange excludes number_line itself, so every draw lies below the
        # last cutoff and exactly one generator is selected per pass
        position = random.randrange(number_line)
        for cutoff, generate in zip(cutoffs, generators):
            if position < cutoff:
                generate()
                break
|
||||
|
||||
def generate_int_real_expr(self):
    """
    @brief generate an expression typed as an integer or a real

    Supplies the number line, the cutoffs and the operator table for numeric
    expressions and hands them to _generate_expression.
    """
    # Number line
    number_line = 100
    cutoffs = [10, 30, 50, 80, 100]

    operator_names = [  # TODO add brackets
        "addition",
        "subtraction",
        "multiplication",
        "division",
        "modulo",
        "power",
        "negation",
        "noop",
        "equality",
        "inequality",
        "less-than",
        "greater-than",
        "less-than-or-equal",
        "greater-than-or-equal",
    ]
    # the operator table is keyed by position: 0 -> "addition", 1 -> "subtraction", ...
    options = dict(enumerate(operator_names))

    unary = ["negation", "noop"]

    self._generate_expression([GAZ_INT_KEY, GAZ_FLOAT_KEY], number_line, cutoffs, options, unary)
|
||||
|
||||
def generate_bool_expr(self):
    """
    @brief generate an expression typed as a boolean

    Supplies the number line, the cutoffs and the operator table for boolean
    expressions and hands them to _generate_expression.
    """
    # Number line
    number_line = 100
    cutoffs = [10, 30, 50, 80, 100]

    # TODO add brackets
    # TODO cannot guarantee correctness of comparison since booleans may appear
    operator_names = [
        "equality",
        "inequality",
        "less-than",
        "greater-than",
        "less-than-or-equal",
        "greater-than-or-equal",
        "and",
        "or",
        "xor",
        "not",
    ]
    # FIXME sometimes this will return a "" op, need to figure out why
    options = dict(enumerate(operator_names))

    unary = ["not"]

    self._generate_expression([GAZ_BOOL_KEY], number_line, cutoffs, options, unary)
|
||||
|
||||
def _generate_expression(self, expr_type: list[str], number_line, cutoffs, options, unary=None):
|
||||
if unary is None:
|
||||
unary = []
|
||||
|
||||
parent = self.current_ast_element
|
||||
self.current_nesting_depth += 1
|
||||
|
||||
if self.current_nesting_depth > self.settings['generation-options']['max-nesting-depth'] or random.random() < \
|
||||
self.settings['block-termination-probability']:
|
||||
self.generate_literal(random.choice(expr_type)) # TODO add the reals
|
||||
self.current_nesting_depth -= 1
|
||||
return
|
||||
|
||||
op = ""
|
||||
a = random.randint(0, number_line)
|
||||
i = 0
|
||||
for i in range(len(cutoffs) - 1):
|
||||
if cutoffs[i] < a < cutoffs[i + 1]:
|
||||
op = options[i]
|
||||
break
|
||||
|
||||
if op in unary:
|
||||
self.generate_unary(op, random.choice(expr_type))
|
||||
else:
|
||||
self.generate_binary(op, random.choice(expr_type))
|
||||
|
||||
self.current_nesting_depth -= 1
|
||||
self.current_ast_element = parent
|
||||
|
||||
def generate_declaration(self):
    """
    @brief generate a variable declaration with an initialising expression

    The declared variable is added to the declaration element and registered
    in the current scope before its right-hand side is generated.
    """
    enclosing_element = self.current_ast_element

    declared_type = self.get_type(GAZ_VAR_TAG)
    declaration = build_xml_element([("type", declared_type)], name=GAZ_DECLARATION_TAG)
    self.current_ast_element.append(declaration)
    self.current_ast_element = declaration

    new_variable = self.generate_variable(declared_type)
    self.current_ast_element.append(new_variable.xml)
    self.current_scope.append(new_variable.name, new_variable)

    self.generate_xhs(GAZ_RHS_TAG, declared_type)  # TODO add real type (decl_type)

    self.current_ast_element = enclosing_element
|
||||
|
||||
def generate_binary(self, op, op_type):
    """
    @brief generate a binary operator element with a left and a right operand

    @param op: operator name stored in the "op" attribute
    @param op_type: type stored in the "type" attribute and used for both operands
    """
    enclosing_element = self.current_ast_element

    operator_element = build_xml_element([("op", op), ("type", op_type)], name=GAZ_OPERATOR_TAG)
    self.current_ast_element.append(operator_element)
    self.current_ast_element = operator_element

    # left operand first, then right operand
    for side in (GAZ_LHS_TAG, GAZ_RHS_TAG):
        self.generate_xhs(side, op_type)

    self.current_ast_element = enclosing_element
|
||||
|
||||
def generate_xhs(self, handedness, op_type):
    """
    @brief generate one side of an operation and the expression inside it

    @param handedness: tag name of the side element to create
    @param op_type: type of the expression generated inside the side element
    """
    enclosing_element = self.current_ast_element

    side_element = build_xml_element([], name=handedness)
    self.current_ast_element.append(side_element)
    self.current_ast_element = side_element

    self.generate_expression(op_type)

    self.current_ast_element = enclosing_element
|
||||
|
||||
def generate_unary(self, op, op_type=ANY_TYPE):
    """
    @brief generate a unary operator element with a single right-hand operand

    @param op: operator name stored in the "op" attribute
    @param op_type: type stored in the "type" attribute and used for the operand
    """
    enclosing_element = self.current_ast_element

    operator_element = build_xml_element([("op", op), ("type", op_type)], name=GAZ_UNARY_OPERATOR_TAG)
    self.current_ast_element.append(operator_element)
    self.current_ast_element = operator_element

    self.generate_xhs(GAZ_RHS_TAG, op_type)

    self.current_ast_element = enclosing_element
|
||||
|
||||
def generate_routine_call(self):
    """
    @brief placeholder for routine call generation; currently does nothing
    """
    return None
|
||||
|
||||
def populate_operator(self, operator: ET.Element, op, type):
|
||||
operator.set(GAZ_OP_KEY, op)
|
||||
operator.set(GAZ_TY_KEY, type)
|
||||
for node in operator:
|
||||
node = self.populate(node, type)
|
||||
def generate_conditional(self):
|
||||
if self.current_control_flow_nesting_depth >= self.settings['generation-options']['max-nesting-depth']:
|
||||
return
|
||||
|
||||
return operator
|
||||
if self.current_control_flow_nesting_depth > 0 and random.random() < self.settings['block-termination-probability']:
|
||||
return
|
||||
|
||||
def populate_stream(self, stream: ET.Element, type):
|
||||
stream.set(GAZ_TY_KEY, type)
|
||||
for node in stream:
|
||||
node = self.populate(node, ANY_TYPE)
|
||||
element = build_xml_element([], name=GAZ_IF_TAG)
|
||||
self.current_ast_element.append(element)
|
||||
parent = self.current_ast_element
|
||||
self.current_ast_element = element
|
||||
|
||||
return stream
|
||||
self.current_control_flow_nesting_depth += 1
|
||||
|
||||
def populate_literal(self, literal: ET.Element, type, value):
|
||||
literal.set(GAZ_TY_KEY, type)
|
||||
literal.set(GAZ_VAL_KEY, value)
|
||||
return literal
|
||||
self.push_scope()
|
||||
|
||||
def populate_variable(self, variable: ET.Element, qualifier, type, name):
|
||||
variable.set(GAZ_QUALIFIER_KEY, qualifier)
|
||||
variable.set(GAZ_TY_KEY, type)
|
||||
variable.set(GAZ_NAME_KEY, name)
|
||||
return variable
|
||||
self.generate_expression(GAZ_BOOL_KEY)
|
||||
|
||||
def populate_routine(self, routine: ET.Element, type, name):
|
||||
routine.set(GAZ_RETURN_KEY, type)
|
||||
if routine.get("name") != "main":
|
||||
routine.set(GAZ_NAME_KEY, name)
|
||||
self.generate_block(tag=[("type", GAZ_TRUE_BLOCK_TAG)])
|
||||
self.generate_block(tag=[("type", GAZ_FALSE_BLOCK_TAG)])
|
||||
|
||||
if routine.tag == GAZ_PROCEDURE_TAG and type != VOID_TYPE:
|
||||
routine.find("block").append(self.generate_return(type))
|
||||
self.pop_scope()
|
||||
self.current_ast_element = parent
|
||||
|
||||
for block in routine:
|
||||
for node in block:
|
||||
if node.tag != "return":
|
||||
node =self.populate(node, ANY_TYPE)
|
||||
else:
|
||||
node.set("type", type)
|
||||
def generate_loop(self):
|
||||
if self.current_control_flow_nesting_depth >= self.settings['generation-options']['max-nesting-depth']:
|
||||
return
|
||||
|
||||
return routine
|
||||
if self.current_control_flow_nesting_depth > 0 and random.random() < self.settings['block-termination-probability']:
|
||||
return
|
||||
|
||||
def populate_block(self, element):
|
||||
for node in element:
|
||||
return self.populate(node, ANY_TYPE)
|
||||
parent = self.current_ast_element
|
||||
element = build_xml_element([], name=GAZ_LOOP_TAG)
|
||||
self.current_ast_element.append(element)
|
||||
self.current_ast_element = element
|
||||
|
||||
def populate_xhs(self, side: ET.Element, type):
|
||||
for node in side:
|
||||
return self.populate(node, type)
|
||||
self.current_control_flow_nesting_depth += 1
|
||||
self.push_scope()
|
||||
self.generate_expression(GAZ_BOOL_KEY)
|
||||
self.generate_block()
|
||||
self.pop_scope()
|
||||
self.current_ast_element = parent
|
||||
|
||||
def populate_ast(self, ast: ET.Element):
|
||||
populated = self.generate_block()
|
||||
for node in ast:
|
||||
populated.append(self.populate(node, ANY_TYPE))
|
||||
|
||||
return populated
|
||||
def generate_assignment(self):
|
||||
# same structure as a declaration
|
||||
parent = self.current_ast_element
|
||||
|
||||
# def populate_assignment(self, name, type):
|
||||
element = build_xml_element([], name=GAZ_ASSIGNMENT_TAG)
|
||||
self.current_ast_element.append(element)
|
||||
self.current_ast_element = element
|
||||
|
||||
def populate(self, element: ET.Element, type: str):
|
||||
if type == ANY_TYPE:
|
||||
type = GAZ_TYPES[random.randint(0, len(GAZ_TYPES) - 1)]
|
||||
possible_vars = self.current_scope.get_all_defined_mutable_vars()
|
||||
if len(possible_vars) == 0:
|
||||
self.generate_global()
|
||||
possible_vars = self.current_scope.get_all_defined_mutable_vars()
|
||||
|
||||
if element.tag == GAZ_VAR_TAG:
|
||||
return self.populate_variable(element, self.get_qualifier(), type, self.get_name(GAZ_VAR_TAG))
|
||||
elif element.tag == GAZ_LIT_TAG:
|
||||
return self.populate_literal(element, type, self.get_value(type))
|
||||
elif element.tag == GAZ_OPERATOR_TAG:
|
||||
return self.populate_operator(element, self.get_op(type), type)
|
||||
elif element.tag == GAZ_FUNCTION_TAG:
|
||||
return self.populate_routine(element, type, self.get_name(element.tag))
|
||||
elif element.tag == GAZ_PROCEDURE_TAG:
|
||||
type = self.void(type)
|
||||
return self.populate_routine(element, type, self.get_name(element.tag))
|
||||
elif element.tag in [GAZ_RHS_TAG, GAZ_RHS_TAG]:
|
||||
return self.populate_xhs(element, type)
|
||||
elif element.tag == GAZ_BLOCK_TAG:
|
||||
return self.populate_block(element)
|
||||
assert len(possible_vars) > 0
|
||||
variable = random.choice(possible_vars)
|
||||
|
||||
self.current_ast_element.append(variable.xml)
|
||||
self.generate_xhs(GAZ_RHS_TAG, variable.type)
|
||||
|
||||
self.current_ast_element = parent
|
||||
|
||||
def generate_out_stream(self):
    """
    @brief generate a stream statement of the output stream type
    """
    stream_type = GAZ_OUT_STREAM
    self.generate_stream(stream_type)
|
||||
|
||||
def generate_in_stream(self):
    """
    @brief generate a stream statement of the input stream type
    """
    stream_type = GAZ_IN_STREAM
    self.generate_stream(stream_type)
|
||||
|
||||
def generate_stream(self, stream_type):
    """
    @brief generate a stream element containing an expression of any type

    @param stream_type: value stored in the stream element's "type" attribute
    """
    enclosing_element = self.current_ast_element

    stream_element = build_xml_element([("type", stream_type)], name=GAZ_STREAM_TAG)
    self.current_ast_element.append(stream_element)
    self.current_ast_element = stream_element

    self.generate_expression(ANY_TYPE)

    self.current_ast_element = enclosing_element
|
||||
|
||||
def generate_variable(self, var_type: str):
    """
    @brief create a Variable with a random name and a random qualifier

    @param var_type: type of the new variable
    @return the new Variable
    """
    # the name is drawn before the qualifier
    variable_name = self.get_name(GAZ_VAR_TAG)
    qualifier = self.get_qualifier()
    return Variable(variable_name, var_type, qualifier)
|
||||
|
||||
def generate_literal(self, var_type: str):
    """
    @brief append a literal with a random value to the current element

    @param var_type: type of the literal; its value comes from get_value
    """
    literal_value = str(self.get_value(var_type))
    literal = build_xml_element(
        [
            ("type", var_type),
            ("value", literal_value),
        ],
        name=GAZ_LIT_TAG,
    )
    self.current_ast_element.append(literal)
|
||||
|
||||
def make_literal(self, type, value):
    """
    @brief build a literal element without attaching it to the AST

    @param type: value of the "type" attribute
    @param value: value of the "value" attribute
    @return the new literal element
    """
    return build_xml_element([("type", type), ("value", value)], name=GAZ_LIT_TAG)
|
||||
|
||||
def generate_global(self):
    """
    @brief generate a declaration in the top scope, directly under the AST root

    The current scope and the current element are switched to the top scope
    and self.ast for the declaration, then put back.
    """
    saved_scope, saved_element = self.current_scope, self.current_ast_element

    self.current_scope = saved_scope.get_top_scope()
    self.current_ast_element = self.ast

    self.generate_declaration()

    self.current_scope, self.current_ast_element = saved_scope, saved_element
|
||||
|
||||
def generate_expression(self, expr_type: str):
    """
    @brief generate an expression of the requested type under the current element

    @param expr_type: integer, float, boolean or "any" type key
    @raise NotImplementedError: for any other type
    """
    if expr_type == GAZ_INT_KEY or expr_type == GAZ_FLOAT_KEY:
        self.generate_int_real_expr()
        return

    if expr_type == GAZ_BOOL_KEY:
        self.generate_bool_expr()
        return

    if expr_type == ANY_TYPE:  # TODO implement the choice of any type
        self.generate_int_real_expr()
        return

    raise NotImplementedError(f"Expression type {expr_type} not implemented")
|
||||
|
||||
def generate_routine_args(self):
    """
    @brief generate a random number of routine arguments

    The count is drawn between the 'min' and 'max' of 'number-of-arguments',
    both inclusive. Each argument is also added to the current scope.

    @return the generated arguments, in generation order
    """
    bounds = self.settings['properties']['number-of-arguments']
    count = random.randint(bounds['min'], bounds['max'])

    generated = []
    for _ in range(count):
        argument = self.generate_arg()
        generated.append(argument)
        self.current_scope.append(argument.name, argument)
    return generated
|
||||
|
||||
def generate_arg(self):
    """
    @brief create an Argument with a random name and a random type

    @return the new Argument
    """
    # the name is drawn before the type
    argument_name = self.get_name(GAZ_VAR_TAG)
    argument_type = self.get_type(GAZ_VAR_TAG)
    return Argument(argument_name, argument_type)
|
||||
|
||||
def push_scope(self, xml_element: ET.Element = None):
    """
    @brief open a new scope nested inside the current one and make it current

    The new scope is also recorded in the symbol table.

    @param xml_element: accepted but not used here
    """
    nested = Scope(self.current_scope)
    self.current_scope = nested
    self.symbol_table.append(nested)
|
||||
|
||||
def pop_scope(self):
    """
    @brief make the scope enclosing the current one the current scope
    """
    enclosing = self.current_scope.enclosing_scope
    self.current_scope = enclosing
|
||||
|
||||
# TODO revamp the random value generations
|
||||
def get_qualifier(self):
|
||||
var_weight: int = 80
|
||||
const_weight: int = 20
|
||||
if self.qualifier_probability:
|
||||
var_weight = self.var_qualifier_probability
|
||||
const_weight = self.const_qualifier_probability
|
||||
"""
|
||||
@brief get a random qualifier from the list of possible qualifiers
|
||||
|
||||
a = random.randint(0, var_weight + const_weight)
|
||||
if a in range(0, var_weight):
|
||||
return 'var'
|
||||
elif a in range(var_weight, var_weight + const_weight):
|
||||
return 'const'
|
||||
else:
|
||||
raise ValueError("Internal Error, please report the stack trace to me")
|
||||
@return a qualifier as a string
|
||||
"""
|
||||
number_line = (self.settings["misc-weights"]["type-qualifier-weights"]["const"] +
|
||||
self.settings["misc-weights"]["type-qualifier-weights"]["var"] -1 )
|
||||
|
||||
res = random.randint(0, number_line)
|
||||
if res in range(0, self.settings["misc-weights"]["type-qualifier-weights"]["const"]):
|
||||
return 'const'
|
||||
elif res in range(self.settings["misc-weights"]["type-qualifier-weights"]["const"],
|
||||
self.settings["misc-weights"]["type-qualifier-weights"]["const"] +
|
||||
self.settings["misc-weights"]["type-qualifier-weights"]["var"]):
|
||||
return 'var'
|
||||
else:
|
||||
raise ValueError("Internal Error, please report the stack trace to me")
|
||||
|
||||
def get_routine_type(self):
    """
    @brief pick a routine type at random, weighted by "routine-weights"

    @return one of the keys of the "routine-weights" setting
    @raise ValueError: if there are no routine weights or they sum to zero
    """
    weights = self.settings["routine-weights"]
    routine_types = list(weights)
    if not routine_types:
        raise ValueError("No routine weights configured")  # TODO everything should fail fast

    # random.choices does the weighted draw itself: every draw maps to a
    # key, and any number of weights is handled correctly
    return random.choices(routine_types, weights=list(weights.values()), k=1)[0]
|
||||
|
||||
def get_value(self, type):
|
||||
if type == GAZ_INT_KEY:
|
||||
return random.randint(self.int_low, self.int_high)
|
||||
if self.settings["properties"]["generate-max-int"]:
|
||||
return random.randint(-2147483648, 2147483647)
|
||||
else:
|
||||
return random.randint(-1000, 1000)
|
||||
elif type == GAZ_FLOAT_KEY:
|
||||
return random.uniform(-1000, 1000)
|
||||
elif type == GAZ_BOOL_KEY:
|
||||
return random.choice([True, False])
|
||||
else:
|
||||
raise TypeError("Unimplemented generator for type: " + type)
|
||||
|
||||
def get_name(self, name_type):
|
||||
if name_type == GAZ_VAR_TAG:
|
||||
return VAR_NAMES[random.randint(0, self.max_number_of_vars - 1)]
|
||||
elif name_type in [GAZ_PROCEDURE_TAG, GAZ_FUNCTION_TAG]:
|
||||
r_name = ROUTINE_NAMES[random.randint(0, len(ROUTINE_NAMES) - 1)]
|
||||
self.functions.append(r_name)
|
||||
return r_name
|
||||
"""
|
||||
@brief get a random name from the list of possible names and add it to the current scope
|
||||
|
||||
@param name_type:
|
||||
@return:
|
||||
"""
|
||||
length = random.randint(self.settings['properties']['id-length']['min'],
|
||||
self.settings['properties']['id-length']['max'])
|
||||
name = ''.join(random.choices(string.ascii_letters, k=length))
|
||||
return name
|
||||
|
||||
def get_op(self, type):
|
||||
|
||||
if type == GAZ_INT_KEY:
|
||||
# TODO make this a parameter for people to change
|
||||
return GAZ_INT_OPS[random.randint(0, len(GAZ_INT_OPS) - 1)]
|
||||
else:
|
||||
raise TypeError("Unimplemented type: " + type)
|
||||
cutoffs = []
|
||||
values = []
|
||||
ops = []
|
||||
for key, value in self.settings["expression-weights"]["arithmetic"]:
|
||||
cutoffs.append(value + sum(cutoffs))
|
||||
values.append(value)
|
||||
ops.append(get_op(key))
|
||||
|
||||
def _generate(self, element: str or None) -> ET.Element:
|
||||
initial_grammar = convert_ebnf_grammar(self.grammar)
|
||||
solver = ISLaSolver(initial_grammar, start_symbol=element)
|
||||
ast_str = str(solver.solve())
|
||||
res = random.randint(0, sum(values))
|
||||
for i in range(len(cutoffs)):
|
||||
if res < cutoffs[i]:
|
||||
return ops[i]
|
||||
|
||||
print(ast_str)
|
||||
elem = ET.fromstring(ast_str)
|
||||
return elem
|
||||
def get_type(self, tag): # TODO Add support for composite types
|
||||
return 'int' # TODO Add support for all types
|
||||
if tag in [GAZ_PROCEDURE_TAG, GAZ_FUNCTION_TAG, GAZ_VAR_TAG]:
|
||||
cutoffs = []
|
||||
values = []
|
||||
types = []
|
||||
for key, value in self.settings["type-weights"]["value-types"].items():
|
||||
if key == GAZ_VOID_TYPE and tag != GAZ_PROCEDURE_TAG:
|
||||
continue
|
||||
cutoffs.append(value + sum(cutoffs))
|
||||
values.append(value)
|
||||
types.append(key)
|
||||
|
||||
def generate_ast(self) -> ET.Element:
|
||||
return self._generate(None)
|
||||
res = random.randint(0, sum(values))
|
||||
for i in range(len(cutoffs)):
|
||||
if res < cutoffs[i]:
|
||||
return types[i]
|
||||
|
||||
|
||||
def generate_return(self, type) -> ET.Element:
|
||||
elem = self._generate('<return>')
|
||||
elem.set(GAZ_TY_KEY, type)
|
||||
|
||||
return elem
|
||||
|
||||
def generate_literal(self) -> ET.Element:
|
||||
return self._generate('<literal>')
|
||||
|
||||
def generate_variable(self) -> ET.Element:
|
||||
return self._generate('<variable>')
|
||||
|
||||
def generate_operator(self) -> ET.Element:
|
||||
return self._generate('<operator>')
|
||||
|
||||
def generate_block(self) -> ET.Element:
|
||||
return self._generate('<block>')
|
||||
|
||||
def generate_routine(self) -> ET.Element:
|
||||
return self._generate('<routine>')
|
||||
|
||||
def generate_main_routine(self) -> ET.Element:
|
||||
return self._generate('<main_routine>')
|
||||
|
||||
def generate_declaration(self) -> ET.Element:
|
||||
return self._generate('<declaration>')
|
||||
|
||||
def generate_stream(self) -> ET.Element:
|
||||
return self._generate('<stream>')
|
||||
|
||||
def void(self, type):
    """
    @brief randomly replace a type with the void type

    @param type: the type to keep when void is not chosen
    @return the void type when a draw from 0 to 100 (inclusive) is below
            self.void_probability, otherwise the given type
    """
    roll = random.randint(0, 100)
    return GAZ_VOID_TYPE if roll < self.void_probability else type
|
||||
|
||||
|
||||
|
||||
class AstElement:
    """Base class for AST element wrappers; it stores nothing itself."""

    def __init__(self, xml: ET.Element):
        """
        @brief accept the element and do nothing with it

        :param xml: the XML element; not stored by this class
        """
        return None
|
||||
|
||||
|
||||
class RoutineCall(AstElement):
    """A routine element of the AST together with the elements that depend on it."""

    def __init__(self, xml: ET.Element, dependents=None, type=None):
        """
        @brief initialise a routine call object

        :param xml: the routine's XML element
        :param dependents: elements that depend on this routine; a new empty
                           list is used when omitted
        :param type: accepted but not used here
        """
        super().__init__(xml)
        # always set the attribute, including when no dependents were given
        self.dependents = [] if dependents is None else dependents
        self.xml = xml
|
||||
class Variable:
    """A declared variable: its name, type, qualifier, optional value and XML element."""

    def __init__(self, name: str, type: str, qualifier: str, value: any = None):
        """
        @param name: variable name
        @param type: variable type
        @param qualifier: qualifier stored in the element's 'mut' attribute
        @param value: optional value; stored but not written to the XML
        """
        self.name = name
        self.type = type
        self.qualifier = qualifier
        self.value = value
        # built last because it reads the attributes set above
        self.xml = self._build_xml()

    def _build_xml(self):
        """
        @brief build the variable element from name, type and qualifier

        @return the new XML element
        """
        attributes = [('name', self.name), ('type', self.type), ('mut', self.qualifier)]
        return build_xml_element(attributes, name=GAZ_VAR_TAG)
|
||||
|
||||
|
||||
class Operator(AstElement):
|
||||
class Argument:
|
||||
def __init__(self, name: str, type: str):
|
||||
self.name = name
|
||||
self.type = type
|
||||
self.xml = self._build_xml()
|
||||
|
||||
def __init__(self, xml: ET.Element, params: json):
|
||||
super().__init__(xml)
|
||||
for key, value in params.items():
|
||||
setattr(self, key, value)
|
||||
def __str__(self):
|
||||
return self.type + " " + self.name
|
||||
|
||||
def _build_xml(self):
|
||||
args = [
|
||||
('name', self.name),
|
||||
('type', self.type),
|
||||
]
|
||||
return build_xml_element(args, name=GAZ_ARG_TAG)
|
||||
|
||||
|
||||
def find_routines(AST: str):
    """
    @brief find the routine elements directly under the root of the AST

    Only direct children of the root are inspected; a child counts as a
    routine when its tag is the procedure or the function tag.

    @param AST: the AST to analyse, as an XML string
    @return the list of routine elements, each wrapped in a RoutineCall
    """
    root = ET.fromstring(AST)
    routine_tags = (GAZ_PROCEDURE_TAG, GAZ_FUNCTION_TAG)
    # build a real list and return it, as the contract above promises
    return [RoutineCall(node) for node in root if node.tag in routine_tags]
|
||||
class Routine:
    """A generated routine: its name, tag, return type, arguments and XML element."""

    def __init__(self, name: str, type: str, return_type: str, args: list[Argument], xml: ET.Element = None):
        """
        @param name: routine name
        @param type: routine type, used by the generator as the element tag
        @param return_type: type returned by the routine
        @param args: the routine's arguments, stored as 'arguments'
        @param xml: XML element of the routine, if already built
        """
        self.name = name
        self.type = type
        self.return_type = return_type
        self.arguments = args
        self.xml = xml
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pass
|
||||
# ast_gen = AstGenerator(GAZPREA_TOP_LEVEL, json.loads("{}"))
|
||||
# out: ET.Element = ast_gen.generate_return("int")
|
||||
# print(out)
|
||||
class Scope:
    """One level of the symbol table, linked to the scope that encloses it."""

    def __init__(self, enclosing_scope, child_scope=None, associated_xml: ET.Element = None):
        """
        @param enclosing_scope: the scope around this one, or None for the top scope
        @param child_scope: stored as given; not used by this class
        @param associated_xml: stored as 'xml'; not used by this class
        """
        self.enclosing_scope = enclosing_scope
        self.child_scope = child_scope
        self.xml = associated_xml
        self.symbols = {}

    def resolve(self, name) -> ET.Element or None:
        """
        @brief look a name up in this scope only

        @return the stored item, or None when the name is not defined here
        """
        return self.symbols.get(name)

    def append(self, name, item: Variable or Argument or Routine):
        """
        @brief store an item under a name, replacing any previous entry
        """
        self.symbols[name] = item

    def append_element(self, name, value: ET.Element):
        """
        @brief store an element under a name, replacing any previous entry
        """
        self.symbols[name] = value

    def set(self, name, value: ET.Element):
        """
        @brief store a value under a name, replacing any previous entry
        """
        self.symbols[name] = value

    def get_all_defined_mutable_vars(self) -> list[Variable]:
        """
        @brief collect the mutable variables of this scope and every enclosing scope

        @return variables of the outermost scope first, this scope's last
        """
        chain = []
        scope = self
        while scope is not None:
            chain.append(scope)
            scope = scope.enclosing_scope

        collected = []
        for scope in reversed(chain):
            collected.extend(scope._get_mutable_vars())
        return collected

    def _get_mutable_vars(self) -> list[Variable]:
        """
        @brief collect this scope's Variable entries whose qualifier is not 'const'
        """
        return [
            symbol for symbol in self.symbols.values()
            if isinstance(symbol, Variable) and symbol.qualifier != 'const'
        ]

    def get_top_scope(self):
        """
        @brief follow the enclosing scopes up to the one that has none

        @return the outermost scope
        """
        scope = self
        while scope.enclosing_scope is not None:
            scope = scope.enclosing_scope
        return scope
|
||||
|
||||
|
||||
# gen = AstGenerator("{}")
|
||||
# assert is_valid_grammar(gazprea_ast_grammar.GAZPREA_TOP_LEVEL)
|
||||
#
|
||||
# gen.test_samples(gazprea_ast_grammar.GAZPREA_TOP_LEVEL, iterations=100)
|
||||
#
|
||||
# initial_grammar = convert_ebnf_grammar(gazprea_ast_grammar.GAZPREA_TOP_LEVEL)
|
||||
# solver = ISLaSolver(initial_grammar)
|
||||
# constants_tree_str = str(solver.solve())
|
||||
# print(constants_tree_str)
|
||||
def build_xml_element(*keys, name):
    """
    Build an XML element with tag `name` and the given attributes.

    @param keys: positional arguments; only the first one is used and it must
                 be an iterable of (attribute, value) pairs. When no positional
                 argument is given the element is created without attributes.
    @param name: keyword-only tag of the new element
    @return the new ET.Element
    """
    elem = ET.Element(name)

    # Callers pass one collection of pairs, e.g.
    # build_xml_element([('name', n), ('type', t)], name=TAG).
    pairs = keys[0] if keys else ()
    for pair in pairs:
        elem.set(pair[0], pair[1])
    return elem
|
||||
|
||||
|
||||
# Maps the operator names used as keys in config.yaml to the symbol that
# get_op() returns for them.
_OPERATOR_SYMBOLS = {
    # arithmetic
    'addition': '+',
    'subtraction': '-',
    'multiplication': '*',
    'division': '/',
    'modulo': '%',
    'power': '^',
    # logical
    'or': 'or',
    'and': 'and',
    'xor': 'xor',  # listed under the logical weights in config.yaml
    # comparison
    'equality': '==',
    'inequality': '!=',
    'less-than': '<',
    'less-than-or-equal': '<=',
    'greater-than': '>',
    'greater-than-or-equal': '>=',
    # unary
    'negation': '-',
    'not': 'not',
    'noop': '+',
    # vector / string
    'concatenation': '||',
}


def get_op(op):
    """
    Translate an operator name into its symbol.

    @param op: the operator name, e.g. 'addition' or 'less-than'
    @return the operator symbol, e.g. '+' or '<'
    @raise ValueError: when `op` is not a known operator name
    """
    try:
        return _OPERATOR_SYMBOLS[op]
    except (KeyError, TypeError):
        # TypeError covers unhashable values; formatting (rather than
        # concatenating) keeps the message working for non-string input.
        raise ValueError(f"Unknown operator: {op}") from None
|
||||
|
||||
|
|
45
ast_generator/constants.py
Normal file
45
ast_generator/constants.py
Normal file
|
@ -0,0 +1,45 @@
|
|||
"""
|
||||
Constants for the ast_generator
|
||||
|
||||
#TODO this could be a dict that can be loaded from a yaml file and thus make this fuzzer more general
|
||||
"""
|
||||
|
||||
Grammar = dict[str, list[str]]
|
||||
GAZ_VOID_TYPE = 'void'
|
||||
VOID_TYPE = 'void'
|
||||
GAZ_BLOCK_TAG = 'block'
|
||||
GAZ_LHS_TAG = 'lhs'
|
||||
GAZ_RHS_TAG = 'rhs'
|
||||
GAZ_RETURN_KEY = "return_type"
|
||||
VAR_NAMES = ['alsdjf', 'asldfjlks', 'qowiuut', 'qoiur', 'qwioue', 'qoyiyut', 'llkjfg', 'kdjkdjf', 'asdjkfeey',
|
||||
'jdhjfjheee']
|
||||
ROUTINE_NAMES = ['bees', 'beans', 'hell']
|
||||
GAZ_INT_OPS = ['*', '+', '-', '/', '%']
|
||||
GAZ_TYPES = ['int']
|
||||
GAZ_FLOAT_KEY = 'float'
|
||||
GAZ_INT_KEY = 'int'
|
||||
GAZ_FUNCTION_TAG = 'function'
|
||||
GAZ_PROCEDURE_TAG = 'procedure'
|
||||
GAZ_OPERATOR_TAG = "operator"
|
||||
GAZ_UNARY_OPERATOR_TAG = "unary"
|
||||
GAZ_LIT_TAG = "literal"
|
||||
GAZ_VAR_TAG = "variable"
|
||||
GAZ_OP_KEY = "op"
|
||||
GAZ_NAME_KEY = "name"
|
||||
GAZ_QUALIFIER_KEY = "mut"
|
||||
GAZ_VAL_KEY = "value"
|
||||
GAZ_TY_KEY = "type"
|
||||
ANY_TYPE = "any"
|
||||
GAZ_DECLARATION_TAG = "declaration"
|
||||
GAZ_IN_STREAM = "std_input"
|
||||
GAZ_OUT_STREAM = "std_output"
|
||||
GAZ_STREAM_TAG = "stream"
|
||||
GAZ_ASSIGNMENT_TAG = "assignment"
|
||||
GAZ_RETURN_TAG = "return"
|
||||
GAZ_MAIN = "main"
|
||||
GAZ_BOOL_KEY = "bool"
|
||||
GAZ_IF_TAG = "conditional"
|
||||
GAZ_LOOP_TAG = "loop"
|
||||
GAZ_TRUE_BLOCK_TAG = "true"
|
||||
GAZ_FALSE_BLOCK_TAG = "false"
|
||||
GAZ_ARG_TAG = "argument"
|
|
@ -1,5 +1,4 @@
|
|||
from fuzzingbook.Grammars import Grammar, is_valid_grammar, convert_ebnf_grammar
|
||||
from isla.solver import ISLaSolver
|
||||
from ast_generator.constants import Grammar
|
||||
|
||||
GAZPREA_TOP_LEVEL: Grammar = {
|
||||
# Top level elements
|
||||
|
@ -52,11 +51,3 @@ GAZPREA_TOP_LEVEL: Grammar = {
|
|||
'<XML_SLASH_TAG>': ['/>'],
|
||||
'<XML_OPEN_SLASH>': ['</'],
|
||||
}
|
||||
|
||||
if __name__ == "__main__":
|
||||
assert is_valid_grammar(GAZPREA_TOP_LEVEL)
|
||||
|
||||
initial_grammar = convert_ebnf_grammar(GAZPREA_TOP_LEVEL)
|
||||
solver = ISLaSolver(initial_grammar)
|
||||
constants_tree_str = str(solver.solve())
|
||||
print(constants_tree_str)
|
||||
|
|
94
ast_generator/test/config.yaml
Normal file
94
ast_generator/test/config.yaml
Normal file
|
@ -0,0 +1,94 @@
|
|||
# The default configuration for the Gazprea Fuzzer
|
||||
---
|
||||
generation-options:
|
||||
max-nesting-depth: 5 # maximum nesting depth for statements
|
||||
max-conditionals-loops: 5 # maximum number of loops/conditionals per routine
|
||||
max-number-of-routines: 5 # maximum number of routines (main will always be generated)
|
||||
generate-dead-code: True # generate dead code
|
||||
properties:
|
||||
max-range-length: 5 # maximum length of ranges, vectors and tuples, (AxA matrices can exist)
|
||||
use-english-words: True # use english words instead of random names (this may limit the maximum number of names)
|
||||
id-length: # length of identifiers
|
||||
min: 1
|
||||
max: 10
|
||||
function-name-length: # length of function names
|
||||
min: 1
|
||||
max: 10
|
||||
number-of-arguments: # number of arguments to a routine
|
||||
min: 1
|
||||
max: 10
|
||||
generate-max-int: True # if True, integers may be as large as the maximum int; if False, generate integers between [-1000, 1000]
|
||||
expression-weights: # weights for expressions
|
||||
# the higher a weight, the more likely (0, 10000), 0 to exclude, 10000 for only that
|
||||
brackets: 10
|
||||
|
||||
arithmetic:
|
||||
addition: 80
|
||||
subtraction: 80
|
||||
multiplication: 30
|
||||
division: 10
|
||||
modulo: 10
|
||||
power: 5
|
||||
|
||||
comparison:
|
||||
equality: 50
|
||||
inequality: 50
|
||||
less-than: 30
|
||||
greater-than: 30
|
||||
less-than-or-equal: 10
|
||||
greater-than-or-equal: 10
|
||||
|
||||
logical:
|
||||
and: 50
|
||||
or: 50
|
||||
xor: 10
|
||||
|
||||
vector-or-string:
|
||||
generator: 20
|
||||
range: 30
|
||||
filter: 10
|
||||
reverse: 10
|
||||
concatenation: 50
|
||||
|
||||
unary:
|
||||
noop: 10
|
||||
negation: 20
|
||||
not: 10
|
||||
|
||||
|
||||
statement-weights: # set to 0 for any statements you wish to exclude
|
||||
variable-declaration: 50
|
||||
routine-call: 20
|
||||
conditional: 30
|
||||
loop: 20
|
||||
assignment: 40
|
||||
out-stream: 20
|
||||
in-stream: 5
|
||||
|
||||
type-weights:
|
||||
value-types:
|
||||
integer: 50
|
||||
real: 50
|
||||
boolean: 50
|
||||
character: 50
|
||||
void: 10
|
||||
composite-types:
|
||||
vector: 20
|
||||
tuple: 5
|
||||
matrix: 10
|
||||
string: 10
|
||||
composite: 0 #TODO add support for composite types
|
||||
atomic: 40
|
||||
|
||||
routine-weights:
|
||||
procedure: 20
|
||||
function: 50
|
||||
|
||||
misc-weights:
|
||||
type-qualifier-weights:
|
||||
const: 10
|
||||
var: 60
|
||||
|
||||
block-termination-probability: 0.2 # probability for a block to terminate
|
||||
|
||||
|
|
@ -1,193 +1,246 @@
|
|||
import unittest
|
||||
import xml
|
||||
import xml.etree.ElementTree as ET
|
||||
import xml.dom.minidom
|
||||
|
||||
import yaml
|
||||
|
||||
from ast_generator.ast_generator import *
|
||||
from ast_generator.gazprea_ast_grammar import *
|
||||
|
||||
|
||||
def reachable_return(block):
    """
    Report whether `block` has a reachable return statement.

    Placeholder: `block` is not inspected and every block is accepted.
    """
    # TODO we actually need to check this
    return True
|
||||
|
||||
|
||||
class TestGeneration(unittest.TestCase):
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
with open("config.yaml", 'r') as stream:
|
||||
props = yaml.safe_load(stream)
|
||||
cls.ast_gen = AstGenerator(props)
|
||||
|
||||
def setUp(self):
|
||||
self.ast_gen = AstGenerator(GAZPREA_TOP_LEVEL, json.loads("{}"))
|
||||
self.ast_gen.current_nesting_depth = 0
|
||||
self.ast_gen.current_control_flow_nesting_depth = 0
|
||||
|
||||
def test_generate_literal(self):
|
||||
out: ET.Element = self.ast_gen.generate_literal()
|
||||
self.ast_gen.ast = ET.Element("block")
|
||||
self.ast_gen.current_ast_element = self.ast_gen.ast
|
||||
self.ast_gen.generate_literal('int')
|
||||
|
||||
self.assertIsNotNone(out.get("type"))
|
||||
self.assertIsNotNone(self.ast_gen.ast.find(GAZ_LIT_TAG))
|
||||
self.assertEqual("int", self.ast_gen.ast.find(GAZ_LIT_TAG).get("type"))
|
||||
self.assertIsNotNone(self.ast_gen.ast.find(GAZ_LIT_TAG).get("value"))
|
||||
|
||||
self.assertIsNotNone(ET.tostring(self.ast_gen.ast, 'utf-8').decode('utf-8'))
|
||||
|
||||
def test_generate_variable(self):
|
||||
out: ET.Element = self.ast_gen.generate_variable()
|
||||
out: Variable = self.ast_gen.generate_variable('int')
|
||||
|
||||
self.assertIsNotNone(out.get("type"))
|
||||
self.assertIsNotNone(out.get("name"))
|
||||
self.assertEqual("int", out.xml.get("type"))
|
||||
self.assertIsNotNone(out.xml.get("name"))
|
||||
self.assertIsNotNone(out.xml.get("mut"))
|
||||
|
||||
self.assertIsNotNone(ET.tostring(out.xml, 'utf-8').decode('utf-8'))
|
||||
|
||||
def test_generate_declaration(self):
|
||||
out: ET.Element = self.ast_gen.generate_declaration()
|
||||
self.ast_gen.ast = ET.Element("block")
|
||||
self.ast_gen.current_ast_element = self.ast_gen.ast
|
||||
self.ast_gen.generate_declaration()
|
||||
|
||||
self.assertIsNotNone(out.find("variable"))
|
||||
self.assertIsNotNone(out.find("rhs"))
|
||||
self.assertIsNotNone(self.ast_gen.ast.find("declaration"))
|
||||
|
||||
def test_generate_operation(self):
|
||||
out: ET.Element = self.ast_gen.generate_operator()
|
||||
decl = self.ast_gen.ast.find("declaration")
|
||||
self.assertIsNotNone(decl.find("variable"))
|
||||
self.assertIsNotNone(decl.find("rhs"))
|
||||
|
||||
self.assertIsNotNone(out.find("lhs"))
|
||||
self.assertIsNotNone(out.find("rhs"))
|
||||
# print(ET.tostring(decl, 'utf-8').decode('utf-8'))
|
||||
|
||||
def test_generate_assignment(self):
|
||||
self.ast_gen.ast = ET.Element("block")
|
||||
self.ast_gen.current_ast_element = self.ast_gen.ast
|
||||
self.ast_gen.generate_declaration()
|
||||
self.ast_gen.generate_assignment()
|
||||
|
||||
self.assertIsNotNone(self.ast_gen.ast.find("assignment"))
|
||||
|
||||
decl = self.ast_gen.ast.find("assignment")
|
||||
|
||||
# print(ET.tostring(decl, 'utf-8').decode('utf-8'))
|
||||
|
||||
self.assertIsNotNone(decl.find("variable"))
|
||||
self.assertIsNotNone(decl.find("rhs"))
|
||||
|
||||
def test_generate_bin_operation(self):
|
||||
self.ast_gen.ast = ET.Element("block")
|
||||
self.ast_gen.current_ast_element = self.ast_gen.ast
|
||||
self.ast_gen.generate_binary('+', 'int')
|
||||
|
||||
self.assertIsNotNone(self.ast_gen.ast.find("operator"))
|
||||
operator = self.ast_gen.ast.find("operator")
|
||||
self.assertEqual('+', operator.get("op"))
|
||||
self.assertEqual('int', operator.get("type"))
|
||||
|
||||
def test_generate_unary_operation(self):
|
||||
self.ast_gen.ast = ET.Element("block")
|
||||
self.ast_gen.current_ast_element = self.ast_gen.ast
|
||||
self.ast_gen.generate_unary('-', 'int')
|
||||
|
||||
self.assertIsNotNone(self.ast_gen.ast.find("unary"))
|
||||
operator = self.ast_gen.ast.find("unary")
|
||||
self.assertEqual('-', operator.get("op"))
|
||||
self.assertEqual('int', operator.get("type"))
|
||||
|
||||
def test_generate_stream(self):
|
||||
out: ET.Element = self.ast_gen.generate_stream()
|
||||
for type in ["std_input", "std_output"]:
|
||||
self.ast_gen.ast = ET.Element("block")
|
||||
self.ast_gen.current_ast_element = self.ast_gen.ast
|
||||
self.ast_gen.generate_in_stream()
|
||||
|
||||
self.assertIsNotNone(out.get("type"))
|
||||
self.assertIsNotNone(self.ast_gen.ast.find("stream"))
|
||||
in_stream = self.ast_gen.ast.find("stream")
|
||||
self.assertEqual("std_input", in_stream.get("type"))
|
||||
|
||||
def test_generate_routine(self):
|
||||
out: ET.Element = self.ast_gen.generate_routine()
|
||||
lad = None
|
||||
for child in in_stream.iter():
|
||||
lad = child.attrib
|
||||
|
||||
self.assertIsNotNone(out.find("block"))
|
||||
self.assertIsNotNone(lad)
|
||||
|
||||
def test_generate_block(self):
|
||||
out: ET.Element = self.ast_gen.generate_block()
|
||||
self.ast_gen.ast = ET.Element("block")
|
||||
self.ast_gen.current_ast_element = self.ast_gen.ast
|
||||
self.ast_gen.generate_block()
|
||||
|
||||
self.assertIsNotNone(self.ast_gen.ast.find("block"))
|
||||
|
||||
elem = None
|
||||
for child in self.ast_gen.ast.iter():
|
||||
elem = child.attrib
|
||||
self.assertIsNotNone(elem)
|
||||
|
||||
def test_generate_conditional(self):
|
||||
self.ast_gen.ast = ET.Element("block")
|
||||
self.ast_gen.current_ast_element = self.ast_gen.ast
|
||||
self.ast_gen.generate_conditional()
|
||||
|
||||
print(ET.tostring(self.ast_gen.ast, 'utf-8').decode('utf-8'))
|
||||
|
||||
self.assertIsNotNone(self.ast_gen.current_ast_element.find("conditional"))
|
||||
conditional = self.ast_gen.ast.find("conditional")
|
||||
|
||||
# print(ET.tostring(conditional, 'utf-8').decode('utf-8'))
|
||||
|
||||
self.assertIsNotNone(conditional.find("operator") or conditional.find("unary_operator") or conditional.find("literal"))
|
||||
|
||||
block = conditional.findall("block")
|
||||
self.assertEqual(2, len(block))
|
||||
|
||||
def test_generate_loop(self):
|
||||
self.ast_gen.ast = ET.Element("block")
|
||||
self.ast_gen.current_ast_element = self.ast_gen.ast
|
||||
self.ast_gen.generate_loop()
|
||||
|
||||
self.assertIsNotNone(self.ast_gen.ast.find("loop"))
|
||||
loop = self.ast_gen.ast.find("loop")
|
||||
|
||||
# print(ET.tostring(loop, 'utf-8').decode('utf-8'))
|
||||
|
||||
self.assertIsNotNone(loop.find("operator") or loop.find("unary_operator") or loop.find("literal"))
|
||||
|
||||
block = loop.findall("block")
|
||||
self.assertEqual(1, len(block))
|
||||
|
||||
|
||||
def test_generate_routine(self):
|
||||
self.ast_gen.ast = ET.Element("block")
|
||||
self.ast_gen.current_ast_element = self.ast_gen.ast
|
||||
self.ast_gen.generate_routine()
|
||||
|
||||
self.assertIsNotNone(self.ast_gen.ast.find("procedure") or self.ast_gen.ast.find("function"))
|
||||
routine = self.ast_gen.ast.find("procedure") or self.ast_gen.ast.find("function")
|
||||
|
||||
print(ET.tostring(routine, 'utf-8').decode('utf-8'))
|
||||
|
||||
self.assertIsNotNone(routine.find("block"))
|
||||
self.assertIsNotNone(routine.find("argument"))
|
||||
|
||||
def test_generate_function_ASSERT_RETURNS(self):
|
||||
self.ast_gen.ast = ET.Element("block")
|
||||
self.ast_gen.current_ast_element = self.ast_gen.ast
|
||||
self.ast_gen.generate_routine(routine_type="function")
|
||||
|
||||
self.assertIsNotNone(self.ast_gen.ast.find("function"))
|
||||
routine = self.ast_gen.ast.find("function")
|
||||
|
||||
print(ET.tostring(routine, 'utf-8').decode('utf-8'))
|
||||
|
||||
self.assertIsNotNone(routine.find("block"))
|
||||
self.assertIsNotNone(routine.find("argument"))
|
||||
|
||||
block = routine.find("block")
|
||||
print(ET.tostring(block, 'utf-8').decode('utf-8'))
|
||||
rets = block.find("return")
|
||||
print(rets)
|
||||
self.assertLess(0, len(rets))
|
||||
self.assertTrue(reachable_return(block))
|
||||
|
||||
self.assertIsNotNone(out)
|
||||
|
||||
def test_generate_main(self):
|
||||
out: ET.Element = self.ast_gen.generate_main_routine()
|
||||
self.ast_gen.ast = ET.Element("block")
|
||||
self.ast_gen.current_ast_element = self.ast_gen.ast
|
||||
self.ast_gen.generate_main()
|
||||
|
||||
self.assertIsNotNone(self.ast_gen.ast.find("procedure"))
|
||||
out = self.ast_gen.ast.find("procedure")
|
||||
|
||||
print(ET.tostring(out, 'utf-8').decode('utf-8'))
|
||||
|
||||
self.assertIsNotNone(out)
|
||||
self.assertEqual("main", out.get("name"))
|
||||
self.assertEqual("int", out.get("return_type"))
|
||||
|
||||
self.assertIsNotNone(out.find("block"))
|
||||
block = out.find("block")
|
||||
self.assertTrue(reachable_return(block))
|
||||
|
||||
self.assertIsNone(out.find("argument"))
|
||||
|
||||
def test_generate_ast(self):
|
||||
out: ET.Element = self.ast_gen.generate_ast()
|
||||
|
||||
self.assertIsNotNone(out)
|
||||
|
||||
has_main = False
|
||||
for child in out:
|
||||
if child.get("name") == "main":
|
||||
has_main = True
|
||||
break
|
||||
|
||||
self.assertTrue(has_main)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class TestRoutines(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.ast_gen = AstGenerator(GAZPREA_TOP_LEVEL, json.loads("{}"))
|
||||
|
||||
def test_populate_function(self):
|
||||
with open("xml/routine_1.xml", 'r') as i:
|
||||
input_elemnet = ET.fromstring(i.read())
|
||||
out: ET.Element = self.ast_gen.populate_routine(input_elemnet, 'int', 'blahaj')
|
||||
|
||||
self.assertEqual('int', out.get("return_type"))
|
||||
self.assertEqual('int', out.find("block").find("return").get("type"))
|
||||
|
||||
def test_generate_return(self):
|
||||
out: ET.Element = self.ast_gen.generate_return("int")
|
||||
|
||||
self.assertEqual('int', out.get("type"))
|
||||
|
||||
def test_populate_typed_procedure(self):
|
||||
with open("xml/routine_2.xml", 'r') as i:
|
||||
input_elemnet = ET.fromstring(i.read())
|
||||
out: ET.Element = self.ast_gen.populate_routine(input_elemnet, 'int', 'blahaj')
|
||||
|
||||
self.assertEqual('int', out.get("return_type"))
|
||||
self.assertEqual('int', out.find("block").find("return").get("type"))
|
||||
|
||||
def test_populate_void_procedure(self):
|
||||
with open("xml/routine_3.xml", 'r') as i:
|
||||
input_elemnet = ET.fromstring(i.read())
|
||||
out: ET.Element = self.ast_gen.populate_routine(input_elemnet, 'void', 'blahaj')
|
||||
|
||||
self.assertEqual('void', out.get("return_type"))
|
||||
self.assertIsNone(out.find("block").find("return"))
|
||||
|
||||
def test_populate_void_procedure_with_return(self):
|
||||
with open("xml/routine_2.xml", 'r') as i:
|
||||
input_elemnet = ET.fromstring(i.read())
|
||||
out: ET.Element = self.ast_gen.populate_routine(input_elemnet, 'void', 'blahaj')
|
||||
|
||||
self.assertEqual('void', out.get("return_type"))
|
||||
self.assertIsNotNone(out.find("block").find("return").get("type"))
|
||||
self.assertEqual('void', out.find("block").find("return").get("type"))
|
||||
|
||||
|
||||
def test_populate_nonvoid_procedure_without_return(self):
|
||||
with open("xml/routine_3.xml", 'r') as i:
|
||||
input_elemnet = ET.fromstring(i.read())
|
||||
out: ET.Element = self.ast_gen.populate_routine(input_elemnet, 'int', 'blahaj')
|
||||
|
||||
self.assertEqual('int', out.get("return_type"))
|
||||
self.assertIsNotNone(out.find("block").find("return"))
|
||||
self.assertEqual('int', out.find("block").find("return").get("type"))
|
||||
|
||||
|
||||
|
||||
|
||||
# def test_populate_assignment(self):
|
||||
# pass
|
||||
#
|
||||
# def test_populate_top_block(self):
|
||||
# pass
|
||||
|
||||
|
||||
# def test_populate_call(self):
|
||||
# pass
|
||||
|
||||
|
||||
|
||||
|
||||
class TestValues(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.ast_gen = AstGenerator(GAZPREA_TOP_LEVEL ,json.loads("{}"))
|
||||
|
||||
def test_populate_int(self):
|
||||
with open("xml/literal_1.xml", 'r') as i:
|
||||
input = i.read()
|
||||
input_element = ET.fromstring(input)
|
||||
out: ET.Element = self.ast_gen.populate_literal(input_element, "int", 42)
|
||||
|
||||
self.assertEqual("int", out.get("type"))
|
||||
self.assertEqual("42", str(out.get("value")))
|
||||
|
||||
def test_populate_var(self):
|
||||
with open("xml/variable_1.xml", 'r') as i:
|
||||
input = i.read()
|
||||
input_element = ET.fromstring(input)
|
||||
out: ET.Element = self.ast_gen.populate_variable(input_element, "var", "float", "bean")
|
||||
|
||||
self.assertEqual("var", out.get("mut"))
|
||||
self.assertEqual("float", out.get("type"))
|
||||
self.assertEqual("bean", out.get("name"))
|
||||
|
||||
def test_populate_operator(self):
|
||||
with open("xml/operator_1.xml", 'r') as i:
|
||||
input = i.read()
|
||||
input_element = ET.fromstring(input)
|
||||
out: ET.Element = self.ast_gen.populate_operator(input_element, '*', 'int')
|
||||
inner_var = out.find("type")
|
||||
|
||||
self.assertEqual("int", out.get("type"))
|
||||
self.assertEqual("*", out.get("op"))
|
||||
self.assertEqual("int", out.find("lhs").find("variable").get("type"))
|
||||
self.assertEqual("int", out.find("rhs").find("literal").get("type"))
|
||||
|
||||
def test_populate_stream(self):
|
||||
with open("xml/stream_1.xml", 'r') as i:
|
||||
input_element = ET.fromstring(i.read())
|
||||
out: ET.Element = self.ast_gen.populate_stream(input_element, "std_output")
|
||||
|
||||
self.assertEqual("std_output", out.get("type"))
|
||||
self.ast_gen.generate_ast()
|
||||
|
||||
self.assertIsNotNone(self.ast_gen.ast)
|
||||
|
||||
print(ET.tostring(self.ast_gen.ast, 'utf-8').decode('utf-8'))
|
||||
|
||||
procedures = self.ast_gen.ast.findall("procedure")
|
||||
self.assertLess(0, len(procedures))
|
||||
|
||||
main = False
|
||||
for proc in procedures:
|
||||
if proc.get("name") == "main":
|
||||
main = True
|
||||
self.assertTrue(main)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
with open("config.yaml", 'r') as stream:
|
||||
props = yaml.safe_load(stream)
|
||||
ast_gen = AstGenerator(props)
|
||||
|
||||
for a in range(20):
|
||||
ast_gen.generate_ast()
|
||||
ast = ast_gen.ast
|
||||
|
||||
with open(f"xml/ast{a}.xml", 'x') as t:
|
||||
dom = xml.dom.minidom.parseString(ET.tostring(ast).decode('utf-8'))
|
||||
pretty: str = dom.toprettyxml()
|
||||
repretty = ""
|
||||
for line in pretty.split('\n'):
|
||||
if line.startswith("<?xml"):
|
||||
pass
|
||||
else:
|
||||
repretty += (line + '\n')
|
||||
|
||||
t.write(repretty)
|
||||
|
|
10
ast_generator/test/test_scope.py
Normal file
10
ast_generator/test/test_scope.py
Normal file
|
@ -0,0 +1,10 @@
|
|||
import unittest
|
||||
|
||||
|
||||
class MyTestCase(unittest.TestCase):
|
||||
def test_something(self):
|
||||
self.assertEqual(True, False) # add assertion here
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
|
@ -144,7 +144,7 @@ class AstParser:
|
|||
for child in node:
|
||||
self._unparse_node(child)
|
||||
|
||||
# self.input += "}\n\n" #blocks are already there
|
||||
# cls.input += "}\n\n" #blocks are already there
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
110
config.yaml
110
config.yaml
|
@ -3,68 +3,92 @@
|
|||
generation-options:
|
||||
max-nesting-depth: 5 # maximum nesting depth for statements
|
||||
max-conditionals-loops: 5 # maximum number of loops/conditionals per routine
|
||||
max-number-of-routines: 5 # maximum number of routines (main will always be generated)
|
||||
generate-dead-code: True # generate dead code
|
||||
properties:
|
||||
max-range-length: 5 # maximum length of ranges, vectors and tuples, (axa matrices can exist)
|
||||
max-range-length: 5 # maximum length of ranges, vectors and tuples, (AxA matrices can exist)
|
||||
use-english-words: True # use english words instead of random names (this may limit the maximum number of names)
|
||||
id-length: # length of identifiers
|
||||
min: 1
|
||||
max: 10
|
||||
function-name-length: # length of function names
|
||||
min: 1
|
||||
max: 10
|
||||
number-of-arguments: # number of arguments to a routine
|
||||
min: 1
|
||||
max: 10
|
||||
generate-max-int: True # if True, integers may be as large as the maximum int; if False, generate integers between [-1000, 1000]
|
||||
expression-weights: # weights for expressions
|
||||
# the higher a weight, the more likely (0, +inf)
|
||||
# the higher a weight, the more likely (0, 10000), 0 to exclude, 10000 for only that
|
||||
brackets: 10
|
||||
|
||||
# arithmetic
|
||||
addition: 80
|
||||
subtraction: 80
|
||||
multiplication: 30
|
||||
division: 10
|
||||
modulo: 10
|
||||
power: 5
|
||||
negation: 20
|
||||
arithmetic:
|
||||
addition: 80
|
||||
subtraction: 80
|
||||
multiplication: 30
|
||||
division: 10
|
||||
modulo: 10
|
||||
power: 5
|
||||
|
||||
# comparison
|
||||
equality: 50
|
||||
inequality: 50
|
||||
less-than: 30
|
||||
greater-than: 30
|
||||
less-than-or-equal: 10
|
||||
greater-than-or-equal: 10
|
||||
comparison:
|
||||
equality: 50
|
||||
inequality: 50
|
||||
less-than: 30
|
||||
greater-than: 30
|
||||
less-than-or-equal: 10
|
||||
greater-than-or-equal: 10
|
||||
|
||||
# logical
|
||||
and: 50
|
||||
or: 50
|
||||
not: 10
|
||||
xor: 10
|
||||
logical:
|
||||
and: 50
|
||||
or: 50
|
||||
xor: 10
|
||||
|
||||
# vector/string
|
||||
generator: 20
|
||||
range: 30
|
||||
filter: 10
|
||||
reverse: 10
|
||||
concatenation: 50
|
||||
vector-or-string:
|
||||
generator: 20
|
||||
range: 30
|
||||
filter: 10
|
||||
reverse: 10
|
||||
concatenation: 50
|
||||
|
||||
unary:
|
||||
noop: 10
|
||||
negation: 20
|
||||
not: 10
|
||||
|
||||
|
||||
statement-weights: # set to 0 for any statements you wish to exclude
|
||||
variable-declaration:
|
||||
int-declaration: 50
|
||||
float-declaration: 50
|
||||
char-declaration: 50
|
||||
string-declaration: 50
|
||||
bool-declaration: 50
|
||||
vector-declaration: 20
|
||||
tuple-declaration: 10
|
||||
matrix-declaration: 10
|
||||
function-call: 20
|
||||
statement-weights: # set to 0 for any statements you wish to exclude
|
||||
variable-declaration: 50
|
||||
routine-call: 20
|
||||
conditional: 30
|
||||
loop: 20
|
||||
assignment: 40
|
||||
print: 20
|
||||
input: 5
|
||||
out-stream: 20
|
||||
in-stream: 5
|
||||
|
||||
return: 5 # probability for a return statement to be placed arbitrarily in the middle of a generated procedure
|
||||
type-weights:
|
||||
value-types:
|
||||
integer: 50
|
||||
real: 50
|
||||
boolean: 50
|
||||
character: 50
|
||||
void: 10
|
||||
composite-types:
|
||||
vector: 20
|
||||
tuple: 5
|
||||
matrix: 10
|
||||
string: 10
|
||||
composite: 0 #TODO add support for composite types
|
||||
atomic: 40
|
||||
|
||||
block-termination-probability: 0.2 # probability for a block to terminate
|
||||
routine-weights:
|
||||
procedure: 20
|
||||
function: 50
|
||||
|
||||
misc-weights:
|
||||
type-qualifier-weights:
|
||||
const: 10
|
||||
var: 60
|
||||
|
||||
block-termination-probability: 0.2 # probability for a block to terminate
|
||||
|
||||
|
||||
|
|
16
fuzzer.py
16
fuzzer.py
|
@ -1,3 +1,15 @@
|
|||
import yaml
|
||||
import ast_parser
|
||||
import ast_generator
|
||||
|
||||
class GazpreaFuzzer:
|
||||
def __init__(self, config: str, ):
|
||||
pass
|
||||
def __init__(self, config: str):
|
||||
# parse a yaml config file with path in config
|
||||
# and set the appropriate parameters
|
||||
with open(config) as yaml_file:
|
||||
settings: dict = yaml.safe_load(yaml_file)
|
||||
self.settings = settings
|
||||
|
||||
self.parser = None
|
||||
self.generator = ast_generator.AstGenerator()
|
||||
|
||||
|
|
Loading…
Reference in a new issue