gazprea-fuzzer-python/ast_generator/ast_generator.py

714 lines
24 KiB
Python
Raw Normal View History

2023-11-17 16:57:53 -07:00
import random
import string
import xml.etree.ElementTree as ET
2023-11-17 16:57:53 -07:00
from english_words import get_english_words_set
2023-11-17 16:57:53 -07:00
from constants import *
2023-11-17 16:57:53 -07:00
class AstGenerator:
"""
Generates an AST from a grammar based on given settings
2023-11-17 16:57:53 -07:00
Originally the intention was to use the ISLa library to generate
the AST, however I found that ISLa is like taking a buldozer to
a sledgehammer's job, so I decided to write a procedural generator
instead.
2023-11-17 16:57:53 -07:00
The way we select elements is we take all the settings in their
category and assign them a range on a number line. Then we
pick a random number in that range and whichever category it
falls into will be selected.
"""
2023-11-17 16:57:53 -07:00
def __init__(self, settings: dict):
"""
This class is designed to get the settings from some wrapper class that
better defines the precise constraints of the language being generated
2023-11-17 16:57:53 -07:00
the necessary settings are in the .yaml file and #TODO this is not generalizable yet
2023-11-17 16:57:53 -07:00
@param settings: settings for weights and probabilities and lengths
"""
self.settings = settings
2023-11-17 16:57:53 -07:00
self.symbol_table = [] # TODO this should be a list of scopes
global_scope = Scope(None, None)
self.symbol_table.append(global_scope) # NOTE for debug
self.current_scope = global_scope
2023-11-17 16:57:53 -07:00
names = get_english_words_set(['web2'], lower=True)
possible_names = filter(lambda x: self.settings['properties']['id-length']['min'] < len(x)
< self.settings['properties']['id-length']['max'], names)
2023-11-17 16:57:53 -07:00
var_name_len = len(list(possible_names))
var_name_list = list(possible_names)
self.variable_names = var_name_list[0:var_name_len // 2]
self.routine_names = var_name_list[var_name_len // 2:var_name_len]
2023-11-17 16:57:53 -07:00
self.ast: ET.Element or None = None
self.current_ast_element: ET.Element or None = None
self.current_nesting_depth = 0
self.current_control_flow_nesting_depth = 0
2023-11-17 16:57:53 -07:00
def generate_ast(self):
"""
@brief generates an AST from a grammar
"""
self.generate_top_level_block()
def generate_top_level_block(self): # TODO add constant generation into this block
i = 0
element = build_xml_element([], name=GAZ_BLOCK_TAG)
self.current_ast_element = element
self.ast = element
# optional constants here too
self.generate_main()
while i < self.settings['generation-options']['max-number-of-routines']:
if random.random() < self.settings['block-termination-probability']:
break
self.generate_routine()
i += 1
def generate_main(self):
parent = self.current_ast_element
self.push_scope()
main_args = [ # TODO refactor these into constants
("name", "main"),
("return_type", GAZ_INT_KEY),
("args", "()"),
]
element = build_xml_element(main_args, name=GAZ_PROCEDURE_TAG)
self.current_ast_element.append(element)
self.current_ast_element = element
self.generate_block(return_stmt=True, return_value="0", return_type=GAZ_INT_KEY)
self.pop_scope()
self.current_ast_element = parent
def generate_block(self, tag=None, return_stmt=False, return_value=None, return_type=None):
if tag is None:
tag = []
parent = self.current_ast_element
self.push_scope()
element = build_xml_element(tag, name=GAZ_BLOCK_TAG)
self.current_ast_element.append(element)
self.current_ast_element = element
self.generate_statements()
if return_stmt:
self.generate_return(return_type=return_type, return_value=return_value)
if self.settings['generation-options']['generate-dead-code']:
self.generate_statements()
self.pop_scope()
self.current_ast_element = parent
def generate_return(self, return_type=None, return_value=None):
if return_type is None or return_type == GAZ_VOID_TYPE:
self.current_ast_element.append(build_xml_element([], name=GAZ_RETURN_TAG))
return
else:
if return_value is None:
xml_element = build_xml_element([("type", return_type)], name=GAZ_RETURN_TAG)
self.current_ast_element.append(xml_element)
parent = self.current_ast_element
self.current_ast_element = xml_element
self.generate_expression(return_type)
self.current_ast_element = parent
return
else:
xml_element = build_xml_element([("type", return_type)], name=GAZ_RETURN_TAG)
self.current_ast_element.append(xml_element)
parent = self.current_ast_element
self.current_ast_element = xml_element
self.current_ast_element.append(self.make_literal(return_type, return_value))
self.current_ast_element = parent
return
def generate_routine(self, routine_type=None):
if routine_type is None:
routine_type = self.get_routine_type()
else:
routine_type = routine_type
args = self.generate_routine_args()
name = self.get_name(routine_type)
return_type = self.get_type(routine_type)
routine = Routine(name, routine_type, return_type, args)
routine_args = [
("name", routine.name),
("return_type", routine.return_type),
]
element = build_xml_element(routine_args, name=routine.type)
self.current_ast_element.append(element)
parent = self.current_ast_element
self.current_ast_element = element
self.push_scope()
self.define_args(routine.arguments)
self.generate_block(return_stmt=True, return_type=routine.return_type)
self.pop_scope()
self.current_ast_element = parent
def define_args(self, args):
for arg in args:
self.current_ast_element.append(arg.xml)
self.current_scope.append(arg.name, arg)
def generate_statements(self):
# Number line
number_line = 180 #TODO fix the numberline stuff to reflect the settings
cutoffs = [10, 30, 50, 80, 100, 140, 180]
options = {
0: self.generate_declaration,
1: self.generate_routine_call,
2: self.generate_conditional,
3: self.generate_loop,
4: self.generate_assignment,
5: self.generate_out_stream,
6: self.generate_in_stream,
}
while True:
if random.random() < self.settings['block-termination-probability']:
break
a = random.randint(0, number_line)
i = 0
for i in range(len(cutoffs) - 1):
if cutoffs[i] < a < cutoffs[i + 1]:
try:
options[i]()
except ValueError:
break
break
def generate_int_real_expr(self):
# Number line
number_line = 100
cutoffs = [10, 30, 50, 80, 100]
options = { #TODO add brackets
0: "addition",
1: "subtraction",
2: "multiplication",
3: "division",
4: "modulo",
5: "power",
6: "negation",
7: "noop",
8: "equality",
9: "inequality",
10: "less-than",
11: "greater-than",
12: "less-than-or-equal",
13: "greater-than-or-equal",
}
unary = ["negation", "noop"]
self._generate_expression([GAZ_INT_KEY, GAZ_FLOAT_KEY], number_line, cutoffs, options, unary)
def generate_bool_expr(self):
# Number line
number_line = 100
cutoffs = [10, 30, 50, 80, 100]
options = { #TODO add brackets # TODO cannot guarantee correctness of comparison since booleans may appear
0: "equality",
1: "inequality",
2: "less-than",
3: "greater-than",
4: "less-than-or-equal",
5: "greater-than-or-equal",
6: "and",
7: "or",
8: "xor",
9: "not",
}
unary = ["not"]
self._generate_expression([GAZ_BOOL_KEY], number_line, cutoffs, options, unary)
def _generate_expression(self, expr_type: list[str], number_line, cutoffs, options, unary=None):
if unary is None:
unary = []
parent = self.current_ast_element
self.current_nesting_depth += 1
if self.current_nesting_depth > self.settings['generation-options']['max-nesting-depth'] or random.random() < \
self.settings['block-termination-probability']:
self.generate_literal(random.choice(expr_type)) # TODO add the reals
self.current_nesting_depth -= 1
return
op = ""
a = random.randint(0, number_line - 1)
i = 0
for i in range(len(cutoffs) - 1):
if i == 0:
if a < cutoffs[i]:
op = options[i]
break
if cutoffs[i] <= a < cutoffs[i + 1]:
op = options[i]
break
if op in unary:
self.generate_unary(op, random.choice(expr_type))
else:
self.generate_binary(op, random.choice(expr_type))
self.current_nesting_depth -= 1
self.current_ast_element = parent
def generate_declaration(self, mut=None):
parent = self.current_ast_element
decl_type = self.get_type(GAZ_VAR_TAG)
decl_args = [
("type", decl_type),
]
element = build_xml_element(decl_args, name=GAZ_DECLARATION_TAG)
self.current_ast_element.append(element)
self.current_ast_element = element
variable = self.generate_variable(decl_type, mut=mut)
self.current_ast_element.append(variable.xml)
self.current_scope.append(variable.name, variable)
self.generate_xhs(GAZ_RHS_TAG, decl_type) # TODO add real type (decl_type)
self.current_ast_element = parent
def generate_binary(self, op, op_type):
parent = self.current_ast_element
if op == "":
raise ValueError("op is empty!")
args = [
("op", op),
("type", op_type),
]
element = build_xml_element(args, name=GAZ_OPERATOR_TAG)
self.current_ast_element.append(element)
self.current_ast_element = element
self.generate_xhs(GAZ_LHS_TAG, op_type)
self.generate_xhs(GAZ_RHS_TAG, op_type)
self.current_ast_element = parent
def generate_xhs(self, handedness, op_type):
element = build_xml_element([], name=handedness)
parent = self.current_ast_element
self.current_ast_element.append(element)
self.current_ast_element = element
self.generate_expression(op_type)
self.current_ast_element = parent
def generate_unary(self, op, op_type=ANY_TYPE):
parent = self.current_ast_element
args = [
("op", op),
("type", op_type),
]
element = build_xml_element(args, name=GAZ_UNARY_OPERATOR_TAG)
self.current_ast_element.append(element)
self.current_ast_element = element
self.generate_xhs(GAZ_RHS_TAG, op_type)
self.current_ast_element = parent
def generate_routine_call(self):
pass
2023-11-17 16:57:53 -07:00
def generate_conditional(self):
if self.current_control_flow_nesting_depth >= self.settings['generation-options']['max-nesting-depth']:
return
2023-11-17 16:57:53 -07:00
if self.current_control_flow_nesting_depth > 0 and random.random() < self.settings['block-termination-probability']:
return
2023-11-17 16:57:53 -07:00
element = build_xml_element([], name=GAZ_IF_TAG)
self.current_ast_element.append(element)
parent = self.current_ast_element
self.current_ast_element = element
2023-11-17 16:57:53 -07:00
self.current_control_flow_nesting_depth += 1
2023-11-17 16:57:53 -07:00
self.push_scope()
2023-11-17 16:57:53 -07:00
self.generate_expression(GAZ_BOOL_KEY)
2023-11-17 16:57:53 -07:00
self.generate_block(tag=[("type", GAZ_TRUE_BLOCK_TAG)])
self.generate_block(tag=[("type", GAZ_FALSE_BLOCK_TAG)])
2023-11-17 16:57:53 -07:00
self.pop_scope()
self.current_ast_element = parent
2023-11-17 16:57:53 -07:00
def generate_loop(self):
if self.current_control_flow_nesting_depth >= self.settings['generation-options']['max-nesting-depth']:
return
2023-11-17 16:57:53 -07:00
if self.current_control_flow_nesting_depth > 0 and random.random() < self.settings['block-termination-probability']:
return
2023-11-17 16:57:53 -07:00
parent = self.current_ast_element
element = build_xml_element([], name=GAZ_LOOP_TAG)
self.current_ast_element.append(element)
self.current_ast_element = element
2023-11-17 16:57:53 -07:00
self.current_control_flow_nesting_depth += 1
self.push_scope()
self.generate_expression(GAZ_BOOL_KEY)
self.generate_block()
self.pop_scope()
self.current_ast_element = parent
2023-11-17 16:57:53 -07:00
def generate_assignment(self):
possible_vars = self.current_scope.get_all_defined_mutable_vars()
if len(possible_vars) == 0:
raise ValueError("No possible variables to assign to!")
# same structure as a declaration
parent = self.current_ast_element
2023-11-17 16:57:53 -07:00
element = build_xml_element([], name=GAZ_ASSIGNMENT_TAG)
self.current_ast_element.append(element)
self.current_ast_element = element
2023-11-17 16:57:53 -07:00
variable = random.choice(possible_vars)
2023-11-17 16:57:53 -07:00
self.current_ast_element.append(variable.xml)
self.generate_xhs(GAZ_RHS_TAG, variable.type)
2023-11-17 16:57:53 -07:00
self.current_ast_element = parent
2023-11-17 16:57:53 -07:00
def generate_out_stream(self):
self.generate_stream(GAZ_OUT_STREAM)
2023-11-17 16:57:53 -07:00
def generate_in_stream(self):
self.generate_stream(GAZ_IN_STREAM)
2023-11-17 16:57:53 -07:00
def generate_stream(self, stream_type):
parent = self.current_ast_element
args = [
("type", stream_type),
]
element = build_xml_element(args, name=GAZ_STREAM_TAG)
self.current_ast_element.append(element)
self.current_ast_element = element
2023-11-17 16:57:53 -07:00
self.generate_expression(ANY_TYPE)
2023-11-17 16:57:53 -07:00
self.current_ast_element = parent
2023-11-17 16:57:53 -07:00
def generate_variable(self, var_type: str, mut=None):
if mut is None:
return Variable(self.get_name(GAZ_VAR_TAG), var_type, self.get_qualifier())
else:
return Variable(self.get_name(GAZ_VAR_TAG), var_type, mut)
2023-11-17 16:57:53 -07:00
def generate_literal(self, var_type: str):
args = [
("type", var_type),
("value", str(self.get_value(var_type))),
]
element = build_xml_element(args, name=GAZ_LIT_TAG)
self.current_ast_element.append(element)
def make_literal(self, type, value):
args = [
("type", type),
("value", value),
]
element = build_xml_element(args, name=GAZ_LIT_TAG)
return element
def generate_global(self):
current_scope = self.current_scope
current_element = self.current_ast_element
2023-11-17 16:57:53 -07:00
self.current_scope = self.current_scope.get_top_scope()
self.current_ast_element = self.ast
2023-11-17 16:57:53 -07:00
self.generate_declaration()
2023-11-17 16:57:53 -07:00
self.current_scope = current_scope
self.current_ast_element = current_element
def generate_expression(self, expr_type: str):
if expr_type == GAZ_INT_KEY or expr_type == GAZ_FLOAT_KEY:
self.generate_int_real_expr()
elif expr_type == GAZ_BOOL_KEY:
self.generate_bool_expr()
elif expr_type == ANY_TYPE: # TODO implement the choice of any type
self.generate_int_real_expr()
else:
raise NotImplementedError(f"Expression type {expr_type} not implemented")
def generate_routine_args(self):
number = random.randint(self.settings['properties']['number-of-arguments']['min'],
self.settings['properties']['number-of-arguments']['max'])
args = []
for i in range(number):
arg = self.generate_arg()
args.append(arg)
self.current_scope.append(arg.name, arg)
return args
def generate_arg(self):
return Argument(self.get_name(GAZ_VAR_TAG), self.get_type(GAZ_VAR_TAG))
def push_scope(self, xml_element: ET.Element = None):
scope = Scope(self.current_scope)
self.symbol_table.append(scope)
self.current_scope = scope
def pop_scope(self):
self.current_scope = self.current_scope.enclosing_scope
# TODO revamp the random value generations
def get_qualifier(self):
2023-11-17 16:57:53 -07:00
"""
@brief get a random qualifier from the list of possible qualifiers
@return a qualifier as a string
2023-11-17 16:57:53 -07:00
"""
number_line = (self.settings["misc-weights"]["type-qualifier-weights"]["const"] +
self.settings["misc-weights"]["type-qualifier-weights"]["var"] -1 )
res = random.randint(0, number_line)
if res in range(0, self.settings["misc-weights"]["type-qualifier-weights"]["const"]):
return 'const'
elif res in range(self.settings["misc-weights"]["type-qualifier-weights"]["const"],
self.settings["misc-weights"]["type-qualifier-weights"]["const"] +
self.settings["misc-weights"]["type-qualifier-weights"]["var"]):
return 'var'
2023-11-17 16:57:53 -07:00
else:
raise ValueError("Internal Error, please report the stack trace to me")
def get_routine_type(self):
cutoffs = []
values = []
ops = []
for key, value in self.settings["routine-weights"].items():
cutoffs.append(value + sum(cutoffs))
values.append(value)
ops.append(key)
res = random.randint(0, sum(values))
for i in range(len(cutoffs)):
if res < cutoffs[i]:
return ops[i] # TODO everything should be fast faied
def get_value(self, type):
if type == GAZ_INT_KEY:
if self.settings["properties"]["generate-max-int"]:
return random.randint(-2147483648, 2147483647)
else:
return random.randint(-1000, 1000)
elif type == GAZ_FLOAT_KEY:
return random.uniform(-1000, 1000)
elif type == GAZ_BOOL_KEY:
return random.choice([True, False])
else:
raise TypeError("Unimplemented generator for type: " + type)
def get_name(self, name_type):
"""
@brief get a random name from the list of possible names and add it to the current scope
@param name_type:
@return:
"""
length = random.randint(self.settings['properties']['id-length']['min'],
self.settings['properties']['id-length']['max'])
name = ''.join(random.choices(string.ascii_letters, k=length))
return name
def get_op(self, type):
if type == GAZ_INT_KEY:
cutoffs = []
values = []
ops = []
for key, value in self.settings["expression-weights"]["arithmetic"]:
cutoffs.append(value + sum(cutoffs))
values.append(value)
ops.append(get_op(key))
res = random.randint(0, sum(values))
for i in range(len(cutoffs)):
if res < cutoffs[i]:
return ops[i]
def get_type(self, tag): # TODO Add support for composite types
return 'int' # TODO Add support for all types
if tag in [GAZ_PROCEDURE_TAG, GAZ_FUNCTION_TAG, GAZ_VAR_TAG]:
cutoffs = []
values = []
types = []
for key, value in self.settings["type-weights"]["value-types"].items():
if key == GAZ_VOID_TYPE and tag != GAZ_PROCEDURE_TAG:
continue
cutoffs.append(value + sum(cutoffs))
values.append(value)
types.append(key)
res = random.randint(0, sum(values))
for i in range(len(cutoffs)):
if res < cutoffs[i]:
return types[i]
class Variable:
def __init__(self, name: str, type: str, qualifier: str, value: any = None):
self.name = name
self.type = type
self.value = value
self.qualifier = qualifier
self.xml = self._build_xml()
def _build_xml(self):
args = [
('name', self.name),
('type', self.type),
('mut', self.qualifier),
]
return build_xml_element(args, name=GAZ_VAR_TAG)
class Argument:
def __init__(self, name: str, type: str):
self.name = name
2023-11-17 16:57:53 -07:00
self.type = type
self.xml = self._build_xml()
2023-11-17 16:57:53 -07:00
def __str__(self):
return self.type + " " + self.name
2023-11-17 16:57:53 -07:00
def _build_xml(self):
args = [
('name', self.name),
('type', self.type),
]
return build_xml_element(args, name=GAZ_ARG_TAG)
2023-11-17 16:57:53 -07:00
class Routine:
def __init__(self, name: str, type: str, return_type: str, args: list[Argument], xml: ET.Element = None):
self.name = name
self.type = type
self.return_type = return_type
self.arguments = args
self.xml = xml
self.xml = xml
2023-11-17 16:57:53 -07:00
class Scope:
def __init__(self, enclosing_scope, child_scope=None, associated_xml: ET.Element = None):
self.symbols = {}
self.enclosing_scope = enclosing_scope
self.child_scope = child_scope
self.xml = associated_xml
def resolve(self, name) -> ET.Element or None:
if name in self.symbols:
return self.symbols[name]
else:
return None
def append(self, name, item: Variable or Argument or Routine):
self.symbols[name] = item
def append_element(self, name, value: ET.Element):
self.symbols[name] = value
def set(self, name, value: ET.Element):
self.symbols[name] = value
def get_all_defined_mutable_vars(self) -> list[Variable]:
if self.enclosing_scope is None:
return self._get_mutable_vars()
else:
return self.enclosing_scope.get_all_defined_mutable_vars() + self._get_mutable_vars()
def _get_mutable_vars(self) -> list[Variable]:
mutable_vars = []
for name, var in self.symbols.items():
if not isinstance(var, Variable):
continue
if var.qualifier != 'const':
mutable_vars.append(self.symbols[name])
return mutable_vars
def get_top_scope(self):
if self.enclosing_scope is None:
return self
else:
return self.enclosing_scope.get_top_scope()
def build_xml_element(*keys, name):
elem = ET.Element(name)
for key in list(keys)[0]: # TODO refactor
elem.set(key[0], key[1])
return elem
def get_op(op):
if op == 'addition':
return '+'
elif op == 'subtraction':
return '-'
elif op == 'multiplication':
return '*'
elif op == 'division':
return '/'
elif op == 'modulo':
return '%'
elif op == 'power':
return '^'
elif op == 'or':
return 'or'
elif op == 'and':
return 'and'
elif op == 'equality':
return '=='
elif op == 'inequality':
return '!='
elif op == 'less-than':
return '<'
elif op == 'less-than-or-equal':
return '<='
elif op == 'greater-than':
return '>'
elif op == 'greater-than-or-equal':
return '>='
elif op == 'negation':
return '-'
elif op == 'not':
return 'not'
elif op == 'noop':
return '+'
elif op == 'concatenation':
return '||'
else:
raise Exception("Unknown operator: " + op)