gazprea-fuzzer-python/ast_generator/ast_generator.py
2023-11-18 20:13:15 -07:00

702 lines
23 KiB
Python

import random
import string
import xml.etree.ElementTree as ET
from english_words import get_english_words_set
from constants import *
class AstGenerator:
"""
Generates an AST from a grammar based on given settings
Originally the intention was to use the ISLa library to generate
the AST, however I found that ISLa is like taking a buldozer to
a sledgehammer's job, so I decided to write a procedural generator
instead.
The way we select elements is we take all the settings in their
category and assign them a range on a number line. Then we
pick a random number in that range and whichever category it
falls into will be selected.
"""
def __init__(self, settings: dict):
"""
This class is designed to get the settings from some wrapper class that
better defines the precise constraints of the language being generated
the necessary settings are in the .yaml file and #TODO this is not generalizable yet
@param settings: settings for weights and probabilities and lengths
"""
self.settings = settings
self.symbol_table = [] # TODO this should be a list of scopes
global_scope = Scope(None, None)
self.symbol_table.append(global_scope) # NOTE for debug
self.current_scope = global_scope
names = get_english_words_set(['web2'], lower=True)
possible_names = filter(lambda x: self.settings['properties']['id-length']['min'] < len(x)
< self.settings['properties']['id-length']['max'], names)
var_name_len = len(list(possible_names))
var_name_list = list(possible_names)
self.variable_names = var_name_list[0:var_name_len // 2]
self.routine_names = var_name_list[var_name_len // 2:var_name_len]
self.ast: ET.Element or None = None
self.current_ast_element: ET.Element or None = None
self.current_nesting_depth = 0
self.current_control_flow_nesting_depth = 0
def generate_ast(self):
"""
@brief generates an AST from a grammar
"""
self.generate_top_level_block()
def generate_top_level_block(self): # TODO add constant generation into this block
i = 0
element = build_xml_element([], name=GAZ_BLOCK_TAG)
self.current_ast_element = element
self.ast = element
# optional constants here too
self.generate_main()
while i < self.settings['generation-options']['max-number-of-routines']:
if random.random() < self.settings['block-termination-probability']:
break
self.generate_routine()
i += 1
def generate_main(self):
parent = self.current_ast_element
self.push_scope()
main_args = [ # TODO refactor these into constants
("name", "main"),
("return_type", GAZ_INT_KEY),
("args", "()"),
]
element = build_xml_element(main_args, name=GAZ_PROCEDURE_TAG)
self.current_ast_element.append(element)
self.current_ast_element = element
self.generate_block(return_stmt=True, return_value="0", return_type=GAZ_INT_KEY)
self.pop_scope()
self.current_ast_element = parent
def generate_block(self, tag=None, return_stmt=False, return_value=None, return_type=None):
if tag is None:
tag = []
parent = self.current_ast_element
self.push_scope()
element = build_xml_element(tag, name=GAZ_BLOCK_TAG)
self.current_ast_element.append(element)
self.current_ast_element = element
self.generate_statements()
if return_stmt:
self.generate_return(return_type=return_type, return_value=return_value)
if self.settings['generation-options']['generate-dead-code']:
self.generate_statements()
self.pop_scope()
self.current_ast_element = parent
def generate_return(self, return_type=None, return_value=None):
if return_type is None or return_type == GAZ_VOID_TYPE:
self.current_ast_element.append(build_xml_element([], name=GAZ_RETURN_TAG))
return
else:
if return_value is None:
xml_element = build_xml_element([("type", return_type)], name=GAZ_RETURN_TAG)
self.current_ast_element.append(xml_element)
parent = self.current_ast_element
self.current_ast_element = xml_element
self.generate_expression(return_type)
self.current_ast_element = parent
return
else:
xml_element = build_xml_element([("type", return_type)], name=GAZ_RETURN_TAG)
self.current_ast_element.append(xml_element)
parent = self.current_ast_element
self.current_ast_element = xml_element
self.current_ast_element.append(self.make_literal(return_type, return_value))
self.current_ast_element = parent
return
def generate_routine(self, routine_type=None):
if routine_type is None:
routine_type = self.get_routine_type()
else:
routine_type = routine_type
args = self.generate_routine_args()
name = self.get_name(routine_type)
return_type = self.get_type(routine_type)
routine = Routine(name, routine_type, return_type, args)
routine_args = [
("name", routine.name),
("return_type", routine.return_type),
]
element = build_xml_element(routine_args, name=routine.type)
self.current_ast_element.append(element)
parent = self.current_ast_element
self.current_ast_element = element
self.push_scope()
self.define_args(routine.arguments)
self.generate_block(return_stmt=True, return_type=routine.return_type)
self.pop_scope()
self.current_ast_element = parent
def define_args(self, args):
for arg in args:
self.current_ast_element.append(arg.xml)
self.current_scope.append(arg.name, arg)
def generate_statements(self):
# Number line
number_line = 180 #TODO fix the numberline stuff to reflect the settings
cutoffs = [10, 30, 50, 80, 100, 140, 180]
options = {
0: self.generate_declaration,
1: self.generate_routine_call,
2: self.generate_conditional,
3: self.generate_loop,
4: self.generate_assignment,
5: self.generate_out_stream,
6: self.generate_in_stream,
}
while True:
if random.random() < self.settings['block-termination-probability']:
break
a = random.randint(0, number_line)
i = 0
for i in range(len(cutoffs) - 1):
if cutoffs[i] < a < cutoffs[i + 1]:
options[i]()
break
def generate_int_real_expr(self):
# Number line
number_line = 100
cutoffs = [10, 30, 50, 80, 100]
options = { #TODO add brackets
0: "addition",
1: "subtraction",
2: "multiplication",
3: "division",
4: "modulo",
5: "power",
6: "negation",
7: "noop",
8: "equality",
9: "inequality",
10: "less-than",
11: "greater-than",
12: "less-than-or-equal",
13: "greater-than-or-equal",
}
unary = ["negation", "noop"]
self._generate_expression([GAZ_INT_KEY, GAZ_FLOAT_KEY], number_line, cutoffs, options, unary)
def generate_bool_expr(self):
# Number line
number_line = 100
cutoffs = [10, 30, 50, 80, 100]
options = { #TODO add brackets # TODO cannot guarantee correctness of comparison since booleans may appear
0: "equality",
1: "inequality",
2: "less-than",
3: "greater-than",
4: "less-than-or-equal",
5: "greater-than-or-equal",
6: "and",
7: "or",
8: "xor",
9: "not",
} # FIXME sometimes this will return a "" op, need to figure out why
unary = ["not"]
self._generate_expression([GAZ_BOOL_KEY], number_line, cutoffs, options, unary)
def _generate_expression(self, expr_type: list[str], number_line, cutoffs, options, unary=None):
if unary is None:
unary = []
parent = self.current_ast_element
self.current_nesting_depth += 1
if self.current_nesting_depth > self.settings['generation-options']['max-nesting-depth'] or random.random() < \
self.settings['block-termination-probability']:
self.generate_literal(random.choice(expr_type)) # TODO add the reals
self.current_nesting_depth -= 1
return
op = ""
a = random.randint(0, number_line)
i = 0
for i in range(len(cutoffs) - 1):
if cutoffs[i] < a < cutoffs[i + 1]:
op = options[i]
break
if op in unary:
self.generate_unary(op, random.choice(expr_type))
else:
self.generate_binary(op, random.choice(expr_type))
self.current_nesting_depth -= 1
self.current_ast_element = parent
def generate_declaration(self):
parent = self.current_ast_element
decl_type = self.get_type(GAZ_VAR_TAG)
decl_args = [
("type", decl_type),
]
element = build_xml_element(decl_args, name=GAZ_DECLARATION_TAG)
self.current_ast_element.append(element)
self.current_ast_element = element
variable = self.generate_variable(decl_type)
self.current_ast_element.append(variable.xml)
self.current_scope.append(variable.name, variable)
self.generate_xhs(GAZ_RHS_TAG, decl_type) # TODO add real type (decl_type)
self.current_ast_element = parent
def generate_binary(self, op, op_type):
parent = self.current_ast_element
args = [
("op", op),
("type", op_type),
]
element = build_xml_element(args, name=GAZ_OPERATOR_TAG)
self.current_ast_element.append(element)
self.current_ast_element = element
self.generate_xhs(GAZ_LHS_TAG, op_type)
self.generate_xhs(GAZ_RHS_TAG, op_type)
self.current_ast_element = parent
def generate_xhs(self, handedness, op_type):
element = build_xml_element([], name=handedness)
parent = self.current_ast_element
self.current_ast_element.append(element)
self.current_ast_element = element
self.generate_expression(op_type)
self.current_ast_element = parent
def generate_unary(self, op, op_type=ANY_TYPE):
parent = self.current_ast_element
args = [
("op", op),
("type", op_type),
]
element = build_xml_element(args, name=GAZ_UNARY_OPERATOR_TAG)
self.current_ast_element.append(element)
self.current_ast_element = element
self.generate_xhs(GAZ_RHS_TAG, op_type)
self.current_ast_element = parent
def generate_routine_call(self):
pass
def generate_conditional(self):
if self.current_control_flow_nesting_depth >= self.settings['generation-options']['max-nesting-depth']:
return
if self.current_control_flow_nesting_depth > 0 and random.random() < self.settings['block-termination-probability']:
return
element = build_xml_element([], name=GAZ_IF_TAG)
self.current_ast_element.append(element)
parent = self.current_ast_element
self.current_ast_element = element
self.current_control_flow_nesting_depth += 1
self.push_scope()
self.generate_expression(GAZ_BOOL_KEY)
self.generate_block(tag=[("type", GAZ_TRUE_BLOCK_TAG)])
self.generate_block(tag=[("type", GAZ_FALSE_BLOCK_TAG)])
self.pop_scope()
self.current_ast_element = parent
def generate_loop(self):
if self.current_control_flow_nesting_depth >= self.settings['generation-options']['max-nesting-depth']:
return
if self.current_control_flow_nesting_depth > 0 and random.random() < self.settings['block-termination-probability']:
return
parent = self.current_ast_element
element = build_xml_element([], name=GAZ_LOOP_TAG)
self.current_ast_element.append(element)
self.current_ast_element = element
self.current_control_flow_nesting_depth += 1
self.push_scope()
self.generate_expression(GAZ_BOOL_KEY)
self.generate_block()
self.pop_scope()
self.current_ast_element = parent
def generate_assignment(self):
# same structure as a declaration
parent = self.current_ast_element
element = build_xml_element([], name=GAZ_ASSIGNMENT_TAG)
self.current_ast_element.append(element)
self.current_ast_element = element
possible_vars = self.current_scope.get_all_defined_mutable_vars()
if len(possible_vars) == 0:
self.generate_global()
possible_vars = self.current_scope.get_all_defined_mutable_vars()
assert len(possible_vars) > 0
variable = random.choice(possible_vars)
self.current_ast_element.append(variable.xml)
self.generate_xhs(GAZ_RHS_TAG, variable.type)
self.current_ast_element = parent
def generate_out_stream(self):
self.generate_stream(GAZ_OUT_STREAM)
def generate_in_stream(self):
self.generate_stream(GAZ_IN_STREAM)
def generate_stream(self, stream_type):
parent = self.current_ast_element
args = [
("type", stream_type),
]
element = build_xml_element(args, name=GAZ_STREAM_TAG)
self.current_ast_element.append(element)
self.current_ast_element = element
self.generate_expression(ANY_TYPE)
self.current_ast_element = parent
def generate_variable(self, var_type: str):
return Variable(self.get_name(GAZ_VAR_TAG), var_type, self.get_qualifier())
def generate_literal(self, var_type: str):
args = [
("type", var_type),
("value", str(self.get_value(var_type))),
]
element = build_xml_element(args, name=GAZ_LIT_TAG)
self.current_ast_element.append(element)
def make_literal(self, type, value):
args = [
("type", type),
("value", value),
]
element = build_xml_element(args, name=GAZ_LIT_TAG)
return element
def generate_global(self):
current_scope = self.current_scope
current_element = self.current_ast_element
self.current_scope = self.current_scope.get_top_scope()
self.current_ast_element = self.ast
self.generate_declaration()
self.current_scope = current_scope
self.current_ast_element = current_element
def generate_expression(self, expr_type: str):
if expr_type == GAZ_INT_KEY or expr_type == GAZ_FLOAT_KEY:
self.generate_int_real_expr()
elif expr_type == GAZ_BOOL_KEY:
self.generate_bool_expr()
elif expr_type == ANY_TYPE: # TODO implement the choice of any type
self.generate_int_real_expr()
else:
raise NotImplementedError(f"Expression type {expr_type} not implemented")
def generate_routine_args(self):
number = random.randint(self.settings['properties']['number-of-arguments']['min'],
self.settings['properties']['number-of-arguments']['max'])
args = []
for i in range(number):
arg = self.generate_arg()
args.append(arg)
self.current_scope.append(arg.name, arg)
return args
def generate_arg(self):
return Argument(self.get_name(GAZ_VAR_TAG), self.get_type(GAZ_VAR_TAG))
def push_scope(self, xml_element: ET.Element = None):
scope = Scope(self.current_scope)
self.symbol_table.append(scope)
self.current_scope = scope
def pop_scope(self):
self.current_scope = self.current_scope.enclosing_scope
# TODO revamp the random value generations
def get_qualifier(self):
"""
@brief get a random qualifier from the list of possible qualifiers
@return a qualifier as a string
"""
number_line = (self.settings["misc-weights"]["type-qualifier-weights"]["const"] +
self.settings["misc-weights"]["type-qualifier-weights"]["var"] -1 )
res = random.randint(0, number_line)
if res in range(0, self.settings["misc-weights"]["type-qualifier-weights"]["const"]):
return 'const'
elif res in range(self.settings["misc-weights"]["type-qualifier-weights"]["const"],
self.settings["misc-weights"]["type-qualifier-weights"]["const"] +
self.settings["misc-weights"]["type-qualifier-weights"]["var"]):
return 'var'
else:
raise ValueError("Internal Error, please report the stack trace to me")
def get_routine_type(self):
cutoffs = []
values = []
ops = []
for key, value in self.settings["routine-weights"].items():
cutoffs.append(value + sum(cutoffs))
values.append(value)
ops.append(key)
res = random.randint(0, sum(values))
for i in range(len(cutoffs)):
if res < cutoffs[i]:
return ops[i] # TODO everything should be fast faied
def get_value(self, type):
if type == GAZ_INT_KEY:
if self.settings["properties"]["generate-max-int"]:
return random.randint(-2147483648, 2147483647)
else:
return random.randint(-1000, 1000)
elif type == GAZ_FLOAT_KEY:
return random.uniform(-1000, 1000)
elif type == GAZ_BOOL_KEY:
return random.choice([True, False])
else:
raise TypeError("Unimplemented generator for type: " + type)
def get_name(self, name_type):
"""
@brief get a random name from the list of possible names and add it to the current scope
@param name_type:
@return:
"""
length = random.randint(self.settings['properties']['id-length']['min'],
self.settings['properties']['id-length']['max'])
name = ''.join(random.choices(string.ascii_letters, k=length))
return name
def get_op(self, type):
if type == GAZ_INT_KEY:
cutoffs = []
values = []
ops = []
for key, value in self.settings["expression-weights"]["arithmetic"]:
cutoffs.append(value + sum(cutoffs))
values.append(value)
ops.append(get_op(key))
res = random.randint(0, sum(values))
for i in range(len(cutoffs)):
if res < cutoffs[i]:
return ops[i]
def get_type(self, tag): # TODO Add support for composite types
return 'int' # TODO Add support for all types
if tag in [GAZ_PROCEDURE_TAG, GAZ_FUNCTION_TAG, GAZ_VAR_TAG]:
cutoffs = []
values = []
types = []
for key, value in self.settings["type-weights"]["value-types"].items():
if key == GAZ_VOID_TYPE and tag != GAZ_PROCEDURE_TAG:
continue
cutoffs.append(value + sum(cutoffs))
values.append(value)
types.append(key)
res = random.randint(0, sum(values))
for i in range(len(cutoffs)):
if res < cutoffs[i]:
return types[i]
class Variable:
def __init__(self, name: str, type: str, qualifier: str, value: any = None):
self.name = name
self.type = type
self.value = value
self.qualifier = qualifier
self.xml = self._build_xml()
def _build_xml(self):
args = [
('name', self.name),
('type', self.type),
('mut', self.qualifier),
]
return build_xml_element(args, name=GAZ_VAR_TAG)
class Argument:
def __init__(self, name: str, type: str):
self.name = name
self.type = type
self.xml = self._build_xml()
def __str__(self):
return self.type + " " + self.name
def _build_xml(self):
args = [
('name', self.name),
('type', self.type),
]
return build_xml_element(args, name=GAZ_ARG_TAG)
class Routine:
def __init__(self, name: str, type: str, return_type: str, args: list[Argument], xml: ET.Element = None):
self.name = name
self.type = type
self.return_type = return_type
self.arguments = args
self.xml = xml
self.xml = xml
class Scope:
def __init__(self, enclosing_scope, child_scope=None, associated_xml: ET.Element = None):
self.symbols = {}
self.enclosing_scope = enclosing_scope
self.child_scope = child_scope
self.xml = associated_xml
def resolve(self, name) -> ET.Element or None:
if name in self.symbols:
return self.symbols[name]
else:
return None
def append(self, name, item: Variable or Argument or Routine):
self.symbols[name] = item
def append_element(self, name, value: ET.Element):
self.symbols[name] = value
def set(self, name, value: ET.Element):
self.symbols[name] = value
def get_all_defined_mutable_vars(self) -> list[Variable]:
if self.enclosing_scope is None:
return self._get_mutable_vars()
else:
return self.enclosing_scope.get_all_defined_mutable_vars() + self._get_mutable_vars()
def _get_mutable_vars(self) -> list[Variable]:
mutable_vars = []
for name, var in self.symbols.items():
if not isinstance(var, Variable):
continue
if var.qualifier != 'const':
mutable_vars.append(self.symbols[name])
return mutable_vars
def get_top_scope(self):
if self.enclosing_scope is None:
return self
else:
return self.enclosing_scope.get_top_scope()
def build_xml_element(*keys, name):
elem = ET.Element(name)
for key in list(keys)[0]: # TODO refactor
elem.set(key[0], key[1])
return elem
def get_op(op):
if op == 'addition':
return '+'
elif op == 'subtraction':
return '-'
elif op == 'multiplication':
return '*'
elif op == 'division':
return '/'
elif op == 'modulo':
return '%'
elif op == 'power':
return '^'
elif op == 'or':
return 'or'
elif op == 'and':
return 'and'
elif op == 'equality':
return '=='
elif op == 'inequality':
return '!='
elif op == 'less-than':
return '<'
elif op == 'less-than-or-equal':
return '<='
elif op == 'greater-than':
return '>'
elif op == 'greater-than-or-equal':
return '>='
elif op == 'negation':
return '-'
elif op == 'not':
return 'not'
elif op == 'noop':
return '+'
elif op == 'concatenation':
return '||'
else:
raise Exception("Unknown operator: " + op)