Refactored ASTGenerator #3
7 changed files with 139 additions and 151 deletions
|
@ -4,7 +4,7 @@ import xml.etree.ElementTree as ET
|
||||||
|
|
||||||
from english_words import get_english_words_set
|
from english_words import get_english_words_set
|
||||||
|
|
||||||
from ast_generator.utils import Variable, Argument, Routine, Scope, build_xml_element
|
from ast_generator.utils import *
|
||||||
from constants import *
|
from constants import *
|
||||||
|
|
||||||
import keyword
|
import keyword
|
||||||
|
@ -41,98 +41,47 @@ class AstGenerator:
|
||||||
self.symbol_table.append(global_scope) # NOTE for debug
|
self.symbol_table.append(global_scope) # NOTE for debug
|
||||||
self.current_scope = global_scope
|
self.current_scope = global_scope
|
||||||
|
|
||||||
names = get_english_words_set(['web2'], alpha=True)
|
self._init_names()
|
||||||
possible_names = filter(lambda x: self.settings['properties']['id-length']['max'] <= len(x) <=
|
|
||||||
self.settings['properties']['id-length']['max'] and not keyword.iskeyword(x),
|
|
||||||
names)
|
|
||||||
|
|
||||||
var_name_list = list(possible_names)
|
|
||||||
var_name_len = len(var_name_list)
|
|
||||||
self.variable_names = var_name_list[0:var_name_len // 2]
|
|
||||||
self.routine_names = var_name_list[var_name_len // 2:var_name_len]
|
|
||||||
|
|
||||||
self.ast: ET.Element or None = None
|
self.ast: ET.Element or None = None
|
||||||
self.current_ast_element: ET.Element or None = None
|
self.current_ast_element: ET.Element or None = None
|
||||||
self.current_nesting_depth = 0
|
self.current_nesting_depth = 0
|
||||||
self.current_control_flow_nesting_depth = 0
|
self.current_control_flow_nesting_depth = 0
|
||||||
|
|
||||||
|
self._init_numlines()
|
||||||
|
|
||||||
|
def _init_numlines(self):
    """
    @brief build the weighted number lines used when picking expression operators

    @effects sets the *_op_options, *_op_cutoffs and *_op_numline attributes for
             int, bool, float, char and comp expressions, and the int/bool/float
             unary operator lists
    """
    # Numberlines - For computing probabilities
    weights = 'expression-weights'

    int_lines = get_numberlines(weights, ['brackets', 'arithmetic', 'unary'],
                                [[], [], ['not']], self.settings)
    self.int_op_options, self.int_op_cutoffs, self.int_op_numline = int_lines
    self.int_unary = ['negation', 'noop']

    bool_lines = get_numberlines(
        weights,
        ['brackets', 'comparison', 'logical', 'unary'],
        [[],
         ['less-than-or-equal', 'greater-than-or-equal', 'less-than', 'greater-than'],
         [],
         ['noop', 'negation']],
        self.settings)
    self.bool_op_options, self.bool_op_cutoffs, self.bool_op_numline = bool_lines
    self.bool_unary = ['not']

    float_lines = get_numberlines(weights, ['brackets', 'arithmetic', 'unary'],
                                  [[], [], ['not']], self.settings)
    self.float_op_options, self.float_op_cutoffs, self.float_op_numline = float_lines
    self.float_unary = ['negation', 'noop']

    char_lines = get_numberlines(
        weights,
        ['brackets', 'comparison'],
        [[], ['less-than', 'greater-than', 'less-than-or-equal', 'greater-than-or-equal']],
        self.settings)
    self.char_op_options, self.char_op_cutoffs, self.char_op_numline = char_lines

    comp_lines = get_numberlines(weights, ['brackets', 'comparison'], [[], []], self.settings)
    self.comp_op_options, self.comp_op_cutoffs, self.comp_op_numline = comp_lines
|
||||||
['brackets', 'comparison'],
|
|
||||||
[[], []]))
|
|
||||||
|
|
||||||
def get_numberlines(self, settings_section: str, subsettings: list[str], excluded_values):
|
def _init_names(self):
    """
    @brief collect the identifier names available to the generator

    Takes the 'web2' english word list, keeps the words whose length lies within
    the configured id-length range and that are not Python keywords, then gives
    the first half of the survivors to variables and the second half to routines.

    @effects sets self.variable_names and self.routine_names
    """
    id_length = self.settings['properties']['id-length']
    shortest, longest = id_length['min'], id_length['max']

    names = get_english_words_set(['web2'], alpha=True)
    # The lower bound must be id-length.min: comparing against max on both sides
    # only ever accepted words of exactly the maximum length.
    candidates = [word for word in names
                  if shortest <= len(word) <= longest and not keyword.iskeyword(word)]

    half = len(candidates) // 2
    self.variable_names = candidates[:half]
    self.routine_names = candidates[half:]
|
||||||
settings = []
|
|
||||||
|
|
||||||
for key, value in self.settings[settings_section].items():
|
|
||||||
if key in subsettings and key not in excluded_values: # this check needs to be done recursively
|
|
||||||
if isinstance(value, int):
|
|
||||||
t = {
|
|
||||||
key: value
|
|
||||||
}
|
|
||||||
settings.append(t)
|
|
||||||
elif isinstance(value, dict):
|
|
||||||
settings.append(value)
|
|
||||||
else:
|
|
||||||
raise TypeError("invalid setting type. Found " + str(value) + " instead of expected int or dict")
|
|
||||||
|
|
||||||
for v in range(len(settings)):
|
|
||||||
for i in excluded_values:
|
|
||||||
for j in i:
|
|
||||||
if j in settings[v]:
|
|
||||||
settings[v].pop(j)
|
|
||||||
|
|
||||||
for v in settings:
|
|
||||||
if isinstance(v, dict):
|
|
||||||
for key, value in v.items():
|
|
||||||
number_line += value
|
|
||||||
cutoffs.append(cutoff + value)
|
|
||||||
cutoff += value
|
|
||||||
options[option] = key
|
|
||||||
option += 1
|
|
||||||
elif isinstance(v, int):
|
|
||||||
number_line += v
|
|
||||||
cutoffs.append(cutoff + v)
|
|
||||||
cutoff += v
|
|
||||||
options[option] = v
|
|
||||||
option += 1
|
|
||||||
else:
|
|
||||||
raise TypeError("invalid setting type. Found " + str(v) + " instead of expected int")
|
|
||||||
|
|
||||||
return options, cutoffs, number_line
|
|
||||||
|
|
||||||
def generate_ast(self):
|
def generate_ast(self):
|
||||||
"""
|
"""
|
||||||
|
@ -140,37 +89,71 @@ class AstGenerator:
|
||||||
"""
|
"""
|
||||||
self.generate_top_level_block()
|
self.generate_top_level_block()
|
||||||
|
|
||||||
def generate_top_level_block(self): # TODO add constant generation into this block
|
def make_element(self, name: str, keys: list[tuple[str, any]]) -> ET.Element:
    """
    @brief build an xml element and make it the current element of the ast

    The new element is appended to the current element when there is one.

    @effects modifies self.current_ast_element

    @param name: the tag for the element
    @param keys: a list of tuple containing keys for the element
    @return the newly created element
    """
    node = build_xml_element(keys, name=name)

    enclosing = self.current_ast_element
    if enclosing is not None:
        enclosing.append(node)

    self.current_ast_element = node
    return node
|
||||||
|
|
||||||
|
def make_scoped_element(self, name, keys) -> ET.Element:
    """
    @brief push a new scope and create an xml element inside it

    @param name: the tag for the element
    @param keys: a list of tuple containing keys for the element
    @return the element that was current before the call, to be handed back
            to exit_scoped_element
    """
    enclosing = self.current_ast_element  # remembered before make_element replaces it
    self.push_scope()
    self.make_element(name, keys)
    return enclosing
|
||||||
|
|
||||||
|
def exit_scoped_element(self, parent):
    """
    @brief close the current scope and make the enclosing element current again

    @effects pops one scope and modifies self.current_ast_element

    @param parent: the enclosing element to return to
    """
    self.pop_scope()
    # step back out so later elements are attached to the enclosing element
    self.current_ast_element = parent
|
||||||
|
|
||||||
|
def generate_top_level_block(self):
    """
    @brief creates the top-level block containing the whole program

    Emits a random number of globals, then routines until either the routine
    limit is reached or the block-termination roll succeeds, and finally main.

    @effects sets self.ast to the top-level block element
    """
    self.ast = self.make_element(GAZ_BLOCK_TAG, [])

    global_count = random.randint(0, self.settings['generation-options']['max-globals'])
    for _ in range(global_count):
        self.generate_global()

    routine_limit = self.settings['generation-options']['max-number-of-routines']
    for _ in range(routine_limit):
        # each iteration may end routine generation early
        if random.random() < self.settings['block-termination-probability']:
            break
        self.generate_routine()

    self.generate_main()
|
||||||
|
|
||||||
def generate_main(self):
|
def generate_main(self):
    """
    @brief generate the main procedure inside its own scope

    The procedure is named "main", is declared with an int return type and its
    block is generated with a return statement whose value is "0".
    """
    attributes = [  # TODO refactor these into constants
        (GAZ_NAME_KEY, "main"),
        (GAZ_RETURN_KEY, GAZ_INT_KEY),
    ]

    enclosing = self.make_scoped_element(GAZ_PROCEDURE_TAG, attributes)
    self.generate_block(
        return_stmt=True,
        return_value="0",
        return_type=GAZ_INT_KEY,
        block_type=GAZ_PROCEDURE_TAG,
    )
    self.exit_scoped_element(enclosing)
|
||||||
|
|
||||||
def generate_block(self, tag=None, return_stmt=False, return_value=None, return_type=None, block_type=None,
|
def generate_block(self, tag=None, return_stmt=False, return_value=None, return_type=None, block_type=None,
|
||||||
loop_var=None):
|
loop_var=None):
|
||||||
|
|
|
@ -1,53 +0,0 @@
|
||||||
from constants import Grammar
|
|
||||||
|
|
||||||
GAZPREA_TOP_LEVEL: Grammar = {
|
|
||||||
# Top level elements
|
|
||||||
'<start>': ['<topBlock>'],
|
|
||||||
'<topBlock>': ['<XML_OPEN_TAG>block<XML_CLOSE_TAG><routine_list><main_routine><routine_list><XML_OPEN_SLASH>block<XML_CLOSE_TAG>'],
|
|
||||||
# TODO constants
|
|
||||||
|
|
||||||
# Routines
|
|
||||||
'<routine>': ['<function>', '<procedure>'], # TODO forward_declaration
|
|
||||||
'<function>': [
|
|
||||||
'<XML_OPEN_TAG>function name="_NAME_" return_type="_TYPE_" args="_ARGS_"<XML_CLOSE_TAG><return_block><XML_OPEN_SLASH>function<XML_CLOSE_TAG>'],
|
|
||||||
'<procedure>': [
|
|
||||||
'<XML_OPEN_TAG>procedure name="_NAME_" return_type="_TYPE_" args="_ARGS_"<XML_CLOSE_TAG><block><XML_OPEN_SLASH>procedure<XML_CLOSE_TAG>'],
|
|
||||||
'<main_routine>': [
|
|
||||||
'<XML_OPEN_TAG>procedure name="main" return_type="int" args="()"<XML_CLOSE_TAG><return_block><XML_OPEN_SLASH>procedure<XML_CLOSE_TAG>'],
|
|
||||||
'<routine_list>': ['<routine><routine_list><routine>', '<routine>'],
|
|
||||||
|
|
||||||
# Blocks
|
|
||||||
'<block>': ['<XML_OPEN_TAG>block<XML_CLOSE_TAG><statement_list><XML_OPEN_SLASH>block<XML_CLOSE_TAG>'],
|
|
||||||
'<return_block>': ['<XML_OPEN_TAG>block<XML_CLOSE_TAG><statement_list><return><XML_OPEN_SLASH>block<XML_CLOSE_TAG>'],
|
|
||||||
'<statement>': [
|
|
||||||
'<declaration>',
|
|
||||||
'<stream>',
|
|
||||||
# '<call>',
|
|
||||||
# '<return>', # TODO if/else, loop
|
|
||||||
],
|
|
||||||
'<statement_list>': ['<statement><statement_list><statement>', '<statement>'],
|
|
||||||
|
|
||||||
# Things that belong on their own lines
|
|
||||||
'<declaration>': ['<XML_OPEN_TAG>declaration<XML_CLOSE_TAG><variable><rhs><XML_OPEN_SLASH>declaration<XML_CLOSE_TAG>'],
|
|
||||||
'<stream>': ['<out_stream>'], #, '<in_stream>'],
|
|
||||||
'<return>': ['<XML_OPEN_TAG>return<XML_CLOSE_TAG><has_value><XML_OPEN_SLASH>return<XML_CLOSE_TAG>'],
|
|
||||||
|
|
||||||
'<out_stream>': ['<XML_OPEN_TAG>stream type="std_output"<XML_CLOSE_TAG><has_value><XML_OPEN_SLASH>stream<XML_CLOSE_TAG>'],
|
|
||||||
# '<in_stream>': ['<XML_OPEN_TAG>stream type="std_input"<XML_CLOSE_TAG><has_value><XML_OPEN_SLASH>stream<XML_CLOSE_TAG>'],
|
|
||||||
|
|
||||||
# Things that are part of lines
|
|
||||||
'<has_value>': ['<variable>', '<literal>', '<operator>'],
|
|
||||||
'<lhs>': ['<XML_OPEN_TAG>lhs<XML_CLOSE_TAG><has_value><XML_OPEN_SLASH>lhs<XML_CLOSE_TAG>'],
|
|
||||||
'<rhs>': ['<XML_OPEN_TAG>rhs<XML_CLOSE_TAG><has_value><XML_OPEN_SLASH>rhs<XML_CLOSE_TAG>'],
|
|
||||||
|
|
||||||
# Things that have values
|
|
||||||
'<operator>': ['<XML_OPEN_TAG>operator<XML_CLOSE_TAG><lhs><rhs><XML_OPEN_SLASH>operator<XML_CLOSE_TAG>'],
|
|
||||||
'<variable>': ['<XML_OPEN_TAG>variable mut="_MODIFIER_" type="_TYPE_" name="_NAME_"<XML_SLASH_TAG>'],
|
|
||||||
'<literal>': ['<XML_OPEN_TAG>literal type="_TYPE_" value="_VALUE_"<XML_SLASH_TAG>'],
|
|
||||||
|
|
||||||
# Helper rules
|
|
||||||
'<XML_OPEN_TAG>': ['<'],
|
|
||||||
'<XML_CLOSE_TAG>': ['>'],
|
|
||||||
'<XML_SLASH_TAG>': ['/>'],
|
|
||||||
'<XML_OPEN_SLASH>': ['</'],
|
|
||||||
}
|
|
|
@ -5,19 +5,21 @@ generation-options:
|
||||||
max-conditionals-loops: 5 # maximum number of loops/conditionals per routine
|
max-conditionals-loops: 5 # maximum number of loops/conditionals per routine
|
||||||
max-number-of-routines: 5 # maximum number of routines (main will always be generated)
|
max-number-of-routines: 5 # maximum number of routines (main will always be generated)
|
||||||
generate-dead-code: True # generate dead code
|
generate-dead-code: True # generate dead code
|
||||||
|
max-loop-iterations: 100 # maximum number of iterations in a loop
|
||||||
|
max-globals: 5 # maximum number of global variables
|
||||||
properties:
|
properties:
|
||||||
max-range-length: 5 # maximum length of ranges, vectors and tuples, (AxA matrices can exist)
|
max-range-length: 5 # maximum length of ranges, vectors and tuples, (AxA matrices can exist)
|
||||||
use-english-words: True # use english words instead of random names (this may limit the maximum number of names)
|
use-english-words: True # use english words instead of random names (this may limit the maximum number of names)
|
||||||
id-length: # length of identifiers
|
id-length: # length of identifiers
|
||||||
min: 1
|
min: 1
|
||||||
max: 10
|
max: 5
|
||||||
function-name-length: # length of function names
|
function-name-length: # length of function names
|
||||||
min: 1
|
min: 1
|
||||||
max: 10
|
max: 10
|
||||||
number-of-arguments: # number of arguments to a routine
|
number-of-arguments: # number of arguments to a routine
|
||||||
min: 1
|
min: 1
|
||||||
max: 10
|
max: 10
|
||||||
generate-max-int: True # if False, generate integers between [-1000, 1000] else
|
generate-max-int: False # if False, generate integers between [-1000, 1000] else
|
||||||
expression-weights: # weights for expressions
|
expression-weights: # weights for expressions
|
||||||
# the higher a weight, the more likely (0, 10000), 0 to exclude, 10000 for only that
|
# the higher a weight, the more likely (0, 10000), 0 to exclude, 10000 for only that
|
||||||
brackets: 10
|
brackets: 10
|
||||||
|
|
|
@ -130,9 +130,15 @@ class TestGeneration(unittest.TestCase):
|
||||||
self.assertIsNotNone(self.ast_gen.current_ast_element.find("conditional"))
|
self.assertIsNotNone(self.ast_gen.current_ast_element.find("conditional"))
|
||||||
conditional = self.ast_gen.ast.find("conditional")
|
conditional = self.ast_gen.ast.find("conditional")
|
||||||
|
|
||||||
# print(ET.tostring(conditional, 'utf-8').decode('utf-8'))
|
print(ET.tostring(conditional, 'utf-8').decode('utf-8'))
|
||||||
|
|
||||||
self.assertIsNotNone(conditional.find("operator") or conditional.find("unary_operator") or conditional.find("literal"))
|
opts = ['operator', 'unary_operator', 'literal', 'brackets']
|
||||||
|
res = []
|
||||||
|
for i in opts:
|
||||||
|
res.append(conditional.find(i))
|
||||||
|
res_list = list(filter(lambda x: x is not None, res))
|
||||||
|
|
||||||
|
self.assertGreater(len(res_list), 0)
|
||||||
|
|
||||||
block = conditional.findall("block")
|
block = conditional.findall("block")
|
||||||
self.assertEqual(2, len(block))
|
self.assertEqual(2, len(block))
|
||||||
|
@ -211,7 +217,7 @@ class TestGeneration(unittest.TestCase):
|
||||||
|
|
||||||
self.assertIsNotNone(self.ast_gen.ast)
|
self.assertIsNotNone(self.ast_gen.ast)
|
||||||
|
|
||||||
# print(ET.tostring(self.ast_gen.ast, 'utf-8').decode('utf-8'))
|
print(ET.tostring(self.ast_gen.ast, 'utf-8').decode('utf-8'))
|
||||||
|
|
||||||
procedures = self.ast_gen.ast.findall("procedure")
|
procedures = self.ast_gen.ast.findall("procedure")
|
||||||
self.assertLess(0, len(procedures))
|
self.assertLess(0, len(procedures))
|
||||||
|
|
|
@ -97,3 +97,52 @@ def build_xml_element(*keys, name):
|
||||||
for key in list(keys)[0]: # TODO refactor
|
for key in list(keys)[0]: # TODO refactor
|
||||||
elem.set(key[0], key[1])
|
elem.set(key[0], key[1])
|
||||||
return elem
|
return elem
|
||||||
|
|
||||||
|
|
||||||
|
def get_numberlines(settings_section: str, subsettings: list[str], excluded_values, settings):
    """
    @brief build the cumulative-weight number line for a group of weighted options

    The entries of settings[settings_section] whose key is listed in subsettings
    are flattened, in the order they appear in the settings, into one list of
    weighted options. An int entry contributes a single option named after its
    key; a dict entry contributes one option per key/value pair. Any option whose
    name occurs in one of the excluded_values lists is left out.

    @effects none: the settings are only read. Excluded names are filtered while
             iterating instead of being popped from the settings dicts, so one
             call can no longer remove options that a later call still needs.

    @param settings_section: key of the section in settings that holds the weights
    @param subsettings: keys inside that section to take options from
    @param excluded_values: one list of option names to drop per subsetting
    @param settings: the settings mapping to read the weights from
    @return a tuple (options, cutoffs, number_line): options maps each option
            index to its name, cutoffs[i] is the running total of the weights up
            to and including option i, number_line is the sum of all weights
    @raise TypeError: if a selected entry is neither an int nor a dict
    """
    assert len(subsettings) == len(excluded_values)

    # Exclusions apply to every selected subsetting, not only the one at the
    # matching position, which is how the in-place version behaved.
    excluded = {name for group in excluded_values for name in group}

    options = {}
    cutoffs = []
    number_line = 0

    for key, value in settings[settings_section].items():
        # a subsetting named directly in excluded_values is skipped as a whole
        if key not in subsettings or key in excluded_values:
            continue

        if isinstance(value, int):
            weights = {key: value}
        elif isinstance(value, dict):
            weights = value
        else:
            raise TypeError("invalid setting type. Found " + str(value) + " instead of expected int or dict")

        for name, weight in weights.items():
            if name in excluded:
                continue
            number_line += weight
            cutoffs.append(number_line)
            options[len(options)] = name

    return options, cutoffs, number_line
|
|
@ -6,6 +6,7 @@ generation-options:
|
||||||
max-number-of-routines: 5 # maximum number of routines (main will always be generated)
|
max-number-of-routines: 5 # maximum number of routines (main will always be generated)
|
||||||
generate-dead-code: True # generate dead code
|
generate-dead-code: True # generate dead code
|
||||||
max-loop-iterations: 100 # maximum number of iterations in a loop
|
max-loop-iterations: 100 # maximum number of iterations in a loop
|
||||||
|
max-globals: 5 # maximum number of global variables
|
||||||
properties:
|
properties:
|
||||||
max-range-length: 5 # maximum length of ranges, vectors and tuples, (AxA matrices can exist)
|
max-range-length: 5 # maximum length of ranges, vectors and tuples, (AxA matrices can exist)
|
||||||
use-english-words: True # use english words instead of random names (this may limit the maximum number of names)
|
use-english-words: True # use english words instead of random names (this may limit the maximum number of names)
|
||||||
|
|
Loading…
Reference in a new issue