716 lines
No EOL
29 KiB
Python
716 lines
No EOL
29 KiB
Python
#!/usr/bin/env python3.11
|
|
# author: Roman Necas (xnecasr00)
|
|
# date: 26.2.2025
|
|
"""
|
|
parse.py - A compact parser and static analyzer for SOL25.
|
|
|
|
This script reads SOL25 source code from standard input and performs the following:
|
|
• Lexical analysis using Lark (version 1.2.2)
|
|
• Syntactic parsing according to the SOL25 grammar to produce a parse tree
|
|
• Transformation of the parse tree into an Abstract Syntax Tree (AST)
|
|
• Static semantic analysis (e.g., checking for reserved identifiers, undefined classes/variables,
|
|
arity mismatches, duplicate definitions, etc.)
|
|
• Generation of an XML representation of the AST for further processing
|
|
|
|
Exit codes:
|
|
10 - Wrong or extra command-line parameters
|
|
11 - Error opening input
|
|
21 - Lexical error (e.g., illegal escape sequences, newline in string literal)
|
|
22 - Syntactic error or misuse of reserved identifiers
|
|
31 - Missing Main class or parameterless run method
|
|
32 - Use of undefined variable/class/method
|
|
33 - Arity mismatch in method block literal
|
|
34 - Assignment to a formal parameter
|
|
35 - Duplicate formal parameters or class redefinition
|
|
99 - Internal error
|
|
"""
|
|
|
|
import sys
|
|
import re
|
|
import xml.etree.ElementTree as ET
|
|
import xml.dom.minidom
|
|
from lark import Lark, Transformer, UnexpectedToken, UnexpectedCharacters
|
|
|
|
# --- Helper: Error Handling ---
|
|
def error_exit(code, msg=""):
    """
    Terminate the program with exit status ``code``.

    A non-empty ``msg`` is written to stderr as one line and stderr is
    flushed, so the diagnostic is visible before the process ends.
    Always raises SystemExit (through sys.exit).
    """
    if msg:
        line = msg + "\n"
        sys.stderr.write(line)
        # Flush explicitly so the message is not lost in a buffer.
        sys.stderr.flush()

    sys.exit(code)
|
|
|
|
# --- AST Node Definitions ---
|
|
# Each class below represents a node in the AST.
|
|
|
|
class Program:
    """Root AST node holding every class definition of a SOL25 program."""

    def __init__(self, classes, description=None):
        # classes: list of class definition nodes
        # description: optional free text (e.g. taken from a source comment)
        self.classes, self.description = classes, description
|
|
|
|
class ClassDef:
    """AST node for one class definition."""

    def __init__(self, name, parent, methods):
        # name: class name (string)
        # parent: name of the parent class (string)
        # methods: list of method definitions declared in the class
        self.name, self.parent, self.methods = name, parent, methods
|
|
|
|
class MethodDef:
    """AST node for one method: a selector plus the block forming its body."""

    def __init__(self, selector, block):
        # selector: the method's selector (string)
        # block: the block (body) of the method
        self.selector, self.block = selector, block
        # Every ':' in the selector stands for one expected parameter.
        self.expected_params = selector.count(":")
|
|
|
|
|
|
class Block:
    """AST node for a block: formal parameters followed by assignments."""

    def __init__(self, parameters, assignments):
        # parameters: list of parameter names (strings)
        # assignments: list of assignment statements
        self.parameters, self.assignments = parameters, assignments
|
|
|
|
class Assignment:
    """AST node for a single ``var := expr.`` statement."""

    def __init__(self, var, expr):
        # var: name of the variable being assigned (string)
        # expr: the expression whose value is assigned
        self.var, self.expr = var, expr
|
|
|
|
# --- Expression Hierarchy ---
|
|
# Base class for all expressions.
|
|
class Expr:
    """Common base class of all expression nodes; carries no state itself."""
|
|
|
|
class LiteralExpr(Expr):
    """Expression node for a literal value."""

    def __init__(self, lit_class, value):
        # lit_class: kind of literal (e.g. "Integer", "String", "Nil", ...)
        # value: the literal's value as a string
        self.lit_class, self.value = lit_class, value
|
|
|
|
class VarExpr(Expr):
    """Expression node that refers to a variable by name."""

    def __init__(self, name):
        # name: variable name (string)
        self.name = name
|
|
|
|
class BlockExpr(Expr):
    """Expression node wrapping a block literal."""

    def __init__(self, block):
        # block: the Block instance this literal denotes
        self.block = block
|
|
|
|
class MessageSendExpr(Expr):
    """Expression node for one or more messages sent to a receiver."""

    def __init__(self, receiver, sends):
        # receiver: expression that receives the message(s)
        # sends: list of (selector, argument) tuples; argument is None for
        #        selectors that take no argument
        self.receiver, self.sends = receiver, sends
        # State filled in when a chain is flattened into one compound send.
        self.compound, self.compound_selector, self.compound_args = False, "", None
|
|
|
|
# --- Lark Grammar for SOL25 ---
|
|
SOL25_GRAMMAR = r"""
|
|
?start: program
|
|
program: (class_def)*
|
|
class_def: "class" CLASS_ID ":" CLASS_ID "{" (method_def)* "}"
|
|
method_def: selector block_literal
|
|
selector: IDENT | send_selector
|
|
send_selector: SEND_SELECTOR+
|
|
block_literal: "[" block_params? "|" statement_list "]"
|
|
block_params: BLOCK_PARAM+
|
|
statement_list: (assignment_statement)*
|
|
assignment_statement: IDENT ":=" expression "."
|
|
?expression: primary (send_part)*
|
|
?primary: literal | IDENT -> var_expr
|
|
| CLASS_ID -> class_literal
|
|
| block_literal -> block_expr
|
|
| "(" expression ")"
|
|
send_part: SEND_SELECTOR expression | IDENT
|
|
literal: INT -> int_literal
|
|
| STRING -> string_literal
|
|
CLASS_ID: /[A-Z][A-Za-z0-9]*/
|
|
IDENT: /[a-z_][A-Za-z0-9_]*/
|
|
SEND_SELECTOR: /[a-z_][A-Za-z0-9_]*:/
|
|
INT: /[+-]?[0-9]+/
|
|
STRING: /'(?:[^\r'\\\n]|\\(?!n)(?:'|\\)|\\n|\n)*'/
|
|
COMMENT: /"((?:[^"\\]|\\.)*)"/
|
|
%import common.WS
|
|
%ignore WS
|
|
%ignore COMMENT
|
|
BLOCK_PARAM: /:[a-z_][A-Za-z0-9_]*/
|
|
"""
|
|
|
|
# --- Transformer: Converting Parse Tree to AST ---
|
|
class SOL25Transformer(Transformer):
    """
    Lark transformer that converts the SOL25 parse tree into AST nodes.

    Each method is named after a grammar rule (or rule alias) and receives
    the already-transformed children of that rule. Several early checks
    (reserved identifiers, method arity, duplicate block parameters, string
    escapes) are performed here and end the program through error_exit.
    """

    # Identifiers that may not be used as selectors, parameters or targets.
    _RESERVED = frozenset({"self", "super", "nil", "true", "false", "class"})

    def start(self, children):
        # Start symbol: hand back the Program node.
        return children[0]

    def program(self, children):
        # The children are the class definitions of the program.
        return Program(children)

    def class_def(self, children):
        # children: class name, parent class name, then the method definitions.
        class_name, parent_name = str(children[0]), str(children[1])
        return ClassDef(class_name, parent_name, children[2:])

    def method_def(self, children):
        # Build the MethodDef and verify its arity right away: the number of
        # colons in the selector must equal the number of block parameters.
        method = MethodDef(children[0], children[1])
        actual = len(method.block.parameters)
        if method.expected_params != actual:
            error_exit(33, f"Arity mismatch in method {method.selector}: expected {method.expected_params} parameters, got {actual}")
        return method

    def selector(self, children):
        # Concatenate the string forms of all parts into one selector.
        selector = "".join(map(str, children))

        # Reserved words never contain ':', so membership alone identifies a
        # reserved identifier used as a colon-less selector.
        if selector in self._RESERVED:
            error_exit(22, f"Reserved identifier used as method selector: {selector}")

        return selector

    def send_selector(self, children):
        # Multi-part keyword selector: glue the parts into one string.
        return "".join(map(str, children))

    def block_literal(self, children):
        # Two children mean a parameter list is present; otherwise the block
        # has no parameters and its last child is the statement list.
        if len(children) == 2:
            params, stmts = children
        else:
            params, stmts = [], children[-1]
        return Block(params, stmts)

    def block_params(self, children):
        # Drop the leading ':' of every parameter token.
        params = [token.value[1:] for token in children]
        # The first reserved name (in source order) is reported.
        offender = next((p for p in params if p in self._RESERVED), None)
        if offender is not None:
            error_exit(22, f"Reserved identifier used as parameter: {offender}")
        if len(set(params)) != len(params):
            error_exit(35, "Duplicate formal parameters in block literal")
        return params

    def statement_list(self, children):
        # The children already are the assignment nodes.
        return children

    def assignment_statement(self, children):
        # Reject reserved identifiers as assignment targets.
        target = str(children[0])
        if target in self._RESERVED:
            error_exit(22, f"Reserved identifier used in assignment: {target}")
        return Assignment(target, children[1])

    def expression(self, children):
        # A lone child is passed through; otherwise the first child is the
        # receiver and the rest are its sends.
        if len(children) == 1:
            return children[0]
        return MessageSendExpr(children[0], children[1:])

    def int_literal(self, children):
        # Integer literal; the value is kept as text.
        return LiteralExpr("Integer", str(children[0]))

    def string_literal(self, children):
        # Validate the text between the quotes. Escape sequences are kept in
        # their escaped form rather than converted to the character they name.
        body = children[0].value[1:-1]
        escapes = {"n": "\\n", "'": "\\'", "\\": "\\\\"}
        pieces = []
        pos, end = 0, len(body)
        while pos < end:
            ch = body[pos]
            if ch != "\\":
                if ch == "\n":
                    error_exit(21, "Illegal newline in string literal")
                pieces.append(ch)
                pos += 1
                continue
            # A backslash must be followed by one of the known escape letters.
            if pos + 1 >= end:
                error_exit(21, "Illegal escape sequence in string literal")
            kept = escapes.get(body[pos + 1])
            if kept is None:
                error_exit(21, "Illegal escape sequence in string literal")
            pieces.append(kept)
            pos += 2
        return LiteralExpr("String", "".join(pieces))

    def var_expr(self, children):
        # nil/true/false become literals; every other identifier is a variable.
        ident = str(children[0])
        literal_kinds = {"nil": "Nil", "true": "True", "false": "False"}
        if ident in literal_kinds:
            return LiteralExpr(literal_kinds[ident], ident)
        return VarExpr(ident)

    def class_literal(self, children):
        # A class name used as an expression.
        return LiteralExpr("class", str(children[0]))

    def block_expr(self, children):
        # A block literal used as an expression.
        return BlockExpr(children[0])

    def send_part(self, children):
        # One send: (selector, argument), argument being None when absent.
        sel = str(children[0])

        # Reserved words have no trailing ':', so membership alone is enough.
        if sel in self._RESERVED:
            error_exit(22, f"Reserved identifier used as message selector: {sel}")

        argument = children[1] if len(children) == 2 else None
        return (sel, argument)

    def literal(self, children):
        # Pass the literal expression through unchanged.
        return children[0]
|
|
|
|
# --- Flattening of Compound Message Sends ---
|
|
def flatten_message_send(ast):
    """
    Recursively flatten compound message sends, in place.

    Two shapes are recognised on a send that carries exactly one
    (selector, argument) pair:

    * ``compute:`` whose argument is a message send: the chain of
      single ``and:`` sends in the argument is unrolled into the selector
      ``compute:and:...`` with one argument per link.
    * ``ifTrue:`` whose argument is ``<block> ifFalse: <block>``: turned into
      the selector ``ifTrue:ifFalse:`` with the two blocks as arguments.

    A flattened node gets ``compound = True``, ``compound_selector`` and
    ``compound_args`` set, and its ``sends`` list emptied.

    Block literals met as receiver or argument are descended into, so that
    sends written inside them are flattened as well. Any other non-send
    node is returned untouched.

    Returns the same node object that was passed in.
    """
    if isinstance(ast, BlockExpr):
        # Without this step the statements of a block used as receiver or
        # argument (e.g. the branches of ifTrue:ifFalse:) were never visited.
        for statement in ast.block.assignments:
            statement.expr = flatten_message_send(statement.expr)
        return ast

    if not isinstance(ast, MessageSendExpr):
        return ast

    # Children first: once this is done, receiver and arguments are final.
    ast.receiver = flatten_message_send(ast.receiver)
    ast.sends = [
        (sel, None if arg is None else flatten_message_send(arg))
        for sel, arg in ast.sends
    ]

    if len(ast.sends) != 1:
        return ast

    selector, argument = ast.sends[0]
    if not isinstance(argument, MessageSendExpr):
        return ast

    if selector == "compute:":
        # Walk the chain of single "and:" sends, collecting each receiver;
        # whatever ends the chain is the last argument.
        args = []
        link = argument
        while (isinstance(link, MessageSendExpr)
               and len(link.sends) == 1
               and link.sends[0][0] == "and:"):
            args.append(link.receiver)
            link = link.sends[0][1]
        args.append(link)

        ast.compound = True
        ast.compound_selector = "compute:" + "and:" * (len(args) - 1)
        ast.compound_args = args
        ast.sends = []
    elif selector == "ifTrue:":
        # Only the exact shape "<block> ifFalse: <block>" is merged.
        if (len(argument.sends) == 1
                and argument.sends[0][0] == "ifFalse:"
                and isinstance(argument.receiver, BlockExpr)
                and isinstance(argument.sends[0][1], BlockExpr)):
            ast.compound = True
            ast.compound_selector = "ifTrue:ifFalse:"
            ast.compound_args = [argument.receiver, argument.sends[0][1]]
            ast.sends = []

    return ast
|
|
|
|
def flatten(ast):
    """
    Walk the AST and flatten every compound message send reachable from it.

    Container nodes are updated in place and returned; a message send is
    handed to flatten_message_send; any other node is returned unchanged.
    """
    if isinstance(ast, MessageSendExpr):
        return flatten_message_send(ast)

    if isinstance(ast, Program):
        ast.classes = list(map(flatten, ast.classes))
    elif isinstance(ast, ClassDef):
        ast.methods = list(map(flatten, ast.methods))
    elif isinstance(ast, (MethodDef, BlockExpr)):
        # Both node kinds keep their body in ``block``.
        ast.block = flatten(ast.block)
    elif isinstance(ast, Block):
        ast.assignments = list(map(flatten, ast.assignments))
    elif isinstance(ast, Assignment):
        ast.expr = flatten(ast.expr)

    return ast
|
|
|
|
# --- XML Generation ---
|
|
def xml_expr(e):
    """
    Build the XML element for an expression node, recursively.

    Literals become <literal>, variables <var>, block literals <block> and
    message sends <send> (receiver wrapped in <expr>, each argument wrapped
    in <arg order="N"><expr>...</expr></arg>). Anything else yields an
    <unknown> element whose text is str(e).
    """
    def boxed(node):
        # <expr> wrapper around the XML of a sub-expression.
        box = ET.Element("expr")
        box.append(xml_expr(node))
        return box

    if isinstance(e, LiteralExpr):
        return ET.Element("literal", {"class": e.lit_class, "value": e.value})
    if isinstance(e, VarExpr):
        return ET.Element("var", {"name": e.name})
    if isinstance(e, BlockExpr):
        return xml_block(e.block)
    if not isinstance(e, MessageSendExpr):
        # Fallback for unknown expression types.
        placeholder = ET.Element("unknown")
        placeholder.text = str(e)
        return placeholder

    if getattr(e, "compound", False):
        # Flattened send: every compound argument is emitted, numbered from 1.
        selector = e.compound_selector
        numbered = list(enumerate(e.compound_args, start=1))
    else:
        # Plain send: the selector is the concatenation of all parts; only
        # parts carrying an argument produce <arg>, numbered by position.
        selector = "".join(part for part, _ in e.sends)
        numbered = [
            (position, arg)
            for position, (_, arg) in enumerate(e.sends, start=1)
            if arg is not None
        ]

    send_el = ET.Element("send", {"selector": selector})
    send_el.append(boxed(e.receiver))
    for position, arg in numbered:
        arg_el = ET.Element("arg", {"order": str(position)})
        arg_el.append(boxed(arg))
        send_el.append(arg_el)
    return send_el
|
|
|
|
def xml_block(b):
    """
    Build the <block> XML element for a Block node.

    The element carries the block's arity, one <parameter> child per formal
    parameter and one <assign> child per statement; both kinds of children
    are numbered from 1 through their ``order`` attribute.
    """
    block_el = ET.Element("block", {"arity": str(len(b.parameters))})

    for order, param_name in enumerate(b.parameters, start=1):
        ET.SubElement(block_el, "parameter", {"name": param_name, "order": str(order)})

    for order, statement in enumerate(b.assignments, start=1):
        assign_el = ET.SubElement(block_el, "assign", {"order": str(order)})
        ET.SubElement(assign_el, "var", {"name": statement.var})
        # The assigned expression is wrapped in its own <expr> element.
        expr_el = ET.SubElement(assign_el, "expr")
        expr_el.append(xml_expr(statement.expr))

    return block_el
|
|
|
|
def generate_xml(prog, comment):
    """
    Build the XML tree for a whole program.

    prog:    Program node; its classes and their methods are emitted in order.
    comment: optional description text. When non-empty it is stored in the
             root's ``description`` attribute with every newline replaced by
             a space.

    Returns the root <program> element.
    """
    root = ET.Element("program", {"language": "SOL25"})
    if comment:
        # No manual escaping of '<' here: ElementTree escapes attribute
        # values itself when the tree is serialized.
        root.set("description", comment.replace("\n", " "))

    for cls in prog.classes:
        class_el = ET.SubElement(root, "class", {"name": cls.name, "parent": cls.parent})
        for method in cls.methods:
            method_el = ET.SubElement(class_el, "method", {"selector": method.selector})
            method_el.append(xml_block(method.block))

    return root
|
|
|
|
# --- Static Semantic Analysis ---
|
|
def is_valid_integer_instance_method(selector):
    """
    Tell whether ``selector`` names a known Integer instance method.

    A single trailing ':' is ignored, so the check looks at the method name
    only and not at whether the selector takes an argument.
    """
    integer_methods = (
        "equalTo", "greaterThan", "plus", "minus", "multiplyBy", "divBy",
        "asString", "asInteger", "timesRepeat",
    )
    return selector.removesuffix(":") in integer_methods
|
|
|
|
def is_valid_string_instance_method(selector):
    """
    Tell whether ``selector`` names a known String instance method.

    A single trailing ':' is ignored, so the check looks at the method name
    only and not at whether the selector takes an argument.
    """
    string_methods = (
        "print", "equalTo", "asString", "asInteger", "concatenateWith",
        "startsWith", "endsBefore",
    )
    return selector.removesuffix(":") in string_methods
|
|
|
|
def is_string_or_subclass(class_name, inheritance_map):
    """
    Tell whether ``class_name`` is "String" or inherits from it.

    class_name:      name of the class to test.
    inheritance_map: dict mapping a class name to the name of its parent.

    The parent chain is followed until "String" is reached, a class without
    an entry in the map is reached, or a class is met for the second time.
    The last case means the map contains a cycle; the walk then stops and
    the answer is False instead of looping forever.
    """
    visited = set()
    current = class_name
    while (current != "String"
           and current in inheritance_map
           and current not in visited):
        visited.add(current)
        current = inheritance_map[current]
    return current == "String"
|
|
|
|
def detect_circular_inheritance(prog):
    """
    Report whether the class hierarchy of ``prog`` contains a cycle.

    Starting from every class defined in the program, the chain of parents
    is followed for as long as the parent is itself defined in the program.
    Meeting a class twice on one such walk means a cycle.

    Returns True when a cycle exists, False otherwise.
    """
    parent_of = {cls.name: cls.parent for cls in prog.classes}

    def loops_back(start):
        # Follow parents from ``start``; True as soon as a class repeats.
        trail = set()
        node = start
        while node in parent_of:
            if node in trail:
                return True
            trail.add(node)
            node = parent_of[node]
        return False

    return any(loops_back(name) for name in parent_of)
|
|
|
|
# Function removed as we're now doing direct character-by-character counting
|
|
|
|
def semantic_check_program(prog):
    """
    Run the static semantic checks over a whole program.

    In this order, ending the program through error_exit on the first
    failure:
      - class redefinition (35),
      - circular inheritance (35),
      - parent class that is neither built in nor defined (32),
      - per method: Main.run declared with parameters (33), then the checks
        of the method body,
      - absence of a Main class with a parameterless run method (31).
    """
    declared = set()
    for cls in prog.classes:
        if cls.name in declared:
            error_exit(35, f"Class redefinition: {cls.name}")
        declared.add(cls.name)

    if detect_circular_inheritance(prog):
        error_exit(35, "Circular inheritance detected")

    # Global environment: built-in names plus user classes. The value False
    # marks a name that is not a formal parameter.
    builtins = {"Object", "Integer", "String", "Block", "Nil", "True", "False", "nil", "true", "false", "self", "super"}
    global_env = dict.fromkeys(builtins, False)
    global_env.update(dict.fromkeys(declared, False))

    # Child -> parent lookup used by the class-method checks.
    inheritance_map = {cls.name: cls.parent for cls in prog.classes}

    for cls in prog.classes:
        if cls.parent not in global_env:
            error_exit(32, f"Undefined class: {cls.parent}")

    pseudo_variables = ("self", "super", "nil", "true", "false")
    for cls in prog.classes:
        in_main = cls.name == "Main"
        for method in cls.methods:
            param_count = len(method.block.parameters)
            # Main.run must be parameterless.
            if in_main and method.selector == "run" and param_count > 0:
                error_exit(33, f"Arity mismatch in method run: expected 0 parameters, got {param_count}")

            # Method scope: globals, then formal parameters (marked True),
            # then the pseudo-variables forced back to non-parameter.
            env = global_env.copy()
            env.update(dict.fromkeys(method.block.parameters, True))
            env.update(dict.fromkeys(pseudo_variables, False))
            semantic_check_block(method.block, env, inheritance_map)

    # A Main class with a parameterless run method must exist.
    has_entry_point = False
    for cls in prog.classes:
        if cls.name != "Main":
            continue
        if any(m.selector == "run" and len(m.block.parameters) == 0 for m in cls.methods):
            has_entry_point = True
            break
    if not has_entry_point:
        error_exit(31, "Missing Main class or parameterless run method")
|
|
|
|
def semantic_check_block(b, env, inheritance_map):
    """
    Run the semantic checks over the statements of one block.

    b:               block whose ``assignments`` are checked in order.
    env:             dict name -> bool, True marking a formal parameter;
                     it is copied, the caller's dict is not modified.
    inheritance_map: dict child class name -> parent class name, passed on
                     to the expression checks.

    For each statement the right-hand side is checked first; assigning to a
    formal parameter then ends the program with code 34; otherwise the
    target becomes a known local variable for the following statements.
    """
    scope = dict(env)
    for statement in b.assignments:
        semantic_check_expr(statement.expr, scope, inheritance_map)
        target = statement.var
        if scope.get(target) is True:
            error_exit(34, f"Assignment to a formal parameter: {target}")
        scope[target] = False
|
|
|
|
def semantic_check_expr(e, env, inheritance_map):
    """
    Recursively run the semantic checks over one expression.

    e:               expression node to check.
    env:             dict of visible names; True marks a formal parameter.
    inheritance_map: dict child class name -> parent class name.

    Checks, each ending the program through error_exit on failure:
      - a variable must be present in ``env`` (32);
      - a class literal must be present in ``env`` (32);
      - a block literal is checked in a copy of ``env`` extended with its
        own parameters;
      - a message send has its receiver checked, then, depending on the
        receiver:
          * Integer literal: every selector must be a known Integer
            instance method (32);
          * class literal: the first send must be ``new``, ``from:`` or,
            for String and its subclasses, ``read`` (32); any further send
            in the same chain is rejected (32);
          * anything else: a reserved identifier used as a colon-less
            selector is rejected (22);
        in all three cases the arguments are checked recursively;
      - the operands of a flattened compound send (``compound_args``) are
        checked recursively as well.
    """
    if isinstance(e, VarExpr):
        if e.name not in env:
            error_exit(32, f"Undefined variable: {e.name}")
        return

    if isinstance(e, LiteralExpr):
        # Only class literals need a lookup; other literals are always fine.
        if e.lit_class == "class" and e.value not in env:
            error_exit(32, f"Undefined class: {e.value}")
        return

    if isinstance(e, BlockExpr):
        inner_env = dict(env)
        inner_env.update({p: True for p in e.block.parameters})
        semantic_check_block(e.block, inner_env, inheritance_map)
        return

    if not isinstance(e, MessageSendExpr):
        return

    receiver = e.receiver
    semantic_check_expr(receiver, env, inheritance_map)
    receiver_kind = receiver.lit_class if isinstance(receiver, LiteralExpr) else None

    if receiver_kind == "Integer":
        # Message sent directly to an integer literal.
        for sel, arg in e.sends:
            if not is_valid_integer_instance_method(sel):
                error_exit(32, f"Undefined method: {sel} for Integer instance")
            if arg is not None:
                semantic_check_expr(arg, env, inheritance_map)

    elif receiver_kind == "class":
        # Message sent directly to a class: only class methods are allowed.
        class_name = receiver.value
        for index, (sel, arg) in enumerate(e.sends):
            base_sel = sel[:-1] if sel.endswith(":") else sel
            if index == 0:
                # Every class understands new and from:.
                if base_sel == "new" or (base_sel == "from" and sel.endswith(":")):
                    if arg is not None:
                        semantic_check_expr(arg, env, inheritance_map)
                # String and its subclasses additionally understand read.
                elif base_sel == "read":
                    if not is_string_or_subclass(class_name, inheritance_map):
                        error_exit(32, f"Undefined class method: {base_sel} for class {class_name}")
                else:
                    error_exit(32, f"Undefined class method: {base_sel} for class {class_name}")
            else:
                # Further sends in the same chain are rejected.
                error_exit(32, "Invalid method chain: class methods cannot be chained")
        # No extra inspection of a nested "Integer from: ..." argument is
        # needed: that argument went through this very function above, which
        # already rejects any send chained after its from:.

    else:
        for sel, arg in e.sends:
            # Reserved identifiers may not be used as colon-less selectors.
            base_sel = sel[:-1] if sel.endswith(":") else sel
            if base_sel in {"self", "super", "nil", "true", "false", "class"} and ":" not in sel:
                error_exit(22, f"Reserved identifier used as message selector: {sel}")

            if arg is not None:
                semantic_check_expr(arg, env, inheritance_map)

    # A flattened compound send keeps its operands in compound_args and has
    # an empty sends list, so the loops above never see them.
    if getattr(e, "compound", False):
        for arg in (e.compound_args or ()):
            semantic_check_expr(arg, env, inheritance_map)
|
|
|
|
# --- Help Message Function ---
|
|
def print_help():
    """
    Write the help text (usage, functionality, exit codes, examples) to stdout.
    """
    sections = (
        # Usage and purpose
        "Usage: parse.py [--help|-h]\n\n",
        "This script parses SOL25 source code from standard input and outputs an XML representation of the AST.\n\n",
        # What the script does
        "Functionality:\n",
        " - Lexical analysis and parsing using Lark (version 1.2.2).\n",
        " - Transformation of the parse tree into an Abstract Syntax Tree (AST).\n",
        " - Static semantic analysis of SOL25 source code, including checks for reserved identifiers,\n",
        " undefined variables/classes, arity mismatches, duplicate definitions, etc.\n",
        " - XML generation of the AST.\n\n",
        # Exit codes
        "Exit Codes:\n",
        " 10 : Wrong or extra command-line parameters.\n",
        " 11 : Error opening input.\n",
        " 21 : Lexical error (e.g., illegal escape sequences).\n",
        " 22 : Syntactic error or misuse of reserved identifiers.\n",
        " 31 : Missing Main class or parameterless run method.\n",
        " 32 : Use of undefined variable/class/method.\n",
        " 33 : Arity mismatch in method block literal.\n",
        " 34 : Assignment to a formal parameter.\n",
        " 35 : Duplicate formal parameters or class redefinition.\n",
        " 99 : Internal error.\n\n",
        # Invocation examples
        "Examples:\n",
        " cat source.sol25 | python3.11 parse.py\n",
        " python3.11 parse.py --help\n",
    )
    sys.stdout.write("".join(sections))
|
|
|
|
# --- Main Entry Point ---
|
|
def _first_comment(raw):
    """
    Return the text of the first SOL25 comment in ``raw``, or None.

    The input is scanned left to right for single-quoted string literals and
    double-quoted comments. String literals are skipped, so a double quote
    written inside a string is not mistaken for the start of a comment.
    """
    pattern = r"""'(?:[^'\\]|\\.)*'|"((?:[^"\\]|\\.)*)\""""
    for found in re.finditer(pattern, raw):
        # Group 1 is set only when the comment alternative matched.
        if found.group(1) is not None:
            return found.group(1)
    return None


def main():
    """
    Script entry point.

    Validates the command line (only --help / -h is accepted), reads the
    SOL25 source from stdin, parses it, builds and flattens the AST, runs
    the semantic checks and writes the pretty-printed XML to stdout.

    Always ends through sys.exit: 0 on success or after printing help,
    10 for bad arguments, 11 when stdin cannot be read, 22 / 21 for Lark's
    UnexpectedToken / UnexpectedCharacters, 99 for any other exception;
    checks performed by the called code exit with their own codes.
    """
    if len(sys.argv) > 1:
        if len(sys.argv) == 2 and sys.argv[1] in ("--help", "-h"):
            print_help()
            sys.exit(0)
        else:
            error_exit(10, "Wrong or extra command-line parameters")

    try:
        # Read the entire input from stdin.
        raw = sys.stdin.read()
    except Exception:
        error_exit(11, "Error opening input")

    # The first comment of the source, if any, becomes the XML description.
    comment = _first_comment(raw)

    try:
        parser = Lark(SOL25_GRAMMAR, start="start", parser="lalr", lexer="basic")
        tree = parser.parse(raw)

        # Parse tree -> AST, with compound sends flattened.
        ast = flatten(SOL25Transformer().transform(tree))

        semantic_check_program(ast)

        xml_root = generate_xml(ast, comment)
        xml_str = ET.tostring(xml_root, encoding="utf-8").decode("utf-8")

        # Pretty-print, then post-process the serialized text.
        # NOTE(review): these replacements act on already-serialized XML;
        # turning "&lt;" back into "<" looks like it can produce malformed
        # output when a value contains '<' - confirm the intended literals.
        dom = xml.dom.minidom.parseString(xml_str)
        pretty = (
            dom.toprettyxml(indent=" ")
            .replace("&nbsp;", " ")
            .replace("&apos;", "'")
            .replace("&lt;", "<")
            .replace("&#10;", " ")
        )
        # minidom emits a declaration without encoding; substitute one with it.
        if pretty.startswith('<?xml version="1.0" ?>'):
            pretty = '<?xml version="1.0" encoding="UTF-8"?>' + pretty[len('<?xml version="1.0" ?>'):]
        sys.stdout.write(pretty)
        sys.exit(0)
    except UnexpectedToken:
        error_exit(22, "Syntactic error")
    except UnexpectedCharacters:
        error_exit(21, "Lexical error")
    except SystemExit:
        # Exit requests raised inside the try block must pass through as is.
        raise
    except Exception as e:
        error_exit(99, f"Internal error: {str(e)}")
|
|
|
|
# Run the parser only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()