Commit dde710fe authored by Roman Alifanov's avatar Roman Alifanov

Add Dead Code Elimination (DCE) for stdlib

- Add dce.py: UsageAnalyzer that scans the AST for used stdlib categories
- Modify stdlib.py: emit_stdlib() now accepts a used_categories parameter
- Modify codegen.py: analyze programs before generating the stdlib

Categories: core, object, http, fs, json, logger, string, array, dict, regex, math, time, awk, exception, args, misc

Results:
- Simple "print(Hello)" program: 200+ lines -> 35 lines
- Complex programs: minimal change (they use most categories anyway)
parent 5a7a4772
...@@ -9,6 +9,7 @@ from .class_codegen import ClassMixin ...@@ -9,6 +9,7 @@ from .class_codegen import ClassMixin
from .decorator_codegen import DecoratorMixin from .decorator_codegen import DecoratorMixin
from .dispatch_codegen import DispatchMixin from .dispatch_codegen import DispatchMixin
from .cse_codegen import CseMixin from .cse_codegen import CseMixin
from .dce import UsageAnalyzer
class CodeGenerator(StdlibMixin, AwkCodegenMixin, ExprMixin, StmtMixin, class CodeGenerator(StdlibMixin, AwkCodegenMixin, ExprMixin, StmtMixin,
...@@ -106,14 +107,25 @@ class CodeGenerator(StdlibMixin, AwkCodegenMixin, ExprMixin, StmtMixin, ...@@ -106,14 +107,25 @@ class CodeGenerator(StdlibMixin, AwkCodegenMixin, ExprMixin, StmtMixin,
"""Generate code for a single program.""" """Generate code for a single program."""
return self.generate_multi([program]) return self.generate_multi([program])
def generate_multi(self, programs: list) -> str: def generate_multi(self, programs: list, dce: bool = True) -> str:
"""Generate code for multiple programs (multi-file compilation).""" """Generate code for multiple programs (multi-file compilation).
Args:
programs: List of Program AST nodes
dce: Enable Dead Code Elimination (default True)
"""
self.emit_raw("#!/usr/bin/env bash") self.emit_raw("#!/usr/bin/env bash")
self.emit_raw("# Generated by ContenT compiler") self.emit_raw("# Generated by ContenT compiler")
self.emit_raw("set -euo pipefail") self.emit_raw("set -euo pipefail")
self.emit() self.emit()
self.emit_stdlib() # Анализ используемых категорий для DCE
if dce:
analyzer = UsageAnalyzer()
used_categories = analyzer.analyze(programs)
self.emit_stdlib(used_categories)
else:
self.emit_stdlib()
for program in programs: for program in programs:
for stmt in program.statements: for stmt in program.statements:
......
"""Dead Code Elimination - анализ используемых функций stdlib."""
from .ast_nodes import *
class UsageAnalyzer:
    """Walk program ASTs and collect the stdlib categories they use.

    The result of :meth:`analyze` is a set of category names.  The analysis
    is deliberately conservative: when a method name is ambiguous (for
    example ``get`` exists on both arrays and dicts) every matching category
    is reported, because emitting an unused category is harmless while
    dropping a used one breaks the generated script.

    NOTE(review): nothing in this class ever adds ``'misc'``, although it is
    listed in ``CATEGORIES`` -- confirm how calls to the random/byte helpers
    are supposed to be detected.
    """

    # Known stdlib categories
    CATEGORIES = {
        'core',       # print, exit, range - basic functions
        'object',     # classes, __CT_OBJ, method dispatch
        'http',       # http.get/post/put/delete
        'fs',         # fs.read/write/exists/etc
        'json',       # json.parse/stringify
        'logger',     # logger.info/warn/error/debug
        'string',     # string methods .upper(), .lower(), etc
        'array',      # array methods .push(), .join(), etc
        'dict',       # dict methods .get(), .set(), etc
        'regex',      # regex.match/extract
        'math',       # math.add/sub/mul/etc
        'time',       # time.now/ms
        'awk',        # @awk wrapper
        'exception',  # try/catch/throw/defer
        'args',       # args.get/count
        'misc',       # random, byte functions
    }

    # Namespaces whose name is identical to the category they pull in.
    _NAMESPACE_CATEGORIES = frozenset({
        'http', 'fs', 'json', 'logger', 'regex', 'math', 'time', 'args',
    })
    # Namespaces that need no stdlib support at all.
    _NO_STDLIB_NAMESPACES = frozenset({'shell'})

    # Method names per receiver kind.  The sets overlap on purpose
    # ('len', 'get', 'set'): an overlapping name marks every candidate.
    _STRING_METHODS = frozenset({
        'upper', 'lower', 'trim', 'len', 'contains', 'starts',
        'ends', 'index', 'replace', 'substr', 'split', 'charAt',
    })
    _ARRAY_METHODS = frozenset({
        'push', 'pop', 'shift', 'join', 'get', 'set', 'slice', 'len',
    })
    _DICT_METHODS = frozenset({'get', 'set', 'has', 'del', 'keys'})

    def __init__(self):
        self.used: set = set()
        self.has_classes = False
        self.has_awk = False

    def analyze(self, programs: list) -> set:
        """Analyze a list of programs and return the used categories.

        Args:
            programs: Program AST nodes; each must expose ``statements``.

        Returns:
            The set of category names; ``'core'`` is always included.
        """
        # Reset *all* per-run state so a reused analyzer does not leak the
        # class/awk flags of a previous run into this result.
        self.used = {'core'}  # core is always required
        self.has_classes = False
        self.has_awk = False

        for program in programs:
            for stmt in program.statements:
                self._analyze_stmt(stmt)

        # Classes (or `new` expressions) need the object system.
        if self.has_classes:
            self.used.add('object')

        # Any @awk decorator needs the awk wrapper.
        if self.has_awk:
            self.used.add('awk')

        return self.used

    def _note_decorators(self, decorators):
        """Set ``has_awk`` when one of *decorators* is named ``awk``."""
        if decorators and any(dec.name == 'awk' for dec in decorators):
            self.has_awk = True

    def _analyze_stmt(self, stmt):
        """Record the categories used by one statement (recursively).

        Statement types that are not listed here are ignored.
        """
        if isinstance(stmt, ClassDecl):
            self.has_classes = True
            for method in stmt.methods:
                self._note_decorators(method.decorators)
                self._analyze_body(method.body)
            if stmt.constructor:
                self._analyze_body(stmt.constructor.body)

        elif isinstance(stmt, FunctionDecl):
            self._note_decorators(stmt.decorators)
            self._analyze_body(stmt.body)

        elif isinstance(stmt, Assignment):
            self._analyze_expr(stmt.value)

        elif isinstance(stmt, ExpressionStmt):
            self._analyze_expr(stmt.expression)

        elif isinstance(stmt, IfStmt):
            self._analyze_expr(stmt.condition)
            self._analyze_body(stmt.then_branch)
            if stmt.else_branch:
                self._analyze_body(stmt.else_branch)

        elif isinstance(stmt, (ForStmt, ForeachStmt)):
            self._analyze_expr(stmt.iterable)
            self._analyze_body(stmt.body)

        elif isinstance(stmt, WhileStmt):
            self._analyze_expr(stmt.condition)
            self._analyze_body(stmt.body)

        elif isinstance(stmt, WhenStmt):
            if stmt.value:
                self._analyze_expr(stmt.value)
            for branch in stmt.branches:
                for pattern in branch.patterns:
                    self._analyze_expr(pattern)
                self._analyze_body(branch.body)

        elif isinstance(stmt, TryStmt):
            self.used.add('exception')
            self._analyze_body(stmt.try_body)
            if stmt.except_body:
                self._analyze_body(stmt.except_body)
            if stmt.finally_body:
                self._analyze_body(stmt.finally_body)

        elif isinstance(stmt, (ThrowStmt, DeferStmt)):
            self.used.add('exception')
            self._analyze_expr(stmt.expression)

        elif isinstance(stmt, ReturnStmt):
            if stmt.value:
                self._analyze_expr(stmt.value)

    def _analyze_body(self, body):
        """Analyze a body given either as a Block-like node or a list.

        Anything else (including ``None``) is ignored.
        """
        if body is None:
            return

        # A body may be a node exposing `statements` or a plain list.
        if hasattr(body, 'statements'):
            stmts = body.statements
        elif isinstance(body, list):
            stmts = body
        else:
            return

        for stmt in stmts:
            self._analyze_stmt(stmt)

    def _analyze_expr(self, expr):
        """Record the categories used by one expression (recursively)."""
        if expr is None:
            return

        if isinstance(expr, CallExpr):
            self._analyze_call(expr)
            # Descend into the callee as well: in a chained call such as
            # `json.parse(s).get("k")` the receiver is itself a call that
            # must be analyzed, otherwise its category would be eliminated.
            self._analyze_expr(expr.callee)
            for arg in expr.arguments:
                self._analyze_expr(arg)

        elif isinstance(expr, MemberAccess):
            self._analyze_member_access(expr)
            self._analyze_expr(expr.object)

        elif isinstance(expr, BinaryOp):
            self._analyze_expr(expr.left)
            self._analyze_expr(expr.right)

        elif isinstance(expr, UnaryOp):
            self._analyze_expr(expr.operand)

        elif isinstance(expr, ArrayLiteral):
            self.used.add('array')
            for elem in expr.elements:
                self._analyze_expr(elem)

        elif isinstance(expr, DictLiteral):
            self.used.add('dict')
            for key, value in expr.pairs:
                self._analyze_expr(key)
                self._analyze_expr(value)

        elif isinstance(expr, IndexAccess):
            self._analyze_expr(expr.object)
            self._analyze_expr(expr.index)

        elif isinstance(expr, Lambda):
            body = expr.body
            if hasattr(body, 'statements') or isinstance(body, list):
                self._analyze_body(body)
            else:
                # NOTE(review): the shape of Lambda.body is not visible
                # here; if it can be a bare expression it must still be
                # scanned rather than silently dropped.
                self._analyze_expr(body)

        elif isinstance(expr, NewExpr):
            self.has_classes = True
            for arg in expr.arguments:
                self._analyze_expr(arg)

    def _analyze_call(self, expr: "CallExpr"):
        """Classify a call by its namespace or by its method name.

        Only calls of the form ``<receiver>.<member>(...)`` are classified.
        A receiver that is one of the stdlib namespaces selects that
        category directly; ``shell`` needs no stdlib; every other receiver
        (a variable, a literal, an index or call result, ...) is classified
        by the method name.
        """
        callee = expr.callee
        if not isinstance(callee, MemberAccess):
            return

        receiver = callee.object
        if isinstance(receiver, Identifier):
            namespace = receiver.name
            if namespace in self._NAMESPACE_CATEGORIES:
                self.used.add(namespace)
                return
            if namespace in self._NO_STDLIB_NAMESPACES:
                return  # shell does not require stdlib

        # Method call on a value - may be a string/array/dict/object.
        self._check_method(callee.member)

    def _analyze_member_access(self, expr: "MemberAccess"):
        """Hook for member access; currently records nothing."""
        if isinstance(expr.object, Identifier):
            # Could be the receiver of a method call; nothing to record.
            pass

    def _check_method(self, method: str):
        """Add every category whose method set contains *method*."""
        if method in self._STRING_METHODS:
            self.used.add('string')
        if method in self._ARRAY_METHODS:
            self.used.add('array')
        if method in self._DICT_METHODS:
            self.used.add('dict')
...@@ -7,26 +7,67 @@ class StdlibMixin: ...@@ -7,26 +7,67 @@ class StdlibMixin:
standard library functions in Bash. standard library functions in Bash.
""" """
def emit_stdlib (self): def emit_stdlib (self, used_categories: set = None):
"""Emit the ContenT standard library.""" """Emit the ContenT standard library.
Args:
used_categories: Set of category names to emit. If None, emit all.
"""
self.emit ("# === ContenT Standard Library ===") self.emit ("# === ContenT Standard Library ===")
self.emit () self.emit ()
# Core всегда нужен
self._emit_core () self._emit_core ()
self._emit_http ()
self._emit_fs () # Условная генерация по категориям
self._emit_json () if used_categories is None:
self._emit_object_system () # Старое поведение - генерировать всё
self._emit_exception () self._emit_http ()
self._emit_logger () self._emit_fs ()
self._emit_string () self._emit_json ()
self._emit_array () self._emit_object_system ()
self._emit_regex () self._emit_exception ()
self._emit_utils () self._emit_logger ()
self._emit_awk_wrapper () self._emit_string ()
self._emit_math () self._emit_array ()
self._emit_dict () self._emit_regex ()
self._emit_misc () self._emit_utils ()
self._emit_awk_wrapper ()
self._emit_math ()
self._emit_dict ()
self._emit_misc ()
else:
# DCE - только используемые категории
if 'http' in used_categories:
self._emit_http ()
if 'fs' in used_categories:
self._emit_fs ()
if 'json' in used_categories:
self._emit_json ()
if 'object' in used_categories:
self._emit_object_system ()
if 'exception' in used_categories:
self._emit_exception ()
if 'logger' in used_categories:
self._emit_logger ()
if 'string' in used_categories:
self._emit_string ()
if 'array' in used_categories:
self._emit_array ()
if 'regex' in used_categories:
self._emit_regex ()
if 'args' in used_categories:
self._emit_args ()
else:
self._emit_utils_minimal ()
if 'awk' in used_categories:
self._emit_awk_wrapper ()
if 'math' in used_categories:
self._emit_math ()
if 'dict' in used_categories:
self._emit_dict ()
if 'misc' in used_categories or 'time' in used_categories:
self._emit_misc ()
self.emit ("# === End Standard Library ===") self.emit ("# === End Standard Library ===")
self.emit () self.emit ()
...@@ -342,6 +383,29 @@ class StdlibMixin: ...@@ -342,6 +383,29 @@ class StdlibMixin:
self.emit ("__ct_args_get () { printf '%s\\n' \"${__ct_args[$1]}\"; }") self.emit ("__ct_args_get () { printf '%s\\n' \"${__ct_args[$1]}\"; }")
self.emit () self.emit ()
def _emit_utils_minimal (self):
"""Minimal utility functions (without args)."""
self.emit ("__ct_exit () { exit \"${1:-0}\"; }")
self.emit ()
self.emit ("__ct_is_number () { [[ \"$1\" =~ ^-?[0-9]+$ ]] && echo true || echo false; }")
self.emit ("__ct_is_empty () { [[ -z \"$1\" ]] && echo true || echo false; }")
self.emit ()
def _emit_args (self):
"""Args functions only."""
self.emit ("__ct_exit () { exit \"${1:-0}\"; }")
self.emit ()
self.emit ("__ct_is_number () { [[ \"$1\" =~ ^-?[0-9]+$ ]] && echo true || echo false; }")
self.emit ("__ct_is_empty () { [[ -z \"$1\" ]] && echo true || echo false; }")
self.emit ()
self.emit ('__ct_args=("$@")')
self.emit ('__ct_args_count () { echo ${#__ct_args[@]}; }')
self.emit ("__ct_args_get () { printf '%s\\n' \"${__ct_args[$1]}\"; }")
self.emit ()
def _emit_awk_wrapper (self): def _emit_awk_wrapper (self):
"""AWK wrapper functions.""" """AWK wrapper functions."""
self.emit ("# AWK wrapper - uses mawk if available (2-10x faster)") self.emit ("# AWK wrapper - uses mawk if available (2-10x faster)")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment