Commit cb16dd5f authored by Roman Alifanov

Add DCE for unused classes

- Track which classes are instantiated (`new X()` or `X()`).
- Track transitive dependencies via class fields.
- Skip generating unused classes, leaving a comment marker in their place.

Example: if class A is used but class B is not, B is skipped and the output contains:

    # DCE: skipped unused class B
parent dde710fe
...@@ -44,7 +44,7 @@ class CodeGenerator(StdlibMixin, AwkCodegenMixin, ExprMixin, StmtMixin, ...@@ -44,7 +44,7 @@ class CodeGenerator(StdlibMixin, AwkCodegenMixin, ExprMixin, StmtMixin,
self.array_vars: Set[str] = set() self.array_vars: Set[str] = set()
self.dict_vars: Set[str] = set() self.dict_vars: Set[str] = set()
self.object_vars: Set[str] = set() # Variables holding object instances self.object_vars: Set[str] = set()
self.class_field_types: Dict[tuple, str] = {} self.class_field_types: Dict[tuple, str] = {}
self.local_vars: Set[str] = set() self.local_vars: Set[str] = set()
...@@ -66,6 +66,9 @@ class CodeGenerator(StdlibMixin, AwkCodegenMixin, ExprMixin, StmtMixin, ...@@ -66,6 +66,9 @@ class CodeGenerator(StdlibMixin, AwkCodegenMixin, ExprMixin, StmtMixin,
self.deferred_calls: List[str] = [] self.deferred_calls: List[str] = []
self.used_classes: Optional[Set[str]] = None
self.used_methods: Optional[Dict[str, Set[str]]] = None
def indent(self) -> str: def indent(self) -> str:
return " " * self.indent_level return " " * self.indent_level
...@@ -119,13 +122,16 @@ class CodeGenerator(StdlibMixin, AwkCodegenMixin, ExprMixin, StmtMixin, ...@@ -119,13 +122,16 @@ class CodeGenerator(StdlibMixin, AwkCodegenMixin, ExprMixin, StmtMixin,
self.emit_raw("set -euo pipefail") self.emit_raw("set -euo pipefail")
self.emit() self.emit()
# Анализ используемых категорий для DCE
if dce: if dce:
analyzer = UsageAnalyzer() analyzer = UsageAnalyzer()
used_categories = analyzer.analyze(programs) used_categories = analyzer.analyze(programs)
self.emit_stdlib(used_categories) self.emit_stdlib(used_categories)
self.used_classes = analyzer.get_used_classes()
self.used_methods = analyzer.used_methods
else: else:
self.emit_stdlib() self.emit_stdlib()
self.used_classes = None
self.used_methods = None
for program in programs: for program in programs:
for stmt in program.statements: for stmt in program.statements:
......
"""Dead Code Elimination - анализ используемых функций stdlib.""" """Dead Code Elimination."""
from .ast_nodes import * from .ast_nodes import *
class UsageAnalyzer: class UsageAnalyzer:
"""Анализирует AST и определяет какие категории stdlib используются."""
# Категории stdlib
CATEGORIES = { CATEGORIES = {
'core', # print, exit, range - базовые функции 'core', 'object', 'http', 'fs', 'json', 'logger', 'string',
'object', # классы, __CT_OBJ, method dispatch 'array', 'dict', 'regex', 'math', 'time', 'awk', 'exception',
'http', # http.get/post/put/delete 'args', 'misc',
'fs', # fs.read/write/exists/etc
'json', # json.parse/stringify
'logger', # logger.info/warn/error/debug
'string', # методы строк .upper(), .lower(), etc
'array', # методы массивов .push(), .join(), etc
'dict', # методы словарей .get(), .set(), etc
'regex', # regex.match/extract
'math', # math.add/sub/mul/etc
'time', # time.now/ms
'awk', # @awk wrapper
'exception', # try/catch/throw/defer
'args', # args.get/count
'misc', # random, byte functions
} }
def __init__(self): def __init__(self):
self.used: set = set() self.used: set = set()
self.has_classes = False self.has_classes = False
self.has_awk = False self.has_awk = False
self.defined_classes: dict = {}
self.used_classes: set = set()
self.used_methods: dict = {}
self.class_fields: dict = {}
def analyze(self, programs: list) -> set:
    """Analyze *programs* and return the stdlib categories they need.

    The walk happens in two passes so that a class may be referenced
    before the statement that declares it:

    1. register every top-level ``ClassDecl`` and its field defaults;
    2. visit every top-level statement to record usage.

    Afterwards the used-class set is expanded through class fields.

    Args:
        programs: parsed programs; each must expose ``statements``.

    Returns:
        The set of used category names. ``'core'`` is always present;
        ``'object'`` is added when ``has_classes`` was set during the
        walk and ``'awk'`` when ``has_awk`` was set.
    """
    # Start every run from a clean slate. Only ``used`` was reset before,
    # so a second call on the same analyzer inherited flags and class
    # bookkeeping from the previous run. Rebinding (rather than clearing)
    # leaves sets/dicts handed out by an earlier run untouched.
    self.used = {'core'}
    self.has_classes = False
    self.has_awk = False
    self.defined_classes = {}
    self.used_classes = set()
    self.used_methods = {}
    self.class_fields = {}

    # Pass 1: collect class declarations before any usage is examined.
    for program in programs:
        for stmt in program.statements:
            if isinstance(stmt, ClassDecl):
                self.defined_classes[stmt.name] = stmt
                self._collect_class_fields(stmt)

    # Pass 2: record which categories, classes and methods are used.
    for program in programs:
        for stmt in program.statements:
            self._analyze_stmt(stmt)

    # A used class keeps alive the classes its field defaults construct.
    self._resolve_transitive_classes()

    if self.has_classes:
        self.used.add('object')
    if self.has_awk:
        self.used.add('awk')

    return self.used
def _collect_class_fields(self, cls: ClassDecl):
    """Record, for each field of *cls*, the class its default constructs.

    ``cls.fields`` is iterated as ``(field_name, default_value)`` pairs.
    A default that is a ``NewExpr`` contributes its ``class_name``; a
    default that is a ``CallExpr`` whose callee is an ``Identifier``
    contributes the callee's name. Every other field maps to ``None``.
    The result is stored in ``self.class_fields[cls.name]``.
    """
    per_field = self.class_fields[cls.name] = {}
    for name, default in cls.fields:
        target = None
        if default and isinstance(default, NewExpr):
            # Explicit construction: ``new X(...)``.
            target = default.class_name
        elif (default
              and isinstance(default, CallExpr)
              and isinstance(default.callee, Identifier)):
            # Call-style construction ``X(...)``. The name is recorded
            # without checking here whether it is a declared class.
            target = default.callee.name
        per_field[name] = target
def _resolve_transitive_classes(self):
changed = True
while changed:
changed = False
for cls_name in list(self.used_classes):
if cls_name in self.class_fields:
for field_name, field_class in self.class_fields[cls_name].items():
if field_class and field_class not in self.used_classes:
if field_class in self.defined_classes:
self.used_classes.add(field_class)
changed = True
def get_used_classes(self) -> set:
    """Return the names of the classes marked as used.

    The analyzer's own set object is returned, not a copy.
    """
    used = self.used_classes
    return used
def get_used_methods(self, class_name: str) -> set:
    """Return the method names recorded as used for *class_name*.

    A class with no recorded methods yields a new empty set.
    """
    if class_name in self.used_methods:
        return self.used_methods[class_name]
    return set()
def _analyze_stmt(self, stmt): def _analyze_stmt(self, stmt):
"""Анализирует statement."""
if isinstance(stmt, ClassDecl): if isinstance(stmt, ClassDecl):
self.has_classes = True self.has_classes = True
for method in stmt.methods: for method in stmt.methods:
...@@ -122,10 +144,8 @@ class UsageAnalyzer: ...@@ -122,10 +144,8 @@ class UsageAnalyzer:
self._analyze_expr(stmt.value) self._analyze_expr(stmt.value)
def _analyze_body(self, body): def _analyze_body(self, body):
"""Анализирует тело (список statements или Block)."""
if body is None: if body is None:
return return
# body может быть Block или list
if hasattr(body, 'statements'): if hasattr(body, 'statements'):
stmts = body.statements stmts = body.statements
elif isinstance(body, list): elif isinstance(body, list):
...@@ -136,7 +156,6 @@ class UsageAnalyzer: ...@@ -136,7 +156,6 @@ class UsageAnalyzer:
self._analyze_stmt(stmt) self._analyze_stmt(stmt)
def _analyze_expr(self, expr): def _analyze_expr(self, expr):
"""Анализирует expression и определяет используемые категории."""
if expr is None: if expr is None:
return return
...@@ -176,17 +195,37 @@ class UsageAnalyzer: ...@@ -176,17 +195,37 @@ class UsageAnalyzer:
elif isinstance(expr, NewExpr): elif isinstance(expr, NewExpr):
self.has_classes = True self.has_classes = True
self.used_classes.add(expr.class_name)
for arg in expr.arguments: for arg in expr.arguments:
self._analyze_expr(arg) self._analyze_expr(arg)
elif isinstance(expr, Identifier):
pass
def _analyze_call(self, expr: CallExpr): def _analyze_call(self, expr: CallExpr):
"""Анализирует вызов функции."""
callee = expr.callee callee = expr.callee
if isinstance(callee, Identifier):
if callee.name in self.defined_classes:
self.has_classes = True
self.used_classes.add(callee.name)
if isinstance(callee, MemberAccess): if isinstance(callee, MemberAccess):
if isinstance(callee.object, Identifier): if isinstance(callee.object, ThisExpr):
pass
elif isinstance(callee.object, MemberAccess) and isinstance(callee.object.object, ThisExpr):
field_name = callee.object.member
method = callee.member
for cls_name, fields in self.class_fields.items():
if field_name in fields and fields[field_name]:
field_class = fields[field_name]
if field_class not in self.used_methods:
self.used_methods[field_class] = set()
self.used_methods[field_class].add(method)
elif isinstance(callee.object, Identifier):
ns = callee.object.name ns = callee.object.name
# Namespace calls
if ns == 'http': if ns == 'http':
self.used.add('http') self.used.add('http')
elif ns == 'fs': elif ns == 'fs':
...@@ -204,20 +243,16 @@ class UsageAnalyzer: ...@@ -204,20 +243,16 @@ class UsageAnalyzer:
elif ns == 'args': elif ns == 'args':
self.used.add('args') self.used.add('args')
elif ns == 'shell': elif ns == 'shell':
pass # shell не требует stdlib pass
else: else:
# Вызов метода на переменной - может быть строка/массив/словарь/объект
method = callee.member method = callee.member
self._check_method(method) self._check_method(method)
def _analyze_member_access(self, expr: MemberAccess):
    """Visit a member-access expression; currently records nothing.

    The receiver is inspected, but neither branch updates any
    analyzer state.
    """
    receiver = expr.object
    if not isinstance(receiver, Identifier):
        return
    # Identifier receiver: no usage information is derived from it here.
def _check_method(self, method: str): def _check_method(self, method: str):
"""Определяет категорию по имени метода."""
string_methods = {'upper', 'lower', 'trim', 'len', 'contains', 'starts', string_methods = {'upper', 'lower', 'trim', 'len', 'contains', 'starts',
'ends', 'index', 'replace', 'substr', 'split', 'charAt'} 'ends', 'index', 'replace', 'substr', 'split', 'charAt'}
array_methods = {'push', 'pop', 'shift', 'join', 'get', 'set', 'slice', 'len'} array_methods = {'push', 'pop', 'shift', 'join', 'get', 'set', 'slice', 'len'}
......
...@@ -16,12 +16,9 @@ class StdlibMixin: ...@@ -16,12 +16,9 @@ class StdlibMixin:
self.emit ("# === ContenT Standard Library ===") self.emit ("# === ContenT Standard Library ===")
self.emit () self.emit ()
# Core всегда нужен
self._emit_core () self._emit_core ()
# Условная генерация по категориям
if used_categories is None: if used_categories is None:
# Старое поведение - генерировать всё
self._emit_http () self._emit_http ()
self._emit_fs () self._emit_fs ()
self._emit_json () self._emit_json ()
...@@ -37,7 +34,6 @@ class StdlibMixin: ...@@ -37,7 +34,6 @@ class StdlibMixin:
self._emit_dict () self._emit_dict ()
self._emit_misc () self._emit_misc ()
else: else:
# DCE - только используемые категории
if 'http' in used_categories: if 'http' in used_categories:
self._emit_http () self._emit_http ()
if 'fs' in used_categories: if 'fs' in used_categories:
......
...@@ -8,7 +8,10 @@ class StmtMixin: ...@@ -8,7 +8,10 @@ class StmtMixin:
if isinstance(stmt, FunctionDecl): if isinstance(stmt, FunctionDecl):
self.generate_function(stmt) self.generate_function(stmt)
elif isinstance(stmt, ClassDecl): elif isinstance(stmt, ClassDecl):
self.generate_class(stmt) if self.used_classes is not None and stmt.name not in self.used_classes:
self.emit(f"# DCE: skipped unused class {stmt.name}")
else:
self.generate_class(stmt)
elif isinstance(stmt, ImportStmt): elif isinstance(stmt, ImportStmt):
self.generate_import(stmt) self.generate_import(stmt)
elif isinstance(stmt, Assignment): elif isinstance(stmt, Assignment):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment