# text2manim / code_cleaner.py
# thanhkt's picture
# Upload 13 files
# 1645305 verified
# raw
# history blame contribute delete
# 6.85 kB
"""
Utilities for cleaning and validating Manim code generated by LLMs.
"""
import re
import logging
import json
logger = logging.getLogger(__name__)
def clean_manim_code(raw_code):
    """
    Clean Manim code from LLM responses by removing markdown formatting
    and ensuring proper structure.

    The cleaning steps run in this order:

    1. Extract the code from a Markdown code fence, if one is present.
    2. Make sure a top-level ``from manim import ...`` line exists.
    3. Wrap the code in a ``ManimScene(Scene)`` class when no class
       deriving from a ``*Scene`` base is found.
    4. Add a stub ``construct`` method when the scene class lacks one.
    5. Append ``self.wait(1)`` to ``construct`` when it never waits.

    Args:
        raw_code (str): The raw code from the LLM response. ``None`` or an
            empty string is treated as "no code".

    Returns:
        str: Cleaned Python code, stripped of surrounding whitespace
    """
    code = _strip_markdown_fences(raw_code or "")
    code = _ensure_manim_import(code)
    if not _SCENE_CLASS_RE.search(code):
        logger.warning("Generated code does not contain a proper Scene class")
        code = _wrap_in_scene(code)
    code = _ensure_construct_method(code)
    code = _ensure_trailing_wait(code)
    return code.strip()


# One indentation level used for every piece of code this module generates.
_INDENT = "    "

# A class statement whose base list mentions a ``*Scene`` class
# (Scene, ThreeDScene, MovingCameraScene, manim.Scene, ...).
_SCENE_CLASS_RE = re.compile(
    r"^(?P<indent>[ \t]*)class\s+\w+\s*\([^)]*Scene[^)]*\)\s*:", re.MULTILINE
)

# The start of a ``construct`` method definition taking ``self``.
_CONSTRUCT_DEF_RE = re.compile(r"def\s+construct\s*\(\s*self\s*[,)]")

# A single-line import statement that starts in column 0.
_TOP_LEVEL_IMPORT_RE = re.compile(r"(?:from\s+\S+\s+import|import)\s+\S")


def _strip_markdown_fences(text):
    """Return the code inside the relevant Markdown fence, or *text* unchanged."""
    fence = "```"
    if fence not in text:
        return text

    python_fence = fence + "python"
    if python_fence in text:
        # Prefer an explicitly Python-tagged block over any earlier generic one.
        body = text.split(python_fence, 1)[1].split(fence, 1)[0]
    else:
        body = text.split(fence, 2)[1]
        if not body.strip():
            # Only a stray (e.g. closing) fence was present: keep the text
            # around it instead of throwing all of the code away.
            return text.replace(fence, "")

    # Drop what is left of the fence's info string ("py", the "3" of
    # "python3", ...) together with the line break that ends the fence line.
    return re.sub(r"\A[\w+-]*[ \t]*\r?\n", "", body, count=1)


def _ensure_manim_import(code):
    """Prepend ``from manim import *`` unless a top-level Manim import exists."""
    if re.search(r"^from manim import\b", code, re.MULTILINE):
        return code
    return "from manim import *\n\n" + code


def _wrap_in_scene(code):
    """Wrap loose statements in ``ManimScene.construct``, hoisting imports."""
    import textwrap

    imports = []
    statements = []
    for line in code.splitlines():
        opens_continuation = line.rstrip().endswith(("(", "\\"))
        if _TOP_LEVEL_IMPORT_RE.match(line) and not opens_continuation:
            # Imports must stay at module level: ``from x import *`` is a
            # SyntaxError inside a function, and ``Scene`` has to be in scope
            # where the class statement runs.
            imports.append(line.rstrip())
        else:
            statements.append(line)
    if not any(entry.startswith("from manim import") for entry in imports):
        imports.insert(0, "from manim import *")

    body = textwrap.dedent("\n".join(statements)).strip("\n")
    if not body.strip():
        # Nothing to animate; still emit a syntactically valid method body.
        body = "self.wait(1)"

    return (
        "\n".join(imports)
        + "\n\n\nclass ManimScene(Scene):\n"
        + _INDENT + "def construct(self):\n"
        + textwrap.indent(body, _INDENT * 2)
        + "\n"
    )


def _ensure_construct_method(code):
    """Insert a stub ``construct`` into the scene class when none is defined."""
    if _CONSTRUCT_DEF_RE.search(code):
        return code
    logger.warning("Generated code does not contain a construct method")

    header = _SCENE_CLASS_RE.search(code)
    if header is None:
        return code

    # Reuse the indentation of the existing class body so the stub does not
    # clash with members indented by two spaces or tabs.
    class_indent = header.group("indent")
    member_indent = class_indent + _INDENT
    following = re.search(r"\n([ \t]+)\S", code[header.end():])
    if following and len(following.group(1)) > len(class_indent):
        member_indent = following.group(1)

    stub = (
        "\n" + member_indent + "def construct(self):"
        + "\n" + member_indent + _INDENT + "pass\n"
    )
    return code[:header.end()] + stub + code[header.end():]


def _ensure_trailing_wait(code):
    """Add ``self.wait(1)`` at the end of ``construct`` if it never waits."""
    import ast

    wait_call = "self.wait(1)"

    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError):
        tree = None

    if tree is not None:
        constructs = [
            node for node in ast.walk(tree)
            if isinstance(node, ast.FunctionDef) and node.name == "construct"
        ]
        if not constructs:
            return code
        target = max(constructs, key=lambda node: node.lineno)
        last_line = getattr(target, "end_lineno", None)
        if last_line is not None:
            first_stmt_line = target.body[0].lineno
            if first_stmt_line == target.lineno:
                # One-line ``def construct(self): ...``; there is no safe
                # place to add a statement, so leave the code alone.
                return code
            # ast counts lines by "\n", so split the same way.
            lines = code.split("\n")
            if any("self.wait(" in line for line in lines[target.lineno - 1:last_line]):
                return code
            sample = lines[first_stmt_line - 1]
            indent = sample[:len(sample) - len(sample.lstrip())]
            # Insert right after the method's last line so that code following
            # the method (other methods, a __main__ guard) is left intact.
            lines.insert(last_line, indent + wait_call)
            return "\n".join(lines)

    # The code does not parse (or the parser gives no end positions): fall
    # back to a purely textual append at the end of the code.
    if "self.wait(" in code.split("def construct")[-1]:
        return code
    header = re.search(r"def\s+construct\s*\(\s*self\s*\)\s*:", code)
    if header is None:
        return code
    indent_match = re.search(r"\n([ \t]+)\S", code[header.end():])
    indent = indent_match.group(1) if indent_match else _INDENT * 2
    return code.rstrip() + "\n" + indent + wait_call + "\n"
def parse_scenario_from_llm_response(content):
    """
    Extract structured scenario information from an LLM response.

    The first JSON object found in the response is returned as-is. Any text
    after that object is ignored, so trailing prose (even prose containing
    braces) does not prevent parsing. When no JSON object can be decoded, the
    fields are recovered with pattern matching instead.

    Args:
        content (str): The LLM response text. ``None`` is treated as empty.

    Returns:
        dict: Extracted scenario dictionary. In the pattern-matching fallback
        it always has the keys ``title`` (str), ``objects`` (list),
        ``transformations`` (list) and ``equations`` (list, or ``None`` when
        the response says ``null``/``none``).
    """
    text = "" if content is None else str(content)

    # Preferred path: decode the JSON object that starts at the first brace.
    brace = text.find("{")
    if brace != -1:
        try:
            parsed, _ = json.JSONDecoder().raw_decode(text[brace:])
        except ValueError as e:  # json.JSONDecodeError subclasses ValueError
            logger.error("Error parsing scenario JSON: %s", e)
        else:
            if isinstance(parsed, dict):
                return parsed

    # Manual parsing fallback
    scenario = {
        "title": "",
        "objects": _extract_scenario_list(text, "objects"),
        "transformations": _extract_scenario_list(text, "transformations"),
        "equations": _extract_scenario_list(text, "equations", nullable=True),
    }

    # The title ends at the closing quote or at the end of its line; without
    # the line limit an unquoted title would swallow the rest of the response.
    title_match = re.search(r'title["\s:]+([^"\n]+)', text, re.IGNORECASE)
    if title_match:
        scenario["title"] = title_match.group(1).strip().rstrip(",").rstrip()

    return scenario


def _extract_scenario_list(text, key, nullable=False):
    """
    Return the list of items that follows *key* in *text*.

    A bracketed value (``key: [a, "b"]``) is read up to the closing bracket;
    an unbracketed one (``key: a, b``) is read up to the end of its line.
    Returns ``[]`` when the key is absent, and ``None`` when *nullable* is
    true and the value is ``null`` or ``none``.
    """
    match = re.search(
        re.escape(key) + r'["\':\s]+(?:\[([^\]]*)\]?|([^\n]*))',
        text,
        re.IGNORECASE,
    )
    if match is None:
        return []

    raw = match.group(1) if match.group(1) is not None else match.group(2)
    if nullable and raw.strip(" \t\r\n,}\"'").lower() in ("null", "none"):
        return None

    # Handle both quote-wrapped and plain comma-separated items
    quoted = re.findall(r'"([^"]+)"', raw)
    if quoted:
        return quoted
    pieces = (piece.strip(" \t\r\n\"'") for piece in raw.split(","))
    return [piece for piece in pieces if piece]
def validate_manim_code(code):
    """
    Perform basic validation on Manim code to catch common issues.

    The checks look at the parsed syntax tree rather than at raw substrings,
    so text inside comments or string literals cannot satisfy them.

    Args:
        code (str): The Manim code to validate

    Returns:
        tuple: (is_valid, error_message)
    """
    import ast

    if not isinstance(code, str):
        return False, "Syntax error: code must be a string"

    # Check for basic Python syntax errors. Compiling the tree as well catches
    # errors that parsing alone does not report (e.g. ``import *`` in a
    # function). ValueError covers source containing NUL bytes.
    try:
        tree = ast.parse(code, "<string>")
        compile(tree, "<string>", "exec")
    except (SyntaxError, ValueError) as e:
        return False, f"Syntax error: {str(e)}"

    def base_identifier(base):
        # ``Scene`` -> "Scene", ``manim.Scene`` -> "Scene", anything else -> "".
        if isinstance(base, ast.Name):
            return base.id
        if isinstance(base, ast.Attribute):
            return base.attr
        return ""

    has_import = False
    has_scene_class = False
    has_construct = False
    adds_content = False

    for node in ast.walk(tree):
        if isinstance(node, ast.ImportFrom):
            module = node.module or ""
            if node.level == 0 and (module == "manim" or module.startswith("manim.")):
                has_import = True
        elif isinstance(node, ast.Import):
            if any(
                alias.name == "manim" or alias.name.startswith("manim.")
                for alias in node.names
            ):
                has_import = True
        elif isinstance(node, ast.ClassDef):
            if any("Scene" in base_identifier(base) for base in node.bases):
                has_scene_class = True
            if any(
                isinstance(member, ast.FunctionDef) and member.name == "construct"
                for member in node.body
            ):
                has_construct = True
        elif isinstance(node, ast.Call):
            func = node.func
            if (
                isinstance(func, ast.Attribute)
                and func.attr in ("play", "add")
                and isinstance(func.value, ast.Name)
                and func.value.id == "self"
            ):
                adds_content = True

    # Check for necessary components
    if not has_import:
        return False, "Missing Manim import"
    if not has_scene_class:
        return False, "No Scene class defined"
    if not has_construct:
        return False, "No construct method defined"

    # Check for common Manim issues
    if not adds_content:
        return False, "No objects added to scene (missing self.play or self.add calls)"

    # All checks passed
    return True, "Code appears valid"