Spaces:

thanhkt
/

text2manim

Runtime error

App Files Files Community

text2manim / code_cleaner.py

thanhkt

Upload 13 files

1645305 verified about 2 months ago

raw

history blame contribute delete

6.85 kB

	"""
	Utilities for cleaning and validating Manim code generated by LLMs.
	"""

	import re
	import logging
	import json

	logger = logging.getLogger(__name__)

	def _extract_fenced_code(text):
	    """
	    Return the contents of the most relevant Markdown code fence in text.

	    A fence whose language tag starts with "py" (python, py, python3, ...)
	    wins over any other fence; otherwise the first fence is used. The
	    language tag is never part of the result, and an unterminated fence
	    (truncated LLM output) runs to the end of the text. Text without any
	    fence is returned unchanged.

	    Args:
	    text (str): The raw LLM response

	    Returns:
	    str: The code found inside the fence, or the text itself
	    """
	    fences = re.findall(
	        r'```[ \t]*([A-Za-z0-9_+\-]*)[ \t]*\r?\n(.*?)(?:```|\Z)',
	        text,
	        re.DOTALL,
	    )
	    for tag, body in fences:
	        if tag.lower().startswith('py'):
	            return body
	    if fences:
	        return fences[0][1]

	    # Inline fence with no newline after the opening backticks.
	    pieces = text.split('```')
	    return pieces[1] if len(pieces) > 1 else text


	def _first_indent(text, default):
	    """
	    Return the leading whitespace of the first indented, non-blank line
	    that follows a newline in text, or default when there is none.
	    """
	    found = re.search(r'\n([ \t]+)\S', text)
	    return found.group(1) if found else default


	def _wrap_in_scene(code):
	    """
	    Wrap loose statements in a minimal ManimScene class.

	    Single-line top-level imports are hoisted above the class because a
	    star import (from manim import *) is a SyntaxError inside a function
	    body. Every remaining line is indented, not only the first one. If the
	    snippet already defines a top-level construct function, it is placed
	    directly in the class instead of being nested in a second construct.

	    Args:
	    code (str): Code that does not define a Scene class

	    Returns:
	    str: The code wrapped in "class ManimScene(Scene)"
	    """
	    hoisted = []
	    body = []
	    for line in code.strip('\n').splitlines():
	        is_import = re.match(r'(?:import\s+\w|from\s+[\w.]+\s+import\b)', line)
	        # Multi-line imports stay in the body; splitting them would break them.
	        continues = line.rstrip().endswith(('(', ',', '\\'))
	        if is_import and not continues:
	            hoisted.append(line)
	        else:
	            body.append(line)

	    body_text = '\n'.join(body).strip('\n')
	    if re.search(r'^def\s+construct\s*\(', body_text, re.MULTILINE):
	        header = 'class ManimScene(Scene):\n'
	        pad = '    '
	    else:
	        header = 'class ManimScene(Scene):\n    def construct(self):\n'
	        pad = '        '

	    if not body_text.strip():
	        body_text = 'pass'
	    indented = '\n'.join(
	        pad + line if line.strip() else ''
	        for line in body_text.splitlines()
	    )

	    prefix = '\n'.join(hoisted)
	    if prefix:
	        prefix += '\n\n'
	    return prefix + header + indented + '\n'


	def clean_manim_code(raw_code):
	    """
	    Clean Manim code from LLM responses by removing markdown formatting
	    and ensuring proper structure.

	    The steps are: extract the fenced code block (if any), wrap loose
	    statements in a ManimScene class when no Scene class is present, make
	    sure "from manim import" appears at module level, add a construct stub
	    to a Scene class that lacks one, and append self.wait(1) when the
	    construct method does not already wait.

	    Args:
	    raw_code (str): The raw code from the LLM response. None is treated
	    as an empty response.

	    Returns:
	    str: Cleaned Python code with surrounding whitespace stripped
	    """
	    code = '' if raw_code is None else str(raw_code)

	    # Extract code from markdown code blocks if present
	    code = _extract_fenced_code(code)

	    # Remove any remaining backticks
	    code = code.replace('```', '')

	    # Verify the code contains a Scene class. This runs before the import
	    # is added so the import never ends up inside construct().
	    if 'class' not in code or 'Scene' not in code:
	        logger.warning("Generated code does not contain a proper Scene class")
	        code = _wrap_in_scene(code)

	    # Ensure the code has the necessary module-level import
	    if not re.search(r'^from\s+manim\s+import\b', code.strip(), re.MULTILINE):
	        code = 'from manim import *\n\n' + code.lstrip('\n')

	    # Verify the code has a construct method
	    if not re.search(r'def\s+construct\s*\(\s*self\b', code):
	        logger.warning("Generated code does not contain a construct method")
	        # Try to find where the class is defined and add construct method
	        class_match = re.search(r'class\s+\w+\s*\(\s*[\w.]*Scene\s*\)\s*:', code)
	        if class_match:
	            insert_pos = class_match.end()
	            # Reuse the indentation of the existing class body so the stub
	            # lines up with the members that follow it.
	            member_indent = _first_indent(code[insert_pos:], '    ')
	            stub = (
	                f'\n{member_indent}def construct(self):'
	                f'\n{member_indent * 2}pass\n'
	            )
	            code = code[:insert_pos] + stub + code[insert_pos:]

	    # Ensure there's a wait at the end if not present
	    if 'self.wait(' not in code.split('def construct')[-1]:
	        header_match = re.search(
	            r'def\s+construct\s*\(\s*self\s*\)\s*(?:->[^:\n]*)?:', code
	        )
	        if header_match:
	            body_indent = _first_indent(code[header_match.end():], ' ' * 8)
	            # NOTE: the wait is appended at the very end of the code, so this
	            # assumes construct() is the last definition in the snippet.
	            code = code.rstrip() + f'\n{body_indent}self.wait(1)\n'

	    return code.strip()

	def _find_list_text(content, key):
	    """
	    Locate the raw text of the value stored under key.

	    The key may be followed by an optional closing quote, a colon and
	    whitespace. A bracketed value runs up to the closing "]"; an
	    unbracketed value runs to the end of its line. The opening quote of
	    the first item is left in place so quoted items can still be paired.

	    Args:
	    content (str): The text to search
	    key (str): The field name, matched case-insensitively

	    Returns:
	    str or None: The raw value text, or None when the key is absent
	    """
	    found = re.search(
	        key + r'(?=["\s:\[])"?\s*:?\s*(\[)?',
	        content,
	        re.IGNORECASE,
	    )
	    if found is None:
	        return None
	    rest = content[found.end():]
	    terminator = ']' if found.group(1) else '\n'
	    return rest.split(terminator, 1)[0]


	def _split_items(raw):
	    """
	    Split raw list text into items.

	    Double-quoted items are preferred; otherwise the text is split on
	    commas, surrounding whitespace and single quotes are removed, and
	    empty items are dropped.
	    """
	    quoted = re.findall(r'"([^"]+)"', raw)
	    if quoted:
	        return quoted
	    pieces = (piece.strip(" \t\r\n'") for piece in raw.split(','))
	    return [piece for piece in pieces if piece]


	def parse_scenario_from_llm_response(content):
	    """
	    Extract structured scenario information from an LLM response.

	    The first JSON object found in the text is returned as-is. When no
	    object can be decoded, a best-effort pattern match fills in the
	    "title", "objects", "transformations" and "equations" fields.

	    Args:
	    content (str): The LLM response text. None is treated as empty text.

	    Returns:
	    dict: Extracted scenario dictionary. In the fallback path "equations"
	    is None when the response spells it as null/none.
	    """
	    if not isinstance(content, str):
	        content = '' if content is None else str(content)

	    # Decode the JSON object starting at the first "{". raw_decode stops at
	    # the end of that object, so trailing prose containing braces is fine.
	    start = content.find('{')
	    if start != -1:
	        try:
	            scenario_dict, _ = json.JSONDecoder().raw_decode(content, start)
	        except (ValueError, RecursionError) as e:
	            logger.error("Error parsing scenario JSON: %s", e)
	        else:
	            return scenario_dict

	    # Manual parsing fallback
	    scenario = {
	        "title": "",
	        "objects": [],
	        "transformations": [],
	        "equations": [],
	    }

	    # The title ends at the closing quote or at the end of its line.
	    title_match = re.search(r'title["\s:]+([^"\n]+)', content, re.IGNORECASE)
	    if title_match:
	        scenario["title"] = title_match.group(1).strip()

	    # Extract lists with various possible formats
	    for key in ("objects", "transformations"):
	        raw = _find_list_text(content, key)
	        if raw is not None:
	            scenario[key] = _split_items(raw)

	    # Extract equations if present; an explicit null/none maps to None
	    raw_equations = _find_list_text(content, "equations")
	    if raw_equations is not None:
	        marker = raw_equations.strip().rstrip(',').strip().lower()
	        if marker in ('null', 'none'):
	            scenario["equations"] = None
	        else:
	            scenario["equations"] = _split_items(raw_equations)

	    return scenario

	def _base_class_name(base):
	    """
	    Return the rightmost identifier of a class base expression, or an
	    empty string when the base is not a plain or dotted name.
	    """
	    return getattr(base, 'id', None) or getattr(base, 'attr', None) or ''


	def validate_manim_code(code):
	    """
	    Perform basic validation on Manim code to catch common issues.

	    The code is parsed and compiled (never executed), then inspected
	    structurally so that keywords appearing only in comments or strings do
	    not count. The checks, in order, are: valid syntax, a "from manim"
	    import, a class with a base whose name ends in "Scene", a construct
	    method defined in a class body, and a self.play(...) or self.add(...)
	    call.

	    Args:
	    code (str): The Manim code to validate

	    Returns:
	    tuple: (is_valid, error_message)
	    """
	    # Imported here because no other part of the module needs it.
	    import ast

	    if not isinstance(code, str):
	        return False, "Syntax error: code must be a string"

	    # Check for basic Python syntax errors. Compiling the tree also catches
	    # errors the parser alone accepts, such as a star import in a function.
	    try:
	        tree = ast.parse(code, filename='<string>')
	        compile(tree, '<string>', 'exec')
	    except (SyntaxError, ValueError) as e:
	        return False, f"Syntax error: {str(e)}"

	    nodes = list(ast.walk(tree))

	    # Check for necessary components
	    has_import = any(
	        isinstance(node, ast.ImportFrom)
	        and node.level == 0
	        and (node.module == 'manim' or (node.module or '').startswith('manim.'))
	        for node in nodes
	    )
	    if not has_import:
	        return False, "Missing Manim import"

	    classes = [node for node in nodes if isinstance(node, ast.ClassDef)]
	    has_scene = any(
	        _base_class_name(base).endswith('Scene')
	        for cls in classes
	        for base in cls.bases
	    )
	    if not has_scene:
	        return False, "No Scene class defined"

	    has_construct = any(
	        isinstance(member, ast.FunctionDef) and member.name == 'construct'
	        for cls in classes
	        for member in cls.body
	    )
	    if not has_construct:
	        return False, "No construct method defined"

	    # Check for common Manim issues
	    draws_something = any(
	        isinstance(node, ast.Call)
	        and isinstance(node.func, ast.Attribute)
	        and node.func.attr in ('play', 'add')
	        and isinstance(node.func.value, ast.Name)
	        and node.func.value.id == 'self'
	        for node in nodes
	    )
	    if not draws_something:
	        return False, "No objects added to scene (missing self.play or self.add calls)"

	    # All checks passed
	    return True, "Code appears valid"