Spaces:

kedar-bhumkar
/

Code_change_impact_analyzer

Sleeping

App Files Files Community

kedar-bhumkar committed on Mar 17

Commit

3d833be

verified ·

1 Parent(s): f9c4f9f

Upload 5 files

Browse files

Files changed (5) hide show

README.md +75 -14
app.py +255 -0
backend.py +180 -0
pydantic_model.py +14 -0
requirements.txt +6 -0

README.md CHANGED Viewed

@@ -1,14 +1,75 @@
----
-title: Code Change Impact Analyzer
-emoji: 🌍
-colorFrom: pink
-colorTo: indigo
-sdk: streamlit
-sdk_version: 1.43.2
-app_file: app.py
-pinned: false
-license: apache-2.0
-short_description: Analyze the impact of a code change (GIT repo) using LLM
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: Code Impact Analyzer
+emoji: 🔍
+colorFrom: blue
+colorTo: indigo
+sdk: streamlit
+sdk_version: 1.32.0
+app_file: app.py
+pinned: false
+---
+# Code Impact Analyzer
+A powerful tool that analyzes code changes in Git repositories using AI to provide detailed impact analysis.
+## Features
+- 🔍 **Git Repository Analysis**: Clone and analyze any public Git repository
+- 🤖 **AI-Powered Analysis**: Uses GPT-4 and Claude Sonnet for intelligent code analysis
+- 📊 **Impact Assessment**: Provides detailed analysis of code changes and their impact
+- 🔒 **Secure API Key Management**: Supports both environment variables and session-based API keys
+- 📝 **Structured Output**: Returns analysis in a standardized JSON format
+- 📦 **Large Codebase Support**: Handles large repositories through intelligent chunking
+## Usage
+1. Enter a Git repository URL
+2. Select your preferred AI model (GPT-4 or Claude Sonnet)
+3. Enter your code/configuration changes
+4. Click "Analyze" to get detailed impact analysis
+## API Key Setup
+### Option 1: Environment Variables
+Set your API keys in the `.env` file:
+```
+OPENAI_API_KEY=your_openai_key_here
+ANTHROPIC_API_KEY=your_anthropic_key_here
+```
+### Option 2: In-App Input
+Enter your OpenAI API key directly in the application interface.
+## Analysis Output
+The tool provides analysis in the following format:
+```json
+{
+    "severity_level": "LOW/MEDIUM/HIGH",
+    "number_of_files_impacted": <integer>,
+    "files_impacted": [
+        {
+            "files_impacted": "file_path",
+            "impact_details": "detailed_impact_description"
+        }
+    ]
+}
+```
+## Severity Levels
+- **LOW**: 1-3 files impacted
+- **MEDIUM**: 4-8 files impacted
+- **HIGH**: More than 8 files impacted
+## Technical Details
+- Built with Streamlit
+- Uses OpenAI's GPT-4 and Anthropic's Claude Sonnet
+- Supports multiple programming languages
+- Handles large codebases through token-based chunking
+## License
+MIT License

app.py ADDED Viewed

	@@ -0,0 +1,255 @@

+import streamlit as st
+import tempfile
+import json
+from backend import (
+    clone_repository,
+    read_code_files,
+    analyze_code,
+    check_api_keys
+)
+def get_severity_color(severity):
+    """Return the hex color used to display a severity label.
+
+    The label is compared case-insensitively with LOW, MEDIUM and HIGH;
+    every other label falls back to black.
+    """
+    label = severity.upper()
+    if label == "LOW":
+        return "#FFA500"  # Orange
+    if label == "MEDIUM":
+        return "#FF6B6B"  # Light Red
+    if label == "HIGH":
+        return "#FF0000"  # Red
+    return "#000000"
+def render_analysis_results(analysis_text):
+    """Render the analysis results according to the Pydantic model schema.
+
+    Args:
+        analysis_text: JSON text expected to hold ``number_of_files_impacted``
+            and ``files_impacted`` (a list of objects with ``files_impacted``
+            and ``impact_details``).
+
+    The displayed severity is derived from the file count; the
+    ``severity_level`` value in the payload is not used. Text that is not
+    valid JSON is shown as plain Markdown, and any other failure is reported
+    with ``st.error`` followed by the raw text.
+    """
+    # Imported here so the function does not rely on a module-level import of html.
+    import html
+    try:
+        # Parse the analysis text as JSON
+        analysis_data = json.loads(analysis_text)
+        # Custom CSS for styling
+        st.markdown("""
+            <style>
+                .severity-box {
+                    background-color: #f0f2f6;
+                    padding: 1rem;
+                    border-radius: 0.5rem;
+                    margin: 1rem 0;
+                }
+                .file-impact {
+                    background-color: #ffffff;
+                    padding: 1rem;
+                    border-radius: 0.5rem;
+                    margin: 0.5rem 0;
+                    border: 1px solid #e1e4e8;
+                }
+                .impact-count {
+                    background-color: #e6f3ff;
+                    padding: 0.5rem 1rem;
+                    border-radius: 0.5rem;
+                    margin: 1rem 0;
+                }
+            </style>
+        """, unsafe_allow_html=True)
+        # A missing or null count is treated as zero impacted files
+        files_count = analysis_data.get('number_of_files_impacted') or 0
+        # Calculate severity level based on number of files impacted
+        severity_level = _severity_label_for_count(files_count)
+        # Display Severity Level with custom styling
+        severity_color = get_severity_color(severity_level)
+        st.markdown(f"""
+            <div class="severity-box">
+                <h3 style='color: {severity_color}; margin: 0; font-size: 1.5rem; font-weight: bold;'>
+                    Severity Level: {severity_level}
+                </h3>
+            </div>
+        """, unsafe_allow_html=True)
+        # Display Number of Files Impacted with custom styling
+        st.markdown(f"""
+            <div class="impact-count">
+                <h3 style='color: #1f77b4; margin: 0; font-size: 1.2rem;'>
+                    Number of Files Impacted: {html.escape(str(files_count))}
+                </h3>
+            </div>
+        """, unsafe_allow_html=True)
+        # Display Files Impacted with custom styling
+        st.markdown("<h3 style='color: #2c3e50; font-size: 1.3rem;'>Files Impacted</h3>", unsafe_allow_html=True)
+        for file_impact in analysis_data.get('files_impacted') or []:
+            # The details come from model output produced over repository content,
+            # so they are escaped before being placed inside raw HTML.
+            impact_details = html.escape(str(file_impact['impact_details']))
+            with st.expander(f"📄 {file_impact['files_impacted']}", expanded=False):
+                st.markdown(f"""
+                    <div class="file-impact">
+                        <p style='color: #34495e; font-size: 1rem; line-height: 1.6;'>
+                            {impact_details}
+                        </p>
+                    </div>
+                """, unsafe_allow_html=True)
+    except json.JSONDecodeError:
+        # If the response is not valid JSON, display it as plain text
+        st.markdown(analysis_text)
+    except Exception as e:
+        st.error(f"Error rendering analysis results: {str(e)}")
+        st.markdown(analysis_text)
+def _severity_label_for_count(files_count):
+    """Map an impacted-file count to the severity label shown in the UI."""
+    if files_count <= 0:
+        return "No Impact"
+    if files_count <= 3:
+        return "Low"
+    if files_count <= 8:
+        return "Medium"
+    return "High"
+def main():
+    """Run the Streamlit page: collect inputs, clone the repository and show the analysis.
+
+    The page offers two selectable examples, an optional OpenAI key input that
+    is kept in ``st.session_state``, and an "Analyze" button that clones the
+    repository into a temporary directory, reads its code files and renders
+    the result returned by ``analyze_code``.
+    """
+    st.title("Git Repository Code Analyzer")
+    st.write("Enter a Git repository URL and a prompt to analyze the code.")
+    # Example data
+    examples = [
+        {
+            "Git URL": "https://github.com/kedar-bhumkar/SFRoutingFramework",
+            "Code/Config Changes": "Enum USER_INTERFACE removed from file: BaseAppLiterals.cls"
+        },
+        {
+            "Git URL": "https://github.com/kedar-bhumkar/SFDynamicFields",
+            "Code/Config Changes": "Removed a field Value__c from DynamicFieldTable__c.object"
+        }
+    ]
+    # Initialize session state if not exists
+    if 'selected_example' not in st.session_state:
+        st.session_state.selected_example = None
+    if 'openai_key' not in st.session_state:
+        st.session_state.openai_key = ""
+    # API Key input section
+    with st.expander("🔑 API Key Settings", expanded=False):
+        st.markdown("""
+            <style>
+                .api-key-section {
+                    background-color: #f8f9fa;
+                    padding: 1rem;
+                    border-radius: 0.5rem;
+                    margin: 0.5rem 0;
+                }
+            </style>
+        """, unsafe_allow_html=True)
+        st.markdown("""
+            <div class="api-key-section">
+                <p style='color: #2c3e50; font-size: 0.9rem;'>
+                    Enter your OpenAI API key to use the GPT-4 model. The key will be stored in the session and not saved permanently.
+                </p>
+            </div>
+        """, unsafe_allow_html=True)
+        openai_key = st.text_input(
+            "OpenAI API Key",
+            value=st.session_state.openai_key,
+            type="password",
+            help="Enter your OpenAI API key to use GPT-4"
+        )
+        if openai_key:
+            st.session_state.openai_key = openai_key
+            st.success("API key saved for this session")
+    # Display examples table with Select buttons
+    st.subheader("Example Cases")
+    # Create columns for the table
+    col1, col2, col3 = st.columns([2, 2, 1])
+    # Table header
+    with col1:
+        st.write("**Git URL**")
+    with col2:
+        st.write("**Code/Config Changes**")
+    with col3:
+        st.write("**Action**")
+    # Table rows
+    for idx, example in enumerate(examples):
+        # Fresh columns per row keep the three cells of one example on the same line
+        row_col1, row_col2, row_col3 = st.columns([2, 2, 1])
+        with row_col1:
+            st.write(example["Git URL"])
+        with row_col2:
+            st.write(example["Code/Config Changes"])
+        with row_col3:
+            if st.button("Select", key=f"select_{idx}"):
+                st.session_state.selected_example = idx
+                st.session_state.repo_url = example["Git URL"]
+                st.session_state.prompt = example["Code/Config Changes"]
+                # st.rerun replaces the deprecated st.experimental_rerun
+                st.rerun()
+    # Get user inputs
+    repo_url = st.text_input(
+        "Git Repository URL",
+        value=st.session_state.get("repo_url", "")
+    )
+    # Model selection
+    model = st.selectbox(
+        "Select AI Model",
+        ["gpt-4", "claude-sonnet (coming soon)"],
+        help="Choose the AI model to analyze the code"
+    )
+    prompt = st.text_area(
+        "Code or configuration changes",
+        value=st.session_state.get("prompt", "List down the code/configuration changes to be performed")
+    )
+    # Clear button
+    if st.button("Clear Selection"):
+        st.session_state.selected_example = None
+        st.session_state.repo_url = ""
+        st.session_state.prompt = "List down the code/configuration changes to be performed"
+        st.rerun()
+    if st.button("Analyze"):
+        if not repo_url:
+            st.error("Please enter a Git repository URL")
+            return
+        # The second selectbox entry is a placeholder label; stop here instead of
+        # sending that label to the backend as if it were a model name.
+        if model != "gpt-4":
+            st.error("The selected model is not available yet. Please choose gpt-4.")
+            return
+        # Check API keys: a key typed into the settings section or one from the environment is accepted
+        api_keys_status = check_api_keys()
+        # NOTE(review): the session key only satisfies this check. analyze_code() below is
+        # called without it, so the backend presumably still uses the environment key —
+        # confirm and forward the key to the backend.
+        if not (st.session_state.openai_key or api_keys_status["gpt-4"]):
+            st.error("OpenAI API key not found. Please enter your key in the API Key Settings section or set the OPENAI_API_KEY environment variable.")
+            return
+        with st.spinner("Cloning repository and analyzing code..."):
+            # Create a temporary directory
+            with tempfile.TemporaryDirectory() as temp_dir:
+                # Clone the repository
+                success, error = clone_repository(repo_url, temp_dir)
+                if not success:
+                    st.error(f"Error cloning repository: {error}")
+                    return
+                # Read code files
+                code_files, warnings = read_code_files(temp_dir)
+                # Display any warnings from reading files
+                for warning in warnings:
+                    st.warning(warning)
+                if not code_files:
+                    st.warning("No code files found in the repository.")
+                    return
+                # Analyze the code
+                analysis, error = analyze_code(code_files, prompt, model)
+                if error:
+                    st.error(f"Error during analysis: {error}")
+                    return
+                if analysis:
+                    st.subheader("Analysis Results")
+                    render_analysis_results(analysis)
+if __name__ == "__main__":
+    main()

backend.py ADDED Viewed

	@@ -0,0 +1,180 @@

+import os
+import git
+from pathlib import Path
+from openai import OpenAI
+from anthropic import Anthropic
+from dotenv import load_dotenv
+from pydantic_model import ImpactAnalysis
+import tiktoken
+import json
+from typing import List, Tuple, Dict, Any
+# Load environment variables
+# This runs before the os.getenv() calls below, so anything load_dotenv() adds to the environment is visible to them.
+load_dotenv()
+# Initialize API clients
+# Both clients are created once, at import time, from the environment values.
+# NOTE(review): os.getenv() returns None when a variable is unset. The OpenAI client is
+# believed to raise on construction without a key, which would make importing this module
+# fail — TODO confirm and consider creating the clients lazily.
+openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+anthropic_client = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
+def clone_repository(repo_url, temp_dir):
+    """Clone a git repository to a temporary directory.
+
+    Args:
+        repo_url: URL of the repository; surrounding whitespace is ignored.
+        temp_dir: Directory the repository is cloned into.
+
+    Returns:
+        ``(True, None)`` on success, otherwise ``(False, error_message)``.
+    """
+    # Pasted URLs often carry stray whitespace; a blank value is rejected before git is invoked
+    repo_url = (repo_url or "").strip()
+    if not repo_url:
+        return False, "Repository URL is empty"
+    try:
+        git.Repo.clone_from(repo_url, temp_dir)
+        return True, None
+    except Exception as e:
+        return False, str(e)
+def read_code_files(directory):
+    """Read all code files from the directory.
+
+    Files are selected by extension (case-insensitively) and visited in sorted
+    order so the result is reproducible. The ``.git`` directory is not
+    traversed, and symbolic links are skipped so a cloned repository cannot
+    make the reader open files outside of it.
+
+    Args:
+        directory: Root directory to scan.
+
+    Returns:
+        ``(code_files, warnings)`` where ``code_files`` is a list of
+        ``{'path': <path relative to directory>, 'content': <text>}`` dicts and
+        ``warnings`` is a list of messages for files that were skipped.
+    """
+    code_files = []
+    code_extensions = {'.py', '.js', '.jsx', '.ts', '.tsx', '.java', '.cpp', '.c', '.cs', '.go', '.rb', '.php', '.cls', '.object','.page'}
+    skipped_dirs = {'.git'}
+    warnings = []
+    for root, dirs, files in os.walk(directory):
+        # Assigning in place prunes the walk and fixes the traversal order
+        dirs[:] = sorted(d for d in dirs if d not in skipped_dirs)
+        for file in sorted(files):
+            if Path(file).suffix.lower() not in code_extensions:
+                continue
+            file_path = os.path.join(root, file)
+            relative_path = os.path.relpath(file_path, directory)
+            if os.path.islink(file_path):
+                warnings.append(f"Skipped symbolic link {relative_path}")
+                continue
+            try:
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    content = f.read()
+            except (OSError, ValueError) as e:
+                # ValueError covers UnicodeDecodeError for files that are not valid UTF-8
+                warnings.append(f"Could not read file {relative_path}: {str(e)}")
+                continue
+            code_files.append({
+                'path': relative_path,
+                'content': content
+            })
+    return code_files, warnings
+def count_tokens(text: str, model: str = "gpt-4") -> int:
+    """Count the number of tokens in a text string.
+
+    Args:
+        text: Text to tokenize.
+        model: Model name used to pick the tokenizer. Names tiktoken cannot map
+            fall back to the ``cl100k_base`` encoding, so the count is then
+            only an estimate.
+
+    Returns:
+        Number of tokens in ``text``.
+    """
+    try:
+        encoding = tiktoken.encoding_for_model(model)
+    except KeyError:
+        encoding = tiktoken.get_encoding("cl100k_base")
+    # Source files may literally contain special-token text such as "<|endoftext|>";
+    # encode it as ordinary text instead of raising.
+    return len(encoding.encode(text, disallowed_special=()))
+def chunk_files(code_files: List[Dict[str, str]], model: str = "gpt-4", max_tokens: int = 120000) -> List[List[Dict[str, str]]]:
+    """Group files into consecutive batches whose token totals stay within ``max_tokens``.
+
+    Each file is measured with ``count_tokens`` on its "File/Content" rendering.
+    A file that alone exceeds ``max_tokens`` is reported on stdout and left out.
+    """
+    batches = []
+    pending = []
+    pending_tokens = 0
+    for entry in code_files:
+        rendered = f"File: {entry['path']}\nContent:\n{entry['content']}\n"
+        cost = count_tokens(rendered, model)
+        # Oversized files can never fit, so they are dropped
+        if cost > max_tokens:
+            print(f"Warning: File {entry['path']} is too large ({cost} tokens) and will be skipped")
+            continue
+        # Still room in the open batch: keep filling it
+        if pending_tokens + cost <= max_tokens:
+            pending.append(entry)
+            pending_tokens += cost
+            continue
+        # Otherwise close the open batch (if it holds anything) and start a new one
+        if pending:
+            batches.append(pending)
+        pending = [entry]
+        pending_tokens = cost
+    # Flush whatever is left in the open batch
+    if pending:
+        batches.append(pending)
+    return batches
+def analyze_code_chunk(chunk: List[Dict[str, str]], prompt: str, model: str) -> Tuple[str, str]:
+    """Analyze a chunk of code files.
+
+    Args:
+        chunk: File dicts with ``path`` and ``content`` keys.
+        prompt: Description of the code/configuration changes to assess.
+        model: ``"gpt-4"`` selects the OpenAI request; any other value selects
+            the Anthropic request.
+
+    Returns:
+        ``(analysis_text, "")`` on success, or ``("", error_message)`` when the
+        request fails or the OpenAI answer was cut off by the token limit.
+    """
+    try:
+        # Prepare the context from the chunk
+        context = "Here are the relevant code files:\n\n" + "".join(
+            f"File: {file['path']}\n```\n{file['content']}\n```\n" for file in chunk
+        )
+        if model == "gpt-4":
+            json_schema = ImpactAnalysis.model_json_schema()
+            messages = [
+                {"role": "system", "content": "You are a code analysis expert. Analyze the provided code based on the user's prompt."},
+                {"role": "user", "content": f"Please check the impact of performing the below code/configuration changes on the above codebase. Provide only the summary of the impact in a table with aggregate analysis that outputs a JSON object with the following schema : {json_schema} . Pls note :  Do not add the characters ``` json anywhere in the response. Do not respond with messages like 'Here is the response in the required JSON format:'.\n\nCode or configuration changes: {prompt}\n\n{context}"}
+            ]
+            response = openai_client.chat.completions.create(
+                model="gpt-4o",
+                messages=messages,
+                temperature=0.7,
+                max_tokens=2000,
+                # The caller parses the answer as JSON, so ask the API for a JSON object
+                response_format={"type": "json_object"}
+            )
+            choice = response.choices[0]
+            # A reply cut off at max_tokens would be incomplete JSON; report it instead of returning it
+            if choice.finish_reason == "length":
+                return "", "Model response was truncated before the analysis was complete"
+            return choice.message.content or "", ""
+        else:
+            # Keep original Claude implementation
+            system_message = "You are a code analysis expert. Analyze the provided code based on the user's prompt."
+            user_message = f"Please check the impact of performing the below code/configuration changes on the above codebase. Provide only the summary of the impact in a table with aggregate analysis that includes 1) List of files impacted. 2) No of files impacted 3) Impactd etail on each file impacted . Surface a 'Severity Level' at the top of table with possible values: Low, Medium, High based on the 'Number of impacted files' impacted. E.g. if 'Number of impacted files' > 0 but < 3 then LOW, if 'Number of impacted files' > 3 but < 8 then MEDIUM, if 'Number of impacted files' > 8 then HIGH.\n\nCode or configuration changes: {prompt}\n\n{context}"
+            response = anthropic_client.messages.create(
+                model="claude-3-7-sonnet-20250219",
+                max_tokens=2000,
+                temperature=0.7,
+                system=system_message,
+                messages=[{"role": "user", "content": user_message}]
+            )
+            return response.content[0].text, ""
+    except Exception as e:
+        return "", str(e)
+def analyze_code(code_files: List[Dict[str, str]], prompt: str, model: str) -> Tuple[str, str]:
+    """Analyze code files with chunking to handle large codebases.
+
+    Args:
+        code_files: File dicts with ``path`` and ``content`` keys.
+        prompt: Description of the code/configuration changes to assess.
+        model: Model identifier passed on to chunking and to the chunk analysis.
+
+    Returns:
+        ``(result, "")`` on success or ``("", error_message)`` on failure.
+        ``result`` is a JSON string with ``severity_level``,
+        ``number_of_files_impacted`` and ``files_impacted`` merged over all
+        chunks. When no chunk answer can be parsed as a JSON object, the raw
+        answers are returned joined by blank lines, so the caller never
+        receives a made-up "0 files impacted" result.
+    """
+    try:
+        # Split files into chunks
+        chunks = chunk_files(code_files, model)
+        if not chunks:
+            return "", "No valid files to analyze"
+        # Analyze each chunk
+        all_analyses = []
+        for chunk_number, chunk in enumerate(chunks, start=1):
+            analysis, error = analyze_code_chunk(chunk, prompt, model)
+            if error:
+                return "", f"Error analyzing chunk {chunk_number}: {error}"
+            if analysis:
+                all_analyses.append(analysis)
+        if not all_analyses:
+            return "", "No analysis results generated"
+        parsed = [data for data in map(_parse_chunk_json, all_analyses) if data is not None]
+        if not parsed:
+            return "\n\n".join(all_analyses), ""
+        return json.dumps(_merge_chunk_analyses(parsed)), ""
+    except Exception as e:
+        return "", str(e)
+def _parse_chunk_json(text):
+    """Return the JSON object contained in ``text`` (tolerating a Markdown code fence), or None."""
+    candidate = text.strip()
+    if candidate.startswith("```"):
+        # Drop the opening fence line and, when present, the closing fence line
+        lines = candidate.splitlines()[1:]
+        if lines and lines[-1].strip() == "```":
+            lines = lines[:-1]
+        candidate = "\n".join(lines)
+    try:
+        data = json.loads(candidate)
+    except json.JSONDecodeError:
+        return None
+    return data if isinstance(data, dict) else None
+def _merge_chunk_analyses(chunk_results):
+    """Combine per-chunk result dicts into one dict with summed counts and the highest severity."""
+    severity_rank = {"LOW": 1, "MEDIUM": 2, "HIGH": 3}
+    highest_severity = "LOW"  # Default to lowest severity
+    total_files = 0
+    merged_files = []
+    for chunk_data in chunk_results:
+        entries = chunk_data.get("files_impacted")
+        if not isinstance(entries, list):
+            entries = []
+        merged_files.extend(entries)
+        reported = chunk_data.get("number_of_files_impacted")
+        # Use the listed entries when the count is missing or not a real integer (bool is excluded on purpose)
+        if isinstance(reported, int) and not isinstance(reported, bool):
+            total_files += reported
+        else:
+            total_files += len(entries)
+        # The schema spells severities "Low"/"Medium"/"High"; compare case-insensitively
+        label = str(chunk_data.get("severity_level") or "").upper()
+        if severity_rank.get(label, 0) > severity_rank[highest_severity]:
+            highest_severity = label
+    return {
+        "severity_level": highest_severity,
+        "number_of_files_impacted": total_files,
+        "files_impacted": merged_files
+    }
+def check_api_keys():
+    """Check if required API keys are set.
+
+    Returns:
+        Dict with the keys ``"gpt-4"`` and ``"claude-sonnet"``; each value is
+        True only when the matching environment variable holds a non-blank value.
+    """
+    # A variable that exists but is empty (for example "OPENAI_API_KEY=") is not a usable key
+    openai_key = bool((os.getenv("OPENAI_API_KEY") or "").strip())
+    anthropic_key = bool((os.getenv("ANTHROPIC_API_KEY") or "").strip())
+    return {
+        "gpt-4": openai_key,
+        "claude-sonnet": anthropic_key
+    }

pydantic_model.py ADDED Viewed

	@@ -0,0 +1,14 @@

+from pydantic import BaseModel, Field
+from typing import List, Optional, Literal
+class FileImpact(BaseModel):
+    # One entry of the impact list: a file and the description of how it is affected.
+    # Comments are used instead of a docstring so nothing is added to the generated JSON schema.
+    # Identifies a single impacted file despite the plural name; presumably its path (the README example shows "file_path").
+    files_impacted: str
+    # Free-text description of the impact on that file.
+    impact_details: str
+class ImpactAnalysis(BaseModel):
+    # Aggregate analysis result; the JSON schema of this class is embedded in the model prompt.
+    # Comments are used instead of a docstring so nothing besides the field description changes in that schema.
+    # One entry per impacted file.
+    files_impacted: List[FileImpact]
+    # Total number of impacted files.
+    number_of_files_impacted: int
+    # The description spells out contiguous thresholds (1-3, 4-8, more than 8) so counts of exactly 3 or 8 are covered.
+    severity_level: Optional[Literal["Low", "Medium", "High"]] = Field(description="possible values: Low, Medium, High based on 'number_of_files_impacted'. Low: 1 to 3 files impacted, Medium: 4 to 8 files impacted, High: more than 8 files impacted.")

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+streamlit==1.32.0
+openai==1.12.0
+python-dotenv==1.0.1
+gitpython==3.1.42
+anthropic==0.18.1
+tiktoken==0.6.0