# PlanExe / src /plan /estimate_wbs_task_durations.py
# Simon Strandgaard
# snapshot of PlanExe repo
# 6369972
# raw
# history blame
# 6.62 kB
"""
Estimate task durations for a Work Breakdown Structure (WBS) by querying an LLM.

https://en.wikipedia.org/wiki/Work_breakdown_structure
"""
import os
import json
import time
from math import ceil
from typing import List, Optional
from uuid import uuid4
from dataclasses import dataclass
from pydantic import BaseModel, Field
from llama_index.core.llms.llm import LLM
from src.format_json_for_use_in_query import format_json_for_use_in_query
class TaskTimeEstimateDetail(BaseModel):
    """
    Details about a task duration, lower/upper bounds. Potential risks impacting the duration.
    """
    # One entry of TimeEstimates.task_details: the estimate for a single task.
    # NOTE(review): the docstring above and the Field descriptions below are presumably
    # surfaced to the LLM as part of the structured-output schema, so rewording them
    # may change model output — confirm before editing the text.

    # Identifier of the task this estimate belongs to.
    task_id: str = Field(
        description="UUID that uniquely identifies the task."
    )
    # Free-text description of what could delay the task.
    # NOTE(review): the field is typed `str`, but the example inside the description is
    # written as a list of strings — confirm which shape is intended.
    delay_risks: str = Field(
        description="Possible issues that may delay the task. Example: ['Weather-related disruptions', 'Third-party vendors might fail to deliver on time', 'Key team members might be unavailable']. **This field MUST be filled with a meaningful description. Do not leave it empty.**"
    )
    # Free-text description of how the delay risks can be reduced.
    # NOTE(review): same `str` vs list-shaped example mismatch as `delay_risks`.
    mitigation_strategy: str = Field(
        description="Actions or strategies to minimize the risk of delays. Example: ['Engage backup vendors', 'Schedule regular progress reviews', 'Establish clear communication channels']. **This field MUST be filled with a meaningful and specific strategy. Do not leave it empty.**"
    )
    # Best-case duration in days; the description asks for -1 when not applicable.
    days_min: int = Field(
        description="Number of days, the best case scenario. If not applicable use minus 1."
    )
    # Worst-case duration in days; the description asks for -1 when not applicable.
    days_max: int = Field(
        description="Number of days, the worst case scenario. If not applicable use minus 1."
    )
    # Realistic duration in days; the description asks for -1 when not applicable.
    days_realistic: int = Field(
        description="Number of days, in the realistic scenario. If not applicable use minus 1."
    )
class TimeEstimates(BaseModel):
    """
    Estimating realistic durations for each task and appropriately assigning resources
    ensures that the project stays on schedule and within budget.
    """
    # Top-level structured-output model: EstimateWBSTaskDurations.execute passes this
    # class to llm.as_structured_llm(...).
    # NOTE(review): the docstring above is presumably exposed to the LLM via the schema —
    # confirm before rewording it.

    # One TaskTimeEstimateDetail per estimated task.
    task_details: list[TaskTimeEstimateDetail] = Field(
        description="List with tasks with time estimates."
    )
# Instruction text that EstimateWBSTaskDurations.execute places in front of every query.
# Deliberately a plain string (not an f-string): nothing is interpolated here, and a
# plain literal keeps any '{' / '}' added later (e.g. a JSON example) from being parsed
# as a replacement field.
QUERY_PREAMBLE = """
Assign estimated durations for each task and subtask.
Ensure a consistent voice and phrasing across tasks.
**For each task, you MUST provide a meaningful description for both 'delay_risks' and 'mitigation_strategy'. Do not leave these fields as empty strings.**
**Example of good 'delay_risks' and 'mitigation_strategy':**
For the task of "Define project scope and objectives":
- delay_risks: "Lack of clear initial requirements from stakeholders, potential for scope creep later in the project."
- mitigation_strategy: "Conduct thorough initial meetings with all key stakeholders to gather requirements, establish a clear change management process."
"""
@dataclass
class EstimateWBSTaskDurations:
    """
    Enrich an existing Work Breakdown Structure (WBS) with task duration estimates.

    Attributes:
        query: The query text handed to execute() (without QUERY_PREAMBLE).
        response: The LLM reply, parsed from JSON text into a dict.
        metadata: dict(llm.metadata) extended with "llm_classname" and
            "duration" (elapsed seconds, rounded up to a whole number).
    """
    query: str
    response: dict
    metadata: dict

    @classmethod
    def format_query(cls, plan_json: dict, wbs_level2_json: list, task_ids: list[str]) -> str:
        """
        Build the query text describing the plan, the WBS and the tasks to estimate.

        Args:
            plan_json: The project plan; must be a dict.
            wbs_level2_json: The Work Breakdown Structure; must be a list.
            task_ids: Ids of the tasks to estimate; must be a list.

        Returns:
            The query string. It does not include QUERY_PREAMBLE; execute() adds that.

        Raises:
            ValueError: If any argument has the wrong container type.
        """
        if not isinstance(plan_json, dict):
            raise ValueError("Invalid plan_json.")
        if not isinstance(wbs_level2_json, list):
            # The message names the parameter that was actually rejected.
            raise ValueError("Invalid wbs_level2_json.")
        if not isinstance(task_ids, list):
            raise ValueError("Invalid task_ids.")

        # Wrap the task ids in quotes, one per line, so it looks like this:
        # "0ca58751-3abd-44d0-b24b-ebcf14c794e7"
        # "86f0ed30-ba23-46e4-83d9-ef53d95ff054"
        # "58d5dcc3-7385-4919-adc1-e1f84727e9d2"
        task_id_strings = "\n".join(f'"{task_id}"' for task_id in task_ids)

        query = f"""
The project plan:
{format_json_for_use_in_query(plan_json)}
The Work Breakdown Structure (WBS):
{format_json_for_use_in_query(wbs_level2_json)}
Only estimate these {len(task_ids)} tasks:
{task_id_strings}
"""
        return query

    @classmethod
    def execute(cls, llm: LLM, query: str) -> 'EstimateWBSTaskDurations':
        """
        Invoke LLM to estimate task durations from a json representation of a project plan and Work Breakdown Structure (WBS).

        Executing with too many task_ids may result in a timeout, where the LLM cannot complete the task within a reasonable time.
        Split the task_ids into smaller chunks of around 3 task_ids each, and process them one at a time.

        Args:
            llm: The LLM instance to query.
            query: Query text, e.g. as produced by format_query().

        Returns:
            An EstimateWBSTaskDurations holding the query, the parsed response and metadata.

        Raises:
            ValueError: If llm is not an LLM instance or query is not a str.
        """
        if not isinstance(llm, LLM):
            raise ValueError("Invalid LLM instance.")
        if not isinstance(query, str):
            raise ValueError("Invalid query.")

        start_time = time.perf_counter()

        # Ask for output shaped like TimeEstimates; the preamble carries the instructions.
        sllm = llm.as_structured_llm(TimeEstimates)
        response = sllm.complete(QUERY_PREAMBLE + query)
        json_response = json.loads(response.text)

        end_time = time.perf_counter()
        # Whole seconds, rounded up; the measured span includes the JSON parsing above.
        duration = int(ceil(end_time - start_time))

        metadata = dict(llm.metadata)
        metadata["llm_classname"] = llm.class_name()
        metadata["duration"] = duration

        result = EstimateWBSTaskDurations(
            query=query,
            response=json_response,
            metadata=metadata,
        )
        return result

    def raw_response_dict(self, include_metadata=True, include_query=True) -> dict:
        """
        Return a shallow copy of the response, optionally extended with metadata and query.

        Args:
            include_metadata: When true, add self.metadata under the "metadata" key.
            include_query: When true, add self.query under the "query" key.

        Returns:
            A new dict; self.response itself is not modified. The metadata dict is
            stored by reference, not copied.
        """
        d = self.response.copy()
        if include_metadata:
            d['metadata'] = self.metadata
        if include_query:
            d['query'] = self.query
        return d
if __name__ == "__main__":
    # Manual smoke test: load a stored plan and WBS, query a local Ollama model,
    # and print the resulting estimates.
    from llama_index.llms.ollama import Ollama

    # TODO: Eliminate hardcoded paths
    basepath = '/Users/neoneye/Desktop/planexe_data'

    # Pure configuration, declared up front; none of it touches disk or network.
    model_name = "llama3.1:latest"
    # model_name = "qwen2.5-coder:latest"
    # model_name = "phi4:latest"
    task_ids = [
        "c6a249af-b8d3-4d4c-b3ef-8a5caa8793d4",
        "622fa6f1-6252-445e-8b5a-2a5c75683a80",
        "fdaa706e-3d3b-4166-9730-7ea3e238d0cf"
    ]

    def load_json(relative_path: str) -> dict:
        """Parse the JSON file at basepath/relative_path, announcing the path first."""
        path = os.path.join(basepath, relative_path)
        print(f"loading file: {path}")
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)

    plan_json = load_json('002-project_plan.json')
    wbs_level2_json = load_json('006-wbs_level2.json')
    query = EstimateWBSTaskDurations.format_query(plan_json, wbs_level2_json, task_ids)

    llm = Ollama(
        model=model_name,
        request_timeout=120.0,
        temperature=0.5,
        is_function_calling_model=False,
    )

    print(f"Query: {query}")
    result = EstimateWBSTaskDurations.execute(llm, query)

    print("\n\nResponse:")
    # The query was already printed above, so leave it out of the dump.
    print(json.dumps(result.raw_response_dict(include_query=False), indent=2))