aquibmoin committed on
Commit
c24f2ac
·
verified ·
1 Parent(s): 31952a4

Create gen_doc.py

Browse files
Files changed (1) hide show
  1. utils/gen_doc.py +82 -0
utils/gen_doc.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from docx import Document
2
+ from docx.shared import Pt
3
+ from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
4
+ from docx.oxml.ns import nsdecls
5
+ from docx.oxml import parse_xml
6
+ import io
7
+ import tempfile
8
+
9
def _split_table_row(line):
    """Return the stripped cell texts of one pipe-delimited Markdown table line."""
    text = line.strip()
    # Remove at most one outer pipe per side so that genuinely empty edge
    # cells ("|| a |") and rows written without outer pipes both survive.
    if text.startswith('|'):
        text = text[1:]
    if text.endswith('|'):
        text = text[:-1]
    return [cell.strip() for cell in text.split('|')]


def _is_separator_row(cells):
    """Return True for a Markdown header delimiter row such as ``|---|:--:|``."""
    return bool(cells) and all(
        '-' in cell and set(cell) <= {'-', ':'} for cell in cells
    )


def _add_requirements_table(doc, rows):
    """Append ``rows`` (a list of lists of cell strings) to ``doc`` as a grid table."""
    # Size the table by the widest row: indexing past the column count with
    # table.cell() would address the wrong cell or raise IndexError.
    column_count = max(len(row) for row in rows)
    table = doc.add_table(rows=len(rows), cols=column_count)
    table.style = 'Table Grid'
    for i, row in enumerate(rows):
        for j, cell_text in enumerate(row):
            cell = table.cell(i, j)
            cell.text = cell_text
            # Append a percentage-type width element (w:tcW) to the cell
            # properties; the original author added this for text wrapping.
            cell._element.get_or_add_tcPr().append(parse_xml(r'<w:tcW w:w="2500" w:type="pct" ' + nsdecls('w') + '/>'))


def export_to_word(response_content, subdomain_definition, science_goal, context, max_tokens, temperature, top_p, frequency_penalty, presence_penalty):
    """Write the generated response and its inputs to a temporary .docx file.

    The document starts with a title, then the subdomain definition, science
    goal, user-defined context and the model parameters. ``response_content``
    is then split on ``'### '`` and each non-empty section is rendered:

    * a section containing ``'Observations Requirements Table'`` becomes a
      heading, a Word table built from its pipe-delimited lines, and one
      paragraph holding the remaining non-empty lines;
    * a section starting with ``'ADS References'`` becomes a heading followed
      by one paragraph per non-empty line;
    * any other section is added as a single plain paragraph.

    Args:
        response_content: Markdown-like text of the model response. ``None``
            is treated as an empty response.
        subdomain_definition: Text placed under "Subdomain Definition:".
        science_goal: Text placed under "Science Goal:".
        context: Text placed under "User-defined Context:".
        max_tokens: Value reported under "Model Parameters:".
        temperature: Value reported under "Model Parameters:".
        top_p: Value reported under "Model Parameters:".
        frequency_penalty: Value reported under "Model Parameters:".
        presence_penalty: Value reported under "Model Parameters:".

    Returns:
        The path of the saved ``.docx`` file. The file is created with
        ``delete=False``, so removing it is left to the caller.
    """
    doc = Document()

    # Add a title (optional, you can remove this if not needed)
    doc.add_heading('AI Generated SCDD', 0)

    # Echo the user inputs at the top of the document
    doc.add_heading('Subdomain Definition:', level=1)
    doc.add_paragraph(subdomain_definition)

    doc.add_heading('Science Goal:', level=1)
    doc.add_paragraph(science_goal)

    doc.add_heading('User-defined Context:', level=1)
    doc.add_paragraph(context)

    # Record the model parameters used for the generation
    doc.add_heading('Model Parameters:', level=1)
    doc.add_paragraph(f"Max Tokens: {max_tokens}")
    doc.add_paragraph(f"Temperature: {temperature}")
    doc.add_paragraph(f"Top-p: {top_p}")
    doc.add_paragraph(f"Frequency Penalty: {frequency_penalty}")
    doc.add_paragraph(f"Presence Penalty: {presence_penalty}")

    # Split the response into sections based on ### headings
    sections = (response_content or '').split('### ')

    for section in sections:
        if not section.strip():
            continue

        # Handle the "Observations Requirements Table" separately with proper formatting
        if 'Observations Requirements Table' in section:
            doc.add_heading('Observations Requirements Table', level=1)

            # Skip only the heading line. Blank lines are filtered out below,
            # so the header row is kept even when it directly follows the
            # heading without an empty line in between.
            body_lines = section.split('\n')[1:]

            # Collect real table rows, dropping the Markdown delimiter row
            # (|---|---|) so it does not show up as a row of dashes.
            table_data = []
            for line in body_lines:
                if '|' not in line:
                    continue
                cells = _split_table_row(line)
                if not _is_separator_row(cells):
                    table_data.append(cells)

            if table_data:
                _add_requirements_table(doc, table_data)

            # Any non-table, non-empty lines of the section go into a single
            # paragraph placed after the table.
            paragraph_after_table = '\n'.join(
                line for line in body_lines if '|' not in line and line.strip()
            )
            if paragraph_after_table:
                doc.add_paragraph(paragraph_after_table.strip())

        # Handle the "ADS References" section
        elif section.startswith('ADS References'):
            doc.add_heading('ADS References', level=1)
            for reference in section.split('\n')[1:]:  # Skip the heading
                if reference.strip():
                    doc.add_paragraph(reference.strip())

        # Add all other sections as plain paragraphs
        else:
            doc.add_paragraph(section.strip())

    # Reserve a unique path, then close the handle before saving: this avoids
    # leaking the file descriptor and lets the save succeed on platforms that
    # refuse to reopen a file that is still held open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as temp_file:
        output_path = temp_file.name
    doc.save(output_path)

    return output_path