broadfield-dev committed on
Commit
63492a9
·
verified ·
1 Parent(s): ab88b4d

Update parser.py

Browse files
Files changed (1) hide show
  1. parser.py +25 -10
parser.py CHANGED
@@ -73,12 +73,12 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
73
 
74
  # Skip if any lines are already processed
75
  if any(line in processed_lines for line in range(start_line, end_line + 1)):
76
- return parts
77
 
78
  # Get category, default to 'other' if None
79
  category = get_category(node, parent_path[-1] if parent_path else None) or 'other'
80
  if category not in counters:
81
- category = 'other' # Ensure category exists in counters
82
  counters[category] += 1
83
  node_id = f"{category.capitalize()}[{counters[category]}]"
84
 
@@ -117,6 +117,7 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
117
  processed_lines.add(start_line)
118
 
119
  # Handle variables in function definitions (input variables)
 
120
  if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) and node.args.args:
121
  for arg in node.args.args:
122
  var_start = start_line # Assume args are on the same line as function def for simplicity
@@ -136,6 +137,7 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
136
  'node_id': var_node_id
137
  })
138
  processed_lines.add(var_start)
 
139
 
140
  # Process nested bodies
141
  nested_prev_end = start_line
@@ -160,8 +162,10 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
160
  'node_id': sub_node_id
161
  })
162
  processed_lines.add(child_start)
163
- child_parts = parse_node(child, lines, child_start, level + 1, total_lines, current_path, counters, processed_lines)
 
164
  parts.extend(child_parts)
 
165
  nested_prev_end = max(nested_prev_end, child_parts[-1]['location'][1] if child_parts else child_start)
166
  elif attr == 'handlers' and isinstance(child, ast.ExceptHandler):
167
  if child_start not in processed_lines and not is_blank_or_comment(lines[child_start - 1]):
@@ -177,8 +181,10 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
177
  'node_id': sub_node_id
178
  })
179
  processed_lines.add(child_start)
180
- child_parts = parse_node(child, lines, child_start, level + 1, total_lines, current_path, counters, processed_lines)
 
181
  parts.extend(child_parts)
 
182
  nested_prev_end = max(nested_prev_end, child_parts[-1]['location'][1] if child_parts else child_start)
183
  elif attr == 'finalbody':
184
  if child_start not in processed_lines and not is_blank_or_comment(lines[child_start - 1]):
@@ -194,8 +200,10 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
194
  'node_id': sub_node_id
195
  })
196
  processed_lines.add(child_start)
197
- child_parts = parse_node(child, lines, child_start, level + 1, total_lines, current_path, counters, processed_lines)
 
198
  parts.extend(child_parts)
 
199
  nested_prev_end = max(nested_prev_end, child_parts[-1]['location'][1] if child_parts else child_start)
200
  else:
201
  # Handle assignments and returns for variable detection
@@ -218,6 +226,7 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
218
  'node_id': var_node_id
219
  })
220
  processed_lines.add(var_start)
 
221
  else: # AnnAssign or AugAssign
222
  target = child.target
223
  if isinstance(target, ast.Name):
@@ -235,6 +244,7 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
235
  'node_id': var_node_id
236
  })
237
  processed_lines.add(var_start)
 
238
  elif isinstance(child, ast.Return):
239
  for value in ast.walk(child):
240
  if isinstance(value, ast.Name):
@@ -252,8 +262,10 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
252
  'node_id': var_node_id
253
  })
254
  processed_lines.add(var_start)
255
- child_parts = parse_node(child, lines, nested_prev_end, level + 1, total_lines, current_path, counters, processed_lines)
 
256
  parts.extend(child_parts)
 
257
  nested_prev_end = child_parts[-1]['location'][1] if child_parts else nested_prev_end
258
 
259
  # Update end_line and source of the parent node if its body extends it
@@ -265,7 +277,7 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
265
  parts[-1]['vector'] = create_vector(category, level, (start_line, final_end), total_lines, current_path)
266
  processed_lines.update(range(start_line, final_end + 1))
267
 
268
- return parts
269
 
270
  def parse_python_code(code):
271
  lines = code.splitlines(keepends=True)
@@ -273,15 +285,17 @@ def parse_python_code(code):
273
  try:
274
  tree = ast.parse(code)
275
  except SyntaxError:
276
- return [{'category': 'error', 'source': 'Invalid Python code', 'location': (1, 1), 'level': 0, 'vector': [0, 0, 1.0, 0.0, 0, 0], 'parent_path': 'Top-Level', 'node_id': 'Error[1]'}]
277
 
278
  parts = []
279
  prev_end = 0
280
  processed_lines = set()
 
281
 
282
  for stmt in tree.body:
283
- stmt_parts = parse_node(stmt, lines, prev_end, total_lines=total_lines, processed_lines=processed_lines)
284
  parts.extend(stmt_parts)
 
285
  prev_end = stmt_parts[-1]['location'][1] if stmt_parts else prev_end
286
 
287
  if prev_end < total_lines:
@@ -304,8 +318,9 @@ def parse_python_code(code):
304
  'node_id': spacer_node_id
305
  })
306
  processed_lines.add(i)
 
307
 
308
- return parts
309
 
310
  def is_blank_or_comment(line):
311
  """Check if a line is blank or a comment."""
 
73
 
74
  # Skip if any lines are already processed
75
  if any(line in processed_lines for line in range(start_line, end_line + 1)):
76
+ return parts, []
77
 
78
  # Get category, default to 'other' if None
79
  category = get_category(node, parent_path[-1] if parent_path else None) or 'other'
80
  if category not in counters:
81
+ category = 'other'
82
  counters[category] += 1
83
  node_id = f"{category.capitalize()}[{counters[category]}]"
84
 
 
117
  processed_lines.add(start_line)
118
 
119
  # Handle variables in function definitions (input variables)
120
+ category_sequence = [category]
121
  if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) and node.args.args:
122
  for arg in node.args.args:
123
  var_start = start_line # Assume args are on the same line as function def for simplicity
 
137
  'node_id': var_node_id
138
  })
139
  processed_lines.add(var_start)
140
+ category_sequence.append(arg_category)
141
 
142
  # Process nested bodies
143
  nested_prev_end = start_line
 
162
  'node_id': sub_node_id
163
  })
164
  processed_lines.add(child_start)
165
+ category_sequence.append(sub_category)
166
+ child_parts, child_seq = parse_node(child, lines, child_start, level + 1, total_lines, current_path, counters, processed_lines)
167
  parts.extend(child_parts)
168
+ category_sequence.extend(child_seq)
169
  nested_prev_end = max(nested_prev_end, child_parts[-1]['location'][1] if child_parts else child_start)
170
  elif attr == 'handlers' and isinstance(child, ast.ExceptHandler):
171
  if child_start not in processed_lines and not is_blank_or_comment(lines[child_start - 1]):
 
181
  'node_id': sub_node_id
182
  })
183
  processed_lines.add(child_start)
184
+ category_sequence.append('except')
185
+ child_parts, child_seq = parse_node(child, lines, child_start, level + 1, total_lines, current_path, counters, processed_lines)
186
  parts.extend(child_parts)
187
+ category_sequence.extend(child_seq)
188
  nested_prev_end = max(nested_prev_end, child_parts[-1]['location'][1] if child_parts else child_start)
189
  elif attr == 'finalbody':
190
  if child_start not in processed_lines and not is_blank_or_comment(lines[child_start - 1]):
 
200
  'node_id': sub_node_id
201
  })
202
  processed_lines.add(child_start)
203
+ category_sequence.append('finally')
204
+ child_parts, child_seq = parse_node(child, lines, child_start, level + 1, total_lines, current_path, counters, processed_lines)
205
  parts.extend(child_parts)
206
+ category_sequence.extend(child_seq)
207
  nested_prev_end = max(nested_prev_end, child_parts[-1]['location'][1] if child_parts else child_start)
208
  else:
209
  # Handle assignments and returns for variable detection
 
226
  'node_id': var_node_id
227
  })
228
  processed_lines.add(var_start)
229
+ category_sequence.append('assigned_variable')
230
  else: # AnnAssign or AugAssign
231
  target = child.target
232
  if isinstance(target, ast.Name):
 
244
  'node_id': var_node_id
245
  })
246
  processed_lines.add(var_start)
247
+ category_sequence.append('assigned_variable')
248
  elif isinstance(child, ast.Return):
249
  for value in ast.walk(child):
250
  if isinstance(value, ast.Name):
 
262
  'node_id': var_node_id
263
  })
264
  processed_lines.add(var_start)
265
+ category_sequence.append('returned_variable')
266
+ child_parts, child_seq = parse_node(child, lines, nested_prev_end, level + 1, total_lines, current_path, counters, processed_lines)
267
  parts.extend(child_parts)
268
+ category_sequence.extend(child_seq)
269
  nested_prev_end = child_parts[-1]['location'][1] if child_parts else nested_prev_end
270
 
271
  # Update end_line and source of the parent node if its body extends it
 
277
  parts[-1]['vector'] = create_vector(category, level, (start_line, final_end), total_lines, current_path)
278
  processed_lines.update(range(start_line, final_end + 1))
279
 
280
+ return parts, category_sequence
281
 
282
  def parse_python_code(code):
283
  lines = code.splitlines(keepends=True)
 
285
  try:
286
  tree = ast.parse(code)
287
  except SyntaxError:
288
+ return ([{'category': 'error', 'source': 'Invalid Python code', 'location': (1, 1), 'level': 0, 'vector': [0, 0, 1.0, 0.0, 0, 0], 'parent_path': 'Top-Level', 'node_id': 'Error[1]'}], ['error'])
289
 
290
  parts = []
291
  prev_end = 0
292
  processed_lines = set()
293
+ category_sequence = []
294
 
295
  for stmt in tree.body:
296
+ stmt_parts, stmt_seq = parse_node(stmt, lines, prev_end, total_lines=total_lines, processed_lines=processed_lines)
297
  parts.extend(stmt_parts)
298
+ category_sequence.extend(stmt_seq)
299
  prev_end = stmt_parts[-1]['location'][1] if stmt_parts else prev_end
300
 
301
  if prev_end < total_lines:
 
318
  'node_id': spacer_node_id
319
  })
320
  processed_lines.add(i)
321
+ category_sequence.append('spacer')
322
 
323
+ return parts, category_sequence
324
 
325
  def is_blank_or_comment(line):
326
  """Check if a line is blank or a comment."""