Update app.py
Browse files
app.py
CHANGED
@@ -324,102 +324,20 @@ def split_into_chunks(content, chunk_size=1000):
|
|
324 |
"""Splits large content into smaller chunks."""
|
325 |
return [content[i:i + chunk_size] for i in range(0, len(content), chunk_size)]
|
326 |
|
327 |
-
|
328 |
-
API_URL = "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct"
|
329 |
-
qwen = os.getenv("QWEN")
|
330 |
-
headers = {"Authorization": f"Bearer {qwen}"}
|
331 |
-
|
332 |
-
def extract_cleaned_gemini_output(gemini_output):
    """Keep only the summary and function-listing lines of a Gemini response.

    Every line is stripped of surrounding whitespace and then handled by the
    first matching rule:

    * lines starting with ``Project Summary:`` or ``Functionality:`` are kept;
    * a line starting with ``Functions:`` is kept and opens the functions
      section;
    * inside the functions section every non-empty line is kept;
    * otherwise a line starting with ``File:`` or containing ``Qwen,`` stops
      the scan.

    Args:
        gemini_output (str): Raw text to filter. ``None`` or an empty string
            yields an empty result.

    Returns:
        str: The kept lines joined with newlines.
    """
    if not gemini_output:
        return ""

    kept_prefixes = ("Project Summary:", "Functionality:")
    cleaned_output = []
    functions_section = False

    for raw_line in gemini_output.splitlines():
        line = raw_line.strip()
        if line.startswith(kept_prefixes):
            cleaned_output.append(line)
        elif line.startswith("Functions:"):
            cleaned_output.append(line)
            functions_section = True
        elif functions_section and line:
            cleaned_output.append(line)
        elif line.startswith("File:") or "Qwen," in line:
            # NOTE(review): once the functions section is open, the branch
            # above swallows every non-empty line, so this stop condition can
            # only fire before "Functions:" has been seen. Confirm whether the
            # scan was meant to stop at these markers inside the section too.
            break

    return "\n".join(cleaned_output)
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
def clean_output(output):
    """Clean the final output so it contains only documentation sections.

    Lines are stripped of surrounding whitespace. Collection starts at any
    heading line beginning with ``Project Summary:``, ``Functionality
    Summary:``, ``Functionality Flow:`` or ``Function Documentation:`` (the
    heading itself is kept). It is suspended by a line beginning with
    ``User-specified functionality:`` or ``Tasks identified by Gemini:``
    (that line is dropped) and resumes at the next kept heading. Blank lines
    are never kept.

    Args:
        output (str): Raw generated text. ``None`` or an empty string yields
            an empty result.

    Returns:
        str: The kept lines joined with newlines.
    """
    if not output:
        return ""

    section_headings = (
        "Project Summary:",
        "Functionality Summary:",
        "Functionality Flow:",
        "Function Documentation:",
    )
    excluded_headings = (
        "User-specified functionality:",
        "Tasks identified by Gemini:",
    )

    filtered_lines = []
    in_valid_section = False

    for raw_line in output.splitlines():
        line = raw_line.strip()
        if line.startswith(section_headings):
            # A documentation heading (re)opens a valid section.
            in_valid_section = True
            filtered_lines.append(line)
        elif line.startswith(excluded_headings):
            # Echoed prompt material: drop it and everything after it until
            # the next documentation heading.
            in_valid_section = False
        elif in_valid_section and line:
            filtered_lines.append(line)

    return "\n".join(filtered_lines)
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
|
|
|
389 |
|
390 |
-
def validate_and_generate_documentation(api_url, headers, gemini_output, functionality_description):
|
391 |
-
"""
|
392 |
-
Uses the Hugging Face Inference API to generate clean and relevant documentation using Qwen.
|
393 |
-
Handles truncated output by identifying missing functions and re-prompting Qwen.
|
394 |
-
"""
|
395 |
-
def get_missing_functions(output, cleaned_gemini_output):
    """Identify functions listed by Gemini that Qwen's response did not document.

    Gemini entries are lines starting with ``-public ``; the signature is the
    text before the first ``:``. Qwen entries are lines starting with
    ``- **Function**:``; the name is the text between the first pair of
    backticks, or the whole remainder of the line when no backticks are
    present.

    Signatures are compared after removing leading dashes and surrounding
    whitespace, so ``-public foo()`` matches ``public foo()``.

    Args:
        output (str): Qwen's generated documentation text.
        cleaned_gemini_output (str): Filtered Gemini text containing the
            ``-public`` function lines.

    Returns:
        set[str]: Gemini signatures, in their original ``-public ...`` form,
        that have no matching Qwen entry.
    """
    qwen_marker = "- **Function**:"

    def _normalize(signature):
        # Drop the list-style leading dash so both sides compare equal.
        return signature.strip().lstrip("-").strip()

    # Map normalized signature -> original signature as written by Gemini.
    gemini_functions = {}
    for line in cleaned_gemini_output.splitlines():
        if line.startswith("-public "):
            function_signature = line.split(":")[0].strip()
            gemini_functions[_normalize(function_signature)] = function_signature

    qwen_functions = set()
    for line in output.splitlines():
        if line.startswith(qwen_marker):
            remainder = line[len(qwen_marker):].strip()
            pieces = remainder.split("`")
            # Without backticks there is no second piece; fall back to the
            # whole remainder instead of raising IndexError.
            name = pieces[1] if len(pieces) > 1 else remainder
            qwen_functions.add(_normalize(name))

    # Functions present in Gemini's output but absent from Qwen's response.
    return {
        original
        for normalized, original in gemini_functions.items()
        if normalized not in qwen_functions
    }
|
413 |
-
|
414 |
-
cleaned_gemini_output = extract_cleaned_gemini_output(gemini_output)
|
415 |
-
gemini_output_length = len(cleaned_gemini_output) # Record the length of the cleaned Gemini output
|
416 |
-
|
417 |
-
# Generate the initial refined prompt for Qwen
|
418 |
-
prompt = f"""
|
419 |
-
User-specified functionality: '{functionality_description}'
|
420 |
-
Functions identified by Gemini:
|
421 |
-
{cleaned_gemini_output}
|
422 |
-
|
423 |
Tasks:
|
424 |
1. Generate a project summary:
|
425 |
'
|
@@ -436,10 +354,10 @@ def validate_and_generate_documentation(api_url, headers, gemini_output, functio
|
|
436 |
Functionality Flow:
|
437 |
<Explain the sequence of functions and data flow>
|
438 |
'
|
439 |
-
4. Generate detailed documentation for each function:
|
440 |
'
|
441 |
Function Documentation:
|
442 |
-
For each
|
443 |
- Summary: <Description of the function's purpose>
|
444 |
- Inputs: <Details of inputs and their types>
|
445 |
- Outputs: <Details of outputs and their types>
|
@@ -450,60 +368,15 @@ def validate_and_generate_documentation(api_url, headers, gemini_output, functio
|
|
450 |
- Assumptions: <Any assumptions the function makes>
|
451 |
- Example Usage: <Example demonstrating usage>
|
452 |
'
|
453 |
-
|
454 |
-
"""
|
455 |
-
|
456 |
-
qwen_prompt_length = len(prompt)
|
457 |
-
|
458 |
-
payload = {"inputs": prompt, "parameters": {"max_new_tokens": 1024}}
|
459 |
-
response = requests.post(api_url, headers=headers, json=payload)
|
460 |
-
|
461 |
-
# Handle API response
|
462 |
-
if response.status_code == 200:
|
463 |
-
api_response = response.json()
|
464 |
-
output = api_response.get("generated_text", "") if isinstance(api_response, dict) else api_response[0].get("generated_text", "")
|
465 |
-
|
466 |
-
# Remove the Gemini content from the top of the Qwen output
|
467 |
-
trimmed_output = output[gemini_output_length + qwen_prompt_length:].strip()
|
468 |
-
|
469 |
-
# Check for missing functions
|
470 |
-
missing_functions = get_missing_functions(trimmed_output, cleaned_gemini_output)
|
471 |
-
|
472 |
-
if missing_functions:
|
473 |
-
# Re-prompt Qwen for the missing functions
|
474 |
-
missing_functions_prompt = f"""
|
475 |
-
User-specified functionality: '{functionality_description}'
|
476 |
-
The following functions were not fully documented in the previous response:
|
477 |
-
{', '.join(missing_functions)}
|
478 |
-
|
479 |
-
Please provide detailed documentation for these functions in the following format:
|
480 |
-
'
|
481 |
-
Function Documentation:
|
482 |
-
For each function:
|
483 |
-
- Summary: <Description of the function's purpose>
|
484 |
-
- Inputs: <Details of inputs and their types>
|
485 |
-
- Outputs: <Details of outputs and their types>
|
486 |
-
- Dependencies: <Dependencies on other modules/functions>
|
487 |
-
- Data structures: <Details of data structures used>
|
488 |
-
- Algorithmic Details: <Description of the algorithm used>
|
489 |
-
- Error Handling: <Description of how the function handles errors>
|
490 |
-
- Assumptions: <Any assumptions the function makes>
|
491 |
-
- Example Usage: <Example demonstrating usage>
|
492 |
-
'
|
493 |
-
"""
|
494 |
-
missing_payload = {"inputs": missing_functions_prompt, "parameters": {"max_new_tokens": 1024}}
|
495 |
-
missing_response = requests.post(api_url, headers=headers, json=missing_payload)
|
496 |
-
|
497 |
-
if missing_response.status_code == 200:
|
498 |
-
missing_api_response = missing_response.json()
|
499 |
-
missing_output = missing_api_response.get("generated_text", "") if isinstance(missing_api_response, dict) else missing_api_response[0].get("generated_text", "")
|
500 |
-
trimmed_output += "\n\n" + missing_output
|
501 |
-
|
502 |
-
return clean_output(trimmed_output) # Final cleanup if necessary
|
503 |
-
else:
|
504 |
-
raise ValueError(f"Error during API call: {response.status_code}, {response.text}")
|
505 |
|
|
|
|
|
|
|
|
|
506 |
|
|
|
|
|
507 |
|
508 |
|
509 |
def generate_documentation_page():
|
@@ -527,17 +400,18 @@ def generate_documentation_page():
|
|
527 |
|
528 |
if os.path.exists(project_folder):
|
529 |
try:
|
530 |
-
#
|
531 |
-
|
|
|
|
|
|
|
532 |
|
533 |
-
# Generate documentation using
|
534 |
-
|
535 |
-
API_URL, headers, gemini_result, functionality
|
536 |
-
)
|
537 |
|
538 |
# Display the final documentation
|
539 |
st.success("Documentation generated successfully!")
|
540 |
-
st.text_area("Generated Documentation",
|
541 |
except Exception as e:
|
542 |
st.error(f"An error occurred: {e}")
|
543 |
else:
|
@@ -545,14 +419,6 @@ def generate_documentation_page():
|
|
545 |
else:
|
546 |
st.error("Please enter the functionality to analyze.")
|
547 |
|
548 |
-
|
549 |
-
|
550 |
-
|
551 |
-
|
552 |
-
|
553 |
-
|
554 |
-
|
555 |
-
|
556 |
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
557 |
|
558 |
|
|
|
324 |
"""Splits large content into smaller chunks."""
|
325 |
return [content[i:i + chunk_size] for i in range(0, len(content), chunk_size)]
|
326 |
|
327 |
+
def generate_detailed_documentation(file_contents, functionality_description):
|
|
|
|
|
|
|
|
|
|
|
328 |
"""
|
329 |
+
Generates detailed documentation using Gemini directly.
|
330 |
Args:
|
331 |
+
file_contents (dict): A dictionary with file paths as keys and their content as values.
|
332 |
+
functionality_description (str): A description of the functionality to document.
|
333 |
Returns:
|
334 |
+
str: The generated documentation.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
335 |
"""
|
336 |
+
prompt = f"""
|
337 |
+
The following code files are provided. Analyze their contents and generate comprehensive documentation.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
338 |
|
339 |
+
Functionality description: '{functionality_description}'
|
340 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
341 |
Tasks:
|
342 |
1. Generate a project summary:
|
343 |
'
|
|
|
354 |
Functionality Flow:
|
355 |
<Explain the sequence of functions and data flow>
|
356 |
'
|
357 |
+
4. Generate detailed documentation for each function in the codebase:
|
358 |
'
|
359 |
Function Documentation:
|
360 |
+
For each function:
|
361 |
- Summary: <Description of the function's purpose>
|
362 |
- Inputs: <Details of inputs and their types>
|
363 |
- Outputs: <Details of outputs and their types>
|
|
|
368 |
- Assumptions: <Any assumptions the function makes>
|
369 |
- Example Usage: <Example demonstrating usage>
|
370 |
'
|
371 |
+
Please return only the required documentation in the specified format.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
372 |
|
373 |
+
Code files:
|
374 |
+
"""
|
375 |
+
for file_path, content in file_contents.items():
|
376 |
+
prompt += f"\nFile: {os.path.basename(file_path)}\n{content}\n"
|
377 |
|
378 |
+
response = model.generate_content(prompt)
|
379 |
+
return response.text.strip()
|
380 |
|
381 |
|
382 |
def generate_documentation_page():
|
|
|
400 |
|
401 |
if os.path.exists(project_folder):
|
402 |
try:
|
403 |
+
# Gather all file paths in the project directory
|
404 |
+
file_paths = read_project_files(project_folder)
|
405 |
+
|
406 |
+
# Read file contents
|
407 |
+
file_contents = read_files(file_paths)
|
408 |
|
409 |
+
# Generate documentation using Gemini
|
410 |
+
documentation = generate_detailed_documentation(file_contents, functionality)
|
|
|
|
|
411 |
|
412 |
# Display the final documentation
|
413 |
st.success("Documentation generated successfully!")
|
414 |
+
st.text_area("Generated Documentation", documentation, height=600)
|
415 |
except Exception as e:
|
416 |
st.error(f"An error occurred: {e}")
|
417 |
else:
|
|
|
419 |
else:
|
420 |
st.error("Please enter the functionality to analyze.")
|
421 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
422 |
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
423 |
|
424 |
|