Spaces:
Sleeping
Sleeping
anmolsahai
committed on
Commit
•
2b00504
1
Parent(s):
671291d
fix 18
Browse files
- __pycache__/langchain_pipeline.cpython-310.pyc +0 -0
- app.py +21 -27
- langchain_pipeline.py +1 -1
- requirements.txt +2 -1
__pycache__/langchain_pipeline.cpython-310.pyc
CHANGED
Binary files a/__pycache__/langchain_pipeline.cpython-310.pyc and b/__pycache__/langchain_pipeline.cpython-310.pyc differ
|
|
app.py
CHANGED
@@ -6,6 +6,7 @@ from difflib import unified_diff
|
|
6 |
import tempfile
|
7 |
from docx.shared import RGBColor
|
8 |
import re
|
|
|
9 |
|
10 |
def pdf_to_text_with_layout(pdf_file):
|
11 |
doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
|
@@ -38,32 +39,8 @@ def apply_pipeline(file, model_name, balance_type, apsn_transactions, max_fees_p
|
|
38 |
)
|
39 |
|
40 |
def redline_changes(original_path, revised_path, output_path):
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
original_text = "\n".join([para.text for para in original_doc.paragraphs])
|
45 |
-
revised_text = "\n".join([para.text for para in revised_doc.paragraphs])
|
46 |
-
|
47 |
-
diff = unified_diff(original_text.splitlines(), revised_text.splitlines(), lineterm='')
|
48 |
-
|
49 |
-
diff_doc = Document()
|
50 |
-
for line in diff:
|
51 |
-
if line.startswith('-'):
|
52 |
-
p = diff_doc.add_paragraph(style='Normal')
|
53 |
-
run = p.add_run(line)
|
54 |
-
run.font.color.rgb = RGBColor(255, 0, 0) # Red
|
55 |
-
elif line.startswith('+'):
|
56 |
-
p = diff_doc.add_paragraph(style='Normal')
|
57 |
-
run = p.add_run(line)
|
58 |
-
run.font.color.rgb = RGBColor(0, 128, 0) # Green
|
59 |
-
elif line.startswith('@@'):
|
60 |
-
p = diff_doc.add_paragraph(style='Normal')
|
61 |
-
run = p.add_run(line)
|
62 |
-
run.font.color.rgb = RGBColor(0, 0, 255) # Blue
|
63 |
-
else:
|
64 |
-
diff_doc.add_paragraph(line, style='Normal')
|
65 |
-
|
66 |
-
diff_doc.save(output_path)
|
67 |
|
68 |
# Streamlit App
|
69 |
st.title("Canarie AI Prototype")
|
@@ -108,6 +85,22 @@ if uploaded_file is not None:
|
|
108 |
redlined_output_path = tempfile.NamedTemporaryFile(delete=False, suffix=".docx").name
|
109 |
redline_changes(original_word_path, revised_word_path, redlined_output_path)
|
110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
with open(redlined_output_path, "rb") as f:
|
112 |
st.download_button(
|
113 |
label="Download Redlined Document",
|
@@ -115,7 +108,8 @@ if uploaded_file is not None:
|
|
115 |
file_name="redlined_document.docx",
|
116 |
mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
117 |
)
|
118 |
-
|
|
|
119 |
|
120 |
except Exception as e:
|
121 |
st.exception(e)
|
|
|
6 |
import tempfile
|
7 |
from docx.shared import RGBColor
|
8 |
import re
|
9 |
+
import subprocess
|
10 |
|
11 |
def pdf_to_text_with_layout(pdf_file):
|
12 |
doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
|
|
|
39 |
)
|
40 |
|
41 |
def redline_changes(original_path, revised_path, output_path):
|
42 |
+
# Using docxcompose to create a redlined document
|
43 |
+
subprocess.run(['docxcompose', 'compose', original_path, revised_path, output_path])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
# Streamlit App
|
46 |
st.title("Canarie AI Prototype")
|
|
|
85 |
redlined_output_path = tempfile.NamedTemporaryFile(delete=False, suffix=".docx").name
|
86 |
redline_changes(original_word_path, revised_word_path, redlined_output_path)
|
87 |
|
88 |
+
with open(original_word_path, "rb") as f:
|
89 |
+
st.download_button(
|
90 |
+
label="Download Original Document",
|
91 |
+
data=f,
|
92 |
+
file_name="original_document.docx",
|
93 |
+
mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
94 |
+
)
|
95 |
+
|
96 |
+
with open(revised_word_path, "rb") as f:
|
97 |
+
st.download_button(
|
98 |
+
label="Download Revised Document",
|
99 |
+
data=f,
|
100 |
+
file_name="revised_document.docx",
|
101 |
+
mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
102 |
+
)
|
103 |
+
|
104 |
with open(redlined_output_path, "rb") as f:
|
105 |
st.download_button(
|
106 |
label="Download Redlined Document",
|
|
|
108 |
file_name="redlined_document.docx",
|
109 |
mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
110 |
)
|
111 |
+
|
112 |
+
st.success("Documents created successfully!")
|
113 |
|
114 |
except Exception as e:
|
115 |
st.exception(e)
|
langchain_pipeline.py
CHANGED
@@ -54,7 +54,7 @@ def pipeline(file, model_name, balance_type, apsn_transactions, max_fees_per_day
|
|
54 |
What is the minimum transaction amount to trigger an overdraft?: {min_transaction_overdraft}
|
55 |
|
56 |
Please output in the following format:
|
57 |
-
{{
|
58 |
------
|
59 |
{{reasons_for_changes with cases or law cited}}
|
60 |
"""
|
|
|
54 |
What is the minimum transaction amount to trigger an overdraft?: {min_transaction_overdraft}
|
55 |
|
56 |
Please output in the following format:
|
57 |
+
{{full text of the updated disclosure with formatting retained}}
|
58 |
------
|
59 |
{{reasons_for_changes with cases or law cited}}
|
60 |
"""
|
requirements.txt
CHANGED
@@ -19,4 +19,5 @@ langchain_astradb
|
|
19 |
langchain_core
|
20 |
langchain_openai
|
21 |
langchain_anthropic
|
22 |
-
langchain_google_genai
|
|
|
|
19 |
langchain_core
|
20 |
langchain_openai
|
21 |
langchain_anthropic
|
22 |
+
langchain_google_genai
|
23 |
+
docxcompose
|