WAQASCHANNA committed on
Commit
80aa59b
·
verified ·
1 Parent(s): c7ddc0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -19
app.py CHANGED
@@ -30,6 +30,13 @@ def detect_encoding(file):
30
  def chunk_text(text, chunk_size=1000):
31
  return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
32
 
 
 
 
 
 
 
 
33
  # Main area - Display content and perform tasks
34
  if uploaded_file is not None:
35
  try:
@@ -41,30 +48,36 @@ if uploaded_file is not None:
41
  uploaded_file.seek(0) # Reset file pointer to the beginning
42
  text = uploaded_file.read().decode(encoding)
43
 
44
- # Chunk the text if it is too long
45
- chunks = chunk_text(text, chunk_size=1000)
 
 
 
 
46
 
47
- if task == "Summarization":
48
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
49
- summarized_text = ""
50
 
51
- # Summarize each chunk and combine the results
52
- for chunk in chunks:
53
- if len(chunk.split()) > min_length:
54
- summary = summarizer(chunk, max_length=max_length, min_length=min_length, do_sample=do_sample)
55
- summarized_text += summary[0]['summary_text'] + " "
56
 
57
- st.subheader("Summary:")
58
- st.write(summarized_text)
59
 
60
- elif task == "Named Entity Recognition (NER)":
61
- ner = pipeline("ner", grouped_entities=True, model="dslim/bert-base-NER")
62
- st.subheader("Named Entities:")
63
 
64
- for chunk in chunks:
65
- entities = ner(chunk)
66
- for entity in entities:
67
- st.write(f"{entity['entity_group']} - {entity['word']} (Score: {entity['score']:.2f})")
 
 
68
 
69
  except IndexError as e:
70
  st.error(f"IndexError: {e}. Ensure the text is long enough and parameters are set correctly.")
 
30
  def chunk_text(text, chunk_size=1000):
31
  return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
32
 
33
+ # Function to classify text as law-related or not using zero-shot classification
34
+ def classify_text(text):
35
+ classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
36
+ candidate_labels = ["law-related", "not law-related"]
37
+ result = classifier(text[:512], candidate_labels=candidate_labels)
38
+ return result['labels'][0] == "law-related"
39
+
40
  # Main area - Display content and perform tasks
41
  if uploaded_file is not None:
42
  try:
 
48
  uploaded_file.seek(0) # Reset file pointer to the beginning
49
  text = uploaded_file.read().decode(encoding)
50
 
51
+ # Classify the text before proceeding with summarization or NER
52
+ if classify_text(text):
53
+ st.write("This document is classified as law-related.")
54
+
55
+ # Chunk the text if it is too long
56
+ chunks = chunk_text(text, chunk_size=1000)
57
 
58
+ if task == "Summarization":
59
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
60
+ summarized_text = ""
61
 
62
+ # Summarize each chunk and combine the results
63
+ for chunk in chunks:
64
+ if len(chunk.split()) > min_length:
65
+ summary = summarizer(chunk, max_length=max_length, min_length=min_length, do_sample=do_sample)
66
+ summarized_text += summary[0]['summary_text'] + " "
67
 
68
+ st.subheader("Summary:")
69
+ st.write(summarized_text)
70
 
71
+ elif task == "Named Entity Recognition (NER)":
72
+ ner = pipeline("ner", grouped_entities=True, model="dslim/bert-base-NER")
73
+ st.subheader("Named Entities:")
74
 
75
+ for chunk in chunks:
76
+ entities = ner(chunk)
77
+ for entity in entities:
78
+ st.write(f"{entity['entity_group']} - {entity['word']} (Score: {entity['score']:.2f})")
79
+ else:
80
+ st.warning("The uploaded document does not contain law-related content. Please upload a legal document.")
81
 
82
  except IndexError as e:
83
  st.error(f"IndexError: {e}. Ensure the text is long enough and parameters are set correctly.")