Rajagopal committed on
Commit
7dda26f
·
0 Parent(s):

Duplicate from Rajagopal/try423

Browse files
Files changed (4) hide show
  1. .gitattributes +34 -0
  2. README.md +13 -0
  3. app.py +88 -0
  4. requirements.txt +7 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: csrd trying
3
+ emoji: 🏆
4
+ colorFrom: yellow
5
+ colorTo: gray
6
+ sdk: streamlit
7
+ sdk_version: 1.21.0
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: Rajagopal/try423
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain import OpenAI, PromptTemplate, LLMChain
3
+ from langchain.text_splitter import CharacterTextSplitter
4
+ from langchain.chains.mapreduce import MapReduceChain
5
+ from langchain.prompts import PromptTemplate
6
+ from langchain.chat_models import AzureChatOpenAI
7
+ from langchain.chains.summarize import load_summarize_chain
8
+ from langchain.chains import AnalyzeDocumentChain
9
+ from PyPDF2 import PdfReader
10
+ from langchain.document_loaders import TextLoader
11
+ from langchain.indexes import VectorstoreIndexCreator
12
+ from langchain.document_loaders import PyPDFLoader
13
+
14
+
15
+
16
+ import os
17
+
18
+
19
# Select the Azure flavour of the OpenAI API and pin the API version.
# NOTE(review): presumably the API key and endpoint are supplied through the
# hosting environment, since they are not set here -- confirm.
os.environ.update(
    {
        "OPENAI_API_TYPE": "azure",
        "OPENAI_API_VERSION": "2023-03-15-preview",
    }
)

# Chat model shared by the summarization chain built further down.
llm = AzureChatOpenAI(
    deployment_name="esujnand",
    model_name="gpt-35-turbo",
)
27
+
28
+
29
+
30
st.title("Wipro CSRD AI 1")

# Short on-page description of the two-step workflow.
st.write("Step 1: Summary of your selected section of CSRD... Sections in this are enviormental topic1, enviornamtal topic2 ")
st.write("Step 2: Ask your specfici questions regarding a CSRD disclosure requirments")


# PDF upload widget; yields None until the user has provided a file.
pdf_file = st.file_uploader("Upload file", type=["pdf"])

# Upper bound for the page slider. Streamlit re-runs this script on every
# interaction, so the real page count is derived from the upload on each run
# instead of only inside a button branch (where it would be lost again on the
# next rerun). 100 is the fallback while no file is available.
numberofpages = 100
if pdf_file is not None:
    numberofpages = len(PdfReader(pdf_file).pages)

if st.button("How many pages? "):
    if pdf_file is None:
        st.warning("Please upload a PDF file first.")
    else:
        st.write("length is ", numberofpages)

if st.button("table of contents? "):
    if pdf_file is None:
        st.warning("Please upload a PDF file first.")
    else:
        reader = PdfReader(pdf_file)
        # The table of contents is taken from the third page (index 2);
        # shorter documents have no such page to show.
        if len(reader.pages) > 2:
            st.write(reader.pages[2].extract_text())
        else:
            st.warning("This PDF has fewer than 3 pages, so no table of contents page is available.")


# Zero-based index of the first page to read. The bound is kept at least 1 so
# the slider's default value of 1 stays inside the allowed range.
startpage = st.slider('Which section to look at', 0, max(numberofpages, 1), 1)
st.write("starting section page", startpage)


# Number of consecutive pages to read, starting at `startpage`.
pagecount = st.slider('How many pages', 1, 5, 1)
st.write("pages to read", pagecount)
61
+
62
+
63
def extract_text_from_pdf():
    """Return the text of the selected page window of the uploaded PDF.

    Reads the module-level ``pdf_file``, ``startpage`` and ``pagecount``
    values. The window ``[startpage, startpage + pagecount)`` is clamped to
    the pages that actually exist, so a selection that reaches past the end
    of the document yields the available pages (possibly none) instead of
    raising ``IndexError``.

    Returns:
        The extracted text of each page in the window, joined with a single
        space. An empty string when the window contains no pages.

    Raises:
        ValueError: If no PDF has been uploaded yet.
    """
    if pdf_file is None:
        raise ValueError("No PDF file has been uploaded.")

    reader = PdfReader(pdf_file)
    total_pages = len(reader.pages)

    # Clamp both ends of the window to the valid page index range.
    first = min(max(startpage, 0), total_pages)
    last = min(first + pagecount, total_pages)

    return " ".join(reader.pages[i].extract_text() for i in range(first, last))
70
+
71
+
72
def extract_text_from_pdf2():
    """Return the text of every page of the uploaded PDF, space-separated.

    Reads the module-level ``pdf_file`` and concatenates the extracted text
    of all pages in document order.
    """
    document = PdfReader(pdf_file)
    page_texts = []
    for page in document.pages:
        page_texts.append(page.extract_text())
    return " ".join(page_texts)
79
+
80
# Summarize the selected pages when the user presses the button.
if st.button("Summerize "):
    if pdf_file is None:
        # Without an upload there is nothing to read; tell the user instead
        # of failing while opening the PDF.
        st.warning("Please upload a PDF file first.")
    else:
        with st.spinner("Extracting Text..."):
            summary_chain = load_summarize_chain(llm, chain_type="map_reduce")
            summarize_document_chain = AnalyzeDocumentChain(combine_docs_chain=summary_chain, verbose=True)
            text = extract_text_from_pdf()

        if not text.strip():
            # Avoid sending an empty document to the model.
            st.warning("No text could be extracted from the selected pages.")
        else:
            with st.spinner("Summarizing..."):
                result = summarize_document_chain.run(text)
            st.write(result)
88
+
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ openai
2
+ langchain
3
+ streamlit
4
+ PyPDF2
5
+ tiktoken
6
+ pypdf
7
+ chromadb