Spaces:
Sleeping
Sleeping
carbonnnnn
committed on
Commit
·
8677815
1
Parent(s):
4f52cc8
First version
Browse files- .gitattributes +1 -0
- app.py +27 -0
- clear.py +8 -0
- data/Bandeira etal2019 - TRD - EV last mile Rio de Janeiro.pdf +3 -0
- data/Bayer_Aklin2020 - ES - EU ETS reduced CO2.pdf +3 -0
- data/Belavina2020 - MSOM - appendix.pdf +3 -0
- data/Belavina2020 - MSOM - grocery store density and food waste.pdf +3 -0
- data/Beloavia etal2017 - MS - online grocery retail - online appendix.pdf +3 -0
- data/Beloavia etal2017 - MS - online grocery retail.pdf +3 -0
- data/Cachon 2014 - MS - Retail Store Density and the Cost of Greenhouse Gas Emissions.pdf +3 -0
- data/Carlsson etal2016 - OR - household-level economics of scale.pdf +3 -0
- data/Carlsson_Jia2014 - TS - continuous facility location -sm.pdf +3 -0
- data/Carlsson_Jia2014 - TS - continuous facility location.pdf +3 -0
- data/Chakraborty etal2021 - JORS - EV adoption and policy decision.pdf +3 -0
- data/Dilek etal2017 - EJOR - retail location carbon penalty.pdf +3 -0
- data/Edwards etal2009 - IJPDLM - carbon footprints of conventional and online retailing.pdf +3 -0
- data/Fan etal2022 - MS - price commitment control carbon emissions.pdf +3 -0
- data/Figliozzi2020 - TRD - emission reduction autonomous vehicles.pdf +3 -0
- data/GLS2020 - sustainability report.pdf +3 -0
- data/Gao etal2018 - TRE - green supply chain consumer.pdf +3 -0
- data/Giesler_Veresiu2014 - JCR - responsible consumer.pdf +3 -0
- data/Gong_Zhou2013 - OR - production plan with emissions trading.pdf +3 -0
- data/Goodchild_Toy2018 - TRD - drone.pdf +3 -0
- data/Hong_Zimmerman2021 - TRD - GHG autonomous vehicles.pdf +3 -0
- data/Jaller_Pahwa2020 - TRD - environmental impact of online shopping.pdf +3 -0
- data/Kopplin etal2021- TRD - consumer acceptance of shared e-scooters.pdf +3 -0
- data/McKinnon2022 - Environmentally sustainable city logistics.pdf +3 -0
- data/McLeod etal2020 - TRD - porters and cycle couriers for last mile delivery.pdf +3 -0
- data/Park etal2015 - POM - SC design monopolistic competition.pdf +3 -0
- data/Perotti etal2021 - BSE - logistics sites CO2eq.pdf +3 -0
- data/Qi etal2018 - MSOM - shared mobility for last-mile delivery.pdf +3 -0
- data/Shahmohammadi etal2020 - EST - carbon footprint online retailing.pdf +3 -0
- data/Song etal2020 - POM - value of buy-online-and-pickup-in-store.pdf +3 -0
- data/Tezer_Bodur2020 - JCR - greenconsumption effect.pdf +3 -0
- data/UPS2019 - sustainability progress report.pdf +3 -0
- data/Yuan etal2018 - POM - control of emissions trading and production.pdf +3 -0
- data/vanLoon etal2015 - JCP - emissions from online retailing.pdf +3 -0
- hay/__pycache__/model.cpython-310.pyc +0 -0
- hay/__pycache__/pipeline.cpython-310.pyc +0 -0
- hay/__pycache__/retriever.cpython-310.pyc +0 -0
- hay/model.py +36 -0
- hay/pipeline.py +50 -0
- hay/retriever.py +76 -0
- main.py +64 -0
- outputs/docs-dataset/data-00000-of-00001.arrow +3 -0
- outputs/docs-dataset/dataset_info.json +60 -0
- outputs/docs-dataset/state.json +13 -0
- outputs/faiss_index.faiss +0 -0
- outputs/faiss_index.json +1 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.pdf filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Application file for Gradio App
|
2 |
+
|
3 |
+
import gradio as gr
|
4 |
+
import time
|
5 |
+
from hay.pipeline import rs_pipeline
|
6 |
+
|
7 |
+
with gr.Blocks() as chat:
    # Chat transcript, the message input box, and a button that resets both.
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    def user(user_message, history):
        """Return a cleared textbox value and the history with a pending turn.

        The new turn is ``[user_message, None]``; ``history`` itself is not
        mutated. This helper is not wired to any event in this block.
        """
        return "", history + [[user_message, None]]

    def respond(message, chat_history):
        """Answer ``message`` with ``rs_pipeline`` and record the exchange.

        Args:
            message: Text submitted from the textbox.
            chat_history: Current chatbot history; the new
                ``(message, answer)`` pair is appended to it in place.

        Returns:
            A tuple ``("", chat_history)``: the empty string clears the
            textbox, the history refreshes the chatbot.
        """
        question = str(message)
        # Blank submissions carry no query, so skip the retrieval and
        # summarization work and leave the history unchanged.
        if not question.strip():
            return "", chat_history

        answer = rs_pipeline(question)
        chat_history.append((message, answer))
        # Return "" (not " ") so the next message does not start with a
        # stray space left behind in the textbox.
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
|
24 |
+
|
25 |
+
def application():
    """Launch the module-level ``chat`` Gradio Blocks app."""
    chat.launch()
|
27 |
+
|
clear.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# USE THIS FILE TO CLEAR GPU CACHE
|
2 |
+
|
3 |
+
import gc
|
4 |
+
import torch
|
5 |
+
# def report_gpu():
|
6 |
+
# Show what is currently running on the GPU before clearing anything.
gpu_process_report = torch.cuda.list_gpu_processes()
print(gpu_process_report)

# Run Python garbage collection, then ask torch to empty its CUDA cache.
gc.collect()
torch.cuda.empty_cache()
|
data/Bandeira etal2019 - TRD - EV last mile Rio de Janeiro.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d1b1d2ab290adee450d72ce4d36681c7a823c5d15efd61468be44f6a6b6f5a3
|
3 |
+
size 1080580
|
data/Bayer_Aklin2020 - ES - EU ETS reduced CO2.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c34e0b4ce49fd511f441e9e41fa00b6ffb781372b453170a846aaf647a5371c
|
3 |
+
size 684692
|
data/Belavina2020 - MSOM - appendix.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2875ff00ce0f04c01018971ce296e68b45a107befeec34c2409f11af84a1b7c6
|
3 |
+
size 1271531
|
data/Belavina2020 - MSOM - grocery store density and food waste.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a85c9d89c27d4220fbac0c139a6c2a6c8fe43e6f74cf47648b69b04b0d476e30
|
3 |
+
size 1029737
|
data/Beloavia etal2017 - MS - online grocery retail - online appendix.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d51453559bfd9ff58520cc93e9bce4e25883fca9fb8a9434f3e2e6351ebcb930
|
3 |
+
size 1195260
|
data/Beloavia etal2017 - MS - online grocery retail.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d8eba61b54c22cbb2aa5a2a55cb3d903eccb3ae2fe2f13aec6d0234c1cab0a31
|
3 |
+
size 518288
|
data/Cachon 2014 - MS - Retail Store Density and the Cost of Greenhouse Gas Emissions.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b662a5533e51cfc2bfac81d41961b1bba4ffad4443386ed35dbd92da131e4d64
|
3 |
+
size 400283
|
data/Carlsson etal2016 - OR - household-level economics of scale.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d160fd7121de0f203c520286599c28814077029a3e0d471c927b68a8183d133
|
3 |
+
size 551476
|
data/Carlsson_Jia2014 - TS - continuous facility location -sm.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e9124240851e4698c9c01e273b82f98624e9e4dd917f18aac673fada90afce7
|
3 |
+
size 432199
|
data/Carlsson_Jia2014 - TS - continuous facility location.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57acd403a3b56be6f828f1ed05bcc264960d131c5fb2192a78d95e77c3b95781
|
3 |
+
size 1692461
|
data/Chakraborty etal2021 - JORS - EV adoption and policy decision.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f0eacf24de220b0708cbdb8781997a7eb20eb1ba2df3221ca5395e82f1ee5592
|
3 |
+
size 2798560
|
data/Dilek etal2017 - EJOR - retail location carbon penalty.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75edc2505d908e6824b5031149100158330f2727fa734725f559dd628a8333d3
|
3 |
+
size 1326566
|
data/Edwards etal2009 - IJPDLM - carbon footprints of conventional and online retailing.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61bb1ca9b33a288d4ea680559199d9ec529f75f0956d2f70efd8aa01f1f3d92d
|
3 |
+
size 237714
|
data/Fan etal2022 - MS - price commitment control carbon emissions.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c36295c19a33c3e510ee80da493acf6da9da48b6e7d0fdcf8203368d1a67f4dc
|
3 |
+
size 1531063
|
data/Figliozzi2020 - TRD - emission reduction autonomous vehicles.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f38845c42403de50285545b64d7860fd675ca9433cd0ef90fb4494973329432e
|
3 |
+
size 678602
|
data/GLS2020 - sustainability report.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cde30d8d1e45adcf5cd530c5164abb30beae8825a59663aa22e821f97260a54a
|
3 |
+
size 5286679
|
data/Gao etal2018 - TRE - green supply chain consumer.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:32a9d9451fde6c2d2ba25694f889734c80e4c1b0ffafa6beb18a697899272d82
|
3 |
+
size 1034859
|
data/Giesler_Veresiu2014 - JCR - responsible consumer.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1473f5f29cbc719745bec983dfb7adefcd15d45a37f207fc2c3fe41dba79f71b
|
3 |
+
size 426868
|
data/Gong_Zhou2013 - OR - production plan with emissions trading.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:934962c1ecb3250229dfd51f99919219d3222c64c040bdee9dc3e674ee40168a
|
3 |
+
size 387375
|
data/Goodchild_Toy2018 - TRD - drone.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc5abcd978e565a28dc55130830a9ab70ce9338b799682f00a144628dd6d7a8c
|
3 |
+
size 2325404
|
data/Hong_Zimmerman2021 - TRD - GHG autonomous vehicles.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6153c7d355b54546b7d5851671745fcea2732d9684f055cb3fb227971a34e8f
|
3 |
+
size 1455529
|
data/Jaller_Pahwa2020 - TRD - environmental impact of online shopping.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a9b4557084e62b0fa2fd25ec1f45b5ce4642a1a0cfe9f5ab14af68b3aa24947
|
3 |
+
size 1780179
|
data/Kopplin etal2021- TRD - consumer acceptance of shared e-scooters.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5539f86cf02f250119af34c0fc4bdff1c144458b82e04c1266f6f38e0e9618fa
|
3 |
+
size 798226
|
data/McKinnon2022 - Environmentally sustainable city logistics.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd02c40f124beef047acf208aca769a58513e6b1b75de0e3071ed58836022dc7
|
3 |
+
size 992619
|
data/McLeod etal2020 - TRD - porters and cycle couriers for last mile delivery.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac3f78a064fd764955d18380d48cdb64e6d158d65baf26facbb7972033deda2b
|
3 |
+
size 2125351
|
data/Park etal2015 - POM - SC design monopolistic competition.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3595af774cc1925517ecc1413a8fe66f8731bab82857c211bda432c488afc875
|
3 |
+
size 894201
|
data/Perotti etal2021 - BSE - logistics sites CO2eq.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1eba42c20a6aaf57d70f389f36bc846441ebb13c77ce7f7912ef302a75a2fc4f
|
3 |
+
size 2122111
|
data/Qi etal2018 - MSOM - shared mobility for last-mile delivery.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cea1eefeaf4c2453c2a69c43e4872d853ad677a5edcd3363f2da6d5aaa4b9b1b
|
3 |
+
size 1866416
|
data/Shahmohammadi etal2020 - EST - carbon footprint online retailing.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b9df7e46465abee74f54918fd8f426beebaf04042e516662c00540f7e973ae2b
|
3 |
+
size 1554492
|
data/Song etal2020 - POM - value of buy-online-and-pickup-in-store.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ccf76bfb9c860407186b627eec631ec0e4536cbbb9392e9065976647a4e4b017
|
3 |
+
size 316808
|
data/Tezer_Bodur2020 - JCR - greenconsumption effect.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:36ab9ce0a583cdc016703c7283accb075c0de4d7913ccd54b315683f271a55f8
|
3 |
+
size 322643
|
data/UPS2019 - sustainability progress report.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:34e06fb69b4d51f08e34afcf602dc8075fd08ae2e4dfc74c5aa67072892e42e1
|
3 |
+
size 5345260
|
data/Yuan etal2018 - POM - control of emissions trading and production.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e6fada733543daad99890048255983fc0d874caf802250af18147409834fd19d
|
3 |
+
size 807730
|
data/vanLoon etal2015 - JCP - emissions from online retailing.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:288364c2562ff08ed3f440893b1c6fa89730a6f4207f564887776478bdfbeb93
|
3 |
+
size 2037787
|
hay/__pycache__/model.cpython-310.pyc
ADDED
Binary file (900 Bytes). View file
|
|
hay/__pycache__/pipeline.cpython-310.pyc
ADDED
Binary file (1.35 kB). View file
|
|
hay/__pycache__/retriever.cpython-310.pyc
ADDED
Binary file (1.63 kB). View file
|
|
hay/model.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from haystack.nodes import PromptNode, PromptTemplate
|
2 |
+
from haystack.nodes import AnswerParser
|
3 |
+
from haystack.nodes import TransformersSummarizer
|
4 |
+
from haystack import Document
|
5 |
+
|
6 |
+
|
7 |
+
|
8 |
+
|
9 |
+
def prompting_model():
    """Build the prompt node for the Haystack pipeline.

    Returns:
        A ``PromptNode`` for ``facebook/galactica-125m`` whose default prompt
        template is ``deepset/question-answering-per-document``.
    """
    # Alternative configuration kept from the original source
    # (``lfqa_prompt`` is not defined in the visible part of this module):
    # prompt_node = PromptNode(model_name_or_path="facebook/opt-350m", default_prompt_template=lfqa_prompt)
    return PromptNode(
        model_name_or_path="facebook/galactica-125m",
        default_prompt_template="deepset/question-answering-per-document",
    )
|
19 |
+
|
20 |
+
|
21 |
+
def summarize():
    """Build the summarizer node.

    The node is intended to condense text and remove
    redundancy/repetition from it.

    Returns:
        A ``TransformersSummarizer`` for ``google/pegasus-xsum``.
    """
    return TransformersSummarizer(model_name_or_path="google/pegasus-xsum")
|
31 |
+
|
32 |
+
|
33 |
+
|
34 |
+
|
35 |
+
|
36 |
+
|
hay/pipeline.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from hay.model import prompting_model, summarize
|
2 |
+
from haystack.pipelines import Pipeline, SearchSummarizationPipeline
|
3 |
+
from hay.retriever import retriever1
|
4 |
+
|
5 |
+
def rg_pipeline(question):
    """Run a retriever -> prompt-node pipeline and print every result.

    Args:
        question: Query string passed to the pipeline.

    Returns:
        None. Each entry of the pipeline output's ``"results"`` list is
        printed to stdout, preceded by its index.
    """
    prompt_node = prompting_model()
    retriever = retriever1()

    pipe = Pipeline()
    pipe.add_node(component=retriever, name="retriever", inputs=["Query"])
    pipe.add_node(component=prompt_node, name="prompt_node", inputs=["retriever"])

    output = pipe.run(query=question)

    # Start at index 0: the previous range(1, ...) loop silently dropped the
    # first result.
    for index, result in enumerate(output["results"]):
        print("Value at " + str(index))
        print(result)

    return None
|
24 |
+
|
25 |
+
|
26 |
+
# Lazily built SearchSummarizationPipeline shared by every rs_pipeline() call,
# so the retriever, its document store and the summarizer model are created
# once per process instead of once per question.
_RS_PIPELINE = None


def _get_rs_pipeline():
    """Return the shared retriever + summarizer pipeline, building it on first use."""
    global _RS_PIPELINE
    if _RS_PIPELINE is None:
        retriever = retriever1()
        summarizer = summarize()
        _RS_PIPELINE = SearchSummarizationPipeline(summarizer=summarizer, retriever=retriever)
    return _RS_PIPELINE


def rs_pipeline(question, top_k=5):
    """Retrieve documents for ``question`` and return their summaries.

    Args:
        question: Query string passed to the pipeline.
        top_k: Number of documents requested from the retriever
            (defaults to 5, the value previously hard-coded).

    Returns:
        The ``meta['summary']`` text of each returned document, joined with a
        single space. An empty string when no documents are returned.

    Note:
        The pipeline is cached after the first call, so documents written to
        disk afterwards are not picked up until the process restarts.
    """
    result = _get_rs_pipeline().run(
        query=question,
        params={"Retriever": {"top_k": top_k}},
    )

    # Join with a space so consecutive summaries do not run together.
    return " ".join(doc.meta['summary'] for doc in result['documents'])
|
45 |
+
|
46 |
+
|
47 |
+
|
48 |
+
|
49 |
+
|
50 |
+
|
hay/retriever.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from haystack.utils import convert_files_to_docs
|
2 |
+
from haystack.nodes import PreProcessor
|
3 |
+
|
4 |
+
import pyarrow as pa
|
5 |
+
import pyarrow.dataset as ds
|
6 |
+
import pandas as pd
|
7 |
+
from datasets import Dataset, load_from_disk
|
8 |
+
import pandas as pd
|
9 |
+
|
10 |
+
from haystack.nodes import BM25Retriever
|
11 |
+
from haystack.document_stores import InMemoryDocumentStore
|
12 |
+
from haystack.document_stores import FAISSDocumentStore
|
13 |
+
from haystack.nodes import DensePassageRetriever
|
14 |
+
from haystack.document_stores import InMemoryDocumentStore
|
15 |
+
from haystack.nodes import TfidfRetriever
|
16 |
+
|
17 |
+
|
18 |
+
import warnings
|
19 |
+
warnings.filterwarnings('ignore')
|
20 |
+
|
21 |
+
def generate_docs(overlap, length, dir_path='data', output_path='outputs/docs-dataset'):
    """Convert source files into word-split documents and save them to disk.

    The files under ``dir_path`` are converted to Haystack documents, cleaned
    and split by word with a ``PreProcessor``, loaded into a pandas DataFrame,
    wrapped in a ``datasets.Dataset`` and written to ``output_path``.

    Args:
        overlap: ``split_overlap`` passed to the preprocessor.
        length: ``split_length`` passed to the preprocessor.
        dir_path: Directory holding the input files (default ``'data'``).
        output_path: Directory the dataset is saved to
            (default ``'outputs/docs-dataset'``).

    Returns:
        None.
    """
    all_docs = convert_files_to_docs(dir_path=dir_path)

    preprocessor = PreProcessor(
        clean_empty_lines=True,
        clean_whitespace=True,
        clean_header_footer=True,
        split_by="word",
        split_overlap=overlap,
        split_length=length,
        split_respect_sentence_boundary=False,
    )

    docs = preprocessor.process(all_docs)

    # Go through pandas/pyarrow so the documents can be stored as a Dataset.
    df = pd.DataFrame(docs)
    dataset = Dataset(pa.Table.from_pandas(df))
    dataset.save_to_disk(output_path)

    return None
|
48 |
+
|
49 |
+
|
50 |
+
def retriever1():
    """Build a BM25 retriever over the documents saved on disk.

    Loads the dataset stored at ``outputs/docs-dataset``, writes it into a
    BM25-enabled ``InMemoryDocumentStore`` and wraps that store in a
    ``BM25Retriever`` configured with ``top_k=5``.

    Returns:
        The configured ``BM25Retriever``.
    """
    saved_docs = load_from_disk('outputs/docs-dataset')

    store = InMemoryDocumentStore(use_bm25=True)
    store.write_documents(saved_docs)

    return BM25Retriever(document_store=store, top_k=5)
|
63 |
+
|
64 |
+
|
65 |
+
# def retriever2():
|
66 |
+
# document_store = FAISSDocumentStore(similarity="dot_product")
|
67 |
+
# retriever = DensePassageRetriever(
|
68 |
+
# document_store=document_store,
|
69 |
+
# query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
|
70 |
+
# passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base"
|
71 |
+
# )
|
72 |
+
# document_store.update_embeddings(retriever)
|
73 |
+
|
74 |
+
# return retriever
|
75 |
+
# generate_docs(20, 250)
|
76 |
+
# ret = retriever2()
|
main.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
from hay.retriever import generate_docs
|
3 |
+
from hay.pipeline import rg_pipeline, rs_pipeline
|
4 |
+
from app import application
|
5 |
+
|
6 |
+
|
7 |
+
def main(argv=None):
    """Command-line entry point: run the steps selected by the given flags.

    Args:
        argv: Optional list of argument strings. When None, argparse reads
            the process command line (the previous behavior).

    Flags:
        --docs        generate the documents dataset
        --rgpipeline  run the retriever + generator pipeline
        --rspipeline  run the retriever + summarizer pipeline and print the answer
        --gradio      launch the Gradio application
        --question    question for the pipelines; when omitted, each pipeline
                      uses its built-in default question

    Returns:
        None.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--docs', dest='docs',
        action='store_true',
        help='generate the docs and store them in DOCUMENT format',
    )
    parser.add_argument(
        '--rgpipeline', dest='rgpipeline',
        action='store_true',
        help='run the base retriever generator pipeline',
    )
    parser.add_argument(
        '--rspipeline', dest='rspipeline',
        action='store_true',
        help='run the retriever summarizer pipeline',
    )
    parser.add_argument(
        '--gradio', dest='gradio',
        action='store_true',
        help='run the application',
    )
    parser.add_argument(
        '--question', dest='question',
        default=None,
        help='question to ask the pipelines (defaults to a built-in question)',
    )

    args = parser.parse_args(argv)

    if args.docs:
        # Generate the docs and store them in DOCUMENT format.
        generate_docs(overlap=10, length=100)

    if args.rgpipeline:
        # Run the base retriever generator pipeline.
        question = args.question or "How to reduce emissions?"
        rg_pipeline(question)

    if args.rspipeline:
        # Run the retriever summarizer pipeline.
        question = args.question or "How to reduce emissions in last mile supply chain?"
        answer = rs_pipeline(question)
        print(answer)

    if args.gradio:
        # Run the application.
        application()

    return None


if __name__ == '__main__':
    main()
|
outputs/docs-dataset/data-00000-of-00001.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d321d150f9b0febdc19c3874580ec5e6907ce98edcb356f088907d2b6bf5f655
|
3 |
+
size 4072512
|
outputs/docs-dataset/dataset_info.json
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"citation": "",
|
3 |
+
"description": "",
|
4 |
+
"features": {
|
5 |
+
"id": {
|
6 |
+
"dtype": "string",
|
7 |
+
"_type": "Value"
|
8 |
+
},
|
9 |
+
"content": {
|
10 |
+
"dtype": "string",
|
11 |
+
"_type": "Value"
|
12 |
+
},
|
13 |
+
"content_type": {
|
14 |
+
"dtype": "string",
|
15 |
+
"_type": "Value"
|
16 |
+
},
|
17 |
+
"meta": {
|
18 |
+
"_split_id": {
|
19 |
+
"dtype": "int64",
|
20 |
+
"_type": "Value"
|
21 |
+
},
|
22 |
+
"_split_overlap": [
|
23 |
+
{
|
24 |
+
"doc_id": {
|
25 |
+
"dtype": "string",
|
26 |
+
"_type": "Value"
|
27 |
+
},
|
28 |
+
"range": {
|
29 |
+
"feature": {
|
30 |
+
"dtype": "int64",
|
31 |
+
"_type": "Value"
|
32 |
+
},
|
33 |
+
"_type": "Sequence"
|
34 |
+
}
|
35 |
+
}
|
36 |
+
],
|
37 |
+
"name": {
|
38 |
+
"dtype": "string",
|
39 |
+
"_type": "Value"
|
40 |
+
}
|
41 |
+
},
|
42 |
+
"id_hash_keys": {
|
43 |
+
"feature": {
|
44 |
+
"dtype": "string",
|
45 |
+
"_type": "Value"
|
46 |
+
},
|
47 |
+
"_type": "Sequence"
|
48 |
+
},
|
49 |
+
"score": {
|
50 |
+
"dtype": "null",
|
51 |
+
"_type": "Value"
|
52 |
+
},
|
53 |
+
"embedding": {
|
54 |
+
"dtype": "null",
|
55 |
+
"_type": "Value"
|
56 |
+
}
|
57 |
+
},
|
58 |
+
"homepage": "",
|
59 |
+
"license": ""
|
60 |
+
}
|
outputs/docs-dataset/state.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_data_files": [
|
3 |
+
{
|
4 |
+
"filename": "data-00000-of-00001.arrow"
|
5 |
+
}
|
6 |
+
],
|
7 |
+
"_fingerprint": "b673ed2843344a0a",
|
8 |
+
"_format_columns": null,
|
9 |
+
"_format_kwargs": {},
|
10 |
+
"_format_type": null,
|
11 |
+
"_output_all_columns": false,
|
12 |
+
"_split": null
|
13 |
+
}
|
outputs/faiss_index.faiss
ADDED
Binary file (45 Bytes). View file
|
|
outputs/faiss_index.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"faiss_index_factory_str": "Flat"}
|