carbonnnnn committed
Commit 8677815 · 1 Parent(s): 4f52cc8

First version

Files changed (49)
  1. .gitattributes +1 -0
  2. app.py +27 -0
  3. clear.py +8 -0
  4. data/Bandeira etal2019 - TRD - EV last mile Rio de Janeiro.pdf +3 -0
  5. data/Bayer_Aklin2020 - ES - EU ETS reduced CO2.pdf +3 -0
  6. data/Belavina2020 - MSOM - appendix.pdf +3 -0
  7. data/Belavina2020 - MSOM - grocery store density and food waste.pdf +3 -0
  8. data/Beloavia etal2017 - MS - online grocery retail - online appendix.pdf +3 -0
  9. data/Beloavia etal2017 - MS - online grocery retail.pdf +3 -0
  10. data/Cachon 2014 - MS - Retail Store Density and the Cost of Greenhouse Gas Emissions.pdf +3 -0
  11. data/Carlsson etal2016 - OR - household-level economics of scale.pdf +3 -0
  12. data/Carlsson_Jia2014 - TS - continuous facility location -sm.pdf +3 -0
  13. data/Carlsson_Jia2014 - TS - continuous facility location.pdf +3 -0
  14. data/Chakraborty etal2021 - JORS - EV adoption and policy decision.pdf +3 -0
  15. data/Dilek etal2017 - EJOR - retail location carbon penalty.pdf +3 -0
  16. data/Edwards etal2009 - IJPDLM - carbon footprints of conventional and online retailing.pdf +3 -0
  17. data/Fan etal2022 - MS - price commitment control carbon emissions.pdf +3 -0
  18. data/Figliozzi2020 - TRD - emission reduction autonomous vehicles.pdf +3 -0
  19. data/GLS2020 - sustainability report.pdf +3 -0
  20. data/Gao etal2018 - TRE - green supply chain consumer.pdf +3 -0
  21. data/Giesler_Veresiu2014 - JCR - responsible consumer.pdf +3 -0
  22. data/Gong_Zhou2013 - OR - production plan with emissions trading.pdf +3 -0
  23. data/Goodchild_Toy2018 - TRD - drone.pdf +3 -0
  24. data/Hong_Zimmerman2021 - TRD - GHG autonomous vehicles.pdf +3 -0
  25. data/Jaller_Pahwa2020 - TRD - environmental impact of online shopping.pdf +3 -0
  26. data/Kopplin etal2021- TRD - consumer acceptance of shared e-scooters.pdf +3 -0
  27. data/McKinnon2022 - Environmentally sustainable city logistics.pdf +3 -0
  28. data/McLeod etal2020 - TRD - porters and cycle couriers for last mile delivery.pdf +3 -0
  29. data/Park etal2015 - POM - SC design monopolistic competition.pdf +3 -0
  30. data/Perotti etal2021 - BSE - logistics sites CO2eq.pdf +3 -0
  31. data/Qi etal2018 - MSOM - shared mobility for last-mile delivery.pdf +3 -0
  32. data/Shahmohammadi etal2020 - EST - carbon footprint online retailing.pdf +3 -0
  33. data/Song etal2020 - POM - value of buy-online-and-pickup-in-store.pdf +3 -0
  34. data/Tezer_Bodur2020 - JCR - greenconsumption effect.pdf +3 -0
  35. data/UPS2019 - sustainability progress report.pdf +3 -0
  36. data/Yuan etal2018 - POM - control of emissions trading and production.pdf +3 -0
  37. data/vanLoon etal2015 - JCP - emissions from online retailing.pdf +3 -0
  38. hay/__pycache__/model.cpython-310.pyc +0 -0
  39. hay/__pycache__/pipeline.cpython-310.pyc +0 -0
  40. hay/__pycache__/retriever.cpython-310.pyc +0 -0
  41. hay/model.py +36 -0
  42. hay/pipeline.py +50 -0
  43. hay/retriever.py +76 -0
  44. main.py +64 -0
  45. outputs/docs-dataset/data-00000-of-00001.arrow +3 -0
  46. outputs/docs-dataset/dataset_info.json +60 -0
  47. outputs/docs-dataset/state.json +13 -0
  48. outputs/faiss_index.faiss +0 -0
  49. outputs/faiss_index.json +1 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.pdf filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,27 @@
+ # Application file for Gradio App
+
+ import gradio as gr
+ import time
+ from hay.pipeline import rs_pipeline
+
+ with gr.Blocks() as chat:
+     chatbot = gr.Chatbot()
+     msg = gr.Textbox()
+     clear = gr.ClearButton([msg, chatbot])
+
+     def user(user_message, history):
+         return "", history + [[user_message, None]]
+
+     def respond(message, chat_history):
+         question = str(message)
+         answer = rs_pipeline(question)
+         bot_message = answer
+         chat_history.append((message, bot_message))
+         time.sleep(2)
+         return " ", chat_history
+
+     msg.submit(respond, [msg, chatbot], [msg, chatbot])
+
+ def application():
+     chat.launch()
+
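For reference, the chat callback can be exercised without launching the Gradio UI. A minimal sketch (not part of this commit), assuming the document dataset in outputs/docs-dataset has already been built with `python main.py --docs` and that `rs_pipeline` returns a plain string:

# Sketch: drive the respond() logic directly, without the Gradio UI.
# Assumes outputs/docs-dataset exists so the retriever inside rs_pipeline can load it.
from hay.pipeline import rs_pipeline

history = []                                  # list of (user_message, bot_message) pairs
question = "How to reduce emissions in last mile delivery?"
answer = rs_pipeline(question)                # retrieve top chunks, then summarize them
history.append((question, answer))
print(answer)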
clear.py ADDED
@@ -0,0 +1,8 @@
+ # USE THIS FILE TO CLEAR GPU CACHE
+
+ import gc
+ import torch
+ # def report_gpu():
+ print(torch.cuda.list_gpu_processes())
+ gc.collect()
+ torch.cuda.empty_cache()
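The commented-out `# def report_gpu():` line suggests these calls were meant to live in a helper. A minimal sketch of that wrapper (not part of this commit), under the same assumption:

# Sketch: wrap the same three calls in the report_gpu() helper that the
# commented-out line above hints at, so the cache can be cleared on demand.
import gc
import torch

def report_gpu():
    print(torch.cuda.list_gpu_processes())   # show processes holding GPU memory
    gc.collect()                              # release unreferenced Python objects
    torch.cuda.empty_cache()                  # free cached CUDA memory blocks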
data/Bandeira etal2019 - TRD - EV last mile Rio de Janeiro.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8d1b1d2ab290adee450d72ce4d36681c7a823c5d15efd61468be44f6a6b6f5a3
+ size 1080580
data/Bayer_Aklin2020 - ES - EU ETS reduced CO2.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6c34e0b4ce49fd511f441e9e41fa00b6ffb781372b453170a846aaf647a5371c
+ size 684692
data/Belavina2020 - MSOM - appendix.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2875ff00ce0f04c01018971ce296e68b45a107befeec34c2409f11af84a1b7c6
+ size 1271531
data/Belavina2020 - MSOM - grocery store density and food waste.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a85c9d89c27d4220fbac0c139a6c2a6c8fe43e6f74cf47648b69b04b0d476e30
+ size 1029737
data/Beloavia etal2017 - MS - online grocery retail - online appendix.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d51453559bfd9ff58520cc93e9bce4e25883fca9fb8a9434f3e2e6351ebcb930
+ size 1195260
data/Beloavia etal2017 - MS - online grocery retail.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d8eba61b54c22cbb2aa5a2a55cb3d903eccb3ae2fe2f13aec6d0234c1cab0a31
+ size 518288
data/Cachon 2014 - MS - Retail Store Density and the Cost of Greenhouse Gas Emissions.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b662a5533e51cfc2bfac81d41961b1bba4ffad4443386ed35dbd92da131e4d64
+ size 400283
data/Carlsson etal2016 - OR - household-level economics of scale.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3d160fd7121de0f203c520286599c28814077029a3e0d471c927b68a8183d133
+ size 551476
data/Carlsson_Jia2014 - TS - continuous facility location -sm.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0e9124240851e4698c9c01e273b82f98624e9e4dd917f18aac673fada90afce7
+ size 432199
data/Carlsson_Jia2014 - TS - continuous facility location.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:57acd403a3b56be6f828f1ed05bcc264960d131c5fb2192a78d95e77c3b95781
+ size 1692461
data/Chakraborty etal2021 - JORS - EV adoption and policy decision.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f0eacf24de220b0708cbdb8781997a7eb20eb1ba2df3221ca5395e82f1ee5592
+ size 2798560
data/Dilek etal2017 - EJOR - retail location carbon penalty.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:75edc2505d908e6824b5031149100158330f2727fa734725f559dd628a8333d3
+ size 1326566
data/Edwards etal2009 - IJPDLM - carbon footprints of conventional and online retailing.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:61bb1ca9b33a288d4ea680559199d9ec529f75f0956d2f70efd8aa01f1f3d92d
+ size 237714
data/Fan etal2022 - MS - price commitment control carbon emissions.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c36295c19a33c3e510ee80da493acf6da9da48b6e7d0fdcf8203368d1a67f4dc
+ size 1531063
data/Figliozzi2020 - TRD - emission reduction autonomous vehicles.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f38845c42403de50285545b64d7860fd675ca9433cd0ef90fb4494973329432e
+ size 678602
data/GLS2020 - sustainability report.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cde30d8d1e45adcf5cd530c5164abb30beae8825a59663aa22e821f97260a54a
+ size 5286679
data/Gao etal2018 - TRE - green supply chain consumer.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:32a9d9451fde6c2d2ba25694f889734c80e4c1b0ffafa6beb18a697899272d82
+ size 1034859
data/Giesler_Veresiu2014 - JCR - responsible consumer.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1473f5f29cbc719745bec983dfb7adefcd15d45a37f207fc2c3fe41dba79f71b
+ size 426868
data/Gong_Zhou2013 - OR - production plan with emissions trading.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:934962c1ecb3250229dfd51f99919219d3222c64c040bdee9dc3e674ee40168a
+ size 387375
data/Goodchild_Toy2018 - TRD - drone.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cc5abcd978e565a28dc55130830a9ab70ce9338b799682f00a144628dd6d7a8c
+ size 2325404
data/Hong_Zimmerman2021 - TRD - GHG autonomous vehicles.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d6153c7d355b54546b7d5851671745fcea2732d9684f055cb3fb227971a34e8f
+ size 1455529
data/Jaller_Pahwa2020 - TRD - environmental impact of online shopping.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0a9b4557084e62b0fa2fd25ec1f45b5ce4642a1a0cfe9f5ab14af68b3aa24947
+ size 1780179
data/Kopplin etal2021- TRD - consumer acceptance of shared e-scooters.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5539f86cf02f250119af34c0fc4bdff1c144458b82e04c1266f6f38e0e9618fa
+ size 798226
data/McKinnon2022 - Environmentally sustainable city logistics.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fd02c40f124beef047acf208aca769a58513e6b1b75de0e3071ed58836022dc7
+ size 992619
data/McLeod etal2020 - TRD - porters and cycle couriers for last mile delivery.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ac3f78a064fd764955d18380d48cdb64e6d158d65baf26facbb7972033deda2b
+ size 2125351
data/Park etal2015 - POM - SC design monopolistic competition.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3595af774cc1925517ecc1413a8fe66f8731bab82857c211bda432c488afc875
+ size 894201
data/Perotti etal2021 - BSE - logistics sites CO2eq.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1eba42c20a6aaf57d70f389f36bc846441ebb13c77ce7f7912ef302a75a2fc4f
+ size 2122111
data/Qi etal2018 - MSOM - shared mobility for last-mile delivery.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cea1eefeaf4c2453c2a69c43e4872d853ad677a5edcd3363f2da6d5aaa4b9b1b
+ size 1866416
data/Shahmohammadi etal2020 - EST - carbon footprint online retailing.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b9df7e46465abee74f54918fd8f426beebaf04042e516662c00540f7e973ae2b
+ size 1554492
data/Song etal2020 - POM - value of buy-online-and-pickup-in-store.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ccf76bfb9c860407186b627eec631ec0e4536cbbb9392e9065976647a4e4b017
+ size 316808
data/Tezer_Bodur2020 - JCR - greenconsumption effect.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:36ab9ce0a583cdc016703c7283accb075c0de4d7913ccd54b315683f271a55f8
+ size 322643
data/UPS2019 - sustainability progress report.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:34e06fb69b4d51f08e34afcf602dc8075fd08ae2e4dfc74c5aa67072892e42e1
+ size 5345260
data/Yuan etal2018 - POM - control of emissions trading and production.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e6fada733543daad99890048255983fc0d874caf802250af18147409834fd19d
+ size 807730
data/vanLoon etal2015 - JCP - emissions from online retailing.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:288364c2562ff08ed3f440893b1c6fa89730a6f4207f564887776478bdfbeb93
+ size 2037787
hay/__pycache__/model.cpython-310.pyc ADDED
Binary file (900 Bytes).
 
hay/__pycache__/pipeline.cpython-310.pyc ADDED
Binary file (1.35 kB).
 
hay/__pycache__/retriever.cpython-310.pyc ADDED
Binary file (1.63 kB).
 
hay/model.py ADDED
@@ -0,0 +1,36 @@
+ from haystack.nodes import PromptNode, PromptTemplate
+ from haystack.nodes import AnswerParser
+ from haystack.nodes import TransformersSummarizer
+ from haystack import Document
+
+
+
+
+ def prompting_model():
+     '''
+     Define a prompt node for the Haystack pipeline
+     '''
+
+     prompt_node = PromptNode(model_name_or_path="facebook/galactica-125m", default_prompt_template="deepset/question-answering-per-document")
+
+     # prompt_node = PromptNode(model_name_or_path="facebook/opt-350m", default_prompt_template=lfqa_prompt)
+
+     return prompt_node
+
+
+ def summarize():
+
+     '''
+     Use a summarizer node to summarize the output of the generator
+     and remove redundancy/repetition
+     '''
+
+     summarizer = TransformersSummarizer(model_name_or_path="google/pegasus-xsum")
+
+     return summarizer
+
+
+
+
+
+
hay/pipeline.py ADDED
@@ -0,0 +1,50 @@
+ from hay.model import prompting_model, summarize
+ from haystack.pipelines import Pipeline, SearchSummarizationPipeline
+ from hay.retriever import retriever1
+
+ def rg_pipeline(question):
+     '''
+     Define a retriever-generator pipeline and generate output for the given question
+     '''
+
+     prompt_node = prompting_model()
+     retriever = retriever1()
+
+     pipe = Pipeline()
+     pipe.add_node(component=retriever, name="retriever", inputs=["Query"])
+     pipe.add_node(component=prompt_node, name="prompt_node", inputs=["retriever"])
+
+     output = pipe.run(query=question)
+
+     for i in range(1, len(output['results'])):
+         print("Value at " + str(i))
+         print(output["results"][i])
+
+     return None
+
+
+ def rs_pipeline(question):
+     '''
+     Define a retriever-summarizer pipeline and generate output for the given question
+     '''
+
+     retriever = retriever1()
+     summarizer = summarize()
+
+     # Get the top 5 results from the retriever and summarize them
+     pipeline = SearchSummarizationPipeline(summarizer=summarizer, retriever=retriever)
+     result = pipeline.run(query=question, params={"Retriever": {"top_k": 5}})
+
+     output = ''
+     for i in range(len(result['documents'])):
+         output += result['documents'][i].meta['summary']
+
+     # print(output)
+
+     return output
+
+
+
+
+
+
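For reference, a minimal sketch (not part of this commit) of inspecting which source papers the summaries come from, using the same SearchSummarizationPipeline as rs_pipeline; whether the original `name` meta field survives summarization depends on the Haystack version, so treat that as an assumption:

# Sketch: same retriever + summarizer pipeline as rs_pipeline above, but also
# printing the source file name stored in each document's meta by the preprocessor.
# Assumes outputs/docs-dataset has been built and that meta["name"] is preserved.
from haystack.pipelines import SearchSummarizationPipeline
from hay.model import summarize
from hay.retriever import retriever1

pipeline = SearchSummarizationPipeline(summarizer=summarize(), retriever=retriever1())
result = pipeline.run(query="How to reduce emissions in last mile supply chain?",
                      params={"Retriever": {"top_k": 5}})

for doc in result["documents"]:
    print(doc.meta.get("name"), "->", doc.meta.get("summary"))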
hay/retriever.py ADDED
@@ -0,0 +1,76 @@
+ from haystack.utils import convert_files_to_docs
+ from haystack.nodes import PreProcessor
+
+ import pyarrow as pa
+ import pyarrow.dataset as ds
+ import pandas as pd
+ from datasets import Dataset, load_from_disk
+ import pandas as pd
+
+ from haystack.nodes import BM25Retriever
+ from haystack.document_stores import InMemoryDocumentStore
+ from haystack.document_stores import FAISSDocumentStore
+ from haystack.nodes import DensePassageRetriever
+ from haystack.document_stores import InMemoryDocumentStore
+ from haystack.nodes import TfidfRetriever
+
+
+ import warnings
+ warnings.filterwarnings('ignore')
+
+ def generate_docs(overlap, length):
+
+     '''
+     Takes in split length and split overlap,
+     preprocesses the PDFs, and saves the resulting docs to disk as a dataset
+     '''
+     all_docs = convert_files_to_docs(dir_path='data')
+
+     preprocessor = PreProcessor(
+         clean_empty_lines=True,
+         clean_whitespace=True,
+         clean_header_footer=True,
+         split_by="word",
+         split_overlap=overlap,
+         split_length=length,
+         split_respect_sentence_boundary=False,
+     )
+
+     docs = preprocessor.process(all_docs)
+
+     # print(f"n_files_input: {len(all_docs)}\nn_docs_output: {len(docs)}")
+
+     df = pd.DataFrame(docs)
+     dataset = Dataset(pa.Table.from_pandas(df))
+     dataset.save_to_disk('outputs/docs-dataset')
+
+     return None
+
+
+ def retriever1():
+     '''
+     Use a BM25 retriever to retrieve data
+     '''
+
+     dataset = load_from_disk('outputs/docs-dataset')
+
+     # BM25Retriever with InMemoryDocumentStore
+     document_store = InMemoryDocumentStore(use_bm25=True)
+     document_store.write_documents(dataset)
+     retriever = BM25Retriever(document_store=document_store, top_k=5)
+
+     return retriever
+
+
+ # def retriever2():
+ #     document_store = FAISSDocumentStore(similarity="dot_product")
+ #     retriever = DensePassageRetriever(
+ #         document_store=document_store,
+ #         query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
+ #         passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base"
+ #     )
+ #     document_store.update_embeddings(retriever)
+
+ #     return retriever
+ # generate_docs(20, 250)
+ # ret = retriever2()
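A minimal sketch (not part of this commit) of the dense-retrieval variant that the commented-out retriever2() hints at, consistent with the Flat FAISS index committed under outputs/; the exact save path and the need to rebuild embeddings are assumptions:

# Sketch: dense retriever with a FAISS document store, completing the
# commented-out retriever2() above. Assumes outputs/docs-dataset exists.
from datasets import load_from_disk
from haystack.document_stores import FAISSDocumentStore
from haystack.nodes import DensePassageRetriever

def retriever2():
    dataset = load_from_disk('outputs/docs-dataset')
    document_store = FAISSDocumentStore(similarity="dot_product",
                                        faiss_index_factory_str="Flat")
    document_store.write_documents(dataset)
    retriever = DensePassageRetriever(
        document_store=document_store,
        query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
        passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
    )
    document_store.update_embeddings(retriever)       # embed all passages
    document_store.save("outputs/faiss_index.faiss")  # writes the index plus a .json config
    return retriever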
main.py ADDED
@@ -0,0 +1,64 @@
+ import argparse
+ from hay.retriever import generate_docs
+ from hay.pipeline import rg_pipeline, rs_pipeline
+ from app import application
+
+
+ def main():
+     parser = argparse.ArgumentParser()
+
+     parser.add_argument(
+         '--docs', dest='docs',
+         action='store_true'
+     )
+
+     parser.add_argument(
+         '--rgpipeline', dest='rgpipeline',
+         action='store_true'
+     )
+
+     parser.add_argument(
+         '--rspipeline', dest='rspipeline',
+         action='store_true'
+     )
+
+     parser.add_argument(
+         '--gradio', dest='gradio',
+         action='store_true'
+     )
+
+     args = parser.parse_args()
+
+     if args.docs:
+         '''
+         Use this argument to generate the docs and store them on disk as a dataset
+         '''
+         generate_docs(overlap=10, length=100)
+
+     if args.rgpipeline:
+         '''
+         Use this argument to run the base retriever-generator pipeline
+         '''
+         question = "How to reduce emissions?"
+         rg_pipeline(question)
+
+     if args.rspipeline:
+         '''
+         Use this argument to run the retriever-summarizer pipeline
+         '''
+
+         question = "How to reduce emissions in last mile supply chain?"
+         answer = rs_pipeline(question)
+         print(answer)
+
+     if args.gradio:
+         '''
+         Use this argument to run the Gradio application
+         '''
+         application()
+
+     return None
+
+
+ if __name__ == '__main__':
+     main()
outputs/docs-dataset/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d321d150f9b0febdc19c3874580ec5e6907ce98edcb356f088907d2b6bf5f655
+ size 4072512
outputs/docs-dataset/dataset_info.json ADDED
@@ -0,0 +1,60 @@
+ {
+   "citation": "",
+   "description": "",
+   "features": {
+     "id": {
+       "dtype": "string",
+       "_type": "Value"
+     },
+     "content": {
+       "dtype": "string",
+       "_type": "Value"
+     },
+     "content_type": {
+       "dtype": "string",
+       "_type": "Value"
+     },
+     "meta": {
+       "_split_id": {
+         "dtype": "int64",
+         "_type": "Value"
+       },
+       "_split_overlap": [
+         {
+           "doc_id": {
+             "dtype": "string",
+             "_type": "Value"
+           },
+           "range": {
+             "feature": {
+               "dtype": "int64",
+               "_type": "Value"
+             },
+             "_type": "Sequence"
+           }
+         }
+       ],
+       "name": {
+         "dtype": "string",
+         "_type": "Value"
+       }
+     },
+     "id_hash_keys": {
+       "feature": {
+         "dtype": "string",
+         "_type": "Value"
+       },
+       "_type": "Sequence"
+     },
+     "score": {
+       "dtype": "null",
+       "_type": "Value"
+     },
+     "embedding": {
+       "dtype": "null",
+       "_type": "Value"
+     }
+   },
+   "homepage": "",
+   "license": ""
+ }
outputs/docs-dataset/state.json ADDED
@@ -0,0 +1,13 @@
+ {
+   "_data_files": [
+     {
+       "filename": "data-00000-of-00001.arrow"
+     }
+   ],
+   "_fingerprint": "b673ed2843344a0a",
+   "_format_columns": null,
+   "_format_kwargs": {},
+   "_format_type": null,
+   "_output_all_columns": false,
+   "_split": null
+ }
outputs/faiss_index.faiss ADDED
Binary file (45 Bytes).
 
outputs/faiss_index.json ADDED
@@ -0,0 +1 @@
+ {"faiss_index_factory_str": "Flat"}