etweedy committed on
Commit
c09673f
·
1 Parent(s): e77a114

Upload 9 files

Browse files
app.py CHANGED
@@ -24,7 +24,7 @@ else:
24
  # - add examples
25
  # What else??
26
 
27
-
28
  if 'response' not in st.session_state:
29
  st.session_state['response'] = ''
30
  if 'context' not in st.session_state:
@@ -52,37 +52,73 @@ def clear_boxes():
52
 
53
  with st.spinner('Loading the model...'):
54
  model, tokenizer = get_model()
55
-
56
- ex_q, ex_c = get_examples()
57
-
58
- for i in range(len(ex_q)):
59
- st.sidebar.button(
60
- label = f'Try example {i+1}',
61
- key = f'ex_button_{i+1}',
62
- on_click = fill_in_example,
63
- args=(i,),
64
- )
65
- st.sidebar.button(
66
- label = 'Clear boxes',
67
- key = 'clear_button',
68
- on_click = clear_boxes,
69
- )
70
 
71
  st.header('RoBERTa Q&A model')
72
 
73
  st.markdown('''
74
- This app demonstrates the answer-retrieval capabilities of a finetuned RoBERTa (Robustly optimized Bidirectional Encoder Representations from Transformers) model. The [RoBERTa base model](https://huggingface.co/roberta-base) was fine-tuned on version 2 of the [SQuAD (Stanford Question Answering Dataset) dataset](https://huggingface.co/datasets/squad_v2), a dataset of context-question-answer triples. The objective of the model is to retrieve the answer to the question from the context paragraph.
75
-
76
- Version 2 incorporates the 100,000 samples from Version 1.1, along with 50,000 'unanswerable' questions, i.e. samples in the question cannot be answered using the context given.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
- Please type or paste a context paragraph and question you'd like to ask about it. The model will attempt to answer the question, or otherwise will report that it cannot.
 
79
 
80
- Alternatively, you can try some of the examples provided on the sidebar to the left.
81
  ''')
 
 
 
82
  input_container = st.container()
83
- st.divider()
84
  response_container = st.container()
85
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  # Form for user inputs
87
  with input_container:
88
  with st.form(key='input_form',clear_on_submit=False):
@@ -94,7 +130,6 @@ with input_container:
94
  placeholder='Enter your context paragraph here.',
95
  height=300,
96
  )
97
- st.session_state['context'] = context
98
  question = st.text_input(
99
  label='Question',
100
  value=st.session_state['question'],
@@ -102,9 +137,10 @@ with input_container:
102
  label_visibility='hidden',
103
  placeholder='Enter your question here.',
104
  )
105
- st.session_state['question'] = question
106
  query_submitted = st.form_submit_button("Submit")
107
  if query_submitted:
 
 
108
  with st.spinner('Generating response...'):
109
  data_raw = Dataset.from_dict(
110
  {
 
24
  # - add examples
25
  # What else??
26
 
27
+ # Initialize session state variables
28
  if 'response' not in st.session_state:
29
  st.session_state['response'] = ''
30
  if 'context' not in st.session_state:
 
52
 
53
  with st.spinner('Loading the model...'):
54
  model, tokenizer = get_model()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  st.header('RoBERTa Q&A model')
57
 
58
  st.markdown('''
59
+ This app demonstrates the answer-retrieval capabilities of a fine-tuned RoBERTa (Robustly optimized Bidirectional Encoder Representations from Transformers) model.
60
+ ''')
61
+ with st.expander('Click to read more about the model...'):
62
+ st.markdown('''
63
+ * [Click here](https://huggingface.co/etweedy/roberta-base-squad-v2) to visit the Hugging Face model card for this fine-tuned model.
64
+ * To create this model, the [RoBERTa base model](https://huggingface.co/roberta-base) was fine-tuned on Version 2 of [SQuAD (Stanford Question Answering Dataset)](https://huggingface.co/datasets/squad_v2), a dataset of context-question-answer triples.
65
+ * The objective of the model is "extractive question answering", the task of retrieving the answer to the question from a given context text corpus.
66
+ * SQuAD Version 2 incorporates the 100,000 samples from Version 1.1, along with 50,000 'unanswerable' questions, i.e. samples in the question cannot be answered using the context given.
67
+ * The original base RoBERTa model was introduced in [this paper](https://arxiv.org/abs/1907.11692) and [this repository](https://github.com/facebookresearch/fairseq/tree/main/examples/roberta). Here's a citation for that base model:
68
+ ```bibtex
69
+ @article{DBLP:journals/corr/abs-1907-11692,
70
+ author = {Yinhan Liu and
71
+ Myle Ott and
72
+ Naman Goyal and
73
+ Jingfei Du and
74
+ Mandar Joshi and
75
+ Danqi Chen and
76
+ Omer Levy and
77
+ Mike Lewis and
78
+ Luke Zettlemoyer and
79
+ Veselin Stoyanov},
80
+ title = {RoBERTa: {A} Robustly Optimized {BERT} Pretraining Approach},
81
+ journal = {CoRR},
82
+ volume = {abs/1907.11692},
83
+ year = {2019},
84
+ url = {http://arxiv.org/abs/1907.11692},
85
+ archivePrefix = {arXiv},
86
+ eprint = {1907.11692},
87
+ timestamp = {Thu, 01 Aug 2019 08:59:33 +0200},
88
+ biburl = {https://dblp.org/rec/journals/corr/abs-1907-11692.bib},
89
+ bibsource = {dblp computer science bibliography, https://dblp.org}
90
+ }
91
+ ```
92
+ ''')
93
 
94
+ st.markdown('''
95
+ Please type or paste a context paragraph and question you'd like to ask about it. The model will attempt to answer the question, or otherwise will report that it cannot. Your results will appear below the question field when the model is finished running.
96
 
97
+ Alternatively, you can try an example by clicking one of the buttons below:
98
  ''')
99
+
100
+ ex_q, ex_c = get_examples()
101
+ example_container = st.container()
102
  input_container = st.container()
 
103
  response_container = st.container()
104
+
105
+ with example_container:
106
+ ex_cols = st.columns(len(ex_q)+1)
107
+ for i in range(len(ex_q)):
108
+ with ex_cols[i]:
109
+ st.button(
110
+ label = f'Try example {i+1}',
111
+ key = f'ex_button_{i+1}',
112
+ on_click = fill_in_example,
113
+ args=(i,),
114
+ )
115
+ with ex_cols[-1]:
116
+ st.button(
117
+ label = "Clear all fields",
118
+ key = "clear_button",
119
+ on_click = clear_boxes,
120
+ )
121
+
122
  # Form for user inputs
123
  with input_container:
124
  with st.form(key='input_form',clear_on_submit=False):
 
130
  placeholder='Enter your context paragraph here.',
131
  height=300,
132
  )
 
133
  question = st.text_input(
134
  label='Question',
135
  value=st.session_state['question'],
 
137
  label_visibility='hidden',
138
  placeholder='Enter your question here.',
139
  )
 
140
  query_submitted = st.form_submit_button("Submit")
141
  if query_submitted:
142
+ st.session_state['question'] = question
143
+ st.session_state['context'] = context
144
  with st.spinner('Generating response...'):
145
  data_raw = Dataset.from_dict(
146
  {
examples.csv CHANGED
@@ -1,4 +1,4 @@
1
  question,context
2
- What did Oppenheimer remark abotut the explosion?,"Oppenheimer attended Harvard University, where he earned a bachelor's degree in chemistry in 1925. He studied physics at the University of Cambridge and University of Göttingen, where he received his PhD in 1927. He held academic positions at the University of California, Berkeley, and the California Institute of Technology, and made significant contributions to theoretical physics, including in quantum mechanics and nuclear physics. During World War II, he was recruited to work on the Manhattan Project, and in 1943 was appointed as director of the Los Alamos Laboratory in New Mexico, tasked with developing the weapons. Oppenheimer's leadership and scientific expertise were instrumental in the success of the project. He was among those who observed the Trinity test on July 16, 1945, in which the first atomic bomb was successfully detonated. He later remarked that the explosion brought to his mind words from the Hindu scripture Bhagavad Gita: ""Now I am become Death, the destroyer of worlds."" In August 1945, the atomic bombs were used on the Japanese cities of Hiroshima and Nagasaki, the only use of nuclear weapons in war."
3
- What was the phrase on the billboard which inspired the Twinkies name?,"Twinkies were invented on April 6, 1930, by Canadian-born baker James Alexander Dewar for the Continental Baking Company in Schiller Park, Illinois. Realizing that several machines used for making cream-filled strawberry shortcake sat idle when strawberries were out of season, Dewar conceived a snack cake filled with banana cream, which he dubbed the Twinkie. Ritchy Koph said he came up with the name when he saw a billboard in St. Louis for ""Twinkle Toe Shoes"". During World War II, bananas were rationed, and the company was forced to switch to vanilla cream. This change proved popular, and banana-cream Twinkies were not widely re-introduced. The original flavor was occasionally found in limited time only promotions, but the company used vanilla cream for most Twinkies. In 1988, Fruit and Cream Twinkies were introduced with a strawberry filling swirled into the cream. The product was soon dropped. Vanilla's dominance over banana flavoring was challenged in 2005, following a month-long promotion of the movie King Kong. Hostess saw its Twinkie sales rise 20 percent during the promotion, and in 2007 restored the banana-cream Twinkie to its snack lineup although they are now made with 2% banana purée."
4
- What happened in November 2020?,"""Baby Shark"" is a children's song associated with a dance involving hand movements that originated as a campfire song dating back to at least the 20th century. In 2016, ""Baby Shark"" became very popular when Pinkfong, a South Korean entertainment company, released a version of the song with a YouTube music video that went viral across social media, online video, and radio. In January 2022, it became the first YouTube video to reach 10 billion views. In November 2020, Pinkfong's version became the most-viewed YouTube video of all time, with over 12 billion views as of April 2023. ""Baby Shark"" originated as a campfire song or chant. The original song dates back to at least the 20th century, potentially created by camp counselors inspired by the movie Jaws. In the chant, each member of a family of sharks is introduced, with campers using their hands to imitate the sharks' jaws. Different versions of the song have the sharks hunting fish, eating a sailor, or killing people who then go to heaven. Various entities have copyrighted original videos and sound recordings of the song, and some have trademarked merchandise based on their versions. However, according to The New York Times, the underlying song and characters are believed to be in the public domain."
 
1
  question,context
2
+ What did Oppenheimer remark about the explosion?,"Oppenheimer attended Harvard University, where he earned a bachelor's degree in chemistry in 1925. He studied physics at the University of Cambridge and University of Göttingen, where he received his PhD in 1927. He held academic positions at the University of California, Berkeley, and the California Institute of Technology, and made significant contributions to theoretical physics, including in quantum mechanics and nuclear physics. During World War II, he was recruited to work on the Manhattan Project, and in 1943 was appointed as director of the Los Alamos Laboratory in New Mexico, tasked with developing the weapons. Oppenheimer's leadership and scientific expertise were instrumental in the success of the project. He was among those who observed the Trinity test on July 16, 1945, in which the first atomic bomb was successfully detonated. He later remarked that the explosion brought to his mind words from the Hindu scripture Bhagavad Gita: ""Now I am become Death, the destroyer of worlds."" In August 1945, the atomic bombs were used on the Japanese cities of Hiroshima and Nagasaki, the only use of nuclear weapons in war."
3
+ Why did Twinkies change to vanilla cream?,"Twinkies were invented on April 6, 1930, by Canadian-born baker James Alexander Dewar for the Continental Baking Company in Schiller Park, Illinois. Realizing that several machines used for making cream-filled strawberry shortcake sat idle when strawberries were out of season, Dewar conceived a snack cake filled with banana cream, which he dubbed the Twinkie. Ritchy Koph said he came up with the name when he saw a billboard in St. Louis for ""Twinkle Toe Shoes"". During World War II, bananas were rationed, and the company was forced to switch to vanilla cream. This change proved popular, and banana-cream Twinkies were not widely re-introduced. The original flavor was occasionally found in limited time only promotions, but the company used vanilla cream for most Twinkies. In 1988, Fruit and Cream Twinkies were introduced with a strawberry filling swirled into the cream. The product was soon dropped. Vanilla's dominance over banana flavoring was challenged in 2005, following a month-long promotion of the movie King Kong. Hostess saw its Twinkie sales rise 20 percent during the promotion, and in 2007 restored the banana-cream Twinkie to its snack lineup although they are now made with 2% banana purée."
4
+ When was Pinkfong founded?,"""Baby Shark"" is a children's song associated with a dance involving hand movements that originated as a campfire song dating back to at least the 20th century. In 2016, ""Baby Shark"" became very popular when Pinkfong, a South Korean entertainment company, released a version of the song with a YouTube music video that went viral across social media, online video, and radio. In January 2022, it became the first YouTube video to reach 10 billion views. In November 2020, Pinkfong's version became the most-viewed YouTube video of all time, with over 12 billion views as of April 2023. ""Baby Shark"" originated as a campfire song or chant. The original song dates back to at least the 20th century, potentially created by camp counselors inspired by the movie Jaws. In the chant, each member of a family of sharks is introduced, with campers using their hands to imitate the sharks' jaws. Different versions of the song have the sharks hunting fish, eating a sailor, or killing people who then go to heaven. Various entities have copyrighted original videos and sound recordings of the song, and some have trademarked merchandise based on their versions. However, according to The New York Times, the underlying song and characters are believed to be in the public domain."
lib/.DS_Store CHANGED
Binary files a/lib/.DS_Store and b/lib/.DS_Store differ
 
lib/.ipynb_checkpoints/utils-checkpoint.py CHANGED
@@ -189,6 +189,16 @@ def make_predictions(model,tokenizer,inputs,examples,
189
  return predicted_answers
190
 
191
  def get_examples():
 
 
 
 
 
 
 
 
 
 
192
  examples = pd.read_csv('examples.csv')
193
  questions = list(examples['question'])
194
  contexts = list(examples['context'])
 
189
  return predicted_answers
190
 
191
  def get_examples():
192
+ """
193
+ Retrieve pre-made examples from a .csv file
194
+ Parameters: None
195
+ -----------
196
+ Returns:
197
+ --------
198
+ questions, contexts : list, list
199
+ Lists of examples of corresponding question-context pairs
200
+
201
+ """
202
  examples = pd.read_csv('examples.csv')
203
  questions = list(examples['question'])
204
  contexts = list(examples['context'])
lib/__pycache__/utils.cpython-310.pyc CHANGED
Binary files a/lib/__pycache__/utils.cpython-310.pyc and b/lib/__pycache__/utils.cpython-310.pyc differ
 
lib/utils.py CHANGED
@@ -189,6 +189,16 @@ def make_predictions(model,tokenizer,inputs,examples,
189
  return predicted_answers
190
 
191
  def get_examples():
 
 
 
 
 
 
 
 
 
 
192
  examples = pd.read_csv('examples.csv')
193
  questions = list(examples['question'])
194
  contexts = list(examples['context'])
 
189
  return predicted_answers
190
 
191
  def get_examples():
192
+ """
193
+ Retrieve pre-made examples from a .csv file
194
+ Parameters: None
195
+ -----------
196
+ Returns:
197
+ --------
198
+ questions, contexts : list, list
199
+ Lists of examples of corresponding question-context pairs
200
+
201
+ """
202
  examples = pd.read_csv('examples.csv')
203
  questions = list(examples['question'])
204
  contexts = list(examples['context'])