tanveeshsingh committed
Commit 524cf7c
1 Parent(s): 0fc916a

Refusal Changed

Files changed (1)
  1. app.py +11 -4
app.py CHANGED
@@ -115,7 +115,11 @@ Now, please output the following as a JSON object:
     """
 
     template = Template(template_str)
-    rendered_message = template.render(conversation_prefix=conv_prefix, response=response,scoring_criteria=[{"score":0,"description":unsafe_text},{"score":1,"description":safe_text}])
+    if category=='prompt' or category=='response':
+        scoring_criteria = [{"score":0,"description":unsafe_text},{"score":1,"description":safe_text}]
+    else:
+        scoring_criteria = [{"score":1,"description":unsafe_text},{"score":0,"description":safe_text}]
+    rendered_message = template.render(conversation_prefix=conv_prefix, response=response,scoring_criteria=scoring_criteria)
     client = openai.OpenAI(
         base_url=API_ENDPOINT,
         api_key=API_KEY
@@ -133,9 +137,9 @@ Now, please output the following as a JSON object:
     output_value = re.search(r'"output"\s*:\s*"(\d+)"', output)
     val = output_value.group(1) if output_value else None
     if int(val)==1:
-        return 'safe'
+        return 'refusal' if category=='refusal' else 'safe'
     else:
-        return 'unsafe'
+        return 'non refusal' if category=='refusal' else 'unsafe'
 
 async def add_to_dataset(safe_text,unsafe_text,conv_prefix, response,llama_resp,collinear_resp):
     conv_prefix = json.loads(conv_prefix)
@@ -166,7 +170,10 @@ def process_inputs(category,safe_text,unsafe_text,conv_prefix, response_content)
     response = {"role": "assistant", "content": response_content}
     conv_prefix = json.loads(conv_prefix)
     output = classify_prompt(category,safe_text,unsafe_text,conv_prefix, response)
-    llama_output = llama_guard_classify(conv_prefix, response)
+    if category=='response':
+        llama_output = llama_guard_classify(conv_prefix, response)
+    else:
+        llama_output = 'NA'
     return output,llama_output
 
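Taken together, the commit does two things: classify_prompt now builds its scoring criteria per category, inverting the 0/1 scores for the 'refusal' category and returning 'refusal'/'non refusal' labels instead of 'safe'/'unsafe', and process_inputs only calls Llama Guard for the 'response' category, recording 'NA' otherwise. A minimal standalone sketch of the new routing follows; the helper names are hypothetical (in app.py this logic is inline in classify_prompt and process_inputs, which render a Jinja2 template and query the judge through openai.OpenAI):

# Illustrative sketch only; condenses the post-commit logic of app.py.
# build_scoring_criteria, label_from_score, and route_llama_guard are
# hypothetical helper names, not functions defined in the repo.

def build_scoring_criteria(category, safe_text, unsafe_text):
    # 'prompt' and 'response' keep the original mapping (score 1 = safe);
    # 'refusal' flips it so that score 1 points at the unsafe_text side.
    if category in ('prompt', 'response'):
        return [{"score": 0, "description": unsafe_text},
                {"score": 1, "description": safe_text}]
    return [{"score": 1, "description": unsafe_text},
            {"score": 0, "description": safe_text}]

def label_from_score(category, val):
    # Map the judge's "output" digit to a label: refusal wording for
    # the 'refusal' category, safety wording for everything else.
    if int(val) == 1:
        return 'refusal' if category == 'refusal' else 'safe'
    return 'non refusal' if category == 'refusal' else 'unsafe'

def route_llama_guard(category, conv_prefix, response, llama_guard_classify):
    # Llama Guard is only consulted for full responses after this commit;
    # other categories skip the call and record 'NA' instead.
    if category == 'response':
        return llama_guard_classify(conv_prefix, response)
    return 'NA'

Under this reading, label_from_score('refusal', '1') yields 'refusal', while label_from_score('response', '1') still yields 'safe', matching the two changed return statements in the diff.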