Mengmeng Liu committed on
Commit
36c7297
·
1 Parent(s): b394215

initial build

Browse files
__pycache__/utils.cpython-310.pyc ADDED
Binary file (1.2 kB). View file
 
app.py CHANGED
@@ -1,11 +1,75 @@
1
  import gradio
 
 
 
 
 
 
 
 
2
 
3
- def my_inference_function(name):
4
- return "Hello " + name + "!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  gradio_interface = gradio.Interface(fn = my_inference_function,
7
  inputs = "text",
8
- outputs = "text"
 
 
9
  )
10
 
11
  gradio_interface.launch()
 
1
  import gradio
2
+ import os
3
+ import json
4
+ import torch
5
+ import numpy as np
6
+ from utils import ModelWrapper
7
+ from sklearn.metrics.pairwise import cosine_similarity
8
+ # Instantiate ModelWrapper once at import time; its __init__ loads the QA tokenizer, the QA model and the sentence-embedding model, and every request reuses this instance.
9
+ model_loader = ModelWrapper()
10
 
11
def _rebuild_document_embeddings():
    """Re-embed every file in ./documents and write ./vectors/embeddings.json.

    This is NOT called during inference: answering a question only reads the
    embeddings file that already exists on disk. Call it manually after adding
    or editing files in ./documents.

    Returns:
        dict mapping each document file name to its embedding as nested lists.
    """
    document_embeddings = {}
    for file_name in os.listdir("./documents"):
        with open("./documents/" + file_name, "r", encoding="utf-8") as f:
            text = f.read()

        # .tolist() turns the embedding array into plain lists so json can
        # serialise it.
        document_embeddings[file_name] = model_loader.get_embeddings(text, 1).tolist()

    # save the embeddings of all the documents as the "vector database"
    with open("./vectors/embeddings.json", "w") as outfile:
        outfile.write(json.dumps(document_embeddings, indent=4))

    return document_embeddings


def my_inference_function(question):
    """Answer *question* from the most similar document in ./documents.

    The question is embedded, compared by cosine similarity against every
    stored document embedding in ./vectors/embeddings.json, and the best
    matching document is passed to the extractive QA model together with the
    question.

    Args:
        question: the user's question as a string.

    Returns:
        dict with the keys "answer", "most_relevant_document" and
        "cosine_similarity" (the score formatted as a string). When no
        document reaches a similarity of 0.35 the answer is an apology,
        the document is "None" and the score is "-1".
    """
    question_embeddings = model_loader.get_embeddings(question, 0)

    # open the embeddings for documents
    # will replace with vector database later on
    with open("./vectors/embeddings.json", "r") as embeddings_file:
        document_embeddings = json.load(embeddings_file)

    # linear search for the most relevant document
    max_similarity = -1
    most_relevant_document = None
    for document, embedding in document_embeddings.items():
        # cosine_similarity returns a 1x1 matrix for one question row against
        # one document row; unwrap it so the score is compared and reported
        # as a plain number ("0.53") instead of a nested array ("[[0.53]]").
        cur_similarity = float(cosine_similarity(question_embeddings, embedding)[0, 0])
        if cur_similarity > max_similarity:
            most_relevant_document = document
            max_similarity = cur_similarity

    if most_relevant_document is None or max_similarity < 0.35:
        return {"answer": "Sorry we can't find the relevant document", "most_relevant_document": "None", "cosine_similarity": str(-1)}

    with open("./documents/" + most_relevant_document, "r", encoding="utf-8") as f:
        context = f.read()

    # truncation=True keeps question + document within the tokenizer's
    # configured maximum length; without it a long document produces more
    # positions than the QA model supports.
    inputs = model_loader.tokenizer(question, context, truncation=True, return_tensors="pt")
    with torch.no_grad():
        outputs = model_loader.model_qa(**inputs)

    answer_start_index = outputs.start_logits.argmax()
    answer_end_index = outputs.end_logits.argmax()

    predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
    predict_answer = model_loader.tokenizer.decode(predict_answer_tokens, skip_special_tokens=True)

    # decode() returns a string, never None. An empty string is what comes
    # back when the predicted span is empty (end before start) or contains
    # only special tokens, so that is the case the fallback must catch.
    if not predict_answer.strip():
        predict_answer = "I can't answer your question right now. I am evolving ..."

    return {"answer": predict_answer, "most_relevant_document": most_relevant_document, "cosine_similarity": str(max_similarity)}
67
 
# Web UI: one free-text question in, the JSON dict returned by
# my_inference_function out.
gradio_interface = gradio.Interface(
    fn=my_inference_function,
    inputs="text",
    outputs="json",
    # Sample questions shown as one-click examples.
    examples=[
        "Where did Robert Kauffman graduate?",
        "What's the position of Fred Danback?",
    ],
    title="HRA Leadership QA Bot",
)

# Start serving the interface.
gradio_interface.launch()
documents/brian.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Brian Dalton is Vice President of Business Development for the TDC Group of companies (TDC Group) in New York State. Mr. Dalton leads business development efforts in New York and manages the broker distribution network for both the Northeast region and Healthcare Risk Advisors (HRA). He also engages with national brokers to develop opportunities for HRA self-insurance products and serves as the New York liaison between the various business units of TDC Group and common sales channels in New York, and across the country where opportunities may exist for New York.
2
+
3
+ Mr. Dalton began his career in the insurance industry with a New York admitted carrier. He began as an Assistant Director of Human Resources, before later being promoted to Director of Operations and culminating his time as Vice President for the Dental Business. In the latter role, Mr. Dalton oversaw all aspects of the business, during which time he doubled the business in policy count and premium, while improving the loss ratio by over 21 percent. He spent 13 years with this carrier prior to joining The Doctors Company in June 2017 as Assistant Vice President of Business Development.
4
+
5
+ Mr. Dalton earned his Bachelor of Arts degree in urban planning and education from Queens College. He went on to earn his Master of Science degree in school counseling from St. John’s University. In addition, Mr. Dalton earned a professional certificate in human resources development from Cornell University’s School of Industrial and Labor Relations.
documents/david.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ David L. Feldman, MD, MBA, CPE, FAAPL, FACS, is an experienced healthcare leader, serving for many years as chief medical officer (CMO) of the HIC/FOJP hospitals in New York City. In addition to serving as chief medical officer of The Doctors Company and TDC Group, he is senior vice president and chief medical officer at HRA, a TDC Group company. Under Dr. Feldman’s leadership, HRA provides resources and a collaborative environment designed to minimize claims and lower premiums for HRA clients by preventing patient harm, enhancing teamwork and communication, and improving documentation. Prior to his position at HRA, Dr. Feldman was vice president for patient safety, vice president of perioperative services, and vice chairman of the department of surgery at Maimonides Medical Center in Brooklyn, New York. He implemented numerous patient safety initiatives including the use of the World Health Organization (WHO) Surgical Safety Checklist. As past president of the Maimonides medical staff, Dr. Feldman was instrumental in the creation and implementation of a hospital-wide Code of Mutual Respect, and physician peer review committee. Dr. Feldman currently serves on the steering committee of the American College of Surgeons (ACS) for retraining and retooling of practicing surgeons. He served on the ACS committee on perioperative care and as vice chairman of the ACS collaborative task force for the development of high-performance teams in surgery. He also served as the ACS liaison to the Association of periOperative Registered Nurses recommended practices committee. Dr. Feldman is a master TeamSTEPPS™ trainer and a certified trainer in Crucial Conversations® and Crucial Confrontations®. He received a Bachelor of Arts degree and Doctor of Medicine degree from Duke University, completed training in general surgery at the Roosevelt Hospital (now Mount Sinai West), and plastic surgery at Duke University Medical Center. 
He earned a Master of Business Administration degree from New York University.
documents/fred.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Fred Danback is senior vice president, chief information officer, and chief information security officer, responsible for HRA’s information technology strategy and operations. Mr. Danback leads the machine learning and artificial intelligence strategy and execution for HRA with a focus on creating industry leading insights to reduce medical malpractice liability and improving patient care.
2
+ Mr. Danback has over 30 years of experience in the insurance industry. Before joining HRA, Mr. Danback was senior vice president and chief information officer of Tokio Marine Management, the United States operation for Tokio Marine Nichido Fire, a global property and casualty carrier. Earlier, he was managing principal and chief information officer for Integro Insurance Brokers, and served as the head of global technology architecture with XL Capital and NAC Reinsurance.
3
+ Mr. Danback received a Bachelor of Business Administration degree magna cum laude in international management from Pace University. Mr. Danback is a certified information systems security professional having received his CISSP designation.
documents/marc.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Marc Lanzkowsky, JD, is HRA’s vice president of claims operations. He oversees claims administration, litigation management, investigations, and claims coding teams. Mr. Lanzkowsky helps drive the long-term strategic vision in support of the professional and liability claims professionals, and also helps to innovate claims data, and technology to lead our organization to claims operational excellence.
2
+
3
+ After more than 25 years in the insurance industry, Mr. Lanzkowsky joined the company in 2021. His career began serving as a litigator in defending Long Island doctors and hospitals.  His pivot to insurance started with medical malpractice claims at Zurich North America where he held the positions of medical malpractice claims examiner, northeast regional claims manager for healthcare, and the director of operational innovation for the specialty claims group.
4
+
5
+ Mr. Lanzkowsky served as senior vice president of home office claims for Arch Insurance where, from its inception, he helped build the claims operations including the development of innovative claims technology, litigation management protocols, and claims administration.
6
+
7
+ Following his time at Arch Insurance, Mr. Lanzkowsky started his own claims consultancy. Next, he was managing director in the global insurance services group at FTI Consulting, Inc., a forensic and litigation consulting practice. He focused on assisting his clients to innovate and improve their claims organizations.
8
+
9
+ His most recent role was executive director of operations and administration for a small brokerage where he modernized their infrastructure, overseeing customer service, compliance, business continuity, information technology, and administration.
10
+
11
+ Mr. Lanzkowsky holds a Bachelor of Arts degree from New York University and a Juris Doctor degree from Pace University School of Law.
documents/melissa.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Melissa Johnson currently serves as HRA's Vice President of Finance and Controller. She has held many previous positions within the HIC and FOJP finance departments since joining the program in 2005. She brings to the company over 20 years of experience in finance and accounting.
2
+
3
+ Her experience includes statutory financial reporting, audit coordination, and internal controls with a primary focus on medical malpractice insurance.
4
+
5
+ Ms. Johnson holds a Bachelor of Science degree in commerce with concentrations in finance, accounting, and economics from the University of Virginia. She is the Treasurer of Pets Alive, a nonprofit animal rescue based in New York. She also serves as Treasurer of the NYC Parks Mounted Auxiliary Unit.
documents/noeleen.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Noeleen Doelger has more than 30 years of financial services experience and is the chief operating and financial officer of HRA. Ms. Doelger previously served as senior vice president and chief financial officer of HIC and FOJP and directed the finance, audit, treasury, information technology, and human resources functions. Prior to her roles at HIC and FOJP, Ms. Doelger was a managing director in the global insurance services group of the forensic and litigation consulting practice at FTI Consulting, Inc., a global business advisory firm, and, before that, at Veris Consulting, Inc. Before joining Veris, she was a partner in the management advisory services practice at KPMG, where she began her career. Ms. Doelger’s career has been focused on serving the property-casualty insurance, life and health insurance and reinsurance industries. Her experience includes generally accepted accounting principles (GAAP) and statutory financial reporting, auditing, internal controls assessment, forensic accounting, fraud investigations, and litigation consulting. She graduated from St. Peter’s College with a Bachelor of Science degree in accounting. She is a Certified Public Accountant (CPA) and is Certified in Financial Forensics (CFF).
documents/peter.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ Peter A. Kolbert, JD, is HRA’s senior vice president for claim and litigation services, which manages and oversees the defense aspects of all professional and general liability claims covered by the company. Mr. Kolbert is also the enterprise COVID-19 coordinator responsible for overseeing and coordinating the COVID-19 claims across all the TDC Group strategic business units. He works with HRA’s risk management department helping clients mitigate and prevent losses. Mr. Kolbert also works directly with clients on legal and educational projects to improve risk prevention.
2
+
3
+ Mr. Kolbert joined the company in 2010 after gaining more than 20 years experience defending medical malpractice cases in state and federal courts involving a wide array of medical specialties. As a partner with Wilson Elser, he managed a large volume of medical malpractice litigation from pretrial to trial, while overseeing a team of attorneys in one of the firm’s medical malpractice groups. In concert with his practice, Mr. Kolbert has lectured and published articles on medical malpractice–related topics ranging from informed consent to immunities applicable to COVID-19–related litigation, and from best charting practices to principles of legal risk management.
4
+
5
+ Prior to going into private practice, Mr. Kolbert was an assistant corporation counsel representing New York City and its many agencies including the New York Health and Hospitals Corporation, the NYPD, the NYFD, and the New York Board of Education in trying cases to verdict throughout New York City.
6
+
7
+ Mr. Kolbert has a Bachelor of Arts degree from the University of Massachusetts and a Juris Doctor degree from Brooklyn Law School. He is a visiting assistant professor for the Department of Medicine at Albert Einstein College of Medicine, Montefiore Medical Center. He also holds a New York State independent insurance adjuster license.
documents/rich_caldwell.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ Rich Caldwell currently serves as HRA's Vice President of Finance and Underwriting. He brings to the company 15 years of experience in the property-casualty insurance industry with a primary focus on medical malpractice.
2
+
3
+ Prior to joining Healthcare Risk Advisors in 2016, Mr. Caldwell was a senior director in the global insurance services group practice at FTI Consulting, Inc., a global business advisory firm.  However, his career began at Veris Consulting, Inc.
4
+
5
+ Throughout his career, Mr. Caldwell’s service has focused on property-casualty and life insurance industries. His experience includes healthcare systems underwriting and pricing, financial modeling, statutory financial reporting, forensic accounting, litigation consulting, and internal auditing.
6
+
7
+ Mr. Caldwell holds a Bachelor of Business Administration degree in accounting, and a Master of Accounting from the College of William and Mary, Mason School of Business. He is a Certified Public Accountant (CPA) serving on the audit committee of the Visiting Nurse Association Health Group, a non-profit home healthcare provider based in New Jersey.
documents/robert.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Robert A. Kauffman is president of Healthcare Risk Advisors (HRA), leading the expansion of the company’s self-insurance and risk transfer solutions for large medical practices, hospitals, and health systems. Rob previously served as senior vice president, secretary, and general counsel of FOJP Service Corporation (“FOJP”) and Hospitals Insurance Company (“HIC”).
2
+ Rob has built a distinguished career in insurance and risk management. Prior to his roles at FOJP and HIC, he was senior vice president, secretary, general counsel, and chief compliance officer at Harleysville Insurance. He was also a partner at Reed Smith, an international law firm specializing in complex litigation, strategic transactions, and regulatory matters.
3
+ In addition to his private sector experience, Rob served with distinction as an Assistant U.S. Attorney in the Criminal Division of the United States Attorney’s Office for the Eastern District of Pennsylvania.
4
+ Mr. Kauffman earned his Bachelor of Arts and Juris Doctor degrees from the University of Pennsylvania.
documents/ross.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ Ross Hess is HRA’s Vice President of Underwriting and Physician Services.  Prior to joining our company, Ross was President of Hess Consulting, LLC, where he provided consulting services to Med-Lantic Management Services, Inc.
2
+
3
+ Mr. Hess previously served as regional vice president of underwriting for ProAssurance, leading physician underwriting efforts across the Mid-Atlantic, New England, and New York.  His insurance career began at SCPIE, which is now part of The Doctors Company.
4
+
5
+ Mr. Hess is a licensed insurance producer and also holds the Chartered Property Casualty Underwriter (CPCU), and Registered Professional Liability Underwriter (RPLU) designations. 
6
+
7
+ He holds a Bachelor of Arts degree from Georgetown University, and a Master of Arts degree from Syracuse University.
documents/veronique.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Veronique Grenon is HRA’s Vice President of Data Analytics. She oversees a team of data scientists and statisticians who work on transforming data into actionable insights, using analytics to solve complex problems, and using technology to innovate the risk management field.
2
+
3
+ Ms. Grenon joins Healthcare Risk Advisors with over 20 years of experience in analytics and actuarial analysis.  Prior to joining the company, she was managing director of global InsurTech research at Guy Carpenter, and a vice president at Stanford Hospital and Clinics, leading the Stanford Risk Management Analytics Team.
4
+
5
+ Ms. Grenon is a Fellow of the Casualty Actuarial Society and holds a Bachelor of Science in Actuarial Mathematics from Concordia University, Montréal, Canada.
models/deepset/tinyroberta-squad/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "deepset/tinyroberta-squad2",
3
+ "architectures": [
4
+ "RobertaForQuestionAnswering"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "language": "english",
17
+ "layer_norm_eps": 1e-05,
18
+ "max_position_embeddings": 514,
19
+ "model_type": "roberta",
20
+ "name": "Roberta",
21
+ "num_attention_heads": 12,
22
+ "num_hidden_layers": 6,
23
+ "pad_token_id": 1,
24
+ "position_embedding_type": "absolute",
25
+ "torch_dtype": "float32",
26
+ "transformers_version": "4.30.2",
27
+ "type_vocab_size": 1,
28
+ "use_cache": true,
29
+ "vocab_size": 50265
30
+ }
models/deepset/tinyroberta-squad/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/deepset/tinyroberta-squad/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3216bcdc78b3c899a482179b996f48da35fee3a654aa55422597315e84f180f3
3
+ size 326155437
models/deepset/tinyroberta-squad/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
models/deepset/tinyroberta-squad/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/deepset/tinyroberta-squad/tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": "<s>",
4
+ "clean_up_tokenization_spaces": true,
5
+ "cls_token": "<s>",
6
+ "eos_token": "</s>",
7
+ "errors": "replace",
8
+ "mask_token": "<mask>",
9
+ "model_max_length": 512,
10
+ "pad_token": "<pad>",
11
+ "sep_token": "</s>",
12
+ "tokenizer_class": "RobertaTokenizer",
13
+ "trim_offsets": true,
14
+ "unk_token": "<unk>"
15
+ }
models/deepset/tinyroberta-squad/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:569ee3bdcc22004d6ba63b65a3d195d9f3033a90b386bae47e1edbf619acf483
3
+ size 3899
models/deepset/tinyroberta-squad/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ numpy
4
+ pandas
5
+ sentence_transformers
utils.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Load model directly
2
+ from transformers import AutoTokenizer, AutoModelForQuestionAnswering
3
+ from sentence_transformers import SentenceTransformer
4
+ from transformers import Trainer
5
+ import torch
6
+ import torch.nn.functional as F
7
+
8
class ModelWrapper():
    """Hold the question-answering model, its tokenizer and the embedding model.

    All three are loaded once in ``__init__`` and exposed as the attributes
    ``tokenizer``, ``model_qa`` and ``embedding_model``.
    """

    def __init__(self, location = "./models/deepset/tinyroberta-squad"):
        """Load the tokenizer and QA model from *location* and the embedding model by name.

        Args:
            location: value passed to ``from_pretrained`` for both the
                tokenizer and the question-answering model.
        """
        self.model_location = location
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_location)
        self.model_qa = AutoModelForQuestionAnswering.from_pretrained(self.model_location)
        self.embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

    def get_embeddings(self, text, isDocument):
        """Embed *text* and return the result reshaped to a single row.

        Args:
            text: the string to embed.
            isDocument: when truthy, *text* is split on "." and each
                non-empty fragment is embedded separately; the fragment
                embeddings are then summed into one vector. When falsy,
                *text* is embedded as a whole.

        Returns:
            The embedding reshaped with ``reshape(1, -1)``.
        """
        if not isDocument:
            return self.embedding_model.encode(text).reshape(1, -1)

        # Splitting on "." is a naive sentence split (it also cuts at
        # abbreviations such as "Mr."). It leaves empty or whitespace-only
        # fragments after a trailing period and around blank lines; drop
        # them so they do not contribute an embedding to the sum.
        sentences = [fragment.strip() for fragment in text.split(".")]
        sentences = [fragment for fragment in sentences if fragment]

        if not sentences:
            # Nothing but periods/whitespace: embed the text as given rather
            # than summing over an empty batch.
            return self.embedding_model.encode(text).reshape(1, -1)

        embeddings = self.embedding_model.encode(sentences)
        # Element-wise sum over the per-fragment vectors.
        return sum(embeddings).reshape(1, -1)
vectors/embeddings.json ADDED
The diff for this file is too large to render. See raw diff