ehdwns1516's picture
Update README.md
41f1703

ehdwns1516/bert-base-uncased_SWAG

Overview

Language model: bert-base-uncased

Language: English

Training data: SWAG dataset

Code: See Ainize Workspace

Usage

In Transformers

import torch
from transformers import AutoTokenizer, AutoModelForMultipleChoice
  
# Hub identifier of the fine-tuned checkpoint; the tokenizer and the model
# must be loaded from the same repository so vocabulary and weights match.
MODEL_NAME = "ehdwns1516/bert-base-uncased_SWAG"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForMultipleChoice.from_pretrained(MODEL_NAME)

def run_model(candicates_count, context: str, candicates: list[str]):
    """Pick the candidate ending the model scores highest for *context*.

    Each candidate is appended to the context and encoded as one choice of a
    single multiple-choice example; the choice with the largest logit wins.

    Args:
        candicates_count: Number of candidates the caller intends to pass.
        context: Text that every candidate continues.
        candicates: Candidate endings to rank.

    Returns:
        A dict ``{"result": <best candidate>}`` holding the winning string.

    Raises:
        ValueError: If ``candicates`` is empty or its length differs from
            ``candicates_count``.
    """
    # Explicit raise instead of ``assert`` so the check survives ``python -O``;
    # the f-string also avoids the str + int TypeError of plain concatenation.
    if len(candicates) != candicates_count:
        raise ValueError(f"you need {candicates_count} candidates")
    if not candicates:
        raise ValueError("at least one candidate is required")

    # The first segment is deliberately empty and the context is folded into
    # the second segment, as in the original example.
    # NOTE(review): presumably this mirrors how the checkpoint was fine-tuned
    # -- confirm against the training code before changing the pairing.
    encoded = tokenizer(
        [""] * len(candicates),
        [context + " " + c for c in candicates],
        add_special_tokens=True,
        max_length=128,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )

    # Multiple-choice heads expect (batch, num_choices, seq_len), so add a
    # batch dimension of 1. Forwarding every encoded field (attention mask
    # included) keeps the max_length padding from being attended to.
    model_inputs = {name: tensor.unsqueeze(0) for name, tensor in encoded.items()}

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        output = model(**model_inputs)

    # logits has one score per choice; argmax over the choice axis.
    best_index = torch.argmax(output.logits, dim=-1).item()
    return {"result": candicates[best_index]}

# Example invocation: replace the placeholder context and candidate
# sentences with real text before running.
count = 4  # number of candidates
context = "your context"
items = ["sentence" for _ in range(count)]

# run_model returns a dict whose "result" key holds the chosen candidate.
result = run_model(count, context, items)