This export path targets AWS Inferentia2 (inf2) instances only.

from transformers import AutoTokenizer
from optimum.neuron import NeuronBertForQuestionAnswering

# Compile the SQuAD2 BERT checkpoint into an AWS Neuron (Inferentia2) model,
# save the compiled artifacts locally, then publish them to the Hugging Face Hub.

# Neuron requires static input shapes at compile time; the compiler options
# cast matmul operations to bf16 for Inferentia2 throughput.
export_kwargs = {
    "batch_size": 1,
    "sequence_length": 128,
    "auto_cast": "matmul",
    "auto_cast_type": "bf16",
}

# `export=True` triggers ahead-of-time Neuron compilation of the PyTorch checkpoint.
neuron_model = NeuronBertForQuestionAnswering.from_pretrained(
    "deepset/bert-base-cased-squad2", export=True, **export_kwargs
)

# Persist the compiled model to a local directory.
neuron_model.save_pretrained("bert_base_cased_squad2_neuronx")

# Upload the saved directory to the Hub.
neuron_model.push_to_hub(
    "bert_base_cased_squad2_neuronx",
    repository_id="optimum/bert-base-cased-squad2-neuronx",  # Replace with your HF Hub repo id
)
Downloads last month
7
Inference Providers (NEW)
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support.