Part of the urlbert collection: BERT-based models for URL analysis.
urlbert-tiny-base-v4 is a lightweight BERT-based model specifically optimized for URL analysis. This version includes several improvements over the previous release, and the result is an efficient model that can be rapidly fine-tuned for URL classification tasks with minimal computational resources.
from transformers import BertTokenizerFast, BertForMaskedLM, pipeline
import torch

# Use the GPU if one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")

model_name = "CrabInHoney/urlbert-tiny-base-v4"

# Load the tokenizer and the masked-language-modeling model
tokenizer = BertTokenizerFast.from_pretrained(model_name)
model = BertForMaskedLM.from_pretrained(model_name)
model.to(device)

# Build a fill-mask pipeline on the same device
fill_mask = pipeline(
    "fill-mask",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1
)

sentences = [
    "http://example.[MASK]/"
]

# Predict the most likely tokens for the [MASK] position in each URL
for sentence in sentences:
    print(f"\nInput: {sentence}")
    results = fill_mask(sentence)
    for result in results:
        token_str = result['token_str']
        score = result['score']
        print(f"Predicted token: {token_str}, probability: {score:.4f}")
Example output:

Input: http://example.[MASK]/
Predicted token: com, probability: 0.7307
Predicted token: net, probability: 0.1319
Predicted token: org, probability: 0.0881
Predicted token: info, probability: 0.0094
Predicted token: cn, probability: 0.0084
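Since the card highlights rapid fine-tuning for URL classification, below is a minimal fine-tuning sketch that is not part of the original card. It adapts urlbert-tiny-base-v4 to a binary task such as phishing detection using the Hugging Face Trainer; the toy in-memory dataset, the label meanings, the hyperparameters, and the output directory are all illustrative assumptions, not values from the model card.

from transformers import (
    BertTokenizerFast,
    BertForSequenceClassification,
    Trainer,
    TrainingArguments,
)
from datasets import Dataset

model_name = "CrabInHoney/urlbert-tiny-base-v4"
tokenizer = BertTokenizerFast.from_pretrained(model_name)

# Load the encoder with a fresh classification head (2 classes assumed)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Toy dataset for illustration only; replace with a real labeled URL corpus.
# Labels are assumed: 0 = benign, 1 = phishing.
data = {
    "text": ["http://example.com/login", "http://paypa1-secure.xyz/verify"],
    "label": [0, 1],
}
dataset = Dataset.from_dict(data)

def tokenize(batch):
    # Pad to a fixed length so the default collator can batch the examples
    return tokenizer(batch["text"], truncation=True, padding="max_length", max_length=64)

dataset = dataset.map(tokenize, batched=True)

# Hyperparameters below are placeholders, not recommended settings
training_args = TrainingArguments(
    output_dir="./urlbert-finetuned",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    learning_rate=5e-5,
    logging_steps=10,
)

trainer = Trainer(model=model, args=training_args, train_dataset=dataset)
trainer.train()

After training, the saved BertForSequenceClassification checkpoint can be loaded into a text-classification pipeline in the same way the fill-mask pipeline is constructed above.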