|
This model is a fine-tuned version of CodeBERT (a RoBERTa-based model) trained on the CodeSearchNet dataset for programming-language identification.
|
### |
|
Quick start: |
|
|
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
|
|
tokenizer = AutoTokenizer.from_pretrained("addy88/programming-lang-identifier") |
|
|
|
model = AutoModelForSequenceClassification.from_pretrained("addy88/programming-lang-identifier") |
|
|
|
input_ids = tokenizer.encode(CODE_TO_IDENTIFY, return_tensors="pt")
|
logits = model(input_ids)[0] |
|
|
|
language_idx = logits.argmax() # index for the resulting label |
|
### |
|
|
|
|
|
CodeSearchNet citation |
|
Details |
|
@article{husain_codesearchnet_2019, |
|
title = {{CodeSearchNet} {Challenge}: {Evaluating} the {State} of {Semantic} {Code} {Search}}, |
|
shorttitle = {{CodeSearchNet} {Challenge}}, |
|
url = {http://arxiv.org/abs/1909.09436}, |
|
urldate = {2020-03-12}, |
|
journal = {arXiv:1909.09436 [cs, stat]}, |
|
author = {Husain, Hamel and Wu, Ho-Hsiang and Gazit, Tiferet and Allamanis, Miltiadis and Brockschmidt, Marc}, |
|
month = sep, |
|
year = {2019}, |
|
note = {arXiv: 1909.09436}, |
|
} |