Spaces:
Paused
Paused
initial commit
Browse files- .gitignore +164 -0
- LICENSE.md +28 -0
- README-doc.md +85 -0
- app.py +5 -0
- assets/bino-logo.svg +189 -0
- assets/binoculars.jpg +0 -0
- binoculars/__init__.py +4 -0
- binoculars/detector.py +84 -0
- binoculars/metrics.py +57 -0
- binoculars/utils.py +10 -0
- config.py +6 -0
- demo/demo.py +111 -0
- main.py +14 -0
- requirements.txt +5 -0
- setup.py +15 -0
.gitignore
ADDED
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
share/python-wheels/
|
24 |
+
*.egg-info/
|
25 |
+
.installed.cfg
|
26 |
+
*.egg
|
27 |
+
MANIFEST
|
28 |
+
|
29 |
+
# PyInstaller
|
30 |
+
# Usually these files are written by a python script from a template
|
31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
32 |
+
*.manifest
|
33 |
+
*.spec
|
34 |
+
|
35 |
+
# Installer logs
|
36 |
+
pip-log.txt
|
37 |
+
pip-delete-this-directory.txt
|
38 |
+
|
39 |
+
# Unit test / coverage reports
|
40 |
+
htmlcov/
|
41 |
+
.tox/
|
42 |
+
.nox/
|
43 |
+
.coverage
|
44 |
+
.coverage.*
|
45 |
+
.cache
|
46 |
+
nosetests.xml
|
47 |
+
coverage.xml
|
48 |
+
*.cover
|
49 |
+
*.py,cover
|
50 |
+
.hypothesis/
|
51 |
+
.pytest_cache/
|
52 |
+
cover/
|
53 |
+
|
54 |
+
# Translations
|
55 |
+
*.mo
|
56 |
+
*.pot
|
57 |
+
|
58 |
+
# Django stuff:
|
59 |
+
*.log
|
60 |
+
local_settings.py
|
61 |
+
db.sqlite3
|
62 |
+
db.sqlite3-journal
|
63 |
+
|
64 |
+
# Flask stuff:
|
65 |
+
instance/
|
66 |
+
.webassets-cache
|
67 |
+
|
68 |
+
# Scrapy stuff:
|
69 |
+
.scrapy
|
70 |
+
|
71 |
+
# Sphinx documentation
|
72 |
+
docs/_build/
|
73 |
+
|
74 |
+
# PyBuilder
|
75 |
+
.pybuilder/
|
76 |
+
target/
|
77 |
+
|
78 |
+
# Jupyter Notebook
|
79 |
+
.ipynb_checkpoints
|
80 |
+
|
81 |
+
# IPython
|
82 |
+
profile_default/
|
83 |
+
ipython_config.py
|
84 |
+
|
85 |
+
# pyenv
|
86 |
+
# For a library or package, you might want to ignore these files since the code is
|
87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
88 |
+
# .python-version
|
89 |
+
|
90 |
+
# pipenv
|
91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
94 |
+
# install all needed dependencies.
|
95 |
+
#Pipfile.lock
|
96 |
+
|
97 |
+
# poetry
|
98 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
100 |
+
# commonly ignored for libraries.
|
101 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
102 |
+
#poetry.lock
|
103 |
+
|
104 |
+
# pdm
|
105 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
106 |
+
#pdm.lock
|
107 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
108 |
+
# in version control.
|
109 |
+
# https://pdm.fming.dev/#use-with-ide
|
110 |
+
.pdm.toml
|
111 |
+
|
112 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
113 |
+
__pypackages__/
|
114 |
+
|
115 |
+
# Celery stuff
|
116 |
+
celerybeat-schedule
|
117 |
+
celerybeat.pid
|
118 |
+
|
119 |
+
# SageMath parsed files
|
120 |
+
*.sage.py
|
121 |
+
|
122 |
+
# Environments
|
123 |
+
.env
|
124 |
+
.venv
|
125 |
+
env/
|
126 |
+
venv/
|
127 |
+
ENV/
|
128 |
+
env.bak/
|
129 |
+
venv.bak/
|
130 |
+
|
131 |
+
# Spyder project settings
|
132 |
+
.spyderproject
|
133 |
+
.spyproject
|
134 |
+
|
135 |
+
# Rope project settings
|
136 |
+
.ropeproject
|
137 |
+
|
138 |
+
# mkdocs documentation
|
139 |
+
/site
|
140 |
+
|
141 |
+
# mypy
|
142 |
+
.mypy_cache/
|
143 |
+
.dmypy.json
|
144 |
+
dmypy.json
|
145 |
+
|
146 |
+
# Pyre type checker
|
147 |
+
.pyre/
|
148 |
+
|
149 |
+
# pytype static type analyzer
|
150 |
+
.pytype/
|
151 |
+
|
152 |
+
# Cython debug symbols
|
153 |
+
cython_debug/
|
154 |
+
|
155 |
+
# PyCharm
|
156 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
157 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
158 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
159 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
160 |
+
#.idea/
|
161 |
+
|
162 |
+
samples/
|
163 |
+
|
164 |
+
**.*ipynb
|
LICENSE.md
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
BSD 3-Clause License
|
2 |
+
|
3 |
+
Copyright (c) 2023, Abhimanyu Hans, Avi Schwarzschild, Tom Goldstein
|
4 |
+
|
5 |
+
Redistribution and use in source and binary forms, with or without
|
6 |
+
modification, are permitted provided that the following conditions are met:
|
7 |
+
|
8 |
+
1. Redistributions of source code must retain the above copyright notice, this
|
9 |
+
list of conditions and the following disclaimer.
|
10 |
+
|
11 |
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
12 |
+
this list of conditions and the following disclaimer in the documentation
|
13 |
+
and/or other materials provided with the distribution.
|
14 |
+
|
15 |
+
3. Neither the name of the copyright holder nor the names of its
|
16 |
+
contributors may be used to endorse or promote products derived from
|
17 |
+
this software without specific prior written permission.
|
18 |
+
|
19 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
20 |
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
21 |
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
22 |
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
23 |
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
24 |
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
25 |
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
26 |
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
27 |
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
28 |
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
README-doc.md
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# <img src="./assets/bino-logo.svg" width=40 style="padding-top: 0px"/> Binoculars: Zero-Shot Detection of LLM-Generated Text [[paper]](https://arxiv.org/abs/2401.12070)
|
2 |
+
|
3 |
+
<p align="center">
|
4 |
+
<img src="assets/binoculars.jpg" width="300" height="300" alt="Cool Binoculars with Falcon on Top">
|
5 |
+
</p>
|
6 |
+
|
7 |
+
We introduce Binoculars, a state-of-the-art method for detecting AI-generated text. Binoculars is a
|
8 |
+
zero-shot and domain-agnostic (requires no training data) method. It is based on a simple idea: most
|
9 |
+
decoder-only, causal language models have a huge overlap in pretraining datasets, e.g., Common Crawl, Pile, etc.
|
10 |
+
More details about the method and results can be found in our paper **Spotting LLMs with Binoculars: Zero-Shot
|
11 |
+
Detection of Machine-Generated Text**.
|
12 |
+
|
13 |
+
## Getting Started
|
14 |
+
|
15 |
+
### Installation
|
16 |
+
|
17 |
+
To run the implementation of Binoculars, you can clone this repository and install the package using pip. This code was
|
18 |
+
developed and tested with Python 3.9. To install the package, run the
|
19 |
+
following commands:
|
20 |
+
|
21 |
+
```bash
|
22 |
+
$ git clone https://github.com/ahans30/Binoculars.git
|
23 |
+
$ cd Binoculars
|
24 |
+
$ pip install -e .
|
25 |
+
```
|
26 |
+
|
27 |
+
### Usage
|
28 |
+
|
29 |
+
Please note, this implementation comes with a fixed global threshold that is used to classify the input as AI-generated
|
30 |
+
or not. This threshold is selected using _Falcon-7B_ and _Falcon-7B-Instruct_ models for scoring. If you want to
|
31 |
+
use different scoring models, you can pass it as an argument to the `Binoculars` class. Please read the paper for more
|
32 |
+
details about the Binoculars work.
|
33 |
+
|
34 |
+
To detect AI-generated text, please use the following code snippet:
|
35 |
+
|
36 |
+
```python
|
37 |
+
from binoculars import Binoculars
|
38 |
+
|
39 |
+
bino = Binoculars()
|
40 |
+
|
41 |
+
# ChatGPT (GPT-4) output when prompted with “Can you write a few sentences about a capybara that is an astrophysicist?"
|
42 |
+
sample_string = '''Dr. Capy Cosmos, a capybara unlike any other, astounded the scientific community with his
|
43 |
+
groundbreaking research in astrophysics. With his keen sense of observation and unparalleled ability to interpret
|
44 |
+
cosmic data, he uncovered new insights into the mysteries of black holes and the origins of the universe. As he
|
45 |
+
peered through telescopes with his large, round eyes, fellow researchers often remarked that it seemed as if the
|
46 |
+
stars themselves whispered their secrets directly to him. Dr. Cosmos not only became a beacon of inspiration to
|
47 |
+
aspiring scientists but also proved that intellect and innovation can be found in the most unexpected of creatures.'''
|
48 |
+
|
49 |
+
print(bino.compute_score(sample_string)) # 0.75661373
|
50 |
+
print(bino.predict(sample_string)) # 'AI-Generated'
|
51 |
+
```
|
52 |
+
|
53 |
+
In the above code, the user can also pass a `list` of `str` to the `compute_score` and `predict` methods to get results for
|
54 |
+
the entire batch of samples.
|
55 |
+
|
56 |
+
### Demo
|
57 |
+
|
58 |
+
We have also made a demo available to predict AI-generated text interactively with a simple UI
|
59 |
+
using [gradio](https://github.com/gradio-app/gradio). You can run the demo using the following command:
|
60 |
+
|
61 |
+
```bash
|
62 |
+
$ python app.py
|
63 |
+
```
|
64 |
+
|
65 |
+
## Limitations
|
66 |
+
|
67 |
+
All AI-generated text detectors aim for accuracy, but none is perfect, and each can have multiple failure modes (e.g.,
|
68 |
+
Binoculars is more proficient in detecting English language text compared to other languages). This implementation is
|
69 |
+
for academic purposes only and should not be considered as a consumer product. We also strongly caution against using
|
70 |
+
Binoculars (or any detector) without human supervision.
|
71 |
+
|
72 |
+
## Cite our work
|
73 |
+
|
74 |
+
If you find this work useful, please cite our paper:
|
75 |
+
|
76 |
+
```bibtex
|
77 |
+
@misc{hans2024spotting,
|
78 |
+
title={Spotting LLMs With Binoculars: Zero-Shot Detection of Machine-Generated Text},
|
79 |
+
author={Abhimanyu Hans and Avi Schwarzschild and Valeriia Cherepanova and Hamid Kazemi and Aniruddha Saha and Micah Goldblum and Jonas Geiping and Tom Goldstein},
|
80 |
+
year={2024},
|
81 |
+
eprint={2401.12070},
|
82 |
+
archivePrefix={arXiv},
|
83 |
+
primaryClass={cs.CL}
|
84 |
+
}
|
85 |
+
```
|
app.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from demo.demo import app
|
2 |
+
|
3 |
+
if __name__ == "__main__":
|
4 |
+
# Launch the Gradio interface
|
5 |
+
app.launch(show_api=False, debug=True, share=True)
|
assets/bino-logo.svg
ADDED
assets/binoculars.jpg
ADDED
binoculars/__init__.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from config import huggingface_config
|
2 |
+
from .detector import Binoculars
|
3 |
+
|
4 |
+
__all__ = ["Binoculars"]
|
binoculars/detector.py
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Union
|
2 |
+
|
3 |
+
import numpy as np
|
4 |
+
import torch
|
5 |
+
import transformers
|
6 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
7 |
+
|
8 |
+
from config import huggingface_config
|
9 |
+
from .utils import assert_tokenizer_consistency
|
10 |
+
from .metrics import perplexity, entropy
|
11 |
+
|
12 |
+
torch.set_grad_enabled(False)
|
13 |
+
|
14 |
+
GLOBAL_BINOCULARS_THRESHOLD = 0.9015310749276843 # selected using Falcon-7B and Falcon-7B-Instruct at bfloat16
|
15 |
+
DEVICE_1 = "cuda:0" if torch.cuda.is_available() else "cpu"
|
16 |
+
DEVICE_2 = "cuda:1" if torch.cuda.device_count() > 1 else DEVICE_1
|
17 |
+
|
18 |
+
|
19 |
+
class Binoculars(object):
    """Score text with an observer/performer pair of causal language models.

    ``compute_score`` returns the performer model's perplexity on the text
    divided by the cross-entropy between the observer and performer logits.
    ``predict`` labels every score below ``GLOBAL_BINOCULARS_THRESHOLD`` as
    ``"AI-Generated"`` and every other score as ``"Human-Generated"``.
    """

    def __init__(self,
                 observer_name_or_path: str = "tiiuae/falcon-7b",
                 performer_name_or_path: str = "tiiuae/falcon-7b-instruct",
                 use_bfloat16: bool = True,
                 max_token_observed: int = 512,
                 ) -> None:
        """Load both models and the tokenizer.

        Args:
            observer_name_or_path: Model id or path of the observer model.
            performer_name_or_path: Model id or path of the performer model.
            use_bfloat16: Load the weights as ``torch.bfloat16`` when true,
                otherwise as ``torch.float32``.
            max_token_observed: Truncation length passed to the tokenizer.

        Raises:
            ValueError: Expected from ``assert_tokenizer_consistency`` when
                the two models do not share a vocabulary.
        """
        assert_tokenizer_consistency(observer_name_or_path, performer_name_or_path)

        model_dtype = torch.bfloat16 if use_bfloat16 else torch.float32

        # NOTE(review): trust_remote_code=True lets the model repository run
        # its own Python code; only point these arguments at trusted repos.
        self.observer_model = AutoModelForCausalLM.from_pretrained(observer_name_or_path,
                                                                   device_map={"": DEVICE_1},
                                                                   trust_remote_code=True,
                                                                   torch_dtype=model_dtype,
                                                                   token=huggingface_config["TOKEN"]
                                                                   )
        self.performer_model = AutoModelForCausalLM.from_pretrained(performer_name_or_path,
                                                                    device_map={"": DEVICE_2},
                                                                    trust_remote_code=True,
                                                                    torch_dtype=model_dtype,
                                                                    token=huggingface_config["TOKEN"]
                                                                    )

        self.observer_model.eval()
        self.performer_model.eval()

        self.tokenizer = AutoTokenizer.from_pretrained(observer_name_or_path)
        if not self.tokenizer.pad_token:
            # Batched tokenization pads, so reuse EOS when no pad token is set.
            self.tokenizer.pad_token = self.tokenizer.eos_token

        self.max_token_observed = max_token_observed

    def _tokenize(self, batch: list[str]) -> transformers.BatchEncoding:
        """Tokenize ``batch`` and move the encodings to the observer's device.

        Padding is only requested when the batch holds more than one string;
        every sequence is truncated to ``self.max_token_observed`` tokens.
        """
        batch_size = len(batch)
        encodings = self.tokenizer(
            batch,
            return_tensors="pt",
            padding="longest" if batch_size > 1 else False,
            truncation=True,
            max_length=self.max_token_observed,
            return_token_type_ids=False).to(self.observer_model.device)
        return encodings

    @torch.inference_mode()
    def _get_logits(self, encodings: transformers.BatchEncoding) -> tuple[torch.Tensor, torch.Tensor]:
        """Run both models on ``encodings``.

        Returns:
            ``(observer_logits, performer_logits)``.
        """
        observer_logits = self.observer_model(**encodings.to(DEVICE_1)).logits
        performer_logits = self.performer_model(**encodings.to(DEVICE_2)).logits
        # DEVICE_1 falls back to "cpu" when CUDA is missing; synchronizing
        # unconditionally would raise on such hosts.
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        return observer_logits, performer_logits

    def compute_score(self, input_text: Union[list[str], str]) -> Union[float, list[float]]:
        """Compute the Binoculars score for one string or a list of strings.

        Returns:
            A single float for a ``str`` input, otherwise a list holding one
            float per input string.
        """
        batch = [input_text] if isinstance(input_text, str) else input_text
        encodings = self._tokenize(batch)
        observer_logits, performer_logits = self._get_logits(encodings)
        ppl = perplexity(encodings, performer_logits)
        # Bring every tensor onto DEVICE_1 before mixing the two models' logits.
        x_ppl = entropy(observer_logits.to(DEVICE_1), performer_logits.to(DEVICE_1),
                        encodings.to(DEVICE_1), self.tokenizer.pad_token_id)
        binoculars_scores = ppl / x_ppl
        binoculars_scores = binoculars_scores.tolist()
        return binoculars_scores[0] if isinstance(input_text, str) else binoculars_scores

    def predict(self, input_text: Union[list[str], str]) -> Union[list[str], str]:
        """Label the input as ``"AI-Generated"`` or ``"Human-Generated"``.

        A score strictly below ``GLOBAL_BINOCULARS_THRESHOLD`` yields
        ``"AI-Generated"``. The result mirrors the input: one label for a
        ``str``, a list of labels for a list.
        """
        binoculars_scores = np.array(self.compute_score(input_text))
        pred = np.where(binoculars_scores < GLOBAL_BINOCULARS_THRESHOLD, "AI-Generated", "Human-Generated").tolist()
        return pred
|
binoculars/metrics.py
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import torch
|
3 |
+
import transformers
|
4 |
+
|
5 |
+
ce_loss_fn = torch.nn.CrossEntropyLoss(reduction="none")
|
6 |
+
softmax_fn = torch.nn.Softmax(dim=-1)
|
7 |
+
|
8 |
+
|
9 |
+
def perplexity(encoding: transformers.BatchEncoding,
               logits: torch.Tensor,
               median: bool = False,
               temperature: float = 1.0):
    """Aggregate the next-token cross-entropy of ``logits`` per sequence.

    The logits at each position are scored against the token id that follows
    it in ``encoding.input_ids``; positions whose shifted attention mask is
    zero are excluded from the aggregate.

    Args:
        encoding: Provides ``input_ids`` and ``attention_mask``.
        logits: Model logits for ``encoding``
            (assumes a (batch, tokens, vocab) layout -- TODO confirm).
        median: Return the NaN-ignoring median over unmasked positions
            instead of the mask-weighted mean.
        temperature: Divisor applied to the logits before scoring.

    Returns:
        A NumPy array with one value per sequence.
    """
    next_token_logits = logits[..., :-1, :].contiguous() / temperature
    targets = encoding.input_ids[..., 1:].contiguous()
    mask = encoding.attention_mask[..., 1:].contiguous()

    # Per-token loss; CrossEntropyLoss expects the class axis at dim 1.
    token_losses = ce_loss_fn(next_token_logits.transpose(1, 2), targets)

    if median:
        # Masked positions become NaN so that nanmedian skips them.
        with_nans = token_losses.masked_fill(~mask.bool(), float("nan"))
        return np.nanmedian(with_nans.cpu().float().numpy(), 1)

    mean_loss = (token_losses * mask).sum(1) / mask.sum(1)
    return mean_loss.to("cpu").float().numpy()
|
28 |
+
|
29 |
+
|
30 |
+
def entropy(p_logits: torch.Tensor,
            q_logits: torch.Tensor,
            encoding: transformers.BatchEncoding,
            pad_token_id: int,
            median: bool = False,
            sample_p: bool = False,
            temperature: float = 1.0):
    """Aggregate the cross-entropy of ``q_logits`` against ``p_logits`` per sequence.

    The target at every position is the softmax of ``p_logits`` (or, with
    ``sample_p``, one class index sampled from that softmax). Positions whose
    input id equals ``pad_token_id`` are excluded from the aggregate.

    Args:
        p_logits: Logits that define the target distribution.
        q_logits: Logits being scored
            (assumes a (batch, tokens, vocab) layout -- TODO confirm).
        encoding: Provides ``input_ids`` for the padding mask.
        pad_token_id: Token id treated as padding.
        median: Return the NaN-ignoring median over unmasked positions
            instead of the mask-weighted mean.
        sample_p: Sample a hard target per position instead of using the
            full distribution.
        temperature: Divisor applied to both sets of logits.

    Returns:
        A NumPy array with one value per sequence.
    """
    num_classes = p_logits.shape[-1]
    tokens_per_row = q_logits.shape[-2]

    scaled_p = p_logits / temperature
    scaled_q = q_logits / temperature

    # Flatten to (positions, vocab) so each position is scored independently.
    targets = softmax_fn(scaled_p).view(-1, num_classes)
    if sample_p:
        targets = torch.multinomial(targets.view(-1, num_classes), replacement=True, num_samples=1).view(-1)

    flat_q = scaled_q.view(-1, num_classes)
    token_ce = ce_loss_fn(input=flat_q, target=targets).view(-1, tokens_per_row)
    keep = (encoding.input_ids != pad_token_id).type(torch.uint8)

    if median:
        # Padded positions become NaN so that nanmedian skips them.
        with_nans = token_ce.masked_fill(~keep.bool(), float("nan"))
        return np.nanmedian(with_nans.cpu().float().numpy(), 1)

    mean_ce = (token_ce * keep).sum(1) / keep.sum(1)
    return mean_ce.to("cpu").float().numpy()
|
binoculars/utils.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import AutoTokenizer
|
2 |
+
|
3 |
+
|
4 |
+
def assert_tokenizer_consistency(model_id_1, model_id_2):
    """Check that two models share the same tokenizer vocabulary.

    Args:
        model_id_1: Model id or path handed to ``AutoTokenizer.from_pretrained``.
        model_id_2: Model id or path handed to ``AutoTokenizer.from_pretrained``.

    Raises:
        ValueError: If the ``vocab`` mappings of the two tokenizers differ.
    """
    identical_tokenizers = (
            AutoTokenizer.from_pretrained(model_id_1).vocab
            == AutoTokenizer.from_pretrained(model_id_2).vocab
    )
    if not identical_tokenizers:
        # Format the ids themselves: reading an attribute off them here would
        # raise AttributeError and hide the intended ValueError.
        raise ValueError(f"Tokenizers are not identical for {model_id_1} and {model_id_2}.")
|
config.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
huggingface_config = {
|
4 |
+
# Only required for private models from Huggingface (e.g. LLaMA models)
|
5 |
+
"TOKEN": os.environ.get("HF_TOKEN", None)
|
6 |
+
}
|
demo/demo.py
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
__all__ = ["app"]
|
2 |
+
|
3 |
+
import gradio as gr
|
4 |
+
from binoculars import Binoculars
|
5 |
+
|
6 |
+
BINO = Binoculars()
|
7 |
+
TOKENIZER = BINO.tokenizer
|
8 |
+
MINIMUM_TOKENS = 64
|
9 |
+
|
10 |
+
|
11 |
+
def count_tokens(text):
    """Return how many input ids ``TOKENIZER`` produces for ``text``."""
    token_ids = TOKENIZER(text).input_ids
    return len(token_ids)
|
13 |
+
|
14 |
+
|
15 |
+
def run_detector(input_str):
    """Return the Binoculars prediction for ``input_str`` as a string.

    Inputs shorter than ``MINIMUM_TOKENS`` tokens trigger a Gradio warning
    and yield an empty string instead of a prediction.
    """
    long_enough = count_tokens(input_str) >= MINIMUM_TOKENS
    if long_enough:
        return f"{BINO.predict(input_str)}"

    gr.Warning(f"Too short length. Need minimum {MINIMUM_TOKENS} tokens to run Binoculars.")
    return ""
|
20 |
+
|
21 |
+
|
22 |
+
# def load_set(progress=gr.Progress()):
|
23 |
+
# tokens = [None] * 24
|
24 |
+
# for count in progress.tqdm(tokens, desc="Counting Tokens..."):
|
25 |
+
# time.sleep(0.01)
|
26 |
+
# return ["Loaded"] * 2
|
27 |
+
|
28 |
+
|
29 |
+
css = """
|
30 |
+
.green { color: black!important;line-height:1.9em; padding: 0.2em 0.2em; background: #ccffcc; border-radius:0.5rem;}
|
31 |
+
.red { color: black!important;line-height:1.9em; padding: 0.2em 0.2em; background: #ffad99; border-radius:0.5rem;}
|
32 |
+
.hyperlinks {
|
33 |
+
display: flex;
|
34 |
+
align-items: center;
|
35 |
+
align-content: center;
|
36 |
+
padding-top: 12px;
|
37 |
+
justify-content: flex-end;
|
38 |
+
margin: 0 10px; /* Adjust the margin as needed */
|
39 |
+
text-decoration: none;
|
40 |
+
color: #000; /* Set the desired text color */
|
41 |
+
}
|
42 |
+
"""
|
43 |
+
|
44 |
+
capybara_problem = '''Dr. Capy Cosmos, a capybara unlike any other, astounded the scientific community with his groundbreaking research in astrophysics. With his keen sense of observation and unparalleled ability to interpret cosmic data, he uncovered new insights into the mysteries of black holes and the origins of the universe. As he peered through telescopes with his large, round eyes, fellow researchers often remarked that it seemed as if the stars themselves whispered their secrets directly to him. Dr. Cosmos not only became a beacon of inspiration to aspiring scientists but also proved that intellect and innovation can be found in the most unexpected of creatures.'''
|
45 |
+
|
46 |
+
with gr.Blocks(css=css,
|
47 |
+
theme=gr.themes.Default(font=[gr.themes.GoogleFont("Inconsolata"), "Arial", "sans-serif"])) as app:
|
48 |
+
with gr.Row():
|
49 |
+
with gr.Column(scale=3):
|
50 |
+
gr.HTML("<p><h1> binoculars: zero-shot llm-text detector</h1>")
|
51 |
+
with gr.Column(scale=1):
|
52 |
+
gr.HTML("""
|
53 |
+
<p>
|
54 |
+
<a href="https://arxiv.org/abs/2401.12070" target="_blank">paper</a>
|
55 |
+
|
56 |
+
<a href="https://github.com/AHans30/Binoculars" target="_blank">code</a>
|
57 |
+
|
58 |
+
<a href="mailto:[email protected]" target="_blank">contact</a>
|
59 |
+
""", elem_classes="hyperlinks")
|
60 |
+
with gr.Row():
|
61 |
+
input_box = gr.Textbox(value=capybara_problem, placeholder="Enter text here", lines=8, label="Input Text", )
|
62 |
+
with gr.Row():
|
63 |
+
clear_button = gr.ClearButton()
|
64 |
+
submit_button = gr.Button("Run Binoculars", variant="primary")
|
65 |
+
with gr.Row():
|
66 |
+
output_text = gr.Textbox(label="Prediction", value="AI-Generated")
|
67 |
+
|
68 |
+
with gr.Row():
|
69 |
+
gr.HTML("<p><p><p>")
|
70 |
+
with gr.Row():
|
71 |
+
gr.HTML("<p><p><p>")
|
72 |
+
with gr.Row():
|
73 |
+
gr.HTML("<p><p><p>")
|
74 |
+
|
75 |
+
with gr.Accordion("Disclaimer", open=False):
|
76 |
+
gr.Markdown(
|
77 |
+
"""
|
78 |
+
- `Accuracy` :
|
79 |
+
- AI-generated text detectors aim for accuracy, but achieving 100% is challenging.
|
80 |
+
- The provided prediction is for demo purposes only and should not be considered a consumer product.
|
81 |
+
- Users are advised to exercise discretion, and we assume no liability for any use.
|
82 |
+
- `Detection Use Cases` :
|
83 |
+
- In this work, our focus is to achieve an ultra-low false positive rate, crucial for sensitive downstream use case (e.g., avoiding false accusations in academic honesty cases).
|
84 |
+
- We find optimal application in content moderation, for example in detecting AI-generated reviews on platforms like Amazon, Google, Yelp, etc. This represents one of the most compelling and noteworthy use cases for Binoculars.
|
85 |
+
- `Human Supervision Advisory` :
|
86 |
+
- Strongly caution against using Binoculars (or any detector) without human supervision.
|
87 |
+
- `Performance by Language` :
|
88 |
+
- As noted in our paper, Binoculars exhibit superior detection performance in the English language compared to other languages.
|
89 |
+
"""
|
90 |
+
)
|
91 |
+
|
92 |
+
with gr.Accordion("Cite our work", open=False):
|
93 |
+
gr.Markdown(
|
94 |
+
"""
|
95 |
+
```bibtex
|
96 |
+
@misc{hans2024spotting,
|
97 |
+
title={Spotting LLMs With Binoculars: Zero-Shot Detection of Machine-Generated Text},
|
98 |
+
author={Abhimanyu Hans and Avi Schwarzschild and Valeriia Cherepanova and Hamid Kazemi and Aniruddha Saha and Micah Goldblum and Jonas Geiping and Tom Goldstein},
|
99 |
+
year={2024},
|
100 |
+
eprint={2401.12070},
|
101 |
+
archivePrefix={arXiv},
|
102 |
+
primaryClass={cs.CL}
|
103 |
+
}
|
104 |
+
"""
|
105 |
+
)
|
106 |
+
|
107 |
+
# confidence_bar = gr.Label(value={"Confidence": 0})
|
108 |
+
|
109 |
+
# clear_button.click(lambda x: input_box., )
|
110 |
+
submit_button.click(run_detector, inputs=input_box, outputs=output_text)
|
111 |
+
clear_button.click(lambda: ("", ""), outputs=[input_box, output_text])
|
main.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from binoculars import Binoculars
|
2 |
+
|
3 |
+
bino = Binoculars()
|
4 |
+
|
5 |
+
# ChatGPT (GPT-4) output when prompted with “Can you write a few sentences about a capybara that is an astrophysicist?"
|
6 |
+
sample_string = '''Dr. Capy Cosmos, a capybara unlike any other, astounded the scientific community with his
|
7 |
+
groundbreaking research in astrophysics. With his keen sense of observation and unparalleled ability to interpret
|
8 |
+
cosmic data, he uncovered new insights into the mysteries of black holes and the origins of the universe. As he
|
9 |
+
peered through telescopes with his large, round eyes, fellow researchers often remarked that it seemed as if the
|
10 |
+
stars themselves whispered their secrets directly to him. Dr. Cosmos not only became a beacon of inspiration to
|
11 |
+
aspiring scientists but also proved that intellect and innovation can be found in the most unexpected of creatures.'''
|
12 |
+
|
13 |
+
print(bino.compute_score(sample_string)) # 0.75661373
|
14 |
+
print(bino.predict(sample_string)) # 'AI-Generated'
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
sentencepiece
|
2 |
+
transformers[torch] @ https://github.com/huggingface/transformers/archive/refs/tags/v4.31.0.zip
|
3 |
+
numpy
|
4 |
+
gradio
|
5 |
+
gradio_client
|
setup.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from setuptools import setup, find_packages
|
2 |
+
|
3 |
+
def _read_text(path):
    """Return the UTF-8 text of ``path``, closing the file before returning."""
    with open(path, "r", encoding="utf-8") as handle:
        return handle.read()


def _read_requirements(path):
    """Return the requirement lines of ``path`` without blanks and ``#`` comments."""
    stripped_lines = (line.strip() for line in _read_text(path).splitlines())
    return [line for line in stripped_lines if line and not line.startswith("#")]


setup(
    name='Binoculars',
    version='0.0.10',
    packages=find_packages(),
    url='https://github.com/ahans30/Binoculars',
    license=_read_text("LICENSE.md"),
    author='Authors of "Binoculars: Zero-Shot Detection of LLM-Generated Text"',
    author_email='[email protected]',
    description='A language model generated text detector.',
    long_description=_read_text("README.md"),
    long_description_content_type="text/markdown",
    install_requires=_read_requirements("requirements.txt"),
)
|