Update app.py
app.py CHANGED
@@ -11,32 +11,6 @@ MAX_MAX_NEW_TOKENS = 8096
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
-DESCRIPTION = """\
-# Uncensored Llama-3.2-3B-Instruct Chat
-
-This is an uncensored version of the original [Llama-3.2-3B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct), created using [mlabonne](https://huggingface.co/mlabonne)'s [script](https://colab.research.google.com/drive/1VYm3hOcvCpbGiqKZb141gJwjdmmCcVpR?usp=sharing), which builds on [FailSpy's notebook](https://huggingface.co/failspy/llama-3-70B-Instruct-abliterated/blob/main/ortho_cookbook.ipynb) and the original work from [Andy Arditi et al.](https://colab.research.google.com/drive/1a-aQvKC9avdZpdyBn4jgRQFObTPy1JZw?usp=sharing). The method is discussed in detail in this [blog](https://huggingface.co/blog/mlabonne/abliteration) and this [paper](https://arxiv.org/abs/2406.11717).
-
-You can find the uncensored model [here](https://huggingface.co/chuanli11/Llama-3.2-3B-Instruct-uncensored).
-
-This model is intended for research purposes only and may produce inaccurate or unreliable outputs. Use it cautiously and at your own risk.
-
-
-🦄 Other exciting ML projects at Lambda: [ML Times](https://news.lambdalabs.com/news/today), [Distributed Training Guide](https://github.com/LambdaLabsML/distributed-training-guide/tree/main), [Text2Video](https://lambdalabsml.github.io/Open-Sora/introduction/), [GPU Benchmark](https://lambdalabs.com/gpu-benchmarks).
-
-"""
-
-LICENSE = """
-<p/>
-
----
-As a derivative work of [Llama-3.2-3B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct) by Meta,
-this demo is governed by the original [license](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/LICENSE).
-"""
-
-# if not torch.cuda.is_available():
-#     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
-
-
 if torch.cuda.is_available() or os.getenv("ZERO_GPU_SUPPORT", False):
     model_id = "chuanli11/Llama-3.2-3B-Instruct-uncensored"
     model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16)
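A side note on the block this hunk keeps: the `os.getenv("ZERO_GPU_SUPPORT", False)` escape hatch presumably lets the Space initialize on ZeroGPU hardware, where CUDA may not be visible at import time. Below is a minimal sketch of that load path with the imports it relies on; the `AutoTokenizer` line is an assumption, since the tokenizer setup falls outside these hunks.

import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

if torch.cuda.is_available() or os.getenv("ZERO_GPU_SUPPORT", False):
    model_id = "chuanli11/Llama-3.2-3B-Instruct-uncensored"
    # device_map="auto" places the bfloat16 weights on whatever
    # accelerator(s) the runtime exposes.
    model = AutoModelForCausalLM.from_pretrained(
        model_id, device_map="auto", torch_dtype=torch.bfloat16
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id)  # assumed; not shown in the diff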
@@ -132,10 +106,10 @@ chat_interface = gr.ChatInterface(
 )
 
 with gr.Blocks(css="style.css", fill_height=True) as demo:
-    gr.Markdown(DESCRIPTION)
+    #gr.Markdown(DESCRIPTION)
     # gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
     chat_interface.render()
-    gr.Markdown(LICENSE)
+    #gr.Markdown(LICENSE)
 
 if __name__ == "__main__":
     demo.queue(max_size=20).launch()
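Taken together, the two hunks are consistent: deleting the `DESCRIPTION` and `LICENSE` strings while leaving either `gr.Markdown(...)` call live would raise a `NameError` at import, so the calls are commented out in the same commit.

Neither hunk shows the generation callback that `chat_interface = gr.ChatInterface(...)` wraps, but the constants the commit keeps (`MAX_INPUT_TOKEN_LENGTH`, `DEFAULT_MAX_NEW_TOKENS`) point at the usual streaming pattern. The sketch below is an assumption, not the Space's actual code: the function name, the messages-format history, and the wiring are hypothetical, built only on public `transformers` APIs and the names kept in app.py.

from threading import Thread

from transformers import TextIteratorStreamer

def generate(message, chat_history, max_new_tokens=DEFAULT_MAX_NEW_TOKENS):
    # Hypothetical handler; assumes messages-format history,
    # i.e. a list of {"role": ..., "content": ...} dicts.
    conversation = [*chat_history, {"role": "user", "content": message}]
    input_ids = tokenizer.apply_chat_template(
        conversation, add_generation_prompt=True, return_tensors="pt"
    )
    # Enforce the input budget kept at the top of app.py (line 12).
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
    input_ids = input_ids.to(model.device)

    # Stream tokens back to the chat UI as they are generated.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    Thread(
        target=model.generate,
        kwargs=dict(input_ids=input_ids, streamer=streamer, max_new_tokens=max_new_tokens),
    ).start()

    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)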