bkowshik committed on
Commit
689b71f
·
1 Parent(s): 4f144a5

Add application file

Files changed (1)
  1. app.py +29 -0
app.py ADDED
@@ -0,0 +1,29 @@
+ import os
+ import json
+
+ import numpy as np
+ import pandas as pd
+
+ from huggingface_hub import hf_hub_download
+ from llama_cpp import Llama
+
+ import gradio as gr
+
+ hub_model_path = hf_hub_download(
+     repo_id='TheBloke/h2ogpt-4096-llama2-13B-GGML',
+     filename='h2ogpt-4096-llama2-13b.ggmlv3.q2_K.bin'
+ )
+ model = Llama(
+     model_path=hub_model_path,
+     n_ctx=220,       # Maximum context size. TODO: Increase this later.
+     use_mlock=True,  # Force the system to keep the model in RAM.
+     seed=77,
+     n_batch=64
+ )
+
+ def generate(prompt):
+     output = model(prompt, max_tokens=64, stop=['Q:', '\n'], echo=True)
+     return json.dumps(output, indent=4)
+
+ iface = gr.Interface(fn=generate, inputs='text', outputs='text')
+ iface.launch()
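
Once the app is running, the text endpoint can be exercised from another process with gradio_client. This is a minimal sketch, not part of the commit: the local URL and the default /predict endpoint name created by gr.Interface are assumptions.

# Sketch: query the running Gradio app from a separate Python process.
# Assumes the app is reachable at the default local URL and exposes
# the default /predict endpoint that gr.Interface registers.
from gradio_client import Client

client = Client("http://127.0.0.1:7860/")
result = client.predict(
    "Q: What is the capital of India? A:",  # prompt forwarded to generate()
    api_name="/predict",
)
print(result)  # JSON dump of the llama.cpp completion, as returned by generate()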