papayaga commited on
Commit
ec2a0f3
·
0 Parent(s):

init and basic architecture and logic in the Readme

Browse files
Files changed (9) hide show
  1. .gitignore +166 -0
  2. Dockerfile +52 -0
  3. README.md +81 -0
  4. adaptors/__init__.py +2 -0
  5. adaptors/llm.py +28 -0
  6. adaptors/voice.py +45 -0
  7. homeros.py +3 -0
  8. main.py +86 -0
  9. requirements.txt +141 -0
.gitignore ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app specific
2
+ outputs/*
3
+
4
+ # Byte-compiled / optimized / DLL files
5
+ __pycache__/
6
+ *.py[cod]
7
+ *$py.class
8
+
9
+ # C extensions
10
+ *.so
11
+
12
+ # Mac stuff
13
+ .DS_Store
14
+
15
+ # Distribution / packaging
16
+ .Python
17
+ build/
18
+ develop-eggs/
19
+ dist/
20
+ downloads/
21
+ eggs/
22
+ .eggs/
23
+ lib/
24
+ lib64/
25
+ parts/
26
+ sdist/
27
+ var/
28
+ wheels/
29
+ share/python-wheels/
30
+ *.egg-info/
31
+ .installed.cfg
32
+ *.egg
33
+ MANIFEST
34
+
35
+ # PyInstaller
36
+ # Usually these files are written by a python script from a template
37
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
38
+ *.manifest
39
+ *.spec
40
+
41
+ # Installer logs
42
+ pip-log.txt
43
+ pip-delete-this-directory.txt
44
+
45
+ # Unit test / coverage reports
46
+ htmlcov/
47
+ .tox/
48
+ .nox/
49
+ .coverage
50
+ .coverage.*
51
+ .cache
52
+ nosetests.xml
53
+ coverage.xml
54
+ *.cover
55
+ *.py,cover
56
+ .hypothesis/
57
+ .pytest_cache/
58
+ cover/
59
+
60
+ # Translations
61
+ *.mo
62
+ *.pot
63
+
64
+ # Django stuff:
65
+ *.log
66
+ local_settings.py
67
+ db.sqlite3
68
+ db.sqlite3-journal
69
+
70
+ # Flask stuff:
71
+ instance/
72
+ .webassets-cache
73
+
74
+ # Scrapy stuff:
75
+ .scrapy
76
+
77
+ # Sphinx documentation
78
+ docs/_build/
79
+
80
+ # PyBuilder
81
+ .pybuilder/
82
+ target/
83
+
84
+ # Jupyter Notebook
85
+ .ipynb_checkpoints
86
+
87
+ # IPython
88
+ profile_default/
89
+ ipython_config.py
90
+
91
+ # pyenv
92
+ # For a library or package, you might want to ignore these files since the code is
93
+ # intended to run in multiple environments; otherwise, check them in:
94
+ # .python-version
95
+
96
+ # pipenv
97
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
98
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
99
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
100
+ # install all needed dependencies.
101
+ #Pipfile.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/#use-with-ide
116
+ .pdm.toml
117
+
118
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
119
+ __pypackages__/
120
+
121
+ # Celery stuff
122
+ celerybeat-schedule
123
+ celerybeat.pid
124
+
125
+ # SageMath parsed files
126
+ *.sage.py
127
+
128
+ # Environments
129
+ .env
130
+ .venv
131
+ env/
132
+ venv/
133
+ ENV/
134
+ env.bak/
135
+ venv.bak/
136
+
137
+ # Spyder project settings
138
+ .spyderproject
139
+ .spyproject
140
+
141
+ # Rope project settings
142
+ .ropeproject
143
+
144
+ # mkdocs documentation
145
+ /site
146
+
147
+ # mypy
148
+ .mypy_cache/
149
+ .dmypy.json
150
+ dmypy.json
151
+
152
+ # Pyre type checker
153
+ .pyre/
154
+
155
+ # pytype static type analyzer
156
+ .pytype/
157
+
158
+ # Cython debug symbols
159
+ cython_debug/
160
+
161
+ # PyCharm
162
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
163
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
164
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
165
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
166
+ #.idea/
Dockerfile ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Start from a pinned Ubuntu release. `ubuntu:latest` moves over time; newer
# releases mark the system Python as externally managed (so the system-wide
# `pip3 install --upgrade` below fails) and already ship a UID 1000 user
# (so `useradd -u 1000` below fails).
FROM ubuntu:22.04

# Set environment variables
ENV LANG=C.UTF-8 LC_ALL=C.UTF-8

# Build-time only: stop apt/debconf from waiting on interactive prompts
ARG DEBIAN_FRONTEND=noninteractive

# Install essentials, ffmpeg, Python3 and pip in a single layer, and clean the
# APT caches in that same layer so the cleanup actually reduces the image size
RUN apt-get update && apt-get install -y \
        software-properties-common \
        build-essential \
        curl \
        git \
        vim \
        ffmpeg \
        python3-pip \
        python3-dev \
        python3-setuptools \
    && pip3 install --upgrade pip setuptools wheel \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Set up a new user named "user" with user ID 1000
RUN useradd -m -u 1000 user

# Switch to the "user" user
USER user

# Set home to the user's home directory
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set up the working directory
WORKDIR $HOME/app

# Run pip after `USER user` to avoid permission issues with Python
RUN pip install --no-cache-dir --upgrade pip

# Copy requirements.txt on its own and install it first, so this layer is
# rebuilt only when the dependencies change, not on every source edit
COPY --chown=user requirements.txt $HOME/app/requirements.txt
RUN pip3 install --no-cache-dir -r requirements.txt

# Copy the current directory contents into the container at $HOME/app setting the owner to the user
COPY --chown=user . $HOME/app

# Command to run on container start
CMD ["gradio", "main.py"]
README.md ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: HOMER.OS
3
+ emoji: 🐳
4
+ colorFrom: purple
5
+ colorTo: black
6
+ sdk: docker
7
+ app_port: 7860
8
+ ---
9
+
10
+ # HOMER.OS
11
+
12
+ ### the operating system for the next age of co-creative storytelling
13
+
14
+ This demo is exploring the future of interactive storytelling.
15
+ It puts the user in charge of how the story is going to develop.
16
+
17
+ ## Here is how it works:
18
+
19
+ 1. The user interacts with the system by means of audio messages
20
+ 2. The experience starts with the user inputting the details of the hero, the style and the story they'd like to play.
21
+ 3. The system creates the beginning of the story and reads it out loud to the user. The system then asks what the hero should do next.
22
+ 4. The user answers via voice message
23
+ 5. The system takes the user input into account, generates the next chunk of the story and reads it out to the user
24
+ 6. The loop continues until after X messages the system decides to end the story (to prevent from exceeding GPT context window for now)
25
+
26
+ ## Tech. Stack for the demo:
27
+
28
+ - GPT-4 for story generation
29
+ - Whisper for speech to text
30
+ - Play.ht for voice generation
31
+ - Gradio for interface
32
+ - Gradio Spaces for deployment
33
+
34
+ ## Story schema
35
+
36
+ - STRING `uuid` = uuid of this story
37
+ - STRING `status` = 'not_started' / 'ongoing' / 'finished'
38
+ - TEXT `world` = text description of the world
39
+ - TEXT `hero` = text description of the hero of the story
40
+ - TEXT `plot` = high-level description of the plot, without chapters or anything like that. We can use this later to break the plot down into chapters and get smarter about story arc management with a second LLM
41
+ - STRING `ending` = text string representing what kind of ending we want e.g. happy or tragic
42
+ - STRING `style` = text description of the style of story-telling
43
+ - STRING `voice` = id of the voice we are using for sounding the story
44
+ - TEXT(JSON) `chunks` = JSON array of story-chunks. each chunk has {"text", "audio_url"}
45
+ - TEXT(JSON) `messages` = JSON array of messages in the openAI compatible format {role=system/user/assistant content=message}
46
+ - STRING `full_story_audio_url` = url of the full rendered audio story (story chunks audio combined)
47
+ - TEXT `full_story_text` = full story text
48
+
49
+ ## Flow
50
+
51
+ 1. Welcome the user
52
+ 2. Ask for the magic word
53
+ 3. Check the magic word - if not apologize and tell them how to get it
54
+ 4. Once we have the magic word - generate uuid and kickstart story configuration:
55
+ - say "Let me now ask you a few questions about the story you'd like to hear..."
56
+ - ask the user about the world their story should happen in
57
+ - ask the user about the hero and save it
58
+ - ask the user about the plot and save it
59
+ - ask the user if they want the story to end in a happy way or in a sad way (free user input) and save it
60
+ - ask the user about the style and save it
61
+ 5. Say "Our story is all set! Let it begin."
62
+ 6. Tell the first paragraph / part and then ask at the end "What do you think should the hero do next?"
63
+ 7. Process user input, generate the next chunk and repeat
64
+ 8. If number of chunks (or total tokens in the story) is approaching the limit - end the story by passing a constructed user message that references the type of ending
65
+ 9. Thank the user and say goodbye
66
+ 10. If the user records more messages - say a fixed message that this story has ended, but if the user wants another one, they can come again.
67
+
68
+ ## Basic ToDo
69
+
70
+ - Gradio inputs/outputs/state setup (with text only)
71
+ - Story object setup, schema, logic
72
+ - GPT-4 story generation in a gradio interface
73
+ - Dockerfile and deploy (including magic word for access control)
74
+ - Interchange text input for whisper
75
+ - Interchange text output for play.ht voice generation
76
+
77
+ ## Enhancements
78
+
79
+ - Add SQlite DB and save stories
80
+ - Add option to download the full story as one .mp3
81
+ - Add meta-moderator role to manage story state and scenarios better
adaptors/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from . import llm
2
+ from . import voice
adaptors/llm.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ an abstraction over GPT-4 for easy substitution later if needed
3
+ '''
4
+
5
+ import openai
6
+ import os
7
+
8
# The API key is read from the environment; it is None when the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Name of the chat model passed to every completion request.
MODEL = "gpt-4"
# MODEL = "gpt-3.5-turbo"
12
+
13
def answer(system_message, user_and_assistant_messages):
    """Ask the chat model for its next reply.

    Args:
        system_message: Text of the system prompt; it is sent as the first
            message with role "system".
        user_and_assistant_messages: Iterable of earlier messages (dicts with
            "role" and "content"), appended after the system message in the
            order given.

    Returns:
        The message content of the first choice in the completion response.
    """
    # Use the function parameter here: the name `system_prompt` is not defined
    # anywhere in this module and raised NameError on every call.
    messages = [{
        "role": "system",
        "content": system_message
    }]

    messages.extend(user_and_assistant_messages)

    chat_completion = openai.ChatCompletion.create(
        model=MODEL,
        messages=messages
    )

    output = chat_completion.choices[0].message.content
    return output
28
+
adaptors/voice.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ generates voice using play.ht api
3
+ returns a url of the generated mp3
4
+ '''
5
+ import requests
6
+ import sseclient
7
+ from loguru import logger
8
+ import os
9
+ from pprint import pprint
10
+ import json
11
+
12
# play.ht text-to-speech endpoint used by say_it().
url = "https://play.ht/api/v2/tts"

# Both credentials are mandatory: a missing variable raises KeyError at import.
api_key = os.environ["PLAYHT_SECRETKEY"]
user_id = os.environ["PLAYHT_USERID"]

# Headers sent with every request made by this module.
headers = dict([
    ("accept", "text/event-stream"),
    ("content-type", "application/json"),
    ("AUTHORIZATION", "Bearer " + api_key),
    ("X-USER-ID", user_id),
])
22
+
23
def say_it(text, voice):
    """Generate speech for *text* via the play.ht API and return the audio URL.

    Args:
        text: The text to be spoken.
        voice: Voice identifier forwarded to the API as the "voice" field.

    Returns:
        The "url" field of the first event whose "stage" is "complete", or
        None if the event stream ends without such an event.

    Raises:
        requests.HTTPError: If either HTTP request returns an error status.
        KeyError: If the creation response has no "content-location" header.
    """
    payload = {
        "quality": "medium",
        "output_format": "mp3",
        "speed": 1,
        "sample_rate": 24000,
        "text": text,
        "voice": voice
    }

    # With stream=True the connection stays checked out until the body is
    # consumed or the response is closed, so close it explicitly.
    with requests.post(url, stream=True, headers=headers, json=payload) as response:
        # Surface API errors as HTTPError rather than a confusing KeyError
        # on the missing header below.
        response.raise_for_status()
        stream_url = response.headers['content-location']
    logger.debug(f"stream_url = {stream_url}")

    with requests.get(stream_url, stream=True, headers=headers) as resp:
        resp.raise_for_status()

        client = sseclient.SSEClient(resp)
        for event in client.events():
            if not event.data:
                continue
            e = json.loads(event.data)
            # Events without a "stage" field are skipped instead of raising.
            if isinstance(e, dict) and e.get("stage") == "complete":
                return e["url"]

    logger.warning("event stream ended without a 'complete' stage")
    return None
homeros.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+
2
def start_story(world, hero, plot, style):
    """Placeholder for story start-up; currently ignores its arguments.

    Args:
        world: Description of the world the story happens in.
        hero: Description of the hero.
        plot: High-level description of the plot.
        style: Description of the story-telling style.

    Returns:
        The fixed placeholder string "ohoho".
    """
    return "ohoho"


def continue_story(user_input, story):
    """Placeholder for generating the next story chunk.

    main.py imports this name from this module, so it must exist for the app
    to start. TODO: implement; the signature is provisional and nothing calls
    this function yet.

    Args:
        user_input: The latest input from the user.
        story: The story state dict.

    Returns:
        The fixed placeholder string "ohoho".
    """
    return "ohoho"
main.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pprint import pprint
3
+ import uuid
4
+ import json
5
+ from loguru import logger
6
+ from dotenv import load_dotenv
7
+ load_dotenv()
8
+
9
+ from homeros import start_story, continue_story
10
+
11
def gen_unique_id():
    """Return a freshly generated random UUID (version 4) as a string."""
    fresh = uuid.uuid4()
    return str(fresh)
13
+
14
def do_homeros(user_input, story):
    """Handle one click of the button.

    Prints the story state, assigns a new uuid while the story status is
    "not_started", and returns the messages serialized as JSON.

    Args:
        user_input: Text from the input box (currently unused).
        story: The story state dict; it is modified in place.

    Returns:
        A tuple of (JSON string of story["messages"], the same story dict).
    """
    pprint(story)

    not_yet_started = story["status"] == "not_started"
    if not_yet_started:
        story["uuid"] = gen_unique_id()

    serialized_messages = json.dumps(story["messages"])
    return serialized_messages, story
22
+
23
# Build the Gradio UI: one text input, one button and one text output, with
# the story dict carried between calls of do_homeros through a gr.State.
demo = gr.Blocks()

with demo:

    # Initial story state. The key names follow the story schema in README.md;
    # the audio key was previously misspelled "full_story_audio_ur".
    story = gr.State(value={
        "uuid": "",
        "status": "not_started",
        "world": "",
        "hero": "",
        "plot": "",
        "ending": "",
        "style": "",
        "voice": "dylan",
        "chunks": [],
        "messages": [],
        "full_story_audio_url": "",
        "full_story_text": ""
    })

    pprint(story.value)

    with gr.Row():
        # The text is kept flush-left so that no leading whitespace ends up
        # in the rendered Markdown.
        gr.Markdown('''
# HOMEROS

This demo is exploring the future of interactive storytelling.
It puts the user in charge and makes blurs the boundary between the reader and the author.

Hit "Tell me!" to get started.

When Homeros asks you something - hit record, answer with your voice and then hit "Tell me!" again.

''')

    with gr.Row():

        text_input = gr.Textbox()

    with gr.Row():

        go_btn = gr.Button(
            "Tell me!",
        )

    with gr.Row():
        story_chunk = gr.Textbox()

    # The handler receives the input text and the story state, and returns
    # the text to display together with the updated state.
    go_btn.click(
        do_homeros,
        inputs=[text_input, story],
        outputs=[story_chunk, story]
    )

demo.queue(
    concurrency_count=5
)

# Listen on all interfaces so the app is reachable from outside the container.
demo.launch(
    server_name="0.0.0.0",
    ssl_verify=False,
    show_api=False
)
requirements.txt ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.1.0
2
+ aiohttp==3.8.5
3
+ aiosignal==1.3.1
4
+ altair==5.0.1
5
+ annotated-types==0.5.0
6
+ anyio==3.7.1
7
+ async-timeout==4.0.2
8
+ attrs==23.1.0
9
+ certifi==2023.5.7
10
+ charset-normalizer==3.2.0
11
+ click==8.1.6
12
+ contourpy==1.1.0
13
+ cycler==0.11.0
14
+ fastapi==0.100.0
15
+ ffmpy==0.3.1
16
+ filelock==3.12.2
17
+ fonttools==4.41.0
18
+ frozenlist==1.4.0
19
+ fsspec==2023.6.0
20
+ gradio==3.37.0
21
+ gradio_client==0.2.10
22
+ h11==0.14.0
23
+ httpcore==0.17.3
24
+ httpx==0.24.1
25
+ huggingface-hub==0.16.4
26
+ idna==3.4
27
+ Jinja2==3.1.2
28
+ jsonschema==4.18.4
29
+ jsonschema-specifications==2023.7.1
30
+ kiwisolver==1.4.4
31
+ linkify-it-py==2.0.2
32
+ loguru==0.7.0
33
+ markdown-it-py==2.2.0
34
+ MarkupSafe==2.1.3
35
+ matplotlib==3.7.2
36
+ mdit-py-plugins==0.3.3
37
+ mdurl==0.1.2
38
+ multidict==6.0.4
39
+ numpy==1.25.1
40
+ openai==0.27.8
41
+ orjson==3.9.2
42
+ packaging==23.1
43
+ pandas==2.0.3
44
+ Pillow==10.0.0
45
+ pydantic==2.0.3
46
+ pydantic_core==2.3.0
47
+ pydub==0.25.1
48
+ pyparsing==3.0.9
49
+ python-dateutil==2.8.2
50
+ python-dotenv==1.0.0
51
+ python-multipart==0.0.6
52
+ pytz==2023.3
53
+ PyYAML==6.0.1
54
+ referencing==0.30.0
55
+ requests==2.31.0
56
+ rpds-py==0.9.2
57
+ semantic-version==2.10.0
58
+ six==1.16.0
59
+ sniffio==1.3.0
60
+ sseclient==0.0.27
61
+ starlette==0.27.0
62
+ toolz==0.12.0
63
+ tqdm==4.65.0
64
+ typing_extensions==4.7.1
65
+ tzdata==2023.3
66
+ uc-micro-py==1.0.2
67
+ urllib3==2.0.4
68
+ uvicorn==0.23.1
69
+ websockets==11.0.3
70
+ yarl==1.9.2
71
+ aiofiles==23.1.0
72
+ aiohttp==3.8.5
73
+ aiosignal==1.3.1
74
+ altair==5.0.1
75
+ annotated-types==0.5.0
76
+ anyio==3.7.1
77
+ async-timeout==4.0.2
78
+ attrs==23.1.0
79
+ certifi==2023.5.7
80
+ charset-normalizer==3.2.0
81
+ click==8.1.6
82
+ contourpy==1.1.0
83
+ cycler==0.11.0
84
+ fastapi==0.100.0
85
+ ffmpy==0.3.1
86
+ filelock==3.12.2
87
+ fonttools==4.41.0
88
+ frozenlist==1.4.0
89
+ fsspec==2023.6.0
90
+ gradio==3.37.0
91
+ gradio_client==0.2.10
92
+ h11==0.14.0
93
+ httpcore==0.17.3
94
+ httpx==0.24.1
95
+ huggingface-hub==0.16.4
96
+ idna==3.4
97
+ Jinja2==3.1.2
98
+ jsonschema==4.18.4
99
+ jsonschema-specifications==2023.7.1
100
+ kiwisolver==1.4.4
101
+ linkify-it-py==2.0.2
102
+ loguru==0.7.0
103
+ markdown-it-py==2.2.0
104
+ MarkupSafe==2.1.3
105
+ matplotlib==3.7.2
106
+ mdit-py-plugins==0.3.3
107
+ mdurl==0.1.2
108
+ multidict==6.0.4
109
+ numpy==1.25.1
110
+ openai==0.27.8
111
+ orjson==3.9.2
112
+ packaging==23.1
113
+ pandas==2.0.3
114
+ Pillow==10.0.0
115
+ pydantic==2.0.3
116
+ pydantic_core==2.3.0
117
+ pydub==0.25.1
118
+ pyparsing==3.0.9
119
+ python-dateutil==2.8.2
120
+ python-dotenv==1.0.0
121
+ python-multipart==0.0.6
122
+ pytz==2023.3
123
+ PyYAML==6.0.1
124
+ referencing==0.30.0
125
+ requests==2.31.0
126
+ rpds-py==0.9.2
127
+ semantic-version==2.10.0
128
+ six==1.16.0
129
+ sniffio==1.3.0
130
+ sseclient==0.0.27
131
+ starlette==0.27.0
132
+ toolz==0.12.0
133
+ tqdm==4.65.0
134
+ typing_extensions==4.7.1
135
+ tzdata==2023.3
136
+ uc-micro-py==1.0.2
137
+ urllib3==2.0.4
138
+ uuid==1.30
139
+ uvicorn==0.23.1
140
+ websockets==11.0.3
141
+ yarl==1.9.2