Fedir Zadniprovskyi committed
Commit 79f1f8d
1 Parent(s): 83e02a3

fix task enum vals, fix env var parsing, improve gradio, use uv in dockerfile

Dockerfile.cpu CHANGED
@@ -1,22 +1,20 @@
 FROM ubuntu:22.04
+# `ffmpeg` is installed because without it `gradio` won't work with mp3(possible others as well) files
 # hadolint ignore=DL3008,DL3015,DL4006
 RUN apt-get update && \
-    apt-get install -y curl software-properties-common && \
+    apt-get install -y ffmpeg software-properties-common && \
     add-apt-repository ppa:deadsnakes/ppa && \
-    apt-get update && \
     DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3.12 python3.12-distutils && \
-    curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12 && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
-RUN pip install --no-cache-dir poetry==1.8.3
 WORKDIR /root/faster-whisper-server
-COPY pyproject.toml poetry.lock ./
-RUN poetry install --only main
+COPY requirements.txt .
+RUN --mount=from=ghcr.io/astral-sh/uv:latest,source=/uv,target=/bin/uv \
+    uv pip install --system --python 3.12 --no-cache -r requirements.txt
 COPY ./faster_whisper_server ./faster_whisper_server
-ENTRYPOINT ["poetry", "run"]
-CMD ["uvicorn", "faster_whisper_server.main:app"]
-ENV WHISPER_MODEL=Systran/faster-whisper-medium.en
-ENV WHISPER_INFERENCE_DEVICE=cpu
-ENV WHISPER_COMPUTE_TYPE=int8
+ENV WHISPER__MODEL=Systran/faster-whisper-medium.en
+ENV WHISPER__INFERENCE_DEVICE=cpu
+ENV WHISPER__COMPUTE_TYPE=int8
 ENV UVICORN_HOST=0.0.0.0
 ENV UVICORN_PORT=8000
+CMD ["uvicorn", "faster_whisper_server.main:app"]
Dockerfile.cuda CHANGED
@@ -1,21 +1,19 @@
 FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04
+# `ffmpeg` is installed because without it `gradio` won't work with mp3(possible others as well) files
 # hadolint ignore=DL3008,DL3015,DL4006
 RUN apt-get update && \
-    apt-get install -y curl software-properties-common && \
+    apt-get install -y ffmpeg software-properties-common && \
     add-apt-repository ppa:deadsnakes/ppa && \
-    apt-get update && \
     DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3.12 python3.12-distutils && \
-    curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12 && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
-RUN pip install --no-cache-dir poetry==1.8.3
 WORKDIR /root/faster-whisper-server
-COPY pyproject.toml poetry.lock ./
-RUN poetry install --only main
+COPY requirements.txt .
+RUN --mount=from=ghcr.io/astral-sh/uv:latest,source=/uv,target=/bin/uv \
+    uv pip install --system --python 3.12 --no-cache -r requirements.txt
 COPY ./faster_whisper_server ./faster_whisper_server
-ENTRYPOINT ["poetry", "run"]
-CMD ["uvicorn", "faster_whisper_server.main:app"]
-ENV WHISPER_MODEL=Systran/faster-distil-whisper-large-v3
-ENV WHISPER_INFERENCE_DEVICE=cuda
+ENV WHISPER__MODEL=Systran/faster-distil-whisper-large-v3
+ENV WHISPER__INFERENCE_DEVICE=auto
 ENV UVICORN_HOST=0.0.0.0
 ENV UVICORN_PORT=8000
+CMD ["uvicorn", "faster_whisper_server.main:app"]
faster_whisper_server/config.py CHANGED
@@ -169,8 +169,8 @@ class Language(enum.StrEnum):
 
 
 class Task(enum.StrEnum):
-    TRANSCRIPTION = "transcription"
-    TRANSLATION = "translation"
+    TRANSCRIBE = "transcribe"
+    TRANSLATE = "translate"
 
 
 class WhisperConfig(BaseModel):
@@ -192,7 +192,7 @@ class Config(BaseSettings):
     the environment variable `LOG_LEVEL` will be mapped to `log_level`, `WHISPER_MODEL` to `whisper.model`, etc.
     """
 
-    model_config = SettingsConfigDict(env_nested_delimiter="_")
+    model_config = SettingsConfigDict(env_nested_delimiter="__")
 
     log_level: str = "info"
     default_language: Language | None = None
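A minimal sketch (not part of the commit, assuming pydantic-settings v2) of what the new delimiter buys: with `env_nested_delimiter="_"`, a variable such as `WHISPER_INFERENCE_DEVICE` is split at every underscore, including the ones inside the field name itself, whereas `__` marks the nesting boundary unambiguously:

    import os

    from pydantic import BaseModel
    from pydantic_settings import BaseSettings, SettingsConfigDict


    class WhisperConfig(BaseModel):
        model: str = "Systran/faster-whisper-medium.en"
        inference_device: str = "auto"


    class Config(BaseSettings):
        model_config = SettingsConfigDict(env_nested_delimiter="__")
        whisper: WhisperConfig = WhisperConfig()


    # "__" splits WHISPER__INFERENCE_DEVICE into whisper -> inference_device;
    # with "_" as the delimiter, WHISPER_INFERENCE_DEVICE would be split at
    # every underscore and could not resolve to the inference_device field.
    os.environ["WHISPER__INFERENCE_DEVICE"] = "cpu"
    assert Config().whisper.inference_device == "cpu"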
faster_whisper_server/gradio_app.py CHANGED
@@ -18,21 +18,24 @@ def create_gradio_demo(config: Config) -> gr.Blocks:
     http_client = httpx.Client(base_url=f"http://{host}:{port}", timeout=None)
 
     def handler(
-        file_path: str | None, model: str, task: Task, temperature: float, stream: bool
+        file_path: str, model: str, task: Task, temperature: float, stream: bool
     ) -> Generator[str, None, None]:
-        if file_path is None:
-            yield ""
-            return
         if stream:
-            yield from transcribe_audio_streaming(file_path, task, temperature, model)
-        yield transcribe_audio(file_path, task, temperature, model)
+            previous_transcription = ""
+            for transcription in transcribe_audio_streaming(
+                file_path, task, temperature, model
+            ):
+                previous_transcription += transcription
+                yield previous_transcription
+        else:
+            yield transcribe_audio(file_path, task, temperature, model)
 
     def transcribe_audio(
         file_path: str, task: Task, temperature: float, model: str
     ) -> str:
-        if task == Task.TRANSCRIPTION:
+        if task == Task.TRANSCRIBE:
             endpoint = TRANSCRIPTION_ENDPOINT
-        elif task == Task.TRANSLATION:
+        elif task == Task.TRANSLATE:
             endpoint = TRANSLATION_ENDPOINT
 
         with open(file_path, "rb") as file:
@@ -64,15 +67,32 @@ def create_gradio_demo(config: Config) -> gr.Blocks:
         }
         endpoint = (
             TRANSCRIPTION_ENDPOINT
-            if task == Task.TRANSCRIPTION
+            if task == Task.TRANSCRIBE
             else TRANSLATION_ENDPOINT
         )
         with connect_sse(http_client, "POST", endpoint, **kwargs) as event_source:
             for event in event_source.iter_sse():
                 yield event.data
 
+    def update_model_dropdown() -> gr.Dropdown:
+        res = http_client.get("/v1/models")
+        res_data = res.json()
+        models: list[str] = [model["id"] for model in res_data]
+        assert config.whisper.model in models
+        recommended_models = set(
+            model for model in models if model.startswith("Systran")
+        )
+        other_models = [model for model in models if model not in recommended_models]
+        models = list(recommended_models) + other_models
+        model_dropdown = gr.Dropdown(
+            # no idea why it's complaining
+            choices=models,  # type: ignore
+            label="Model",
+            value=config.whisper.model,
+        )
+        return model_dropdown
+
     model_dropdown = gr.Dropdown(
-        # TODO: use output from /v1/models
         choices=[config.whisper.model],
         label="Model",
         value=config.whisper.model,
@@ -80,13 +100,13 @@ def create_gradio_demo(config: Config) -> gr.Blocks:
     task_dropdown = gr.Dropdown(
         choices=[task.value for task in Task],
         label="Task",
-        value=Task.TRANSCRIPTION,
+        value=Task.TRANSCRIBE,
     )
     temperature_slider = gr.Slider(
         minimum=0.0, maximum=1.0, step=0.1, label="Temperature", value=0.0
    )
     stream_checkbox = gr.Checkbox(label="Stream", value=True)
-    demo = gr.Interface(
+    with gr.Interface(
         title="Whisper Playground",
         description="""Consider supporting the project by starring the <a href="https://github.com/fedirz/faster-whisper-server">repository on GitHub</a>.""",
         inputs=[
@@ -98,5 +118,6 @@ def create_gradio_demo(config: Config) -> gr.Blocks:
         ],
         fn=handler,
         outputs="text",
-    )
+    ) as demo:
+        demo.load(update_model_dropdown, inputs=None, outputs=model_dropdown)
     return demo
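A note on the streaming change above: when a gradio handler is a generator, each yielded value replaces the output component's current contents rather than appending to them, so the handler must yield the running transcription instead of each SSE chunk on its own. A toy illustration of the difference (hypothetical helpers, not from the commit):

    def yield_chunks():
        # The textbox shows each chunk in turn and ends up with just " world".
        for chunk in ["Hello", " world"]:
            yield chunk


    def yield_accumulated():
        # The textbox shows "Hello", then "Hello world".
        text = ""
        for chunk in ["Hello", " world"]:
            text += chunk
            yield text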
faster_whisper_server/main.py CHANGED
@@ -203,7 +203,7 @@ def translate_file(
     whisper = load_model(model)
     segments, transcription_info = whisper.transcribe(
         file.file,
-        task=Task.TRANSLATION,
+        task=Task.TRANSLATE,
         initial_prompt=prompt,
         temperature=temperature,
         vad_filter=True,
@@ -244,7 +244,7 @@ def transcribe_file(
     whisper = load_model(model)
     segments, transcription_info = whisper.transcribe(
         file.file,
-        task=Task.TRANSCRIPTION,
+        task=Task.TRANSCRIBE,
         language=language,
         initial_prompt=prompt,
         word_timestamps="word" in timestamp_granularities,
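For context on the enum rename: faster-whisper's `WhisperModel.transcribe` takes the literal strings "transcribe" and "translate" for its `task` parameter, so the old values "transcription"/"translation" never matched. Because `Task` is a `StrEnum`, the renamed members are themselves strings with exactly the expected values, as a quick sketch shows:

    import enum


    class Task(enum.StrEnum):
        TRANSCRIBE = "transcribe"
        TRANSLATE = "translate"


    # StrEnum members are real str instances, so they compare and format
    # as the plain values that the `task` parameter expects.
    assert Task.TRANSCRIBE == "transcribe"
    assert f"{Task.TRANSLATE}" == "translate"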
flake.nix CHANGED
@@ -30,6 +30,7 @@
       rsync
       ruff
       websocat
+      uv
     ];
     shellHook = ''
       poetry env use python3.12
requirements.txt ADDED
The diff for this file is too large to render. See raw diff