Fedir Zadniprovskyi committed on
Commit a5d79bf · 1 Parent(s): f58fddb

chore: update volume names and mount points
Dockerfile CHANGED
@@ -29,7 +29,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # Creating a directory for the cache to avoid the following error:
 # PermissionError: [Errno 13] Permission denied: '/home/ubuntu/.cache/huggingface/hub'
 # This error occurs because the volume is mounted as root and the `ubuntu` user doesn't have permission to write to it. Pre-creating the directory solves this issue.
-RUN mkdir -p $HOME/.cache/huggingface
+RUN mkdir -p $HOME/.cache/huggingface/hub
 ENV WHISPER__MODEL=Systran/faster-whisper-large-v3
 ENV UVICORN_HOST=0.0.0.0
 ENV UVICORN_PORT=8000
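Pre-creating the directory works because Docker copies the image's contents and ownership into an empty named volume on first mount. A hedged sanity check that the `ubuntu` user can write inside the mounted volume (overriding the image's entrypoint here is an assumption, not part of the commit):

```bash
# Sketch: create the volume, then write a test file as the image's default user.
docker volume create hf-hub-cache
docker run --rm --entrypoint touch \
  --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub \
  fedirz/faster-whisper-server:latest-cpu \
  /home/ubuntu/.cache/huggingface/hub/.write-test
```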
README.md CHANGED
@@ -49,9 +49,9 @@ docker compose --file compose.cpu.yaml up --detach
 
 ```bash
 # for GPU support
-docker run --gpus=all --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --detach fedirz/faster-whisper-server:latest-cuda
+docker run --gpus=all --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --detach fedirz/faster-whisper-server:latest-cuda
 # for CPU only (use this if you don't have a GPU, as the image is much smaller)
-docker run --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --env WHISPER__MODEL=Systran/faster-whisper-small --detach fedirz/faster-whisper-server:latest-cpu
+docker run --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=Systran/faster-whisper-small --detach fedirz/faster-whisper-server:latest-cpu
 ```
 
 ### Using Kubernetes
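Since the commands now mount a named volume instead of the host's `~/.cache/huggingface`, models downloaded under the old setup won't be picked up automatically. A hedged one-time migration sketch (the `alpine` helper image and the copy approach are assumptions, not part of the commit):

```bash
# Copy previously downloaded model snapshots from the host cache into the new volume.
docker volume create hf-hub-cache
docker run --rm \
  --volume ~/.cache/huggingface/hub:/src:ro \
  --volume hf-hub-cache:/dst \
  alpine cp -a /src/. /dst/
```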
audio.wav CHANGED
Binary files a/audio.wav and b/audio.wav differ
 
compose.cpu.yaml CHANGED
@@ -12,6 +12,6 @@ services:
     environment:
       - WHISPER__MODEL=Systran/faster-whisper-small
     volumes:
-      - hugging_face_cache:/root/.cache/huggingface
+      - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
 volumes:
-  hugging_face_cache:
+  hf-hub-cache:
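With the volume declared this way, Compose creates it on first `up` and prefixes it with the project name. A quick way to confirm it exists and find it on disk (the exact `<project>_hf-hub-cache` name depends on your directory or project name):

```bash
docker compose --file compose.cpu.yaml up --detach
# List matching volumes; the real name is typically <project>_hf-hub-cache.
docker volume ls --filter name=hf-hub-cache
```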
compose.cuda-cdi.yaml CHANGED
@@ -9,7 +9,7 @@ services:
       file: compose.cuda.yaml
       service: faster-whisper-server
     volumes:
-      - hugging_face_cache:/root/.cache/huggingface
+      - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
     deploy:
       resources:
         reservations:
@@ -21,4 +21,4 @@ services:
           device_ids:
             - nvidia.com/gpu=all
 volumes:
-  hugging_face_cache:
+  hf-hub-cache:
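The `nvidia.com/gpu=all` device ID only resolves if a CDI spec for the GPUs has been generated and Docker's CDI feature is enabled. A hedged prerequisite sketch, assuming the NVIDIA Container Toolkit is installed:

```bash
# Generate a CDI spec for the installed GPUs, then confirm the device name used above.
sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
nvidia-ctk cdi list   # should include nvidia.com/gpu=all
```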
compose.cuda.yaml CHANGED
@@ -12,11 +12,11 @@ services:
     environment:
       - WHISPER__MODEL=Systran/faster-whisper-large-v3
     volumes:
-      - hugging_face_cache:/root/.cache/huggingface
+      - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
     deploy:
       resources:
         reservations:
           devices:
             - capabilities: ["gpu"]
 volumes:
-  hugging_face_cache:
+  hf-hub-cache:
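For reference, bringing the CUDA service up and checking readiness, using the same `/health` endpoint the example scripts below poll:

```bash
docker compose --file compose.cuda.yaml up --detach
curl localhost:8000/health   # first start may take a while: the model is downloaded into the volume
```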
docs/installation.md CHANGED
@@ -13,14 +13,14 @@ TODO: just reference the existing compose file in the repo
         ports:
           - 8000:8000
         volumes:
-          - hugging_face_cache:/root/.cache/huggingface
+          - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
         deploy:
           resources:
             reservations:
               devices:
                 - capabilities: ["gpu"]
     volumes:
-      hugging_face_cache:
+      hf-hub-cache:
     ```
 
 === "CUDA (with CDI feature enabled)"
@@ -35,7 +35,7 @@ TODO: just reference the existing compose file in the repo
         ports:
           - 8000:8000
         volumes:
-          - hugging_face_cache:/root/.cache/huggingface
+          - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
         deploy:
           resources:
             reservations:
@@ -46,7 +46,7 @@ TODO: just reference the existing compose file in the repo
               device_ids:
                 - nvidia.com/gpu=all
     volumes:
-      hugging_face_cache:
+      hf-hub-cache:
     ```
 
 === "CPU"
@@ -60,9 +60,9 @@ TODO: just reference the existing compose file in the repo
         ports:
           - 8000:8000
         volumes:
-          - hugging_face_cache:/root/.cache/huggingface
+          - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
     volumes:
-      hugging_face_cache:
+      hf-hub-cache:
     ```
 
 ## Docker
@@ -70,19 +70,19 @@ TODO: just reference the existing compose file in the repo
 === "CUDA"
 
     ```bash
-    docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume hugging_face_cache:/root/.cache/huggingface --gpus=all fedirz/faster-whisper-server:latest-cuda
+    docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --gpus=all fedirz/faster-whisper-server:latest-cuda
     ```
 
 === "CUDA (with CDI feature enabled)"
 
     ```bash
-    docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume hugging_face_cache:/root/.cache/huggingface --device=nvidia.com/gpu=all fedirz/faster-whisper-server:latest-cuda
+    docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --device=nvidia.com/gpu=all fedirz/faster-whisper-server:latest-cuda
     ```
 
 === "CPU"
 
     ```bash
-    docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume hugging_face_cache:/root/.cache/huggingface fedirz/faster-whisper-server:latest-cpu
+    docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub fedirz/faster-whisper-server:latest-cpu
     ```
 
 ## Kubernetes
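After the first successful run with any of these commands, the downloaded model snapshot should land inside the named volume. A hedged way to confirm (the `models--Org--Name` layout is the standard Hugging Face hub cache structure; the `alpine` helper image is an assumption):

```bash
docker run --rm --volume hf-hub-cache:/hub alpine ls /hub
# Expect entries like models--Systran--faster-whisper-large-v3
```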
examples/live-audio/script.sh CHANGED
@@ -10,9 +10,9 @@ set -e
 export WHISPER__MODEL=Systran/faster-distil-whisper-large-v3 # or Systran/faster-whisper-tiny.en if you are running on a CPU for faster inference.
 
 # Ensure you have `faster-whisper-server` running. If this is your first time running it, expect to wait up to a minute for the model to be downloaded and loaded into memory. You can run `curl localhost:8000/health` to check if the server is ready, or watch the logs with `docker logs -f <container_id>`.
-docker run --detach --gpus=all --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cuda
+docker run --detach --gpus=all --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cuda
 # or you can run it on a CPU
-# docker run --detach --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cpu
+# docker run --detach --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cpu
 
 # `pv` is used to limit the rate at which the audio is streamed to the server. Audio is streamed at 32 kB/s (16000 samples per second * 16 bits per sample / 8 bits per byte = 32000 bytes per second). This emulates live audio input from a microphone: `ffmpeg -loglevel quiet -f alsa -i default -ac 1 -ar 16000 -f s16le -`
 # shellcheck disable=SC2002
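A hedged sketch of the rate limiting described in that comment; the downstream client is elided, so this only demonstrates the 32000 bytes/s cap on raw 16-bit PCM:

```bash
# Decode to mono 16 kHz signed 16-bit PCM, then throttle to 32000 bytes/s.
ffmpeg -loglevel quiet -i audio.wav -ac 1 -ar 16000 -f s16le - |
  pv --rate-limit 32000 > /dev/null   # replace /dev/null with the actual streaming client
```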
examples/youtube/script.sh CHANGED
@@ -6,9 +6,9 @@ set -e
 export WHISPER__MODEL=Systran/faster-distil-whisper-large-v3 # or Systran/faster-whisper-tiny.en if you are running on a CPU for faster inference.
 
 # Ensure you have `faster-whisper-server` running. If this is your first time running it, expect to wait up to a minute for the model to be downloaded and loaded into memory. You can run `curl localhost:8000/health` to check if the server is ready, or watch the logs with `docker logs -f <container_id>`.
-docker run --detach --gpus=all --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cuda
+docker run --detach --gpus=all --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cuda
 # or you can run it on a CPU
-# docker run --detach --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cpu
+# docker run --detach --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cpu
 
 # Download the audio from a YouTube video. In this example I'm downloading "The Evolution of the Operating System" by the Asianometry YouTube channel. I highly recommend checking this channel out; the guy produces very high-quality content. If you don't have `youtube-dl`, you'll have to install it: https://github.com/ytdl-org/youtube-dl
 youtube-dl --extract-audio --audio-format mp3 -o the-evolution-of-the-operating-system.mp3 'https://www.youtube.com/watch?v=1lG7lFLXBIs'
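A hedged sketch of the transcription step that would follow the download, assuming the server exposes the OpenAI-compatible `/v1/audio/transcriptions` endpoint (not shown in this diff):

```bash
# Send the downloaded audio for transcription using the model exported above.
curl http://localhost:8000/v1/audio/transcriptions \
  --form "file=@the-evolution-of-the-operating-system.mp3" \
  --form "model=$WHISPER__MODEL"
```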