Spaces:
Running
on
T4
Running
on
T4
Fedir Zadniprovskyi
committed on
Commit
•
94c7543
1
Parent(s):
9f56267
style: add ruff
Browse files
- .pre-commit-config.yaml +16 -13
- flake.nix +1 -0
- pyproject.toml +3 -0
- speaches/audio.py +4 -4
- speaches/core.py +8 -8
- speaches/main.py +13 -4
.pre-commit-config.yaml
CHANGED
@@ -8,18 +8,21 @@ repos:
|
|
8 |
- id: end-of-file-fixer
|
9 |
- id: check-yaml
|
10 |
- id: check-added-large-files
|
11 |
-
|
12 |
-
- repo: https://github.com/pre-commit/mirrors-mypy
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
#
|
18 |
-
#
|
|
|
19 |
# hooks:
|
20 |
-
#
|
21 |
-
#
|
22 |
-
# - repo: https://github.com/
|
23 |
-
# rev:
|
24 |
# hooks:
|
25 |
-
# - id:
|
|
|
|
|
|
8 |
- id: end-of-file-fixer
|
9 |
- id: check-yaml
|
10 |
- id: check-added-large-files
|
11 |
+
# TODO: enable
|
12 |
+
# - repo: https://github.com/pre-commit/mirrors-mypy
|
13 |
+
# rev: v1.10.0
|
14 |
+
# hooks:
|
15 |
+
# - id: mypy
|
16 |
+
# args: [--strict]
|
17 |
+
# TODO: enable
|
18 |
+
# - repo: https://github.com/RobertCraigie/pyright-python
|
19 |
+
# rev: v1.1.363
|
20 |
# hooks:
|
21 |
+
# - id: pyright
|
22 |
+
# Disabled because it doesn't work on NixOS
|
23 |
+
# - repo: https://github.com/astral-sh/ruff-pre-commit
|
24 |
+
# rev: v0.4.4
|
25 |
# hooks:
|
26 |
+
# - id: ruff # linter
|
27 |
+
# args: [--fix]
|
28 |
+
# - id: ruff-format
|
flake.nix
CHANGED
@@ -26,6 +26,7 @@
|
|
26 |
pv
|
27 |
pyright
|
28 |
python311
|
|
|
29 |
websocat
|
30 |
];
|
31 |
shellHook = ''
|
|
|
26 |
pv
|
27 |
pyright
|
28 |
python311
|
29 |
+
ruff
|
30 |
websocat
|
31 |
];
|
32 |
shellHook = ''
|
pyproject.toml
CHANGED
@@ -27,6 +27,9 @@ pytest-xdist = "^3.6.1"
|
|
27 |
httpx = "^0.27.0"
|
28 |
httpx-ws = "^0.6.0"
|
29 |
|
|
|
|
|
|
|
30 |
[build-system]
|
31 |
requires = ["poetry-core"]
|
32 |
build-backend = "poetry.core.masonry.api"
|
|
|
27 |
httpx = "^0.27.0"
|
28 |
httpx-ws = "^0.6.0"
|
29 |
|
30 |
+
[tool.ruff]
|
31 |
+
target-version = "py311"
|
32 |
+
|
33 |
[build-system]
|
34 |
requires = ["poetry-core"]
|
35 |
build-backend = "poetry.core.masonry.api"
|
speaches/audio.py
CHANGED
@@ -12,7 +12,7 @@ from speaches.logger import logger
|
|
12 |
|
13 |
|
14 |
def audio_samples_from_file(file: BinaryIO) -> NDArray[np.float32]:
|
15 |
-
audio_and_sample_rate
|
16 |
file,
|
17 |
format="RAW",
|
18 |
channels=1,
|
@@ -22,7 +22,7 @@ def audio_samples_from_file(file: BinaryIO) -> NDArray[np.float32]:
|
|
22 |
endian="LITTLE",
|
23 |
)
|
24 |
audio = audio_and_sample_rate[0]
|
25 |
-
return audio
|
26 |
|
27 |
|
28 |
class Audio:
|
@@ -68,12 +68,12 @@ class AudioStream(Audio):
|
|
68 |
self.modify_event = asyncio.Event()
|
69 |
|
70 |
def extend(self, data: NDArray[np.float32]) -> None:
|
71 |
-
assert self.closed
|
72 |
super().extend(data)
|
73 |
self.modify_event.set()
|
74 |
|
75 |
def close(self) -> None:
|
76 |
-
assert self.closed
|
77 |
self.closed = True
|
78 |
self.modify_event.set()
|
79 |
logger.info("AudioStream closed")
|
|
|
12 |
|
13 |
|
14 |
def audio_samples_from_file(file: BinaryIO) -> NDArray[np.float32]:
|
15 |
+
audio_and_sample_rate = sf.read(
|
16 |
file,
|
17 |
format="RAW",
|
18 |
channels=1,
|
|
|
22 |
endian="LITTLE",
|
23 |
)
|
24 |
audio = audio_and_sample_rate[0]
|
25 |
+
return audio # type: ignore
|
26 |
|
27 |
|
28 |
class Audio:
|
|
|
68 |
self.modify_event = asyncio.Event()
|
69 |
|
70 |
def extend(self, data: NDArray[np.float32]) -> None:
|
71 |
+
assert not self.closed
|
72 |
super().extend(data)
|
73 |
self.modify_event.set()
|
74 |
|
75 |
def close(self) -> None:
|
76 |
+
assert not self.closed
|
77 |
self.closed = True
|
78 |
self.modify_event.set()
|
79 |
logger.info("AudioStream closed")
|
speaches/core.py
CHANGED
@@ -92,14 +92,14 @@ class Transcription:
|
|
92 |
|
93 |
|
94 |
def test_segment_is_eos():
|
95 |
-
assert Segment("Hello").is_eos
|
96 |
-
assert Segment("Hello...").is_eos
|
97 |
-
assert Segment("Hello.").is_eos
|
98 |
-
assert Segment("Hello!").is_eos
|
99 |
-
assert Segment("Hello?").is_eos
|
100 |
-
assert Segment("Hello. Yo").is_eos
|
101 |
-
assert Segment("Hello. Yo...").is_eos
|
102 |
-
assert Segment("Hello. Yo.").is_eos
|
103 |
|
104 |
|
105 |
def to_full_sentences(words: list[Word]) -> list[Segment]:
|
|
|
92 |
|
93 |
|
94 |
def test_segment_is_eos():
|
95 |
+
assert not Segment("Hello").is_eos
|
96 |
+
assert not Segment("Hello...").is_eos
|
97 |
+
assert Segment("Hello.").is_eos
|
98 |
+
assert Segment("Hello!").is_eos
|
99 |
+
assert Segment("Hello?").is_eos
|
100 |
+
assert not Segment("Hello. Yo").is_eos
|
101 |
+
assert not Segment("Hello. Yo...").is_eos
|
102 |
+
assert Segment("Hello. Yo.").is_eos
|
103 |
|
104 |
|
105 |
def to_full_sentences(words: list[Word]) -> list[Segment]:
|
speaches/main.py
CHANGED
@@ -7,8 +7,14 @@ from contextlib import asynccontextmanager
|
|
7 |
from io import BytesIO
|
8 |
from typing import Annotated
|
9 |
|
10 |
-
from fastapi import (
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
from fastapi.websockets import WebSocketState
|
13 |
from faster_whisper import WhisperModel
|
14 |
from faster_whisper.vad import VadOptions, get_speech_timestamps
|
@@ -18,8 +24,11 @@ from speaches.audio import AudioStream, audio_samples_from_file
|
|
18 |
from speaches.config import SAMPLES_PER_SECOND, Language, config
|
19 |
from speaches.core import Transcription
|
20 |
from speaches.logger import logger
|
21 |
-
from speaches.server_models import (
|
22 |
-
|
|
|
|
|
|
|
23 |
from speaches.transcriber import audio_transcriber
|
24 |
|
25 |
whisper: WhisperModel = None # type: ignore
|
|
|
7 |
from io import BytesIO
|
8 |
from typing import Annotated
|
9 |
|
10 |
+
from fastapi import (
|
11 |
+
Depends,
|
12 |
+
FastAPI,
|
13 |
+
Response,
|
14 |
+
UploadFile,
|
15 |
+
WebSocket,
|
16 |
+
WebSocketDisconnect,
|
17 |
+
)
|
18 |
from fastapi.websockets import WebSocketState
|
19 |
from faster_whisper import WhisperModel
|
20 |
from faster_whisper.vad import VadOptions, get_speech_timestamps
|
|
|
24 |
from speaches.config import SAMPLES_PER_SECOND, Language, config
|
25 |
from speaches.core import Transcription
|
26 |
from speaches.logger import logger
|
27 |
+
from speaches.server_models import (
|
28 |
+
ResponseFormat,
|
29 |
+
TranscriptionResponse,
|
30 |
+
TranscriptionVerboseResponse,
|
31 |
+
)
|
32 |
from speaches.transcriber import audio_transcriber
|
33 |
|
34 |
whisper: WhisperModel = None # type: ignore
|