Spaces:
Runtime error
Runtime error
| """Simple Reader that reads transcript of youtube video.""" | |
| from typing import Any, List | |
| from gpt_index.readers.base import BaseReader | |
| from gpt_index.readers.schema.base import Document | |
class YoutubeTranscriptReader(BaseReader):
    """Reader that turns YouTube video transcripts into ``Document`` objects."""

    def __init__(self) -> None:
        """Initialize the reader; it takes no parameters."""

    @staticmethod
    def _extract_video_id(link: str) -> str:
        """Return the video ID contained in a YouTube link.

        Handles ``watch?v=<id>`` URLs (with ``v`` anywhere in the query
        string and with extra parameters such as ``&t=10s``), ``youtu.be/<id>``
        short links, and ``/embed/<id>`` / ``/shorts/<id>`` paths. Anything
        else (including a bare video ID) falls back to the legacy behavior of
        taking whatever follows the last ``"?v="``.
        """
        # Local import keeps this block self-contained, mirroring how the
        # optional third-party dependency is imported inside ``load_data``.
        from urllib.parse import parse_qs, urlparse

        parsed = urlparse(link)

        # Standard watch URL: read the ``v`` query parameter so that sibling
        # parameters (``&t=``, ``&list=``, ...) never leak into the ID.
        ids_from_query = parse_qs(parsed.query).get("v")
        if ids_from_query:
            return ids_from_query[0]

        host = (parsed.hostname or "").lower()
        segments = [part for part in parsed.path.split("/") if part]

        # Short link: https://youtu.be/<id>
        if host in ("youtu.be", "www.youtu.be") and segments:
            return segments[0]

        # Path-based forms: /embed/<id> and /shorts/<id>
        if len(segments) >= 2 and segments[0] in ("embed", "shorts"):
            return segments[1]

        # Legacy fallback; for input without "?v=" this returns it unchanged.
        return link.split("?v=")[-1]

    def load_data(self, ytlinks: List[str], **load_kwargs: Any) -> List[Document]:
        """Load transcripts for the given YouTube links.

        Args:
            ytlinks (List[str]): List of youtube links
                for which transcripts are to be read.
            **load_kwargs (Any): Accepted for interface compatibility;
                currently not used by this reader.

        Returns:
            List[Document]: One document per link, in input order. Each
                document's text is the transcript chunks joined together,
                with a newline after every chunk.

        Raises:
            ImportError: If the ``youtube_transcript_api`` package is not
                installed.
        """
        try:
            from youtube_transcript_api import YouTubeTranscriptApi
        except ImportError as err:
            raise ImportError(
                "`youtube_transcript_api` package not found, "
                "please run `pip install youtube-transcript-api`"
            ) from err

        results = []
        for link in ytlinks:
            video_id = self._extract_video_id(link)
            srt = YouTubeTranscriptApi.get_transcript(video_id)
            # Every chunk is followed by a newline (including the last one),
            # matching the previous concatenation-based output exactly.
            transcript = "".join(chunk["text"] + "\n" for chunk in srt)
            results.append(Document(transcript))
        return results