diff --git a/meow/lib/python3.13/site-packages/__pycache__/typing_extensions.cpython-313.pyc b/meow/lib/python3.13/site-packages/__pycache__/typing_extensions.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..23d59c8c321a7ec358050e7e1891420c21a093e8 Binary files /dev/null and b/meow/lib/python3.13/site-packages/__pycache__/typing_extensions.cpython-313.pyc differ diff --git a/meow/lib/python3.13/site-packages/charset_normalizer/__init__.py b/meow/lib/python3.13/site-packages/charset_normalizer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0d3a37990145e94ad85406166dbaf52f4c311e5e --- /dev/null +++ b/meow/lib/python3.13/site-packages/charset_normalizer/__init__.py @@ -0,0 +1,48 @@ +""" +Charset-Normalizer +~~~~~~~~~~~~~~ +The Real First Universal Charset Detector. +A library that helps you read text from an unknown charset encoding. +Motivated by chardet, This package is trying to resolve the issue by taking a new approach. +All IANA character set names for which the Python core library provides codecs are supported. + +Basic usage: + >>> from charset_normalizer import from_bytes + >>> results = from_bytes('Bсеки човек има право на образование. Oбразованието!'.encode('utf_8')) + >>> best_guess = results.best() + >>> str(best_guess) + 'Bсеки човек има право на образование. Oбразованието!' + +Others methods and usages are available - see the full documentation +at . +:copyright: (c) 2021 by Ahmed TAHRI +:license: MIT, see LICENSE for more details. +""" + +from __future__ import annotations + +import logging + +from .api import from_bytes, from_fp, from_path, is_binary +from .legacy import detect +from .models import CharsetMatch, CharsetMatches +from .utils import set_logging_handler +from .version import VERSION, __version__ + +__all__ = ( + "from_fp", + "from_path", + "from_bytes", + "is_binary", + "detect", + "CharsetMatch", + "CharsetMatches", + "__version__", + "VERSION", + "set_logging_handler", +) + +# Attach a NullHandler to the top level logger by default +# https://docs.python.org/3.3/howto/logging.html#configuring-logging-for-a-library + +logging.getLogger("charset_normalizer").addHandler(logging.NullHandler()) diff --git a/meow/lib/python3.13/site-packages/charset_normalizer/api.py b/meow/lib/python3.13/site-packages/charset_normalizer/api.py new file mode 100644 index 0000000000000000000000000000000000000000..2c8c0618cc5ad2e92380edb194a5d9b8b2d977c2 --- /dev/null +++ b/meow/lib/python3.13/site-packages/charset_normalizer/api.py @@ -0,0 +1,668 @@ +from __future__ import annotations + +import logging +from os import PathLike +from typing import BinaryIO + +from .cd import ( + coherence_ratio, + encoding_languages, + mb_encoding_languages, + merge_coherence_ratios, +) +from .constant import IANA_SUPPORTED, TOO_BIG_SEQUENCE, TOO_SMALL_SEQUENCE, TRACE +from .md import mess_ratio +from .models import CharsetMatch, CharsetMatches +from .utils import ( + any_specified_encoding, + cut_sequence_chunks, + iana_name, + identify_sig_or_bom, + is_cp_similar, + is_multi_byte_encoding, + should_strip_sig_or_bom, +) + +logger = logging.getLogger("charset_normalizer") +explain_handler = logging.StreamHandler() +explain_handler.setFormatter( + logging.Formatter("%(asctime)s | %(levelname)s | %(message)s") +) + + +def from_bytes( + sequences: bytes | bytearray, + steps: int = 5, + chunk_size: int = 512, + threshold: float = 0.2, + cp_isolation: list[str] | None = None, + cp_exclusion: list[str] | None = None, + preemptive_behaviour: bool = True, + explain: bool = False, + language_threshold: float = 0.1, + enable_fallback: bool = True, +) -> CharsetMatches: + """ + Given a raw bytes sequence, return the best possibles charset usable to render str objects. + If there is no results, it is a strong indicator that the source is binary/not text. + By default, the process will extract 5 blocks of 512o each to assess the mess and coherence of a given sequence. + And will give up a particular code page after 20% of measured mess. Those criteria are customizable at will. + + The preemptive behavior DOES NOT replace the traditional detection workflow, it prioritize a particular code page + but never take it for granted. Can improve the performance. + + You may want to focus your attention to some code page or/and not others, use cp_isolation and cp_exclusion for that + purpose. + + This function will strip the SIG in the payload/sequence every time except on UTF-16, UTF-32. + By default the library does not setup any handler other than the NullHandler, if you choose to set the 'explain' + toggle to True it will alter the logger configuration to add a StreamHandler that is suitable for debugging. + Custom logging format and handler can be set manually. + """ + + if not isinstance(sequences, (bytearray, bytes)): + raise TypeError( + "Expected object of type bytes or bytearray, got: {}".format( + type(sequences) + ) + ) + + if explain: + previous_logger_level: int = logger.level + logger.addHandler(explain_handler) + logger.setLevel(TRACE) + + length: int = len(sequences) + + if length == 0: + logger.debug("Encoding detection on empty bytes, assuming utf_8 intention.") + if explain: # Defensive: ensure exit path clean handler + logger.removeHandler(explain_handler) + logger.setLevel(previous_logger_level or logging.WARNING) + return CharsetMatches([CharsetMatch(sequences, "utf_8", 0.0, False, [], "")]) + + if cp_isolation is not None: + logger.log( + TRACE, + "cp_isolation is set. use this flag for debugging purpose. " + "limited list of encoding allowed : %s.", + ", ".join(cp_isolation), + ) + cp_isolation = [iana_name(cp, False) for cp in cp_isolation] + else: + cp_isolation = [] + + if cp_exclusion is not None: + logger.log( + TRACE, + "cp_exclusion is set. use this flag for debugging purpose. " + "limited list of encoding excluded : %s.", + ", ".join(cp_exclusion), + ) + cp_exclusion = [iana_name(cp, False) for cp in cp_exclusion] + else: + cp_exclusion = [] + + if length <= (chunk_size * steps): + logger.log( + TRACE, + "override steps (%i) and chunk_size (%i) as content does not fit (%i byte(s) given) parameters.", + steps, + chunk_size, + length, + ) + steps = 1 + chunk_size = length + + if steps > 1 and length / steps < chunk_size: + chunk_size = int(length / steps) + + is_too_small_sequence: bool = len(sequences) < TOO_SMALL_SEQUENCE + is_too_large_sequence: bool = len(sequences) >= TOO_BIG_SEQUENCE + + if is_too_small_sequence: + logger.log( + TRACE, + "Trying to detect encoding from a tiny portion of ({}) byte(s).".format( + length + ), + ) + elif is_too_large_sequence: + logger.log( + TRACE, + "Using lazy str decoding because the payload is quite large, ({}) byte(s).".format( + length + ), + ) + + prioritized_encodings: list[str] = [] + + specified_encoding: str | None = ( + any_specified_encoding(sequences) if preemptive_behaviour else None + ) + + if specified_encoding is not None: + prioritized_encodings.append(specified_encoding) + logger.log( + TRACE, + "Detected declarative mark in sequence. Priority +1 given for %s.", + specified_encoding, + ) + + tested: set[str] = set() + tested_but_hard_failure: list[str] = [] + tested_but_soft_failure: list[str] = [] + + fallback_ascii: CharsetMatch | None = None + fallback_u8: CharsetMatch | None = None + fallback_specified: CharsetMatch | None = None + + results: CharsetMatches = CharsetMatches() + + early_stop_results: CharsetMatches = CharsetMatches() + + sig_encoding, sig_payload = identify_sig_or_bom(sequences) + + if sig_encoding is not None: + prioritized_encodings.append(sig_encoding) + logger.log( + TRACE, + "Detected a SIG or BOM mark on first %i byte(s). Priority +1 given for %s.", + len(sig_payload), + sig_encoding, + ) + + prioritized_encodings.append("ascii") + + if "utf_8" not in prioritized_encodings: + prioritized_encodings.append("utf_8") + + for encoding_iana in prioritized_encodings + IANA_SUPPORTED: + if cp_isolation and encoding_iana not in cp_isolation: + continue + + if cp_exclusion and encoding_iana in cp_exclusion: + continue + + if encoding_iana in tested: + continue + + tested.add(encoding_iana) + + decoded_payload: str | None = None + bom_or_sig_available: bool = sig_encoding == encoding_iana + strip_sig_or_bom: bool = bom_or_sig_available and should_strip_sig_or_bom( + encoding_iana + ) + + if encoding_iana in {"utf_16", "utf_32"} and not bom_or_sig_available: + logger.log( + TRACE, + "Encoding %s won't be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.", + encoding_iana, + ) + continue + if encoding_iana in {"utf_7"} and not bom_or_sig_available: + logger.log( + TRACE, + "Encoding %s won't be tested as-is because detection is unreliable without BOM/SIG.", + encoding_iana, + ) + continue + + try: + is_multi_byte_decoder: bool = is_multi_byte_encoding(encoding_iana) + except (ModuleNotFoundError, ImportError): + logger.log( + TRACE, + "Encoding %s does not provide an IncrementalDecoder", + encoding_iana, + ) + continue + + try: + if is_too_large_sequence and is_multi_byte_decoder is False: + str( + ( + sequences[: int(50e4)] + if strip_sig_or_bom is False + else sequences[len(sig_payload) : int(50e4)] + ), + encoding=encoding_iana, + ) + else: + decoded_payload = str( + ( + sequences + if strip_sig_or_bom is False + else sequences[len(sig_payload) :] + ), + encoding=encoding_iana, + ) + except (UnicodeDecodeError, LookupError) as e: + if not isinstance(e, LookupError): + logger.log( + TRACE, + "Code page %s does not fit given bytes sequence at ALL. %s", + encoding_iana, + str(e), + ) + tested_but_hard_failure.append(encoding_iana) + continue + + similar_soft_failure_test: bool = False + + for encoding_soft_failed in tested_but_soft_failure: + if is_cp_similar(encoding_iana, encoding_soft_failed): + similar_soft_failure_test = True + break + + if similar_soft_failure_test: + logger.log( + TRACE, + "%s is deemed too similar to code page %s and was consider unsuited already. Continuing!", + encoding_iana, + encoding_soft_failed, + ) + continue + + r_ = range( + 0 if not bom_or_sig_available else len(sig_payload), + length, + int(length / steps), + ) + + multi_byte_bonus: bool = ( + is_multi_byte_decoder + and decoded_payload is not None + and len(decoded_payload) < length + ) + + if multi_byte_bonus: + logger.log( + TRACE, + "Code page %s is a multi byte encoding table and it appear that at least one character " + "was encoded using n-bytes.", + encoding_iana, + ) + + max_chunk_gave_up: int = int(len(r_) / 4) + + max_chunk_gave_up = max(max_chunk_gave_up, 2) + early_stop_count: int = 0 + lazy_str_hard_failure = False + + md_chunks: list[str] = [] + md_ratios = [] + + try: + for chunk in cut_sequence_chunks( + sequences, + encoding_iana, + r_, + chunk_size, + bom_or_sig_available, + strip_sig_or_bom, + sig_payload, + is_multi_byte_decoder, + decoded_payload, + ): + md_chunks.append(chunk) + + md_ratios.append( + mess_ratio( + chunk, + threshold, + explain is True and 1 <= len(cp_isolation) <= 2, + ) + ) + + if md_ratios[-1] >= threshold: + early_stop_count += 1 + + if (early_stop_count >= max_chunk_gave_up) or ( + bom_or_sig_available and strip_sig_or_bom is False + ): + break + except ( + UnicodeDecodeError + ) as e: # Lazy str loading may have missed something there + logger.log( + TRACE, + "LazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %s", + encoding_iana, + str(e), + ) + early_stop_count = max_chunk_gave_up + lazy_str_hard_failure = True + + # We might want to check the sequence again with the whole content + # Only if initial MD tests passes + if ( + not lazy_str_hard_failure + and is_too_large_sequence + and not is_multi_byte_decoder + ): + try: + sequences[int(50e3) :].decode(encoding_iana, errors="strict") + except UnicodeDecodeError as e: + logger.log( + TRACE, + "LazyStr Loading: After final lookup, code page %s does not fit given bytes sequence at ALL. %s", + encoding_iana, + str(e), + ) + tested_but_hard_failure.append(encoding_iana) + continue + + mean_mess_ratio: float = sum(md_ratios) / len(md_ratios) if md_ratios else 0.0 + if mean_mess_ratio >= threshold or early_stop_count >= max_chunk_gave_up: + tested_but_soft_failure.append(encoding_iana) + logger.log( + TRACE, + "%s was excluded because of initial chaos probing. Gave up %i time(s). " + "Computed mean chaos is %f %%.", + encoding_iana, + early_stop_count, + round(mean_mess_ratio * 100, ndigits=3), + ) + # Preparing those fallbacks in case we got nothing. + if ( + enable_fallback + and encoding_iana in ["ascii", "utf_8", specified_encoding] + and not lazy_str_hard_failure + ): + fallback_entry = CharsetMatch( + sequences, + encoding_iana, + threshold, + False, + [], + decoded_payload, + preemptive_declaration=specified_encoding, + ) + if encoding_iana == specified_encoding: + fallback_specified = fallback_entry + elif encoding_iana == "ascii": + fallback_ascii = fallback_entry + else: + fallback_u8 = fallback_entry + continue + + logger.log( + TRACE, + "%s passed initial chaos probing. Mean measured chaos is %f %%", + encoding_iana, + round(mean_mess_ratio * 100, ndigits=3), + ) + + if not is_multi_byte_decoder: + target_languages: list[str] = encoding_languages(encoding_iana) + else: + target_languages = mb_encoding_languages(encoding_iana) + + if target_languages: + logger.log( + TRACE, + "{} should target any language(s) of {}".format( + encoding_iana, str(target_languages) + ), + ) + + cd_ratios = [] + + # We shall skip the CD when its about ASCII + # Most of the time its not relevant to run "language-detection" on it. + if encoding_iana != "ascii": + for chunk in md_chunks: + chunk_languages = coherence_ratio( + chunk, + language_threshold, + ",".join(target_languages) if target_languages else None, + ) + + cd_ratios.append(chunk_languages) + + cd_ratios_merged = merge_coherence_ratios(cd_ratios) + + if cd_ratios_merged: + logger.log( + TRACE, + "We detected language {} using {}".format( + cd_ratios_merged, encoding_iana + ), + ) + + current_match = CharsetMatch( + sequences, + encoding_iana, + mean_mess_ratio, + bom_or_sig_available, + cd_ratios_merged, + ( + decoded_payload + if ( + is_too_large_sequence is False + or encoding_iana in [specified_encoding, "ascii", "utf_8"] + ) + else None + ), + preemptive_declaration=specified_encoding, + ) + + results.append(current_match) + + if ( + encoding_iana in [specified_encoding, "ascii", "utf_8"] + and mean_mess_ratio < 0.1 + ): + # If md says nothing to worry about, then... stop immediately! + if mean_mess_ratio == 0.0: + logger.debug( + "Encoding detection: %s is most likely the one.", + current_match.encoding, + ) + if explain: # Defensive: ensure exit path clean handler + logger.removeHandler(explain_handler) + logger.setLevel(previous_logger_level) + return CharsetMatches([current_match]) + + early_stop_results.append(current_match) + + if ( + len(early_stop_results) + and (specified_encoding is None or specified_encoding in tested) + and "ascii" in tested + and "utf_8" in tested + ): + probable_result: CharsetMatch = early_stop_results.best() # type: ignore[assignment] + logger.debug( + "Encoding detection: %s is most likely the one.", + probable_result.encoding, + ) + if explain: # Defensive: ensure exit path clean handler + logger.removeHandler(explain_handler) + logger.setLevel(previous_logger_level) + + return CharsetMatches([probable_result]) + + if encoding_iana == sig_encoding: + logger.debug( + "Encoding detection: %s is most likely the one as we detected a BOM or SIG within " + "the beginning of the sequence.", + encoding_iana, + ) + if explain: # Defensive: ensure exit path clean handler + logger.removeHandler(explain_handler) + logger.setLevel(previous_logger_level) + return CharsetMatches([results[encoding_iana]]) + + if len(results) == 0: + if fallback_u8 or fallback_ascii or fallback_specified: + logger.log( + TRACE, + "Nothing got out of the detection process. Using ASCII/UTF-8/Specified fallback.", + ) + + if fallback_specified: + logger.debug( + "Encoding detection: %s will be used as a fallback match", + fallback_specified.encoding, + ) + results.append(fallback_specified) + elif ( + (fallback_u8 and fallback_ascii is None) + or ( + fallback_u8 + and fallback_ascii + and fallback_u8.fingerprint != fallback_ascii.fingerprint + ) + or (fallback_u8 is not None) + ): + logger.debug("Encoding detection: utf_8 will be used as a fallback match") + results.append(fallback_u8) + elif fallback_ascii: + logger.debug("Encoding detection: ascii will be used as a fallback match") + results.append(fallback_ascii) + + if results: + logger.debug( + "Encoding detection: Found %s as plausible (best-candidate) for content. With %i alternatives.", + results.best().encoding, # type: ignore + len(results) - 1, + ) + else: + logger.debug("Encoding detection: Unable to determine any suitable charset.") + + if explain: + logger.removeHandler(explain_handler) + logger.setLevel(previous_logger_level) + + return results + + +def from_fp( + fp: BinaryIO, + steps: int = 5, + chunk_size: int = 512, + threshold: float = 0.20, + cp_isolation: list[str] | None = None, + cp_exclusion: list[str] | None = None, + preemptive_behaviour: bool = True, + explain: bool = False, + language_threshold: float = 0.1, + enable_fallback: bool = True, +) -> CharsetMatches: + """ + Same thing than the function from_bytes but using a file pointer that is already ready. + Will not close the file pointer. + """ + return from_bytes( + fp.read(), + steps, + chunk_size, + threshold, + cp_isolation, + cp_exclusion, + preemptive_behaviour, + explain, + language_threshold, + enable_fallback, + ) + + +def from_path( + path: str | bytes | PathLike, # type: ignore[type-arg] + steps: int = 5, + chunk_size: int = 512, + threshold: float = 0.20, + cp_isolation: list[str] | None = None, + cp_exclusion: list[str] | None = None, + preemptive_behaviour: bool = True, + explain: bool = False, + language_threshold: float = 0.1, + enable_fallback: bool = True, +) -> CharsetMatches: + """ + Same thing than the function from_bytes but with one extra step. Opening and reading given file path in binary mode. + Can raise IOError. + """ + with open(path, "rb") as fp: + return from_fp( + fp, + steps, + chunk_size, + threshold, + cp_isolation, + cp_exclusion, + preemptive_behaviour, + explain, + language_threshold, + enable_fallback, + ) + + +def is_binary( + fp_or_path_or_payload: PathLike | str | BinaryIO | bytes, # type: ignore[type-arg] + steps: int = 5, + chunk_size: int = 512, + threshold: float = 0.20, + cp_isolation: list[str] | None = None, + cp_exclusion: list[str] | None = None, + preemptive_behaviour: bool = True, + explain: bool = False, + language_threshold: float = 0.1, + enable_fallback: bool = False, +) -> bool: + """ + Detect if the given input (file, bytes, or path) points to a binary file. aka. not a string. + Based on the same main heuristic algorithms and default kwargs at the sole exception that fallbacks match + are disabled to be stricter around ASCII-compatible but unlikely to be a string. + """ + if isinstance(fp_or_path_or_payload, (str, PathLike)): + guesses = from_path( + fp_or_path_or_payload, + steps=steps, + chunk_size=chunk_size, + threshold=threshold, + cp_isolation=cp_isolation, + cp_exclusion=cp_exclusion, + preemptive_behaviour=preemptive_behaviour, + explain=explain, + language_threshold=language_threshold, + enable_fallback=enable_fallback, + ) + elif isinstance( + fp_or_path_or_payload, + ( + bytes, + bytearray, + ), + ): + guesses = from_bytes( + fp_or_path_or_payload, + steps=steps, + chunk_size=chunk_size, + threshold=threshold, + cp_isolation=cp_isolation, + cp_exclusion=cp_exclusion, + preemptive_behaviour=preemptive_behaviour, + explain=explain, + language_threshold=language_threshold, + enable_fallback=enable_fallback, + ) + else: + guesses = from_fp( + fp_or_path_or_payload, + steps=steps, + chunk_size=chunk_size, + threshold=threshold, + cp_isolation=cp_isolation, + cp_exclusion=cp_exclusion, + preemptive_behaviour=preemptive_behaviour, + explain=explain, + language_threshold=language_threshold, + enable_fallback=enable_fallback, + ) + + return not guesses diff --git a/meow/lib/python3.13/site-packages/charset_normalizer/legacy.py b/meow/lib/python3.13/site-packages/charset_normalizer/legacy.py new file mode 100644 index 0000000000000000000000000000000000000000..a2f534514120df5ed1170bc298f81250405655db --- /dev/null +++ b/meow/lib/python3.13/site-packages/charset_normalizer/legacy.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any +from warnings import warn + +from .api import from_bytes +from .constant import CHARDET_CORRESPONDENCE + +# TODO: remove this check when dropping Python 3.7 support +if TYPE_CHECKING: + from typing_extensions import TypedDict + + class ResultDict(TypedDict): + encoding: str | None + language: str + confidence: float | None + + +def detect( + byte_str: bytes, should_rename_legacy: bool = False, **kwargs: Any +) -> ResultDict: + """ + chardet legacy method + Detect the encoding of the given byte string. It should be mostly backward-compatible. + Encoding name will match Chardet own writing whenever possible. (Not on encoding name unsupported by it) + This function is deprecated and should be used to migrate your project easily, consult the documentation for + further information. Not planned for removal. + + :param byte_str: The byte sequence to examine. + :param should_rename_legacy: Should we rename legacy encodings + to their more modern equivalents? + """ + if len(kwargs): + warn( + f"charset-normalizer disregard arguments '{','.join(list(kwargs.keys()))}' in legacy function detect()" + ) + + if not isinstance(byte_str, (bytearray, bytes)): + raise TypeError( # pragma: nocover + "Expected object of type bytes or bytearray, got: " "{}".format( + type(byte_str) + ) + ) + + if isinstance(byte_str, bytearray): + byte_str = bytes(byte_str) + + r = from_bytes(byte_str).best() + + encoding = r.encoding if r is not None else None + language = r.language if r is not None and r.language != "Unknown" else "" + confidence = 1.0 - r.chaos if r is not None else None + + # Note: CharsetNormalizer does not return 'UTF-8-SIG' as the sig get stripped in the detection/normalization process + # but chardet does return 'utf-8-sig' and it is a valid codec name. + if r is not None and encoding == "utf_8" and r.bom: + encoding += "_sig" + + if should_rename_legacy is False and encoding in CHARDET_CORRESPONDENCE: + encoding = CHARDET_CORRESPONDENCE[encoding] + + return { + "encoding": encoding, + "language": language, + "confidence": confidence, + } diff --git a/meow/lib/python3.13/site-packages/charset_normalizer/md.py b/meow/lib/python3.13/site-packages/charset_normalizer/md.py new file mode 100644 index 0000000000000000000000000000000000000000..9ed59a868dafeebdf07132d79f48fe28e202ec0a --- /dev/null +++ b/meow/lib/python3.13/site-packages/charset_normalizer/md.py @@ -0,0 +1,630 @@ +from __future__ import annotations + +from functools import lru_cache +from logging import getLogger + +from .constant import ( + COMMON_SAFE_ASCII_CHARACTERS, + TRACE, + UNICODE_SECONDARY_RANGE_KEYWORD, +) +from .utils import ( + is_accentuated, + is_arabic, + is_arabic_isolated_form, + is_case_variable, + is_cjk, + is_emoticon, + is_hangul, + is_hiragana, + is_katakana, + is_latin, + is_punctuation, + is_separator, + is_symbol, + is_thai, + is_unprintable, + remove_accent, + unicode_range, +) + + +class MessDetectorPlugin: + """ + Base abstract class used for mess detection plugins. + All detectors MUST extend and implement given methods. + """ + + def eligible(self, character: str) -> bool: + """ + Determine if given character should be fed in. + """ + raise NotImplementedError # pragma: nocover + + def feed(self, character: str) -> None: + """ + The main routine to be executed upon character. + Insert the logic in witch the text would be considered chaotic. + """ + raise NotImplementedError # pragma: nocover + + def reset(self) -> None: # pragma: no cover + """ + Permit to reset the plugin to the initial state. + """ + raise NotImplementedError + + @property + def ratio(self) -> float: + """ + Compute the chaos ratio based on what your feed() has seen. + Must NOT be lower than 0.; No restriction gt 0. + """ + raise NotImplementedError # pragma: nocover + + +class TooManySymbolOrPunctuationPlugin(MessDetectorPlugin): + def __init__(self) -> None: + self._punctuation_count: int = 0 + self._symbol_count: int = 0 + self._character_count: int = 0 + + self._last_printable_char: str | None = None + self._frenzy_symbol_in_word: bool = False + + def eligible(self, character: str) -> bool: + return character.isprintable() + + def feed(self, character: str) -> None: + self._character_count += 1 + + if ( + character != self._last_printable_char + and character not in COMMON_SAFE_ASCII_CHARACTERS + ): + if is_punctuation(character): + self._punctuation_count += 1 + elif ( + character.isdigit() is False + and is_symbol(character) + and is_emoticon(character) is False + ): + self._symbol_count += 2 + + self._last_printable_char = character + + def reset(self) -> None: # Abstract + self._punctuation_count = 0 + self._character_count = 0 + self._symbol_count = 0 + + @property + def ratio(self) -> float: + if self._character_count == 0: + return 0.0 + + ratio_of_punctuation: float = ( + self._punctuation_count + self._symbol_count + ) / self._character_count + + return ratio_of_punctuation if ratio_of_punctuation >= 0.3 else 0.0 + + +class TooManyAccentuatedPlugin(MessDetectorPlugin): + def __init__(self) -> None: + self._character_count: int = 0 + self._accentuated_count: int = 0 + + def eligible(self, character: str) -> bool: + return character.isalpha() + + def feed(self, character: str) -> None: + self._character_count += 1 + + if is_accentuated(character): + self._accentuated_count += 1 + + def reset(self) -> None: # Abstract + self._character_count = 0 + self._accentuated_count = 0 + + @property + def ratio(self) -> float: + if self._character_count < 8: + return 0.0 + + ratio_of_accentuation: float = self._accentuated_count / self._character_count + return ratio_of_accentuation if ratio_of_accentuation >= 0.35 else 0.0 + + +class UnprintablePlugin(MessDetectorPlugin): + def __init__(self) -> None: + self._unprintable_count: int = 0 + self._character_count: int = 0 + + def eligible(self, character: str) -> bool: + return True + + def feed(self, character: str) -> None: + if is_unprintable(character): + self._unprintable_count += 1 + self._character_count += 1 + + def reset(self) -> None: # Abstract + self._unprintable_count = 0 + + @property + def ratio(self) -> float: + if self._character_count == 0: + return 0.0 + + return (self._unprintable_count * 8) / self._character_count + + +class SuspiciousDuplicateAccentPlugin(MessDetectorPlugin): + def __init__(self) -> None: + self._successive_count: int = 0 + self._character_count: int = 0 + + self._last_latin_character: str | None = None + + def eligible(self, character: str) -> bool: + return character.isalpha() and is_latin(character) + + def feed(self, character: str) -> None: + self._character_count += 1 + if ( + self._last_latin_character is not None + and is_accentuated(character) + and is_accentuated(self._last_latin_character) + ): + if character.isupper() and self._last_latin_character.isupper(): + self._successive_count += 1 + # Worse if its the same char duplicated with different accent. + if remove_accent(character) == remove_accent(self._last_latin_character): + self._successive_count += 1 + self._last_latin_character = character + + def reset(self) -> None: # Abstract + self._successive_count = 0 + self._character_count = 0 + self._last_latin_character = None + + @property + def ratio(self) -> float: + if self._character_count == 0: + return 0.0 + + return (self._successive_count * 2) / self._character_count + + +class SuspiciousRange(MessDetectorPlugin): + def __init__(self) -> None: + self._suspicious_successive_range_count: int = 0 + self._character_count: int = 0 + self._last_printable_seen: str | None = None + + def eligible(self, character: str) -> bool: + return character.isprintable() + + def feed(self, character: str) -> None: + self._character_count += 1 + + if ( + character.isspace() + or is_punctuation(character) + or character in COMMON_SAFE_ASCII_CHARACTERS + ): + self._last_printable_seen = None + return + + if self._last_printable_seen is None: + self._last_printable_seen = character + return + + unicode_range_a: str | None = unicode_range(self._last_printable_seen) + unicode_range_b: str | None = unicode_range(character) + + if is_suspiciously_successive_range(unicode_range_a, unicode_range_b): + self._suspicious_successive_range_count += 1 + + self._last_printable_seen = character + + def reset(self) -> None: # Abstract + self._character_count = 0 + self._suspicious_successive_range_count = 0 + self._last_printable_seen = None + + @property + def ratio(self) -> float: + if self._character_count <= 13: + return 0.0 + + ratio_of_suspicious_range_usage: float = ( + self._suspicious_successive_range_count * 2 + ) / self._character_count + + return ratio_of_suspicious_range_usage + + +class SuperWeirdWordPlugin(MessDetectorPlugin): + def __init__(self) -> None: + self._word_count: int = 0 + self._bad_word_count: int = 0 + self._foreign_long_count: int = 0 + + self._is_current_word_bad: bool = False + self._foreign_long_watch: bool = False + + self._character_count: int = 0 + self._bad_character_count: int = 0 + + self._buffer: str = "" + self._buffer_accent_count: int = 0 + self._buffer_glyph_count: int = 0 + + def eligible(self, character: str) -> bool: + return True + + def feed(self, character: str) -> None: + if character.isalpha(): + self._buffer += character + if is_accentuated(character): + self._buffer_accent_count += 1 + if ( + self._foreign_long_watch is False + and (is_latin(character) is False or is_accentuated(character)) + and is_cjk(character) is False + and is_hangul(character) is False + and is_katakana(character) is False + and is_hiragana(character) is False + and is_thai(character) is False + ): + self._foreign_long_watch = True + if ( + is_cjk(character) + or is_hangul(character) + or is_katakana(character) + or is_hiragana(character) + or is_thai(character) + ): + self._buffer_glyph_count += 1 + return + if not self._buffer: + return + if ( + character.isspace() or is_punctuation(character) or is_separator(character) + ) and self._buffer: + self._word_count += 1 + buffer_length: int = len(self._buffer) + + self._character_count += buffer_length + + if buffer_length >= 4: + if self._buffer_accent_count / buffer_length >= 0.5: + self._is_current_word_bad = True + # Word/Buffer ending with an upper case accentuated letter are so rare, + # that we will consider them all as suspicious. Same weight as foreign_long suspicious. + elif ( + is_accentuated(self._buffer[-1]) + and self._buffer[-1].isupper() + and all(_.isupper() for _ in self._buffer) is False + ): + self._foreign_long_count += 1 + self._is_current_word_bad = True + elif self._buffer_glyph_count == 1: + self._is_current_word_bad = True + self._foreign_long_count += 1 + if buffer_length >= 24 and self._foreign_long_watch: + camel_case_dst = [ + i + for c, i in zip(self._buffer, range(0, buffer_length)) + if c.isupper() + ] + probable_camel_cased: bool = False + + if camel_case_dst and (len(camel_case_dst) / buffer_length <= 0.3): + probable_camel_cased = True + + if not probable_camel_cased: + self._foreign_long_count += 1 + self._is_current_word_bad = True + + if self._is_current_word_bad: + self._bad_word_count += 1 + self._bad_character_count += len(self._buffer) + self._is_current_word_bad = False + + self._foreign_long_watch = False + self._buffer = "" + self._buffer_accent_count = 0 + self._buffer_glyph_count = 0 + elif ( + character not in {"<", ">", "-", "=", "~", "|", "_"} + and character.isdigit() is False + and is_symbol(character) + ): + self._is_current_word_bad = True + self._buffer += character + + def reset(self) -> None: # Abstract + self._buffer = "" + self._is_current_word_bad = False + self._foreign_long_watch = False + self._bad_word_count = 0 + self._word_count = 0 + self._character_count = 0 + self._bad_character_count = 0 + self._foreign_long_count = 0 + + @property + def ratio(self) -> float: + if self._word_count <= 10 and self._foreign_long_count == 0: + return 0.0 + + return self._bad_character_count / self._character_count + + +class CjkInvalidStopPlugin(MessDetectorPlugin): + """ + GB(Chinese) based encoding often render the stop incorrectly when the content does not fit and + can be easily detected. Searching for the overuse of '丅' and '丄'. + """ + + def __init__(self) -> None: + self._wrong_stop_count: int = 0 + self._cjk_character_count: int = 0 + + def eligible(self, character: str) -> bool: + return True + + def feed(self, character: str) -> None: + if character in {"丅", "丄"}: + self._wrong_stop_count += 1 + return + if is_cjk(character): + self._cjk_character_count += 1 + + def reset(self) -> None: # Abstract + self._wrong_stop_count = 0 + self._cjk_character_count = 0 + + @property + def ratio(self) -> float: + if self._cjk_character_count < 16: + return 0.0 + return self._wrong_stop_count / self._cjk_character_count + + +class ArchaicUpperLowerPlugin(MessDetectorPlugin): + def __init__(self) -> None: + self._buf: bool = False + + self._character_count_since_last_sep: int = 0 + + self._successive_upper_lower_count: int = 0 + self._successive_upper_lower_count_final: int = 0 + + self._character_count: int = 0 + + self._last_alpha_seen: str | None = None + self._current_ascii_only: bool = True + + def eligible(self, character: str) -> bool: + return True + + def feed(self, character: str) -> None: + is_concerned = character.isalpha() and is_case_variable(character) + chunk_sep = is_concerned is False + + if chunk_sep and self._character_count_since_last_sep > 0: + if ( + self._character_count_since_last_sep <= 64 + and character.isdigit() is False + and self._current_ascii_only is False + ): + self._successive_upper_lower_count_final += ( + self._successive_upper_lower_count + ) + + self._successive_upper_lower_count = 0 + self._character_count_since_last_sep = 0 + self._last_alpha_seen = None + self._buf = False + self._character_count += 1 + self._current_ascii_only = True + + return + + if self._current_ascii_only is True and character.isascii() is False: + self._current_ascii_only = False + + if self._last_alpha_seen is not None: + if (character.isupper() and self._last_alpha_seen.islower()) or ( + character.islower() and self._last_alpha_seen.isupper() + ): + if self._buf is True: + self._successive_upper_lower_count += 2 + self._buf = False + else: + self._buf = True + else: + self._buf = False + + self._character_count += 1 + self._character_count_since_last_sep += 1 + self._last_alpha_seen = character + + def reset(self) -> None: # Abstract + self._character_count = 0 + self._character_count_since_last_sep = 0 + self._successive_upper_lower_count = 0 + self._successive_upper_lower_count_final = 0 + self._last_alpha_seen = None + self._buf = False + self._current_ascii_only = True + + @property + def ratio(self) -> float: + if self._character_count == 0: + return 0.0 + + return self._successive_upper_lower_count_final / self._character_count + + +class ArabicIsolatedFormPlugin(MessDetectorPlugin): + def __init__(self) -> None: + self._character_count: int = 0 + self._isolated_form_count: int = 0 + + def reset(self) -> None: # Abstract + self._character_count = 0 + self._isolated_form_count = 0 + + def eligible(self, character: str) -> bool: + return is_arabic(character) + + def feed(self, character: str) -> None: + self._character_count += 1 + + if is_arabic_isolated_form(character): + self._isolated_form_count += 1 + + @property + def ratio(self) -> float: + if self._character_count < 8: + return 0.0 + + isolated_form_usage: float = self._isolated_form_count / self._character_count + + return isolated_form_usage + + +@lru_cache(maxsize=1024) +def is_suspiciously_successive_range( + unicode_range_a: str | None, unicode_range_b: str | None +) -> bool: + """ + Determine if two Unicode range seen next to each other can be considered as suspicious. + """ + if unicode_range_a is None or unicode_range_b is None: + return True + + if unicode_range_a == unicode_range_b: + return False + + if "Latin" in unicode_range_a and "Latin" in unicode_range_b: + return False + + if "Emoticons" in unicode_range_a or "Emoticons" in unicode_range_b: + return False + + # Latin characters can be accompanied with a combining diacritical mark + # eg. Vietnamese. + if ("Latin" in unicode_range_a or "Latin" in unicode_range_b) and ( + "Combining" in unicode_range_a or "Combining" in unicode_range_b + ): + return False + + keywords_range_a, keywords_range_b = ( + unicode_range_a.split(" "), + unicode_range_b.split(" "), + ) + + for el in keywords_range_a: + if el in UNICODE_SECONDARY_RANGE_KEYWORD: + continue + if el in keywords_range_b: + return False + + # Japanese Exception + range_a_jp_chars, range_b_jp_chars = ( + unicode_range_a + in ( + "Hiragana", + "Katakana", + ), + unicode_range_b in ("Hiragana", "Katakana"), + ) + if (range_a_jp_chars or range_b_jp_chars) and ( + "CJK" in unicode_range_a or "CJK" in unicode_range_b + ): + return False + if range_a_jp_chars and range_b_jp_chars: + return False + + if "Hangul" in unicode_range_a or "Hangul" in unicode_range_b: + if "CJK" in unicode_range_a or "CJK" in unicode_range_b: + return False + if unicode_range_a == "Basic Latin" or unicode_range_b == "Basic Latin": + return False + + # Chinese/Japanese use dedicated range for punctuation and/or separators. + if ("CJK" in unicode_range_a or "CJK" in unicode_range_b) or ( + unicode_range_a in ["Katakana", "Hiragana"] + and unicode_range_b in ["Katakana", "Hiragana"] + ): + if "Punctuation" in unicode_range_a or "Punctuation" in unicode_range_b: + return False + if "Forms" in unicode_range_a or "Forms" in unicode_range_b: + return False + if unicode_range_a == "Basic Latin" or unicode_range_b == "Basic Latin": + return False + + return True + + +@lru_cache(maxsize=2048) +def mess_ratio( + decoded_sequence: str, maximum_threshold: float = 0.2, debug: bool = False +) -> float: + """ + Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier. + """ + + detectors: list[MessDetectorPlugin] = [ + md_class() for md_class in MessDetectorPlugin.__subclasses__() + ] + + length: int = len(decoded_sequence) + 1 + + mean_mess_ratio: float = 0.0 + + if length < 512: + intermediary_mean_mess_ratio_calc: int = 32 + elif length <= 1024: + intermediary_mean_mess_ratio_calc = 64 + else: + intermediary_mean_mess_ratio_calc = 128 + + for character, index in zip(decoded_sequence + "\n", range(length)): + for detector in detectors: + if detector.eligible(character): + detector.feed(character) + + if ( + index > 0 and index % intermediary_mean_mess_ratio_calc == 0 + ) or index == length - 1: + mean_mess_ratio = sum(dt.ratio for dt in detectors) + + if mean_mess_ratio >= maximum_threshold: + break + + if debug: + logger = getLogger("charset_normalizer") + + logger.log( + TRACE, + "Mess-detector extended-analysis start. " + f"intermediary_mean_mess_ratio_calc={intermediary_mean_mess_ratio_calc} mean_mess_ratio={mean_mess_ratio} " + f"maximum_threshold={maximum_threshold}", + ) + + if len(decoded_sequence) > 16: + logger.log(TRACE, f"Starting with: {decoded_sequence[:16]}") + logger.log(TRACE, f"Ending with: {decoded_sequence[-16::]}") + + for dt in detectors: + logger.log(TRACE, f"{dt.__class__}: {dt.ratio}") + + return round(mean_mess_ratio, 3) diff --git a/meow/lib/python3.13/site-packages/charset_normalizer/models.py b/meow/lib/python3.13/site-packages/charset_normalizer/models.py new file mode 100644 index 0000000000000000000000000000000000000000..1042758f873d2a54f7078c5411b17ffc11dca4ee --- /dev/null +++ b/meow/lib/python3.13/site-packages/charset_normalizer/models.py @@ -0,0 +1,360 @@ +from __future__ import annotations + +from encodings.aliases import aliases +from hashlib import sha256 +from json import dumps +from re import sub +from typing import Any, Iterator, List, Tuple + +from .constant import RE_POSSIBLE_ENCODING_INDICATION, TOO_BIG_SEQUENCE +from .utils import iana_name, is_multi_byte_encoding, unicode_range + + +class CharsetMatch: + def __init__( + self, + payload: bytes, + guessed_encoding: str, + mean_mess_ratio: float, + has_sig_or_bom: bool, + languages: CoherenceMatches, + decoded_payload: str | None = None, + preemptive_declaration: str | None = None, + ): + self._payload: bytes = payload + + self._encoding: str = guessed_encoding + self._mean_mess_ratio: float = mean_mess_ratio + self._languages: CoherenceMatches = languages + self._has_sig_or_bom: bool = has_sig_or_bom + self._unicode_ranges: list[str] | None = None + + self._leaves: list[CharsetMatch] = [] + self._mean_coherence_ratio: float = 0.0 + + self._output_payload: bytes | None = None + self._output_encoding: str | None = None + + self._string: str | None = decoded_payload + + self._preemptive_declaration: str | None = preemptive_declaration + + def __eq__(self, other: object) -> bool: + if not isinstance(other, CharsetMatch): + if isinstance(other, str): + return iana_name(other) == self.encoding + return False + return self.encoding == other.encoding and self.fingerprint == other.fingerprint + + def __lt__(self, other: object) -> bool: + """ + Implemented to make sorted available upon CharsetMatches items. + """ + if not isinstance(other, CharsetMatch): + raise ValueError + + chaos_difference: float = abs(self.chaos - other.chaos) + coherence_difference: float = abs(self.coherence - other.coherence) + + # Below 1% difference --> Use Coherence + if chaos_difference < 0.01 and coherence_difference > 0.02: + return self.coherence > other.coherence + elif chaos_difference < 0.01 and coherence_difference <= 0.02: + # When having a difficult decision, use the result that decoded as many multi-byte as possible. + # preserve RAM usage! + if len(self._payload) >= TOO_BIG_SEQUENCE: + return self.chaos < other.chaos + return self.multi_byte_usage > other.multi_byte_usage + + return self.chaos < other.chaos + + @property + def multi_byte_usage(self) -> float: + return 1.0 - (len(str(self)) / len(self.raw)) + + def __str__(self) -> str: + # Lazy Str Loading + if self._string is None: + self._string = str(self._payload, self._encoding, "strict") + return self._string + + def __repr__(self) -> str: + return f"" + + def add_submatch(self, other: CharsetMatch) -> None: + if not isinstance(other, CharsetMatch) or other == self: + raise ValueError( + "Unable to add instance <{}> as a submatch of a CharsetMatch".format( + other.__class__ + ) + ) + + other._string = None # Unload RAM usage; dirty trick. + self._leaves.append(other) + + @property + def encoding(self) -> str: + return self._encoding + + @property + def encoding_aliases(self) -> list[str]: + """ + Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855. + """ + also_known_as: list[str] = [] + for u, p in aliases.items(): + if self.encoding == u: + also_known_as.append(p) + elif self.encoding == p: + also_known_as.append(u) + return also_known_as + + @property + def bom(self) -> bool: + return self._has_sig_or_bom + + @property + def byte_order_mark(self) -> bool: + return self._has_sig_or_bom + + @property + def languages(self) -> list[str]: + """ + Return the complete list of possible languages found in decoded sequence. + Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'. + """ + return [e[0] for e in self._languages] + + @property + def language(self) -> str: + """ + Most probable language found in decoded sequence. If none were detected or inferred, the property will return + "Unknown". + """ + if not self._languages: + # Trying to infer the language based on the given encoding + # Its either English or we should not pronounce ourselves in certain cases. + if "ascii" in self.could_be_from_charset: + return "English" + + # doing it there to avoid circular import + from charset_normalizer.cd import encoding_languages, mb_encoding_languages + + languages = ( + mb_encoding_languages(self.encoding) + if is_multi_byte_encoding(self.encoding) + else encoding_languages(self.encoding) + ) + + if len(languages) == 0 or "Latin Based" in languages: + return "Unknown" + + return languages[0] + + return self._languages[0][0] + + @property + def chaos(self) -> float: + return self._mean_mess_ratio + + @property + def coherence(self) -> float: + if not self._languages: + return 0.0 + return self._languages[0][1] + + @property + def percent_chaos(self) -> float: + return round(self.chaos * 100, ndigits=3) + + @property + def percent_coherence(self) -> float: + return round(self.coherence * 100, ndigits=3) + + @property + def raw(self) -> bytes: + """ + Original untouched bytes. + """ + return self._payload + + @property + def submatch(self) -> list[CharsetMatch]: + return self._leaves + + @property + def has_submatch(self) -> bool: + return len(self._leaves) > 0 + + @property + def alphabets(self) -> list[str]: + if self._unicode_ranges is not None: + return self._unicode_ranges + # list detected ranges + detected_ranges: list[str | None] = [unicode_range(char) for char in str(self)] + # filter and sort + self._unicode_ranges = sorted(list({r for r in detected_ranges if r})) + return self._unicode_ranges + + @property + def could_be_from_charset(self) -> list[str]: + """ + The complete list of encoding that output the exact SAME str result and therefore could be the originating + encoding. + This list does include the encoding available in property 'encoding'. + """ + return [self._encoding] + [m.encoding for m in self._leaves] + + def output(self, encoding: str = "utf_8") -> bytes: + """ + Method to get re-encoded bytes payload using given target encoding. Default to UTF-8. + Any errors will be simply ignored by the encoder NOT replaced. + """ + if self._output_encoding is None or self._output_encoding != encoding: + self._output_encoding = encoding + decoded_string = str(self) + if ( + self._preemptive_declaration is not None + and self._preemptive_declaration.lower() + not in ["utf-8", "utf8", "utf_8"] + ): + patched_header = sub( + RE_POSSIBLE_ENCODING_INDICATION, + lambda m: m.string[m.span()[0] : m.span()[1]].replace( + m.groups()[0], + iana_name(self._output_encoding).replace("_", "-"), # type: ignore[arg-type] + ), + decoded_string[:8192], + count=1, + ) + + decoded_string = patched_header + decoded_string[8192:] + + self._output_payload = decoded_string.encode(encoding, "replace") + + return self._output_payload # type: ignore + + @property + def fingerprint(self) -> str: + """ + Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one. + """ + return sha256(self.output()).hexdigest() + + +class CharsetMatches: + """ + Container with every CharsetMatch items ordered by default from most probable to the less one. + Act like a list(iterable) but does not implements all related methods. + """ + + def __init__(self, results: list[CharsetMatch] | None = None): + self._results: list[CharsetMatch] = sorted(results) if results else [] + + def __iter__(self) -> Iterator[CharsetMatch]: + yield from self._results + + def __getitem__(self, item: int | str) -> CharsetMatch: + """ + Retrieve a single item either by its position or encoding name (alias may be used here). + Raise KeyError upon invalid index or encoding not present in results. + """ + if isinstance(item, int): + return self._results[item] + if isinstance(item, str): + item = iana_name(item, False) + for result in self._results: + if item in result.could_be_from_charset: + return result + raise KeyError + + def __len__(self) -> int: + return len(self._results) + + def __bool__(self) -> bool: + return len(self._results) > 0 + + def append(self, item: CharsetMatch) -> None: + """ + Insert a single match. Will be inserted accordingly to preserve sort. + Can be inserted as a submatch. + """ + if not isinstance(item, CharsetMatch): + raise ValueError( + "Cannot append instance '{}' to CharsetMatches".format( + str(item.__class__) + ) + ) + # We should disable the submatch factoring when the input file is too heavy (conserve RAM usage) + if len(item.raw) < TOO_BIG_SEQUENCE: + for match in self._results: + if match.fingerprint == item.fingerprint and match.chaos == item.chaos: + match.add_submatch(item) + return + self._results.append(item) + self._results = sorted(self._results) + + def best(self) -> CharsetMatch | None: + """ + Simply return the first match. Strict equivalent to matches[0]. + """ + if not self._results: + return None + return self._results[0] + + def first(self) -> CharsetMatch | None: + """ + Redundant method, call the method best(). Kept for BC reasons. + """ + return self.best() + + +CoherenceMatch = Tuple[str, float] +CoherenceMatches = List[CoherenceMatch] + + +class CliDetectionResult: + def __init__( + self, + path: str, + encoding: str | None, + encoding_aliases: list[str], + alternative_encodings: list[str], + language: str, + alphabets: list[str], + has_sig_or_bom: bool, + chaos: float, + coherence: float, + unicode_path: str | None, + is_preferred: bool, + ): + self.path: str = path + self.unicode_path: str | None = unicode_path + self.encoding: str | None = encoding + self.encoding_aliases: list[str] = encoding_aliases + self.alternative_encodings: list[str] = alternative_encodings + self.language: str = language + self.alphabets: list[str] = alphabets + self.has_sig_or_bom: bool = has_sig_or_bom + self.chaos: float = chaos + self.coherence: float = coherence + self.is_preferred: bool = is_preferred + + @property + def __dict__(self) -> dict[str, Any]: # type: ignore + return { + "path": self.path, + "encoding": self.encoding, + "encoding_aliases": self.encoding_aliases, + "alternative_encodings": self.alternative_encodings, + "language": self.language, + "alphabets": self.alphabets, + "has_sig_or_bom": self.has_sig_or_bom, + "chaos": self.chaos, + "coherence": self.coherence, + "unicode_path": self.unicode_path, + "is_preferred": self.is_preferred, + } + + def to_json(self) -> str: + return dumps(self.__dict__, ensure_ascii=True, indent=4) diff --git a/meow/lib/python3.13/site-packages/charset_normalizer/utils.py b/meow/lib/python3.13/site-packages/charset_normalizer/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..0175e0a96aa70652d357525649d0404e7e02fc00 --- /dev/null +++ b/meow/lib/python3.13/site-packages/charset_normalizer/utils.py @@ -0,0 +1,408 @@ +from __future__ import annotations + +import importlib +import logging +import unicodedata +from codecs import IncrementalDecoder +from encodings.aliases import aliases +from functools import lru_cache +from re import findall +from typing import Generator + +from _multibytecodec import ( # type: ignore[import-not-found,import] + MultibyteIncrementalDecoder, +) + +from .constant import ( + ENCODING_MARKS, + IANA_SUPPORTED_SIMILAR, + RE_POSSIBLE_ENCODING_INDICATION, + UNICODE_RANGES_COMBINED, + UNICODE_SECONDARY_RANGE_KEYWORD, + UTF8_MAXIMAL_ALLOCATION, +) + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_accentuated(character: str) -> bool: + try: + description: str = unicodedata.name(character) + except ValueError: # Defensive: unicode database outdated? + return False + return ( + "WITH GRAVE" in description + or "WITH ACUTE" in description + or "WITH CEDILLA" in description + or "WITH DIAERESIS" in description + or "WITH CIRCUMFLEX" in description + or "WITH TILDE" in description + or "WITH MACRON" in description + or "WITH RING ABOVE" in description + ) + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def remove_accent(character: str) -> str: + decomposed: str = unicodedata.decomposition(character) + if not decomposed: + return character + + codes: list[str] = decomposed.split(" ") + + return chr(int(codes[0], 16)) + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def unicode_range(character: str) -> str | None: + """ + Retrieve the Unicode range official name from a single character. + """ + character_ord: int = ord(character) + + for range_name, ord_range in UNICODE_RANGES_COMBINED.items(): + if character_ord in ord_range: + return range_name + + return None + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_latin(character: str) -> bool: + try: + description: str = unicodedata.name(character) + except ValueError: # Defensive: unicode database outdated? + return False + return "LATIN" in description + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_punctuation(character: str) -> bool: + character_category: str = unicodedata.category(character) + + if "P" in character_category: + return True + + character_range: str | None = unicode_range(character) + + if character_range is None: + return False + + return "Punctuation" in character_range + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_symbol(character: str) -> bool: + character_category: str = unicodedata.category(character) + + if "S" in character_category or "N" in character_category: + return True + + character_range: str | None = unicode_range(character) + + if character_range is None: + return False + + return "Forms" in character_range and character_category != "Lo" + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_emoticon(character: str) -> bool: + character_range: str | None = unicode_range(character) + + if character_range is None: + return False + + return "Emoticons" in character_range or "Pictographs" in character_range + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_separator(character: str) -> bool: + if character.isspace() or character in {"|", "+", "<", ">"}: + return True + + character_category: str = unicodedata.category(character) + + return "Z" in character_category or character_category in {"Po", "Pd", "Pc"} + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_case_variable(character: str) -> bool: + return character.islower() != character.isupper() + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_cjk(character: str) -> bool: + try: + character_name = unicodedata.name(character) + except ValueError: # Defensive: unicode database outdated? + return False + + return "CJK" in character_name + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_hiragana(character: str) -> bool: + try: + character_name = unicodedata.name(character) + except ValueError: # Defensive: unicode database outdated? + return False + + return "HIRAGANA" in character_name + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_katakana(character: str) -> bool: + try: + character_name = unicodedata.name(character) + except ValueError: # Defensive: unicode database outdated? + return False + + return "KATAKANA" in character_name + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_hangul(character: str) -> bool: + try: + character_name = unicodedata.name(character) + except ValueError: # Defensive: unicode database outdated? + return False + + return "HANGUL" in character_name + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_thai(character: str) -> bool: + try: + character_name = unicodedata.name(character) + except ValueError: # Defensive: unicode database outdated? + return False + + return "THAI" in character_name + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_arabic(character: str) -> bool: + try: + character_name = unicodedata.name(character) + except ValueError: # Defensive: unicode database outdated? + return False + + return "ARABIC" in character_name + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_arabic_isolated_form(character: str) -> bool: + try: + character_name = unicodedata.name(character) + except ValueError: # Defensive: unicode database outdated? + return False + + return "ARABIC" in character_name and "ISOLATED FORM" in character_name + + +@lru_cache(maxsize=len(UNICODE_RANGES_COMBINED)) +def is_unicode_range_secondary(range_name: str) -> bool: + return any(keyword in range_name for keyword in UNICODE_SECONDARY_RANGE_KEYWORD) + + +@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION) +def is_unprintable(character: str) -> bool: + return ( + character.isspace() is False # includes \n \t \r \v + and character.isprintable() is False + and character != "\x1a" # Why? Its the ASCII substitute character. + and character != "\ufeff" # bug discovered in Python, + # Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space. + ) + + +def any_specified_encoding(sequence: bytes, search_zone: int = 8192) -> str | None: + """ + Extract using ASCII-only decoder any specified encoding in the first n-bytes. + """ + if not isinstance(sequence, bytes): + raise TypeError + + seq_len: int = len(sequence) + + results: list[str] = findall( + RE_POSSIBLE_ENCODING_INDICATION, + sequence[: min(seq_len, search_zone)].decode("ascii", errors="ignore"), + ) + + if len(results) == 0: + return None + + for specified_encoding in results: + specified_encoding = specified_encoding.lower().replace("-", "_") + + encoding_alias: str + encoding_iana: str + + for encoding_alias, encoding_iana in aliases.items(): + if encoding_alias == specified_encoding: + return encoding_iana + if encoding_iana == specified_encoding: + return encoding_iana + + return None + + +@lru_cache(maxsize=128) +def is_multi_byte_encoding(name: str) -> bool: + """ + Verify is a specific encoding is a multi byte one based on it IANA name + """ + return name in { + "utf_8", + "utf_8_sig", + "utf_16", + "utf_16_be", + "utf_16_le", + "utf_32", + "utf_32_le", + "utf_32_be", + "utf_7", + } or issubclass( + importlib.import_module(f"encodings.{name}").IncrementalDecoder, + MultibyteIncrementalDecoder, + ) + + +def identify_sig_or_bom(sequence: bytes) -> tuple[str | None, bytes]: + """ + Identify and extract SIG/BOM in given sequence. + """ + + for iana_encoding in ENCODING_MARKS: + marks: bytes | list[bytes] = ENCODING_MARKS[iana_encoding] + + if isinstance(marks, bytes): + marks = [marks] + + for mark in marks: + if sequence.startswith(mark): + return iana_encoding, mark + + return None, b"" + + +def should_strip_sig_or_bom(iana_encoding: str) -> bool: + return iana_encoding not in {"utf_16", "utf_32"} + + +def iana_name(cp_name: str, strict: bool = True) -> str: + """Returns the Python normalized encoding name (Not the IANA official name).""" + cp_name = cp_name.lower().replace("-", "_") + + encoding_alias: str + encoding_iana: str + + for encoding_alias, encoding_iana in aliases.items(): + if cp_name in [encoding_alias, encoding_iana]: + return encoding_iana + + if strict: + raise ValueError(f"Unable to retrieve IANA for '{cp_name}'") + + return cp_name + + +def cp_similarity(iana_name_a: str, iana_name_b: str) -> float: + if is_multi_byte_encoding(iana_name_a) or is_multi_byte_encoding(iana_name_b): + return 0.0 + + decoder_a = importlib.import_module(f"encodings.{iana_name_a}").IncrementalDecoder + decoder_b = importlib.import_module(f"encodings.{iana_name_b}").IncrementalDecoder + + id_a: IncrementalDecoder = decoder_a(errors="ignore") + id_b: IncrementalDecoder = decoder_b(errors="ignore") + + character_match_count: int = 0 + + for i in range(255): + to_be_decoded: bytes = bytes([i]) + if id_a.decode(to_be_decoded) == id_b.decode(to_be_decoded): + character_match_count += 1 + + return character_match_count / 254 + + +def is_cp_similar(iana_name_a: str, iana_name_b: str) -> bool: + """ + Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using + the function cp_similarity. + """ + return ( + iana_name_a in IANA_SUPPORTED_SIMILAR + and iana_name_b in IANA_SUPPORTED_SIMILAR[iana_name_a] + ) + + +def set_logging_handler( + name: str = "charset_normalizer", + level: int = logging.INFO, + format_string: str = "%(asctime)s | %(levelname)s | %(message)s", +) -> None: + logger = logging.getLogger(name) + logger.setLevel(level) + + handler = logging.StreamHandler() + handler.setFormatter(logging.Formatter(format_string)) + logger.addHandler(handler) + + +def cut_sequence_chunks( + sequences: bytes, + encoding_iana: str, + offsets: range, + chunk_size: int, + bom_or_sig_available: bool, + strip_sig_or_bom: bool, + sig_payload: bytes, + is_multi_byte_decoder: bool, + decoded_payload: str | None = None, +) -> Generator[str, None, None]: + if decoded_payload and is_multi_byte_decoder is False: + for i in offsets: + chunk = decoded_payload[i : i + chunk_size] + if not chunk: + break + yield chunk + else: + for i in offsets: + chunk_end = i + chunk_size + if chunk_end > len(sequences) + 8: + continue + + cut_sequence = sequences[i : i + chunk_size] + + if bom_or_sig_available and strip_sig_or_bom is False: + cut_sequence = sig_payload + cut_sequence + + chunk = cut_sequence.decode( + encoding_iana, + errors="ignore" if is_multi_byte_decoder else "strict", + ) + + # multi-byte bad cutting detector and adjustment + # not the cleanest way to perform that fix but clever enough for now. + if is_multi_byte_decoder and i > 0: + chunk_partial_size_chk: int = min(chunk_size, 16) + + if ( + decoded_payload + and chunk[:chunk_partial_size_chk] not in decoded_payload + ): + for j in range(i, i - 4, -1): + cut_sequence = sequences[j:chunk_end] + + if bom_or_sig_available and strip_sig_or_bom is False: + cut_sequence = sig_payload + cut_sequence + + chunk = cut_sequence.decode(encoding_iana, errors="ignore") + + if chunk[:chunk_partial_size_chk] in decoded_payload: + break + + yield chunk diff --git a/meow/lib/python3.13/site-packages/charset_normalizer/version.py b/meow/lib/python3.13/site-packages/charset_normalizer/version.py new file mode 100644 index 0000000000000000000000000000000000000000..f85e8929e74cad7b3c0bf95bbc8ac3625b4db1b2 --- /dev/null +++ b/meow/lib/python3.13/site-packages/charset_normalizer/version.py @@ -0,0 +1,8 @@ +""" +Expose version +""" + +from __future__ import annotations + +__version__ = "3.4.1" +VERSION = __version__.split(".") diff --git a/meow/lib/python3.13/site-packages/filelock/__init__.py b/meow/lib/python3.13/site-packages/filelock/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c9d8c5b8ebe565a652b3671b3dfa066f7346af45 --- /dev/null +++ b/meow/lib/python3.13/site-packages/filelock/__init__.py @@ -0,0 +1,70 @@ +""" +A platform independent file lock that supports the with-statement. + +.. autodata:: filelock.__version__ + :no-value: + +""" + +from __future__ import annotations + +import sys +import warnings +from typing import TYPE_CHECKING + +from ._api import AcquireReturnProxy, BaseFileLock +from ._error import Timeout +from ._soft import SoftFileLock +from ._unix import UnixFileLock, has_fcntl +from ._windows import WindowsFileLock +from .asyncio import ( + AsyncAcquireReturnProxy, + AsyncSoftFileLock, + AsyncUnixFileLock, + AsyncWindowsFileLock, + BaseAsyncFileLock, +) +from .version import version + +#: version of the project as a string +__version__: str = version + + +if sys.platform == "win32": # pragma: win32 cover + _FileLock: type[BaseFileLock] = WindowsFileLock + _AsyncFileLock: type[BaseAsyncFileLock] = AsyncWindowsFileLock +else: # pragma: win32 no cover # noqa: PLR5501 + if has_fcntl: + _FileLock: type[BaseFileLock] = UnixFileLock + _AsyncFileLock: type[BaseAsyncFileLock] = AsyncUnixFileLock + else: + _FileLock = SoftFileLock + _AsyncFileLock = AsyncSoftFileLock + if warnings is not None: + warnings.warn("only soft file lock is available", stacklevel=2) + +if TYPE_CHECKING: + FileLock = SoftFileLock + AsyncFileLock = AsyncSoftFileLock +else: + #: Alias for the lock, which should be used for the current platform. + FileLock = _FileLock + AsyncFileLock = _AsyncFileLock + + +__all__ = [ + "AcquireReturnProxy", + "AsyncAcquireReturnProxy", + "AsyncFileLock", + "AsyncSoftFileLock", + "AsyncUnixFileLock", + "AsyncWindowsFileLock", + "BaseAsyncFileLock", + "BaseFileLock", + "FileLock", + "SoftFileLock", + "Timeout", + "UnixFileLock", + "WindowsFileLock", + "__version__", +] diff --git a/meow/lib/python3.13/site-packages/filelock/_api.py b/meow/lib/python3.13/site-packages/filelock/_api.py new file mode 100644 index 0000000000000000000000000000000000000000..771a559a658bf1cdc61ed2ff74e1fca7db4683e5 --- /dev/null +++ b/meow/lib/python3.13/site-packages/filelock/_api.py @@ -0,0 +1,403 @@ +from __future__ import annotations + +import contextlib +import inspect +import logging +import os +import time +import warnings +from abc import ABCMeta, abstractmethod +from dataclasses import dataclass +from threading import local +from typing import TYPE_CHECKING, Any, cast +from weakref import WeakValueDictionary + +from ._error import Timeout + +if TYPE_CHECKING: + import sys + from types import TracebackType + + if sys.version_info >= (3, 11): # pragma: no cover (py311+) + from typing import Self + else: # pragma: no cover ( None: + self.lock = lock + + def __enter__(self) -> BaseFileLock: + return self.lock + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, + ) -> None: + self.lock.release() + + +@dataclass +class FileLockContext: + """A dataclass which holds the context for a ``BaseFileLock`` object.""" + + # The context is held in a separate class to allow optional use of thread local storage via the + # ThreadLocalFileContext class. + + #: The path to the lock file. + lock_file: str + + #: The default timeout value. + timeout: float + + #: The mode for the lock files + mode: int + + #: Whether the lock should be blocking or not + blocking: bool + + #: The file descriptor for the *_lock_file* as it is returned by the os.open() function, not None when lock held + lock_file_fd: int | None = None + + #: The lock counter is used for implementing the nested locking mechanism. + lock_counter: int = 0 # When the lock is acquired is increased and the lock is only released, when this value is 0 + + +class ThreadLocalFileContext(FileLockContext, local): + """A thread local version of the ``FileLockContext`` class.""" + + +class FileLockMeta(ABCMeta): + def __call__( # noqa: PLR0913 + cls, + lock_file: str | os.PathLike[str], + timeout: float = -1, + mode: int = 0o644, + thread_local: bool = True, # noqa: FBT001, FBT002 + *, + blocking: bool = True, + is_singleton: bool = False, + **kwargs: Any, # capture remaining kwargs for subclasses # noqa: ANN401 + ) -> BaseFileLock: + if is_singleton: + instance = cls._instances.get(str(lock_file)) # type: ignore[attr-defined] + if instance: + params_to_check = { + "thread_local": (thread_local, instance.is_thread_local()), + "timeout": (timeout, instance.timeout), + "mode": (mode, instance.mode), + "blocking": (blocking, instance.blocking), + } + + non_matching_params = { + name: (passed_param, set_param) + for name, (passed_param, set_param) in params_to_check.items() + if passed_param != set_param + } + if not non_matching_params: + return cast(BaseFileLock, instance) + + # parameters do not match; raise error + msg = "Singleton lock instances cannot be initialized with differing arguments" + msg += "\nNon-matching arguments: " + for param_name, (passed_param, set_param) in non_matching_params.items(): + msg += f"\n\t{param_name} (existing lock has {set_param} but {passed_param} was passed)" + raise ValueError(msg) + + # Workaround to make `__init__`'s params optional in subclasses + # E.g. virtualenv changes the signature of the `__init__` method in the `BaseFileLock` class descendant + # (https://github.com/tox-dev/filelock/pull/340) + + all_params = { + "timeout": timeout, + "mode": mode, + "thread_local": thread_local, + "blocking": blocking, + "is_singleton": is_singleton, + **kwargs, + } + + present_params = inspect.signature(cls.__init__).parameters # type: ignore[misc] + init_params = {key: value for key, value in all_params.items() if key in present_params} + + instance = super().__call__(lock_file, **init_params) + + if is_singleton: + cls._instances[str(lock_file)] = instance # type: ignore[attr-defined] + + return cast(BaseFileLock, instance) + + +class BaseFileLock(contextlib.ContextDecorator, metaclass=FileLockMeta): + """Abstract base class for a file lock object.""" + + _instances: WeakValueDictionary[str, BaseFileLock] + + def __init_subclass__(cls, **kwargs: dict[str, Any]) -> None: + """Setup unique state for lock subclasses.""" + super().__init_subclass__(**kwargs) + cls._instances = WeakValueDictionary() + + def __init__( # noqa: PLR0913 + self, + lock_file: str | os.PathLike[str], + timeout: float = -1, + mode: int = 0o644, + thread_local: bool = True, # noqa: FBT001, FBT002 + *, + blocking: bool = True, + is_singleton: bool = False, + ) -> None: + """ + Create a new lock object. + + :param lock_file: path to the file + :param timeout: default timeout when acquiring the lock, in seconds. It will be used as fallback value in \ + the acquire method, if no timeout value (``None``) is given. If you want to disable the timeout, set it \ + to a negative value. A timeout of 0 means that there is exactly one attempt to acquire the file lock. + :param mode: file permissions for the lockfile + :param thread_local: Whether this object's internal context should be thread local or not. If this is set to \ + ``False`` then the lock will be reentrant across threads. + :param blocking: whether the lock should be blocking or not + :param is_singleton: If this is set to ``True`` then only one instance of this class will be created \ + per lock file. This is useful if you want to use the lock object for reentrant locking without needing \ + to pass the same object around. + + """ + self._is_thread_local = thread_local + self._is_singleton = is_singleton + + # Create the context. Note that external code should not work with the context directly and should instead use + # properties of this class. + kwargs: dict[str, Any] = { + "lock_file": os.fspath(lock_file), + "timeout": timeout, + "mode": mode, + "blocking": blocking, + } + self._context: FileLockContext = (ThreadLocalFileContext if thread_local else FileLockContext)(**kwargs) + + def is_thread_local(self) -> bool: + """:return: a flag indicating if this lock is thread local or not""" + return self._is_thread_local + + @property + def is_singleton(self) -> bool: + """:return: a flag indicating if this lock is singleton or not""" + return self._is_singleton + + @property + def lock_file(self) -> str: + """:return: path to the lock file""" + return self._context.lock_file + + @property + def timeout(self) -> float: + """ + :return: the default timeout value, in seconds + + .. versionadded:: 2.0.0 + """ + return self._context.timeout + + @timeout.setter + def timeout(self, value: float | str) -> None: + """ + Change the default timeout value. + + :param value: the new value, in seconds + + """ + self._context.timeout = float(value) + + @property + def blocking(self) -> bool: + """:return: whether the locking is blocking or not""" + return self._context.blocking + + @blocking.setter + def blocking(self, value: bool) -> None: + """ + Change the default blocking value. + + :param value: the new value as bool + + """ + self._context.blocking = value + + @property + def mode(self) -> int: + """:return: the file permissions for the lockfile""" + return self._context.mode + + @abstractmethod + def _acquire(self) -> None: + """If the file lock could be acquired, self._context.lock_file_fd holds the file descriptor of the lock file.""" + raise NotImplementedError + + @abstractmethod + def _release(self) -> None: + """Releases the lock and sets self._context.lock_file_fd to None.""" + raise NotImplementedError + + @property + def is_locked(self) -> bool: + """ + + :return: A boolean indicating if the lock file is holding the lock currently. + + .. versionchanged:: 2.0.0 + + This was previously a method and is now a property. + """ + return self._context.lock_file_fd is not None + + @property + def lock_counter(self) -> int: + """:return: The number of times this lock has been acquired (but not yet released).""" + return self._context.lock_counter + + def acquire( + self, + timeout: float | None = None, + poll_interval: float = 0.05, + *, + poll_intervall: float | None = None, + blocking: bool | None = None, + ) -> AcquireReturnProxy: + """ + Try to acquire the file lock. + + :param timeout: maximum wait time for acquiring the lock, ``None`` means use the default :attr:`~timeout` is and + if ``timeout < 0``, there is no timeout and this method will block until the lock could be acquired + :param poll_interval: interval of trying to acquire the lock file + :param poll_intervall: deprecated, kept for backwards compatibility, use ``poll_interval`` instead + :param blocking: defaults to True. If False, function will return immediately if it cannot obtain a lock on the + first attempt. Otherwise, this method will block until the timeout expires or the lock is acquired. + :raises Timeout: if fails to acquire lock within the timeout period + :return: a context object that will unlock the file when the context is exited + + .. code-block:: python + + # You can use this method in the context manager (recommended) + with lock.acquire(): + pass + + # Or use an equivalent try-finally construct: + lock.acquire() + try: + pass + finally: + lock.release() + + .. versionchanged:: 2.0.0 + + This method returns now a *proxy* object instead of *self*, + so that it can be used in a with statement without side effects. + + """ + # Use the default timeout, if no timeout is provided. + if timeout is None: + timeout = self._context.timeout + + if blocking is None: + blocking = self._context.blocking + + if poll_intervall is not None: + msg = "use poll_interval instead of poll_intervall" + warnings.warn(msg, DeprecationWarning, stacklevel=2) + poll_interval = poll_intervall + + # Increment the number right at the beginning. We can still undo it, if something fails. + self._context.lock_counter += 1 + + lock_id = id(self) + lock_filename = self.lock_file + start_time = time.perf_counter() + try: + while True: + if not self.is_locked: + _LOGGER.debug("Attempting to acquire lock %s on %s", lock_id, lock_filename) + self._acquire() + if self.is_locked: + _LOGGER.debug("Lock %s acquired on %s", lock_id, lock_filename) + break + if blocking is False: + _LOGGER.debug("Failed to immediately acquire lock %s on %s", lock_id, lock_filename) + raise Timeout(lock_filename) # noqa: TRY301 + if 0 <= timeout < time.perf_counter() - start_time: + _LOGGER.debug("Timeout on acquiring lock %s on %s", lock_id, lock_filename) + raise Timeout(lock_filename) # noqa: TRY301 + msg = "Lock %s not acquired on %s, waiting %s seconds ..." + _LOGGER.debug(msg, lock_id, lock_filename, poll_interval) + time.sleep(poll_interval) + except BaseException: # Something did go wrong, so decrement the counter. + self._context.lock_counter = max(0, self._context.lock_counter - 1) + raise + return AcquireReturnProxy(lock=self) + + def release(self, force: bool = False) -> None: # noqa: FBT001, FBT002 + """ + Releases the file lock. Please note, that the lock is only completely released, if the lock counter is 0. + Also note, that the lock file itself is not automatically deleted. + + :param force: If true, the lock counter is ignored and the lock is released in every case/ + + """ + if self.is_locked: + self._context.lock_counter -= 1 + + if self._context.lock_counter == 0 or force: + lock_id, lock_filename = id(self), self.lock_file + + _LOGGER.debug("Attempting to release lock %s on %s", lock_id, lock_filename) + self._release() + self._context.lock_counter = 0 + _LOGGER.debug("Lock %s released on %s", lock_id, lock_filename) + + def __enter__(self) -> Self: + """ + Acquire the lock. + + :return: the lock object + + """ + self.acquire() + return self + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, + ) -> None: + """ + Release the lock. + + :param exc_type: the exception type if raised + :param exc_value: the exception value if raised + :param traceback: the exception traceback if raised + + """ + self.release() + + def __del__(self) -> None: + """Called when the lock object is deleted.""" + self.release(force=True) + + +__all__ = [ + "AcquireReturnProxy", + "BaseFileLock", +] diff --git a/meow/lib/python3.13/site-packages/filelock/_error.py b/meow/lib/python3.13/site-packages/filelock/_error.py new file mode 100644 index 0000000000000000000000000000000000000000..f7ff08c0f508ad7077eb6ed1990898840c952b3a --- /dev/null +++ b/meow/lib/python3.13/site-packages/filelock/_error.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from typing import Any + + +class Timeout(TimeoutError): # noqa: N818 + """Raised when the lock could not be acquired in *timeout* seconds.""" + + def __init__(self, lock_file: str) -> None: + super().__init__() + self._lock_file = lock_file + + def __reduce__(self) -> str | tuple[Any, ...]: + return self.__class__, (self._lock_file,) # Properly pickle the exception + + def __str__(self) -> str: + return f"The file lock '{self._lock_file}' could not be acquired." + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({self.lock_file!r})" + + @property + def lock_file(self) -> str: + """:return: The path of the file lock.""" + return self._lock_file + + +__all__ = [ + "Timeout", +] diff --git a/meow/lib/python3.13/site-packages/filelock/_soft.py b/meow/lib/python3.13/site-packages/filelock/_soft.py new file mode 100644 index 0000000000000000000000000000000000000000..28c67f74cc82b8f55e47afd6a71972cc1fb95eb6 --- /dev/null +++ b/meow/lib/python3.13/site-packages/filelock/_soft.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +import os +import sys +from contextlib import suppress +from errno import EACCES, EEXIST +from pathlib import Path + +from ._api import BaseFileLock +from ._util import ensure_directory_exists, raise_on_not_writable_file + + +class SoftFileLock(BaseFileLock): + """Simply watches the existence of the lock file.""" + + def _acquire(self) -> None: + raise_on_not_writable_file(self.lock_file) + ensure_directory_exists(self.lock_file) + # first check for exists and read-only mode as the open will mask this case as EEXIST + flags = ( + os.O_WRONLY # open for writing only + | os.O_CREAT + | os.O_EXCL # together with above raise EEXIST if the file specified by filename exists + | os.O_TRUNC # truncate the file to zero byte + ) + try: + file_handler = os.open(self.lock_file, flags, self._context.mode) + except OSError as exception: # re-raise unless expected exception + if not ( + exception.errno == EEXIST # lock already exist + or (exception.errno == EACCES and sys.platform == "win32") # has no access to this lock + ): # pragma: win32 no cover + raise + else: + self._context.lock_file_fd = file_handler + + def _release(self) -> None: + assert self._context.lock_file_fd is not None # noqa: S101 + os.close(self._context.lock_file_fd) # the lock file is definitely not None + self._context.lock_file_fd = None + with suppress(OSError): # the file is already deleted and that's what we want + Path(self.lock_file).unlink() + + +__all__ = [ + "SoftFileLock", +] diff --git a/meow/lib/python3.13/site-packages/filelock/_unix.py b/meow/lib/python3.13/site-packages/filelock/_unix.py new file mode 100644 index 0000000000000000000000000000000000000000..4ae1fbe916f95762418cd62251f91f74ba35fc8c --- /dev/null +++ b/meow/lib/python3.13/site-packages/filelock/_unix.py @@ -0,0 +1,68 @@ +from __future__ import annotations + +import os +import sys +from contextlib import suppress +from errno import ENOSYS +from pathlib import Path +from typing import cast + +from ._api import BaseFileLock +from ._util import ensure_directory_exists + +#: a flag to indicate if the fcntl API is available +has_fcntl = False +if sys.platform == "win32": # pragma: win32 cover + + class UnixFileLock(BaseFileLock): + """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems.""" + + def _acquire(self) -> None: + raise NotImplementedError + + def _release(self) -> None: + raise NotImplementedError + +else: # pragma: win32 no cover + try: + import fcntl + except ImportError: + pass + else: + has_fcntl = True + + class UnixFileLock(BaseFileLock): + """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems.""" + + def _acquire(self) -> None: + ensure_directory_exists(self.lock_file) + open_flags = os.O_RDWR | os.O_TRUNC + if not Path(self.lock_file).exists(): + open_flags |= os.O_CREAT + fd = os.open(self.lock_file, open_flags, self._context.mode) + with suppress(PermissionError): # This locked is not owned by this UID + os.fchmod(fd, self._context.mode) + try: + fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + except OSError as exception: + os.close(fd) + if exception.errno == ENOSYS: # NotImplemented error + msg = "FileSystem does not appear to support flock; use SoftFileLock instead" + raise NotImplementedError(msg) from exception + else: + self._context.lock_file_fd = fd + + def _release(self) -> None: + # Do not remove the lockfile: + # https://github.com/tox-dev/py-filelock/issues/31 + # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition + fd = cast(int, self._context.lock_file_fd) + self._context.lock_file_fd = None + fcntl.flock(fd, fcntl.LOCK_UN) + os.close(fd) + + +__all__ = [ + "UnixFileLock", + "has_fcntl", +] diff --git a/meow/lib/python3.13/site-packages/filelock/_util.py b/meow/lib/python3.13/site-packages/filelock/_util.py new file mode 100644 index 0000000000000000000000000000000000000000..c671e8533873948f0e1b5575ff952c722019f067 --- /dev/null +++ b/meow/lib/python3.13/site-packages/filelock/_util.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +import os +import stat +import sys +from errno import EACCES, EISDIR +from pathlib import Path + + +def raise_on_not_writable_file(filename: str) -> None: + """ + Raise an exception if attempting to open the file for writing would fail. + + This is done so files that will never be writable can be separated from files that are writable but currently + locked. + + :param filename: file to check + :raises OSError: as if the file was opened for writing. + + """ + try: # use stat to do exists + can write to check without race condition + file_stat = os.stat(filename) # noqa: PTH116 + except OSError: + return # swallow does not exist or other errors + + if file_stat.st_mtime != 0: # if os.stat returns but modification is zero that's an invalid os.stat - ignore it + if not (file_stat.st_mode & stat.S_IWUSR): + raise PermissionError(EACCES, "Permission denied", filename) + + if stat.S_ISDIR(file_stat.st_mode): + if sys.platform == "win32": # pragma: win32 cover + # On Windows, this is PermissionError + raise PermissionError(EACCES, "Permission denied", filename) + else: # pragma: win32 no cover # noqa: RET506 + # On linux / macOS, this is IsADirectoryError + raise IsADirectoryError(EISDIR, "Is a directory", filename) + + +def ensure_directory_exists(filename: Path | str) -> None: + """ + Ensure the directory containing the file exists (create it if necessary). + + :param filename: file. + + """ + Path(filename).parent.mkdir(parents=True, exist_ok=True) + + +__all__ = [ + "ensure_directory_exists", + "raise_on_not_writable_file", +] diff --git a/meow/lib/python3.13/site-packages/filelock/_windows.py b/meow/lib/python3.13/site-packages/filelock/_windows.py new file mode 100644 index 0000000000000000000000000000000000000000..8db55dcbaa3e7bab091781b17ce22fde1fc239f2 --- /dev/null +++ b/meow/lib/python3.13/site-packages/filelock/_windows.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +import os +import sys +from contextlib import suppress +from errno import EACCES +from pathlib import Path +from typing import cast + +from ._api import BaseFileLock +from ._util import ensure_directory_exists, raise_on_not_writable_file + +if sys.platform == "win32": # pragma: win32 cover + import msvcrt + + class WindowsFileLock(BaseFileLock): + """Uses the :func:`msvcrt.locking` function to hard lock the lock file on Windows systems.""" + + def _acquire(self) -> None: + raise_on_not_writable_file(self.lock_file) + ensure_directory_exists(self.lock_file) + flags = ( + os.O_RDWR # open for read and write + | os.O_CREAT # create file if not exists + | os.O_TRUNC # truncate file if not empty + ) + try: + fd = os.open(self.lock_file, flags, self._context.mode) + except OSError as exception: + if exception.errno != EACCES: # has no access to this lock + raise + else: + try: + msvcrt.locking(fd, msvcrt.LK_NBLCK, 1) + except OSError as exception: + os.close(fd) # close file first + if exception.errno != EACCES: # file is already locked + raise + else: + self._context.lock_file_fd = fd + + def _release(self) -> None: + fd = cast(int, self._context.lock_file_fd) + self._context.lock_file_fd = None + msvcrt.locking(fd, msvcrt.LK_UNLCK, 1) + os.close(fd) + + with suppress(OSError): # Probably another instance of the application hat acquired the file lock. + Path(self.lock_file).unlink() + +else: # pragma: win32 no cover + + class WindowsFileLock(BaseFileLock): + """Uses the :func:`msvcrt.locking` function to hard lock the lock file on Windows systems.""" + + def _acquire(self) -> None: + raise NotImplementedError + + def _release(self) -> None: + raise NotImplementedError + + +__all__ = [ + "WindowsFileLock", +] diff --git a/meow/lib/python3.13/site-packages/filelock/asyncio.py b/meow/lib/python3.13/site-packages/filelock/asyncio.py new file mode 100644 index 0000000000000000000000000000000000000000..0fdd8f79c3fa742d8342cec01e98be6c0dfaf353 --- /dev/null +++ b/meow/lib/python3.13/site-packages/filelock/asyncio.py @@ -0,0 +1,342 @@ +"""An asyncio-based implementation of the file lock.""" # noqa: A005 + +from __future__ import annotations + +import asyncio +import contextlib +import logging +import os +import time +from dataclasses import dataclass +from threading import local +from typing import TYPE_CHECKING, Any, Callable, NoReturn, cast + +from ._api import BaseFileLock, FileLockContext, FileLockMeta +from ._error import Timeout +from ._soft import SoftFileLock +from ._unix import UnixFileLock +from ._windows import WindowsFileLock + +if TYPE_CHECKING: + import sys + from concurrent import futures + from types import TracebackType + + if sys.version_info >= (3, 11): # pragma: no cover (py311+) + from typing import Self + else: # pragma: no cover ( None: # noqa: D107 + self.lock = lock + + async def __aenter__(self) -> BaseAsyncFileLock: # noqa: D105 + return self.lock + + async def __aexit__( # noqa: D105 + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, + ) -> None: + await self.lock.release() + + +class AsyncFileLockMeta(FileLockMeta): + def __call__( # type: ignore[override] # noqa: PLR0913 + cls, # noqa: N805 + lock_file: str | os.PathLike[str], + timeout: float = -1, + mode: int = 0o644, + thread_local: bool = False, # noqa: FBT001, FBT002 + *, + blocking: bool = True, + is_singleton: bool = False, + loop: asyncio.AbstractEventLoop | None = None, + run_in_executor: bool = True, + executor: futures.Executor | None = None, + ) -> BaseAsyncFileLock: + if thread_local and run_in_executor: + msg = "run_in_executor is not supported when thread_local is True" + raise ValueError(msg) + instance = super().__call__( + lock_file=lock_file, + timeout=timeout, + mode=mode, + thread_local=thread_local, + blocking=blocking, + is_singleton=is_singleton, + loop=loop, + run_in_executor=run_in_executor, + executor=executor, + ) + return cast(BaseAsyncFileLock, instance) + + +class BaseAsyncFileLock(BaseFileLock, metaclass=AsyncFileLockMeta): + """Base class for asynchronous file locks.""" + + def __init__( # noqa: PLR0913 + self, + lock_file: str | os.PathLike[str], + timeout: float = -1, + mode: int = 0o644, + thread_local: bool = False, # noqa: FBT001, FBT002 + *, + blocking: bool = True, + is_singleton: bool = False, + loop: asyncio.AbstractEventLoop | None = None, + run_in_executor: bool = True, + executor: futures.Executor | None = None, + ) -> None: + """ + Create a new lock object. + + :param lock_file: path to the file + :param timeout: default timeout when acquiring the lock, in seconds. It will be used as fallback value in \ + the acquire method, if no timeout value (``None``) is given. If you want to disable the timeout, set it \ + to a negative value. A timeout of 0 means that there is exactly one attempt to acquire the file lock. + :param mode: file permissions for the lockfile + :param thread_local: Whether this object's internal context should be thread local or not. If this is set to \ + ``False`` then the lock will be reentrant across threads. + :param blocking: whether the lock should be blocking or not + :param is_singleton: If this is set to ``True`` then only one instance of this class will be created \ + per lock file. This is useful if you want to use the lock object for reentrant locking without needing \ + to pass the same object around. + :param loop: The event loop to use. If not specified, the running event loop will be used. + :param run_in_executor: If this is set to ``True`` then the lock will be acquired in an executor. + :param executor: The executor to use. If not specified, the default executor will be used. + + """ + self._is_thread_local = thread_local + self._is_singleton = is_singleton + + # Create the context. Note that external code should not work with the context directly and should instead use + # properties of this class. + kwargs: dict[str, Any] = { + "lock_file": os.fspath(lock_file), + "timeout": timeout, + "mode": mode, + "blocking": blocking, + "loop": loop, + "run_in_executor": run_in_executor, + "executor": executor, + } + self._context: AsyncFileLockContext = (AsyncThreadLocalFileContext if thread_local else AsyncFileLockContext)( + **kwargs + ) + + @property + def run_in_executor(self) -> bool: + """::return: whether run in executor.""" + return self._context.run_in_executor + + @property + def executor(self) -> futures.Executor | None: + """::return: the executor.""" + return self._context.executor + + @executor.setter + def executor(self, value: futures.Executor | None) -> None: # pragma: no cover + """ + Change the executor. + + :param value: the new executor or ``None`` + :type value: futures.Executor | None + + """ + self._context.executor = value + + @property + def loop(self) -> asyncio.AbstractEventLoop | None: + """::return: the event loop.""" + return self._context.loop + + async def acquire( # type: ignore[override] + self, + timeout: float | None = None, + poll_interval: float = 0.05, + *, + blocking: bool | None = None, + ) -> AsyncAcquireReturnProxy: + """ + Try to acquire the file lock. + + :param timeout: maximum wait time for acquiring the lock, ``None`` means use the default + :attr:`~BaseFileLock.timeout` is and if ``timeout < 0``, there is no timeout and + this method will block until the lock could be acquired + :param poll_interval: interval of trying to acquire the lock file + :param blocking: defaults to True. If False, function will return immediately if it cannot obtain a lock on the + first attempt. Otherwise, this method will block until the timeout expires or the lock is acquired. + :raises Timeout: if fails to acquire lock within the timeout period + :return: a context object that will unlock the file when the context is exited + + .. code-block:: python + + # You can use this method in the context manager (recommended) + with lock.acquire(): + pass + + # Or use an equivalent try-finally construct: + lock.acquire() + try: + pass + finally: + lock.release() + + """ + # Use the default timeout, if no timeout is provided. + if timeout is None: + timeout = self._context.timeout + + if blocking is None: + blocking = self._context.blocking + + # Increment the number right at the beginning. We can still undo it, if something fails. + self._context.lock_counter += 1 + + lock_id = id(self) + lock_filename = self.lock_file + start_time = time.perf_counter() + try: + while True: + if not self.is_locked: + _LOGGER.debug("Attempting to acquire lock %s on %s", lock_id, lock_filename) + await self._run_internal_method(self._acquire) + if self.is_locked: + _LOGGER.debug("Lock %s acquired on %s", lock_id, lock_filename) + break + if blocking is False: + _LOGGER.debug("Failed to immediately acquire lock %s on %s", lock_id, lock_filename) + raise Timeout(lock_filename) # noqa: TRY301 + if 0 <= timeout < time.perf_counter() - start_time: + _LOGGER.debug("Timeout on acquiring lock %s on %s", lock_id, lock_filename) + raise Timeout(lock_filename) # noqa: TRY301 + msg = "Lock %s not acquired on %s, waiting %s seconds ..." + _LOGGER.debug(msg, lock_id, lock_filename, poll_interval) + await asyncio.sleep(poll_interval) + except BaseException: # Something did go wrong, so decrement the counter. + self._context.lock_counter = max(0, self._context.lock_counter - 1) + raise + return AsyncAcquireReturnProxy(lock=self) + + async def release(self, force: bool = False) -> None: # type: ignore[override] # noqa: FBT001, FBT002 + """ + Releases the file lock. Please note, that the lock is only completely released, if the lock counter is 0. + Also note, that the lock file itself is not automatically deleted. + + :param force: If true, the lock counter is ignored and the lock is released in every case/ + + """ + if self.is_locked: + self._context.lock_counter -= 1 + + if self._context.lock_counter == 0 or force: + lock_id, lock_filename = id(self), self.lock_file + + _LOGGER.debug("Attempting to release lock %s on %s", lock_id, lock_filename) + await self._run_internal_method(self._release) + self._context.lock_counter = 0 + _LOGGER.debug("Lock %s released on %s", lock_id, lock_filename) + + async def _run_internal_method(self, method: Callable[[], Any]) -> None: + if asyncio.iscoroutinefunction(method): + await method() + elif self.run_in_executor: + loop = self.loop or asyncio.get_running_loop() + await loop.run_in_executor(self.executor, method) + else: + method() + + def __enter__(self) -> NoReturn: + """ + Replace old __enter__ method to avoid using it. + + NOTE: DO NOT USE `with` FOR ASYNCIO LOCKS, USE `async with` INSTEAD. + + :return: none + :rtype: NoReturn + """ + msg = "Do not use `with` for asyncio locks, use `async with` instead." + raise NotImplementedError(msg) + + async def __aenter__(self) -> Self: + """ + Acquire the lock. + + :return: the lock object + + """ + await self.acquire() + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, + ) -> None: + """ + Release the lock. + + :param exc_type: the exception type if raised + :param exc_value: the exception value if raised + :param traceback: the exception traceback if raised + + """ + await self.release() + + def __del__(self) -> None: + """Called when the lock object is deleted.""" + with contextlib.suppress(RuntimeError): + loop = self.loop or asyncio.get_running_loop() + if not loop.is_running(): # pragma: no cover + loop.run_until_complete(self.release(force=True)) + else: + loop.create_task(self.release(force=True)) + + +class AsyncSoftFileLock(SoftFileLock, BaseAsyncFileLock): + """Simply watches the existence of the lock file.""" + + +class AsyncUnixFileLock(UnixFileLock, BaseAsyncFileLock): + """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems.""" + + +class AsyncWindowsFileLock(WindowsFileLock, BaseAsyncFileLock): + """Uses the :func:`msvcrt.locking` to hard lock the lock file on windows systems.""" + + +__all__ = [ + "AsyncAcquireReturnProxy", + "AsyncSoftFileLock", + "AsyncUnixFileLock", + "AsyncWindowsFileLock", + "BaseAsyncFileLock", +] diff --git a/meow/lib/python3.13/site-packages/filelock/py.typed b/meow/lib/python3.13/site-packages/filelock/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/meow/lib/python3.13/site-packages/filelock/version.py b/meow/lib/python3.13/site-packages/filelock/version.py new file mode 100644 index 0000000000000000000000000000000000000000..4c4b30d4db20040de197c104ff86f47879e0c956 --- /dev/null +++ b/meow/lib/python3.13/site-packages/filelock/version.py @@ -0,0 +1,16 @@ +# file generated by setuptools_scm +# don't change, don't track in version control +TYPE_CHECKING = False +if TYPE_CHECKING: + from typing import Tuple, Union + VERSION_TUPLE = Tuple[Union[int, str], ...] +else: + VERSION_TUPLE = object + +version: str +__version__: str +__version_tuple__: VERSION_TUPLE +version_tuple: VERSION_TUPLE + +__version__ = version = '3.16.1' +__version_tuple__ = version_tuple = (3, 16, 1) diff --git a/meow/lib/python3.13/site-packages/fsspec/__init__.py b/meow/lib/python3.13/site-packages/fsspec/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d7af775f77cd67734aebd25ce4e051be87e64fa8 --- /dev/null +++ b/meow/lib/python3.13/site-packages/fsspec/__init__.py @@ -0,0 +1,69 @@ +from importlib.metadata import entry_points + +from . import caching +from ._version import __version__ # noqa: F401 +from .callbacks import Callback +from .compression import available_compressions +from .core import get_fs_token_paths, open, open_files, open_local, url_to_fs +from .exceptions import FSTimeoutError +from .mapping import FSMap, get_mapper +from .registry import ( + available_protocols, + filesystem, + get_filesystem_class, + register_implementation, + registry, +) +from .spec import AbstractFileSystem + +__all__ = [ + "AbstractFileSystem", + "FSTimeoutError", + "FSMap", + "filesystem", + "register_implementation", + "get_filesystem_class", + "get_fs_token_paths", + "get_mapper", + "open", + "open_files", + "open_local", + "registry", + "caching", + "Callback", + "available_protocols", + "available_compressions", + "url_to_fs", +] + + +def process_entries(): + if entry_points is not None: + try: + eps = entry_points() + except TypeError: + pass # importlib-metadata < 0.8 + else: + if hasattr(eps, "select"): # Python 3.10+ / importlib_metadata >= 3.9.0 + specs = eps.select(group="fsspec.specs") + else: + specs = eps.get("fsspec.specs", []) + registered_names = {} + for spec in specs: + err_msg = f"Unable to load filesystem from {spec}" + name = spec.name + if name in registered_names: + continue + registered_names[name] = True + register_implementation( + name, + spec.value.replace(":", "."), + errtxt=err_msg, + # We take our implementations as the ones to overload with if + # for some reason we encounter some, may be the same, already + # registered + clobber=True, + ) + + +process_entries() diff --git a/meow/lib/python3.13/site-packages/fsspec/_version.py b/meow/lib/python3.13/site-packages/fsspec/_version.py new file mode 100644 index 0000000000000000000000000000000000000000..c0e4afe39a23a0b9e1dcf6e59b188db59339b6d5 --- /dev/null +++ b/meow/lib/python3.13/site-packages/fsspec/_version.py @@ -0,0 +1,16 @@ +# file generated by setuptools_scm +# don't change, don't track in version control +TYPE_CHECKING = False +if TYPE_CHECKING: + from typing import Tuple, Union + VERSION_TUPLE = Tuple[Union[int, str], ...] +else: + VERSION_TUPLE = object + +version: str +__version__: str +__version_tuple__: VERSION_TUPLE +version_tuple: VERSION_TUPLE + +__version__ = version = '2024.12.0' +__version_tuple__ = version_tuple = (2024, 12, 0) diff --git a/meow/lib/python3.13/site-packages/fsspec/archive.py b/meow/lib/python3.13/site-packages/fsspec/archive.py new file mode 100644 index 0000000000000000000000000000000000000000..f466780fc802d6aa79be02a2af3424c051503708 --- /dev/null +++ b/meow/lib/python3.13/site-packages/fsspec/archive.py @@ -0,0 +1,73 @@ +from fsspec import AbstractFileSystem +from fsspec.utils import tokenize + + +class AbstractArchiveFileSystem(AbstractFileSystem): + """ + A generic superclass for implementing Archive-based filesystems. + + Currently, it is shared amongst + :class:`~fsspec.implementations.zip.ZipFileSystem`, + :class:`~fsspec.implementations.libarchive.LibArchiveFileSystem` and + :class:`~fsspec.implementations.tar.TarFileSystem`. + """ + + def __str__(self): + return f"" + + __repr__ = __str__ + + def ukey(self, path): + return tokenize(path, self.fo, self.protocol) + + def _all_dirnames(self, paths): + """Returns *all* directory names for each path in paths, including intermediate + ones. + + Parameters + ---------- + paths: Iterable of path strings + """ + if len(paths) == 0: + return set() + + dirnames = {self._parent(path) for path in paths} - {self.root_marker} + return dirnames | self._all_dirnames(dirnames) + + def info(self, path, **kwargs): + self._get_dirs() + path = self._strip_protocol(path) + if path in {"", "/"} and self.dir_cache: + return {"name": "", "type": "directory", "size": 0} + if path in self.dir_cache: + return self.dir_cache[path] + elif path + "/" in self.dir_cache: + return self.dir_cache[path + "/"] + else: + raise FileNotFoundError(path) + + def ls(self, path, detail=True, **kwargs): + self._get_dirs() + paths = {} + for p, f in self.dir_cache.items(): + p = p.rstrip("/") + if "/" in p: + root = p.rsplit("/", 1)[0] + else: + root = "" + if root == path.rstrip("/"): + paths[p] = f + elif all( + (a == b) + for a, b in zip(path.split("/"), [""] + p.strip("/").split("/")) + ): + # root directory entry + ppath = p.rstrip("/").split("/", 1)[0] + if ppath not in paths: + out = {"name": ppath, "size": 0, "type": "directory"} + paths[ppath] = out + if detail: + out = sorted(paths.values(), key=lambda _: _["name"]) + return out + else: + return sorted(paths) diff --git a/meow/lib/python3.13/site-packages/fsspec/asyn.py b/meow/lib/python3.13/site-packages/fsspec/asyn.py new file mode 100644 index 0000000000000000000000000000000000000000..adeb88fb4c72932457ac31e0ff0ff5c26438e317 --- /dev/null +++ b/meow/lib/python3.13/site-packages/fsspec/asyn.py @@ -0,0 +1,1098 @@ +import asyncio +import asyncio.events +import functools +import inspect +import io +import numbers +import os +import re +import threading +from contextlib import contextmanager +from glob import has_magic +from typing import TYPE_CHECKING, Iterable + +from .callbacks import DEFAULT_CALLBACK +from .exceptions import FSTimeoutError +from .implementations.local import LocalFileSystem, make_path_posix, trailing_sep +from .spec import AbstractBufferedFile, AbstractFileSystem +from .utils import glob_translate, is_exception, other_paths + +private = re.compile("_[^_]") +iothread = [None] # dedicated fsspec IO thread +loop = [None] # global event loop for any non-async instance +_lock = None # global lock placeholder +get_running_loop = asyncio.get_running_loop + + +def get_lock(): + """Allocate or return a threading lock. + + The lock is allocated on first use to allow setting one lock per forked process. + """ + global _lock + if not _lock: + _lock = threading.Lock() + return _lock + + +def reset_lock(): + """Reset the global lock. + + This should be called only on the init of a forked process to reset the lock to + None, enabling the new forked process to get a new lock. + """ + global _lock + + iothread[0] = None + loop[0] = None + _lock = None + + +async def _runner(event, coro, result, timeout=None): + timeout = timeout if timeout else None # convert 0 or 0.0 to None + if timeout is not None: + coro = asyncio.wait_for(coro, timeout=timeout) + try: + result[0] = await coro + except Exception as ex: + result[0] = ex + finally: + event.set() + + +def sync(loop, func, *args, timeout=None, **kwargs): + """ + Make loop run coroutine until it returns. Runs in other thread + + Examples + -------- + >>> fsspec.asyn.sync(fsspec.asyn.get_loop(), func, *args, + timeout=timeout, **kwargs) + """ + timeout = timeout if timeout else None # convert 0 or 0.0 to None + # NB: if the loop is not running *yet*, it is OK to submit work + # and we will wait for it + if loop is None or loop.is_closed(): + raise RuntimeError("Loop is not running") + try: + loop0 = asyncio.events.get_running_loop() + if loop0 is loop: + raise NotImplementedError("Calling sync() from within a running loop") + except NotImplementedError: + raise + except RuntimeError: + pass + coro = func(*args, **kwargs) + result = [None] + event = threading.Event() + asyncio.run_coroutine_threadsafe(_runner(event, coro, result, timeout), loop) + while True: + # this loops allows thread to get interrupted + if event.wait(1): + break + if timeout is not None: + timeout -= 1 + if timeout < 0: + raise FSTimeoutError + + return_result = result[0] + if isinstance(return_result, asyncio.TimeoutError): + # suppress asyncio.TimeoutError, raise FSTimeoutError + raise FSTimeoutError from return_result + elif isinstance(return_result, BaseException): + raise return_result + else: + return return_result + + +def sync_wrapper(func, obj=None): + """Given a function, make so can be called in blocking contexts + + Leave obj=None if defining within a class. Pass the instance if attaching + as an attribute of the instance. + """ + + @functools.wraps(func) + def wrapper(*args, **kwargs): + self = obj or args[0] + return sync(self.loop, func, *args, **kwargs) + + return wrapper + + +@contextmanager +def _selector_policy(): + original_policy = asyncio.get_event_loop_policy() + try: + if os.name == "nt" and hasattr(asyncio, "WindowsSelectorEventLoopPolicy"): + asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) + + yield + finally: + asyncio.set_event_loop_policy(original_policy) + + +def get_loop(): + """Create or return the default fsspec IO loop + + The loop will be running on a separate thread. + """ + if loop[0] is None: + with get_lock(): + # repeat the check just in case the loop got filled between the + # previous two calls from another thread + if loop[0] is None: + with _selector_policy(): + loop[0] = asyncio.new_event_loop() + th = threading.Thread(target=loop[0].run_forever, name="fsspecIO") + th.daemon = True + th.start() + iothread[0] = th + return loop[0] + + +if TYPE_CHECKING: + import resource + + ResourceError = resource.error +else: + try: + import resource + except ImportError: + resource = None + ResourceError = OSError + else: + ResourceError = getattr(resource, "error", OSError) + +_DEFAULT_BATCH_SIZE = 128 +_NOFILES_DEFAULT_BATCH_SIZE = 1280 + + +def _get_batch_size(nofiles=False): + from fsspec.config import conf + + if nofiles: + if "nofiles_gather_batch_size" in conf: + return conf["nofiles_gather_batch_size"] + else: + if "gather_batch_size" in conf: + return conf["gather_batch_size"] + if nofiles: + return _NOFILES_DEFAULT_BATCH_SIZE + if resource is None: + return _DEFAULT_BATCH_SIZE + + try: + soft_limit, _ = resource.getrlimit(resource.RLIMIT_NOFILE) + except (ImportError, ValueError, ResourceError): + return _DEFAULT_BATCH_SIZE + + if soft_limit == resource.RLIM_INFINITY: + return -1 + else: + return soft_limit // 8 + + +def running_async() -> bool: + """Being executed by an event loop?""" + try: + asyncio.get_running_loop() + return True + except RuntimeError: + return False + + +async def _run_coros_in_chunks( + coros, + batch_size=None, + callback=DEFAULT_CALLBACK, + timeout=None, + return_exceptions=False, + nofiles=False, +): + """Run the given coroutines in chunks. + + Parameters + ---------- + coros: list of coroutines to run + batch_size: int or None + Number of coroutines to submit/wait on simultaneously. + If -1, then it will not be any throttling. If + None, it will be inferred from _get_batch_size() + callback: fsspec.callbacks.Callback instance + Gets a relative_update when each coroutine completes + timeout: number or None + If given, each coroutine times out after this time. Note that, since + there are multiple batches, the total run time of this function will in + general be longer + return_exceptions: bool + Same meaning as in asyncio.gather + nofiles: bool + If inferring the batch_size, does this operation involve local files? + If yes, you normally expect smaller batches. + """ + + if batch_size is None: + batch_size = _get_batch_size(nofiles=nofiles) + + if batch_size == -1: + batch_size = len(coros) + + assert batch_size > 0 + + async def _run_coro(coro, i): + try: + return await asyncio.wait_for(coro, timeout=timeout), i + except Exception as e: + if not return_exceptions: + raise + return e, i + finally: + callback.relative_update(1) + + i = 0 + n = len(coros) + results = [None] * n + pending = set() + + while pending or i < n: + while len(pending) < batch_size and i < n: + pending.add(asyncio.ensure_future(_run_coro(coros[i], i))) + i += 1 + + if not pending: + break + + done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED) + while done: + result, k = await done.pop() + results[k] = result + + return results + + +# these methods should be implemented as async by any async-able backend +async_methods = [ + "_ls", + "_cat_file", + "_get_file", + "_put_file", + "_rm_file", + "_cp_file", + "_pipe_file", + "_expand_path", + "_info", + "_isfile", + "_isdir", + "_exists", + "_walk", + "_glob", + "_find", + "_du", + "_size", + "_mkdir", + "_makedirs", +] + + +class AsyncFileSystem(AbstractFileSystem): + """Async file operations, default implementations + + Passes bulk operations to asyncio.gather for concurrent operation. + + Implementations that have concurrent batch operations and/or async methods + should inherit from this class instead of AbstractFileSystem. Docstrings are + copied from the un-underscored method in AbstractFileSystem, if not given. + """ + + # note that methods do not have docstring here; they will be copied + # for _* methods and inferred for overridden methods. + + async_impl = True + mirror_sync_methods = True + disable_throttling = False + + def __init__(self, *args, asynchronous=False, loop=None, batch_size=None, **kwargs): + self.asynchronous = asynchronous + self._pid = os.getpid() + if not asynchronous: + self._loop = loop or get_loop() + else: + self._loop = None + self.batch_size = batch_size + super().__init__(*args, **kwargs) + + @property + def loop(self): + if self._pid != os.getpid(): + raise RuntimeError("This class is not fork-safe") + return self._loop + + async def _rm_file(self, path, **kwargs): + raise NotImplementedError + + async def _rm(self, path, recursive=False, batch_size=None, **kwargs): + # TODO: implement on_error + batch_size = batch_size or self.batch_size + path = await self._expand_path(path, recursive=recursive) + return await _run_coros_in_chunks( + [self._rm_file(p, **kwargs) for p in reversed(path)], + batch_size=batch_size, + nofiles=True, + ) + + async def _cp_file(self, path1, path2, **kwargs): + raise NotImplementedError + + async def _mv_file(self, path1, path2): + await self._cp_file(path1, path2) + await self._rm_file(path1) + + async def _copy( + self, + path1, + path2, + recursive=False, + on_error=None, + maxdepth=None, + batch_size=None, + **kwargs, + ): + if on_error is None and recursive: + on_error = "ignore" + elif on_error is None: + on_error = "raise" + + if isinstance(path1, list) and isinstance(path2, list): + # No need to expand paths when both source and destination + # are provided as lists + paths1 = path1 + paths2 = path2 + else: + source_is_str = isinstance(path1, str) + paths1 = await self._expand_path( + path1, maxdepth=maxdepth, recursive=recursive + ) + if source_is_str and (not recursive or maxdepth is not None): + # Non-recursive glob does not copy directories + paths1 = [ + p for p in paths1 if not (trailing_sep(p) or await self._isdir(p)) + ] + if not paths1: + return + + source_is_file = len(paths1) == 1 + dest_is_dir = isinstance(path2, str) and ( + trailing_sep(path2) or await self._isdir(path2) + ) + + exists = source_is_str and ( + (has_magic(path1) and source_is_file) + or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1)) + ) + paths2 = other_paths( + paths1, + path2, + exists=exists, + flatten=not source_is_str, + ) + + batch_size = batch_size or self.batch_size + coros = [self._cp_file(p1, p2, **kwargs) for p1, p2 in zip(paths1, paths2)] + result = await _run_coros_in_chunks( + coros, batch_size=batch_size, return_exceptions=True, nofiles=True + ) + + for ex in filter(is_exception, result): + if on_error == "ignore" and isinstance(ex, FileNotFoundError): + continue + raise ex + + async def _pipe_file(self, path, value, mode="overwrite", **kwargs): + raise NotImplementedError + + async def _pipe(self, path, value=None, batch_size=None, **kwargs): + if isinstance(path, str): + path = {path: value} + batch_size = batch_size or self.batch_size + return await _run_coros_in_chunks( + [self._pipe_file(k, v, **kwargs) for k, v in path.items()], + batch_size=batch_size, + nofiles=True, + ) + + async def _process_limits(self, url, start, end): + """Helper for "Range"-based _cat_file""" + size = None + suff = False + if start is not None and start < 0: + # if start is negative and end None, end is the "suffix length" + if end is None: + end = -start + start = "" + suff = True + else: + size = size or (await self._info(url))["size"] + start = size + start + elif start is None: + start = 0 + if not suff: + if end is not None and end < 0: + if start is not None: + size = size or (await self._info(url))["size"] + end = size + end + elif end is None: + end = "" + if isinstance(end, numbers.Integral): + end -= 1 # bytes range is inclusive + return f"bytes={start}-{end}" + + async def _cat_file(self, path, start=None, end=None, **kwargs): + raise NotImplementedError + + async def _cat( + self, path, recursive=False, on_error="raise", batch_size=None, **kwargs + ): + paths = await self._expand_path(path, recursive=recursive) + coros = [self._cat_file(path, **kwargs) for path in paths] + batch_size = batch_size or self.batch_size + out = await _run_coros_in_chunks( + coros, batch_size=batch_size, nofiles=True, return_exceptions=True + ) + if on_error == "raise": + ex = next(filter(is_exception, out), False) + if ex: + raise ex + if ( + len(paths) > 1 + or isinstance(path, list) + or paths[0] != self._strip_protocol(path) + ): + return { + k: v + for k, v in zip(paths, out) + if on_error != "omit" or not is_exception(v) + } + else: + return out[0] + + async def _cat_ranges( + self, + paths, + starts, + ends, + max_gap=None, + batch_size=None, + on_error="return", + **kwargs, + ): + """Get the contents of byte ranges from one or more files + + Parameters + ---------- + paths: list + A list of of filepaths on this filesystems + starts, ends: int or list + Bytes limits of the read. If using a single int, the same value will be + used to read all the specified files. + """ + # TODO: on_error + if max_gap is not None: + # use utils.merge_offset_ranges + raise NotImplementedError + if not isinstance(paths, list): + raise TypeError + if not isinstance(starts, Iterable): + starts = [starts] * len(paths) + if not isinstance(ends, Iterable): + ends = [ends] * len(paths) + if len(starts) != len(paths) or len(ends) != len(paths): + raise ValueError + coros = [ + self._cat_file(p, start=s, end=e, **kwargs) + for p, s, e in zip(paths, starts, ends) + ] + batch_size = batch_size or self.batch_size + return await _run_coros_in_chunks( + coros, batch_size=batch_size, nofiles=True, return_exceptions=True + ) + + async def _put_file(self, lpath, rpath, mode="overwrite", **kwargs): + raise NotImplementedError + + async def _put( + self, + lpath, + rpath, + recursive=False, + callback=DEFAULT_CALLBACK, + batch_size=None, + maxdepth=None, + **kwargs, + ): + """Copy file(s) from local. + + Copies a specific file or tree of files (if recursive=True). If rpath + ends with a "/", it will be assumed to be a directory, and target files + will go within. + + The put_file method will be called concurrently on a batch of files. The + batch_size option can configure the amount of futures that can be executed + at the same time. If it is -1, then all the files will be uploaded concurrently. + The default can be set for this instance by passing "batch_size" in the + constructor, or for all instances by setting the "gather_batch_size" key + in ``fsspec.config.conf``, falling back to 1/8th of the system limit . + """ + if isinstance(lpath, list) and isinstance(rpath, list): + # No need to expand paths when both source and destination + # are provided as lists + rpaths = rpath + lpaths = lpath + else: + source_is_str = isinstance(lpath, str) + if source_is_str: + lpath = make_path_posix(lpath) + fs = LocalFileSystem() + lpaths = fs.expand_path(lpath, recursive=recursive, maxdepth=maxdepth) + if source_is_str and (not recursive or maxdepth is not None): + # Non-recursive glob does not copy directories + lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))] + if not lpaths: + return + + source_is_file = len(lpaths) == 1 + dest_is_dir = isinstance(rpath, str) and ( + trailing_sep(rpath) or await self._isdir(rpath) + ) + + rpath = self._strip_protocol(rpath) + exists = source_is_str and ( + (has_magic(lpath) and source_is_file) + or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath)) + ) + rpaths = other_paths( + lpaths, + rpath, + exists=exists, + flatten=not source_is_str, + ) + + is_dir = {l: os.path.isdir(l) for l in lpaths} + rdirs = [r for l, r in zip(lpaths, rpaths) if is_dir[l]] + file_pairs = [(l, r) for l, r in zip(lpaths, rpaths) if not is_dir[l]] + + await asyncio.gather(*[self._makedirs(d, exist_ok=True) for d in rdirs]) + batch_size = batch_size or self.batch_size + + coros = [] + callback.set_size(len(file_pairs)) + for lfile, rfile in file_pairs: + put_file = callback.branch_coro(self._put_file) + coros.append(put_file(lfile, rfile, **kwargs)) + + return await _run_coros_in_chunks( + coros, batch_size=batch_size, callback=callback + ) + + async def _get_file(self, rpath, lpath, **kwargs): + raise NotImplementedError + + async def _get( + self, + rpath, + lpath, + recursive=False, + callback=DEFAULT_CALLBACK, + maxdepth=None, + **kwargs, + ): + """Copy file(s) to local. + + Copies a specific file or tree of files (if recursive=True). If lpath + ends with a "/", it will be assumed to be a directory, and target files + will go within. Can submit a list of paths, which may be glob-patterns + and will be expanded. + + The get_file method will be called concurrently on a batch of files. The + batch_size option can configure the amount of futures that can be executed + at the same time. If it is -1, then all the files will be uploaded concurrently. + The default can be set for this instance by passing "batch_size" in the + constructor, or for all instances by setting the "gather_batch_size" key + in ``fsspec.config.conf``, falling back to 1/8th of the system limit . + """ + if isinstance(lpath, list) and isinstance(rpath, list): + # No need to expand paths when both source and destination + # are provided as lists + rpaths = rpath + lpaths = lpath + else: + source_is_str = isinstance(rpath, str) + # First check for rpath trailing slash as _strip_protocol removes it. + source_not_trailing_sep = source_is_str and not trailing_sep(rpath) + rpath = self._strip_protocol(rpath) + rpaths = await self._expand_path( + rpath, recursive=recursive, maxdepth=maxdepth + ) + if source_is_str and (not recursive or maxdepth is not None): + # Non-recursive glob does not copy directories + rpaths = [ + p for p in rpaths if not (trailing_sep(p) or await self._isdir(p)) + ] + if not rpaths: + return + + lpath = make_path_posix(lpath) + source_is_file = len(rpaths) == 1 + dest_is_dir = isinstance(lpath, str) and ( + trailing_sep(lpath) or LocalFileSystem().isdir(lpath) + ) + + exists = source_is_str and ( + (has_magic(rpath) and source_is_file) + or (not has_magic(rpath) and dest_is_dir and source_not_trailing_sep) + ) + lpaths = other_paths( + rpaths, + lpath, + exists=exists, + flatten=not source_is_str, + ) + + [os.makedirs(os.path.dirname(lp), exist_ok=True) for lp in lpaths] + batch_size = kwargs.pop("batch_size", self.batch_size) + + coros = [] + callback.set_size(len(lpaths)) + for lpath, rpath in zip(lpaths, rpaths): + get_file = callback.branch_coro(self._get_file) + coros.append(get_file(rpath, lpath, **kwargs)) + return await _run_coros_in_chunks( + coros, batch_size=batch_size, callback=callback + ) + + async def _isfile(self, path): + try: + return (await self._info(path))["type"] == "file" + except: # noqa: E722 + return False + + async def _isdir(self, path): + try: + return (await self._info(path))["type"] == "directory" + except OSError: + return False + + async def _size(self, path): + return (await self._info(path)).get("size", None) + + async def _sizes(self, paths, batch_size=None): + batch_size = batch_size or self.batch_size + return await _run_coros_in_chunks( + [self._size(p) for p in paths], batch_size=batch_size + ) + + async def _exists(self, path, **kwargs): + try: + await self._info(path, **kwargs) + return True + except FileNotFoundError: + return False + + async def _info(self, path, **kwargs): + raise NotImplementedError + + async def _ls(self, path, detail=True, **kwargs): + raise NotImplementedError + + async def _walk(self, path, maxdepth=None, on_error="omit", **kwargs): + if maxdepth is not None and maxdepth < 1: + raise ValueError("maxdepth must be at least 1") + + path = self._strip_protocol(path) + full_dirs = {} + dirs = {} + files = {} + + detail = kwargs.pop("detail", False) + try: + listing = await self._ls(path, detail=True, **kwargs) + except (FileNotFoundError, OSError) as e: + if on_error == "raise": + raise + elif callable(on_error): + on_error(e) + if detail: + yield path, {}, {} + else: + yield path, [], [] + return + + for info in listing: + # each info name must be at least [path]/part , but here + # we check also for names like [path]/part/ + pathname = info["name"].rstrip("/") + name = pathname.rsplit("/", 1)[-1] + if info["type"] == "directory" and pathname != path: + # do not include "self" path + full_dirs[name] = pathname + dirs[name] = info + elif pathname == path: + # file-like with same name as give path + files[""] = info + else: + files[name] = info + + if detail: + yield path, dirs, files + else: + yield path, list(dirs), list(files) + + if maxdepth is not None: + maxdepth -= 1 + if maxdepth < 1: + return + + for d in dirs: + async for _ in self._walk( + full_dirs[d], maxdepth=maxdepth, detail=detail, **kwargs + ): + yield _ + + async def _glob(self, path, maxdepth=None, **kwargs): + if maxdepth is not None and maxdepth < 1: + raise ValueError("maxdepth must be at least 1") + + import re + + seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,) + ends_with_sep = path.endswith(seps) # _strip_protocol strips trailing slash + path = self._strip_protocol(path) + append_slash_to_dirname = ends_with_sep or path.endswith( + tuple(sep + "**" for sep in seps) + ) + idx_star = path.find("*") if path.find("*") >= 0 else len(path) + idx_qmark = path.find("?") if path.find("?") >= 0 else len(path) + idx_brace = path.find("[") if path.find("[") >= 0 else len(path) + + min_idx = min(idx_star, idx_qmark, idx_brace) + + detail = kwargs.pop("detail", False) + + if not has_magic(path): + if await self._exists(path, **kwargs): + if not detail: + return [path] + else: + return {path: await self._info(path, **kwargs)} + else: + if not detail: + return [] # glob of non-existent returns empty + else: + return {} + elif "/" in path[:min_idx]: + min_idx = path[:min_idx].rindex("/") + root = path[: min_idx + 1] + depth = path[min_idx + 1 :].count("/") + 1 + else: + root = "" + depth = path[min_idx + 1 :].count("/") + 1 + + if "**" in path: + if maxdepth is not None: + idx_double_stars = path.find("**") + depth_double_stars = path[idx_double_stars:].count("/") + 1 + depth = depth - depth_double_stars + maxdepth + else: + depth = None + + allpaths = await self._find( + root, maxdepth=depth, withdirs=True, detail=True, **kwargs + ) + + pattern = glob_translate(path + ("/" if ends_with_sep else "")) + pattern = re.compile(pattern) + + out = { + p: info + for p, info in sorted(allpaths.items()) + if pattern.match( + p + "/" + if append_slash_to_dirname and info["type"] == "directory" + else p + ) + } + + if detail: + return out + else: + return list(out) + + async def _du(self, path, total=True, maxdepth=None, **kwargs): + sizes = {} + # async for? + for f in await self._find(path, maxdepth=maxdepth, **kwargs): + info = await self._info(f) + sizes[info["name"]] = info["size"] + if total: + return sum(sizes.values()) + else: + return sizes + + async def _find(self, path, maxdepth=None, withdirs=False, **kwargs): + path = self._strip_protocol(path) + out = {} + detail = kwargs.pop("detail", False) + + # Add the root directory if withdirs is requested + # This is needed for posix glob compliance + if withdirs and path != "" and await self._isdir(path): + out[path] = await self._info(path) + + # async for? + async for _, dirs, files in self._walk(path, maxdepth, detail=True, **kwargs): + if withdirs: + files.update(dirs) + out.update({info["name"]: info for name, info in files.items()}) + if not out and (await self._isfile(path)): + # walk works on directories, but find should also return [path] + # when path happens to be a file + out[path] = {} + names = sorted(out) + if not detail: + return names + else: + return {name: out[name] for name in names} + + async def _expand_path(self, path, recursive=False, maxdepth=None): + if maxdepth is not None and maxdepth < 1: + raise ValueError("maxdepth must be at least 1") + + if isinstance(path, str): + out = await self._expand_path([path], recursive, maxdepth) + else: + out = set() + path = [self._strip_protocol(p) for p in path] + for p in path: # can gather here + if has_magic(p): + bit = set(await self._glob(p, maxdepth=maxdepth)) + out |= bit + if recursive: + # glob call above expanded one depth so if maxdepth is defined + # then decrement it in expand_path call below. If it is zero + # after decrementing then avoid expand_path call. + if maxdepth is not None and maxdepth <= 1: + continue + out |= set( + await self._expand_path( + list(bit), + recursive=recursive, + maxdepth=maxdepth - 1 if maxdepth is not None else None, + ) + ) + continue + elif recursive: + rec = set(await self._find(p, maxdepth=maxdepth, withdirs=True)) + out |= rec + if p not in out and (recursive is False or (await self._exists(p))): + # should only check once, for the root + out.add(p) + if not out: + raise FileNotFoundError(path) + return sorted(out) + + async def _mkdir(self, path, create_parents=True, **kwargs): + pass # not necessary to implement, may not have directories + + async def _makedirs(self, path, exist_ok=False): + pass # not necessary to implement, may not have directories + + async def open_async(self, path, mode="rb", **kwargs): + if "b" not in mode or kwargs.get("compression"): + raise ValueError + raise NotImplementedError + + +def mirror_sync_methods(obj): + """Populate sync and async methods for obj + + For each method will create a sync version if the name refers to an async method + (coroutine) and there is no override in the child class; will create an async + method for the corresponding sync method if there is no implementation. + + Uses the methods specified in + - async_methods: the set that an implementation is expected to provide + - default_async_methods: that can be derived from their sync version in + AbstractFileSystem + - AsyncFileSystem: async-specific default coroutines + """ + from fsspec import AbstractFileSystem + + for method in async_methods + dir(AsyncFileSystem): + if not method.startswith("_"): + continue + smethod = method[1:] + if private.match(method): + isco = inspect.iscoroutinefunction(getattr(obj, method, None)) + unsync = getattr(getattr(obj, smethod, False), "__func__", None) + is_default = unsync is getattr(AbstractFileSystem, smethod, "") + if isco and is_default: + mth = sync_wrapper(getattr(obj, method), obj=obj) + setattr(obj, smethod, mth) + if not mth.__doc__: + mth.__doc__ = getattr( + getattr(AbstractFileSystem, smethod, None), "__doc__", "" + ) + + +class FSSpecCoroutineCancel(Exception): + pass + + +def _dump_running_tasks( + printout=True, cancel=True, exc=FSSpecCoroutineCancel, with_task=False +): + import traceback + + tasks = [t for t in asyncio.tasks.all_tasks(loop[0]) if not t.done()] + if printout: + [task.print_stack() for task in tasks] + out = [ + { + "locals": task._coro.cr_frame.f_locals, + "file": task._coro.cr_frame.f_code.co_filename, + "firstline": task._coro.cr_frame.f_code.co_firstlineno, + "linelo": task._coro.cr_frame.f_lineno, + "stack": traceback.format_stack(task._coro.cr_frame), + "task": task if with_task else None, + } + for task in tasks + ] + if cancel: + for t in tasks: + cbs = t._callbacks + t.cancel() + asyncio.futures.Future.set_exception(t, exc) + asyncio.futures.Future.cancel(t) + [cb[0](t) for cb in cbs] # cancels any dependent concurrent.futures + try: + t._coro.throw(exc) # exits coro, unless explicitly handled + except exc: + pass + return out + + +class AbstractAsyncStreamedFile(AbstractBufferedFile): + # no read buffering, and always auto-commit + # TODO: readahead might still be useful here, but needs async version + + async def read(self, length=-1): + """ + Return data from cache, or fetch pieces as necessary + + Parameters + ---------- + length: int (-1) + Number of bytes to read; if <0, all remaining bytes. + """ + length = -1 if length is None else int(length) + if self.mode != "rb": + raise ValueError("File not in read mode") + if length < 0: + length = self.size - self.loc + if self.closed: + raise ValueError("I/O operation on closed file.") + if length == 0: + # don't even bother calling fetch + return b"" + out = await self._fetch_range(self.loc, self.loc + length) + self.loc += len(out) + return out + + async def write(self, data): + """ + Write data to buffer. + + Buffer only sent on flush() or if buffer is greater than + or equal to blocksize. + + Parameters + ---------- + data: bytes + Set of bytes to be written. + """ + if self.mode not in {"wb", "ab"}: + raise ValueError("File not in write mode") + if self.closed: + raise ValueError("I/O operation on closed file.") + if self.forced: + raise ValueError("This file has been force-flushed, can only close") + out = self.buffer.write(data) + self.loc += out + if self.buffer.tell() >= self.blocksize: + await self.flush() + return out + + async def close(self): + """Close file + + Finalizes writes, discards cache + """ + if getattr(self, "_unclosable", False): + return + if self.closed: + return + if self.mode == "rb": + self.cache = None + else: + if not self.forced: + await self.flush(force=True) + + if self.fs is not None: + self.fs.invalidate_cache(self.path) + self.fs.invalidate_cache(self.fs._parent(self.path)) + + self.closed = True + + async def flush(self, force=False): + if self.closed: + raise ValueError("Flush on closed file") + if force and self.forced: + raise ValueError("Force flush cannot be called more than once") + if force: + self.forced = True + + if self.mode not in {"wb", "ab"}: + # no-op to flush on read-mode + return + + if not force and self.buffer.tell() < self.blocksize: + # Defer write on small block + return + + if self.offset is None: + # Initialize a multipart upload + self.offset = 0 + try: + await self._initiate_upload() + except: + self.closed = True + raise + + if await self._upload_chunk(final=force) is not False: + self.offset += self.buffer.seek(0, 2) + self.buffer = io.BytesIO() + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + await self.close() + + async def _fetch_range(self, start, end): + raise NotImplementedError + + async def _initiate_upload(self): + pass + + async def _upload_chunk(self, final=False): + raise NotImplementedError diff --git a/meow/lib/python3.13/site-packages/fsspec/caching.py b/meow/lib/python3.13/site-packages/fsspec/caching.py new file mode 100644 index 0000000000000000000000000000000000000000..34231c881e2b09bdc1c04e3969f1d68405a7af02 --- /dev/null +++ b/meow/lib/python3.13/site-packages/fsspec/caching.py @@ -0,0 +1,966 @@ +from __future__ import annotations + +import collections +import functools +import logging +import math +import os +import threading +import warnings +from concurrent.futures import Future, ThreadPoolExecutor +from itertools import groupby +from operator import itemgetter +from typing import ( + TYPE_CHECKING, + Any, + Callable, + ClassVar, + Generic, + NamedTuple, + Optional, + OrderedDict, + TypeVar, +) + +if TYPE_CHECKING: + import mmap + + from typing_extensions import ParamSpec + + P = ParamSpec("P") +else: + P = TypeVar("P") + +T = TypeVar("T") + + +logger = logging.getLogger("fsspec") + +Fetcher = Callable[[int, int], bytes] # Maps (start, end) to bytes + + +class BaseCache: + """Pass-though cache: doesn't keep anything, calls every time + + Acts as base class for other cachers + + Parameters + ---------- + blocksize: int + How far to read ahead in numbers of bytes + fetcher: func + Function of the form f(start, end) which gets bytes from remote as + specified + size: int + How big this file is + """ + + name: ClassVar[str] = "none" + + def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None: + self.blocksize = blocksize + self.nblocks = 0 + self.fetcher = fetcher + self.size = size + self.hit_count = 0 + self.miss_count = 0 + # the bytes that we actually requested + self.total_requested_bytes = 0 + + def _fetch(self, start: int | None, stop: int | None) -> bytes: + if start is None: + start = 0 + if stop is None: + stop = self.size + if start >= self.size or start >= stop: + return b"" + return self.fetcher(start, stop) + + def _reset_stats(self) -> None: + """Reset hit and miss counts for a more ganular report e.g. by file.""" + self.hit_count = 0 + self.miss_count = 0 + self.total_requested_bytes = 0 + + def _log_stats(self) -> str: + """Return a formatted string of the cache statistics.""" + if self.hit_count == 0 and self.miss_count == 0: + # a cache that does nothing, this is for logs only + return "" + return f" , {self.name}: {self.hit_count} hits, {self.miss_count} misses, {self.total_requested_bytes} total requested bytes" + + def __repr__(self) -> str: + # TODO: use rich for better formatting + return f""" + <{self.__class__.__name__}: + block size : {self.blocksize} + block count : {self.nblocks} + file size : {self.size} + cache hits : {self.hit_count} + cache misses: {self.miss_count} + total requested bytes: {self.total_requested_bytes}> + """ + + +class MMapCache(BaseCache): + """memory-mapped sparse file cache + + Opens temporary file, which is filled blocks-wise when data is requested. + Ensure there is enough disc space in the temporary location. + + This cache method might only work on posix + """ + + name = "mmap" + + def __init__( + self, + blocksize: int, + fetcher: Fetcher, + size: int, + location: str | None = None, + blocks: set[int] | None = None, + ) -> None: + super().__init__(blocksize, fetcher, size) + self.blocks = set() if blocks is None else blocks + self.location = location + self.cache = self._makefile() + + def _makefile(self) -> mmap.mmap | bytearray: + import mmap + import tempfile + + if self.size == 0: + return bytearray() + + # posix version + if self.location is None or not os.path.exists(self.location): + if self.location is None: + fd = tempfile.TemporaryFile() + self.blocks = set() + else: + fd = open(self.location, "wb+") + fd.seek(self.size - 1) + fd.write(b"1") + fd.flush() + else: + fd = open(self.location, "r+b") + + return mmap.mmap(fd.fileno(), self.size) + + def _fetch(self, start: int | None, end: int | None) -> bytes: + logger.debug(f"MMap cache fetching {start}-{end}") + if start is None: + start = 0 + if end is None: + end = self.size + if start >= self.size or start >= end: + return b"" + start_block = start // self.blocksize + end_block = end // self.blocksize + block_range = range(start_block, end_block + 1) + # Determine which blocks need to be fetched. This sequence is sorted by construction. + need = (i for i in block_range if i not in self.blocks) + # Count the number of blocks already cached + self.hit_count += sum(1 for i in block_range if i in self.blocks) + + # Consolidate needed blocks. + # Algorithm adapted from Python 2.x itertools documentation. + # We are grouping an enumerated sequence of blocks. By comparing when the difference + # between an ascending range (provided by enumerate) and the needed block numbers + # we can detect when the block number skips values. The key computes this difference. + # Whenever the difference changes, we know that we have previously cached block(s), + # and a new group is started. In other words, this algorithm neatly groups + # runs of consecutive block numbers so they can be fetched together. + for _, _blocks in groupby(enumerate(need), key=lambda x: x[0] - x[1]): + # Extract the blocks from the enumerated sequence + _blocks = tuple(map(itemgetter(1), _blocks)) + # Compute start of first block + sstart = _blocks[0] * self.blocksize + # Compute the end of the last block. Last block may not be full size. + send = min(_blocks[-1] * self.blocksize + self.blocksize, self.size) + + # Fetch bytes (could be multiple consecutive blocks) + self.total_requested_bytes += send - sstart + logger.debug( + f"MMap get blocks {_blocks[0]}-{_blocks[-1]} ({sstart}-{send})" + ) + self.cache[sstart:send] = self.fetcher(sstart, send) + + # Update set of cached blocks + self.blocks.update(_blocks) + # Update cache statistics with number of blocks we had to cache + self.miss_count += len(_blocks) + + return self.cache[start:end] + + def __getstate__(self) -> dict[str, Any]: + state = self.__dict__.copy() + # Remove the unpicklable entries. + del state["cache"] + return state + + def __setstate__(self, state: dict[str, Any]) -> None: + # Restore instance attributes + self.__dict__.update(state) + self.cache = self._makefile() + + +class ReadAheadCache(BaseCache): + """Cache which reads only when we get beyond a block of data + + This is a much simpler version of BytesCache, and does not attempt to + fill holes in the cache or keep fragments alive. It is best suited to + many small reads in a sequential order (e.g., reading lines from a file). + """ + + name = "readahead" + + def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None: + super().__init__(blocksize, fetcher, size) + self.cache = b"" + self.start = 0 + self.end = 0 + + def _fetch(self, start: int | None, end: int | None) -> bytes: + if start is None: + start = 0 + if end is None or end > self.size: + end = self.size + if start >= self.size or start >= end: + return b"" + l = end - start + if start >= self.start and end <= self.end: + # cache hit + self.hit_count += 1 + return self.cache[start - self.start : end - self.start] + elif self.start <= start < self.end: + # partial hit + self.miss_count += 1 + part = self.cache[start - self.start :] + l -= len(part) + start = self.end + else: + # miss + self.miss_count += 1 + part = b"" + end = min(self.size, end + self.blocksize) + self.total_requested_bytes += end - start + self.cache = self.fetcher(start, end) # new block replaces old + self.start = start + self.end = self.start + len(self.cache) + return part + self.cache[:l] + + +class FirstChunkCache(BaseCache): + """Caches the first block of a file only + + This may be useful for file types where the metadata is stored in the header, + but is randomly accessed. + """ + + name = "first" + + def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None: + if blocksize > size: + # this will buffer the whole thing + blocksize = size + super().__init__(blocksize, fetcher, size) + self.cache: bytes | None = None + + def _fetch(self, start: int | None, end: int | None) -> bytes: + start = start or 0 + if start > self.size: + logger.debug("FirstChunkCache: requested start > file size") + return b"" + + end = min(end, self.size) + + if start < self.blocksize: + if self.cache is None: + self.miss_count += 1 + if end > self.blocksize: + self.total_requested_bytes += end + data = self.fetcher(0, end) + self.cache = data[: self.blocksize] + return data[start:] + self.cache = self.fetcher(0, self.blocksize) + self.total_requested_bytes += self.blocksize + part = self.cache[start:end] + if end > self.blocksize: + self.total_requested_bytes += end - self.blocksize + part += self.fetcher(self.blocksize, end) + self.hit_count += 1 + return part + else: + self.miss_count += 1 + self.total_requested_bytes += end - start + return self.fetcher(start, end) + + +class BlockCache(BaseCache): + """ + Cache holding memory as a set of blocks. + + Requests are only ever made ``blocksize`` at a time, and are + stored in an LRU cache. The least recently accessed block is + discarded when more than ``maxblocks`` are stored. + + Parameters + ---------- + blocksize : int + The number of bytes to store in each block. + Requests are only ever made for ``blocksize``, so this + should balance the overhead of making a request against + the granularity of the blocks. + fetcher : Callable + size : int + The total size of the file being cached. + maxblocks : int + The maximum number of blocks to cache for. The maximum memory + use for this cache is then ``blocksize * maxblocks``. + """ + + name = "blockcache" + + def __init__( + self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32 + ) -> None: + super().__init__(blocksize, fetcher, size) + self.nblocks = math.ceil(size / blocksize) + self.maxblocks = maxblocks + self._fetch_block_cached = functools.lru_cache(maxblocks)(self._fetch_block) + + def cache_info(self): + """ + The statistics on the block cache. + + Returns + ------- + NamedTuple + Returned directly from the LRU Cache used internally. + """ + return self._fetch_block_cached.cache_info() + + def __getstate__(self) -> dict[str, Any]: + state = self.__dict__ + del state["_fetch_block_cached"] + return state + + def __setstate__(self, state: dict[str, Any]) -> None: + self.__dict__.update(state) + self._fetch_block_cached = functools.lru_cache(state["maxblocks"])( + self._fetch_block + ) + + def _fetch(self, start: int | None, end: int | None) -> bytes: + if start is None: + start = 0 + if end is None: + end = self.size + if start >= self.size or start >= end: + return b"" + + # byte position -> block numbers + start_block_number = start // self.blocksize + end_block_number = end // self.blocksize + + # these are cached, so safe to do multiple calls for the same start and end. + for block_number in range(start_block_number, end_block_number + 1): + self._fetch_block_cached(block_number) + + return self._read_cache( + start, + end, + start_block_number=start_block_number, + end_block_number=end_block_number, + ) + + def _fetch_block(self, block_number: int) -> bytes: + """ + Fetch the block of data for `block_number`. + """ + if block_number > self.nblocks: + raise ValueError( + f"'block_number={block_number}' is greater than " + f"the number of blocks ({self.nblocks})" + ) + + start = block_number * self.blocksize + end = start + self.blocksize + self.total_requested_bytes += end - start + self.miss_count += 1 + logger.info("BlockCache fetching block %d", block_number) + block_contents = super()._fetch(start, end) + return block_contents + + def _read_cache( + self, start: int, end: int, start_block_number: int, end_block_number: int + ) -> bytes: + """ + Read from our block cache. + + Parameters + ---------- + start, end : int + The start and end byte positions. + start_block_number, end_block_number : int + The start and end block numbers. + """ + start_pos = start % self.blocksize + end_pos = end % self.blocksize + + self.hit_count += 1 + if start_block_number == end_block_number: + block: bytes = self._fetch_block_cached(start_block_number) + return block[start_pos:end_pos] + + else: + # read from the initial + out = [self._fetch_block_cached(start_block_number)[start_pos:]] + + # intermediate blocks + # Note: it'd be nice to combine these into one big request. However + # that doesn't play nicely with our LRU cache. + out.extend( + map( + self._fetch_block_cached, + range(start_block_number + 1, end_block_number), + ) + ) + + # final block + out.append(self._fetch_block_cached(end_block_number)[:end_pos]) + + return b"".join(out) + + +class BytesCache(BaseCache): + """Cache which holds data in a in-memory bytes object + + Implements read-ahead by the block size, for semi-random reads progressing + through the file. + + Parameters + ---------- + trim: bool + As we read more data, whether to discard the start of the buffer when + we are more than a blocksize ahead of it. + """ + + name: ClassVar[str] = "bytes" + + def __init__( + self, blocksize: int, fetcher: Fetcher, size: int, trim: bool = True + ) -> None: + super().__init__(blocksize, fetcher, size) + self.cache = b"" + self.start: int | None = None + self.end: int | None = None + self.trim = trim + + def _fetch(self, start: int | None, end: int | None) -> bytes: + # TODO: only set start/end after fetch, in case it fails? + # is this where retry logic might go? + if start is None: + start = 0 + if end is None: + end = self.size + if start >= self.size or start >= end: + return b"" + if ( + self.start is not None + and start >= self.start + and self.end is not None + and end < self.end + ): + # cache hit: we have all the required data + offset = start - self.start + self.hit_count += 1 + return self.cache[offset : offset + end - start] + + if self.blocksize: + bend = min(self.size, end + self.blocksize) + else: + bend = end + + if bend == start or start > self.size: + return b"" + + if (self.start is None or start < self.start) and ( + self.end is None or end > self.end + ): + # First read, or extending both before and after + self.total_requested_bytes += bend - start + self.miss_count += 1 + self.cache = self.fetcher(start, bend) + self.start = start + else: + assert self.start is not None + assert self.end is not None + self.miss_count += 1 + + if start < self.start: + if self.end is None or self.end - end > self.blocksize: + self.total_requested_bytes += bend - start + self.cache = self.fetcher(start, bend) + self.start = start + else: + self.total_requested_bytes += self.start - start + new = self.fetcher(start, self.start) + self.start = start + self.cache = new + self.cache + elif self.end is not None and bend > self.end: + if self.end > self.size: + pass + elif end - self.end > self.blocksize: + self.total_requested_bytes += bend - start + self.cache = self.fetcher(start, bend) + self.start = start + else: + self.total_requested_bytes += bend - self.end + new = self.fetcher(self.end, bend) + self.cache = self.cache + new + + self.end = self.start + len(self.cache) + offset = start - self.start + out = self.cache[offset : offset + end - start] + if self.trim: + num = (self.end - self.start) // (self.blocksize + 1) + if num > 1: + self.start += self.blocksize * num + self.cache = self.cache[self.blocksize * num :] + return out + + def __len__(self) -> int: + return len(self.cache) + + +class AllBytes(BaseCache): + """Cache entire contents of the file""" + + name: ClassVar[str] = "all" + + def __init__( + self, + blocksize: int | None = None, + fetcher: Fetcher | None = None, + size: int | None = None, + data: bytes | None = None, + ) -> None: + super().__init__(blocksize, fetcher, size) # type: ignore[arg-type] + if data is None: + self.miss_count += 1 + self.total_requested_bytes += self.size + data = self.fetcher(0, self.size) + self.data = data + + def _fetch(self, start: int | None, stop: int | None) -> bytes: + self.hit_count += 1 + return self.data[start:stop] + + +class KnownPartsOfAFile(BaseCache): + """ + Cache holding known file parts. + + Parameters + ---------- + blocksize: int + How far to read ahead in numbers of bytes + fetcher: func + Function of the form f(start, end) which gets bytes from remote as + specified + size: int + How big this file is + data: dict + A dictionary mapping explicit `(start, stop)` file-offset tuples + with known bytes. + strict: bool, default True + Whether to fetch reads that go beyond a known byte-range boundary. + If `False`, any read that ends outside a known part will be zero + padded. Note that zero padding will not be used for reads that + begin outside a known byte-range. + """ + + name: ClassVar[str] = "parts" + + def __init__( + self, + blocksize: int, + fetcher: Fetcher, + size: int, + data: Optional[dict[tuple[int, int], bytes]] = None, + strict: bool = True, + **_: Any, + ): + super().__init__(blocksize, fetcher, size) + self.strict = strict + + # simple consolidation of contiguous blocks + if data: + old_offsets = sorted(data.keys()) + offsets = [old_offsets[0]] + blocks = [data.pop(old_offsets[0])] + for start, stop in old_offsets[1:]: + start0, stop0 = offsets[-1] + if start == stop0: + offsets[-1] = (start0, stop) + blocks[-1] += data.pop((start, stop)) + else: + offsets.append((start, stop)) + blocks.append(data.pop((start, stop))) + + self.data = dict(zip(offsets, blocks)) + else: + self.data = {} + + def _fetch(self, start: int | None, stop: int | None) -> bytes: + if start is None: + start = 0 + if stop is None: + stop = self.size + + out = b"" + for (loc0, loc1), data in self.data.items(): + # If self.strict=False, use zero-padded data + # for reads beyond the end of a "known" buffer + if loc0 <= start < loc1: + off = start - loc0 + out = data[off : off + stop - start] + if not self.strict or loc0 <= stop <= loc1: + # The request is within a known range, or + # it begins within a known range, and we + # are allowed to pad reads beyond the + # buffer with zero + out += b"\x00" * (stop - start - len(out)) + self.hit_count += 1 + return out + else: + # The request ends outside a known range, + # and we are being "strict" about reads + # beyond the buffer + start = loc1 + break + + # We only get here if there is a request outside the + # known parts of the file. In an ideal world, this + # should never happen + if self.fetcher is None: + # We cannot fetch the data, so raise an error + raise ValueError(f"Read is outside the known file parts: {(start, stop)}. ") + # We can fetch the data, but should warn the user + # that this may be slow + warnings.warn( + f"Read is outside the known file parts: {(start, stop)}. " + f"IO/caching performance may be poor!" + ) + logger.debug(f"KnownPartsOfAFile cache fetching {start}-{stop}") + self.total_requested_bytes += stop - start + self.miss_count += 1 + return out + super()._fetch(start, stop) + + +class UpdatableLRU(Generic[P, T]): + """ + Custom implementation of LRU cache that allows updating keys + + Used by BackgroudBlockCache + """ + + class CacheInfo(NamedTuple): + hits: int + misses: int + maxsize: int + currsize: int + + def __init__(self, func: Callable[P, T], max_size: int = 128) -> None: + self._cache: OrderedDict[Any, T] = collections.OrderedDict() + self._func = func + self._max_size = max_size + self._hits = 0 + self._misses = 0 + self._lock = threading.Lock() + + def __call__(self, *args: P.args, **kwargs: P.kwargs) -> T: + if kwargs: + raise TypeError(f"Got unexpected keyword argument {kwargs.keys()}") + with self._lock: + if args in self._cache: + self._cache.move_to_end(args) + self._hits += 1 + return self._cache[args] + + result = self._func(*args, **kwargs) + + with self._lock: + self._cache[args] = result + self._misses += 1 + if len(self._cache) > self._max_size: + self._cache.popitem(last=False) + + return result + + def is_key_cached(self, *args: Any) -> bool: + with self._lock: + return args in self._cache + + def add_key(self, result: T, *args: Any) -> None: + with self._lock: + self._cache[args] = result + if len(self._cache) > self._max_size: + self._cache.popitem(last=False) + + def cache_info(self) -> UpdatableLRU.CacheInfo: + with self._lock: + return self.CacheInfo( + maxsize=self._max_size, + currsize=len(self._cache), + hits=self._hits, + misses=self._misses, + ) + + +class BackgroundBlockCache(BaseCache): + """ + Cache holding memory as a set of blocks with pre-loading of + the next block in the background. + + Requests are only ever made ``blocksize`` at a time, and are + stored in an LRU cache. The least recently accessed block is + discarded when more than ``maxblocks`` are stored. If the + next block is not in cache, it is loaded in a separate thread + in non-blocking way. + + Parameters + ---------- + blocksize : int + The number of bytes to store in each block. + Requests are only ever made for ``blocksize``, so this + should balance the overhead of making a request against + the granularity of the blocks. + fetcher : Callable + size : int + The total size of the file being cached. + maxblocks : int + The maximum number of blocks to cache for. The maximum memory + use for this cache is then ``blocksize * maxblocks``. + """ + + name: ClassVar[str] = "background" + + def __init__( + self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32 + ) -> None: + super().__init__(blocksize, fetcher, size) + self.nblocks = math.ceil(size / blocksize) + self.maxblocks = maxblocks + self._fetch_block_cached = UpdatableLRU(self._fetch_block, maxblocks) + + self._thread_executor = ThreadPoolExecutor(max_workers=1) + self._fetch_future_block_number: int | None = None + self._fetch_future: Future[bytes] | None = None + self._fetch_future_lock = threading.Lock() + + def cache_info(self) -> UpdatableLRU.CacheInfo: + """ + The statistics on the block cache. + + Returns + ------- + NamedTuple + Returned directly from the LRU Cache used internally. + """ + return self._fetch_block_cached.cache_info() + + def __getstate__(self) -> dict[str, Any]: + state = self.__dict__ + del state["_fetch_block_cached"] + del state["_thread_executor"] + del state["_fetch_future_block_number"] + del state["_fetch_future"] + del state["_fetch_future_lock"] + return state + + def __setstate__(self, state) -> None: + self.__dict__.update(state) + self._fetch_block_cached = UpdatableLRU(self._fetch_block, state["maxblocks"]) + self._thread_executor = ThreadPoolExecutor(max_workers=1) + self._fetch_future_block_number = None + self._fetch_future = None + self._fetch_future_lock = threading.Lock() + + def _fetch(self, start: int | None, end: int | None) -> bytes: + if start is None: + start = 0 + if end is None: + end = self.size + if start >= self.size or start >= end: + return b"" + + # byte position -> block numbers + start_block_number = start // self.blocksize + end_block_number = end // self.blocksize + + fetch_future_block_number = None + fetch_future = None + with self._fetch_future_lock: + # Background thread is running. Check we we can or must join it. + if self._fetch_future is not None: + assert self._fetch_future_block_number is not None + if self._fetch_future.done(): + logger.info("BlockCache joined background fetch without waiting.") + self._fetch_block_cached.add_key( + self._fetch_future.result(), self._fetch_future_block_number + ) + # Cleanup the fetch variables. Done with fetching the block. + self._fetch_future_block_number = None + self._fetch_future = None + else: + # Must join if we need the block for the current fetch + must_join = bool( + start_block_number + <= self._fetch_future_block_number + <= end_block_number + ) + if must_join: + # Copy to the local variables to release lock + # before waiting for result + fetch_future_block_number = self._fetch_future_block_number + fetch_future = self._fetch_future + + # Cleanup the fetch variables. Have a local copy. + self._fetch_future_block_number = None + self._fetch_future = None + + # Need to wait for the future for the current read + if fetch_future is not None: + logger.info("BlockCache waiting for background fetch.") + # Wait until result and put it in cache + self._fetch_block_cached.add_key( + fetch_future.result(), fetch_future_block_number + ) + + # these are cached, so safe to do multiple calls for the same start and end. + for block_number in range(start_block_number, end_block_number + 1): + self._fetch_block_cached(block_number) + + # fetch next block in the background if nothing is running in the background, + # the block is within file and it is not already cached + end_block_plus_1 = end_block_number + 1 + with self._fetch_future_lock: + if ( + self._fetch_future is None + and end_block_plus_1 <= self.nblocks + and not self._fetch_block_cached.is_key_cached(end_block_plus_1) + ): + self._fetch_future_block_number = end_block_plus_1 + self._fetch_future = self._thread_executor.submit( + self._fetch_block, end_block_plus_1, "async" + ) + + return self._read_cache( + start, + end, + start_block_number=start_block_number, + end_block_number=end_block_number, + ) + + def _fetch_block(self, block_number: int, log_info: str = "sync") -> bytes: + """ + Fetch the block of data for `block_number`. + """ + if block_number > self.nblocks: + raise ValueError( + f"'block_number={block_number}' is greater than " + f"the number of blocks ({self.nblocks})" + ) + + start = block_number * self.blocksize + end = start + self.blocksize + logger.info("BlockCache fetching block (%s) %d", log_info, block_number) + self.total_requested_bytes += end - start + self.miss_count += 1 + block_contents = super()._fetch(start, end) + return block_contents + + def _read_cache( + self, start: int, end: int, start_block_number: int, end_block_number: int + ) -> bytes: + """ + Read from our block cache. + + Parameters + ---------- + start, end : int + The start and end byte positions. + start_block_number, end_block_number : int + The start and end block numbers. + """ + start_pos = start % self.blocksize + end_pos = end % self.blocksize + + # kind of pointless to count this as a hit, but it is + self.hit_count += 1 + + if start_block_number == end_block_number: + block = self._fetch_block_cached(start_block_number) + return block[start_pos:end_pos] + + else: + # read from the initial + out = [self._fetch_block_cached(start_block_number)[start_pos:]] + + # intermediate blocks + # Note: it'd be nice to combine these into one big request. However + # that doesn't play nicely with our LRU cache. + out.extend( + map( + self._fetch_block_cached, + range(start_block_number + 1, end_block_number), + ) + ) + + # final block + out.append(self._fetch_block_cached(end_block_number)[:end_pos]) + + return b"".join(out) + + +caches: dict[str | None, type[BaseCache]] = { + # one custom case + None: BaseCache, +} + + +def register_cache(cls: type[BaseCache], clobber: bool = False) -> None: + """'Register' cache implementation. + + Parameters + ---------- + clobber: bool, optional + If set to True (default is False) - allow to overwrite existing + entry. + + Raises + ------ + ValueError + """ + name = cls.name + if not clobber and name in caches: + raise ValueError(f"Cache with name {name!r} is already known: {caches[name]}") + caches[name] = cls + + +for c in ( + BaseCache, + MMapCache, + BytesCache, + ReadAheadCache, + BlockCache, + FirstChunkCache, + AllBytes, + KnownPartsOfAFile, + BackgroundBlockCache, +): + register_cache(c) diff --git a/meow/lib/python3.13/site-packages/fsspec/callbacks.py b/meow/lib/python3.13/site-packages/fsspec/callbacks.py new file mode 100644 index 0000000000000000000000000000000000000000..7ca99ca6ac3cd69b28bcd1550f6550e8e648c5fe --- /dev/null +++ b/meow/lib/python3.13/site-packages/fsspec/callbacks.py @@ -0,0 +1,324 @@ +from functools import wraps + + +class Callback: + """ + Base class and interface for callback mechanism + + This class can be used directly for monitoring file transfers by + providing ``callback=Callback(hooks=...)`` (see the ``hooks`` argument, + below), or subclassed for more specialised behaviour. + + Parameters + ---------- + size: int (optional) + Nominal quantity for the value that corresponds to a complete + transfer, e.g., total number of tiles or total number of + bytes + value: int (0) + Starting internal counter value + hooks: dict or None + A dict of named functions to be called on each update. The signature + of these must be ``f(size, value, **kwargs)`` + """ + + def __init__(self, size=None, value=0, hooks=None, **kwargs): + self.size = size + self.value = value + self.hooks = hooks or {} + self.kw = kwargs + + def __enter__(self): + return self + + def __exit__(self, *exc_args): + self.close() + + def close(self): + """Close callback.""" + + def branched(self, path_1, path_2, **kwargs): + """ + Return callback for child transfers + + If this callback is operating at a higher level, e.g., put, which may + trigger transfers that can also be monitored. The function returns a callback + that has to be passed to the child method, e.g., put_file, + as `callback=` argument. + + The implementation uses `callback.branch` for compatibility. + When implementing callbacks, it is recommended to override this function instead + of `branch` and avoid calling `super().branched(...)`. + + Prefer using this function over `branch`. + + Parameters + ---------- + path_1: str + Child's source path + path_2: str + Child's destination path + **kwargs: + Arbitrary keyword arguments + + Returns + ------- + callback: Callback + A callback instance to be passed to the child method + """ + self.branch(path_1, path_2, kwargs) + # mutate kwargs so that we can force the caller to pass "callback=" explicitly + return kwargs.pop("callback", DEFAULT_CALLBACK) + + def branch_coro(self, fn): + """ + Wraps a coroutine, and pass a new child callback to it. + """ + + @wraps(fn) + async def func(path1, path2: str, **kwargs): + with self.branched(path1, path2, **kwargs) as child: + return await fn(path1, path2, callback=child, **kwargs) + + return func + + def set_size(self, size): + """ + Set the internal maximum size attribute + + Usually called if not initially set at instantiation. Note that this + triggers a ``call()``. + + Parameters + ---------- + size: int + """ + self.size = size + self.call() + + def absolute_update(self, value): + """ + Set the internal value state + + Triggers ``call()`` + + Parameters + ---------- + value: int + """ + self.value = value + self.call() + + def relative_update(self, inc=1): + """ + Delta increment the internal counter + + Triggers ``call()`` + + Parameters + ---------- + inc: int + """ + self.value += inc + self.call() + + def call(self, hook_name=None, **kwargs): + """ + Execute hook(s) with current state + + Each function is passed the internal size and current value + + Parameters + ---------- + hook_name: str or None + If given, execute on this hook + kwargs: passed on to (all) hook(s) + """ + if not self.hooks: + return + kw = self.kw.copy() + kw.update(kwargs) + if hook_name: + if hook_name not in self.hooks: + return + return self.hooks[hook_name](self.size, self.value, **kw) + for hook in self.hooks.values() or []: + hook(self.size, self.value, **kw) + + def wrap(self, iterable): + """ + Wrap an iterable to call ``relative_update`` on each iterations + + Parameters + ---------- + iterable: Iterable + The iterable that is being wrapped + """ + for item in iterable: + self.relative_update() + yield item + + def branch(self, path_1, path_2, kwargs): + """ + Set callbacks for child transfers + + If this callback is operating at a higher level, e.g., put, which may + trigger transfers that can also be monitored. The passed kwargs are + to be *mutated* to add ``callback=``, if this class supports branching + to children. + + Parameters + ---------- + path_1: str + Child's source path + path_2: str + Child's destination path + kwargs: dict + arguments passed to child method, e.g., put_file. + + Returns + ------- + + """ + return None + + def no_op(self, *_, **__): + pass + + def __getattr__(self, item): + """ + If undefined methods are called on this class, nothing happens + """ + return self.no_op + + @classmethod + def as_callback(cls, maybe_callback=None): + """Transform callback=... into Callback instance + + For the special value of ``None``, return the global instance of + ``NoOpCallback``. This is an alternative to including + ``callback=DEFAULT_CALLBACK`` directly in a method signature. + """ + if maybe_callback is None: + return DEFAULT_CALLBACK + return maybe_callback + + +class NoOpCallback(Callback): + """ + This implementation of Callback does exactly nothing + """ + + def call(self, *args, **kwargs): + return None + + +class DotPrinterCallback(Callback): + """ + Simple example Callback implementation + + Almost identical to Callback with a hook that prints a char; here we + demonstrate how the outer layer may print "#" and the inner layer "." + """ + + def __init__(self, chr_to_print="#", **kwargs): + self.chr = chr_to_print + super().__init__(**kwargs) + + def branch(self, path_1, path_2, kwargs): + """Mutate kwargs to add new instance with different print char""" + kwargs["callback"] = DotPrinterCallback(".") + + def call(self, **kwargs): + """Just outputs a character""" + print(self.chr, end="") + + +class TqdmCallback(Callback): + """ + A callback to display a progress bar using tqdm + + Parameters + ---------- + tqdm_kwargs : dict, (optional) + Any argument accepted by the tqdm constructor. + See the `tqdm doc `_. + Will be forwarded to `tqdm_cls`. + tqdm_cls: (optional) + subclass of `tqdm.tqdm`. If not passed, it will default to `tqdm.tqdm`. + + Examples + -------- + >>> import fsspec + >>> from fsspec.callbacks import TqdmCallback + >>> fs = fsspec.filesystem("memory") + >>> path2distant_data = "/your-path" + >>> fs.upload( + ".", + path2distant_data, + recursive=True, + callback=TqdmCallback(), + ) + + You can forward args to tqdm using the ``tqdm_kwargs`` parameter. + + >>> fs.upload( + ".", + path2distant_data, + recursive=True, + callback=TqdmCallback(tqdm_kwargs={"desc": "Your tqdm description"}), + ) + + You can also customize the progress bar by passing a subclass of `tqdm`. + + .. code-block:: python + + class TqdmFormat(tqdm): + '''Provides a `total_time` format parameter''' + @property + def format_dict(self): + d = super().format_dict + total_time = d["elapsed"] * (d["total"] or 0) / max(d["n"], 1) + d.update(total_time=self.format_interval(total_time) + " in total") + return d + + >>> with TqdmCallback( + tqdm_kwargs={ + "desc": "desc", + "bar_format": "{total_time}: {percentage:.0f}%|{bar}{r_bar}", + }, + tqdm_cls=TqdmFormat, + ) as callback: + fs.upload(".", path2distant_data, recursive=True, callback=callback) + """ + + def __init__(self, tqdm_kwargs=None, *args, **kwargs): + try: + from tqdm import tqdm + + except ImportError as exce: + raise ImportError( + "Using TqdmCallback requires tqdm to be installed" + ) from exce + + self._tqdm_cls = kwargs.pop("tqdm_cls", tqdm) + self._tqdm_kwargs = tqdm_kwargs or {} + self.tqdm = None + super().__init__(*args, **kwargs) + + def call(self, *args, **kwargs): + if self.tqdm is None: + self.tqdm = self._tqdm_cls(total=self.size, **self._tqdm_kwargs) + self.tqdm.total = self.size + self.tqdm.update(self.value - self.tqdm.n) + + def close(self): + if self.tqdm is not None: + self.tqdm.close() + self.tqdm = None + + def __del__(self): + return self.close() + + +DEFAULT_CALLBACK = _DEFAULT_CALLBACK = NoOpCallback() diff --git a/meow/lib/python3.13/site-packages/fsspec/compression.py b/meow/lib/python3.13/site-packages/fsspec/compression.py new file mode 100644 index 0000000000000000000000000000000000000000..fc519c24532fdcd3f6f1b3fc645d38e1e1dde1d7 --- /dev/null +++ b/meow/lib/python3.13/site-packages/fsspec/compression.py @@ -0,0 +1,175 @@ +"""Helper functions for a standard streaming compression API""" + +from zipfile import ZipFile + +import fsspec.utils +from fsspec.spec import AbstractBufferedFile + + +def noop_file(file, mode, **kwargs): + return file + + +# TODO: files should also be available as contexts +# should be functions of the form func(infile, mode=, **kwargs) -> file-like +compr = {None: noop_file} + + +def register_compression(name, callback, extensions, force=False): + """Register an "inferable" file compression type. + + Registers transparent file compression type for use with fsspec.open. + Compression can be specified by name in open, or "infer"-ed for any files + ending with the given extensions. + + Args: + name: (str) The compression type name. Eg. "gzip". + callback: A callable of form (infile, mode, **kwargs) -> file-like. + Accepts an input file-like object, the target mode and kwargs. + Returns a wrapped file-like object. + extensions: (str, Iterable[str]) A file extension, or list of file + extensions for which to infer this compression scheme. Eg. "gz". + force: (bool) Force re-registration of compression type or extensions. + + Raises: + ValueError: If name or extensions already registered, and not force. + + """ + if isinstance(extensions, str): + extensions = [extensions] + + # Validate registration + if name in compr and not force: + raise ValueError(f"Duplicate compression registration: {name}") + + for ext in extensions: + if ext in fsspec.utils.compressions and not force: + raise ValueError(f"Duplicate compression file extension: {ext} ({name})") + + compr[name] = callback + + for ext in extensions: + fsspec.utils.compressions[ext] = name + + +def unzip(infile, mode="rb", filename=None, **kwargs): + if "r" not in mode: + filename = filename or "file" + z = ZipFile(infile, mode="w", **kwargs) + fo = z.open(filename, mode="w") + fo.close = lambda closer=fo.close: closer() or z.close() + return fo + z = ZipFile(infile) + if filename is None: + filename = z.namelist()[0] + return z.open(filename, mode="r", **kwargs) + + +register_compression("zip", unzip, "zip") + +try: + from bz2 import BZ2File +except ImportError: + pass +else: + register_compression("bz2", BZ2File, "bz2") + +try: # pragma: no cover + from isal import igzip + + def isal(infile, mode="rb", **kwargs): + return igzip.IGzipFile(fileobj=infile, mode=mode, **kwargs) + + register_compression("gzip", isal, "gz") +except ImportError: + from gzip import GzipFile + + register_compression( + "gzip", lambda f, **kwargs: GzipFile(fileobj=f, **kwargs), "gz" + ) + +try: + from lzma import LZMAFile + + register_compression("lzma", LZMAFile, "lzma") + register_compression("xz", LZMAFile, "xz") +except ImportError: + pass + +try: + import lzmaffi + + register_compression("lzma", lzmaffi.LZMAFile, "lzma", force=True) + register_compression("xz", lzmaffi.LZMAFile, "xz", force=True) +except ImportError: + pass + + +class SnappyFile(AbstractBufferedFile): + def __init__(self, infile, mode, **kwargs): + import snappy + + super().__init__( + fs=None, path="snappy", mode=mode.strip("b") + "b", size=999999999, **kwargs + ) + self.infile = infile + if "r" in mode: + self.codec = snappy.StreamDecompressor() + else: + self.codec = snappy.StreamCompressor() + + def _upload_chunk(self, final=False): + self.buffer.seek(0) + out = self.codec.add_chunk(self.buffer.read()) + self.infile.write(out) + return True + + def seek(self, loc, whence=0): + raise NotImplementedError("SnappyFile is not seekable") + + def seekable(self): + return False + + def _fetch_range(self, start, end): + """Get the specified set of bytes from remote""" + data = self.infile.read(end - start) + return self.codec.decompress(data) + + +try: + import snappy + + snappy.compress(b"") + # Snappy may use the .sz file extension, but this is not part of the + # standard implementation. + register_compression("snappy", SnappyFile, []) + +except (ImportError, NameError, AttributeError): + pass + +try: + import lz4.frame + + register_compression("lz4", lz4.frame.open, "lz4") +except ImportError: + pass + +try: + import zstandard as zstd + + def zstandard_file(infile, mode="rb"): + if "r" in mode: + cctx = zstd.ZstdDecompressor() + return cctx.stream_reader(infile) + else: + cctx = zstd.ZstdCompressor(level=10) + return cctx.stream_writer(infile) + + register_compression("zstd", zstandard_file, "zst") +except ImportError: + pass + + +def available_compressions(): + """Return a list of the implemented compressions.""" + return list(compr) diff --git a/meow/lib/python3.13/site-packages/fsspec/config.py b/meow/lib/python3.13/site-packages/fsspec/config.py new file mode 100644 index 0000000000000000000000000000000000000000..76d9af14aaf7df47c4551c169f27b05abf9c269e --- /dev/null +++ b/meow/lib/python3.13/site-packages/fsspec/config.py @@ -0,0 +1,131 @@ +from __future__ import annotations + +import configparser +import json +import os +import warnings +from typing import Any + +conf: dict[str, dict[str, Any]] = {} +default_conf_dir = os.path.join(os.path.expanduser("~"), ".config/fsspec") +conf_dir = os.environ.get("FSSPEC_CONFIG_DIR", default_conf_dir) + + +def set_conf_env(conf_dict, envdict=os.environ): + """Set config values from environment variables + + Looks for variables of the form ``FSSPEC_`` and + ``FSSPEC__``. For ``FSSPEC_`` the value is parsed + as a json dictionary and used to ``update`` the config of the + corresponding protocol. For ``FSSPEC__`` there is no + attempt to convert the string value, but the kwarg keys will be lower-cased. + + The ``FSSPEC__`` variables are applied after the + ``FSSPEC_`` ones. + + Parameters + ---------- + conf_dict : dict(str, dict) + This dict will be mutated + envdict : dict-like(str, str) + Source for the values - usually the real environment + """ + kwarg_keys = [] + for key in envdict: + if key.startswith("FSSPEC_") and len(key) > 7 and key[7] != "_": + if key.count("_") > 1: + kwarg_keys.append(key) + continue + try: + value = json.loads(envdict[key]) + except json.decoder.JSONDecodeError as ex: + warnings.warn( + f"Ignoring environment variable {key} due to a parse failure: {ex}" + ) + else: + if isinstance(value, dict): + _, proto = key.split("_", 1) + conf_dict.setdefault(proto.lower(), {}).update(value) + else: + warnings.warn( + f"Ignoring environment variable {key} due to not being a dict:" + f" {type(value)}" + ) + elif key.startswith("FSSPEC"): + warnings.warn( + f"Ignoring environment variable {key} due to having an unexpected name" + ) + + for key in kwarg_keys: + _, proto, kwarg = key.split("_", 2) + conf_dict.setdefault(proto.lower(), {})[kwarg.lower()] = envdict[key] + + +def set_conf_files(cdir, conf_dict): + """Set config values from files + + Scans for INI and JSON files in the given dictionary, and uses their + contents to set the config. In case of repeated values, later values + win. + + In the case of INI files, all values are strings, and these will not + be converted. + + Parameters + ---------- + cdir : str + Directory to search + conf_dict : dict(str, dict) + This dict will be mutated + """ + if not os.path.isdir(cdir): + return + allfiles = sorted(os.listdir(cdir)) + for fn in allfiles: + if fn.endswith(".ini"): + ini = configparser.ConfigParser() + ini.read(os.path.join(cdir, fn)) + for key in ini: + if key == "DEFAULT": + continue + conf_dict.setdefault(key, {}).update(dict(ini[key])) + if fn.endswith(".json"): + with open(os.path.join(cdir, fn)) as f: + js = json.load(f) + for key in js: + conf_dict.setdefault(key, {}).update(dict(js[key])) + + +def apply_config(cls, kwargs, conf_dict=None): + """Supply default values for kwargs when instantiating class + + Augments the passed kwargs, by finding entries in the config dict + which match the classes ``.protocol`` attribute (one or more str) + + Parameters + ---------- + cls : file system implementation + kwargs : dict + conf_dict : dict of dict + Typically this is the global configuration + + Returns + ------- + dict : the modified set of kwargs + """ + if conf_dict is None: + conf_dict = conf + protos = cls.protocol if isinstance(cls.protocol, (tuple, list)) else [cls.protocol] + kw = {} + for proto in protos: + # default kwargs from the current state of the config + if proto in conf_dict: + kw.update(conf_dict[proto]) + # explicit kwargs always win + kw.update(**kwargs) + kwargs = kw + return kwargs + + +set_conf_files(conf_dir, conf) +set_conf_env(conf) diff --git a/meow/lib/python3.13/site-packages/fsspec/conftest.py b/meow/lib/python3.13/site-packages/fsspec/conftest.py new file mode 100644 index 0000000000000000000000000000000000000000..6874a42c4895c3c7b973dc5d63fd4488a4e60b44 --- /dev/null +++ b/meow/lib/python3.13/site-packages/fsspec/conftest.py @@ -0,0 +1,55 @@ +import os +import shutil +import subprocess +import sys +import time + +import pytest + +import fsspec +from fsspec.implementations.cached import CachingFileSystem + + +@pytest.fixture() +def m(): + """ + Fixture providing a memory filesystem. + """ + m = fsspec.filesystem("memory") + m.store.clear() + m.pseudo_dirs.clear() + m.pseudo_dirs.append("") + try: + yield m + finally: + m.store.clear() + m.pseudo_dirs.clear() + m.pseudo_dirs.append("") + + +@pytest.fixture +def ftp_writable(tmpdir): + """ + Fixture providing a writable FTP filesystem. + """ + pytest.importorskip("pyftpdlib") + from fsspec.implementations.ftp import FTPFileSystem + + FTPFileSystem.clear_instance_cache() # remove lingering connections + CachingFileSystem.clear_instance_cache() + d = str(tmpdir) + with open(os.path.join(d, "out"), "wb") as f: + f.write(b"hello" * 10000) + P = subprocess.Popen( + [sys.executable, "-m", "pyftpdlib", "-d", d, "-u", "user", "-P", "pass", "-w"] + ) + try: + time.sleep(1) + yield "localhost", 2121, "user", "pass" + finally: + P.terminate() + P.wait() + try: + shutil.rmtree(tmpdir) + except Exception: + pass diff --git a/meow/lib/python3.13/site-packages/fsspec/core.py b/meow/lib/python3.13/site-packages/fsspec/core.py new file mode 100644 index 0000000000000000000000000000000000000000..1d3c04c6fd39251dabcb038543682cdfacf02feb --- /dev/null +++ b/meow/lib/python3.13/site-packages/fsspec/core.py @@ -0,0 +1,743 @@ +from __future__ import annotations + +import io +import logging +import os +import re +from glob import has_magic +from pathlib import Path + +# for backwards compat, we export cache things from here too +from fsspec.caching import ( # noqa: F401 + BaseCache, + BlockCache, + BytesCache, + MMapCache, + ReadAheadCache, + caches, +) +from fsspec.compression import compr +from fsspec.config import conf +from fsspec.registry import filesystem, get_filesystem_class +from fsspec.utils import ( + _unstrip_protocol, + build_name_function, + infer_compression, + stringify_path, +) + +logger = logging.getLogger("fsspec") + + +class OpenFile: + """ + File-like object to be used in a context + + Can layer (buffered) text-mode and compression over any file-system, which + are typically binary-only. + + These instances are safe to serialize, as the low-level file object + is not created until invoked using ``with``. + + Parameters + ---------- + fs: FileSystem + The file system to use for opening the file. Should be a subclass or duck-type + with ``fsspec.spec.AbstractFileSystem`` + path: str + Location to open + mode: str like 'rb', optional + Mode of the opened file + compression: str or None, optional + Compression to apply + encoding: str or None, optional + The encoding to use if opened in text mode. + errors: str or None, optional + How to handle encoding errors if opened in text mode. + newline: None or str + Passed to TextIOWrapper in text mode, how to handle line endings. + autoopen: bool + If True, calls open() immediately. Mostly used by pickle + pos: int + If given and autoopen is True, seek to this location immediately + """ + + def __init__( + self, + fs, + path, + mode="rb", + compression=None, + encoding=None, + errors=None, + newline=None, + ): + self.fs = fs + self.path = path + self.mode = mode + self.compression = get_compression(path, compression) + self.encoding = encoding + self.errors = errors + self.newline = newline + self.fobjects = [] + + def __reduce__(self): + return ( + OpenFile, + ( + self.fs, + self.path, + self.mode, + self.compression, + self.encoding, + self.errors, + self.newline, + ), + ) + + def __repr__(self): + return f"" + + def __enter__(self): + mode = self.mode.replace("t", "").replace("b", "") + "b" + + try: + f = self.fs.open(self.path, mode=mode) + except FileNotFoundError as e: + if has_magic(self.path): + raise FileNotFoundError( + "%s not found. The URL contains glob characters: you maybe needed\n" + "to pass expand=True in fsspec.open() or the storage_options of \n" + "your library. You can also set the config value 'open_expand'\n" + "before import, or fsspec.core.DEFAULT_EXPAND at runtime, to True.", + self.path, + ) from e + raise + + self.fobjects = [f] + + if self.compression is not None: + compress = compr[self.compression] + f = compress(f, mode=mode[0]) + self.fobjects.append(f) + + if "b" not in self.mode: + # assume, for example, that 'r' is equivalent to 'rt' as in builtin + f = PickleableTextIOWrapper( + f, encoding=self.encoding, errors=self.errors, newline=self.newline + ) + self.fobjects.append(f) + + return self.fobjects[-1] + + def __exit__(self, *args): + self.close() + + @property + def full_name(self): + return _unstrip_protocol(self.path, self.fs) + + def open(self): + """Materialise this as a real open file without context + + The OpenFile object should be explicitly closed to avoid enclosed file + instances persisting. You must, therefore, keep a reference to the OpenFile + during the life of the file-like it generates. + """ + return self.__enter__() + + def close(self): + """Close all encapsulated file objects""" + for f in reversed(self.fobjects): + if "r" not in self.mode and not f.closed: + f.flush() + f.close() + self.fobjects.clear() + + +class OpenFiles(list): + """List of OpenFile instances + + Can be used in a single context, which opens and closes all of the + contained files. Normal list access to get the elements works as + normal. + + A special case is made for caching filesystems - the files will + be down/uploaded together at the start or end of the context, and + this may happen concurrently, if the target filesystem supports it. + """ + + def __init__(self, *args, mode="rb", fs=None): + self.mode = mode + self.fs = fs + self.files = [] + super().__init__(*args) + + def __enter__(self): + if self.fs is None: + raise ValueError("Context has already been used") + + fs = self.fs + while True: + if hasattr(fs, "open_many"): + # check for concurrent cache download; or set up for upload + self.files = fs.open_many(self) + return self.files + if hasattr(fs, "fs") and fs.fs is not None: + fs = fs.fs + else: + break + return [s.__enter__() for s in self] + + def __exit__(self, *args): + fs = self.fs + [s.__exit__(*args) for s in self] + if "r" not in self.mode: + while True: + if hasattr(fs, "open_many"): + # check for concurrent cache upload + fs.commit_many(self.files) + return + if hasattr(fs, "fs") and fs.fs is not None: + fs = fs.fs + else: + break + + def __getitem__(self, item): + out = super().__getitem__(item) + if isinstance(item, slice): + return OpenFiles(out, mode=self.mode, fs=self.fs) + return out + + def __repr__(self): + return f"" + + +def open_files( + urlpath, + mode="rb", + compression=None, + encoding="utf8", + errors=None, + name_function=None, + num=1, + protocol=None, + newline=None, + auto_mkdir=True, + expand=True, + **kwargs, +): + """Given a path or paths, return a list of ``OpenFile`` objects. + + For writing, a str path must contain the "*" character, which will be filled + in by increasing numbers, e.g., "part*" -> "part1", "part2" if num=2. + + For either reading or writing, can instead provide explicit list of paths. + + Parameters + ---------- + urlpath: string or list + Absolute or relative filepath(s). Prefix with a protocol like ``s3://`` + to read from alternative filesystems. To read from multiple files you + can pass a globstring or a list of paths, with the caveat that they + must all have the same protocol. + mode: 'rb', 'wt', etc. + compression: string or None + If given, open file using compression codec. Can either be a compression + name (a key in ``fsspec.compression.compr``) or "infer" to guess the + compression from the filename suffix. + encoding: str + For text mode only + errors: None or str + Passed to TextIOWrapper in text mode + name_function: function or None + if opening a set of files for writing, those files do not yet exist, + so we need to generate their names by formatting the urlpath for + each sequence number + num: int [1] + if writing mode, number of files we expect to create (passed to + name+function) + protocol: str or None + If given, overrides the protocol found in the URL. + newline: bytes or None + Used for line terminator in text mode. If None, uses system default; + if blank, uses no translation. + auto_mkdir: bool (True) + If in write mode, this will ensure the target directory exists before + writing, by calling ``fs.mkdirs(exist_ok=True)``. + expand: bool + **kwargs: dict + Extra options that make sense to a particular storage connection, e.g. + host, port, username, password, etc. + + Examples + -------- + >>> files = open_files('2015-*-*.csv') # doctest: +SKIP + >>> files = open_files( + ... 's3://bucket/2015-*-*.csv.gz', compression='gzip' + ... ) # doctest: +SKIP + + Returns + ------- + An ``OpenFiles`` instance, which is a list of ``OpenFile`` objects that can + be used as a single context + + Notes + ----- + For a full list of the available protocols and the implementations that + they map across to see the latest online documentation: + + - For implementations built into ``fsspec`` see + https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations + - For implementations in separate packages see + https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations + """ + fs, fs_token, paths = get_fs_token_paths( + urlpath, + mode, + num=num, + name_function=name_function, + storage_options=kwargs, + protocol=protocol, + expand=expand, + ) + if fs.protocol == "file": + fs.auto_mkdir = auto_mkdir + elif "r" not in mode and auto_mkdir: + parents = {fs._parent(path) for path in paths} + for parent in parents: + try: + fs.makedirs(parent, exist_ok=True) + except PermissionError: + pass + return OpenFiles( + [ + OpenFile( + fs, + path, + mode=mode, + compression=compression, + encoding=encoding, + errors=errors, + newline=newline, + ) + for path in paths + ], + mode=mode, + fs=fs, + ) + + +def _un_chain(path, kwargs): + # Avoid a circular import + from fsspec.implementations.cached import CachingFileSystem + + if "::" in path: + x = re.compile(".*[^a-z]+.*") # test for non protocol-like single word + bits = [] + for p in path.split("::"): + if "://" in p or x.match(p): + bits.append(p) + else: + bits.append(p + "://") + else: + bits = [path] + # [[url, protocol, kwargs], ...] + out = [] + previous_bit = None + kwargs = kwargs.copy() + for bit in reversed(bits): + protocol = kwargs.pop("protocol", None) or split_protocol(bit)[0] or "file" + cls = get_filesystem_class(protocol) + extra_kwargs = cls._get_kwargs_from_urls(bit) + kws = kwargs.pop(protocol, {}) + if bit is bits[0]: + kws.update(kwargs) + kw = dict( + **{k: v for k, v in extra_kwargs.items() if k not in kws or v != kws[k]}, + **kws, + ) + bit = cls._strip_protocol(bit) + if "target_protocol" not in kw and issubclass(cls, CachingFileSystem): + bit = previous_bit + out.append((bit, protocol, kw)) + previous_bit = bit + out.reverse() + return out + + +def url_to_fs(url, **kwargs): + """ + Turn fully-qualified and potentially chained URL into filesystem instance + + Parameters + ---------- + url : str + The fsspec-compatible URL + **kwargs: dict + Extra options that make sense to a particular storage connection, e.g. + host, port, username, password, etc. + + Returns + ------- + filesystem : FileSystem + The new filesystem discovered from ``url`` and created with + ``**kwargs``. + urlpath : str + The file-systems-specific URL for ``url``. + """ + url = stringify_path(url) + # non-FS arguments that appear in fsspec.open() + # inspect could keep this in sync with open()'s signature + known_kwargs = { + "compression", + "encoding", + "errors", + "expand", + "mode", + "name_function", + "newline", + "num", + } + kwargs = {k: v for k, v in kwargs.items() if k not in known_kwargs} + chain = _un_chain(url, kwargs) + inkwargs = {} + # Reverse iterate the chain, creating a nested target_* structure + for i, ch in enumerate(reversed(chain)): + urls, protocol, kw = ch + if i == len(chain) - 1: + inkwargs = dict(**kw, **inkwargs) + continue + inkwargs["target_options"] = dict(**kw, **inkwargs) + inkwargs["target_protocol"] = protocol + inkwargs["fo"] = urls + urlpath, protocol, _ = chain[0] + fs = filesystem(protocol, **inkwargs) + return fs, urlpath + + +DEFAULT_EXPAND = conf.get("open_expand", False) + + +def open( + urlpath, + mode="rb", + compression=None, + encoding="utf8", + errors=None, + protocol=None, + newline=None, + expand=None, + **kwargs, +): + """Given a path or paths, return one ``OpenFile`` object. + + Parameters + ---------- + urlpath: string or list + Absolute or relative filepath. Prefix with a protocol like ``s3://`` + to read from alternative filesystems. Should not include glob + character(s). + mode: 'rb', 'wt', etc. + compression: string or None + If given, open file using compression codec. Can either be a compression + name (a key in ``fsspec.compression.compr``) or "infer" to guess the + compression from the filename suffix. + encoding: str + For text mode only + errors: None or str + Passed to TextIOWrapper in text mode + protocol: str or None + If given, overrides the protocol found in the URL. + newline: bytes or None + Used for line terminator in text mode. If None, uses system default; + if blank, uses no translation. + expand: bool or Nonw + Whether to regard file paths containing special glob characters as needing + expansion (finding the first match) or absolute. Setting False allows using + paths which do embed such characters. If None (default), this argument + takes its value from the DEFAULT_EXPAND module variable, which takes + its initial value from the "open_expand" config value at startup, which will + be False if not set. + **kwargs: dict + Extra options that make sense to a particular storage connection, e.g. + host, port, username, password, etc. + + Examples + -------- + >>> openfile = open('2015-01-01.csv') # doctest: +SKIP + >>> openfile = open( + ... 's3://bucket/2015-01-01.csv.gz', compression='gzip' + ... ) # doctest: +SKIP + >>> with openfile as f: + ... df = pd.read_csv(f) # doctest: +SKIP + ... + + Returns + ------- + ``OpenFile`` object. + + Notes + ----- + For a full list of the available protocols and the implementations that + they map across to see the latest online documentation: + + - For implementations built into ``fsspec`` see + https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations + - For implementations in separate packages see + https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations + """ + expand = DEFAULT_EXPAND if expand is None else expand + out = open_files( + urlpath=[urlpath], + mode=mode, + compression=compression, + encoding=encoding, + errors=errors, + protocol=protocol, + newline=newline, + expand=expand, + **kwargs, + ) + if not out: + raise FileNotFoundError(urlpath) + return out[0] + + +def open_local( + url: str | list[str] | Path | list[Path], + mode: str = "rb", + **storage_options: dict, +) -> str | list[str]: + """Open file(s) which can be resolved to local + + For files which either are local, or get downloaded upon open + (e.g., by file caching) + + Parameters + ---------- + url: str or list(str) + mode: str + Must be read mode + storage_options: + passed on to FS for or used by open_files (e.g., compression) + """ + if "r" not in mode: + raise ValueError("Can only ensure local files when reading") + of = open_files(url, mode=mode, **storage_options) + if not getattr(of[0].fs, "local_file", False): + raise ValueError( + "open_local can only be used on a filesystem which" + " has attribute local_file=True" + ) + with of as files: + paths = [f.name for f in files] + if (isinstance(url, str) and not has_magic(url)) or isinstance(url, Path): + return paths[0] + return paths + + +def get_compression(urlpath, compression): + if compression == "infer": + compression = infer_compression(urlpath) + if compression is not None and compression not in compr: + raise ValueError(f"Compression type {compression} not supported") + return compression + + +def split_protocol(urlpath): + """Return protocol, path pair""" + urlpath = stringify_path(urlpath) + if "://" in urlpath: + protocol, path = urlpath.split("://", 1) + if len(protocol) > 1: + # excludes Windows paths + return protocol, path + if urlpath.startswith("data:"): + return urlpath.split(":", 1) + return None, urlpath + + +def strip_protocol(urlpath): + """Return only path part of full URL, according to appropriate backend""" + protocol, _ = split_protocol(urlpath) + cls = get_filesystem_class(protocol) + return cls._strip_protocol(urlpath) + + +def expand_paths_if_needed(paths, mode, num, fs, name_function): + """Expand paths if they have a ``*`` in them (write mode) or any of ``*?[]`` + in them (read mode). + + :param paths: list of paths + mode: str + Mode in which to open files. + num: int + If opening in writing mode, number of files we expect to create. + fs: filesystem object + name_function: callable + If opening in writing mode, this callable is used to generate path + names. Names are generated for each partition by + ``urlpath.replace('*', name_function(partition_index))``. + :return: list of paths + """ + expanded_paths = [] + paths = list(paths) + + if "w" in mode: # read mode + if sum(1 for p in paths if "*" in p) > 1: + raise ValueError( + "When writing data, only one filename mask can be specified." + ) + num = max(num, len(paths)) + + for curr_path in paths: + if "*" in curr_path: + # expand using name_function + expanded_paths.extend(_expand_paths(curr_path, name_function, num)) + else: + expanded_paths.append(curr_path) + # if we generated more paths that asked for, trim the list + if len(expanded_paths) > num: + expanded_paths = expanded_paths[:num] + + else: # read mode + for curr_path in paths: + if has_magic(curr_path): + # expand using glob + expanded_paths.extend(fs.glob(curr_path)) + else: + expanded_paths.append(curr_path) + + return expanded_paths + + +def get_fs_token_paths( + urlpath, + mode="rb", + num=1, + name_function=None, + storage_options=None, + protocol=None, + expand=True, +): + """Filesystem, deterministic token, and paths from a urlpath and options. + + Parameters + ---------- + urlpath: string or iterable + Absolute or relative filepath, URL (may include protocols like + ``s3://``), or globstring pointing to data. + mode: str, optional + Mode in which to open files. + num: int, optional + If opening in writing mode, number of files we expect to create. + name_function: callable, optional + If opening in writing mode, this callable is used to generate path + names. Names are generated for each partition by + ``urlpath.replace('*', name_function(partition_index))``. + storage_options: dict, optional + Additional keywords to pass to the filesystem class. + protocol: str or None + To override the protocol specifier in the URL + expand: bool + Expand string paths for writing, assuming the path is a directory + """ + if isinstance(urlpath, (list, tuple, set)): + if not urlpath: + raise ValueError("empty urlpath sequence") + urlpath0 = stringify_path(next(iter(urlpath))) + else: + urlpath0 = stringify_path(urlpath) + storage_options = storage_options or {} + if protocol: + storage_options["protocol"] = protocol + chain = _un_chain(urlpath0, storage_options or {}) + inkwargs = {} + # Reverse iterate the chain, creating a nested target_* structure + for i, ch in enumerate(reversed(chain)): + urls, nested_protocol, kw = ch + if i == len(chain) - 1: + inkwargs = dict(**kw, **inkwargs) + continue + inkwargs["target_options"] = dict(**kw, **inkwargs) + inkwargs["target_protocol"] = nested_protocol + inkwargs["fo"] = urls + paths, protocol, _ = chain[0] + fs = filesystem(protocol, **inkwargs) + if isinstance(urlpath, (list, tuple, set)): + pchains = [ + _un_chain(stringify_path(u), storage_options or {})[0] for u in urlpath + ] + if len({pc[1] for pc in pchains}) > 1: + raise ValueError("Protocol mismatch getting fs from %s", urlpath) + paths = [pc[0] for pc in pchains] + else: + paths = fs._strip_protocol(paths) + if isinstance(paths, (list, tuple, set)): + if expand: + paths = expand_paths_if_needed(paths, mode, num, fs, name_function) + elif not isinstance(paths, list): + paths = list(paths) + else: + if ("w" in mode or "x" in mode) and expand: + paths = _expand_paths(paths, name_function, num) + elif "*" in paths: + paths = [f for f in sorted(fs.glob(paths)) if not fs.isdir(f)] + else: + paths = [paths] + + return fs, fs._fs_token, paths + + +def _expand_paths(path, name_function, num): + if isinstance(path, str): + if path.count("*") > 1: + raise ValueError("Output path spec must contain exactly one '*'.") + elif "*" not in path: + path = os.path.join(path, "*.part") + + if name_function is None: + name_function = build_name_function(num - 1) + + paths = [path.replace("*", name_function(i)) for i in range(num)] + if paths != sorted(paths): + logger.warning( + "In order to preserve order between partitions" + " paths created with ``name_function`` should " + "sort to partition order" + ) + elif isinstance(path, (tuple, list)): + assert len(path) == num + paths = list(path) + else: + raise ValueError( + "Path should be either\n" + "1. A list of paths: ['foo.json', 'bar.json', ...]\n" + "2. A directory: 'foo/\n" + "3. A path with a '*' in it: 'foo.*.json'" + ) + return paths + + +class PickleableTextIOWrapper(io.TextIOWrapper): + """TextIOWrapper cannot be pickled. This solves it. + + Requires that ``buffer`` be pickleable, which all instances of + AbstractBufferedFile are. + """ + + def __init__( + self, + buffer, + encoding=None, + errors=None, + newline=None, + line_buffering=False, + write_through=False, + ): + self.args = buffer, encoding, errors, newline, line_buffering, write_through + super().__init__(*self.args) + + def __reduce__(self): + return PickleableTextIOWrapper, self.args diff --git a/meow/lib/python3.13/site-packages/fsspec/dircache.py b/meow/lib/python3.13/site-packages/fsspec/dircache.py new file mode 100644 index 0000000000000000000000000000000000000000..eca19566b135e5a7a4f6e7407d56411ec58bfe44 --- /dev/null +++ b/meow/lib/python3.13/site-packages/fsspec/dircache.py @@ -0,0 +1,98 @@ +import time +from collections.abc import MutableMapping +from functools import lru_cache + + +class DirCache(MutableMapping): + """ + Caching of directory listings, in a structure like:: + + {"path0": [ + {"name": "path0/file0", + "size": 123, + "type": "file", + ... + }, + {"name": "path0/file1", + }, + ... + ], + "path1": [...] + } + + Parameters to this class control listing expiry or indeed turn + caching off + """ + + def __init__( + self, + use_listings_cache=True, + listings_expiry_time=None, + max_paths=None, + **kwargs, + ): + """ + + Parameters + ---------- + use_listings_cache: bool + If False, this cache never returns items, but always reports KeyError, + and setting items has no effect + listings_expiry_time: int or float (optional) + Time in seconds that a listing is considered valid. If None, + listings do not expire. + max_paths: int (optional) + The number of most recent listings that are considered valid; 'recent' + refers to when the entry was set. + """ + self._cache = {} + self._times = {} + if max_paths: + self._q = lru_cache(max_paths + 1)(lambda key: self._cache.pop(key, None)) + self.use_listings_cache = use_listings_cache + self.listings_expiry_time = listings_expiry_time + self.max_paths = max_paths + + def __getitem__(self, item): + if self.listings_expiry_time is not None: + if self._times.get(item, 0) - time.time() < -self.listings_expiry_time: + del self._cache[item] + if self.max_paths: + self._q(item) + return self._cache[item] # maybe raises KeyError + + def clear(self): + self._cache.clear() + + def __len__(self): + return len(self._cache) + + def __contains__(self, item): + try: + self[item] + return True + except KeyError: + return False + + def __setitem__(self, key, value): + if not self.use_listings_cache: + return + if self.max_paths: + self._q(key) + self._cache[key] = value + if self.listings_expiry_time is not None: + self._times[key] = time.time() + + def __delitem__(self, key): + del self._cache[key] + + def __iter__(self): + entries = list(self._cache) + + return (k for k in entries if k in self) + + def __reduce__(self): + return ( + DirCache, + (self.use_listings_cache, self.listings_expiry_time, self.max_paths), + ) diff --git a/meow/lib/python3.13/site-packages/fsspec/exceptions.py b/meow/lib/python3.13/site-packages/fsspec/exceptions.py new file mode 100644 index 0000000000000000000000000000000000000000..ae8905475f02655f4fc5863931d99ca9da55db78 --- /dev/null +++ b/meow/lib/python3.13/site-packages/fsspec/exceptions.py @@ -0,0 +1,18 @@ +""" +fsspec user-defined exception classes +""" + +import asyncio + + +class BlocksizeMismatchError(ValueError): + """ + Raised when a cached file is opened with a different blocksize than it was + written with + """ + + +class FSTimeoutError(asyncio.TimeoutError): + """ + Raised when a fsspec function timed out occurs + """ diff --git a/meow/lib/python3.13/site-packages/fsspec/fuse.py b/meow/lib/python3.13/site-packages/fsspec/fuse.py new file mode 100644 index 0000000000000000000000000000000000000000..566d520fce3e94e3bbaee48c3c6acc9f1db315a8 --- /dev/null +++ b/meow/lib/python3.13/site-packages/fsspec/fuse.py @@ -0,0 +1,324 @@ +import argparse +import logging +import os +import stat +import threading +import time +from errno import EIO, ENOENT + +from fuse import FUSE, FuseOSError, LoggingMixIn, Operations + +from fsspec import __version__ +from fsspec.core import url_to_fs + +logger = logging.getLogger("fsspec.fuse") + + +class FUSEr(Operations): + def __init__(self, fs, path, ready_file=False): + self.fs = fs + self.cache = {} + self.root = path.rstrip("/") + "/" + self.counter = 0 + logger.info("Starting FUSE at %s", path) + self._ready_file = ready_file + + def getattr(self, path, fh=None): + logger.debug("getattr %s", path) + if self._ready_file and path in ["/.fuse_ready", ".fuse_ready"]: + return {"type": "file", "st_size": 5} + + path = "".join([self.root, path.lstrip("/")]).rstrip("/") + try: + info = self.fs.info(path) + except FileNotFoundError as exc: + raise FuseOSError(ENOENT) from exc + + data = {"st_uid": info.get("uid", 1000), "st_gid": info.get("gid", 1000)} + perm = info.get("mode", 0o777) + + if info["type"] != "file": + data["st_mode"] = stat.S_IFDIR | perm + data["st_size"] = 0 + data["st_blksize"] = 0 + else: + data["st_mode"] = stat.S_IFREG | perm + data["st_size"] = info["size"] + data["st_blksize"] = 5 * 2**20 + data["st_nlink"] = 1 + data["st_atime"] = info["atime"] if "atime" in info else time.time() + data["st_ctime"] = info["ctime"] if "ctime" in info else time.time() + data["st_mtime"] = info["mtime"] if "mtime" in info else time.time() + return data + + def readdir(self, path, fh): + logger.debug("readdir %s", path) + path = "".join([self.root, path.lstrip("/")]) + files = self.fs.ls(path, False) + files = [os.path.basename(f.rstrip("/")) for f in files] + return [".", ".."] + files + + def mkdir(self, path, mode): + path = "".join([self.root, path.lstrip("/")]) + self.fs.mkdir(path) + return 0 + + def rmdir(self, path): + path = "".join([self.root, path.lstrip("/")]) + self.fs.rmdir(path) + return 0 + + def read(self, path, size, offset, fh): + logger.debug("read %s", (path, size, offset)) + if self._ready_file and path in ["/.fuse_ready", ".fuse_ready"]: + # status indicator + return b"ready" + + f = self.cache[fh] + f.seek(offset) + out = f.read(size) + return out + + def write(self, path, data, offset, fh): + logger.debug("write %s", (path, offset)) + f = self.cache[fh] + f.seek(offset) + f.write(data) + return len(data) + + def create(self, path, flags, fi=None): + logger.debug("create %s", (path, flags)) + fn = "".join([self.root, path.lstrip("/")]) + self.fs.touch(fn) # OS will want to get attributes immediately + f = self.fs.open(fn, "wb") + self.cache[self.counter] = f + self.counter += 1 + return self.counter - 1 + + def open(self, path, flags): + logger.debug("open %s", (path, flags)) + fn = "".join([self.root, path.lstrip("/")]) + if flags % 2 == 0: + # read + mode = "rb" + else: + # write/create + mode = "wb" + self.cache[self.counter] = self.fs.open(fn, mode) + self.counter += 1 + return self.counter - 1 + + def truncate(self, path, length, fh=None): + fn = "".join([self.root, path.lstrip("/")]) + if length != 0: + raise NotImplementedError + # maybe should be no-op since open with write sets size to zero anyway + self.fs.touch(fn) + + def unlink(self, path): + fn = "".join([self.root, path.lstrip("/")]) + try: + self.fs.rm(fn, False) + except (OSError, FileNotFoundError) as exc: + raise FuseOSError(EIO) from exc + + def release(self, path, fh): + try: + if fh in self.cache: + f = self.cache[fh] + f.close() + self.cache.pop(fh) + except Exception as e: + print(e) + return 0 + + def chmod(self, path, mode): + if hasattr(self.fs, "chmod"): + path = "".join([self.root, path.lstrip("/")]) + return self.fs.chmod(path, mode) + raise NotImplementedError + + +def run( + fs, + path, + mount_point, + foreground=True, + threads=False, + ready_file=False, + ops_class=FUSEr, +): + """Mount stuff in a local directory + + This uses fusepy to make it appear as if a given path on an fsspec + instance is in fact resident within the local file-system. + + This requires that fusepy by installed, and that FUSE be available on + the system (typically requiring a package to be installed with + apt, yum, brew, etc.). + + Parameters + ---------- + fs: file-system instance + From one of the compatible implementations + path: str + Location on that file-system to regard as the root directory to + mount. Note that you typically should include the terminating "/" + character. + mount_point: str + An empty directory on the local file-system where the contents of + the remote path will appear. + foreground: bool + Whether or not calling this function will block. Operation will + typically be more stable if True. + threads: bool + Whether or not to create threads when responding to file operations + within the mounter directory. Operation will typically be more + stable if False. + ready_file: bool + Whether the FUSE process is ready. The ``.fuse_ready`` file will + exist in the ``mount_point`` directory if True. Debugging purpose. + ops_class: FUSEr or Subclass of FUSEr + To override the default behavior of FUSEr. For Example, logging + to file. + + """ + func = lambda: FUSE( + ops_class(fs, path, ready_file=ready_file), + mount_point, + nothreads=not threads, + foreground=foreground, + ) + if not foreground: + th = threading.Thread(target=func) + th.daemon = True + th.start() + return th + else: # pragma: no cover + try: + func() + except KeyboardInterrupt: + pass + + +def main(args): + """Mount filesystem from chained URL to MOUNT_POINT. + + Examples: + + python3 -m fsspec.fuse memory /usr/share /tmp/mem + + python3 -m fsspec.fuse local /tmp/source /tmp/local \\ + -l /tmp/fsspecfuse.log + + You can also mount chained-URLs and use special settings: + + python3 -m fsspec.fuse 'filecache::zip::file://data.zip' \\ + / /tmp/zip \\ + -o 'filecache-cache_storage=/tmp/simplecache' + + You can specify the type of the setting by using `[int]` or `[bool]`, + (`true`, `yes`, `1` represents the Boolean value `True`): + + python3 -m fsspec.fuse 'simplecache::ftp://ftp1.at.proftpd.org' \\ + /historic/packages/RPMS /tmp/ftp \\ + -o 'simplecache-cache_storage=/tmp/simplecache' \\ + -o 'simplecache-check_files=false[bool]' \\ + -o 'ftp-listings_expiry_time=60[int]' \\ + -o 'ftp-username=anonymous' \\ + -o 'ftp-password=xieyanbo' + """ + + class RawDescriptionArgumentParser(argparse.ArgumentParser): + def format_help(self): + usage = super().format_help() + parts = usage.split("\n\n") + parts[1] = self.description.rstrip() + return "\n\n".join(parts) + + parser = RawDescriptionArgumentParser(prog="fsspec.fuse", description=main.__doc__) + parser.add_argument("--version", action="version", version=__version__) + parser.add_argument("url", type=str, help="fs url") + parser.add_argument("source_path", type=str, help="source directory in fs") + parser.add_argument("mount_point", type=str, help="local directory") + parser.add_argument( + "-o", + "--option", + action="append", + help="Any options of protocol included in the chained URL", + ) + parser.add_argument( + "-l", "--log-file", type=str, help="Logging FUSE debug info (Default: '')" + ) + parser.add_argument( + "-f", + "--foreground", + action="store_false", + help="Running in foreground or not (Default: False)", + ) + parser.add_argument( + "-t", + "--threads", + action="store_false", + help="Running with threads support (Default: False)", + ) + parser.add_argument( + "-r", + "--ready-file", + action="store_false", + help="The `.fuse_ready` file will exist after FUSE is ready. " + "(Debugging purpose, Default: False)", + ) + args = parser.parse_args(args) + + kwargs = {} + for item in args.option or []: + key, sep, value = item.partition("=") + if not sep: + parser.error(message=f"Wrong option: {item!r}") + val = value.lower() + if val.endswith("[int]"): + value = int(value[: -len("[int]")]) + elif val.endswith("[bool]"): + value = val[: -len("[bool]")] in ["1", "yes", "true"] + + if "-" in key: + fs_name, setting_name = key.split("-", 1) + if fs_name in kwargs: + kwargs[fs_name][setting_name] = value + else: + kwargs[fs_name] = {setting_name: value} + else: + kwargs[key] = value + + if args.log_file: + logging.basicConfig( + level=logging.DEBUG, + filename=args.log_file, + format="%(asctime)s %(message)s", + ) + + class LoggingFUSEr(FUSEr, LoggingMixIn): + pass + + fuser = LoggingFUSEr + else: + fuser = FUSEr + + fs, url_path = url_to_fs(args.url, **kwargs) + logger.debug("Mounting %s to %s", url_path, str(args.mount_point)) + run( + fs, + args.source_path, + args.mount_point, + foreground=args.foreground, + threads=args.threads, + ready_file=args.ready_file, + ops_class=fuser, + ) + + +if __name__ == "__main__": + import sys + + main(sys.argv[1:]) diff --git a/meow/lib/python3.13/site-packages/fsspec/generic.py b/meow/lib/python3.13/site-packages/fsspec/generic.py new file mode 100644 index 0000000000000000000000000000000000000000..9bad0f048f737400b4cd919f7699d15fbfbf7c62 --- /dev/null +++ b/meow/lib/python3.13/site-packages/fsspec/generic.py @@ -0,0 +1,411 @@ +from __future__ import annotations + +import inspect +import logging +import os +import shutil +import uuid +from typing import Optional + +from .asyn import AsyncFileSystem, _run_coros_in_chunks, sync_wrapper +from .callbacks import DEFAULT_CALLBACK +from .core import filesystem, get_filesystem_class, split_protocol, url_to_fs + +_generic_fs = {} +logger = logging.getLogger("fsspec.generic") + + +def set_generic_fs(protocol, **storage_options): + _generic_fs[protocol] = filesystem(protocol, **storage_options) + + +default_method = "default" + + +def _resolve_fs(url, method=None, protocol=None, storage_options=None): + """Pick instance of backend FS""" + method = method or default_method + protocol = protocol or split_protocol(url)[0] + storage_options = storage_options or {} + if method == "default": + return filesystem(protocol) + if method == "generic": + return _generic_fs[protocol] + if method == "current": + cls = get_filesystem_class(protocol) + return cls.current() + if method == "options": + fs, _ = url_to_fs(url, **storage_options.get(protocol, {})) + return fs + raise ValueError(f"Unknown FS resolution method: {method}") + + +def rsync( + source, + destination, + delete_missing=False, + source_field="size", + dest_field="size", + update_cond="different", + inst_kwargs=None, + fs=None, + **kwargs, +): + """Sync files between two directory trees + + (experimental) + + Parameters + ---------- + source: str + Root of the directory tree to take files from. This must be a directory, but + do not include any terminating "/" character + destination: str + Root path to copy into. The contents of this location should be + identical to the contents of ``source`` when done. This will be made a + directory, and the terminal "/" should not be included. + delete_missing: bool + If there are paths in the destination that don't exist in the + source and this is True, delete them. Otherwise, leave them alone. + source_field: str | callable + If ``update_field`` is "different", this is the key in the info + of source files to consider for difference. Maybe a function of the + info dict. + dest_field: str | callable + If ``update_field`` is "different", this is the key in the info + of destination files to consider for difference. May be a function of + the info dict. + update_cond: "different"|"always"|"never" + If "always", every file is copied, regardless of whether it exists in + the destination. If "never", files that exist in the destination are + not copied again. If "different" (default), only copy if the info + fields given by ``source_field`` and ``dest_field`` (usually "size") + are different. Other comparisons may be added in the future. + inst_kwargs: dict|None + If ``fs`` is None, use this set of keyword arguments to make a + GenericFileSystem instance + fs: GenericFileSystem|None + Instance to use if explicitly given. The instance defines how to + to make downstream file system instances from paths. + + Returns + ------- + dict of the copy operations that were performed, {source: destination} + """ + fs = fs or GenericFileSystem(**(inst_kwargs or {})) + source = fs._strip_protocol(source) + destination = fs._strip_protocol(destination) + allfiles = fs.find(source, withdirs=True, detail=True) + if not fs.isdir(source): + raise ValueError("Can only rsync on a directory") + otherfiles = fs.find(destination, withdirs=True, detail=True) + dirs = [ + a + for a, v in allfiles.items() + if v["type"] == "directory" and a.replace(source, destination) not in otherfiles + ] + logger.debug(f"{len(dirs)} directories to create") + if dirs: + fs.make_many_dirs( + [dirn.replace(source, destination) for dirn in dirs], exist_ok=True + ) + allfiles = {a: v for a, v in allfiles.items() if v["type"] == "file"} + logger.debug(f"{len(allfiles)} files to consider for copy") + to_delete = [ + o + for o, v in otherfiles.items() + if o.replace(destination, source) not in allfiles and v["type"] == "file" + ] + for k, v in allfiles.copy().items(): + otherfile = k.replace(source, destination) + if otherfile in otherfiles: + if update_cond == "always": + allfiles[k] = otherfile + elif update_cond == "different": + inf1 = source_field(v) if callable(source_field) else v[source_field] + v2 = otherfiles[otherfile] + inf2 = dest_field(v2) if callable(dest_field) else v2[dest_field] + if inf1 != inf2: + # details mismatch, make copy + allfiles[k] = otherfile + else: + # details match, don't copy + allfiles.pop(k) + else: + # file not in target yet + allfiles[k] = otherfile + logger.debug(f"{len(allfiles)} files to copy") + if allfiles: + source_files, target_files = zip(*allfiles.items()) + fs.cp(source_files, target_files, **kwargs) + logger.debug(f"{len(to_delete)} files to delete") + if delete_missing and to_delete: + fs.rm(to_delete) + return allfiles + + +class GenericFileSystem(AsyncFileSystem): + """Wrapper over all other FS types + + + + This implementation is a single unified interface to be able to run FS operations + over generic URLs, and dispatch to the specific implementations using the URL + protocol prefix. + + Note: instances of this FS are always async, even if you never use it with any async + backend. + """ + + protocol = "generic" # there is no real reason to ever use a protocol with this FS + + def __init__(self, default_method="default", **kwargs): + """ + + Parameters + ---------- + default_method: str (optional) + Defines how to configure backend FS instances. Options are: + - "default": instantiate like FSClass(), with no + extra arguments; this is the default instance of that FS, and can be + configured via the config system + - "generic": takes instances from the `_generic_fs` dict in this module, + which you must populate before use. Keys are by protocol + - "current": takes the most recently instantiated version of each FS + """ + self.method = default_method + super().__init__(**kwargs) + + def _parent(self, path): + fs = _resolve_fs(path, self.method) + return fs.unstrip_protocol(fs._parent(path)) + + def _strip_protocol(self, path): + # normalization only + fs = _resolve_fs(path, self.method) + return fs.unstrip_protocol(fs._strip_protocol(path)) + + async def _find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs): + fs = _resolve_fs(path, self.method) + if fs.async_impl: + out = await fs._find( + path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs + ) + else: + out = fs.find( + path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs + ) + result = {} + for k, v in out.items(): + v = v.copy() # don't corrupt target FS dircache + name = fs.unstrip_protocol(k) + v["name"] = name + result[name] = v + if detail: + return result + return list(result) + + async def _info(self, url, **kwargs): + fs = _resolve_fs(url, self.method) + if fs.async_impl: + out = await fs._info(url, **kwargs) + else: + out = fs.info(url, **kwargs) + out = out.copy() # don't edit originals + out["name"] = fs.unstrip_protocol(out["name"]) + return out + + async def _ls( + self, + url, + detail=True, + **kwargs, + ): + fs = _resolve_fs(url, self.method) + if fs.async_impl: + out = await fs._ls(url, detail=True, **kwargs) + else: + out = fs.ls(url, detail=True, **kwargs) + out = [o.copy() for o in out] # don't edit originals + for o in out: + o["name"] = fs.unstrip_protocol(o["name"]) + if detail: + return out + else: + return [o["name"] for o in out] + + async def _cat_file( + self, + url, + **kwargs, + ): + fs = _resolve_fs(url, self.method) + if fs.async_impl: + return await fs._cat_file(url, **kwargs) + else: + return fs.cat_file(url, **kwargs) + + async def _pipe_file( + self, + path, + value, + **kwargs, + ): + fs = _resolve_fs(path, self.method) + if fs.async_impl: + return await fs._pipe_file(path, value, **kwargs) + else: + return fs.pipe_file(path, value, **kwargs) + + async def _rm(self, url, **kwargs): + urls = url + if isinstance(urls, str): + urls = [urls] + fs = _resolve_fs(urls[0], self.method) + if fs.async_impl: + await fs._rm(urls, **kwargs) + else: + fs.rm(url, **kwargs) + + async def _makedirs(self, path, exist_ok=False): + logger.debug("Make dir %s", path) + fs = _resolve_fs(path, self.method) + if fs.async_impl: + await fs._makedirs(path, exist_ok=exist_ok) + else: + fs.makedirs(path, exist_ok=exist_ok) + + def rsync(self, source, destination, **kwargs): + """Sync files between two directory trees + + See `func:rsync` for more details. + """ + rsync(source, destination, fs=self, **kwargs) + + async def _cp_file( + self, + url, + url2, + blocksize=2**20, + callback=DEFAULT_CALLBACK, + **kwargs, + ): + fs = _resolve_fs(url, self.method) + fs2 = _resolve_fs(url2, self.method) + if fs is fs2: + # pure remote + if fs.async_impl: + return await fs._cp_file(url, url2, **kwargs) + else: + return fs.cp_file(url, url2, **kwargs) + kw = {"blocksize": 0, "cache_type": "none"} + try: + f1 = ( + await fs.open_async(url, "rb") + if hasattr(fs, "open_async") + else fs.open(url, "rb", **kw) + ) + callback.set_size(await maybe_await(f1.size)) + f2 = ( + await fs2.open_async(url2, "wb") + if hasattr(fs2, "open_async") + else fs2.open(url2, "wb", **kw) + ) + while f1.size is None or f2.tell() < f1.size: + data = await maybe_await(f1.read(blocksize)) + if f1.size is None and not data: + break + await maybe_await(f2.write(data)) + callback.absolute_update(f2.tell()) + finally: + try: + await maybe_await(f2.close()) + await maybe_await(f1.close()) + except NameError: + # fail while opening f1 or f2 + pass + + async def _make_many_dirs(self, urls, exist_ok=True): + fs = _resolve_fs(urls[0], self.method) + if fs.async_impl: + coros = [fs._makedirs(u, exist_ok=exist_ok) for u in urls] + await _run_coros_in_chunks(coros) + else: + for u in urls: + fs.makedirs(u, exist_ok=exist_ok) + + make_many_dirs = sync_wrapper(_make_many_dirs) + + async def _copy( + self, + path1: list[str], + path2: list[str], + recursive: bool = False, + on_error: str = "ignore", + maxdepth: Optional[int] = None, + batch_size: Optional[int] = None, + tempdir: Optional[str] = None, + **kwargs, + ): + if recursive: + raise NotImplementedError + fs = _resolve_fs(path1[0], self.method) + fs2 = _resolve_fs(path2[0], self.method) + # not expanding paths atm., assume call is from rsync() + if fs is fs2: + # pure remote + if fs.async_impl: + return await fs._copy(path1, path2, **kwargs) + else: + return fs.copy(path1, path2, **kwargs) + await copy_file_op( + fs, path1, fs2, path2, tempdir, batch_size, on_error=on_error + ) + + +async def copy_file_op( + fs1, url1, fs2, url2, tempdir=None, batch_size=20, on_error="ignore" +): + import tempfile + + tempdir = tempdir or tempfile.mkdtemp() + try: + coros = [ + _copy_file_op( + fs1, + u1, + fs2, + u2, + os.path.join(tempdir, uuid.uuid4().hex), + on_error=on_error, + ) + for u1, u2 in zip(url1, url2) + ] + await _run_coros_in_chunks(coros, batch_size=batch_size) + finally: + shutil.rmtree(tempdir) + + +async def _copy_file_op(fs1, url1, fs2, url2, local, on_error="ignore"): + ex = () if on_error == "raise" else Exception + logger.debug("Copy %s -> %s", url1, url2) + try: + if fs1.async_impl: + await fs1._get_file(url1, local) + else: + fs1.get_file(url1, local) + if fs2.async_impl: + await fs2._put_file(local, url2) + else: + fs2.put_file(local, url2) + os.unlink(local) + logger.debug("Copy %s -> %s; done", url1, url2) + except ex as e: + logger.debug("ignoring cp exception for %s: %s", url1, e) + + +async def maybe_await(cor): + if inspect.iscoroutine(cor): + return await cor + else: + return cor diff --git a/meow/lib/python3.13/site-packages/fsspec/gui.py b/meow/lib/python3.13/site-packages/fsspec/gui.py new file mode 100644 index 0000000000000000000000000000000000000000..321379eb8ef924302cc2f56bccb3bdb873ff86e3 --- /dev/null +++ b/meow/lib/python3.13/site-packages/fsspec/gui.py @@ -0,0 +1,416 @@ +import ast +import contextlib +import logging +import os +import re +from typing import ClassVar, Sequence + +import panel as pn + +from .core import OpenFile, get_filesystem_class, split_protocol +from .registry import known_implementations + +pn.extension() +logger = logging.getLogger("fsspec.gui") + + +class SigSlot: + """Signal-slot mixin, for Panel event passing + + Include this class in a widget manager's superclasses to be able to + register events and callbacks on Panel widgets managed by that class. + + The method ``_register`` should be called as widgets are added, and external + code should call ``connect`` to associate callbacks. + + By default, all signals emit a DEBUG logging statement. + """ + + # names of signals that this class may emit each of which must be + # set by _register for any new instance + signals: ClassVar[Sequence[str]] = [] + # names of actions that this class may respond to + slots: ClassVar[Sequence[str]] = [] + + # each of which must be a method name + + def __init__(self): + self._ignoring_events = False + self._sigs = {} + self._map = {} + self._setup() + + def _setup(self): + """Create GUI elements and register signals""" + self.panel = pn.pane.PaneBase() + # no signals to set up in the base class + + def _register( + self, widget, name, thing="value", log_level=logging.DEBUG, auto=False + ): + """Watch the given attribute of a widget and assign it a named event + + This is normally called at the time a widget is instantiated, in the + class which owns it. + + Parameters + ---------- + widget : pn.layout.Panel or None + Widget to watch. If None, an anonymous signal not associated with + any widget. + name : str + Name of this event + thing : str + Attribute of the given widget to watch + log_level : int + When the signal is triggered, a logging event of the given level + will be fired in the dfviz logger. + auto : bool + If True, automatically connects with a method in this class of the + same name. + """ + if name not in self.signals: + raise ValueError(f"Attempt to assign an undeclared signal: {name}") + self._sigs[name] = { + "widget": widget, + "callbacks": [], + "thing": thing, + "log": log_level, + } + wn = "-".join( + [ + getattr(widget, "name", str(widget)) if widget is not None else "none", + thing, + ] + ) + self._map[wn] = name + if widget is not None: + widget.param.watch(self._signal, thing, onlychanged=True) + if auto and hasattr(self, name): + self.connect(name, getattr(self, name)) + + def _repr_mimebundle_(self, *args, **kwargs): + """Display in a notebook or a server""" + try: + return self.panel._repr_mimebundle_(*args, **kwargs) + except (ValueError, AttributeError) as exc: + raise NotImplementedError( + "Panel does not seem to be set up properly" + ) from exc + + def connect(self, signal, slot): + """Associate call back with given event + + The callback must be a function which takes the "new" value of the + watched attribute as the only parameter. If the callback return False, + this cancels any further processing of the given event. + + Alternatively, the callback can be a string, in which case it means + emitting the correspondingly-named event (i.e., connect to self) + """ + self._sigs[signal]["callbacks"].append(slot) + + def _signal(self, event): + """This is called by a an action on a widget + + Within an self.ignore_events context, nothing happens. + + Tests can execute this method by directly changing the values of + widget components. + """ + if not self._ignoring_events: + wn = "-".join([event.obj.name, event.name]) + if wn in self._map and self._map[wn] in self._sigs: + self._emit(self._map[wn], event.new) + + @contextlib.contextmanager + def ignore_events(self): + """Temporarily turn off events processing in this instance + + (does not propagate to children) + """ + self._ignoring_events = True + try: + yield + finally: + self._ignoring_events = False + + def _emit(self, sig, value=None): + """An event happened, call its callbacks + + This method can be used in tests to simulate message passing without + directly changing visual elements. + + Calling of callbacks will halt whenever one returns False. + """ + logger.log(self._sigs[sig]["log"], f"{sig}: {value}") + for callback in self._sigs[sig]["callbacks"]: + if isinstance(callback, str): + self._emit(callback) + else: + try: + # running callbacks should not break the interface + ret = callback(value) + if ret is False: + break + except Exception as e: + logger.exception( + "Exception (%s) while executing callback for signal: %s", + e, + sig, + ) + + def show(self, threads=False): + """Open a new browser tab and display this instance's interface""" + self.panel.show(threads=threads, verbose=False) + return self + + +class SingleSelect(SigSlot): + """A multiselect which only allows you to select one item for an event""" + + signals = ["_selected", "selected"] # the first is internal + slots = ["set_options", "set_selection", "add", "clear", "select"] + + def __init__(self, **kwargs): + self.kwargs = kwargs + super().__init__() + + def _setup(self): + self.panel = pn.widgets.MultiSelect(**self.kwargs) + self._register(self.panel, "_selected", "value") + self._register(None, "selected") + self.connect("_selected", self.select_one) + + def _signal(self, *args, **kwargs): + super()._signal(*args, **kwargs) + + def select_one(self, *_): + with self.ignore_events(): + val = [self.panel.value[-1]] if self.panel.value else [] + self.panel.value = val + self._emit("selected", self.panel.value) + + def set_options(self, options): + self.panel.options = options + + def clear(self): + self.panel.options = [] + + @property + def value(self): + return self.panel.value + + def set_selection(self, selection): + self.panel.value = [selection] + + +class FileSelector(SigSlot): + """Panel-based graphical file selector widget + + Instances of this widget are interactive and can be displayed in jupyter by having + them as the output of a cell, or in a separate browser tab using ``.show()``. + """ + + signals = [ + "protocol_changed", + "selection_changed", + "directory_entered", + "home_clicked", + "up_clicked", + "go_clicked", + "filters_changed", + ] + slots = ["set_filters", "go_home"] + + def __init__(self, url=None, filters=None, ignore=None, kwargs=None): + """ + + Parameters + ---------- + url : str (optional) + Initial value of the URL to populate the dialog; should include protocol + filters : list(str) (optional) + File endings to include in the listings. If not included, all files are + allowed. Does not affect directories. + If given, the endings will appear as checkboxes in the interface + ignore : list(str) (optional) + Regex(s) of file basename patterns to ignore, e.g., "\\." for typical + hidden files on posix + kwargs : dict (optional) + To pass to file system instance + """ + if url: + self.init_protocol, url = split_protocol(url) + else: + self.init_protocol, url = "file", os.getcwd() + self.init_url = url + self.init_kwargs = (kwargs if isinstance(kwargs, str) else str(kwargs)) or "{}" + self.filters = filters + self.ignore = [re.compile(i) for i in ignore or []] + self._fs = None + super().__init__() + + def _setup(self): + self.url = pn.widgets.TextInput( + name="url", + value=self.init_url, + align="end", + sizing_mode="stretch_width", + width_policy="max", + ) + self.protocol = pn.widgets.Select( + options=sorted(known_implementations), + value=self.init_protocol, + name="protocol", + align="center", + ) + self.kwargs = pn.widgets.TextInput( + name="kwargs", value=self.init_kwargs, align="center" + ) + self.go = pn.widgets.Button(name="⇨", align="end", width=45) + self.main = SingleSelect(size=10) + self.home = pn.widgets.Button(name="🏠", width=40, height=30, align="end") + self.up = pn.widgets.Button(name="‹", width=30, height=30, align="end") + + self._register(self.protocol, "protocol_changed", auto=True) + self._register(self.go, "go_clicked", "clicks", auto=True) + self._register(self.up, "up_clicked", "clicks", auto=True) + self._register(self.home, "home_clicked", "clicks", auto=True) + self._register(None, "selection_changed") + self.main.connect("selected", self.selection_changed) + self._register(None, "directory_entered") + self.prev_protocol = self.protocol.value + self.prev_kwargs = self.storage_options + + self.filter_sel = pn.widgets.CheckBoxGroup( + value=[], options=[], inline=False, align="end", width_policy="min" + ) + self._register(self.filter_sel, "filters_changed", auto=True) + + self.panel = pn.Column( + pn.Row(self.protocol, self.kwargs), + pn.Row(self.home, self.up, self.url, self.go, self.filter_sel), + self.main.panel, + ) + self.set_filters(self.filters) + self.go_clicked() + + def set_filters(self, filters=None): + self.filters = filters + if filters: + self.filter_sel.options = filters + self.filter_sel.value = filters + else: + self.filter_sel.options = [] + self.filter_sel.value = [] + + @property + def storage_options(self): + """Value of the kwargs box as a dictionary""" + return ast.literal_eval(self.kwargs.value) or {} + + @property + def fs(self): + """Current filesystem instance""" + if self._fs is None: + cls = get_filesystem_class(self.protocol.value) + self._fs = cls(**self.storage_options) + return self._fs + + @property + def urlpath(self): + """URL of currently selected item""" + return ( + (f"{self.protocol.value}://{self.main.value[0]}") + if self.main.value + else None + ) + + def open_file(self, mode="rb", compression=None, encoding=None): + """Create OpenFile instance for the currently selected item + + For example, in a notebook you might do something like + + .. code-block:: + + [ ]: sel = FileSelector(); sel + + # user selects their file + + [ ]: with sel.open_file('rb') as f: + ... out = f.read() + + Parameters + ---------- + mode: str (optional) + Open mode for the file. + compression: str (optional) + The interact with the file as compressed. Set to 'infer' to guess + compression from the file ending + encoding: str (optional) + If using text mode, use this encoding; defaults to UTF8. + """ + if self.urlpath is None: + raise ValueError("No file selected") + return OpenFile(self.fs, self.urlpath, mode, compression, encoding) + + def filters_changed(self, values): + self.filters = values + self.go_clicked() + + def selection_changed(self, *_): + if self.urlpath is None: + return + if self.fs.isdir(self.urlpath): + self.url.value = self.fs._strip_protocol(self.urlpath) + self.go_clicked() + + def go_clicked(self, *_): + if ( + self.prev_protocol != self.protocol.value + or self.prev_kwargs != self.storage_options + ): + self._fs = None # causes fs to be recreated + self.prev_protocol = self.protocol.value + self.prev_kwargs = self.storage_options + listing = sorted( + self.fs.ls(self.url.value, detail=True), key=lambda x: x["name"] + ) + listing = [ + l + for l in listing + if not any(i.match(l["name"].rsplit("/", 1)[-1]) for i in self.ignore) + ] + folders = { + "📁 " + o["name"].rsplit("/", 1)[-1]: o["name"] + for o in listing + if o["type"] == "directory" + } + files = { + "📄 " + o["name"].rsplit("/", 1)[-1]: o["name"] + for o in listing + if o["type"] == "file" + } + if self.filters: + files = { + k: v + for k, v in files.items() + if any(v.endswith(ext) for ext in self.filters) + } + self.main.set_options(dict(**folders, **files)) + + def protocol_changed(self, *_): + self._fs = None + self.main.options = [] + self.url.value = "" + + def home_clicked(self, *_): + self.protocol.value = self.init_protocol + self.kwargs.value = self.init_kwargs + self.url.value = self.init_url + self.go_clicked() + + def up_clicked(self, *_): + self.url.value = self.fs._parent(self.url.value) + self.go_clicked() diff --git a/meow/lib/python3.13/site-packages/fsspec/json.py b/meow/lib/python3.13/site-packages/fsspec/json.py new file mode 100644 index 0000000000000000000000000000000000000000..69cead04509a1ebc9175ec5ad449c8a866b60444 --- /dev/null +++ b/meow/lib/python3.13/site-packages/fsspec/json.py @@ -0,0 +1,121 @@ +import json +from contextlib import suppress +from pathlib import PurePath +from typing import ( + Any, + Callable, + ClassVar, + Dict, + List, + Mapping, + Optional, + Sequence, + Tuple, +) + +from .registry import _import_class, get_filesystem_class +from .spec import AbstractFileSystem + + +class FilesystemJSONEncoder(json.JSONEncoder): + include_password: ClassVar[bool] = True + + def default(self, o: Any) -> Any: + if isinstance(o, AbstractFileSystem): + return o.to_dict(include_password=self.include_password) + if isinstance(o, PurePath): + cls = type(o) + return {"cls": f"{cls.__module__}.{cls.__name__}", "str": str(o)} + + return super().default(o) + + def make_serializable(self, obj: Any) -> Any: + """ + Recursively converts an object so that it can be JSON serialized via + :func:`json.dumps` and :func:`json.dump`, without actually calling + said functions. + """ + if isinstance(obj, (str, int, float, bool)): + return obj + if isinstance(obj, Mapping): + return {k: self.make_serializable(v) for k, v in obj.items()} + if isinstance(obj, Sequence): + return [self.make_serializable(v) for v in obj] + + return self.default(obj) + + +class FilesystemJSONDecoder(json.JSONDecoder): + def __init__( + self, + *, + object_hook: Optional[Callable[[Dict[str, Any]], Any]] = None, + parse_float: Optional[Callable[[str], Any]] = None, + parse_int: Optional[Callable[[str], Any]] = None, + parse_constant: Optional[Callable[[str], Any]] = None, + strict: bool = True, + object_pairs_hook: Optional[Callable[[List[Tuple[str, Any]]], Any]] = None, + ) -> None: + self.original_object_hook = object_hook + + super().__init__( + object_hook=self.custom_object_hook, + parse_float=parse_float, + parse_int=parse_int, + parse_constant=parse_constant, + strict=strict, + object_pairs_hook=object_pairs_hook, + ) + + @classmethod + def try_resolve_path_cls(cls, dct: Dict[str, Any]): + with suppress(Exception): + fqp = dct["cls"] + + path_cls = _import_class(fqp) + + if issubclass(path_cls, PurePath): + return path_cls + + return None + + @classmethod + def try_resolve_fs_cls(cls, dct: Dict[str, Any]): + with suppress(Exception): + if "cls" in dct: + try: + fs_cls = _import_class(dct["cls"]) + if issubclass(fs_cls, AbstractFileSystem): + return fs_cls + except Exception: + if "protocol" in dct: # Fallback if cls cannot be imported + return get_filesystem_class(dct["protocol"]) + + raise + + return None + + def custom_object_hook(self, dct: Dict[str, Any]): + if "cls" in dct: + if (obj_cls := self.try_resolve_fs_cls(dct)) is not None: + return AbstractFileSystem.from_dict(dct) + if (obj_cls := self.try_resolve_path_cls(dct)) is not None: + return obj_cls(dct["str"]) + + if self.original_object_hook is not None: + return self.original_object_hook(dct) + + return dct + + def unmake_serializable(self, obj: Any) -> Any: + """ + Inverse function of :meth:`FilesystemJSONEncoder.make_serializable`. + """ + if isinstance(obj, dict): + obj = self.custom_object_hook(obj) + if isinstance(obj, dict): + return {k: self.unmake_serializable(v) for k, v in obj.items()} + if isinstance(obj, (list, tuple)): + return [self.unmake_serializable(v) for v in obj] + + return obj diff --git a/meow/lib/python3.13/site-packages/fsspec/mapping.py b/meow/lib/python3.13/site-packages/fsspec/mapping.py new file mode 100644 index 0000000000000000000000000000000000000000..752eef35273b13eded7297e2e801b58e436a25b1 --- /dev/null +++ b/meow/lib/python3.13/site-packages/fsspec/mapping.py @@ -0,0 +1,251 @@ +import array +import logging +import posixpath +import warnings +from collections.abc import MutableMapping +from functools import cached_property + +from fsspec.core import url_to_fs + +logger = logging.getLogger("fsspec.mapping") + + +class FSMap(MutableMapping): + """Wrap a FileSystem instance as a mutable wrapping. + + The keys of the mapping become files under the given root, and the + values (which must be bytes) the contents of those files. + + Parameters + ---------- + root: string + prefix for all the files + fs: FileSystem instance + check: bool (=True) + performs a touch at the location, to check for write access. + + Examples + -------- + >>> fs = FileSystem(**parameters) # doctest: +SKIP + >>> d = FSMap('my-data/path/', fs) # doctest: +SKIP + or, more likely + >>> d = fs.get_mapper('my-data/path/') + + >>> d['loc1'] = b'Hello World' # doctest: +SKIP + >>> list(d.keys()) # doctest: +SKIP + ['loc1'] + >>> d['loc1'] # doctest: +SKIP + b'Hello World' + """ + + def __init__(self, root, fs, check=False, create=False, missing_exceptions=None): + self.fs = fs + self.root = fs._strip_protocol(root) + self._root_key_to_str = fs._strip_protocol(posixpath.join(root, "x"))[:-1] + if missing_exceptions is None: + missing_exceptions = ( + FileNotFoundError, + IsADirectoryError, + NotADirectoryError, + ) + self.missing_exceptions = missing_exceptions + self.check = check + self.create = create + if create: + if not self.fs.exists(root): + self.fs.mkdir(root) + if check: + if not self.fs.exists(root): + raise ValueError( + f"Path {root} does not exist. Create " + f" with the ``create=True`` keyword" + ) + self.fs.touch(root + "/a") + self.fs.rm(root + "/a") + + @cached_property + def dirfs(self): + """dirfs instance that can be used with the same keys as the mapper""" + from .implementations.dirfs import DirFileSystem + + return DirFileSystem(path=self._root_key_to_str, fs=self.fs) + + def clear(self): + """Remove all keys below root - empties out mapping""" + logger.info("Clear mapping at %s", self.root) + try: + self.fs.rm(self.root, True) + self.fs.mkdir(self.root) + except: # noqa: E722 + pass + + def getitems(self, keys, on_error="raise"): + """Fetch multiple items from the store + + If the backend is async-able, this might proceed concurrently + + Parameters + ---------- + keys: list(str) + They keys to be fetched + on_error : "raise", "omit", "return" + If raise, an underlying exception will be raised (converted to KeyError + if the type is in self.missing_exceptions); if omit, keys with exception + will simply not be included in the output; if "return", all keys are + included in the output, but the value will be bytes or an exception + instance. + + Returns + ------- + dict(key, bytes|exception) + """ + keys2 = [self._key_to_str(k) for k in keys] + oe = on_error if on_error == "raise" else "return" + try: + out = self.fs.cat(keys2, on_error=oe) + if isinstance(out, bytes): + out = {keys2[0]: out} + except self.missing_exceptions as e: + raise KeyError from e + out = { + k: (KeyError() if isinstance(v, self.missing_exceptions) else v) + for k, v in out.items() + } + return { + key: out[k2] if on_error == "raise" else out.get(k2, KeyError(k2)) + for key, k2 in zip(keys, keys2) + if on_error == "return" or not isinstance(out[k2], BaseException) + } + + def setitems(self, values_dict): + """Set the values of multiple items in the store + + Parameters + ---------- + values_dict: dict(str, bytes) + """ + values = {self._key_to_str(k): maybe_convert(v) for k, v in values_dict.items()} + self.fs.pipe(values) + + def delitems(self, keys): + """Remove multiple keys from the store""" + self.fs.rm([self._key_to_str(k) for k in keys]) + + def _key_to_str(self, key): + """Generate full path for the key""" + if not isinstance(key, str): + # raise TypeError("key must be of type `str`, got `{type(key).__name__}`" + warnings.warn( + "from fsspec 2023.5 onward FSMap non-str keys will raise TypeError", + DeprecationWarning, + ) + if isinstance(key, list): + key = tuple(key) + key = str(key) + return f"{self._root_key_to_str}{key}".rstrip("/") + + def _str_to_key(self, s): + """Strip path of to leave key name""" + return s[len(self.root) :].lstrip("/") + + def __getitem__(self, key, default=None): + """Retrieve data""" + k = self._key_to_str(key) + try: + result = self.fs.cat(k) + except self.missing_exceptions as exc: + if default is not None: + return default + raise KeyError(key) from exc + return result + + def pop(self, key, default=None): + """Pop data""" + result = self.__getitem__(key, default) + try: + del self[key] + except KeyError: + pass + return result + + def __setitem__(self, key, value): + """Store value in key""" + key = self._key_to_str(key) + self.fs.mkdirs(self.fs._parent(key), exist_ok=True) + self.fs.pipe_file(key, maybe_convert(value)) + + def __iter__(self): + return (self._str_to_key(x) for x in self.fs.find(self.root)) + + def __len__(self): + return len(self.fs.find(self.root)) + + def __delitem__(self, key): + """Remove key""" + try: + self.fs.rm(self._key_to_str(key)) + except Exception as exc: + raise KeyError from exc + + def __contains__(self, key): + """Does key exist in mapping?""" + path = self._key_to_str(key) + return self.fs.isfile(path) + + def __reduce__(self): + return FSMap, (self.root, self.fs, False, False, self.missing_exceptions) + + +def maybe_convert(value): + if isinstance(value, array.array) or hasattr(value, "__array__"): + # bytes-like things + if hasattr(value, "dtype") and value.dtype.kind in "Mm": + # The buffer interface doesn't support datetime64/timdelta64 numpy + # arrays + value = value.view("int64") + value = bytes(memoryview(value)) + return value + + +def get_mapper( + url="", + check=False, + create=False, + missing_exceptions=None, + alternate_root=None, + **kwargs, +): + """Create key-value interface for given URL and options + + The URL will be of the form "protocol://location" and point to the root + of the mapper required. All keys will be file-names below this location, + and their values the contents of each key. + + Also accepts compound URLs like zip::s3://bucket/file.zip , see ``fsspec.open``. + + Parameters + ---------- + url: str + Root URL of mapping + check: bool + Whether to attempt to read from the location before instantiation, to + check that the mapping does exist + create: bool + Whether to make the directory corresponding to the root before + instantiating + missing_exceptions: None or tuple + If given, these exception types will be regarded as missing keys and + return KeyError when trying to read data. By default, you get + (FileNotFoundError, IsADirectoryError, NotADirectoryError) + alternate_root: None or str + In cases of complex URLs, the parser may fail to pick the correct part + for the mapper root, so this arg can override + + Returns + ------- + ``FSMap`` instance, the dict-like key-value store. + """ + # Removing protocol here - could defer to each open() on the backend + fs, urlpath = url_to_fs(url, **kwargs) + root = alternate_root if alternate_root is not None else urlpath + return FSMap(root, fs, check, create, missing_exceptions=missing_exceptions) diff --git a/meow/lib/python3.13/site-packages/fsspec/parquet.py b/meow/lib/python3.13/site-packages/fsspec/parquet.py new file mode 100644 index 0000000000000000000000000000000000000000..faedb7b9e0aa90b6fb7cba33e794c4b4fb35eb77 --- /dev/null +++ b/meow/lib/python3.13/site-packages/fsspec/parquet.py @@ -0,0 +1,541 @@ +import io +import json +import warnings + +from .core import url_to_fs +from .utils import merge_offset_ranges + +# Parquet-Specific Utilities for fsspec +# +# Most of the functions defined in this module are NOT +# intended for public consumption. The only exception +# to this is `open_parquet_file`, which should be used +# place of `fs.open()` to open parquet-formatted files +# on remote file systems. + + +def open_parquet_file( + path, + mode="rb", + fs=None, + metadata=None, + columns=None, + row_groups=None, + storage_options=None, + strict=False, + engine="auto", + max_gap=64_000, + max_block=256_000_000, + footer_sample_size=1_000_000, + **kwargs, +): + """ + Return a file-like object for a single Parquet file. + + The specified parquet `engine` will be used to parse the + footer metadata, and determine the required byte ranges + from the file. The target path will then be opened with + the "parts" (`KnownPartsOfAFile`) caching strategy. + + Note that this method is intended for usage with remote + file systems, and is unlikely to improve parquet-read + performance on local file systems. + + Parameters + ---------- + path: str + Target file path. + mode: str, optional + Mode option to be passed through to `fs.open`. Default is "rb". + metadata: Any, optional + Parquet metadata object. Object type must be supported + by the backend parquet engine. For now, only the "fastparquet" + engine supports an explicit `ParquetFile` metadata object. + If a metadata object is supplied, the remote footer metadata + will not need to be transferred into local memory. + fs: AbstractFileSystem, optional + Filesystem object to use for opening the file. If nothing is + specified, an `AbstractFileSystem` object will be inferred. + engine : str, default "auto" + Parquet engine to use for metadata parsing. Allowed options + include "fastparquet", "pyarrow", and "auto". The specified + engine must be installed in the current environment. If + "auto" is specified, and both engines are installed, + "fastparquet" will take precedence over "pyarrow". + columns: list, optional + List of all column names that may be read from the file. + row_groups : list, optional + List of all row-groups that may be read from the file. This + may be a list of row-group indices (integers), or it may be + a list of `RowGroup` metadata objects (if the "fastparquet" + engine is used). + storage_options : dict, optional + Used to generate an `AbstractFileSystem` object if `fs` was + not specified. + strict : bool, optional + Whether the resulting `KnownPartsOfAFile` cache should + fetch reads that go beyond a known byte-range boundary. + If `False` (the default), any read that ends outside a + known part will be zero padded. Note that using + `strict=True` may be useful for debugging. + max_gap : int, optional + Neighboring byte ranges will only be merged when their + inter-range gap is <= `max_gap`. Default is 64KB. + max_block : int, optional + Neighboring byte ranges will only be merged when the size of + the aggregated range is <= `max_block`. Default is 256MB. + footer_sample_size : int, optional + Number of bytes to read from the end of the path to look + for the footer metadata. If the sampled bytes do not contain + the footer, a second read request will be required, and + performance will suffer. Default is 1MB. + **kwargs : + Optional key-word arguments to pass to `fs.open` + """ + + # Make sure we have an `AbstractFileSystem` object + # to work with + if fs is None: + fs = url_to_fs(path, **(storage_options or {}))[0] + + # For now, `columns == []` not supported. Just use + # default `open` command with `path` input + if columns is not None and len(columns) == 0: + return fs.open(path, mode=mode) + + # Set the engine + engine = _set_engine(engine) + + # Fetch the known byte ranges needed to read + # `columns` and/or `row_groups` + data = _get_parquet_byte_ranges( + [path], + fs, + metadata=metadata, + columns=columns, + row_groups=row_groups, + engine=engine, + max_gap=max_gap, + max_block=max_block, + footer_sample_size=footer_sample_size, + ) + + # Extract file name from `data` + fn = next(iter(data)) if data else path + + # Call self.open with "parts" caching + options = kwargs.pop("cache_options", {}).copy() + return fs.open( + fn, + mode=mode, + cache_type="parts", + cache_options={ + **options, + "data": data.get(fn, {}), + "strict": strict, + }, + **kwargs, + ) + + +def _get_parquet_byte_ranges( + paths, + fs, + metadata=None, + columns=None, + row_groups=None, + max_gap=64_000, + max_block=256_000_000, + footer_sample_size=1_000_000, + engine="auto", +): + """Get a dictionary of the known byte ranges needed + to read a specific column/row-group selection from a + Parquet dataset. Each value in the output dictionary + is intended for use as the `data` argument for the + `KnownPartsOfAFile` caching strategy of a single path. + """ + + # Set engine if necessary + if isinstance(engine, str): + engine = _set_engine(engine) + + # Pass to specialized function if metadata is defined + if metadata is not None: + # Use the provided parquet metadata object + # to avoid transferring/parsing footer metadata + return _get_parquet_byte_ranges_from_metadata( + metadata, + fs, + engine, + columns=columns, + row_groups=row_groups, + max_gap=max_gap, + max_block=max_block, + ) + + # Get file sizes asynchronously + file_sizes = fs.sizes(paths) + + # Populate global paths, starts, & ends + result = {} + data_paths = [] + data_starts = [] + data_ends = [] + add_header_magic = True + if columns is None and row_groups is None: + # We are NOT selecting specific columns or row-groups. + # + # We can avoid sampling the footers, and just transfer + # all file data with cat_ranges + for i, path in enumerate(paths): + result[path] = {} + for b in range(0, file_sizes[i], max_block): + data_paths.append(path) + data_starts.append(b) + data_ends.append(min(b + max_block, file_sizes[i])) + add_header_magic = False # "Magic" should already be included + else: + # We ARE selecting specific columns or row-groups. + # + # Gather file footers. + # We just take the last `footer_sample_size` bytes of each + # file (or the entire file if it is smaller than that) + footer_starts = [] + footer_ends = [] + for i, path in enumerate(paths): + footer_ends.append(file_sizes[i]) + sample_size = max(0, file_sizes[i] - footer_sample_size) + footer_starts.append(sample_size) + footer_samples = fs.cat_ranges(paths, footer_starts, footer_ends) + + # Check our footer samples and re-sample if necessary. + missing_footer_starts = footer_starts.copy() + large_footer = 0 + for i, path in enumerate(paths): + footer_size = int.from_bytes(footer_samples[i][-8:-4], "little") + real_footer_start = file_sizes[i] - (footer_size + 8) + if real_footer_start < footer_starts[i]: + missing_footer_starts[i] = real_footer_start + large_footer = max(large_footer, (footer_size + 8)) + if large_footer: + warnings.warn( + f"Not enough data was used to sample the parquet footer. " + f"Try setting footer_sample_size >= {large_footer}." + ) + for i, block in enumerate( + fs.cat_ranges( + paths, + missing_footer_starts, + footer_starts, + ) + ): + footer_samples[i] = block + footer_samples[i] + footer_starts[i] = missing_footer_starts[i] + + # Calculate required byte ranges for each path + for i, path in enumerate(paths): + # Deal with small-file case. + # Just include all remaining bytes of the file + # in a single range. + if file_sizes[i] < max_block: + if footer_starts[i] > 0: + # Only need to transfer the data if the + # footer sample isn't already the whole file + data_paths.append(path) + data_starts.append(0) + data_ends.append(footer_starts[i]) + continue + + # Use "engine" to collect data byte ranges + path_data_starts, path_data_ends = engine._parquet_byte_ranges( + columns, + row_groups=row_groups, + footer=footer_samples[i], + footer_start=footer_starts[i], + ) + + data_paths += [path] * len(path_data_starts) + data_starts += path_data_starts + data_ends += path_data_ends + + # Merge adjacent offset ranges + data_paths, data_starts, data_ends = merge_offset_ranges( + data_paths, + data_starts, + data_ends, + max_gap=max_gap, + max_block=max_block, + sort=False, # Should already be sorted + ) + + # Start by populating `result` with footer samples + for i, path in enumerate(paths): + result[path] = {(footer_starts[i], footer_ends[i]): footer_samples[i]} + + # Transfer the data byte-ranges into local memory + _transfer_ranges(fs, result, data_paths, data_starts, data_ends) + + # Add b"PAR1" to header if necessary + if add_header_magic: + _add_header_magic(result) + + return result + + +def _get_parquet_byte_ranges_from_metadata( + metadata, + fs, + engine, + columns=None, + row_groups=None, + max_gap=64_000, + max_block=256_000_000, +): + """Simplified version of `_get_parquet_byte_ranges` for + the case that an engine-specific `metadata` object is + provided, and the remote footer metadata does not need to + be transferred before calculating the required byte ranges. + """ + + # Use "engine" to collect data byte ranges + data_paths, data_starts, data_ends = engine._parquet_byte_ranges( + columns, + row_groups=row_groups, + metadata=metadata, + ) + + # Merge adjacent offset ranges + data_paths, data_starts, data_ends = merge_offset_ranges( + data_paths, + data_starts, + data_ends, + max_gap=max_gap, + max_block=max_block, + sort=False, # Should be sorted + ) + + # Transfer the data byte-ranges into local memory + result = {fn: {} for fn in list(set(data_paths))} + _transfer_ranges(fs, result, data_paths, data_starts, data_ends) + + # Add b"PAR1" to header + _add_header_magic(result) + + return result + + +def _transfer_ranges(fs, blocks, paths, starts, ends): + # Use cat_ranges to gather the data byte_ranges + ranges = (paths, starts, ends) + for path, start, stop, data in zip(*ranges, fs.cat_ranges(*ranges)): + blocks[path][(start, stop)] = data + + +def _add_header_magic(data): + # Add b"PAR1" to file headers + for path in list(data.keys()): + add_magic = True + for k in data[path]: + if k[0] == 0 and k[1] >= 4: + add_magic = False + break + if add_magic: + data[path][(0, 4)] = b"PAR1" + + +def _set_engine(engine_str): + # Define a list of parquet engines to try + if engine_str == "auto": + try_engines = ("fastparquet", "pyarrow") + elif not isinstance(engine_str, str): + raise ValueError( + "Failed to set parquet engine! " + "Please pass 'fastparquet', 'pyarrow', or 'auto'" + ) + elif engine_str not in ("fastparquet", "pyarrow"): + raise ValueError(f"{engine_str} engine not supported by `fsspec.parquet`") + else: + try_engines = [engine_str] + + # Try importing the engines in `try_engines`, + # and choose the first one that succeeds + for engine in try_engines: + try: + if engine == "fastparquet": + return FastparquetEngine() + elif engine == "pyarrow": + return PyarrowEngine() + except ImportError: + pass + + # Raise an error if a supported parquet engine + # was not found + raise ImportError( + f"The following parquet engines are not installed " + f"in your python environment: {try_engines}." + f"Please install 'fastparquert' or 'pyarrow' to " + f"utilize the `fsspec.parquet` module." + ) + + +class FastparquetEngine: + # The purpose of the FastparquetEngine class is + # to check if fastparquet can be imported (on initialization) + # and to define a `_parquet_byte_ranges` method. In the + # future, this class may also be used to define other + # methods/logic that are specific to fastparquet. + + def __init__(self): + import fastparquet as fp + + self.fp = fp + + def _row_group_filename(self, row_group, pf): + return pf.row_group_filename(row_group) + + def _parquet_byte_ranges( + self, + columns, + row_groups=None, + metadata=None, + footer=None, + footer_start=None, + ): + # Initialize offset ranges and define ParqetFile metadata + pf = metadata + data_paths, data_starts, data_ends = [], [], [] + if pf is None: + pf = self.fp.ParquetFile(io.BytesIO(footer)) + + # Convert columns to a set and add any index columns + # specified in the pandas metadata (just in case) + column_set = None if columns is None else set(columns) + if column_set is not None and hasattr(pf, "pandas_metadata"): + md_index = [ + ind + for ind in pf.pandas_metadata.get("index_columns", []) + # Ignore RangeIndex information + if not isinstance(ind, dict) + ] + column_set |= set(md_index) + + # Check if row_groups is a list of integers + # or a list of row-group metadata + if row_groups and not isinstance(row_groups[0], int): + # Input row_groups contains row-group metadata + row_group_indices = None + else: + # Input row_groups contains row-group indices + row_group_indices = row_groups + row_groups = pf.row_groups + + # Loop through column chunks to add required byte ranges + for r, row_group in enumerate(row_groups): + # Skip this row-group if we are targeting + # specific row-groups + if row_group_indices is None or r in row_group_indices: + # Find the target parquet-file path for `row_group` + fn = self._row_group_filename(row_group, pf) + + for column in row_group.columns: + name = column.meta_data.path_in_schema[0] + # Skip this column if we are targeting a + # specific columns + if column_set is None or name in column_set: + file_offset0 = column.meta_data.dictionary_page_offset + if file_offset0 is None: + file_offset0 = column.meta_data.data_page_offset + num_bytes = column.meta_data.total_compressed_size + if footer_start is None or file_offset0 < footer_start: + data_paths.append(fn) + data_starts.append(file_offset0) + data_ends.append( + min( + file_offset0 + num_bytes, + footer_start or (file_offset0 + num_bytes), + ) + ) + + if metadata: + # The metadata in this call may map to multiple + # file paths. Need to include `data_paths` + return data_paths, data_starts, data_ends + return data_starts, data_ends + + +class PyarrowEngine: + # The purpose of the PyarrowEngine class is + # to check if pyarrow can be imported (on initialization) + # and to define a `_parquet_byte_ranges` method. In the + # future, this class may also be used to define other + # methods/logic that are specific to pyarrow. + + def __init__(self): + import pyarrow.parquet as pq + + self.pq = pq + + def _row_group_filename(self, row_group, metadata): + raise NotImplementedError + + def _parquet_byte_ranges( + self, + columns, + row_groups=None, + metadata=None, + footer=None, + footer_start=None, + ): + if metadata is not None: + raise ValueError("metadata input not supported for PyarrowEngine") + + data_starts, data_ends = [], [] + md = self.pq.ParquetFile(io.BytesIO(footer)).metadata + + # Convert columns to a set and add any index columns + # specified in the pandas metadata (just in case) + column_set = None if columns is None else set(columns) + if column_set is not None: + schema = md.schema.to_arrow_schema() + has_pandas_metadata = ( + schema.metadata is not None and b"pandas" in schema.metadata + ) + if has_pandas_metadata: + md_index = [ + ind + for ind in json.loads( + schema.metadata[b"pandas"].decode("utf8") + ).get("index_columns", []) + # Ignore RangeIndex information + if not isinstance(ind, dict) + ] + column_set |= set(md_index) + + # Loop through column chunks to add required byte ranges + for r in range(md.num_row_groups): + # Skip this row-group if we are targeting + # specific row-groups + if row_groups is None or r in row_groups: + row_group = md.row_group(r) + for c in range(row_group.num_columns): + column = row_group.column(c) + name = column.path_in_schema + # Skip this column if we are targeting a + # specific columns + split_name = name.split(".")[0] + if ( + column_set is None + or name in column_set + or split_name in column_set + ): + file_offset0 = column.dictionary_page_offset + if file_offset0 is None: + file_offset0 = column.data_page_offset + num_bytes = column.total_compressed_size + if file_offset0 < footer_start: + data_starts.append(file_offset0) + data_ends.append( + min(file_offset0 + num_bytes, footer_start) + ) + return data_starts, data_ends diff --git a/meow/lib/python3.13/site-packages/fsspec/registry.py b/meow/lib/python3.13/site-packages/fsspec/registry.py new file mode 100644 index 0000000000000000000000000000000000000000..02094ea0a18f3226218940865a69ea26ff114fe7 --- /dev/null +++ b/meow/lib/python3.13/site-packages/fsspec/registry.py @@ -0,0 +1,315 @@ +from __future__ import annotations + +import importlib +import types +import warnings + +__all__ = ["registry", "get_filesystem_class", "default"] + +# internal, mutable +_registry: dict[str, type] = {} + +# external, immutable +registry = types.MappingProxyType(_registry) +default = "file" + + +def register_implementation(name, cls, clobber=False, errtxt=None): + """Add implementation class to the registry + + Parameters + ---------- + name: str + Protocol name to associate with the class + cls: class or str + if a class: fsspec-compliant implementation class (normally inherits from + ``fsspec.AbstractFileSystem``, gets added straight to the registry. If a + str, the full path to an implementation class like package.module.class, + which gets added to known_implementations, + so the import is deferred until the filesystem is actually used. + clobber: bool (optional) + Whether to overwrite a protocol with the same name; if False, will raise + instead. + errtxt: str (optional) + If given, then a failure to import the given class will result in this + text being given. + """ + if isinstance(cls, str): + if name in known_implementations and clobber is False: + if cls != known_implementations[name]["class"]: + raise ValueError( + f"Name ({name}) already in the known_implementations and clobber " + f"is False" + ) + else: + known_implementations[name] = { + "class": cls, + "err": errtxt or f"{cls} import failed for protocol {name}", + } + + else: + if name in registry and clobber is False: + if _registry[name] is not cls: + raise ValueError( + f"Name ({name}) already in the registry and clobber is False" + ) + else: + _registry[name] = cls + + +# protocols mapped to the class which implements them. This dict can be +# updated with register_implementation +known_implementations = { + "abfs": { + "class": "adlfs.AzureBlobFileSystem", + "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage", + }, + "adl": { + "class": "adlfs.AzureDatalakeFileSystem", + "err": "Install adlfs to access Azure Datalake Gen1", + }, + "arrow_hdfs": { + "class": "fsspec.implementations.arrow.HadoopFileSystem", + "err": "pyarrow and local java libraries required for HDFS", + }, + "asynclocal": { + "class": "morefs.asyn_local.AsyncLocalFileSystem", + "err": "Install 'morefs[asynclocalfs]' to use AsyncLocalFileSystem", + }, + "az": { + "class": "adlfs.AzureBlobFileSystem", + "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage", + }, + "blockcache": {"class": "fsspec.implementations.cached.CachingFileSystem"}, + "box": { + "class": "boxfs.BoxFileSystem", + "err": "Please install boxfs to access BoxFileSystem", + }, + "cached": {"class": "fsspec.implementations.cached.CachingFileSystem"}, + "dask": { + "class": "fsspec.implementations.dask.DaskWorkerFileSystem", + "err": "Install dask distributed to access worker file system", + }, + "data": {"class": "fsspec.implementations.data.DataFileSystem"}, + "dbfs": { + "class": "fsspec.implementations.dbfs.DatabricksFileSystem", + "err": "Install the requests package to use the DatabricksFileSystem", + }, + "dir": {"class": "fsspec.implementations.dirfs.DirFileSystem"}, + "dropbox": { + "class": "dropboxdrivefs.DropboxDriveFileSystem", + "err": ( + 'DropboxFileSystem requires "dropboxdrivefs","requests" and "' + '"dropbox" to be installed' + ), + }, + "dvc": { + "class": "dvc.api.DVCFileSystem", + "err": "Install dvc to access DVCFileSystem", + }, + "file": {"class": "fsspec.implementations.local.LocalFileSystem"}, + "filecache": {"class": "fsspec.implementations.cached.WholeFileCacheFileSystem"}, + "ftp": {"class": "fsspec.implementations.ftp.FTPFileSystem"}, + "gcs": { + "class": "gcsfs.GCSFileSystem", + "err": "Please install gcsfs to access Google Storage", + }, + "gdrive": { + "class": "gdrivefs.GoogleDriveFileSystem", + "err": "Please install gdrivefs for access to Google Drive", + }, + "generic": {"class": "fsspec.generic.GenericFileSystem"}, + "git": { + "class": "fsspec.implementations.git.GitFileSystem", + "err": "Install pygit2 to browse local git repos", + }, + "github": { + "class": "fsspec.implementations.github.GithubFileSystem", + "err": "Install the requests package to use the github FS", + }, + "gs": { + "class": "gcsfs.GCSFileSystem", + "err": "Please install gcsfs to access Google Storage", + }, + "hdfs": { + "class": "fsspec.implementations.arrow.HadoopFileSystem", + "err": "pyarrow and local java libraries required for HDFS", + }, + "hf": { + "class": "huggingface_hub.HfFileSystem", + "err": "Install huggingface_hub to access HfFileSystem", + }, + "http": { + "class": "fsspec.implementations.http.HTTPFileSystem", + "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed', + }, + "https": { + "class": "fsspec.implementations.http.HTTPFileSystem", + "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed', + }, + "jlab": { + "class": "fsspec.implementations.jupyter.JupyterFileSystem", + "err": "Jupyter FS requires requests to be installed", + }, + "jupyter": { + "class": "fsspec.implementations.jupyter.JupyterFileSystem", + "err": "Jupyter FS requires requests to be installed", + }, + "lakefs": { + "class": "lakefs_spec.LakeFSFileSystem", + "err": "Please install lakefs-spec to access LakeFSFileSystem", + }, + "libarchive": { + "class": "fsspec.implementations.libarchive.LibArchiveFileSystem", + "err": "LibArchive requires to be installed", + }, + "local": {"class": "fsspec.implementations.local.LocalFileSystem"}, + "memory": {"class": "fsspec.implementations.memory.MemoryFileSystem"}, + "oci": { + "class": "ocifs.OCIFileSystem", + "err": "Install ocifs to access OCI Object Storage", + }, + "ocilake": { + "class": "ocifs.OCIFileSystem", + "err": "Install ocifs to access OCI Data Lake", + }, + "oss": { + "class": "ossfs.OSSFileSystem", + "err": "Install ossfs to access Alibaba Object Storage System", + }, + "reference": {"class": "fsspec.implementations.reference.ReferenceFileSystem"}, + "root": { + "class": "fsspec_xrootd.XRootDFileSystem", + "err": ( + "Install fsspec-xrootd to access xrootd storage system. " + "Note: 'root' is the protocol name for xrootd storage systems, " + "not referring to root directories" + ), + }, + "s3": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"}, + "s3a": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"}, + "sftp": { + "class": "fsspec.implementations.sftp.SFTPFileSystem", + "err": 'SFTPFileSystem requires "paramiko" to be installed', + }, + "simplecache": {"class": "fsspec.implementations.cached.SimpleCacheFileSystem"}, + "smb": { + "class": "fsspec.implementations.smb.SMBFileSystem", + "err": 'SMB requires "smbprotocol" or "smbprotocol[kerberos]" installed', + }, + "ssh": { + "class": "fsspec.implementations.sftp.SFTPFileSystem", + "err": 'SFTPFileSystem requires "paramiko" to be installed', + }, + "tar": {"class": "fsspec.implementations.tar.TarFileSystem"}, + "tosfs": { + "class": "tosfs.TosFileSystem", + "err": "Install tosfs to access ByteDance volcano engine Tinder Object Storage", + }, + "wandb": {"class": "wandbfs.WandbFS", "err": "Install wandbfs to access wandb"}, + "webdav": { + "class": "webdav4.fsspec.WebdavFileSystem", + "err": "Install webdav4 to access WebDAV", + }, + "webhdfs": { + "class": "fsspec.implementations.webhdfs.WebHDFS", + "err": 'webHDFS access requires "requests" to be installed', + }, + "zip": {"class": "fsspec.implementations.zip.ZipFileSystem"}, +} + +assert list(known_implementations) == sorted( + known_implementations +), "Not in alphabetical order" + + +def get_filesystem_class(protocol): + """Fetch named protocol implementation from the registry + + The dict ``known_implementations`` maps protocol names to the locations + of classes implementing the corresponding file-system. When used for the + first time, appropriate imports will happen and the class will be placed in + the registry. All subsequent calls will fetch directly from the registry. + + Some protocol implementations require additional dependencies, and so the + import may fail. In this case, the string in the "err" field of the + ``known_implementations`` will be given as the error message. + """ + if not protocol: + protocol = default + + if protocol not in registry: + if protocol not in known_implementations: + raise ValueError(f"Protocol not known: {protocol}") + bit = known_implementations[protocol] + try: + register_implementation(protocol, _import_class(bit["class"])) + except ImportError as e: + raise ImportError(bit["err"]) from e + cls = registry[protocol] + if getattr(cls, "protocol", None) in ("abstract", None): + cls.protocol = protocol + + return cls + + +s3_msg = """Your installed version of s3fs is very old and known to cause +severe performance issues, see also https://github.com/dask/dask/issues/10276 + +To fix, you should specify a lower version bound on s3fs, or +update the current installation. +""" + + +def _import_class(fqp: str): + """Take a fully-qualified path and return the imported class or identifier. + + ``fqp`` is of the form "package.module.klass" or + "package.module:subobject.klass". + + Warnings + -------- + This can import arbitrary modules. Make sure you haven't installed any modules + that may execute malicious code at import time. + """ + if ":" in fqp: + mod, name = fqp.rsplit(":", 1) + else: + mod, name = fqp.rsplit(".", 1) + + is_s3 = mod == "s3fs" + mod = importlib.import_module(mod) + if is_s3 and mod.__version__.split(".") < ["0", "5"]: + warnings.warn(s3_msg) + for part in name.split("."): + mod = getattr(mod, part) + + if not isinstance(mod, type): + raise TypeError(f"{fqp} is not a class") + + return mod + + +def filesystem(protocol, **storage_options): + """Instantiate filesystems for given protocol and arguments + + ``storage_options`` are specific to the protocol being chosen, and are + passed directly to the class. + """ + if protocol == "arrow_hdfs": + warnings.warn( + "The 'arrow_hdfs' protocol has been deprecated and will be " + "removed in the future. Specify it as 'hdfs'.", + DeprecationWarning, + ) + + cls = get_filesystem_class(protocol) + return cls(**storage_options) + + +def available_protocols(): + """Return a list of the implemented protocols. + + Note that any given protocol may require extra packages to be importable. + """ + return list(known_implementations) diff --git a/meow/lib/python3.13/site-packages/fsspec/spec.py b/meow/lib/python3.13/site-packages/fsspec/spec.py new file mode 100644 index 0000000000000000000000000000000000000000..7daf850e72515801edc899e8bd3697da64542859 --- /dev/null +++ b/meow/lib/python3.13/site-packages/fsspec/spec.py @@ -0,0 +1,2242 @@ +from __future__ import annotations + +import io +import json +import logging +import os +import threading +import warnings +import weakref +from errno import ESPIPE +from glob import has_magic +from hashlib import sha256 +from typing import Any, ClassVar + +from .callbacks import DEFAULT_CALLBACK +from .config import apply_config, conf +from .dircache import DirCache +from .transaction import Transaction +from .utils import ( + _unstrip_protocol, + glob_translate, + isfilelike, + other_paths, + read_block, + stringify_path, + tokenize, +) + +logger = logging.getLogger("fsspec") + + +def make_instance(cls, args, kwargs): + return cls(*args, **kwargs) + + +class _Cached(type): + """ + Metaclass for caching file system instances. + + Notes + ----- + Instances are cached according to + + * The values of the class attributes listed in `_extra_tokenize_attributes` + * The arguments passed to ``__init__``. + + This creates an additional reference to the filesystem, which prevents the + filesystem from being garbage collected when all *user* references go away. + A call to the :meth:`AbstractFileSystem.clear_instance_cache` must *also* + be made for a filesystem instance to be garbage collected. + """ + + def __init__(cls, *args, **kwargs): + super().__init__(*args, **kwargs) + # Note: we intentionally create a reference here, to avoid garbage + # collecting instances when all other references are gone. To really + # delete a FileSystem, the cache must be cleared. + if conf.get("weakref_instance_cache"): # pragma: no cover + # debug option for analysing fork/spawn conditions + cls._cache = weakref.WeakValueDictionary() + else: + cls._cache = {} + cls._pid = os.getpid() + + def __call__(cls, *args, **kwargs): + kwargs = apply_config(cls, kwargs) + extra_tokens = tuple( + getattr(cls, attr, None) for attr in cls._extra_tokenize_attributes + ) + token = tokenize( + cls, cls._pid, threading.get_ident(), *args, *extra_tokens, **kwargs + ) + skip = kwargs.pop("skip_instance_cache", False) + if os.getpid() != cls._pid: + cls._cache.clear() + cls._pid = os.getpid() + if not skip and cls.cachable and token in cls._cache: + cls._latest = token + return cls._cache[token] + else: + obj = super().__call__(*args, **kwargs) + # Setting _fs_token here causes some static linters to complain. + obj._fs_token_ = token + obj.storage_args = args + obj.storage_options = kwargs + if obj.async_impl and obj.mirror_sync_methods: + from .asyn import mirror_sync_methods + + mirror_sync_methods(obj) + + if cls.cachable and not skip: + cls._latest = token + cls._cache[token] = obj + return obj + + +class AbstractFileSystem(metaclass=_Cached): + """ + An abstract super-class for pythonic file-systems + + Implementations are expected to be compatible with or, better, subclass + from here. + """ + + cachable = True # this class can be cached, instances reused + _cached = False + blocksize = 2**22 + sep = "/" + protocol: ClassVar[str | tuple[str, ...]] = "abstract" + _latest = None + async_impl = False + mirror_sync_methods = False + root_marker = "" # For some FSs, may require leading '/' or other character + transaction_type = Transaction + + #: Extra *class attributes* that should be considered when hashing. + _extra_tokenize_attributes = () + + # Set by _Cached metaclass + storage_args: tuple[Any, ...] + storage_options: dict[str, Any] + + def __init__(self, *args, **storage_options): + """Create and configure file-system instance + + Instances may be cachable, so if similar enough arguments are seen + a new instance is not required. The token attribute exists to allow + implementations to cache instances if they wish. + + A reasonable default should be provided if there are no arguments. + + Subclasses should call this method. + + Parameters + ---------- + use_listings_cache, listings_expiry_time, max_paths: + passed to ``DirCache``, if the implementation supports + directory listing caching. Pass use_listings_cache=False + to disable such caching. + skip_instance_cache: bool + If this is a cachable implementation, pass True here to force + creating a new instance even if a matching instance exists, and prevent + storing this instance. + asynchronous: bool + loop: asyncio-compatible IOLoop or None + """ + if self._cached: + # reusing instance, don't change + return + self._cached = True + self._intrans = False + self._transaction = None + self._invalidated_caches_in_transaction = [] + self.dircache = DirCache(**storage_options) + + if storage_options.pop("add_docs", None): + warnings.warn("add_docs is no longer supported.", FutureWarning) + + if storage_options.pop("add_aliases", None): + warnings.warn("add_aliases has been removed.", FutureWarning) + # This is set in _Cached + self._fs_token_ = None + + @property + def fsid(self): + """Persistent filesystem id that can be used to compare filesystems + across sessions. + """ + raise NotImplementedError + + @property + def _fs_token(self): + return self._fs_token_ + + def __dask_tokenize__(self): + return self._fs_token + + def __hash__(self): + return int(self._fs_token, 16) + + def __eq__(self, other): + return isinstance(other, type(self)) and self._fs_token == other._fs_token + + def __reduce__(self): + return make_instance, (type(self), self.storage_args, self.storage_options) + + @classmethod + def _strip_protocol(cls, path): + """Turn path from fully-qualified to file-system-specific + + May require FS-specific handling, e.g., for relative paths or links. + """ + if isinstance(path, list): + return [cls._strip_protocol(p) for p in path] + path = stringify_path(path) + protos = (cls.protocol,) if isinstance(cls.protocol, str) else cls.protocol + for protocol in protos: + if path.startswith(protocol + "://"): + path = path[len(protocol) + 3 :] + elif path.startswith(protocol + "::"): + path = path[len(protocol) + 2 :] + path = path.rstrip("/") + # use of root_marker to make minimum required path, e.g., "/" + return path or cls.root_marker + + def unstrip_protocol(self, name: str) -> str: + """Format FS-specific path to generic, including protocol""" + protos = (self.protocol,) if isinstance(self.protocol, str) else self.protocol + for protocol in protos: + if name.startswith(f"{protocol}://"): + return name + return f"{protos[0]}://{name}" + + @staticmethod + def _get_kwargs_from_urls(path): + """If kwargs can be encoded in the paths, extract them here + + This should happen before instantiation of the class; incoming paths + then should be amended to strip the options in methods. + + Examples may look like an sftp path "sftp://user@host:/my/path", where + the user and host should become kwargs and later get stripped. + """ + # by default, nothing happens + return {} + + @classmethod + def current(cls): + """Return the most recently instantiated FileSystem + + If no instance has been created, then create one with defaults + """ + if cls._latest in cls._cache: + return cls._cache[cls._latest] + return cls() + + @property + def transaction(self): + """A context within which files are committed together upon exit + + Requires the file class to implement `.commit()` and `.discard()` + for the normal and exception cases. + """ + if self._transaction is None: + self._transaction = self.transaction_type(self) + return self._transaction + + def start_transaction(self): + """Begin write transaction for deferring files, non-context version""" + self._intrans = True + self._transaction = self.transaction_type(self) + return self.transaction + + def end_transaction(self): + """Finish write transaction, non-context version""" + self.transaction.complete() + self._transaction = None + # The invalid cache must be cleared after the transaction is completed. + for path in self._invalidated_caches_in_transaction: + self.invalidate_cache(path) + self._invalidated_caches_in_transaction.clear() + + def invalidate_cache(self, path=None): + """ + Discard any cached directory information + + Parameters + ---------- + path: string or None + If None, clear all listings cached else listings at or under given + path. + """ + # Not necessary to implement invalidation mechanism, may have no cache. + # But if have, you should call this method of parent class from your + # subclass to ensure expiring caches after transacations correctly. + # See the implementation of FTPFileSystem in ftp.py + if self._intrans: + self._invalidated_caches_in_transaction.append(path) + + def mkdir(self, path, create_parents=True, **kwargs): + """ + Create directory entry at path + + For systems that don't have true directories, may create an for + this instance only and not touch the real filesystem + + Parameters + ---------- + path: str + location + create_parents: bool + if True, this is equivalent to ``makedirs`` + kwargs: + may be permissions, etc. + """ + pass # not necessary to implement, may not have directories + + def makedirs(self, path, exist_ok=False): + """Recursively make directories + + Creates directory at path and any intervening required directories. + Raises exception if, for instance, the path already exists but is a + file. + + Parameters + ---------- + path: str + leaf directory name + exist_ok: bool (False) + If False, will error if the target already exists + """ + pass # not necessary to implement, may not have directories + + def rmdir(self, path): + """Remove a directory, if empty""" + pass # not necessary to implement, may not have directories + + def ls(self, path, detail=True, **kwargs): + """List objects at path. + + This should include subdirectories and files at that location. The + difference between a file and a directory must be clear when details + are requested. + + The specific keys, or perhaps a FileInfo class, or similar, is TBD, + but must be consistent across implementations. + Must include: + + - full path to the entry (without protocol) + - size of the entry, in bytes. If the value cannot be determined, will + be ``None``. + - type of entry, "file", "directory" or other + + Additional information + may be present, appropriate to the file-system, e.g., generation, + checksum, etc. + + May use refresh=True|False to allow use of self._ls_from_cache to + check for a saved listing and avoid calling the backend. This would be + common where listing may be expensive. + + Parameters + ---------- + path: str + detail: bool + if True, gives a list of dictionaries, where each is the same as + the result of ``info(path)``. If False, gives a list of paths + (str). + kwargs: may have additional backend-specific options, such as version + information + + Returns + ------- + List of strings if detail is False, or list of directory information + dicts if detail is True. + """ + raise NotImplementedError + + def _ls_from_cache(self, path): + """Check cache for listing + + Returns listing, if found (may be empty list for a directly that exists + but contains nothing), None if not in cache. + """ + parent = self._parent(path) + try: + return self.dircache[path.rstrip("/")] + except KeyError: + pass + try: + files = [ + f + for f in self.dircache[parent] + if f["name"] == path + or (f["name"] == path.rstrip("/") and f["type"] == "directory") + ] + if len(files) == 0: + # parent dir was listed but did not contain this file + raise FileNotFoundError(path) + return files + except KeyError: + pass + + def walk(self, path, maxdepth=None, topdown=True, on_error="omit", **kwargs): + """Return all files belows path + + List all files, recursing into subdirectories; output is iterator-style, + like ``os.walk()``. For a simple list of files, ``find()`` is available. + + When topdown is True, the caller can modify the dirnames list in-place (perhaps + using del or slice assignment), and walk() will + only recurse into the subdirectories whose names remain in dirnames; + this can be used to prune the search, impose a specific order of visiting, + or even to inform walk() about directories the caller creates or renames before + it resumes walk() again. + Modifying dirnames when topdown is False has no effect. (see os.walk) + + Note that the "files" outputted will include anything that is not + a directory, such as links. + + Parameters + ---------- + path: str + Root to recurse into + maxdepth: int + Maximum recursion depth. None means limitless, but not recommended + on link-based file-systems. + topdown: bool (True) + Whether to walk the directory tree from the top downwards or from + the bottom upwards. + on_error: "omit", "raise", a callable + if omit (default), path with exception will simply be empty; + If raise, an underlying exception will be raised; + if callable, it will be called with a single OSError instance as argument + kwargs: passed to ``ls`` + """ + if maxdepth is not None and maxdepth < 1: + raise ValueError("maxdepth must be at least 1") + + path = self._strip_protocol(path) + full_dirs = {} + dirs = {} + files = {} + + detail = kwargs.pop("detail", False) + try: + listing = self.ls(path, detail=True, **kwargs) + except (FileNotFoundError, OSError) as e: + if on_error == "raise": + raise + if callable(on_error): + on_error(e) + return + + for info in listing: + # each info name must be at least [path]/part , but here + # we check also for names like [path]/part/ + pathname = info["name"].rstrip("/") + name = pathname.rsplit("/", 1)[-1] + if info["type"] == "directory" and pathname != path: + # do not include "self" path + full_dirs[name] = pathname + dirs[name] = info + elif pathname == path: + # file-like with same name as give path + files[""] = info + else: + files[name] = info + + if not detail: + dirs = list(dirs) + files = list(files) + + if topdown: + # Yield before recursion if walking top down + yield path, dirs, files + + if maxdepth is not None: + maxdepth -= 1 + if maxdepth < 1: + if not topdown: + yield path, dirs, files + return + + for d in dirs: + yield from self.walk( + full_dirs[d], + maxdepth=maxdepth, + detail=detail, + topdown=topdown, + **kwargs, + ) + + if not topdown: + # Yield after recursion if walking bottom up + yield path, dirs, files + + def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs): + """List all files below path. + + Like posix ``find`` command without conditions + + Parameters + ---------- + path : str + maxdepth: int or None + If not None, the maximum number of levels to descend + withdirs: bool + Whether to include directory paths in the output. This is True + when used by glob, but users usually only want files. + kwargs are passed to ``ls``. + """ + # TODO: allow equivalent of -name parameter + path = self._strip_protocol(path) + out = {} + + # Add the root directory if withdirs is requested + # This is needed for posix glob compliance + if withdirs and path != "" and self.isdir(path): + out[path] = self.info(path) + + for _, dirs, files in self.walk(path, maxdepth, detail=True, **kwargs): + if withdirs: + files.update(dirs) + out.update({info["name"]: info for name, info in files.items()}) + if not out and self.isfile(path): + # walk works on directories, but find should also return [path] + # when path happens to be a file + out[path] = {} + names = sorted(out) + if not detail: + return names + else: + return {name: out[name] for name in names} + + def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs): + """Space used by files and optionally directories within a path + + Directory size does not include the size of its contents. + + Parameters + ---------- + path: str + total: bool + Whether to sum all the file sizes + maxdepth: int or None + Maximum number of directory levels to descend, None for unlimited. + withdirs: bool + Whether to include directory paths in the output. + kwargs: passed to ``find`` + + Returns + ------- + Dict of {path: size} if total=False, or int otherwise, where numbers + refer to bytes used. + """ + sizes = {} + if withdirs and self.isdir(path): + # Include top-level directory in output + info = self.info(path) + sizes[info["name"]] = info["size"] + for f in self.find(path, maxdepth=maxdepth, withdirs=withdirs, **kwargs): + info = self.info(f) + sizes[info["name"]] = info["size"] + if total: + return sum(sizes.values()) + else: + return sizes + + def glob(self, path, maxdepth=None, **kwargs): + """ + Find files by glob-matching. + + If the path ends with '/', only folders are returned. + + We support ``"**"``, + ``"?"`` and ``"[..]"``. We do not support ^ for pattern negation. + + The `maxdepth` option is applied on the first `**` found in the path. + + kwargs are passed to ``ls``. + """ + if maxdepth is not None and maxdepth < 1: + raise ValueError("maxdepth must be at least 1") + + import re + + seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,) + ends_with_sep = path.endswith(seps) # _strip_protocol strips trailing slash + path = self._strip_protocol(path) + append_slash_to_dirname = ends_with_sep or path.endswith( + tuple(sep + "**" for sep in seps) + ) + idx_star = path.find("*") if path.find("*") >= 0 else len(path) + idx_qmark = path.find("?") if path.find("?") >= 0 else len(path) + idx_brace = path.find("[") if path.find("[") >= 0 else len(path) + + min_idx = min(idx_star, idx_qmark, idx_brace) + + detail = kwargs.pop("detail", False) + + if not has_magic(path): + if self.exists(path, **kwargs): + if not detail: + return [path] + else: + return {path: self.info(path, **kwargs)} + else: + if not detail: + return [] # glob of non-existent returns empty + else: + return {} + elif "/" in path[:min_idx]: + min_idx = path[:min_idx].rindex("/") + root = path[: min_idx + 1] + depth = path[min_idx + 1 :].count("/") + 1 + else: + root = "" + depth = path[min_idx + 1 :].count("/") + 1 + + if "**" in path: + if maxdepth is not None: + idx_double_stars = path.find("**") + depth_double_stars = path[idx_double_stars:].count("/") + 1 + depth = depth - depth_double_stars + maxdepth + else: + depth = None + + allpaths = self.find(root, maxdepth=depth, withdirs=True, detail=True, **kwargs) + + pattern = glob_translate(path + ("/" if ends_with_sep else "")) + pattern = re.compile(pattern) + + out = { + p: info + for p, info in sorted(allpaths.items()) + if pattern.match( + p + "/" + if append_slash_to_dirname and info["type"] == "directory" + else p + ) + } + + if detail: + return out + else: + return list(out) + + def exists(self, path, **kwargs): + """Is there a file at the given path""" + try: + self.info(path, **kwargs) + return True + except: # noqa: E722 + # any exception allowed bar FileNotFoundError? + return False + + def lexists(self, path, **kwargs): + """If there is a file at the given path (including + broken links)""" + return self.exists(path) + + def info(self, path, **kwargs): + """Give details of entry at path + + Returns a single dictionary, with exactly the same information as ``ls`` + would with ``detail=True``. + + The default implementation calls ls and could be overridden by a + shortcut. kwargs are passed on to ```ls()``. + + Some file systems might not be able to measure the file's size, in + which case, the returned dict will include ``'size': None``. + + Returns + ------- + dict with keys: name (full path in the FS), size (in bytes), type (file, + directory, or something else) and other FS-specific keys. + """ + path = self._strip_protocol(path) + out = self.ls(self._parent(path), detail=True, **kwargs) + out = [o for o in out if o["name"].rstrip("/") == path] + if out: + return out[0] + out = self.ls(path, detail=True, **kwargs) + path = path.rstrip("/") + out1 = [o for o in out if o["name"].rstrip("/") == path] + if len(out1) == 1: + if "size" not in out1[0]: + out1[0]["size"] = None + return out1[0] + elif len(out1) > 1 or out: + return {"name": path, "size": 0, "type": "directory"} + else: + raise FileNotFoundError(path) + + def checksum(self, path): + """Unique value for current version of file + + If the checksum is the same from one moment to another, the contents + are guaranteed to be the same. If the checksum changes, the contents + *might* have changed. + + This should normally be overridden; default will probably capture + creation/modification timestamp (which would be good) or maybe + access timestamp (which would be bad) + """ + return int(tokenize(self.info(path)), 16) + + def size(self, path): + """Size in bytes of file""" + return self.info(path).get("size", None) + + def sizes(self, paths): + """Size in bytes of each file in a list of paths""" + return [self.size(p) for p in paths] + + def isdir(self, path): + """Is this entry directory-like?""" + try: + return self.info(path)["type"] == "directory" + except OSError: + return False + + def isfile(self, path): + """Is this entry file-like?""" + try: + return self.info(path)["type"] == "file" + except: # noqa: E722 + return False + + def read_text(self, path, encoding=None, errors=None, newline=None, **kwargs): + """Get the contents of the file as a string. + + Parameters + ---------- + path: str + URL of file on this filesystems + encoding, errors, newline: same as `open`. + """ + with self.open( + path, + mode="r", + encoding=encoding, + errors=errors, + newline=newline, + **kwargs, + ) as f: + return f.read() + + def write_text( + self, path, value, encoding=None, errors=None, newline=None, **kwargs + ): + """Write the text to the given file. + + An existing file will be overwritten. + + Parameters + ---------- + path: str + URL of file on this filesystems + value: str + Text to write. + encoding, errors, newline: same as `open`. + """ + with self.open( + path, + mode="w", + encoding=encoding, + errors=errors, + newline=newline, + **kwargs, + ) as f: + return f.write(value) + + def cat_file(self, path, start=None, end=None, **kwargs): + """Get the content of a file + + Parameters + ---------- + path: URL of file on this filesystems + start, end: int + Bytes limits of the read. If negative, backwards from end, + like usual python slices. Either can be None for start or + end of file, respectively + kwargs: passed to ``open()``. + """ + # explicitly set buffering off? + with self.open(path, "rb", **kwargs) as f: + if start is not None: + if start >= 0: + f.seek(start) + else: + f.seek(max(0, f.size + start)) + if end is not None: + if end < 0: + end = f.size + end + return f.read(end - f.tell()) + return f.read() + + def pipe_file(self, path, value, mode="overwrite", **kwargs): + """Set the bytes of given file""" + if mode == "create" and self.exists(path): + # non-atomic but simple way; or could use "xb" in open(), which is likely + # not as well supported + raise FileExistsError + with self.open(path, "wb", **kwargs) as f: + f.write(value) + + def pipe(self, path, value=None, **kwargs): + """Put value into path + + (counterpart to ``cat``) + + Parameters + ---------- + path: string or dict(str, bytes) + If a string, a single remote location to put ``value`` bytes; if a dict, + a mapping of {path: bytesvalue}. + value: bytes, optional + If using a single path, these are the bytes to put there. Ignored if + ``path`` is a dict + """ + if isinstance(path, str): + self.pipe_file(self._strip_protocol(path), value, **kwargs) + elif isinstance(path, dict): + for k, v in path.items(): + self.pipe_file(self._strip_protocol(k), v, **kwargs) + else: + raise ValueError("path must be str or dict") + + def cat_ranges( + self, paths, starts, ends, max_gap=None, on_error="return", **kwargs + ): + """Get the contents of byte ranges from one or more files + + Parameters + ---------- + paths: list + A list of of filepaths on this filesystems + starts, ends: int or list + Bytes limits of the read. If using a single int, the same value will be + used to read all the specified files. + """ + if max_gap is not None: + raise NotImplementedError + if not isinstance(paths, list): + raise TypeError + if not isinstance(starts, list): + starts = [starts] * len(paths) + if not isinstance(ends, list): + ends = [ends] * len(paths) + if len(starts) != len(paths) or len(ends) != len(paths): + raise ValueError + out = [] + for p, s, e in zip(paths, starts, ends): + try: + out.append(self.cat_file(p, s, e)) + except Exception as e: + if on_error == "return": + out.append(e) + else: + raise + return out + + def cat(self, path, recursive=False, on_error="raise", **kwargs): + """Fetch (potentially multiple) paths' contents + + Parameters + ---------- + recursive: bool + If True, assume the path(s) are directories, and get all the + contained files + on_error : "raise", "omit", "return" + If raise, an underlying exception will be raised (converted to KeyError + if the type is in self.missing_exceptions); if omit, keys with exception + will simply not be included in the output; if "return", all keys are + included in the output, but the value will be bytes or an exception + instance. + kwargs: passed to cat_file + + Returns + ------- + dict of {path: contents} if there are multiple paths + or the path has been otherwise expanded + """ + paths = self.expand_path(path, recursive=recursive) + if ( + len(paths) > 1 + or isinstance(path, list) + or paths[0] != self._strip_protocol(path) + ): + out = {} + for path in paths: + try: + out[path] = self.cat_file(path, **kwargs) + except Exception as e: + if on_error == "raise": + raise + if on_error == "return": + out[path] = e + return out + else: + return self.cat_file(paths[0], **kwargs) + + def get_file(self, rpath, lpath, callback=DEFAULT_CALLBACK, outfile=None, **kwargs): + """Copy single remote file to local""" + from .implementations.local import LocalFileSystem + + if isfilelike(lpath): + outfile = lpath + elif self.isdir(rpath): + os.makedirs(lpath, exist_ok=True) + return None + + fs = LocalFileSystem(auto_mkdir=True) + fs.makedirs(fs._parent(lpath), exist_ok=True) + + with self.open(rpath, "rb", **kwargs) as f1: + if outfile is None: + outfile = open(lpath, "wb") + + try: + callback.set_size(getattr(f1, "size", None)) + data = True + while data: + data = f1.read(self.blocksize) + segment_len = outfile.write(data) + if segment_len is None: + segment_len = len(data) + callback.relative_update(segment_len) + finally: + if not isfilelike(lpath): + outfile.close() + + def get( + self, + rpath, + lpath, + recursive=False, + callback=DEFAULT_CALLBACK, + maxdepth=None, + **kwargs, + ): + """Copy file(s) to local. + + Copies a specific file or tree of files (if recursive=True). If lpath + ends with a "/", it will be assumed to be a directory, and target files + will go within. Can submit a list of paths, which may be glob-patterns + and will be expanded. + + Calls get_file for each source. + """ + if isinstance(lpath, list) and isinstance(rpath, list): + # No need to expand paths when both source and destination + # are provided as lists + rpaths = rpath + lpaths = lpath + else: + from .implementations.local import ( + LocalFileSystem, + make_path_posix, + trailing_sep, + ) + + source_is_str = isinstance(rpath, str) + rpaths = self.expand_path(rpath, recursive=recursive, maxdepth=maxdepth) + if source_is_str and (not recursive or maxdepth is not None): + # Non-recursive glob does not copy directories + rpaths = [p for p in rpaths if not (trailing_sep(p) or self.isdir(p))] + if not rpaths: + return + + if isinstance(lpath, str): + lpath = make_path_posix(lpath) + + source_is_file = len(rpaths) == 1 + dest_is_dir = isinstance(lpath, str) and ( + trailing_sep(lpath) or LocalFileSystem().isdir(lpath) + ) + + exists = source_is_str and ( + (has_magic(rpath) and source_is_file) + or (not has_magic(rpath) and dest_is_dir and not trailing_sep(rpath)) + ) + lpaths = other_paths( + rpaths, + lpath, + exists=exists, + flatten=not source_is_str, + ) + + callback.set_size(len(lpaths)) + for lpath, rpath in callback.wrap(zip(lpaths, rpaths)): + with callback.branched(rpath, lpath) as child: + self.get_file(rpath, lpath, callback=child, **kwargs) + + def put_file( + self, lpath, rpath, callback=DEFAULT_CALLBACK, mode="overwrite", **kwargs + ): + """Copy single file to remote""" + if mode == "create" and self.exists(rpath): + raise FileExistsError + if os.path.isdir(lpath): + self.makedirs(rpath, exist_ok=True) + return None + + with open(lpath, "rb") as f1: + size = f1.seek(0, 2) + callback.set_size(size) + f1.seek(0) + + self.mkdirs(self._parent(os.fspath(rpath)), exist_ok=True) + with self.open(rpath, "wb", **kwargs) as f2: + while f1.tell() < size: + data = f1.read(self.blocksize) + segment_len = f2.write(data) + if segment_len is None: + segment_len = len(data) + callback.relative_update(segment_len) + + def put( + self, + lpath, + rpath, + recursive=False, + callback=DEFAULT_CALLBACK, + maxdepth=None, + **kwargs, + ): + """Copy file(s) from local. + + Copies a specific file or tree of files (if recursive=True). If rpath + ends with a "/", it will be assumed to be a directory, and target files + will go within. + + Calls put_file for each source. + """ + if isinstance(lpath, list) and isinstance(rpath, list): + # No need to expand paths when both source and destination + # are provided as lists + rpaths = rpath + lpaths = lpath + else: + from .implementations.local import ( + LocalFileSystem, + make_path_posix, + trailing_sep, + ) + + source_is_str = isinstance(lpath, str) + if source_is_str: + lpath = make_path_posix(lpath) + fs = LocalFileSystem() + lpaths = fs.expand_path(lpath, recursive=recursive, maxdepth=maxdepth) + if source_is_str and (not recursive or maxdepth is not None): + # Non-recursive glob does not copy directories + lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))] + if not lpaths: + return + + source_is_file = len(lpaths) == 1 + dest_is_dir = isinstance(rpath, str) and ( + trailing_sep(rpath) or self.isdir(rpath) + ) + + rpath = ( + self._strip_protocol(rpath) + if isinstance(rpath, str) + else [self._strip_protocol(p) for p in rpath] + ) + exists = source_is_str and ( + (has_magic(lpath) and source_is_file) + or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath)) + ) + rpaths = other_paths( + lpaths, + rpath, + exists=exists, + flatten=not source_is_str, + ) + + callback.set_size(len(rpaths)) + for lpath, rpath in callback.wrap(zip(lpaths, rpaths)): + with callback.branched(lpath, rpath) as child: + self.put_file(lpath, rpath, callback=child, **kwargs) + + def head(self, path, size=1024): + """Get the first ``size`` bytes from file""" + with self.open(path, "rb") as f: + return f.read(size) + + def tail(self, path, size=1024): + """Get the last ``size`` bytes from file""" + with self.open(path, "rb") as f: + f.seek(max(-size, -f.size), 2) + return f.read() + + def cp_file(self, path1, path2, **kwargs): + raise NotImplementedError + + def copy( + self, path1, path2, recursive=False, maxdepth=None, on_error=None, **kwargs + ): + """Copy within two locations in the filesystem + + on_error : "raise", "ignore" + If raise, any not-found exceptions will be raised; if ignore any + not-found exceptions will cause the path to be skipped; defaults to + raise unless recursive is true, where the default is ignore + """ + if on_error is None and recursive: + on_error = "ignore" + elif on_error is None: + on_error = "raise" + + if isinstance(path1, list) and isinstance(path2, list): + # No need to expand paths when both source and destination + # are provided as lists + paths1 = path1 + paths2 = path2 + else: + from .implementations.local import trailing_sep + + source_is_str = isinstance(path1, str) + paths1 = self.expand_path(path1, recursive=recursive, maxdepth=maxdepth) + if source_is_str and (not recursive or maxdepth is not None): + # Non-recursive glob does not copy directories + paths1 = [p for p in paths1 if not (trailing_sep(p) or self.isdir(p))] + if not paths1: + return + + source_is_file = len(paths1) == 1 + dest_is_dir = isinstance(path2, str) and ( + trailing_sep(path2) or self.isdir(path2) + ) + + exists = source_is_str and ( + (has_magic(path1) and source_is_file) + or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1)) + ) + paths2 = other_paths( + paths1, + path2, + exists=exists, + flatten=not source_is_str, + ) + + for p1, p2 in zip(paths1, paths2): + try: + self.cp_file(p1, p2, **kwargs) + except FileNotFoundError: + if on_error == "raise": + raise + + def expand_path(self, path, recursive=False, maxdepth=None, **kwargs): + """Turn one or more globs or directories into a list of all matching paths + to files or directories. + + kwargs are passed to ``glob`` or ``find``, which may in turn call ``ls`` + """ + + if maxdepth is not None and maxdepth < 1: + raise ValueError("maxdepth must be at least 1") + + if isinstance(path, (str, os.PathLike)): + out = self.expand_path([path], recursive, maxdepth) + else: + out = set() + path = [self._strip_protocol(p) for p in path] + for p in path: + if has_magic(p): + bit = set(self.glob(p, maxdepth=maxdepth, **kwargs)) + out |= bit + if recursive: + # glob call above expanded one depth so if maxdepth is defined + # then decrement it in expand_path call below. If it is zero + # after decrementing then avoid expand_path call. + if maxdepth is not None and maxdepth <= 1: + continue + out |= set( + self.expand_path( + list(bit), + recursive=recursive, + maxdepth=maxdepth - 1 if maxdepth is not None else None, + **kwargs, + ) + ) + continue + elif recursive: + rec = set( + self.find( + p, maxdepth=maxdepth, withdirs=True, detail=False, **kwargs + ) + ) + out |= rec + if p not in out and (recursive is False or self.exists(p)): + # should only check once, for the root + out.add(p) + if not out: + raise FileNotFoundError(path) + return sorted(out) + + def mv(self, path1, path2, recursive=False, maxdepth=None, **kwargs): + """Move file(s) from one location to another""" + if path1 == path2: + logger.debug("%s mv: The paths are the same, so no files were moved.", self) + else: + # explicitly raise exception to prevent data corruption + self.copy( + path1, path2, recursive=recursive, maxdepth=maxdepth, onerror="raise" + ) + self.rm(path1, recursive=recursive) + + def rm_file(self, path): + """Delete a file""" + self._rm(path) + + def _rm(self, path): + """Delete one file""" + # this is the old name for the method, prefer rm_file + raise NotImplementedError + + def rm(self, path, recursive=False, maxdepth=None): + """Delete files. + + Parameters + ---------- + path: str or list of str + File(s) to delete. + recursive: bool + If file(s) are directories, recursively delete contents and then + also remove the directory + maxdepth: int or None + Depth to pass to walk for finding files to delete, if recursive. + If None, there will be no limit and infinite recursion may be + possible. + """ + path = self.expand_path(path, recursive=recursive, maxdepth=maxdepth) + for p in reversed(path): + self.rm_file(p) + + @classmethod + def _parent(cls, path): + path = cls._strip_protocol(path) + if "/" in path: + parent = path.rsplit("/", 1)[0].lstrip(cls.root_marker) + return cls.root_marker + parent + else: + return cls.root_marker + + def _open( + self, + path, + mode="rb", + block_size=None, + autocommit=True, + cache_options=None, + **kwargs, + ): + """Return raw bytes-mode file-like from the file-system""" + return AbstractBufferedFile( + self, + path, + mode, + block_size, + autocommit, + cache_options=cache_options, + **kwargs, + ) + + def open( + self, + path, + mode="rb", + block_size=None, + cache_options=None, + compression=None, + **kwargs, + ): + """ + Return a file-like object from the filesystem + + The resultant instance must function correctly in a context ``with`` + block. + + Parameters + ---------- + path: str + Target file + mode: str like 'rb', 'w' + See builtin ``open()`` + Mode "x" (exclusive write) may be implemented by the backend. Even if + it is, whether it is checked up front or on commit, and whether it is + atomic is implementation-dependent. + block_size: int + Some indication of buffering - this is a value in bytes + cache_options : dict, optional + Extra arguments to pass through to the cache. + compression: string or None + If given, open file using compression codec. Can either be a compression + name (a key in ``fsspec.compression.compr``) or "infer" to guess the + compression from the filename suffix. + encoding, errors, newline: passed on to TextIOWrapper for text mode + """ + import io + + path = self._strip_protocol(path) + if "b" not in mode: + mode = mode.replace("t", "") + "b" + + text_kwargs = { + k: kwargs.pop(k) + for k in ["encoding", "errors", "newline"] + if k in kwargs + } + return io.TextIOWrapper( + self.open( + path, + mode, + block_size=block_size, + cache_options=cache_options, + compression=compression, + **kwargs, + ), + **text_kwargs, + ) + else: + ac = kwargs.pop("autocommit", not self._intrans) + f = self._open( + path, + mode=mode, + block_size=block_size, + autocommit=ac, + cache_options=cache_options, + **kwargs, + ) + if compression is not None: + from fsspec.compression import compr + from fsspec.core import get_compression + + compression = get_compression(path, compression) + compress = compr[compression] + f = compress(f, mode=mode[0]) + + if not ac and "r" not in mode: + self.transaction.files.append(f) + return f + + def touch(self, path, truncate=True, **kwargs): + """Create empty file, or update timestamp + + Parameters + ---------- + path: str + file location + truncate: bool + If True, always set file size to 0; if False, update timestamp and + leave file unchanged, if backend allows this + """ + if truncate or not self.exists(path): + with self.open(path, "wb", **kwargs): + pass + else: + raise NotImplementedError # update timestamp, if possible + + def ukey(self, path): + """Hash of file properties, to tell if it has changed""" + return sha256(str(self.info(path)).encode()).hexdigest() + + def read_block(self, fn, offset, length, delimiter=None): + """Read a block of bytes from + + Starting at ``offset`` of the file, read ``length`` bytes. If + ``delimiter`` is set then we ensure that the read starts and stops at + delimiter boundaries that follow the locations ``offset`` and ``offset + + length``. If ``offset`` is zero then we start at zero. The + bytestring returned WILL include the end delimiter string. + + If offset+length is beyond the eof, reads to eof. + + Parameters + ---------- + fn: string + Path to filename + offset: int + Byte offset to start read + length: int + Number of bytes to read. If None, read to end. + delimiter: bytes (optional) + Ensure reading starts and stops at delimiter bytestring + + Examples + -------- + >>> fs.read_block('data/file.csv', 0, 13) # doctest: +SKIP + b'Alice, 100\\nBo' + >>> fs.read_block('data/file.csv', 0, 13, delimiter=b'\\n') # doctest: +SKIP + b'Alice, 100\\nBob, 200\\n' + + Use ``length=None`` to read to the end of the file. + >>> fs.read_block('data/file.csv', 0, None, delimiter=b'\\n') # doctest: +SKIP + b'Alice, 100\\nBob, 200\\nCharlie, 300' + + See Also + -------- + :func:`fsspec.utils.read_block` + """ + with self.open(fn, "rb") as f: + size = f.size + if length is None: + length = size + if size is not None and offset + length > size: + length = size - offset + return read_block(f, offset, length, delimiter) + + def to_json(self, *, include_password: bool = True) -> str: + """ + JSON representation of this filesystem instance. + + Parameters + ---------- + include_password: bool, default True + Whether to include the password (if any) in the output. + + Returns + ------- + JSON string with keys ``cls`` (the python location of this class), + protocol (text name of this class's protocol, first one in case of + multiple), ``args`` (positional args, usually empty), and all other + keyword arguments as their own keys. + + Warnings + -------- + Serialized filesystems may contain sensitive information which have been + passed to the constructor, such as passwords and tokens. Make sure you + store and send them in a secure environment! + """ + from .json import FilesystemJSONEncoder + + return json.dumps( + self, + cls=type( + "_FilesystemJSONEncoder", + (FilesystemJSONEncoder,), + {"include_password": include_password}, + ), + ) + + @staticmethod + def from_json(blob: str) -> AbstractFileSystem: + """ + Recreate a filesystem instance from JSON representation. + + See ``.to_json()`` for the expected structure of the input. + + Parameters + ---------- + blob: str + + Returns + ------- + file system instance, not necessarily of this particular class. + + Warnings + -------- + This can import arbitrary modules (as determined by the ``cls`` key). + Make sure you haven't installed any modules that may execute malicious code + at import time. + """ + from .json import FilesystemJSONDecoder + + return json.loads(blob, cls=FilesystemJSONDecoder) + + def to_dict(self, *, include_password: bool = True) -> dict[str, Any]: + """ + JSON-serializable dictionary representation of this filesystem instance. + + Parameters + ---------- + include_password: bool, default True + Whether to include the password (if any) in the output. + + Returns + ------- + Dictionary with keys ``cls`` (the python location of this class), + protocol (text name of this class's protocol, first one in case of + multiple), ``args`` (positional args, usually empty), and all other + keyword arguments as their own keys. + + Warnings + -------- + Serialized filesystems may contain sensitive information which have been + passed to the constructor, such as passwords and tokens. Make sure you + store and send them in a secure environment! + """ + from .json import FilesystemJSONEncoder + + json_encoder = FilesystemJSONEncoder() + + cls = type(self) + proto = self.protocol + + storage_options = dict(self.storage_options) + if not include_password: + storage_options.pop("password", None) + + return dict( + cls=f"{cls.__module__}:{cls.__name__}", + protocol=proto[0] if isinstance(proto, (tuple, list)) else proto, + args=json_encoder.make_serializable(self.storage_args), + **json_encoder.make_serializable(storage_options), + ) + + @staticmethod + def from_dict(dct: dict[str, Any]) -> AbstractFileSystem: + """ + Recreate a filesystem instance from dictionary representation. + + See ``.to_dict()`` for the expected structure of the input. + + Parameters + ---------- + dct: Dict[str, Any] + + Returns + ------- + file system instance, not necessarily of this particular class. + + Warnings + -------- + This can import arbitrary modules (as determined by the ``cls`` key). + Make sure you haven't installed any modules that may execute malicious code + at import time. + """ + from .json import FilesystemJSONDecoder + + json_decoder = FilesystemJSONDecoder() + + dct = dict(dct) # Defensive copy + + cls = FilesystemJSONDecoder.try_resolve_fs_cls(dct) + if cls is None: + raise ValueError("Not a serialized AbstractFileSystem") + + dct.pop("cls", None) + dct.pop("protocol", None) + + return cls( + *json_decoder.unmake_serializable(dct.pop("args", ())), + **json_decoder.unmake_serializable(dct), + ) + + def _get_pyarrow_filesystem(self): + """ + Make a version of the FS instance which will be acceptable to pyarrow + """ + # all instances already also derive from pyarrow + return self + + def get_mapper(self, root="", check=False, create=False, missing_exceptions=None): + """Create key/value store based on this file-system + + Makes a MutableMapping interface to the FS at the given root path. + See ``fsspec.mapping.FSMap`` for further details. + """ + from .mapping import FSMap + + return FSMap( + root, + self, + check=check, + create=create, + missing_exceptions=missing_exceptions, + ) + + @classmethod + def clear_instance_cache(cls): + """ + Clear the cache of filesystem instances. + + Notes + ----- + Unless overridden by setting the ``cachable`` class attribute to False, + the filesystem class stores a reference to newly created instances. This + prevents Python's normal rules around garbage collection from working, + since the instances refcount will not drop to zero until + ``clear_instance_cache`` is called. + """ + cls._cache.clear() + + def created(self, path): + """Return the created timestamp of a file as a datetime.datetime""" + raise NotImplementedError + + def modified(self, path): + """Return the modified timestamp of a file as a datetime.datetime""" + raise NotImplementedError + + def tree( + self, + path: str = "/", + recursion_limit: int = 2, + max_display: int = 25, + display_size: bool = False, + prefix: str = "", + is_last: bool = True, + first: bool = True, + indent_size: int = 4, + ) -> str: + """ + Return a tree-like structure of the filesystem starting from the given path as a string. + + Parameters + ---------- + path: Root path to start traversal from + recursion_limit: Maximum depth of directory traversal + max_display: Maximum number of items to display per directory + display_size: Whether to display file sizes + prefix: Current line prefix for visual tree structure + is_last: Whether current item is last in its level + first: Whether this is the first call (displays root path) + indent_size: Number of spaces by indent + + Returns + ------- + str: A string representing the tree structure. + + Example + ------- + >>> from fsspec import filesystem + + >>> fs = filesystem('ftp', host='test.rebex.net', user='demo', password='password') + >>> tree = fs.tree(display_size=True, recursion_limit=3, indent_size=8, max_display=10) + >>> print(tree) + """ + + def format_bytes(n: int) -> str: + """Format bytes as text.""" + for prefix, k in ( + ("P", 2**50), + ("T", 2**40), + ("G", 2**30), + ("M", 2**20), + ("k", 2**10), + ): + if n >= 0.9 * k: + return f"{n / k:.2f} {prefix}b" + return f"{n}B" + + result = [] + + if first: + result.append(path) + + if recursion_limit: + indent = " " * indent_size + contents = self.ls(path, detail=True) + contents.sort( + key=lambda x: (x.get("type") != "directory", x.get("name", "")) + ) + + if max_display is not None and len(contents) > max_display: + displayed_contents = contents[:max_display] + remaining_count = len(contents) - max_display + else: + displayed_contents = contents + remaining_count = 0 + + for i, item in enumerate(displayed_contents): + is_last_item = (i == len(displayed_contents) - 1) and ( + remaining_count == 0 + ) + + branch = ( + "└" + ("─" * (indent_size - 2)) + if is_last_item + else "├" + ("─" * (indent_size - 2)) + ) + branch += " " + new_prefix = prefix + ( + indent if is_last_item else "│" + " " * (indent_size - 1) + ) + + name = os.path.basename(item.get("name", "")) + + if display_size and item.get("type") == "directory": + sub_contents = self.ls(item.get("name", ""), detail=True) + num_files = sum( + 1 for sub_item in sub_contents if sub_item.get("type") == "file" + ) + num_folders = sum( + 1 + for sub_item in sub_contents + if sub_item.get("type") == "directory" + ) + + if num_files == 0 and num_folders == 0: + size = " (empty folder)" + elif num_files == 0: + size = f" ({num_folders} subfolder{'s' if num_folders > 1 else ''})" + elif num_folders == 0: + size = f" ({num_files} file{'s' if num_files > 1 else ''})" + else: + size = f" ({num_files} file{'s' if num_files > 1 else ''}, {num_folders} subfolder{'s' if num_folders > 1 else ''})" + elif display_size and item.get("type") == "file": + size = f" ({format_bytes(item.get('size', 0))})" + else: + size = "" + + result.append(f"{prefix}{branch}{name}{size}") + + if item.get("type") == "directory" and recursion_limit > 0: + result.append( + self.tree( + path=item.get("name", ""), + recursion_limit=recursion_limit - 1, + max_display=max_display, + display_size=display_size, + prefix=new_prefix, + is_last=is_last_item, + first=False, + indent_size=indent_size, + ) + ) + + if remaining_count > 0: + more_message = f"{remaining_count} more item(s) not displayed." + result.append( + f"{prefix}{'└' + ('─' * (indent_size - 2))} {more_message}" + ) + + return "\n".join(_ for _ in result if _) + + # ------------------------------------------------------------------------ + # Aliases + + def read_bytes(self, path, start=None, end=None, **kwargs): + """Alias of `AbstractFileSystem.cat_file`.""" + return self.cat_file(path, start=start, end=end, **kwargs) + + def write_bytes(self, path, value, **kwargs): + """Alias of `AbstractFileSystem.pipe_file`.""" + self.pipe_file(path, value, **kwargs) + + def makedir(self, path, create_parents=True, **kwargs): + """Alias of `AbstractFileSystem.mkdir`.""" + return self.mkdir(path, create_parents=create_parents, **kwargs) + + def mkdirs(self, path, exist_ok=False): + """Alias of `AbstractFileSystem.makedirs`.""" + return self.makedirs(path, exist_ok=exist_ok) + + def listdir(self, path, detail=True, **kwargs): + """Alias of `AbstractFileSystem.ls`.""" + return self.ls(path, detail=detail, **kwargs) + + def cp(self, path1, path2, **kwargs): + """Alias of `AbstractFileSystem.copy`.""" + return self.copy(path1, path2, **kwargs) + + def move(self, path1, path2, **kwargs): + """Alias of `AbstractFileSystem.mv`.""" + return self.mv(path1, path2, **kwargs) + + def stat(self, path, **kwargs): + """Alias of `AbstractFileSystem.info`.""" + return self.info(path, **kwargs) + + def disk_usage(self, path, total=True, maxdepth=None, **kwargs): + """Alias of `AbstractFileSystem.du`.""" + return self.du(path, total=total, maxdepth=maxdepth, **kwargs) + + def rename(self, path1, path2, **kwargs): + """Alias of `AbstractFileSystem.mv`.""" + return self.mv(path1, path2, **kwargs) + + def delete(self, path, recursive=False, maxdepth=None): + """Alias of `AbstractFileSystem.rm`.""" + return self.rm(path, recursive=recursive, maxdepth=maxdepth) + + def upload(self, lpath, rpath, recursive=False, **kwargs): + """Alias of `AbstractFileSystem.put`.""" + return self.put(lpath, rpath, recursive=recursive, **kwargs) + + def download(self, rpath, lpath, recursive=False, **kwargs): + """Alias of `AbstractFileSystem.get`.""" + return self.get(rpath, lpath, recursive=recursive, **kwargs) + + def sign(self, path, expiration=100, **kwargs): + """Create a signed URL representing the given path + + Some implementations allow temporary URLs to be generated, as a + way of delegating credentials. + + Parameters + ---------- + path : str + The path on the filesystem + expiration : int + Number of seconds to enable the URL for (if supported) + + Returns + ------- + URL : str + The signed URL + + Raises + ------ + NotImplementedError : if method is not implemented for a filesystem + """ + raise NotImplementedError("Sign is not implemented for this filesystem") + + def _isfilestore(self): + # Originally inherited from pyarrow DaskFileSystem. Keeping this + # here for backwards compatibility as long as pyarrow uses its + # legacy fsspec-compatible filesystems and thus accepts fsspec + # filesystems as well + return False + + +class AbstractBufferedFile(io.IOBase): + """Convenient class to derive from to provide buffering + + In the case that the backend does not provide a pythonic file-like object + already, this class contains much of the logic to build one. The only + methods that need to be overridden are ``_upload_chunk``, + ``_initiate_upload`` and ``_fetch_range``. + """ + + DEFAULT_BLOCK_SIZE = 5 * 2**20 + _details = None + + def __init__( + self, + fs, + path, + mode="rb", + block_size="default", + autocommit=True, + cache_type="readahead", + cache_options=None, + size=None, + **kwargs, + ): + """ + Template for files with buffered reading and writing + + Parameters + ---------- + fs: instance of FileSystem + path: str + location in file-system + mode: str + Normal file modes. Currently only 'wb', 'ab' or 'rb'. Some file + systems may be read-only, and some may not support append. + block_size: int + Buffer size for reading or writing, 'default' for class default + autocommit: bool + Whether to write to final destination; may only impact what + happens when file is being closed. + cache_type: {"readahead", "none", "mmap", "bytes"}, default "readahead" + Caching policy in read mode. See the definitions in ``core``. + cache_options : dict + Additional options passed to the constructor for the cache specified + by `cache_type`. + size: int + If given and in read mode, suppressed having to look up the file size + kwargs: + Gets stored as self.kwargs + """ + from .core import caches + + self.path = path + self.fs = fs + self.mode = mode + self.blocksize = ( + self.DEFAULT_BLOCK_SIZE if block_size in ["default", None] else block_size + ) + self.loc = 0 + self.autocommit = autocommit + self.end = None + self.start = None + self.closed = False + + if cache_options is None: + cache_options = {} + + if "trim" in kwargs: + warnings.warn( + "Passing 'trim' to control the cache behavior has been deprecated. " + "Specify it within the 'cache_options' argument instead.", + FutureWarning, + ) + cache_options["trim"] = kwargs.pop("trim") + + self.kwargs = kwargs + + if mode not in {"ab", "rb", "wb", "xb"}: + raise NotImplementedError("File mode not supported") + if mode == "rb": + if size is not None: + self.size = size + else: + self.size = self.details["size"] + self.cache = caches[cache_type]( + self.blocksize, self._fetch_range, self.size, **cache_options + ) + else: + self.buffer = io.BytesIO() + self.offset = None + self.forced = False + self.location = None + + @property + def details(self): + if self._details is None: + self._details = self.fs.info(self.path) + return self._details + + @details.setter + def details(self, value): + self._details = value + self.size = value["size"] + + @property + def full_name(self): + return _unstrip_protocol(self.path, self.fs) + + @property + def closed(self): + # get around this attr being read-only in IOBase + # use getattr here, since this can be called during del + return getattr(self, "_closed", True) + + @closed.setter + def closed(self, c): + self._closed = c + + def __hash__(self): + if "w" in self.mode: + return id(self) + else: + return int(tokenize(self.details), 16) + + def __eq__(self, other): + """Files are equal if they have the same checksum, only in read mode""" + if self is other: + return True + return ( + isinstance(other, type(self)) + and self.mode == "rb" + and other.mode == "rb" + and hash(self) == hash(other) + ) + + def commit(self): + """Move from temp to final destination""" + + def discard(self): + """Throw away temporary file""" + + def info(self): + """File information about this path""" + if self.readable(): + return self.details + else: + raise ValueError("Info not available while writing") + + def tell(self): + """Current file location""" + return self.loc + + def seek(self, loc, whence=0): + """Set current file location + + Parameters + ---------- + loc: int + byte location + whence: {0, 1, 2} + from start of file, current location or end of file, resp. + """ + loc = int(loc) + if not self.mode == "rb": + raise OSError(ESPIPE, "Seek only available in read mode") + if whence == 0: + nloc = loc + elif whence == 1: + nloc = self.loc + loc + elif whence == 2: + nloc = self.size + loc + else: + raise ValueError(f"invalid whence ({whence}, should be 0, 1 or 2)") + if nloc < 0: + raise ValueError("Seek before start of file") + self.loc = nloc + return self.loc + + def write(self, data): + """ + Write data to buffer. + + Buffer only sent on flush() or if buffer is greater than + or equal to blocksize. + + Parameters + ---------- + data: bytes + Set of bytes to be written. + """ + if not self.writable(): + raise ValueError("File not in write mode") + if self.closed: + raise ValueError("I/O operation on closed file.") + if self.forced: + raise ValueError("This file has been force-flushed, can only close") + out = self.buffer.write(data) + self.loc += out + if self.buffer.tell() >= self.blocksize: + self.flush() + return out + + def flush(self, force=False): + """ + Write buffered data to backend store. + + Writes the current buffer, if it is larger than the block-size, or if + the file is being closed. + + Parameters + ---------- + force: bool + When closing, write the last block even if it is smaller than + blocks are allowed to be. Disallows further writing to this file. + """ + + if self.closed: + raise ValueError("Flush on closed file") + if force and self.forced: + raise ValueError("Force flush cannot be called more than once") + if force: + self.forced = True + + if self.readable(): + # no-op to flush on read-mode + return + + if not force and self.buffer.tell() < self.blocksize: + # Defer write on small block + return + + if self.offset is None: + # Initialize a multipart upload + self.offset = 0 + try: + self._initiate_upload() + except: + self.closed = True + raise + + if self._upload_chunk(final=force) is not False: + self.offset += self.buffer.seek(0, 2) + self.buffer = io.BytesIO() + + def _upload_chunk(self, final=False): + """Write one part of a multi-block file upload + + Parameters + ========== + final: bool + This is the last block, so should complete file, if + self.autocommit is True. + """ + # may not yet have been initialized, may need to call _initialize_upload + + def _initiate_upload(self): + """Create remote file/upload""" + pass + + def _fetch_range(self, start, end): + """Get the specified set of bytes from remote""" + return self.fs.cat_file(self.path, start=start, end=end) + + def read(self, length=-1): + """ + Return data from cache, or fetch pieces as necessary + + Parameters + ---------- + length: int (-1) + Number of bytes to read; if <0, all remaining bytes. + """ + length = -1 if length is None else int(length) + if self.mode != "rb": + raise ValueError("File not in read mode") + if length < 0: + length = self.size - self.loc + if self.closed: + raise ValueError("I/O operation on closed file.") + if length == 0: + # don't even bother calling fetch + return b"" + out = self.cache._fetch(self.loc, self.loc + length) + + logger.debug( + "%s read: %i - %i %s", + self, + self.loc, + self.loc + length, + self.cache._log_stats(), + ) + self.loc += len(out) + return out + + def readinto(self, b): + """mirrors builtin file's readinto method + + https://docs.python.org/3/library/io.html#io.RawIOBase.readinto + """ + out = memoryview(b).cast("B") + data = self.read(out.nbytes) + out[: len(data)] = data + return len(data) + + def readuntil(self, char=b"\n", blocks=None): + """Return data between current position and first occurrence of char + + char is included in the output, except if the end of the tile is + encountered first. + + Parameters + ---------- + char: bytes + Thing to find + blocks: None or int + How much to read in each go. Defaults to file blocksize - which may + mean a new read on every call. + """ + out = [] + while True: + start = self.tell() + part = self.read(blocks or self.blocksize) + if len(part) == 0: + break + found = part.find(char) + if found > -1: + out.append(part[: found + len(char)]) + self.seek(start + found + len(char)) + break + out.append(part) + return b"".join(out) + + def readline(self): + """Read until first occurrence of newline character + + Note that, because of character encoding, this is not necessarily a + true line ending. + """ + return self.readuntil(b"\n") + + def __next__(self): + out = self.readline() + if out: + return out + raise StopIteration + + def __iter__(self): + return self + + def readlines(self): + """Return all data, split by the newline character""" + data = self.read() + lines = data.split(b"\n") + out = [l + b"\n" for l in lines[:-1]] + if data.endswith(b"\n"): + return out + else: + return out + [lines[-1]] + # return list(self) ??? + + def readinto1(self, b): + return self.readinto(b) + + def close(self): + """Close file + + Finalizes writes, discards cache + """ + if getattr(self, "_unclosable", False): + return + if self.closed: + return + try: + if self.mode == "rb": + self.cache = None + else: + if not self.forced: + self.flush(force=True) + + if self.fs is not None: + self.fs.invalidate_cache(self.path) + self.fs.invalidate_cache(self.fs._parent(self.path)) + finally: + self.closed = True + + def readable(self): + """Whether opened for reading""" + return "r" in self.mode and not self.closed + + def seekable(self): + """Whether is seekable (only in read mode)""" + return self.readable() + + def writable(self): + """Whether opened for writing""" + return self.mode in {"wb", "ab", "xb"} and not self.closed + + def __reduce__(self): + if self.mode != "rb": + raise RuntimeError("Pickling a writeable file is not supported") + + return reopen, ( + self.fs, + self.path, + self.mode, + self.blocksize, + self.loc, + self.size, + self.autocommit, + self.cache.name if self.cache else "none", + self.kwargs, + ) + + def __del__(self): + if not self.closed: + self.close() + + def __str__(self): + return f"" + + __repr__ = __str__ + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + + +def reopen(fs, path, mode, blocksize, loc, size, autocommit, cache_type, kwargs): + file = fs.open( + path, + mode=mode, + block_size=blocksize, + autocommit=autocommit, + cache_type=cache_type, + size=size, + **kwargs, + ) + if loc > 0: + file.seek(loc) + return file diff --git a/meow/lib/python3.13/site-packages/fsspec/transaction.py b/meow/lib/python3.13/site-packages/fsspec/transaction.py new file mode 100644 index 0000000000000000000000000000000000000000..77293f63ecc5f611e19d849ef236d53e9c258efc --- /dev/null +++ b/meow/lib/python3.13/site-packages/fsspec/transaction.py @@ -0,0 +1,90 @@ +from collections import deque + + +class Transaction: + """Filesystem transaction write context + + Gathers files for deferred commit or discard, so that several write + operations can be finalized semi-atomically. This works by having this + instance as the ``.transaction`` attribute of the given filesystem + """ + + def __init__(self, fs, **kwargs): + """ + Parameters + ---------- + fs: FileSystem instance + """ + self.fs = fs + self.files = deque() + + def __enter__(self): + self.start() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """End transaction and commit, if exit is not due to exception""" + # only commit if there was no exception + self.complete(commit=exc_type is None) + if self.fs: + self.fs._intrans = False + self.fs._transaction = None + self.fs = None + + def start(self): + """Start a transaction on this FileSystem""" + self.files = deque() # clean up after previous failed completions + self.fs._intrans = True + + def complete(self, commit=True): + """Finish transaction: commit or discard all deferred files""" + while self.files: + f = self.files.popleft() + if commit: + f.commit() + else: + f.discard() + self.fs._intrans = False + self.fs._transaction = None + self.fs = None + + +class FileActor: + def __init__(self): + self.files = [] + + def commit(self): + for f in self.files: + f.commit() + self.files.clear() + + def discard(self): + for f in self.files: + f.discard() + self.files.clear() + + def append(self, f): + self.files.append(f) + + +class DaskTransaction(Transaction): + def __init__(self, fs): + """ + Parameters + ---------- + fs: FileSystem instance + """ + import distributed + + super().__init__(fs) + client = distributed.default_client() + self.files = client.submit(FileActor, actor=True).result() + + def complete(self, commit=True): + """Finish transaction: commit or discard all deferred files""" + if commit: + self.files.commit().result() + else: + self.files.discard().result() + self.fs._intrans = False + self.fs = None diff --git a/meow/lib/python3.13/site-packages/fsspec/utils.py b/meow/lib/python3.13/site-packages/fsspec/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..917af8d19c560212a9757d11a16213f54ec77356 --- /dev/null +++ b/meow/lib/python3.13/site-packages/fsspec/utils.py @@ -0,0 +1,739 @@ +from __future__ import annotations + +import contextlib +import logging +import math +import os +import re +import sys +import tempfile +from functools import partial +from hashlib import md5 +from importlib.metadata import version +from typing import ( + IO, + TYPE_CHECKING, + Any, + Callable, + Iterable, + Iterator, + Sequence, + TypeVar, +) +from urllib.parse import urlsplit + +if TYPE_CHECKING: + import pathlib + + from typing_extensions import TypeGuard + + from fsspec.spec import AbstractFileSystem + + +DEFAULT_BLOCK_SIZE = 5 * 2**20 + +T = TypeVar("T") + + +def infer_storage_options( + urlpath: str, inherit_storage_options: dict[str, Any] | None = None +) -> dict[str, Any]: + """Infer storage options from URL path and merge it with existing storage + options. + + Parameters + ---------- + urlpath: str or unicode + Either local absolute file path or URL (hdfs://namenode:8020/file.csv) + inherit_storage_options: dict (optional) + Its contents will get merged with the inferred information from the + given path + + Returns + ------- + Storage options dict. + + Examples + -------- + >>> infer_storage_options('/mnt/datasets/test.csv') # doctest: +SKIP + {"protocol": "file", "path", "/mnt/datasets/test.csv"} + >>> infer_storage_options( + ... 'hdfs://username:pwd@node:123/mnt/datasets/test.csv?q=1', + ... inherit_storage_options={'extra': 'value'}, + ... ) # doctest: +SKIP + {"protocol": "hdfs", "username": "username", "password": "pwd", + "host": "node", "port": 123, "path": "/mnt/datasets/test.csv", + "url_query": "q=1", "extra": "value"} + """ + # Handle Windows paths including disk name in this special case + if ( + re.match(r"^[a-zA-Z]:[\\/]", urlpath) + or re.match(r"^[a-zA-Z0-9]+://", urlpath) is None + ): + return {"protocol": "file", "path": urlpath} + + parsed_path = urlsplit(urlpath) + protocol = parsed_path.scheme or "file" + if parsed_path.fragment: + path = "#".join([parsed_path.path, parsed_path.fragment]) + else: + path = parsed_path.path + if protocol == "file": + # Special case parsing file protocol URL on Windows according to: + # https://msdn.microsoft.com/en-us/library/jj710207.aspx + windows_path = re.match(r"^/([a-zA-Z])[:|]([\\/].*)$", path) + if windows_path: + drive, path = windows_path.groups() + path = f"{drive}:{path}" + + if protocol in ["http", "https"]: + # for HTTP, we don't want to parse, as requests will anyway + return {"protocol": protocol, "path": urlpath} + + options: dict[str, Any] = {"protocol": protocol, "path": path} + + if parsed_path.netloc: + # Parse `hostname` from netloc manually because `parsed_path.hostname` + # lowercases the hostname which is not always desirable (e.g. in S3): + # https://github.com/dask/dask/issues/1417 + options["host"] = parsed_path.netloc.rsplit("@", 1)[-1].rsplit(":", 1)[0] + + if protocol in ("s3", "s3a", "gcs", "gs"): + options["path"] = options["host"] + options["path"] + else: + options["host"] = options["host"] + if parsed_path.port: + options["port"] = parsed_path.port + if parsed_path.username: + options["username"] = parsed_path.username + if parsed_path.password: + options["password"] = parsed_path.password + + if parsed_path.query: + options["url_query"] = parsed_path.query + if parsed_path.fragment: + options["url_fragment"] = parsed_path.fragment + + if inherit_storage_options: + update_storage_options(options, inherit_storage_options) + + return options + + +def update_storage_options( + options: dict[str, Any], inherited: dict[str, Any] | None = None +) -> None: + if not inherited: + inherited = {} + collisions = set(options) & set(inherited) + if collisions: + for collision in collisions: + if options.get(collision) != inherited.get(collision): + raise KeyError( + f"Collision between inferred and specified storage " + f"option:\n{collision}" + ) + options.update(inherited) + + +# Compression extensions registered via fsspec.compression.register_compression +compressions: dict[str, str] = {} + + +def infer_compression(filename: str) -> str | None: + """Infer compression, if available, from filename. + + Infer a named compression type, if registered and available, from filename + extension. This includes builtin (gz, bz2, zip) compressions, as well as + optional compressions. See fsspec.compression.register_compression. + """ + extension = os.path.splitext(filename)[-1].strip(".").lower() + if extension in compressions: + return compressions[extension] + return None + + +def build_name_function(max_int: float) -> Callable[[int], str]: + """Returns a function that receives a single integer + and returns it as a string padded by enough zero characters + to align with maximum possible integer + + >>> name_f = build_name_function(57) + + >>> name_f(7) + '07' + >>> name_f(31) + '31' + >>> build_name_function(1000)(42) + '0042' + >>> build_name_function(999)(42) + '042' + >>> build_name_function(0)(0) + '0' + """ + # handle corner cases max_int is 0 or exact power of 10 + max_int += 1e-8 + + pad_length = int(math.ceil(math.log10(max_int))) + + def name_function(i: int) -> str: + return str(i).zfill(pad_length) + + return name_function + + +def seek_delimiter(file: IO[bytes], delimiter: bytes, blocksize: int) -> bool: + r"""Seek current file to file start, file end, or byte after delimiter seq. + + Seeks file to next chunk delimiter, where chunks are defined on file start, + a delimiting sequence, and file end. Use file.tell() to see location afterwards. + Note that file start is a valid split, so must be at offset > 0 to seek for + delimiter. + + Parameters + ---------- + file: a file + delimiter: bytes + a delimiter like ``b'\n'`` or message sentinel, matching file .read() type + blocksize: int + Number of bytes to read from the file at once. + + + Returns + ------- + Returns True if a delimiter was found, False if at file start or end. + + """ + + if file.tell() == 0: + # beginning-of-file, return without seek + return False + + # Interface is for binary IO, with delimiter as bytes, but initialize last + # with result of file.read to preserve compatibility with text IO. + last: bytes | None = None + while True: + current = file.read(blocksize) + if not current: + # end-of-file without delimiter + return False + full = last + current if last else current + try: + if delimiter in full: + i = full.index(delimiter) + file.seek(file.tell() - (len(full) - i) + len(delimiter)) + return True + elif len(current) < blocksize: + # end-of-file without delimiter + return False + except (OSError, ValueError): + pass + last = full[-len(delimiter) :] + + +def read_block( + f: IO[bytes], + offset: int, + length: int | None, + delimiter: bytes | None = None, + split_before: bool = False, +) -> bytes: + """Read a block of bytes from a file + + Parameters + ---------- + f: File + Open file + offset: int + Byte offset to start read + length: int + Number of bytes to read, read through end of file if None + delimiter: bytes (optional) + Ensure reading starts and stops at delimiter bytestring + split_before: bool (optional) + Start/stop read *before* delimiter bytestring. + + + If using the ``delimiter=`` keyword argument we ensure that the read + starts and stops at delimiter boundaries that follow the locations + ``offset`` and ``offset + length``. If ``offset`` is zero then we + start at zero, regardless of delimiter. The bytestring returned WILL + include the terminating delimiter string. + + Examples + -------- + + >>> from io import BytesIO # doctest: +SKIP + >>> f = BytesIO(b'Alice, 100\\nBob, 200\\nCharlie, 300') # doctest: +SKIP + >>> read_block(f, 0, 13) # doctest: +SKIP + b'Alice, 100\\nBo' + + >>> read_block(f, 0, 13, delimiter=b'\\n') # doctest: +SKIP + b'Alice, 100\\nBob, 200\\n' + + >>> read_block(f, 10, 10, delimiter=b'\\n') # doctest: +SKIP + b'Bob, 200\\nCharlie, 300' + """ + if delimiter: + f.seek(offset) + found_start_delim = seek_delimiter(f, delimiter, 2**16) + if length is None: + return f.read() + start = f.tell() + length -= start - offset + + f.seek(start + length) + found_end_delim = seek_delimiter(f, delimiter, 2**16) + end = f.tell() + + # Adjust split location to before delimiter if seek found the + # delimiter sequence, not start or end of file. + if found_start_delim and split_before: + start -= len(delimiter) + + if found_end_delim and split_before: + end -= len(delimiter) + + offset = start + length = end - start + + f.seek(offset) + + # TODO: allow length to be None and read to the end of the file? + assert length is not None + b = f.read(length) + return b + + +def tokenize(*args: Any, **kwargs: Any) -> str: + """Deterministic token + + (modified from dask.base) + + >>> tokenize([1, 2, '3']) + '9d71491b50023b06fc76928e6eddb952' + + >>> tokenize('Hello') == tokenize('Hello') + True + """ + if kwargs: + args += (kwargs,) + try: + h = md5(str(args).encode()) + except ValueError: + # FIPS systems: https://github.com/fsspec/filesystem_spec/issues/380 + h = md5(str(args).encode(), usedforsecurity=False) + return h.hexdigest() + + +def stringify_path(filepath: str | os.PathLike[str] | pathlib.Path) -> str: + """Attempt to convert a path-like object to a string. + + Parameters + ---------- + filepath: object to be converted + + Returns + ------- + filepath_str: maybe a string version of the object + + Notes + ----- + Objects supporting the fspath protocol are coerced according to its + __fspath__ method. + + For backwards compatibility with older Python version, pathlib.Path + objects are specially coerced. + + Any other object is passed through unchanged, which includes bytes, + strings, buffers, or anything else that's not even path-like. + """ + if isinstance(filepath, str): + return filepath + elif hasattr(filepath, "__fspath__"): + return filepath.__fspath__() + elif hasattr(filepath, "path"): + return filepath.path + else: + return filepath # type: ignore[return-value] + + +def make_instance( + cls: Callable[..., T], args: Sequence[Any], kwargs: dict[str, Any] +) -> T: + inst = cls(*args, **kwargs) + inst._determine_worker() # type: ignore[attr-defined] + return inst + + +def common_prefix(paths: Iterable[str]) -> str: + """For a list of paths, find the shortest prefix common to all""" + parts = [p.split("/") for p in paths] + lmax = min(len(p) for p in parts) + end = 0 + for i in range(lmax): + end = all(p[i] == parts[0][i] for p in parts) + if not end: + break + i += end + return "/".join(parts[0][:i]) + + +def other_paths( + paths: list[str], + path2: str | list[str], + exists: bool = False, + flatten: bool = False, +) -> list[str]: + """In bulk file operations, construct a new file tree from a list of files + + Parameters + ---------- + paths: list of str + The input file tree + path2: str or list of str + Root to construct the new list in. If this is already a list of str, we just + assert it has the right number of elements. + exists: bool (optional) + For a str destination, it is already exists (and is a dir), files should + end up inside. + flatten: bool (optional) + Whether to flatten the input directory tree structure so that the output files + are in the same directory. + + Returns + ------- + list of str + """ + + if isinstance(path2, str): + path2 = path2.rstrip("/") + + if flatten: + path2 = ["/".join((path2, p.split("/")[-1])) for p in paths] + else: + cp = common_prefix(paths) + if exists: + cp = cp.rsplit("/", 1)[0] + if not cp and all(not s.startswith("/") for s in paths): + path2 = ["/".join([path2, p]) for p in paths] + else: + path2 = [p.replace(cp, path2, 1) for p in paths] + else: + assert len(paths) == len(path2) + return path2 + + +def is_exception(obj: Any) -> bool: + return isinstance(obj, BaseException) + + +def isfilelike(f: Any) -> TypeGuard[IO[bytes]]: + return all(hasattr(f, attr) for attr in ["read", "close", "tell"]) + + +def get_protocol(url: str) -> str: + url = stringify_path(url) + parts = re.split(r"(\:\:|\://)", url, maxsplit=1) + if len(parts) > 1: + return parts[0] + return "file" + + +def can_be_local(path: str) -> bool: + """Can the given URL be used with open_local?""" + from fsspec import get_filesystem_class + + try: + return getattr(get_filesystem_class(get_protocol(path)), "local_file", False) + except (ValueError, ImportError): + # not in registry or import failed + return False + + +def get_package_version_without_import(name: str) -> str | None: + """For given package name, try to find the version without importing it + + Import and package.__version__ is still the backup here, so an import + *might* happen. + + Returns either the version string, or None if the package + or the version was not readily found. + """ + if name in sys.modules: + mod = sys.modules[name] + if hasattr(mod, "__version__"): + return mod.__version__ + try: + return version(name) + except: # noqa: E722 + pass + try: + import importlib + + mod = importlib.import_module(name) + return mod.__version__ + except (ImportError, AttributeError): + return None + + +def setup_logging( + logger: logging.Logger | None = None, + logger_name: str | None = None, + level: str = "DEBUG", + clear: bool = True, +) -> logging.Logger: + if logger is None and logger_name is None: + raise ValueError("Provide either logger object or logger name") + logger = logger or logging.getLogger(logger_name) + handle = logging.StreamHandler() + formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s -- %(message)s" + ) + handle.setFormatter(formatter) + if clear: + logger.handlers.clear() + logger.addHandler(handle) + logger.setLevel(level) + return logger + + +def _unstrip_protocol(name: str, fs: AbstractFileSystem) -> str: + return fs.unstrip_protocol(name) + + +def mirror_from( + origin_name: str, methods: Iterable[str] +) -> Callable[[type[T]], type[T]]: + """Mirror attributes and methods from the given + origin_name attribute of the instance to the + decorated class""" + + def origin_getter(method: str, self: Any) -> Any: + origin = getattr(self, origin_name) + return getattr(origin, method) + + def wrapper(cls: type[T]) -> type[T]: + for method in methods: + wrapped_method = partial(origin_getter, method) + setattr(cls, method, property(wrapped_method)) + return cls + + return wrapper + + +@contextlib.contextmanager +def nullcontext(obj: T) -> Iterator[T]: + yield obj + + +def merge_offset_ranges( + paths: list[str], + starts: list[int] | int, + ends: list[int] | int, + max_gap: int = 0, + max_block: int | None = None, + sort: bool = True, +) -> tuple[list[str], list[int], list[int]]: + """Merge adjacent byte-offset ranges when the inter-range + gap is <= `max_gap`, and when the merged byte range does not + exceed `max_block` (if specified). By default, this function + will re-order the input paths and byte ranges to ensure sorted + order. If the user can guarantee that the inputs are already + sorted, passing `sort=False` will skip the re-ordering. + """ + # Check input + if not isinstance(paths, list): + raise TypeError + if not isinstance(starts, list): + starts = [starts] * len(paths) + if not isinstance(ends, list): + ends = [ends] * len(paths) + if len(starts) != len(paths) or len(ends) != len(paths): + raise ValueError + + # Early Return + if len(starts) <= 1: + return paths, starts, ends + + starts = [s or 0 for s in starts] + # Sort by paths and then ranges if `sort=True` + if sort: + paths, starts, ends = ( + list(v) + for v in zip( + *sorted( + zip(paths, starts, ends), + ) + ) + ) + + if paths: + # Loop through the coupled `paths`, `starts`, and + # `ends`, and merge adjacent blocks when appropriate + new_paths = paths[:1] + new_starts = starts[:1] + new_ends = ends[:1] + for i in range(1, len(paths)): + if paths[i] == paths[i - 1] and new_ends[-1] is None: + continue + elif ( + paths[i] != paths[i - 1] + or ((starts[i] - new_ends[-1]) > max_gap) + or (max_block is not None and (ends[i] - new_starts[-1]) > max_block) + ): + # Cannot merge with previous block. + # Add new `paths`, `starts`, and `ends` elements + new_paths.append(paths[i]) + new_starts.append(starts[i]) + new_ends.append(ends[i]) + else: + # Merge with previous block by updating the + # last element of `ends` + new_ends[-1] = ends[i] + return new_paths, new_starts, new_ends + + # `paths` is empty. Just return input lists + return paths, starts, ends + + +def file_size(filelike: IO[bytes]) -> int: + """Find length of any open read-mode file-like""" + pos = filelike.tell() + try: + return filelike.seek(0, 2) + finally: + filelike.seek(pos) + + +@contextlib.contextmanager +def atomic_write(path: str, mode: str = "wb"): + """ + A context manager that opens a temporary file next to `path` and, on exit, + replaces `path` with the temporary file, thereby updating `path` + atomically. + """ + fd, fn = tempfile.mkstemp( + dir=os.path.dirname(path), prefix=os.path.basename(path) + "-" + ) + try: + with open(fd, mode) as fp: + yield fp + except BaseException: + with contextlib.suppress(FileNotFoundError): + os.unlink(fn) + raise + else: + os.replace(fn, path) + + +def _translate(pat, STAR, QUESTION_MARK): + # Copied from: https://github.com/python/cpython/pull/106703. + res: list[str] = [] + add = res.append + i, n = 0, len(pat) + while i < n: + c = pat[i] + i = i + 1 + if c == "*": + # compress consecutive `*` into one + if (not res) or res[-1] is not STAR: + add(STAR) + elif c == "?": + add(QUESTION_MARK) + elif c == "[": + j = i + if j < n and pat[j] == "!": + j = j + 1 + if j < n and pat[j] == "]": + j = j + 1 + while j < n and pat[j] != "]": + j = j + 1 + if j >= n: + add("\\[") + else: + stuff = pat[i:j] + if "-" not in stuff: + stuff = stuff.replace("\\", r"\\") + else: + chunks = [] + k = i + 2 if pat[i] == "!" else i + 1 + while True: + k = pat.find("-", k, j) + if k < 0: + break + chunks.append(pat[i:k]) + i = k + 1 + k = k + 3 + chunk = pat[i:j] + if chunk: + chunks.append(chunk) + else: + chunks[-1] += "-" + # Remove empty ranges -- invalid in RE. + for k in range(len(chunks) - 1, 0, -1): + if chunks[k - 1][-1] > chunks[k][0]: + chunks[k - 1] = chunks[k - 1][:-1] + chunks[k][1:] + del chunks[k] + # Escape backslashes and hyphens for set difference (--). + # Hyphens that create ranges shouldn't be escaped. + stuff = "-".join( + s.replace("\\", r"\\").replace("-", r"\-") for s in chunks + ) + # Escape set operations (&&, ~~ and ||). + stuff = re.sub(r"([&~|])", r"\\\1", stuff) + i = j + 1 + if not stuff: + # Empty range: never match. + add("(?!)") + elif stuff == "!": + # Negated empty range: match any character. + add(".") + else: + if stuff[0] == "!": + stuff = "^" + stuff[1:] + elif stuff[0] in ("^", "["): + stuff = "\\" + stuff + add(f"[{stuff}]") + else: + add(re.escape(c)) + assert i == n + return res + + +def glob_translate(pat): + # Copied from: https://github.com/python/cpython/pull/106703. + # The keyword parameters' values are fixed to: + # recursive=True, include_hidden=True, seps=None + """Translate a pathname with shell wildcards to a regular expression.""" + if os.path.altsep: + seps = os.path.sep + os.path.altsep + else: + seps = os.path.sep + escaped_seps = "".join(map(re.escape, seps)) + any_sep = f"[{escaped_seps}]" if len(seps) > 1 else escaped_seps + not_sep = f"[^{escaped_seps}]" + one_last_segment = f"{not_sep}+" + one_segment = f"{one_last_segment}{any_sep}" + any_segments = f"(?:.+{any_sep})?" + any_last_segments = ".*" + results = [] + parts = re.split(any_sep, pat) + last_part_idx = len(parts) - 1 + for idx, part in enumerate(parts): + if part == "*": + results.append(one_segment if idx < last_part_idx else one_last_segment) + continue + if part == "**": + results.append(any_segments if idx < last_part_idx else any_last_segments) + continue + elif "**" in part: + raise ValueError( + "Invalid pattern: '**' can only be an entire path component" + ) + if part: + results.extend(_translate(part, f"{not_sep}*", not_sep)) + if idx < last_part_idx: + results.append(any_sep) + res = "".join(results) + return rf"(?s:{res})\Z" diff --git a/meow/lib/python3.13/site-packages/huggingface_hub-0.27.0.dist-info/INSTALLER b/meow/lib/python3.13/site-packages/huggingface_hub-0.27.0.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/meow/lib/python3.13/site-packages/huggingface_hub-0.27.0.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/meow/lib/python3.13/site-packages/huggingface_hub-0.27.0.dist-info/METADATA b/meow/lib/python3.13/site-packages/huggingface_hub-0.27.0.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..17749013db439bb116a3eb463cc6fdf5ec234780 --- /dev/null +++ b/meow/lib/python3.13/site-packages/huggingface_hub-0.27.0.dist-info/METADATA @@ -0,0 +1,308 @@ +Metadata-Version: 2.1 +Name: huggingface-hub +Version: 0.27.0 +Summary: Client library to download and publish models, datasets and other repos on the huggingface.co hub +Home-page: https://github.com/huggingface/huggingface_hub +Author: Hugging Face, Inc. +Author-email: julien@huggingface.co +License: Apache +Keywords: model-hub machine-learning models natural-language-processing deep-learning pytorch pretrained-models +Platform: UNKNOWN +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: Education +Classifier: Intended Audience :: Science/Research +Classifier: License :: OSI Approved :: Apache Software License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 +Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence +Requires-Python: >=3.8.0 +Description-Content-Type: text/markdown +License-File: LICENSE +Requires-Dist: filelock +Requires-Dist: fsspec>=2023.5.0 +Requires-Dist: packaging>=20.9 +Requires-Dist: pyyaml>=5.1 +Requires-Dist: requests +Requires-Dist: tqdm>=4.42.1 +Requires-Dist: typing-extensions>=3.7.4.3 +Provides-Extra: all +Requires-Dist: InquirerPy==0.3.4; extra == "all" +Requires-Dist: aiohttp; extra == "all" +Requires-Dist: jedi; extra == "all" +Requires-Dist: Jinja2; extra == "all" +Requires-Dist: pytest<8.2.2,>=8.1.1; extra == "all" +Requires-Dist: pytest-cov; extra == "all" +Requires-Dist: pytest-env; extra == "all" +Requires-Dist: pytest-xdist; extra == "all" +Requires-Dist: pytest-vcr; extra == "all" +Requires-Dist: pytest-asyncio; extra == "all" +Requires-Dist: pytest-rerunfailures; extra == "all" +Requires-Dist: pytest-mock; extra == "all" +Requires-Dist: urllib3<2.0; extra == "all" +Requires-Dist: soundfile; extra == "all" +Requires-Dist: Pillow; extra == "all" +Requires-Dist: gradio>=4.0.0; extra == "all" +Requires-Dist: numpy; extra == "all" +Requires-Dist: fastapi; extra == "all" +Requires-Dist: ruff>=0.5.0; extra == "all" +Requires-Dist: mypy==1.5.1; extra == "all" +Requires-Dist: libcst==1.4.0; extra == "all" +Requires-Dist: typing-extensions>=4.8.0; extra == "all" +Requires-Dist: types-PyYAML; extra == "all" +Requires-Dist: types-requests; extra == "all" +Requires-Dist: types-simplejson; extra == "all" +Requires-Dist: types-toml; extra == "all" +Requires-Dist: types-tqdm; extra == "all" +Requires-Dist: types-urllib3; extra == "all" +Provides-Extra: cli +Requires-Dist: InquirerPy==0.3.4; extra == "cli" +Provides-Extra: dev +Requires-Dist: InquirerPy==0.3.4; extra == "dev" +Requires-Dist: aiohttp; extra == "dev" +Requires-Dist: jedi; extra == "dev" +Requires-Dist: Jinja2; extra == "dev" +Requires-Dist: pytest<8.2.2,>=8.1.1; extra == "dev" +Requires-Dist: pytest-cov; extra == "dev" +Requires-Dist: pytest-env; extra == "dev" +Requires-Dist: pytest-xdist; extra == "dev" +Requires-Dist: pytest-vcr; extra == "dev" +Requires-Dist: pytest-asyncio; extra == "dev" +Requires-Dist: pytest-rerunfailures; extra == "dev" +Requires-Dist: pytest-mock; extra == "dev" +Requires-Dist: urllib3<2.0; extra == "dev" +Requires-Dist: soundfile; extra == "dev" +Requires-Dist: Pillow; extra == "dev" +Requires-Dist: gradio>=4.0.0; extra == "dev" +Requires-Dist: numpy; extra == "dev" +Requires-Dist: fastapi; extra == "dev" +Requires-Dist: ruff>=0.5.0; extra == "dev" +Requires-Dist: mypy==1.5.1; extra == "dev" +Requires-Dist: libcst==1.4.0; extra == "dev" +Requires-Dist: typing-extensions>=4.8.0; extra == "dev" +Requires-Dist: types-PyYAML; extra == "dev" +Requires-Dist: types-requests; extra == "dev" +Requires-Dist: types-simplejson; extra == "dev" +Requires-Dist: types-toml; extra == "dev" +Requires-Dist: types-tqdm; extra == "dev" +Requires-Dist: types-urllib3; extra == "dev" +Provides-Extra: fastai +Requires-Dist: toml; extra == "fastai" +Requires-Dist: fastai>=2.4; extra == "fastai" +Requires-Dist: fastcore>=1.3.27; extra == "fastai" +Provides-Extra: hf_transfer +Requires-Dist: hf-transfer>=0.1.4; extra == "hf-transfer" +Provides-Extra: inference +Requires-Dist: aiohttp; extra == "inference" +Provides-Extra: quality +Requires-Dist: ruff>=0.5.0; extra == "quality" +Requires-Dist: mypy==1.5.1; extra == "quality" +Requires-Dist: libcst==1.4.0; extra == "quality" +Provides-Extra: tensorflow +Requires-Dist: tensorflow; extra == "tensorflow" +Requires-Dist: pydot; extra == "tensorflow" +Requires-Dist: graphviz; extra == "tensorflow" +Provides-Extra: tensorflow-testing +Requires-Dist: tensorflow; extra == "tensorflow-testing" +Requires-Dist: keras<3.0; extra == "tensorflow-testing" +Provides-Extra: testing +Requires-Dist: InquirerPy==0.3.4; extra == "testing" +Requires-Dist: aiohttp; extra == "testing" +Requires-Dist: jedi; extra == "testing" +Requires-Dist: Jinja2; extra == "testing" +Requires-Dist: pytest<8.2.2,>=8.1.1; extra == "testing" +Requires-Dist: pytest-cov; extra == "testing" +Requires-Dist: pytest-env; extra == "testing" +Requires-Dist: pytest-xdist; extra == "testing" +Requires-Dist: pytest-vcr; extra == "testing" +Requires-Dist: pytest-asyncio; extra == "testing" +Requires-Dist: pytest-rerunfailures; extra == "testing" +Requires-Dist: pytest-mock; extra == "testing" +Requires-Dist: urllib3<2.0; extra == "testing" +Requires-Dist: soundfile; extra == "testing" +Requires-Dist: Pillow; extra == "testing" +Requires-Dist: gradio>=4.0.0; extra == "testing" +Requires-Dist: numpy; extra == "testing" +Requires-Dist: fastapi; extra == "testing" +Provides-Extra: torch +Requires-Dist: torch; extra == "torch" +Requires-Dist: safetensors[torch]; extra == "torch" +Provides-Extra: typing +Requires-Dist: typing-extensions>=4.8.0; extra == "typing" +Requires-Dist: types-PyYAML; extra == "typing" +Requires-Dist: types-requests; extra == "typing" +Requires-Dist: types-simplejson; extra == "typing" +Requires-Dist: types-toml; extra == "typing" +Requires-Dist: types-tqdm; extra == "typing" +Requires-Dist: types-urllib3; extra == "typing" + +

+ + + + huggingface_hub library logo + +
+
+

+ +

+ The official Python client for the Huggingface Hub. +

+ +

+ Documentation + GitHub release + PyPi version + PyPI - Downloads + Code coverage +

+ +

+

+ English | + Deutsch | + हिंदी | + 한국어 | + 中文(简体) +

+

+ +--- + +**Documentation**: https://hf.co/docs/huggingface_hub + +**Source Code**: https://github.com/huggingface/huggingface_hub + +--- + +## Welcome to the huggingface_hub library + +The `huggingface_hub` library allows you to interact with the [Hugging Face Hub](https://huggingface.co/), a platform democratizing open-source Machine Learning for creators and collaborators. Discover pre-trained models and datasets for your projects or play with the thousands of machine learning apps hosted on the Hub. You can also create and share your own models, datasets and demos with the community. The `huggingface_hub` library provides a simple way to do all these things with Python. + +## Key features + +- [Download files](https://huggingface.co/docs/huggingface_hub/en/guides/download) from the Hub. +- [Upload files](https://huggingface.co/docs/huggingface_hub/en/guides/upload) to the Hub. +- [Manage your repositories](https://huggingface.co/docs/huggingface_hub/en/guides/repository). +- [Run Inference](https://huggingface.co/docs/huggingface_hub/en/guides/inference) on deployed models. +- [Search](https://huggingface.co/docs/huggingface_hub/en/guides/search) for models, datasets and Spaces. +- [Share Model Cards](https://huggingface.co/docs/huggingface_hub/en/guides/model-cards) to document your models. +- [Engage with the community](https://huggingface.co/docs/huggingface_hub/en/guides/community) through PRs and comments. + +## Installation + +Install the `huggingface_hub` package with [pip](https://pypi.org/project/huggingface-hub/): + +```bash +pip install huggingface_hub +``` + +If you prefer, you can also install it with [conda](https://huggingface.co/docs/huggingface_hub/en/installation#install-with-conda). + +In order to keep the package minimal by default, `huggingface_hub` comes with optional dependencies useful for some use cases. For example, if you want have a complete experience for Inference, run: + +```bash +pip install huggingface_hub[inference] +``` + +To learn more installation and optional dependencies, check out the [installation guide](https://huggingface.co/docs/huggingface_hub/en/installation). + +## Quick start + +### Download files + +Download a single file + +```py +from huggingface_hub import hf_hub_download + +hf_hub_download(repo_id="tiiuae/falcon-7b-instruct", filename="config.json") +``` + +Or an entire repository + +```py +from huggingface_hub import snapshot_download + +snapshot_download("stabilityai/stable-diffusion-2-1") +``` + +Files will be downloaded in a local cache folder. More details in [this guide](https://huggingface.co/docs/huggingface_hub/en/guides/manage-cache). + +### Login + +The Hugging Face Hub uses tokens to authenticate applications (see [docs](https://huggingface.co/docs/hub/security-tokens)). To log in your machine, run the following CLI: + +```bash +huggingface-cli login +# or using an environment variable +huggingface-cli login --token $HUGGINGFACE_TOKEN +``` + +### Create a repository + +```py +from huggingface_hub import create_repo + +create_repo(repo_id="super-cool-model") +``` + +### Upload files + +Upload a single file + +```py +from huggingface_hub import upload_file + +upload_file( + path_or_fileobj="/home/lysandre/dummy-test/README.md", + path_in_repo="README.md", + repo_id="lysandre/test-model", +) +``` + +Or an entire folder + +```py +from huggingface_hub import upload_folder + +upload_folder( + folder_path="/path/to/local/space", + repo_id="username/my-cool-space", + repo_type="space", +) +``` + +For details in the [upload guide](https://huggingface.co/docs/huggingface_hub/en/guides/upload). + +## Integrating to the Hub. + +We're partnering with cool open source ML libraries to provide free model hosting and versioning. You can find the existing integrations [here](https://huggingface.co/docs/hub/libraries). + +The advantages are: + +- Free model or dataset hosting for libraries and their users. +- Built-in file versioning, even with very large files, thanks to a git-based approach. +- Serverless inference API for all models publicly available. +- In-browser widgets to play with the uploaded models. +- Anyone can upload a new model for your library, they just need to add the corresponding tag for the model to be discoverable. +- Fast downloads! We use Cloudfront (a CDN) to geo-replicate downloads so they're blazing fast from anywhere on the globe. +- Usage stats and more features to come. + +If you would like to integrate your library, feel free to open an issue to begin the discussion. We wrote a [step-by-step guide](https://huggingface.co/docs/hub/adding-a-library) with ❤️ showing how to do this integration. + +## Contributions (feature requests, bugs, etc.) are super welcome 💙💚💛💜🧡❤️ + +Everyone is welcome to contribute, and we value everybody's contribution. Code is not the only way to help the community. +Answering questions, helping others, reaching out and improving the documentations are immensely valuable to the community. +We wrote a [contribution guide](https://github.com/huggingface/huggingface_hub/blob/main/CONTRIBUTING.md) to summarize +how to get started to contribute to this repository. + + diff --git a/meow/lib/python3.13/site-packages/huggingface_hub/__init__.py b/meow/lib/python3.13/site-packages/huggingface_hub/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..700622d27e5c62ab2510d8494869ee907204ae6e --- /dev/null +++ b/meow/lib/python3.13/site-packages/huggingface_hub/__init__.py @@ -0,0 +1,1028 @@ +# Copyright 2020 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# *********** +# `huggingface_hub` init has 2 modes: +# - Normal usage: +# If imported to use it, all modules and functions are lazy-loaded. This means +# they exist at top level in module but are imported only the first time they are +# used. This way, `from huggingface_hub import something` will import `something` +# quickly without the hassle of importing all the features from `huggingface_hub`. +# - Static check: +# If statically analyzed, all modules and functions are loaded normally. This way +# static typing check works properly as well as autocomplete in text editors and +# IDEs. +# +# The static model imports are done inside the `if TYPE_CHECKING:` statement at +# the bottom of this file. Since module/functions imports are duplicated, it is +# mandatory to make sure to add them twice when adding one. This is checked in the +# `make quality` command. +# +# To update the static imports, please run the following command and commit the changes. +# ``` +# # Use script +# python utils/check_static_imports.py --update-file +# +# # Or run style on codebase +# make style +# ``` +# +# *********** +# Lazy loader vendored from https://github.com/scientific-python/lazy_loader +import importlib +import os +import sys +from typing import TYPE_CHECKING + + +__version__ = "0.27.0" + +# Alphabetical order of definitions is ensured in tests +# WARNING: any comment added in this dictionary definition will be lost when +# re-generating the file ! +_SUBMOD_ATTRS = { + "_commit_scheduler": [ + "CommitScheduler", + ], + "_inference_endpoints": [ + "InferenceEndpoint", + "InferenceEndpointError", + "InferenceEndpointStatus", + "InferenceEndpointTimeoutError", + "InferenceEndpointType", + ], + "_login": [ + "auth_list", + "auth_switch", + "interpreter_login", + "login", + "logout", + "notebook_login", + ], + "_snapshot_download": [ + "snapshot_download", + ], + "_space_api": [ + "SpaceHardware", + "SpaceRuntime", + "SpaceStage", + "SpaceStorage", + "SpaceVariable", + ], + "_tensorboard_logger": [ + "HFSummaryWriter", + ], + "_webhooks_payload": [ + "WebhookPayload", + "WebhookPayloadComment", + "WebhookPayloadDiscussion", + "WebhookPayloadDiscussionChanges", + "WebhookPayloadEvent", + "WebhookPayloadMovedTo", + "WebhookPayloadRepo", + "WebhookPayloadUrl", + "WebhookPayloadWebhook", + ], + "_webhooks_server": [ + "WebhooksServer", + "webhook_endpoint", + ], + "community": [ + "Discussion", + "DiscussionComment", + "DiscussionCommit", + "DiscussionEvent", + "DiscussionStatusChange", + "DiscussionTitleChange", + "DiscussionWithDetails", + ], + "constants": [ + "CONFIG_NAME", + "FLAX_WEIGHTS_NAME", + "HUGGINGFACE_CO_URL_HOME", + "HUGGINGFACE_CO_URL_TEMPLATE", + "PYTORCH_WEIGHTS_NAME", + "REPO_TYPE_DATASET", + "REPO_TYPE_MODEL", + "REPO_TYPE_SPACE", + "TF2_WEIGHTS_NAME", + "TF_WEIGHTS_NAME", + ], + "fastai_utils": [ + "_save_pretrained_fastai", + "from_pretrained_fastai", + "push_to_hub_fastai", + ], + "file_download": [ + "HfFileMetadata", + "_CACHED_NO_EXIST", + "get_hf_file_metadata", + "hf_hub_download", + "hf_hub_url", + "try_to_load_from_cache", + ], + "hf_api": [ + "Collection", + "CollectionItem", + "CommitInfo", + "CommitOperation", + "CommitOperationAdd", + "CommitOperationCopy", + "CommitOperationDelete", + "DatasetInfo", + "GitCommitInfo", + "GitRefInfo", + "GitRefs", + "HfApi", + "ModelInfo", + "RepoUrl", + "SpaceInfo", + "User", + "UserLikes", + "WebhookInfo", + "WebhookWatchedItem", + "accept_access_request", + "add_collection_item", + "add_space_secret", + "add_space_variable", + "auth_check", + "cancel_access_request", + "change_discussion_status", + "comment_discussion", + "create_branch", + "create_collection", + "create_commit", + "create_discussion", + "create_inference_endpoint", + "create_pull_request", + "create_repo", + "create_tag", + "create_webhook", + "dataset_info", + "delete_branch", + "delete_collection", + "delete_collection_item", + "delete_file", + "delete_folder", + "delete_inference_endpoint", + "delete_repo", + "delete_space_secret", + "delete_space_storage", + "delete_space_variable", + "delete_tag", + "delete_webhook", + "disable_webhook", + "duplicate_space", + "edit_discussion_comment", + "enable_webhook", + "file_exists", + "get_collection", + "get_dataset_tags", + "get_discussion_details", + "get_full_repo_name", + "get_inference_endpoint", + "get_model_tags", + "get_paths_info", + "get_repo_discussions", + "get_safetensors_metadata", + "get_space_runtime", + "get_space_variables", + "get_token_permission", + "get_user_overview", + "get_webhook", + "grant_access", + "like", + "list_accepted_access_requests", + "list_collections", + "list_datasets", + "list_inference_endpoints", + "list_liked_repos", + "list_models", + "list_organization_members", + "list_papers", + "list_pending_access_requests", + "list_rejected_access_requests", + "list_repo_commits", + "list_repo_files", + "list_repo_likers", + "list_repo_refs", + "list_repo_tree", + "list_spaces", + "list_user_followers", + "list_user_following", + "list_webhooks", + "merge_pull_request", + "model_info", + "move_repo", + "paper_info", + "parse_safetensors_file_metadata", + "pause_inference_endpoint", + "pause_space", + "preupload_lfs_files", + "reject_access_request", + "rename_discussion", + "repo_exists", + "repo_info", + "repo_type_and_id_from_hf_id", + "request_space_hardware", + "request_space_storage", + "restart_space", + "resume_inference_endpoint", + "revision_exists", + "run_as_future", + "scale_to_zero_inference_endpoint", + "set_space_sleep_time", + "space_info", + "super_squash_history", + "unlike", + "update_collection_item", + "update_collection_metadata", + "update_inference_endpoint", + "update_repo_settings", + "update_repo_visibility", + "update_webhook", + "upload_file", + "upload_folder", + "upload_large_folder", + "whoami", + ], + "hf_file_system": [ + "HfFileSystem", + "HfFileSystemFile", + "HfFileSystemResolvedPath", + "HfFileSystemStreamFile", + ], + "hub_mixin": [ + "ModelHubMixin", + "PyTorchModelHubMixin", + ], + "inference._client": [ + "InferenceClient", + "InferenceTimeoutError", + ], + "inference._generated._async_client": [ + "AsyncInferenceClient", + ], + "inference._generated.types": [ + "AudioClassificationInput", + "AudioClassificationOutputElement", + "AudioClassificationOutputTransform", + "AudioClassificationParameters", + "AudioToAudioInput", + "AudioToAudioOutputElement", + "AutomaticSpeechRecognitionEarlyStoppingEnum", + "AutomaticSpeechRecognitionGenerationParameters", + "AutomaticSpeechRecognitionInput", + "AutomaticSpeechRecognitionOutput", + "AutomaticSpeechRecognitionOutputChunk", + "AutomaticSpeechRecognitionParameters", + "ChatCompletionInput", + "ChatCompletionInputFunctionDefinition", + "ChatCompletionInputFunctionName", + "ChatCompletionInputGrammarType", + "ChatCompletionInputGrammarTypeType", + "ChatCompletionInputMessage", + "ChatCompletionInputMessageChunk", + "ChatCompletionInputMessageChunkType", + "ChatCompletionInputStreamOptions", + "ChatCompletionInputTool", + "ChatCompletionInputToolChoiceClass", + "ChatCompletionInputToolChoiceEnum", + "ChatCompletionInputURL", + "ChatCompletionOutput", + "ChatCompletionOutputComplete", + "ChatCompletionOutputFunctionDefinition", + "ChatCompletionOutputLogprob", + "ChatCompletionOutputLogprobs", + "ChatCompletionOutputMessage", + "ChatCompletionOutputToolCall", + "ChatCompletionOutputTopLogprob", + "ChatCompletionOutputUsage", + "ChatCompletionStreamOutput", + "ChatCompletionStreamOutputChoice", + "ChatCompletionStreamOutputDelta", + "ChatCompletionStreamOutputDeltaToolCall", + "ChatCompletionStreamOutputFunction", + "ChatCompletionStreamOutputLogprob", + "ChatCompletionStreamOutputLogprobs", + "ChatCompletionStreamOutputTopLogprob", + "ChatCompletionStreamOutputUsage", + "DepthEstimationInput", + "DepthEstimationOutput", + "DocumentQuestionAnsweringInput", + "DocumentQuestionAnsweringInputData", + "DocumentQuestionAnsweringOutputElement", + "DocumentQuestionAnsweringParameters", + "FeatureExtractionInput", + "FeatureExtractionInputTruncationDirection", + "FillMaskInput", + "FillMaskOutputElement", + "FillMaskParameters", + "ImageClassificationInput", + "ImageClassificationOutputElement", + "ImageClassificationOutputTransform", + "ImageClassificationParameters", + "ImageSegmentationInput", + "ImageSegmentationOutputElement", + "ImageSegmentationParameters", + "ImageSegmentationSubtask", + "ImageToImageInput", + "ImageToImageOutput", + "ImageToImageParameters", + "ImageToImageTargetSize", + "ImageToTextEarlyStoppingEnum", + "ImageToTextGenerationParameters", + "ImageToTextInput", + "ImageToTextOutput", + "ImageToTextParameters", + "ObjectDetectionBoundingBox", + "ObjectDetectionInput", + "ObjectDetectionOutputElement", + "ObjectDetectionParameters", + "Padding", + "QuestionAnsweringInput", + "QuestionAnsweringInputData", + "QuestionAnsweringOutputElement", + "QuestionAnsweringParameters", + "SentenceSimilarityInput", + "SentenceSimilarityInputData", + "SummarizationInput", + "SummarizationOutput", + "SummarizationParameters", + "SummarizationTruncationStrategy", + "TableQuestionAnsweringInput", + "TableQuestionAnsweringInputData", + "TableQuestionAnsweringOutputElement", + "TableQuestionAnsweringParameters", + "Text2TextGenerationInput", + "Text2TextGenerationOutput", + "Text2TextGenerationParameters", + "Text2TextGenerationTruncationStrategy", + "TextClassificationInput", + "TextClassificationOutputElement", + "TextClassificationOutputTransform", + "TextClassificationParameters", + "TextGenerationInput", + "TextGenerationInputGenerateParameters", + "TextGenerationInputGrammarType", + "TextGenerationOutput", + "TextGenerationOutputBestOfSequence", + "TextGenerationOutputDetails", + "TextGenerationOutputFinishReason", + "TextGenerationOutputPrefillToken", + "TextGenerationOutputToken", + "TextGenerationStreamOutput", + "TextGenerationStreamOutputStreamDetails", + "TextGenerationStreamOutputToken", + "TextToAudioEarlyStoppingEnum", + "TextToAudioGenerationParameters", + "TextToAudioInput", + "TextToAudioOutput", + "TextToAudioParameters", + "TextToImageInput", + "TextToImageOutput", + "TextToImageParameters", + "TextToImageTargetSize", + "TextToSpeechEarlyStoppingEnum", + "TextToSpeechGenerationParameters", + "TextToSpeechInput", + "TextToSpeechOutput", + "TextToSpeechParameters", + "TokenClassificationAggregationStrategy", + "TokenClassificationInput", + "TokenClassificationOutputElement", + "TokenClassificationParameters", + "TranslationInput", + "TranslationOutput", + "TranslationParameters", + "TranslationTruncationStrategy", + "TypeEnum", + "VideoClassificationInput", + "VideoClassificationOutputElement", + "VideoClassificationOutputTransform", + "VideoClassificationParameters", + "VisualQuestionAnsweringInput", + "VisualQuestionAnsweringInputData", + "VisualQuestionAnsweringOutputElement", + "VisualQuestionAnsweringParameters", + "ZeroShotClassificationInput", + "ZeroShotClassificationOutputElement", + "ZeroShotClassificationParameters", + "ZeroShotImageClassificationInput", + "ZeroShotImageClassificationOutputElement", + "ZeroShotImageClassificationParameters", + "ZeroShotObjectDetectionBoundingBox", + "ZeroShotObjectDetectionInput", + "ZeroShotObjectDetectionOutputElement", + "ZeroShotObjectDetectionParameters", + ], + "inference_api": [ + "InferenceApi", + ], + "keras_mixin": [ + "KerasModelHubMixin", + "from_pretrained_keras", + "push_to_hub_keras", + "save_pretrained_keras", + ], + "repocard": [ + "DatasetCard", + "ModelCard", + "RepoCard", + "SpaceCard", + "metadata_eval_result", + "metadata_load", + "metadata_save", + "metadata_update", + ], + "repocard_data": [ + "CardData", + "DatasetCardData", + "EvalResult", + "ModelCardData", + "SpaceCardData", + ], + "repository": [ + "Repository", + ], + "serialization": [ + "StateDictSplit", + "get_tf_storage_size", + "get_torch_storage_id", + "get_torch_storage_size", + "load_state_dict_from_file", + "load_torch_model", + "save_torch_model", + "save_torch_state_dict", + "split_state_dict_into_shards_factory", + "split_tf_state_dict_into_shards", + "split_torch_state_dict_into_shards", + ], + "serialization._dduf": [ + "DDUFEntry", + "export_entries_as_dduf", + "export_folder_as_dduf", + "read_dduf_file", + ], + "utils": [ + "CacheNotFound", + "CachedFileInfo", + "CachedRepoInfo", + "CachedRevisionInfo", + "CorruptedCacheException", + "DeleteCacheStrategy", + "HFCacheInfo", + "HfFolder", + "cached_assets_path", + "configure_http_backend", + "dump_environment_info", + "get_session", + "get_token", + "logging", + "scan_cache_dir", + ], +} + + +def _attach(package_name, submodules=None, submod_attrs=None): + """Attach lazily loaded submodules, functions, or other attributes. + + Typically, modules import submodules and attributes as follows: + + ```py + import mysubmodule + import anothersubmodule + + from .foo import someattr + ``` + + The idea is to replace a package's `__getattr__`, `__dir__`, and + `__all__`, such that all imports work exactly the way they would + with normal imports, except that the import occurs upon first use. + + The typical way to call this function, replacing the above imports, is: + + ```python + __getattr__, __dir__, __all__ = lazy.attach( + __name__, + ['mysubmodule', 'anothersubmodule'], + {'foo': ['someattr']} + ) + ``` + This functionality requires Python 3.7 or higher. + + Args: + package_name (`str`): + Typically use `__name__`. + submodules (`set`): + List of submodules to attach. + submod_attrs (`dict`): + Dictionary of submodule -> list of attributes / functions. + These attributes are imported as they are used. + + Returns: + __getattr__, __dir__, __all__ + + """ + if submod_attrs is None: + submod_attrs = {} + + if submodules is None: + submodules = set() + else: + submodules = set(submodules) + + attr_to_modules = {attr: mod for mod, attrs in submod_attrs.items() for attr in attrs} + + __all__ = list(submodules | attr_to_modules.keys()) + + def __getattr__(name): + if name in submodules: + try: + return importlib.import_module(f"{package_name}.{name}") + except Exception as e: + print(f"Error importing {package_name}.{name}: {e}") + raise + elif name in attr_to_modules: + submod_path = f"{package_name}.{attr_to_modules[name]}" + try: + submod = importlib.import_module(submod_path) + except Exception as e: + print(f"Error importing {submod_path}: {e}") + raise + attr = getattr(submod, name) + + # If the attribute lives in a file (module) with the same + # name as the attribute, ensure that the attribute and *not* + # the module is accessible on the package. + if name == attr_to_modules[name]: + pkg = sys.modules[package_name] + pkg.__dict__[name] = attr + + return attr + else: + raise AttributeError(f"No {package_name} attribute {name}") + + def __dir__(): + return __all__ + + return __getattr__, __dir__, list(__all__) + + +__getattr__, __dir__, __all__ = _attach(__name__, submodules=[], submod_attrs=_SUBMOD_ATTRS) + +if os.environ.get("EAGER_IMPORT", ""): + for attr in __all__: + __getattr__(attr) + +# WARNING: any content below this statement is generated automatically. Any manual edit +# will be lost when re-generating this file ! +# +# To update the static imports, please run the following command and commit the changes. +# ``` +# # Use script +# python utils/check_static_imports.py --update-file +# +# # Or run style on codebase +# make style +# ``` +if TYPE_CHECKING: # pragma: no cover + from ._commit_scheduler import CommitScheduler # noqa: F401 + from ._inference_endpoints import ( + InferenceEndpoint, # noqa: F401 + InferenceEndpointError, # noqa: F401 + InferenceEndpointStatus, # noqa: F401 + InferenceEndpointTimeoutError, # noqa: F401 + InferenceEndpointType, # noqa: F401 + ) + from ._login import ( + auth_list, # noqa: F401 + auth_switch, # noqa: F401 + interpreter_login, # noqa: F401 + login, # noqa: F401 + logout, # noqa: F401 + notebook_login, # noqa: F401 + ) + from ._snapshot_download import snapshot_download # noqa: F401 + from ._space_api import ( + SpaceHardware, # noqa: F401 + SpaceRuntime, # noqa: F401 + SpaceStage, # noqa: F401 + SpaceStorage, # noqa: F401 + SpaceVariable, # noqa: F401 + ) + from ._tensorboard_logger import HFSummaryWriter # noqa: F401 + from ._webhooks_payload import ( + WebhookPayload, # noqa: F401 + WebhookPayloadComment, # noqa: F401 + WebhookPayloadDiscussion, # noqa: F401 + WebhookPayloadDiscussionChanges, # noqa: F401 + WebhookPayloadEvent, # noqa: F401 + WebhookPayloadMovedTo, # noqa: F401 + WebhookPayloadRepo, # noqa: F401 + WebhookPayloadUrl, # noqa: F401 + WebhookPayloadWebhook, # noqa: F401 + ) + from ._webhooks_server import ( + WebhooksServer, # noqa: F401 + webhook_endpoint, # noqa: F401 + ) + from .community import ( + Discussion, # noqa: F401 + DiscussionComment, # noqa: F401 + DiscussionCommit, # noqa: F401 + DiscussionEvent, # noqa: F401 + DiscussionStatusChange, # noqa: F401 + DiscussionTitleChange, # noqa: F401 + DiscussionWithDetails, # noqa: F401 + ) + from .constants import ( + CONFIG_NAME, # noqa: F401 + FLAX_WEIGHTS_NAME, # noqa: F401 + HUGGINGFACE_CO_URL_HOME, # noqa: F401 + HUGGINGFACE_CO_URL_TEMPLATE, # noqa: F401 + PYTORCH_WEIGHTS_NAME, # noqa: F401 + REPO_TYPE_DATASET, # noqa: F401 + REPO_TYPE_MODEL, # noqa: F401 + REPO_TYPE_SPACE, # noqa: F401 + TF2_WEIGHTS_NAME, # noqa: F401 + TF_WEIGHTS_NAME, # noqa: F401 + ) + from .fastai_utils import ( + _save_pretrained_fastai, # noqa: F401 + from_pretrained_fastai, # noqa: F401 + push_to_hub_fastai, # noqa: F401 + ) + from .file_download import ( + _CACHED_NO_EXIST, # noqa: F401 + HfFileMetadata, # noqa: F401 + get_hf_file_metadata, # noqa: F401 + hf_hub_download, # noqa: F401 + hf_hub_url, # noqa: F401 + try_to_load_from_cache, # noqa: F401 + ) + from .hf_api import ( + Collection, # noqa: F401 + CollectionItem, # noqa: F401 + CommitInfo, # noqa: F401 + CommitOperation, # noqa: F401 + CommitOperationAdd, # noqa: F401 + CommitOperationCopy, # noqa: F401 + CommitOperationDelete, # noqa: F401 + DatasetInfo, # noqa: F401 + GitCommitInfo, # noqa: F401 + GitRefInfo, # noqa: F401 + GitRefs, # noqa: F401 + HfApi, # noqa: F401 + ModelInfo, # noqa: F401 + RepoUrl, # noqa: F401 + SpaceInfo, # noqa: F401 + User, # noqa: F401 + UserLikes, # noqa: F401 + WebhookInfo, # noqa: F401 + WebhookWatchedItem, # noqa: F401 + accept_access_request, # noqa: F401 + add_collection_item, # noqa: F401 + add_space_secret, # noqa: F401 + add_space_variable, # noqa: F401 + auth_check, # noqa: F401 + cancel_access_request, # noqa: F401 + change_discussion_status, # noqa: F401 + comment_discussion, # noqa: F401 + create_branch, # noqa: F401 + create_collection, # noqa: F401 + create_commit, # noqa: F401 + create_discussion, # noqa: F401 + create_inference_endpoint, # noqa: F401 + create_pull_request, # noqa: F401 + create_repo, # noqa: F401 + create_tag, # noqa: F401 + create_webhook, # noqa: F401 + dataset_info, # noqa: F401 + delete_branch, # noqa: F401 + delete_collection, # noqa: F401 + delete_collection_item, # noqa: F401 + delete_file, # noqa: F401 + delete_folder, # noqa: F401 + delete_inference_endpoint, # noqa: F401 + delete_repo, # noqa: F401 + delete_space_secret, # noqa: F401 + delete_space_storage, # noqa: F401 + delete_space_variable, # noqa: F401 + delete_tag, # noqa: F401 + delete_webhook, # noqa: F401 + disable_webhook, # noqa: F401 + duplicate_space, # noqa: F401 + edit_discussion_comment, # noqa: F401 + enable_webhook, # noqa: F401 + file_exists, # noqa: F401 + get_collection, # noqa: F401 + get_dataset_tags, # noqa: F401 + get_discussion_details, # noqa: F401 + get_full_repo_name, # noqa: F401 + get_inference_endpoint, # noqa: F401 + get_model_tags, # noqa: F401 + get_paths_info, # noqa: F401 + get_repo_discussions, # noqa: F401 + get_safetensors_metadata, # noqa: F401 + get_space_runtime, # noqa: F401 + get_space_variables, # noqa: F401 + get_token_permission, # noqa: F401 + get_user_overview, # noqa: F401 + get_webhook, # noqa: F401 + grant_access, # noqa: F401 + like, # noqa: F401 + list_accepted_access_requests, # noqa: F401 + list_collections, # noqa: F401 + list_datasets, # noqa: F401 + list_inference_endpoints, # noqa: F401 + list_liked_repos, # noqa: F401 + list_models, # noqa: F401 + list_organization_members, # noqa: F401 + list_papers, # noqa: F401 + list_pending_access_requests, # noqa: F401 + list_rejected_access_requests, # noqa: F401 + list_repo_commits, # noqa: F401 + list_repo_files, # noqa: F401 + list_repo_likers, # noqa: F401 + list_repo_refs, # noqa: F401 + list_repo_tree, # noqa: F401 + list_spaces, # noqa: F401 + list_user_followers, # noqa: F401 + list_user_following, # noqa: F401 + list_webhooks, # noqa: F401 + merge_pull_request, # noqa: F401 + model_info, # noqa: F401 + move_repo, # noqa: F401 + paper_info, # noqa: F401 + parse_safetensors_file_metadata, # noqa: F401 + pause_inference_endpoint, # noqa: F401 + pause_space, # noqa: F401 + preupload_lfs_files, # noqa: F401 + reject_access_request, # noqa: F401 + rename_discussion, # noqa: F401 + repo_exists, # noqa: F401 + repo_info, # noqa: F401 + repo_type_and_id_from_hf_id, # noqa: F401 + request_space_hardware, # noqa: F401 + request_space_storage, # noqa: F401 + restart_space, # noqa: F401 + resume_inference_endpoint, # noqa: F401 + revision_exists, # noqa: F401 + run_as_future, # noqa: F401 + scale_to_zero_inference_endpoint, # noqa: F401 + set_space_sleep_time, # noqa: F401 + space_info, # noqa: F401 + super_squash_history, # noqa: F401 + unlike, # noqa: F401 + update_collection_item, # noqa: F401 + update_collection_metadata, # noqa: F401 + update_inference_endpoint, # noqa: F401 + update_repo_settings, # noqa: F401 + update_repo_visibility, # noqa: F401 + update_webhook, # noqa: F401 + upload_file, # noqa: F401 + upload_folder, # noqa: F401 + upload_large_folder, # noqa: F401 + whoami, # noqa: F401 + ) + from .hf_file_system import ( + HfFileSystem, # noqa: F401 + HfFileSystemFile, # noqa: F401 + HfFileSystemResolvedPath, # noqa: F401 + HfFileSystemStreamFile, # noqa: F401 + ) + from .hub_mixin import ( + ModelHubMixin, # noqa: F401 + PyTorchModelHubMixin, # noqa: F401 + ) + from .inference._client import ( + InferenceClient, # noqa: F401 + InferenceTimeoutError, # noqa: F401 + ) + from .inference._generated._async_client import AsyncInferenceClient # noqa: F401 + from .inference._generated.types import ( + AudioClassificationInput, # noqa: F401 + AudioClassificationOutputElement, # noqa: F401 + AudioClassificationOutputTransform, # noqa: F401 + AudioClassificationParameters, # noqa: F401 + AudioToAudioInput, # noqa: F401 + AudioToAudioOutputElement, # noqa: F401 + AutomaticSpeechRecognitionEarlyStoppingEnum, # noqa: F401 + AutomaticSpeechRecognitionGenerationParameters, # noqa: F401 + AutomaticSpeechRecognitionInput, # noqa: F401 + AutomaticSpeechRecognitionOutput, # noqa: F401 + AutomaticSpeechRecognitionOutputChunk, # noqa: F401 + AutomaticSpeechRecognitionParameters, # noqa: F401 + ChatCompletionInput, # noqa: F401 + ChatCompletionInputFunctionDefinition, # noqa: F401 + ChatCompletionInputFunctionName, # noqa: F401 + ChatCompletionInputGrammarType, # noqa: F401 + ChatCompletionInputGrammarTypeType, # noqa: F401 + ChatCompletionInputMessage, # noqa: F401 + ChatCompletionInputMessageChunk, # noqa: F401 + ChatCompletionInputMessageChunkType, # noqa: F401 + ChatCompletionInputStreamOptions, # noqa: F401 + ChatCompletionInputTool, # noqa: F401 + ChatCompletionInputToolChoiceClass, # noqa: F401 + ChatCompletionInputToolChoiceEnum, # noqa: F401 + ChatCompletionInputURL, # noqa: F401 + ChatCompletionOutput, # noqa: F401 + ChatCompletionOutputComplete, # noqa: F401 + ChatCompletionOutputFunctionDefinition, # noqa: F401 + ChatCompletionOutputLogprob, # noqa: F401 + ChatCompletionOutputLogprobs, # noqa: F401 + ChatCompletionOutputMessage, # noqa: F401 + ChatCompletionOutputToolCall, # noqa: F401 + ChatCompletionOutputTopLogprob, # noqa: F401 + ChatCompletionOutputUsage, # noqa: F401 + ChatCompletionStreamOutput, # noqa: F401 + ChatCompletionStreamOutputChoice, # noqa: F401 + ChatCompletionStreamOutputDelta, # noqa: F401 + ChatCompletionStreamOutputDeltaToolCall, # noqa: F401 + ChatCompletionStreamOutputFunction, # noqa: F401 + ChatCompletionStreamOutputLogprob, # noqa: F401 + ChatCompletionStreamOutputLogprobs, # noqa: F401 + ChatCompletionStreamOutputTopLogprob, # noqa: F401 + ChatCompletionStreamOutputUsage, # noqa: F401 + DepthEstimationInput, # noqa: F401 + DepthEstimationOutput, # noqa: F401 + DocumentQuestionAnsweringInput, # noqa: F401 + DocumentQuestionAnsweringInputData, # noqa: F401 + DocumentQuestionAnsweringOutputElement, # noqa: F401 + DocumentQuestionAnsweringParameters, # noqa: F401 + FeatureExtractionInput, # noqa: F401 + FeatureExtractionInputTruncationDirection, # noqa: F401 + FillMaskInput, # noqa: F401 + FillMaskOutputElement, # noqa: F401 + FillMaskParameters, # noqa: F401 + ImageClassificationInput, # noqa: F401 + ImageClassificationOutputElement, # noqa: F401 + ImageClassificationOutputTransform, # noqa: F401 + ImageClassificationParameters, # noqa: F401 + ImageSegmentationInput, # noqa: F401 + ImageSegmentationOutputElement, # noqa: F401 + ImageSegmentationParameters, # noqa: F401 + ImageSegmentationSubtask, # noqa: F401 + ImageToImageInput, # noqa: F401 + ImageToImageOutput, # noqa: F401 + ImageToImageParameters, # noqa: F401 + ImageToImageTargetSize, # noqa: F401 + ImageToTextEarlyStoppingEnum, # noqa: F401 + ImageToTextGenerationParameters, # noqa: F401 + ImageToTextInput, # noqa: F401 + ImageToTextOutput, # noqa: F401 + ImageToTextParameters, # noqa: F401 + ObjectDetectionBoundingBox, # noqa: F401 + ObjectDetectionInput, # noqa: F401 + ObjectDetectionOutputElement, # noqa: F401 + ObjectDetectionParameters, # noqa: F401 + Padding, # noqa: F401 + QuestionAnsweringInput, # noqa: F401 + QuestionAnsweringInputData, # noqa: F401 + QuestionAnsweringOutputElement, # noqa: F401 + QuestionAnsweringParameters, # noqa: F401 + SentenceSimilarityInput, # noqa: F401 + SentenceSimilarityInputData, # noqa: F401 + SummarizationInput, # noqa: F401 + SummarizationOutput, # noqa: F401 + SummarizationParameters, # noqa: F401 + SummarizationTruncationStrategy, # noqa: F401 + TableQuestionAnsweringInput, # noqa: F401 + TableQuestionAnsweringInputData, # noqa: F401 + TableQuestionAnsweringOutputElement, # noqa: F401 + TableQuestionAnsweringParameters, # noqa: F401 + Text2TextGenerationInput, # noqa: F401 + Text2TextGenerationOutput, # noqa: F401 + Text2TextGenerationParameters, # noqa: F401 + Text2TextGenerationTruncationStrategy, # noqa: F401 + TextClassificationInput, # noqa: F401 + TextClassificationOutputElement, # noqa: F401 + TextClassificationOutputTransform, # noqa: F401 + TextClassificationParameters, # noqa: F401 + TextGenerationInput, # noqa: F401 + TextGenerationInputGenerateParameters, # noqa: F401 + TextGenerationInputGrammarType, # noqa: F401 + TextGenerationOutput, # noqa: F401 + TextGenerationOutputBestOfSequence, # noqa: F401 + TextGenerationOutputDetails, # noqa: F401 + TextGenerationOutputFinishReason, # noqa: F401 + TextGenerationOutputPrefillToken, # noqa: F401 + TextGenerationOutputToken, # noqa: F401 + TextGenerationStreamOutput, # noqa: F401 + TextGenerationStreamOutputStreamDetails, # noqa: F401 + TextGenerationStreamOutputToken, # noqa: F401 + TextToAudioEarlyStoppingEnum, # noqa: F401 + TextToAudioGenerationParameters, # noqa: F401 + TextToAudioInput, # noqa: F401 + TextToAudioOutput, # noqa: F401 + TextToAudioParameters, # noqa: F401 + TextToImageInput, # noqa: F401 + TextToImageOutput, # noqa: F401 + TextToImageParameters, # noqa: F401 + TextToImageTargetSize, # noqa: F401 + TextToSpeechEarlyStoppingEnum, # noqa: F401 + TextToSpeechGenerationParameters, # noqa: F401 + TextToSpeechInput, # noqa: F401 + TextToSpeechOutput, # noqa: F401 + TextToSpeechParameters, # noqa: F401 + TokenClassificationAggregationStrategy, # noqa: F401 + TokenClassificationInput, # noqa: F401 + TokenClassificationOutputElement, # noqa: F401 + TokenClassificationParameters, # noqa: F401 + TranslationInput, # noqa: F401 + TranslationOutput, # noqa: F401 + TranslationParameters, # noqa: F401 + TranslationTruncationStrategy, # noqa: F401 + TypeEnum, # noqa: F401 + VideoClassificationInput, # noqa: F401 + VideoClassificationOutputElement, # noqa: F401 + VideoClassificationOutputTransform, # noqa: F401 + VideoClassificationParameters, # noqa: F401 + VisualQuestionAnsweringInput, # noqa: F401 + VisualQuestionAnsweringInputData, # noqa: F401 + VisualQuestionAnsweringOutputElement, # noqa: F401 + VisualQuestionAnsweringParameters, # noqa: F401 + ZeroShotClassificationInput, # noqa: F401 + ZeroShotClassificationOutputElement, # noqa: F401 + ZeroShotClassificationParameters, # noqa: F401 + ZeroShotImageClassificationInput, # noqa: F401 + ZeroShotImageClassificationOutputElement, # noqa: F401 + ZeroShotImageClassificationParameters, # noqa: F401 + ZeroShotObjectDetectionBoundingBox, # noqa: F401 + ZeroShotObjectDetectionInput, # noqa: F401 + ZeroShotObjectDetectionOutputElement, # noqa: F401 + ZeroShotObjectDetectionParameters, # noqa: F401 + ) + from .inference_api import InferenceApi # noqa: F401 + from .keras_mixin import ( + KerasModelHubMixin, # noqa: F401 + from_pretrained_keras, # noqa: F401 + push_to_hub_keras, # noqa: F401 + save_pretrained_keras, # noqa: F401 + ) + from .repocard import ( + DatasetCard, # noqa: F401 + ModelCard, # noqa: F401 + RepoCard, # noqa: F401 + SpaceCard, # noqa: F401 + metadata_eval_result, # noqa: F401 + metadata_load, # noqa: F401 + metadata_save, # noqa: F401 + metadata_update, # noqa: F401 + ) + from .repocard_data import ( + CardData, # noqa: F401 + DatasetCardData, # noqa: F401 + EvalResult, # noqa: F401 + ModelCardData, # noqa: F401 + SpaceCardData, # noqa: F401 + ) + from .repository import Repository # noqa: F401 + from .serialization import ( + StateDictSplit, # noqa: F401 + get_tf_storage_size, # noqa: F401 + get_torch_storage_id, # noqa: F401 + get_torch_storage_size, # noqa: F401 + load_state_dict_from_file, # noqa: F401 + load_torch_model, # noqa: F401 + save_torch_model, # noqa: F401 + save_torch_state_dict, # noqa: F401 + split_state_dict_into_shards_factory, # noqa: F401 + split_tf_state_dict_into_shards, # noqa: F401 + split_torch_state_dict_into_shards, # noqa: F401 + ) + from .serialization._dduf import ( + DDUFEntry, # noqa: F401 + export_entries_as_dduf, # noqa: F401 + export_folder_as_dduf, # noqa: F401 + read_dduf_file, # noqa: F401 + ) + from .utils import ( + CachedFileInfo, # noqa: F401 + CachedRepoInfo, # noqa: F401 + CachedRevisionInfo, # noqa: F401 + CacheNotFound, # noqa: F401 + CorruptedCacheException, # noqa: F401 + DeleteCacheStrategy, # noqa: F401 + HFCacheInfo, # noqa: F401 + HfFolder, # noqa: F401 + cached_assets_path, # noqa: F401 + configure_http_backend, # noqa: F401 + dump_environment_info, # noqa: F401 + get_session, # noqa: F401 + get_token, # noqa: F401 + logging, # noqa: F401 + scan_cache_dir, # noqa: F401 + ) diff --git a/meow/lib/python3.13/site-packages/huggingface_hub/_commit_scheduler.py b/meow/lib/python3.13/site-packages/huggingface_hub/_commit_scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..f1f20339e7df2d17588623dc13bb3c6be6a46b53 --- /dev/null +++ b/meow/lib/python3.13/site-packages/huggingface_hub/_commit_scheduler.py @@ -0,0 +1,353 @@ +import atexit +import logging +import os +import time +from concurrent.futures import Future +from dataclasses import dataclass +from io import SEEK_END, SEEK_SET, BytesIO +from pathlib import Path +from threading import Lock, Thread +from typing import Dict, List, Optional, Union + +from .hf_api import DEFAULT_IGNORE_PATTERNS, CommitInfo, CommitOperationAdd, HfApi +from .utils import filter_repo_objects + + +logger = logging.getLogger(__name__) + + +@dataclass(frozen=True) +class _FileToUpload: + """Temporary dataclass to store info about files to upload. Not meant to be used directly.""" + + local_path: Path + path_in_repo: str + size_limit: int + last_modified: float + + +class CommitScheduler: + """ + Scheduler to upload a local folder to the Hub at regular intervals (e.g. push to hub every 5 minutes). + + The recommended way to use the scheduler is to use it as a context manager. This ensures that the scheduler is + properly stopped and the last commit is triggered when the script ends. The scheduler can also be stopped manually + with the `stop` method. Checkout the [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#scheduled-uploads) + to learn more about how to use it. + + Args: + repo_id (`str`): + The id of the repo to commit to. + folder_path (`str` or `Path`): + Path to the local folder to upload regularly. + every (`int` or `float`, *optional*): + The number of minutes between each commit. Defaults to 5 minutes. + path_in_repo (`str`, *optional*): + Relative path of the directory in the repo, for example: `"checkpoints/"`. Defaults to the root folder + of the repository. + repo_type (`str`, *optional*): + The type of the repo to commit to. Defaults to `model`. + revision (`str`, *optional*): + The revision of the repo to commit to. Defaults to `main`. + private (`bool`, *optional*): + Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists. + token (`str`, *optional*): + The token to use to commit to the repo. Defaults to the token saved on the machine. + allow_patterns (`List[str]` or `str`, *optional*): + If provided, only files matching at least one pattern are uploaded. + ignore_patterns (`List[str]` or `str`, *optional*): + If provided, files matching any of the patterns are not uploaded. + squash_history (`bool`, *optional*): + Whether to squash the history of the repo after each commit. Defaults to `False`. Squashing commits is + useful to avoid degraded performances on the repo when it grows too large. + hf_api (`HfApi`, *optional*): + The [`HfApi`] client to use to commit to the Hub. Can be set with custom settings (user agent, token,...). + + Example: + ```py + >>> from pathlib import Path + >>> from huggingface_hub import CommitScheduler + + # Scheduler uploads every 10 minutes + >>> csv_path = Path("watched_folder/data.csv") + >>> CommitScheduler(repo_id="test_scheduler", repo_type="dataset", folder_path=csv_path.parent, every=10) + + >>> with csv_path.open("a") as f: + ... f.write("first line") + + # Some time later (...) + >>> with csv_path.open("a") as f: + ... f.write("second line") + ``` + + Example using a context manager: + ```py + >>> from pathlib import Path + >>> from huggingface_hub import CommitScheduler + + >>> with CommitScheduler(repo_id="test_scheduler", repo_type="dataset", folder_path="watched_folder", every=10) as scheduler: + ... csv_path = Path("watched_folder/data.csv") + ... with csv_path.open("a") as f: + ... f.write("first line") + ... (...) + ... with csv_path.open("a") as f: + ... f.write("second line") + + # Scheduler is now stopped and last commit have been triggered + ``` + """ + + def __init__( + self, + *, + repo_id: str, + folder_path: Union[str, Path], + every: Union[int, float] = 5, + path_in_repo: Optional[str] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + private: Optional[bool] = None, + token: Optional[str] = None, + allow_patterns: Optional[Union[List[str], str]] = None, + ignore_patterns: Optional[Union[List[str], str]] = None, + squash_history: bool = False, + hf_api: Optional["HfApi"] = None, + ) -> None: + self.api = hf_api or HfApi(token=token) + + # Folder + self.folder_path = Path(folder_path).expanduser().resolve() + self.path_in_repo = path_in_repo or "" + self.allow_patterns = allow_patterns + + if ignore_patterns is None: + ignore_patterns = [] + elif isinstance(ignore_patterns, str): + ignore_patterns = [ignore_patterns] + self.ignore_patterns = ignore_patterns + DEFAULT_IGNORE_PATTERNS + + if self.folder_path.is_file(): + raise ValueError(f"'folder_path' must be a directory, not a file: '{self.folder_path}'.") + self.folder_path.mkdir(parents=True, exist_ok=True) + + # Repository + repo_url = self.api.create_repo(repo_id=repo_id, private=private, repo_type=repo_type, exist_ok=True) + self.repo_id = repo_url.repo_id + self.repo_type = repo_type + self.revision = revision + self.token = token + + # Keep track of already uploaded files + self.last_uploaded: Dict[Path, float] = {} # key is local path, value is timestamp + + # Scheduler + if not every > 0: + raise ValueError(f"'every' must be a positive integer, not '{every}'.") + self.lock = Lock() + self.every = every + self.squash_history = squash_history + + logger.info(f"Scheduled job to push '{self.folder_path}' to '{self.repo_id}' every {self.every} minutes.") + self._scheduler_thread = Thread(target=self._run_scheduler, daemon=True) + self._scheduler_thread.start() + atexit.register(self._push_to_hub) + + self.__stopped = False + + def stop(self) -> None: + """Stop the scheduler. + + A stopped scheduler cannot be restarted. Mostly for tests purposes. + """ + self.__stopped = True + + def __enter__(self) -> "CommitScheduler": + return self + + def __exit__(self, exc_type, exc_value, traceback) -> None: + # Upload last changes before exiting + self.trigger().result() + self.stop() + return + + def _run_scheduler(self) -> None: + """Dumb thread waiting between each scheduled push to Hub.""" + while True: + self.last_future = self.trigger() + time.sleep(self.every * 60) + if self.__stopped: + break + + def trigger(self) -> Future: + """Trigger a `push_to_hub` and return a future. + + This method is automatically called every `every` minutes. You can also call it manually to trigger a commit + immediately, without waiting for the next scheduled commit. + """ + return self.api.run_as_future(self._push_to_hub) + + def _push_to_hub(self) -> Optional[CommitInfo]: + if self.__stopped: # If stopped, already scheduled commits are ignored + return None + + logger.info("(Background) scheduled commit triggered.") + try: + value = self.push_to_hub() + if self.squash_history: + logger.info("(Background) squashing repo history.") + self.api.super_squash_history(repo_id=self.repo_id, repo_type=self.repo_type, branch=self.revision) + return value + except Exception as e: + logger.error(f"Error while pushing to Hub: {e}") # Depending on the setup, error might be silenced + raise + + def push_to_hub(self) -> Optional[CommitInfo]: + """ + Push folder to the Hub and return the commit info. + + + + This method is not meant to be called directly. It is run in the background by the scheduler, respecting a + queue mechanism to avoid concurrent commits. Making a direct call to the method might lead to concurrency + issues. + + + + The default behavior of `push_to_hub` is to assume an append-only folder. It lists all files in the folder and + uploads only changed files. If no changes are found, the method returns without committing anything. If you want + to change this behavior, you can inherit from [`CommitScheduler`] and override this method. This can be useful + for example to compress data together in a single file before committing. For more details and examples, check + out our [integration guide](https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#scheduled-uploads). + """ + # Check files to upload (with lock) + with self.lock: + logger.debug("Listing files to upload for scheduled commit.") + + # List files from folder (taken from `_prepare_upload_folder_additions`) + relpath_to_abspath = { + path.relative_to(self.folder_path).as_posix(): path + for path in sorted(self.folder_path.glob("**/*")) # sorted to be deterministic + if path.is_file() + } + prefix = f"{self.path_in_repo.strip('/')}/" if self.path_in_repo else "" + + # Filter with pattern + filter out unchanged files + retrieve current file size + files_to_upload: List[_FileToUpload] = [] + for relpath in filter_repo_objects( + relpath_to_abspath.keys(), allow_patterns=self.allow_patterns, ignore_patterns=self.ignore_patterns + ): + local_path = relpath_to_abspath[relpath] + stat = local_path.stat() + if self.last_uploaded.get(local_path) is None or self.last_uploaded[local_path] != stat.st_mtime: + files_to_upload.append( + _FileToUpload( + local_path=local_path, + path_in_repo=prefix + relpath, + size_limit=stat.st_size, + last_modified=stat.st_mtime, + ) + ) + + # Return if nothing to upload + if len(files_to_upload) == 0: + logger.debug("Dropping schedule commit: no changed file to upload.") + return None + + # Convert `_FileToUpload` as `CommitOperationAdd` (=> compute file shas + limit to file size) + logger.debug("Removing unchanged files since previous scheduled commit.") + add_operations = [ + CommitOperationAdd( + # Cap the file to its current size, even if the user append data to it while a scheduled commit is happening + path_or_fileobj=PartialFileIO(file_to_upload.local_path, size_limit=file_to_upload.size_limit), + path_in_repo=file_to_upload.path_in_repo, + ) + for file_to_upload in files_to_upload + ] + + # Upload files (append mode expected - no need for lock) + logger.debug("Uploading files for scheduled commit.") + commit_info = self.api.create_commit( + repo_id=self.repo_id, + repo_type=self.repo_type, + operations=add_operations, + commit_message="Scheduled Commit", + revision=self.revision, + ) + + # Successful commit: keep track of the latest "last_modified" for each file + for file in files_to_upload: + self.last_uploaded[file.local_path] = file.last_modified + return commit_info + + +class PartialFileIO(BytesIO): + """A file-like object that reads only the first part of a file. + + Useful to upload a file to the Hub when the user might still be appending data to it. Only the first part of the + file is uploaded (i.e. the part that was available when the filesystem was first scanned). + + In practice, only used internally by the CommitScheduler to regularly push a folder to the Hub with minimal + disturbance for the user. The object is passed to `CommitOperationAdd`. + + Only supports `read`, `tell` and `seek` methods. + + Args: + file_path (`str` or `Path`): + Path to the file to read. + size_limit (`int`): + The maximum number of bytes to read from the file. If the file is larger than this, only the first part + will be read (and uploaded). + """ + + def __init__(self, file_path: Union[str, Path], size_limit: int) -> None: + self._file_path = Path(file_path) + self._file = self._file_path.open("rb") + self._size_limit = min(size_limit, os.fstat(self._file.fileno()).st_size) + + def __del__(self) -> None: + self._file.close() + return super().__del__() + + def __repr__(self) -> str: + return f"" + + def __len__(self) -> int: + return self._size_limit + + def __getattribute__(self, name: str): + if name.startswith("_") or name in ("read", "tell", "seek"): # only 3 public methods supported + return super().__getattribute__(name) + raise NotImplementedError(f"PartialFileIO does not support '{name}'.") + + def tell(self) -> int: + """Return the current file position.""" + return self._file.tell() + + def seek(self, __offset: int, __whence: int = SEEK_SET) -> int: + """Change the stream position to the given offset. + + Behavior is the same as a regular file, except that the position is capped to the size limit. + """ + if __whence == SEEK_END: + # SEEK_END => set from the truncated end + __offset = len(self) + __offset + __whence = SEEK_SET + + pos = self._file.seek(__offset, __whence) + if pos > self._size_limit: + return self._file.seek(self._size_limit) + return pos + + def read(self, __size: Optional[int] = -1) -> bytes: + """Read at most `__size` bytes from the file. + + Behavior is the same as a regular file, except that it is capped to the size limit. + """ + current = self._file.tell() + if __size is None or __size < 0: + # Read until file limit + truncated_size = self._size_limit - current + else: + # Read until file limit or __size + truncated_size = min(__size, self._size_limit - current) + return self._file.read(truncated_size) diff --git a/meow/lib/python3.13/site-packages/huggingface_hub/_inference_endpoints.py b/meow/lib/python3.13/site-packages/huggingface_hub/_inference_endpoints.py new file mode 100644 index 0000000000000000000000000000000000000000..282b627b54489751bb5e8098f87881e52eda637a --- /dev/null +++ b/meow/lib/python3.13/site-packages/huggingface_hub/_inference_endpoints.py @@ -0,0 +1,396 @@ +import time +from dataclasses import dataclass, field +from datetime import datetime +from enum import Enum +from typing import TYPE_CHECKING, Dict, Optional, Union + +from huggingface_hub.errors import InferenceEndpointError, InferenceEndpointTimeoutError + +from .inference._client import InferenceClient +from .inference._generated._async_client import AsyncInferenceClient +from .utils import get_session, logging, parse_datetime + + +if TYPE_CHECKING: + from .hf_api import HfApi + + +logger = logging.get_logger(__name__) + + +class InferenceEndpointStatus(str, Enum): + PENDING = "pending" + INITIALIZING = "initializing" + UPDATING = "updating" + UPDATE_FAILED = "updateFailed" + RUNNING = "running" + PAUSED = "paused" + FAILED = "failed" + SCALED_TO_ZERO = "scaledToZero" + + +class InferenceEndpointType(str, Enum): + PUBlIC = "public" + PROTECTED = "protected" + PRIVATE = "private" + + +@dataclass +class InferenceEndpoint: + """ + Contains information about a deployed Inference Endpoint. + + Args: + name (`str`): + The unique name of the Inference Endpoint. + namespace (`str`): + The namespace where the Inference Endpoint is located. + repository (`str`): + The name of the model repository deployed on this Inference Endpoint. + status ([`InferenceEndpointStatus`]): + The current status of the Inference Endpoint. + url (`str`, *optional*): + The URL of the Inference Endpoint, if available. Only a deployed Inference Endpoint will have a URL. + framework (`str`): + The machine learning framework used for the model. + revision (`str`): + The specific model revision deployed on the Inference Endpoint. + task (`str`): + The task associated with the deployed model. + created_at (`datetime.datetime`): + The timestamp when the Inference Endpoint was created. + updated_at (`datetime.datetime`): + The timestamp of the last update of the Inference Endpoint. + type ([`InferenceEndpointType`]): + The type of the Inference Endpoint (public, protected, private). + raw (`Dict`): + The raw dictionary data returned from the API. + token (`str` or `bool`, *optional*): + Authentication token for the Inference Endpoint, if set when requesting the API. Will default to the + locally saved token if not provided. Pass `token=False` if you don't want to send your token to the server. + + Example: + ```python + >>> from huggingface_hub import get_inference_endpoint + >>> endpoint = get_inference_endpoint("my-text-to-image") + >>> endpoint + InferenceEndpoint(name='my-text-to-image', ...) + + # Get status + >>> endpoint.status + 'running' + >>> endpoint.url + 'https://my-text-to-image.region.vendor.endpoints.huggingface.cloud' + + # Run inference + >>> endpoint.client.text_to_image(...) + + # Pause endpoint to save $$$ + >>> endpoint.pause() + + # ... + # Resume and wait for deployment + >>> endpoint.resume() + >>> endpoint.wait() + >>> endpoint.client.text_to_image(...) + ``` + """ + + # Field in __repr__ + name: str = field(init=False) + namespace: str + repository: str = field(init=False) + status: InferenceEndpointStatus = field(init=False) + url: Optional[str] = field(init=False) + + # Other fields + framework: str = field(repr=False, init=False) + revision: str = field(repr=False, init=False) + task: str = field(repr=False, init=False) + created_at: datetime = field(repr=False, init=False) + updated_at: datetime = field(repr=False, init=False) + type: InferenceEndpointType = field(repr=False, init=False) + + # Raw dict from the API + raw: Dict = field(repr=False) + + # Internal fields + _token: Union[str, bool, None] = field(repr=False, compare=False) + _api: "HfApi" = field(repr=False, compare=False) + + @classmethod + def from_raw( + cls, raw: Dict, namespace: str, token: Union[str, bool, None] = None, api: Optional["HfApi"] = None + ) -> "InferenceEndpoint": + """Initialize object from raw dictionary.""" + if api is None: + from .hf_api import HfApi + + api = HfApi() + if token is None: + token = api.token + + # All other fields are populated in __post_init__ + return cls(raw=raw, namespace=namespace, _token=token, _api=api) + + def __post_init__(self) -> None: + """Populate fields from raw dictionary.""" + self._populate_from_raw() + + @property + def client(self) -> InferenceClient: + """Returns a client to make predictions on this Inference Endpoint. + + Returns: + [`InferenceClient`]: an inference client pointing to the deployed endpoint. + + Raises: + [`InferenceEndpointError`]: If the Inference Endpoint is not yet deployed. + """ + if self.url is None: + raise InferenceEndpointError( + "Cannot create a client for this Inference Endpoint as it is not yet deployed. " + "Please wait for the Inference Endpoint to be deployed using `endpoint.wait()` and try again." + ) + return InferenceClient(model=self.url, token=self._token) + + @property + def async_client(self) -> AsyncInferenceClient: + """Returns a client to make predictions on this Inference Endpoint. + + Returns: + [`AsyncInferenceClient`]: an asyncio-compatible inference client pointing to the deployed endpoint. + + Raises: + [`InferenceEndpointError`]: If the Inference Endpoint is not yet deployed. + """ + if self.url is None: + raise InferenceEndpointError( + "Cannot create a client for this Inference Endpoint as it is not yet deployed. " + "Please wait for the Inference Endpoint to be deployed using `endpoint.wait()` and try again." + ) + return AsyncInferenceClient(model=self.url, token=self._token) + + def wait(self, timeout: Optional[int] = None, refresh_every: int = 5) -> "InferenceEndpoint": + """Wait for the Inference Endpoint to be deployed. + + Information from the server will be fetched every 1s. If the Inference Endpoint is not deployed after `timeout` + seconds, a [`InferenceEndpointTimeoutError`] will be raised. The [`InferenceEndpoint`] will be mutated in place with the latest + data. + + Args: + timeout (`int`, *optional*): + The maximum time to wait for the Inference Endpoint to be deployed, in seconds. If `None`, will wait + indefinitely. + refresh_every (`int`, *optional*): + The time to wait between each fetch of the Inference Endpoint status, in seconds. Defaults to 5s. + + Returns: + [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data. + + Raises: + [`InferenceEndpointError`] + If the Inference Endpoint ended up in a failed state. + [`InferenceEndpointTimeoutError`] + If the Inference Endpoint is not deployed after `timeout` seconds. + """ + if timeout is not None and timeout < 0: + raise ValueError("`timeout` cannot be negative.") + if refresh_every <= 0: + raise ValueError("`refresh_every` must be positive.") + + start = time.time() + while True: + if self.url is not None: + # Means the URL is provisioned => check if the endpoint is reachable + response = get_session().get(self.url, headers=self._api._build_hf_headers(token=self._token)) + if response.status_code == 200: + logger.info("Inference Endpoint is ready to be used.") + return self + if self.status == InferenceEndpointStatus.FAILED: + raise InferenceEndpointError( + f"Inference Endpoint {self.name} failed to deploy. Please check the logs for more information." + ) + if timeout is not None: + if time.time() - start > timeout: + raise InferenceEndpointTimeoutError("Timeout while waiting for Inference Endpoint to be deployed.") + logger.info(f"Inference Endpoint is not deployed yet ({self.status}). Waiting {refresh_every}s...") + time.sleep(refresh_every) + self.fetch() + + def fetch(self) -> "InferenceEndpoint": + """Fetch latest information about the Inference Endpoint. + + Returns: + [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data. + """ + obj = self._api.get_inference_endpoint(name=self.name, namespace=self.namespace, token=self._token) # type: ignore [arg-type] + self.raw = obj.raw + self._populate_from_raw() + return self + + def update( + self, + *, + # Compute update + accelerator: Optional[str] = None, + instance_size: Optional[str] = None, + instance_type: Optional[str] = None, + min_replica: Optional[int] = None, + max_replica: Optional[int] = None, + scale_to_zero_timeout: Optional[int] = None, + # Model update + repository: Optional[str] = None, + framework: Optional[str] = None, + revision: Optional[str] = None, + task: Optional[str] = None, + custom_image: Optional[Dict] = None, + secrets: Optional[Dict[str, str]] = None, + ) -> "InferenceEndpoint": + """Update the Inference Endpoint. + + This method allows the update of either the compute configuration, the deployed model, or both. All arguments are + optional but at least one must be provided. + + This is an alias for [`HfApi.update_inference_endpoint`]. The current object is mutated in place with the + latest data from the server. + + Args: + accelerator (`str`, *optional*): + The hardware accelerator to be used for inference (e.g. `"cpu"`). + instance_size (`str`, *optional*): + The size or type of the instance to be used for hosting the model (e.g. `"x4"`). + instance_type (`str`, *optional*): + The cloud instance type where the Inference Endpoint will be deployed (e.g. `"intel-icl"`). + min_replica (`int`, *optional*): + The minimum number of replicas (instances) to keep running for the Inference Endpoint. + max_replica (`int`, *optional*): + The maximum number of replicas (instances) to scale to for the Inference Endpoint. + scale_to_zero_timeout (`int`, *optional*): + The duration in minutes before an inactive endpoint is scaled to zero. + + repository (`str`, *optional*): + The name of the model repository associated with the Inference Endpoint (e.g. `"gpt2"`). + framework (`str`, *optional*): + The machine learning framework used for the model (e.g. `"custom"`). + revision (`str`, *optional*): + The specific model revision to deploy on the Inference Endpoint (e.g. `"6c0e6080953db56375760c0471a8c5f2929baf11"`). + task (`str`, *optional*): + The task on which to deploy the model (e.g. `"text-classification"`). + custom_image (`Dict`, *optional*): + A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an + Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples). + secrets (`Dict[str, str]`, *optional*): + Secret values to inject in the container environment. + Returns: + [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data. + """ + # Make API call + obj = self._api.update_inference_endpoint( + name=self.name, + namespace=self.namespace, + accelerator=accelerator, + instance_size=instance_size, + instance_type=instance_type, + min_replica=min_replica, + max_replica=max_replica, + scale_to_zero_timeout=scale_to_zero_timeout, + repository=repository, + framework=framework, + revision=revision, + task=task, + custom_image=custom_image, + secrets=secrets, + token=self._token, # type: ignore [arg-type] + ) + + # Mutate current object + self.raw = obj.raw + self._populate_from_raw() + return self + + def pause(self) -> "InferenceEndpoint": + """Pause the Inference Endpoint. + + A paused Inference Endpoint will not be charged. It can be resumed at any time using [`InferenceEndpoint.resume`]. + This is different than scaling the Inference Endpoint to zero with [`InferenceEndpoint.scale_to_zero`], which + would be automatically restarted when a request is made to it. + + This is an alias for [`HfApi.pause_inference_endpoint`]. The current object is mutated in place with the + latest data from the server. + + Returns: + [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data. + """ + obj = self._api.pause_inference_endpoint(name=self.name, namespace=self.namespace, token=self._token) # type: ignore [arg-type] + self.raw = obj.raw + self._populate_from_raw() + return self + + def resume(self, running_ok: bool = True) -> "InferenceEndpoint": + """Resume the Inference Endpoint. + + This is an alias for [`HfApi.resume_inference_endpoint`]. The current object is mutated in place with the + latest data from the server. + + Args: + running_ok (`bool`, *optional*): + If `True`, the method will not raise an error if the Inference Endpoint is already running. Defaults to + `True`. + + Returns: + [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data. + """ + obj = self._api.resume_inference_endpoint( + name=self.name, namespace=self.namespace, running_ok=running_ok, token=self._token + ) # type: ignore [arg-type] + self.raw = obj.raw + self._populate_from_raw() + return self + + def scale_to_zero(self) -> "InferenceEndpoint": + """Scale Inference Endpoint to zero. + + An Inference Endpoint scaled to zero will not be charged. It will be resume on the next request to it, with a + cold start delay. This is different than pausing the Inference Endpoint with [`InferenceEndpoint.pause`], which + would require a manual resume with [`InferenceEndpoint.resume`]. + + This is an alias for [`HfApi.scale_to_zero_inference_endpoint`]. The current object is mutated in place with the + latest data from the server. + + Returns: + [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data. + """ + obj = self._api.scale_to_zero_inference_endpoint(name=self.name, namespace=self.namespace, token=self._token) # type: ignore [arg-type] + self.raw = obj.raw + self._populate_from_raw() + return self + + def delete(self) -> None: + """Delete the Inference Endpoint. + + This operation is not reversible. If you don't want to be charged for an Inference Endpoint, it is preferable + to pause it with [`InferenceEndpoint.pause`] or scale it to zero with [`InferenceEndpoint.scale_to_zero`]. + + This is an alias for [`HfApi.delete_inference_endpoint`]. + """ + self._api.delete_inference_endpoint(name=self.name, namespace=self.namespace, token=self._token) # type: ignore [arg-type] + + def _populate_from_raw(self) -> None: + """Populate fields from raw dictionary. + + Called in __post_init__ + each time the Inference Endpoint is updated. + """ + # Repr fields + self.name = self.raw["name"] + self.repository = self.raw["model"]["repository"] + self.status = self.raw["status"]["state"] + self.url = self.raw["status"].get("url") + + # Other fields + self.framework = self.raw["model"]["framework"] + self.revision = self.raw["model"]["revision"] + self.task = self.raw["model"]["task"] + self.created_at = parse_datetime(self.raw["status"]["createdAt"]) + self.updated_at = parse_datetime(self.raw["status"]["updatedAt"]) + self.type = self.raw["type"] diff --git a/meow/lib/python3.13/site-packages/huggingface_hub/_local_folder.py b/meow/lib/python3.13/site-packages/huggingface_hub/_local_folder.py new file mode 100644 index 0000000000000000000000000000000000000000..6fd05f053a785ce00991a28c1a5d2c532bc86e1a --- /dev/null +++ b/meow/lib/python3.13/site-packages/huggingface_hub/_local_folder.py @@ -0,0 +1,421 @@ +# coding=utf-8 +# Copyright 2024-present, the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Contains utilities to handle the `../.cache/huggingface` folder in local directories. + +First discussed in https://github.com/huggingface/huggingface_hub/issues/1738 to store +download metadata when downloading files from the hub to a local directory (without +using the cache). + +./.cache/huggingface folder structure: +[4.0K] data +├── [4.0K] .cache +│ └── [4.0K] huggingface +│ └── [4.0K] download +│ ├── [ 16] file.parquet.metadata +│ ├── [ 16] file.txt.metadata +│ └── [4.0K] folder +│ └── [ 16] file.parquet.metadata +│ +├── [6.5G] file.parquet +├── [1.5K] file.txt +└── [4.0K] folder + └── [ 16] file.parquet + + +Download metadata file structure: +``` +# file.txt.metadata +11c5a3d5811f50298f278a704980280950aedb10 +a16a55fda99d2f2e7b69cce5cf93ff4ad3049930 +1712656091.123 + +# file.parquet.metadata +11c5a3d5811f50298f278a704980280950aedb10 +7c5d3f4b8b76583b422fcb9189ad6c89d5d97a094541ce8932dce3ecabde1421 +1712656091.123 +} +``` +""" + +import logging +import os +import time +from dataclasses import dataclass +from pathlib import Path +from typing import Optional + +from .utils import WeakFileLock + + +logger = logging.getLogger(__name__) + + +@dataclass +class LocalDownloadFilePaths: + """ + Paths to the files related to a download process in a local dir. + + Returned by [`get_local_download_paths`]. + + Attributes: + file_path (`Path`): + Path where the file will be saved. + lock_path (`Path`): + Path to the lock file used to ensure atomicity when reading/writing metadata. + metadata_path (`Path`): + Path to the metadata file. + """ + + file_path: Path + lock_path: Path + metadata_path: Path + + def incomplete_path(self, etag: str) -> Path: + """Return the path where a file will be temporarily downloaded before being moved to `file_path`.""" + return self.metadata_path.with_suffix(f".{etag}.incomplete") + + +@dataclass(frozen=True) +class LocalUploadFilePaths: + """ + Paths to the files related to an upload process in a local dir. + + Returned by [`get_local_upload_paths`]. + + Attributes: + path_in_repo (`str`): + Path of the file in the repo. + file_path (`Path`): + Path where the file will be saved. + lock_path (`Path`): + Path to the lock file used to ensure atomicity when reading/writing metadata. + metadata_path (`Path`): + Path to the metadata file. + """ + + path_in_repo: str + file_path: Path + lock_path: Path + metadata_path: Path + + +@dataclass +class LocalDownloadFileMetadata: + """ + Metadata about a file in the local directory related to a download process. + + Attributes: + filename (`str`): + Path of the file in the repo. + commit_hash (`str`): + Commit hash of the file in the repo. + etag (`str`): + ETag of the file in the repo. Used to check if the file has changed. + For LFS files, this is the sha256 of the file. For regular files, it corresponds to the git hash. + timestamp (`int`): + Unix timestamp of when the metadata was saved i.e. when the metadata was accurate. + """ + + filename: str + commit_hash: str + etag: str + timestamp: float + + +@dataclass +class LocalUploadFileMetadata: + """ + Metadata about a file in the local directory related to an upload process. + """ + + size: int + + # Default values correspond to "we don't know yet" + timestamp: Optional[float] = None + should_ignore: Optional[bool] = None + sha256: Optional[str] = None + upload_mode: Optional[str] = None + is_uploaded: bool = False + is_committed: bool = False + + def save(self, paths: LocalUploadFilePaths) -> None: + """Save the metadata to disk.""" + with WeakFileLock(paths.lock_path): + with paths.metadata_path.open("w") as f: + new_timestamp = time.time() + f.write(str(new_timestamp) + "\n") + + f.write(str(self.size)) # never None + f.write("\n") + + if self.should_ignore is not None: + f.write(str(int(self.should_ignore))) + f.write("\n") + + if self.sha256 is not None: + f.write(self.sha256) + f.write("\n") + + if self.upload_mode is not None: + f.write(self.upload_mode) + f.write("\n") + + f.write(str(int(self.is_uploaded)) + "\n") + f.write(str(int(self.is_committed)) + "\n") + + self.timestamp = new_timestamp + + +def get_local_download_paths(local_dir: Path, filename: str) -> LocalDownloadFilePaths: + """Compute paths to the files related to a download process. + + Folders containing the paths are all guaranteed to exist. + + Args: + local_dir (`Path`): + Path to the local directory in which files are downloaded. + filename (`str`): + Path of the file in the repo. + + Return: + [`LocalDownloadFilePaths`]: the paths to the files (file_path, lock_path, metadata_path, incomplete_path). + """ + # filename is the path in the Hub repository (separated by '/') + # make sure to have a cross platform transcription + sanitized_filename = os.path.join(*filename.split("/")) + if os.name == "nt": + if sanitized_filename.startswith("..\\") or "\\..\\" in sanitized_filename: + raise ValueError( + f"Invalid filename: cannot handle filename '{sanitized_filename}' on Windows. Please ask the repository" + " owner to rename this file." + ) + file_path = local_dir / sanitized_filename + metadata_path = _huggingface_dir(local_dir) / "download" / f"{sanitized_filename}.metadata" + lock_path = metadata_path.with_suffix(".lock") + + # Some Windows versions do not allow for paths longer than 255 characters. + # In this case, we must specify it as an extended path by using the "\\?\" prefix + if os.name == "nt": + if not str(local_dir).startswith("\\\\?\\") and len(os.path.abspath(lock_path)) > 255: + file_path = Path("\\\\?\\" + os.path.abspath(file_path)) + lock_path = Path("\\\\?\\" + os.path.abspath(lock_path)) + metadata_path = Path("\\\\?\\" + os.path.abspath(metadata_path)) + + file_path.parent.mkdir(parents=True, exist_ok=True) + metadata_path.parent.mkdir(parents=True, exist_ok=True) + return LocalDownloadFilePaths(file_path=file_path, lock_path=lock_path, metadata_path=metadata_path) + + +def get_local_upload_paths(local_dir: Path, filename: str) -> LocalUploadFilePaths: + """Compute paths to the files related to an upload process. + + Folders containing the paths are all guaranteed to exist. + + Args: + local_dir (`Path`): + Path to the local directory that is uploaded. + filename (`str`): + Path of the file in the repo. + + Return: + [`LocalUploadFilePaths`]: the paths to the files (file_path, lock_path, metadata_path). + """ + # filename is the path in the Hub repository (separated by '/') + # make sure to have a cross platform transcription + sanitized_filename = os.path.join(*filename.split("/")) + if os.name == "nt": + if sanitized_filename.startswith("..\\") or "\\..\\" in sanitized_filename: + raise ValueError( + f"Invalid filename: cannot handle filename '{sanitized_filename}' on Windows. Please ask the repository" + " owner to rename this file." + ) + file_path = local_dir / sanitized_filename + metadata_path = _huggingface_dir(local_dir) / "upload" / f"{sanitized_filename}.metadata" + lock_path = metadata_path.with_suffix(".lock") + + # Some Windows versions do not allow for paths longer than 255 characters. + # In this case, we must specify it as an extended path by using the "\\?\" prefix + if os.name == "nt": + if not str(local_dir).startswith("\\\\?\\") and len(os.path.abspath(lock_path)) > 255: + file_path = Path("\\\\?\\" + os.path.abspath(file_path)) + lock_path = Path("\\\\?\\" + os.path.abspath(lock_path)) + metadata_path = Path("\\\\?\\" + os.path.abspath(metadata_path)) + + file_path.parent.mkdir(parents=True, exist_ok=True) + metadata_path.parent.mkdir(parents=True, exist_ok=True) + return LocalUploadFilePaths( + path_in_repo=filename, file_path=file_path, lock_path=lock_path, metadata_path=metadata_path + ) + + +def read_download_metadata(local_dir: Path, filename: str) -> Optional[LocalDownloadFileMetadata]: + """Read metadata about a file in the local directory related to a download process. + + Args: + local_dir (`Path`): + Path to the local directory in which files are downloaded. + filename (`str`): + Path of the file in the repo. + + Return: + `[LocalDownloadFileMetadata]` or `None`: the metadata if it exists, `None` otherwise. + """ + paths = get_local_download_paths(local_dir, filename) + with WeakFileLock(paths.lock_path): + if paths.metadata_path.exists(): + try: + with paths.metadata_path.open() as f: + commit_hash = f.readline().strip() + etag = f.readline().strip() + timestamp = float(f.readline().strip()) + metadata = LocalDownloadFileMetadata( + filename=filename, + commit_hash=commit_hash, + etag=etag, + timestamp=timestamp, + ) + except Exception as e: + # remove the metadata file if it is corrupted / not the right format + logger.warning( + f"Invalid metadata file {paths.metadata_path}: {e}. Removing it from disk and continue." + ) + try: + paths.metadata_path.unlink() + except Exception as e: + logger.warning(f"Could not remove corrupted metadata file {paths.metadata_path}: {e}") + + try: + # check if the file exists and hasn't been modified since the metadata was saved + stat = paths.file_path.stat() + if ( + stat.st_mtime - 1 <= metadata.timestamp + ): # allow 1s difference as stat.st_mtime might not be precise + return metadata + logger.info(f"Ignored metadata for '{filename}' (outdated). Will re-compute hash.") + except FileNotFoundError: + # file does not exist => metadata is outdated + return None + return None + + +def read_upload_metadata(local_dir: Path, filename: str) -> LocalUploadFileMetadata: + """Read metadata about a file in the local directory related to an upload process. + + TODO: factorize logic with `read_download_metadata`. + + Args: + local_dir (`Path`): + Path to the local directory in which files are downloaded. + filename (`str`): + Path of the file in the repo. + + Return: + `[LocalUploadFileMetadata]` or `None`: the metadata if it exists, `None` otherwise. + """ + paths = get_local_upload_paths(local_dir, filename) + with WeakFileLock(paths.lock_path): + if paths.metadata_path.exists(): + try: + with paths.metadata_path.open() as f: + timestamp = float(f.readline().strip()) + + size = int(f.readline().strip()) # never None + + _should_ignore = f.readline().strip() + should_ignore = None if _should_ignore == "" else bool(int(_should_ignore)) + + _sha256 = f.readline().strip() + sha256 = None if _sha256 == "" else _sha256 + + _upload_mode = f.readline().strip() + upload_mode = None if _upload_mode == "" else _upload_mode + if upload_mode not in (None, "regular", "lfs"): + raise ValueError(f"Invalid upload mode in metadata {paths.path_in_repo}: {upload_mode}") + + is_uploaded = bool(int(f.readline().strip())) + is_committed = bool(int(f.readline().strip())) + + metadata = LocalUploadFileMetadata( + timestamp=timestamp, + size=size, + should_ignore=should_ignore, + sha256=sha256, + upload_mode=upload_mode, + is_uploaded=is_uploaded, + is_committed=is_committed, + ) + except Exception as e: + # remove the metadata file if it is corrupted / not the right format + logger.warning( + f"Invalid metadata file {paths.metadata_path}: {e}. Removing it from disk and continue." + ) + try: + paths.metadata_path.unlink() + except Exception as e: + logger.warning(f"Could not remove corrupted metadata file {paths.metadata_path}: {e}") + + # TODO: can we do better? + if ( + metadata.timestamp is not None + and metadata.is_uploaded # file was uploaded + and not metadata.is_committed # but not committed + and time.time() - metadata.timestamp > 20 * 3600 # and it's been more than 20 hours + ): # => we consider it as garbage-collected by S3 + metadata.is_uploaded = False + + # check if the file exists and hasn't been modified since the metadata was saved + try: + if metadata.timestamp is not None and paths.file_path.stat().st_mtime <= metadata.timestamp: + return metadata + logger.info(f"Ignored metadata for '{filename}' (outdated). Will re-compute hash.") + except FileNotFoundError: + # file does not exist => metadata is outdated + pass + + # empty metadata => we don't know anything expect its size + return LocalUploadFileMetadata(size=paths.file_path.stat().st_size) + + +def write_download_metadata(local_dir: Path, filename: str, commit_hash: str, etag: str) -> None: + """Write metadata about a file in the local directory related to a download process. + + Args: + local_dir (`Path`): + Path to the local directory in which files are downloaded. + """ + paths = get_local_download_paths(local_dir, filename) + with WeakFileLock(paths.lock_path): + with paths.metadata_path.open("w") as f: + f.write(f"{commit_hash}\n{etag}\n{time.time()}\n") + + +def _huggingface_dir(local_dir: Path) -> Path: + """Return the path to the `.cache/huggingface` directory in a local directory.""" + # Wrap in lru_cache to avoid overwriting the .gitignore file if called multiple times + path = local_dir / ".cache" / "huggingface" + path.mkdir(exist_ok=True, parents=True) + + # Create a .gitignore file in the .cache/huggingface directory if it doesn't exist + # Should be thread-safe enough like this. + gitignore = path / ".gitignore" + gitignore_lock = path / ".gitignore.lock" + if not gitignore.exists(): + try: + with WeakFileLock(gitignore_lock): + gitignore.write_text("*") + gitignore_lock.unlink() + except OSError: # FileNotFoundError, PermissionError, etc. + pass + return path diff --git a/meow/lib/python3.13/site-packages/huggingface_hub/_login.py b/meow/lib/python3.13/site-packages/huggingface_hub/_login.py new file mode 100644 index 0000000000000000000000000000000000000000..b14702201d45bebde47f95a5bc7fc85c9e93c84b --- /dev/null +++ b/meow/lib/python3.13/site-packages/huggingface_hub/_login.py @@ -0,0 +1,520 @@ +# Copyright 2020 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Contains methods to log in to the Hub.""" + +import os +import subprocess +from getpass import getpass +from pathlib import Path +from typing import Optional + +from . import constants +from .commands._cli_utils import ANSI +from .utils import ( + capture_output, + get_token, + is_google_colab, + is_notebook, + list_credential_helpers, + logging, + run_subprocess, + set_git_credential, + unset_git_credential, +) +from .utils._auth import ( + _get_token_by_name, + _get_token_from_environment, + _get_token_from_file, + _get_token_from_google_colab, + _save_stored_tokens, + _save_token, + get_stored_tokens, +) +from .utils._deprecation import _deprecate_arguments, _deprecate_positional_args + + +logger = logging.get_logger(__name__) + +_HF_LOGO_ASCII = """ + _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_| + _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _| + _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_| + _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _| + _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_| +""" + + +@_deprecate_arguments( + version="1.0", + deprecated_args="write_permission", + custom_message="Fine-grained tokens added complexity to the permissions, making it irrelevant to check if a token has 'write' access.", +) +@_deprecate_positional_args(version="1.0") +def login( + token: Optional[str] = None, + *, + add_to_git_credential: bool = False, + new_session: bool = True, + write_permission: bool = False, +) -> None: + """Login the machine to access the Hub. + + The `token` is persisted in cache and set as a git credential. Once done, the machine + is logged in and the access token will be available across all `huggingface_hub` + components. If `token` is not provided, it will be prompted to the user either with + a widget (in a notebook) or via the terminal. + + To log in from outside of a script, one can also use `huggingface-cli login` which is + a cli command that wraps [`login`]. + + + + [`login`] is a drop-in replacement method for [`notebook_login`] as it wraps and + extends its capabilities. + + + + + + When the token is not passed, [`login`] will automatically detect if the script runs + in a notebook or not. However, this detection might not be accurate due to the + variety of notebooks that exists nowadays. If that is the case, you can always force + the UI by using [`notebook_login`] or [`interpreter_login`]. + + + + Args: + token (`str`, *optional*): + User access token to generate from https://huggingface.co/settings/token. + add_to_git_credential (`bool`, defaults to `False`): + If `True`, token will be set as git credential. If no git credential helper + is configured, a warning will be displayed to the user. If `token` is `None`, + the value of `add_to_git_credential` is ignored and will be prompted again + to the end user. + new_session (`bool`, defaults to `True`): + If `True`, will request a token even if one is already saved on the machine. + write_permission (`bool`): + Ignored and deprecated argument. + Raises: + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If an organization token is passed. Only personal account tokens are valid + to log in. + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If token is invalid. + [`ImportError`](https://docs.python.org/3/library/exceptions.html#ImportError) + If running in a notebook but `ipywidgets` is not installed. + """ + if token is not None: + if not add_to_git_credential: + logger.info( + "The token has not been saved to the git credentials helper. Pass " + "`add_to_git_credential=True` in this function directly or " + "`--add-to-git-credential` if using via `huggingface-cli` if " + "you want to set the git credential as well." + ) + _login(token, add_to_git_credential=add_to_git_credential) + elif is_notebook(): + notebook_login(new_session=new_session) + else: + interpreter_login(new_session=new_session) + + +def logout(token_name: Optional[str] = None) -> None: + """Logout the machine from the Hub. + + Token is deleted from the machine and removed from git credential. + + Args: + token_name (`str`, *optional*): + Name of the access token to logout from. If `None`, will logout from all saved access tokens. + Raises: + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError): + If the access token name is not found. + """ + if get_token() is None and not get_stored_tokens(): # No active token and no saved access tokens + logger.warning("Not logged in!") + return + if not token_name: + # Delete all saved access tokens and token + for file_path in (constants.HF_TOKEN_PATH, constants.HF_STORED_TOKENS_PATH): + try: + Path(file_path).unlink() + except FileNotFoundError: + pass + logger.info("Successfully logged out from all access tokens.") + else: + _logout_from_token(token_name) + logger.info(f"Successfully logged out from access token: {token_name}.") + + unset_git_credential() + + # Check if still logged in + if _get_token_from_google_colab() is not None: + raise EnvironmentError( + "You are automatically logged in using a Google Colab secret.\n" + "To log out, you must unset the `HF_TOKEN` secret in your Colab settings." + ) + if _get_token_from_environment() is not None: + raise EnvironmentError( + "Token has been deleted from your machine but you are still logged in.\n" + "To log out, you must clear out both `HF_TOKEN` and `HUGGING_FACE_HUB_TOKEN` environment variables." + ) + + +def auth_switch(token_name: str, add_to_git_credential: bool = False) -> None: + """Switch to a different access token. + + Args: + token_name (`str`): + Name of the access token to switch to. + add_to_git_credential (`bool`, defaults to `False`): + If `True`, token will be set as git credential. If no git credential helper + is configured, a warning will be displayed to the user. If `token` is `None`, + the value of `add_to_git_credential` is ignored and will be prompted again + to the end user. + + Raises: + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError): + If the access token name is not found. + """ + token = _get_token_by_name(token_name) + if not token: + raise ValueError(f"Access token {token_name} not found in {constants.HF_STORED_TOKENS_PATH}") + # Write token to HF_TOKEN_PATH + _set_active_token(token_name, add_to_git_credential) + logger.info(f"The current active token is: {token_name}") + token_from_environment = _get_token_from_environment() + if token_from_environment is not None and token_from_environment != token: + logger.warning( + "The environment variable `HF_TOKEN` is set and will override the access token you've just switched to." + ) + + +def auth_list() -> None: + """List all stored access tokens.""" + tokens = get_stored_tokens() + + if not tokens: + logger.info("No access tokens found.") + return + # Find current token + current_token = get_token() + current_token_name = None + for token_name in tokens: + if tokens.get(token_name) == current_token: + current_token_name = token_name + # Print header + max_offset = max(len("token"), max(len(token) for token in tokens)) + 2 + print(f" {{:<{max_offset}}}| {{:<15}}".format("name", "token")) + print("-" * (max_offset + 2) + "|" + "-" * 15) + + # Print saved access tokens + for token_name in tokens: + token = tokens.get(token_name, "") + masked_token = f"{token[:3]}****{token[-4:]}" if token != "" else token + is_current = "*" if token == current_token else " " + + print(f"{is_current} {{:<{max_offset}}}| {{:<15}}".format(token_name, masked_token)) + + if _get_token_from_environment(): + logger.warning( + "\nNote: Environment variable `HF_TOKEN` is set and is the current active token independently from the stored tokens listed above." + ) + elif current_token_name is None: + logger.warning( + "\nNote: No active token is set and no environment variable `HF_TOKEN` is found. Use `huggingface-cli login` to log in." + ) + + +### +# Interpreter-based login (text) +### + + +@_deprecate_arguments( + version="1.0", + deprecated_args="write_permission", + custom_message="Fine-grained tokens added complexity to the permissions, making it irrelevant to check if a token has 'write' access.", +) +@_deprecate_positional_args(version="1.0") +def interpreter_login(*, new_session: bool = True, write_permission: bool = False) -> None: + """ + Displays a prompt to log in to the HF website and store the token. + + This is equivalent to [`login`] without passing a token when not run in a notebook. + [`interpreter_login`] is useful if you want to force the use of the terminal prompt + instead of a notebook widget. + + For more details, see [`login`]. + + Args: + new_session (`bool`, defaults to `True`): + If `True`, will request a token even if one is already saved on the machine. + write_permission (`bool`): + Ignored and deprecated argument. + """ + if not new_session and get_token() is not None: + logger.info("User is already logged in.") + return + + from .commands.delete_cache import _ask_for_confirmation_no_tui + + print(_HF_LOGO_ASCII) + if get_token() is not None: + logger.info( + " A token is already saved on your machine. Run `huggingface-cli" + " whoami` to get more information or `huggingface-cli logout` if you want" + " to log out." + ) + logger.info(" Setting a new token will erase the existing one.") + + logger.info( + " To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens ." + ) + if os.name == "nt": + logger.info("Token can be pasted using 'Right-Click'.") + token = getpass("Enter your token (input will not be visible): ") + add_to_git_credential = _ask_for_confirmation_no_tui("Add token as git credential?") + + _login(token=token, add_to_git_credential=add_to_git_credential) + + +### +# Notebook-based login (widget) +### + +NOTEBOOK_LOGIN_PASSWORD_HTML = """

Immediately click login after typing your password or +it might be stored in plain text in this notebook file.
""" + + +NOTEBOOK_LOGIN_TOKEN_HTML_START = """

Copy a token from your Hugging Face +tokens page and paste it below.
Immediately click login after copying +your token or it might be stored in plain text in this notebook file.
""" + + +NOTEBOOK_LOGIN_TOKEN_HTML_END = """ +Pro Tip: If you don't already have one, you can create a dedicated +'notebooks' token with 'write' access, that you can then easily reuse for all +notebooks. """ + + +@_deprecate_arguments( + version="1.0", + deprecated_args="write_permission", + custom_message="Fine-grained tokens added complexity to the permissions, making it irrelevant to check if a token has 'write' access.", +) +@_deprecate_positional_args(version="1.0") +def notebook_login(*, new_session: bool = True, write_permission: bool = False) -> None: + """ + Displays a widget to log in to the HF website and store the token. + + This is equivalent to [`login`] without passing a token when run in a notebook. + [`notebook_login`] is useful if you want to force the use of the notebook widget + instead of a prompt in the terminal. + + For more details, see [`login`]. + + Args: + new_session (`bool`, defaults to `True`): + If `True`, will request a token even if one is already saved on the machine. + write_permission (`bool`): + Ignored and deprecated argument. + """ + try: + import ipywidgets.widgets as widgets # type: ignore + from IPython.display import display # type: ignore + except ImportError: + raise ImportError( + "The `notebook_login` function can only be used in a notebook (Jupyter or" + " Colab) and you need the `ipywidgets` module: `pip install ipywidgets`." + ) + if not new_session and get_token() is not None: + logger.info("User is already logged in.") + return + + box_layout = widgets.Layout(display="flex", flex_flow="column", align_items="center", width="50%") + + token_widget = widgets.Password(description="Token:") + git_checkbox_widget = widgets.Checkbox(value=True, description="Add token as git credential?") + token_finish_button = widgets.Button(description="Login") + + login_token_widget = widgets.VBox( + [ + widgets.HTML(NOTEBOOK_LOGIN_TOKEN_HTML_START), + token_widget, + git_checkbox_widget, + token_finish_button, + widgets.HTML(NOTEBOOK_LOGIN_TOKEN_HTML_END), + ], + layout=box_layout, + ) + display(login_token_widget) + + # On click events + def login_token_event(t): + """Event handler for the login button.""" + token = token_widget.value + add_to_git_credential = git_checkbox_widget.value + # Erase token and clear value to make sure it's not saved in the notebook. + token_widget.value = "" + # Hide inputs + login_token_widget.children = [widgets.Label("Connecting...")] + try: + with capture_output() as captured: + _login(token, add_to_git_credential=add_to_git_credential) + message = captured.getvalue() + except Exception as error: + message = str(error) + # Print result (success message or error) + login_token_widget.children = [widgets.Label(line) for line in message.split("\n") if line.strip()] + + token_finish_button.on_click(login_token_event) + + +### +# Login private helpers +### + + +def _login( + token: str, + add_to_git_credential: bool, +) -> None: + from .hf_api import whoami # avoid circular import + + if token.startswith("api_org"): + raise ValueError("You must use your personal account token, not an organization token.") + + token_info = whoami(token) + permission = token_info["auth"]["accessToken"]["role"] + logger.info(f"Token is valid (permission: {permission}).") + + token_name = token_info["auth"]["accessToken"]["displayName"] + # Store token locally + _save_token(token=token, token_name=token_name) + # Set active token + _set_active_token(token_name=token_name, add_to_git_credential=add_to_git_credential) + logger.info("Login successful.") + if _get_token_from_environment(): + logger.warning( + "Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured." + ) + else: + logger.info(f"The current active token is: `{token_name}`") + + +def _logout_from_token(token_name: str) -> None: + """Logout from a specific access token. + + Args: + token_name (`str`): + The name of the access token to logout from. + Raises: + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError): + If the access token name is not found. + """ + stored_tokens = get_stored_tokens() + # If there is no access tokens saved or the access token name is not found, do nothing + if not stored_tokens or token_name not in stored_tokens: + return + + token = stored_tokens.pop(token_name) + _save_stored_tokens(stored_tokens) + + if token == _get_token_from_file(): + logger.warning(f"Active token '{token_name}' has been deleted.") + Path(constants.HF_TOKEN_PATH).unlink(missing_ok=True) + + +def _set_active_token( + token_name: str, + add_to_git_credential: bool, +) -> None: + """Set the active access token. + + Args: + token_name (`str`): + The name of the token to set as active. + """ + token = _get_token_by_name(token_name) + if not token: + raise ValueError(f"Token {token_name} not found in {constants.HF_STORED_TOKENS_PATH}") + if add_to_git_credential: + if _is_git_credential_helper_configured(): + set_git_credential(token) + logger.info( + "Your token has been saved in your configured git credential helpers" + + f" ({','.join(list_credential_helpers())})." + ) + else: + logger.warning("Token has not been saved to git credential helper.") + # Write token to HF_TOKEN_PATH + path = Path(constants.HF_TOKEN_PATH) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(token) + logger.info(f"Your token has been saved to {constants.HF_TOKEN_PATH}") + + +def _is_git_credential_helper_configured() -> bool: + """Check if a git credential helper is configured. + + Warns user if not the case (except for Google Colab where "store" is set by default + by `huggingface_hub`). + """ + helpers = list_credential_helpers() + if len(helpers) > 0: + return True # Do not warn: at least 1 helper is set + + # Only in Google Colab to avoid the warning message + # See https://github.com/huggingface/huggingface_hub/issues/1043#issuecomment-1247010710 + if is_google_colab(): + _set_store_as_git_credential_helper_globally() + return True # Do not warn: "store" is used by default in Google Colab + + # Otherwise, warn user + print( + ANSI.red( + "Cannot authenticate through git-credential as no helper is defined on your" + " machine.\nYou might have to re-authenticate when pushing to the Hugging" + " Face Hub.\nRun the following command in your terminal in case you want to" + " set the 'store' credential helper as default.\n\ngit config --global" + " credential.helper store\n\nRead" + " https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more" + " details." + ) + ) + return False + + +def _set_store_as_git_credential_helper_globally() -> None: + """Set globally the credential.helper to `store`. + + To be used only in Google Colab as we assume the user doesn't care about the git + credential config. It is the only particular case where we don't want to display the + warning message in [`notebook_login()`]. + + Related: + - https://github.com/huggingface/huggingface_hub/issues/1043 + - https://github.com/huggingface/huggingface_hub/issues/1051 + - https://git-scm.com/docs/git-credential-store + """ + try: + run_subprocess("git config --global credential.helper store") + except subprocess.CalledProcessError as exc: + raise EnvironmentError(exc.stderr) diff --git a/meow/lib/python3.13/site-packages/huggingface_hub/_snapshot_download.py b/meow/lib/python3.13/site-packages/huggingface_hub/_snapshot_download.py new file mode 100644 index 0000000000000000000000000000000000000000..b928dd346664121eb0c3e033fc39af136cbcdcc8 --- /dev/null +++ b/meow/lib/python3.13/site-packages/huggingface_hub/_snapshot_download.py @@ -0,0 +1,307 @@ +import os +from pathlib import Path +from typing import Dict, List, Literal, Optional, Union + +import requests +from tqdm.auto import tqdm as base_tqdm +from tqdm.contrib.concurrent import thread_map + +from . import constants +from .errors import GatedRepoError, LocalEntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError +from .file_download import REGEX_COMMIT_HASH, hf_hub_download, repo_folder_name +from .hf_api import DatasetInfo, HfApi, ModelInfo, SpaceInfo +from .utils import OfflineModeIsEnabled, filter_repo_objects, logging, validate_hf_hub_args +from .utils import tqdm as hf_tqdm + + +logger = logging.get_logger(__name__) + + +@validate_hf_hub_args +def snapshot_download( + repo_id: str, + *, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + cache_dir: Union[str, Path, None] = None, + local_dir: Union[str, Path, None] = None, + library_name: Optional[str] = None, + library_version: Optional[str] = None, + user_agent: Optional[Union[Dict, str]] = None, + proxies: Optional[Dict] = None, + etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT, + force_download: bool = False, + token: Optional[Union[bool, str]] = None, + local_files_only: bool = False, + allow_patterns: Optional[Union[List[str], str]] = None, + ignore_patterns: Optional[Union[List[str], str]] = None, + max_workers: int = 8, + tqdm_class: Optional[base_tqdm] = None, + headers: Optional[Dict[str, str]] = None, + endpoint: Optional[str] = None, + # Deprecated args + local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto", + resume_download: Optional[bool] = None, +) -> str: + """Download repo files. + + Download a whole snapshot of a repo's files at the specified revision. This is useful when you want all files from + a repo, because you don't know which ones you will need a priori. All files are nested inside a folder in order + to keep their actual filename relative to that folder. You can also filter which files to download using + `allow_patterns` and `ignore_patterns`. + + If `local_dir` is provided, the file structure from the repo will be replicated in this location. When using this + option, the `cache_dir` will not be used and a `.cache/huggingface/` folder will be created at the root of `local_dir` + to store some metadata related to the downloaded files. While this mechanism is not as robust as the main + cache-system, it's optimized for regularly pulling the latest version of a repository. + + An alternative would be to clone the repo but this requires git and git-lfs to be installed and properly + configured. It is also not possible to filter which files to download when cloning a repository using git. + + Args: + repo_id (`str`): + A user or an organization name and a repo name separated by a `/`. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if downloading from a dataset or space, + `None` or `"model"` if downloading from a model. Default is `None`. + revision (`str`, *optional*): + An optional Git revision id which can be a branch name, a tag, or a + commit hash. + cache_dir (`str`, `Path`, *optional*): + Path to the folder where cached files are stored. + local_dir (`str` or `Path`, *optional*): + If provided, the downloaded files will be placed under this directory. + library_name (`str`, *optional*): + The name of the library to which the object corresponds. + library_version (`str`, *optional*): + The version of the library. + user_agent (`str`, `dict`, *optional*): + The user-agent info in the form of a dictionary or a string. + proxies (`dict`, *optional*): + Dictionary mapping protocol to the URL of the proxy passed to + `requests.request`. + etag_timeout (`float`, *optional*, defaults to `10`): + When fetching ETag, how many seconds to wait for the server to send + data before giving up which is passed to `requests.request`. + force_download (`bool`, *optional*, defaults to `False`): + Whether the file should be downloaded even if it already exists in the local cache. + token (`str`, `bool`, *optional*): + A token to be used for the download. + - If `True`, the token is read from the HuggingFace config + folder. + - If a string, it's used as the authentication token. + headers (`dict`, *optional*): + Additional headers to include in the request. Those headers take precedence over the others. + local_files_only (`bool`, *optional*, defaults to `False`): + If `True`, avoid downloading the file and return the path to the + local cached file if it exists. + allow_patterns (`List[str]` or `str`, *optional*): + If provided, only files matching at least one pattern are downloaded. + ignore_patterns (`List[str]` or `str`, *optional*): + If provided, files matching any of the patterns are not downloaded. + max_workers (`int`, *optional*): + Number of concurrent threads to download files (1 thread = 1 file download). + Defaults to 8. + tqdm_class (`tqdm`, *optional*): + If provided, overwrites the default behavior for the progress bar. Passed + argument must inherit from `tqdm.auto.tqdm` or at least mimic its behavior. + Note that the `tqdm_class` is not passed to each individual download. + Defaults to the custom HF progress bar that can be disabled by setting + `HF_HUB_DISABLE_PROGRESS_BARS` environment variable. + + Returns: + `str`: folder path of the repo snapshot. + + Raises: + [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + [`~utils.RevisionNotFoundError`] + If the revision to download from cannot be found. + [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError) + If `token=True` and the token cannot be found. + [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) if + ETag cannot be determined. + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + if some parameter value is invalid. + """ + if cache_dir is None: + cache_dir = constants.HF_HUB_CACHE + if revision is None: + revision = constants.DEFAULT_REVISION + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + if repo_type is None: + repo_type = "model" + if repo_type not in constants.REPO_TYPES: + raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(constants.REPO_TYPES)}") + + storage_folder = os.path.join(cache_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type)) + + repo_info: Union[ModelInfo, DatasetInfo, SpaceInfo, None] = None + api_call_error: Optional[Exception] = None + if not local_files_only: + # try/except logic to handle different errors => taken from `hf_hub_download` + try: + # if we have internet connection we want to list files to download + api = HfApi( + library_name=library_name, + library_version=library_version, + user_agent=user_agent, + endpoint=endpoint, + headers=headers, + ) + repo_info = api.repo_info(repo_id=repo_id, repo_type=repo_type, revision=revision, token=token) + except (requests.exceptions.SSLError, requests.exceptions.ProxyError): + # Actually raise for those subclasses of ConnectionError + raise + except ( + requests.exceptions.ConnectionError, + requests.exceptions.Timeout, + OfflineModeIsEnabled, + ) as error: + # Internet connection is down + # => will try to use local files only + api_call_error = error + pass + except RevisionNotFoundError: + # The repo was found but the revision doesn't exist on the Hub (never existed or got deleted) + raise + except requests.HTTPError as error: + # Multiple reasons for an http error: + # - Repository is private and invalid/missing token sent + # - Repository is gated and invalid/missing token sent + # - Hub is down (error 500 or 504) + # => let's switch to 'local_files_only=True' to check if the files are already cached. + # (if it's not the case, the error will be re-raised) + api_call_error = error + pass + + # At this stage, if `repo_info` is None it means either: + # - internet connection is down + # - internet connection is deactivated (local_files_only=True or HF_HUB_OFFLINE=True) + # - repo is private/gated and invalid/missing token sent + # - Hub is down + # => let's look if we can find the appropriate folder in the cache: + # - if the specified revision is a commit hash, look inside "snapshots". + # - f the specified revision is a branch or tag, look inside "refs". + # => if local_dir is not None, we will return the path to the local folder if it exists. + if repo_info is None: + # Try to get which commit hash corresponds to the specified revision + commit_hash = None + if REGEX_COMMIT_HASH.match(revision): + commit_hash = revision + else: + ref_path = os.path.join(storage_folder, "refs", revision) + if os.path.exists(ref_path): + # retrieve commit_hash from refs file + with open(ref_path) as f: + commit_hash = f.read() + + # Try to locate snapshot folder for this commit hash + if commit_hash is not None: + snapshot_folder = os.path.join(storage_folder, "snapshots", commit_hash) + if os.path.exists(snapshot_folder): + # Snapshot folder exists => let's return it + # (but we can't check if all the files are actually there) + return snapshot_folder + # If local_dir is not None, return it if it exists and is not empty + if local_dir is not None: + local_dir = Path(local_dir) + if local_dir.is_dir() and any(local_dir.iterdir()): + logger.warning( + f"Returning existing local_dir `{local_dir}` as remote repo cannot be accessed in `snapshot_download` ({api_call_error})." + ) + return str(local_dir.resolve()) + # If we couldn't find the appropriate folder on disk, raise an error. + if local_files_only: + raise LocalEntryNotFoundError( + "Cannot find an appropriate cached snapshot folder for the specified revision on the local disk and " + "outgoing traffic has been disabled. To enable repo look-ups and downloads online, pass " + "'local_files_only=False' as input." + ) + elif isinstance(api_call_error, OfflineModeIsEnabled): + raise LocalEntryNotFoundError( + "Cannot find an appropriate cached snapshot folder for the specified revision on the local disk and " + "outgoing traffic has been disabled. To enable repo look-ups and downloads online, set " + "'HF_HUB_OFFLINE=0' as environment variable." + ) from api_call_error + elif isinstance(api_call_error, RepositoryNotFoundError) or isinstance(api_call_error, GatedRepoError): + # Repo not found => let's raise the actual error + raise api_call_error + else: + # Otherwise: most likely a connection issue or Hub downtime => let's warn the user + raise LocalEntryNotFoundError( + "An error happened while trying to locate the files on the Hub and we cannot find the appropriate" + " snapshot folder for the specified revision on the local disk. Please check your internet connection" + " and try again." + ) from api_call_error + + # At this stage, internet connection is up and running + # => let's download the files! + assert repo_info.sha is not None, "Repo info returned from server must have a revision sha." + assert repo_info.siblings is not None, "Repo info returned from server must have a siblings list." + filtered_repo_files = list( + filter_repo_objects( + items=[f.rfilename for f in repo_info.siblings], + allow_patterns=allow_patterns, + ignore_patterns=ignore_patterns, + ) + ) + commit_hash = repo_info.sha + snapshot_folder = os.path.join(storage_folder, "snapshots", commit_hash) + # if passed revision is not identical to commit_hash + # then revision has to be a branch name or tag name. + # In that case store a ref. + if revision != commit_hash: + ref_path = os.path.join(storage_folder, "refs", revision) + try: + os.makedirs(os.path.dirname(ref_path), exist_ok=True) + with open(ref_path, "w") as f: + f.write(commit_hash) + except OSError as e: + logger.warning(f"Ignored error while writing commit hash to {ref_path}: {e}.") + + # we pass the commit_hash to hf_hub_download + # so no network call happens if we already + # have the file locally. + def _inner_hf_hub_download(repo_file: str): + return hf_hub_download( + repo_id, + filename=repo_file, + repo_type=repo_type, + revision=commit_hash, + endpoint=endpoint, + cache_dir=cache_dir, + local_dir=local_dir, + local_dir_use_symlinks=local_dir_use_symlinks, + library_name=library_name, + library_version=library_version, + user_agent=user_agent, + proxies=proxies, + etag_timeout=etag_timeout, + resume_download=resume_download, + force_download=force_download, + token=token, + headers=headers, + ) + + if constants.HF_HUB_ENABLE_HF_TRANSFER: + # when using hf_transfer we don't want extra parallelism + # from the one hf_transfer provides + for file in filtered_repo_files: + _inner_hf_hub_download(file) + else: + thread_map( + _inner_hf_hub_download, + filtered_repo_files, + desc=f"Fetching {len(filtered_repo_files)} files", + max_workers=max_workers, + # User can use its own tqdm class or the default one from `huggingface_hub.utils` + tqdm_class=tqdm_class or hf_tqdm, + ) + + if local_dir is not None: + return str(os.path.realpath(local_dir)) + return snapshot_folder diff --git a/meow/lib/python3.13/site-packages/huggingface_hub/_upload_large_folder.py b/meow/lib/python3.13/site-packages/huggingface_hub/_upload_large_folder.py new file mode 100644 index 0000000000000000000000000000000000000000..a736e7562a3eb8159554cfd3b49fa6ed15628591 --- /dev/null +++ b/meow/lib/python3.13/site-packages/huggingface_hub/_upload_large_folder.py @@ -0,0 +1,621 @@ +# coding=utf-8 +# Copyright 2024-present, the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import enum +import logging +import os +import queue +import shutil +import sys +import threading +import time +import traceback +from datetime import datetime +from pathlib import Path +from threading import Lock +from typing import TYPE_CHECKING, List, Optional, Tuple, Union + +from . import constants +from ._commit_api import CommitOperationAdd, UploadInfo, _fetch_upload_modes +from ._local_folder import LocalUploadFileMetadata, LocalUploadFilePaths, get_local_upload_paths, read_upload_metadata +from .constants import DEFAULT_REVISION, REPO_TYPES +from .utils import DEFAULT_IGNORE_PATTERNS, filter_repo_objects, tqdm +from .utils._cache_manager import _format_size +from .utils.sha import sha_fileobj + + +if TYPE_CHECKING: + from .hf_api import HfApi + +logger = logging.getLogger(__name__) + +WAITING_TIME_IF_NO_TASKS = 10 # seconds +MAX_NB_REGULAR_FILES_PER_COMMIT = 75 +MAX_NB_LFS_FILES_PER_COMMIT = 150 + + +def upload_large_folder_internal( + api: "HfApi", + repo_id: str, + folder_path: Union[str, Path], + *, + repo_type: str, # Repo type is required! + revision: Optional[str] = None, + private: Optional[bool] = None, + allow_patterns: Optional[Union[List[str], str]] = None, + ignore_patterns: Optional[Union[List[str], str]] = None, + num_workers: Optional[int] = None, + print_report: bool = True, + print_report_every: int = 60, +): + """Upload a large folder to the Hub in the most resilient way possible. + + See [`HfApi.upload_large_folder`] for the full documentation. + """ + # 1. Check args and setup + if repo_type is None: + raise ValueError( + "For large uploads, `repo_type` is explicitly required. Please set it to `model`, `dataset` or `space`." + " If you are using the CLI, pass it as `--repo-type=model`." + ) + if repo_type not in REPO_TYPES: + raise ValueError(f"Invalid repo type, must be one of {REPO_TYPES}") + if revision is None: + revision = DEFAULT_REVISION + + folder_path = Path(folder_path).expanduser().resolve() + if not folder_path.is_dir(): + raise ValueError(f"Provided path: '{folder_path}' is not a directory") + + if ignore_patterns is None: + ignore_patterns = [] + elif isinstance(ignore_patterns, str): + ignore_patterns = [ignore_patterns] + ignore_patterns += DEFAULT_IGNORE_PATTERNS + + if num_workers is None: + nb_cores = os.cpu_count() or 1 + num_workers = max(nb_cores - 2, 2) # Use all but 2 cores, or at least 2 cores + + # 2. Create repo if missing + repo_url = api.create_repo(repo_id=repo_id, repo_type=repo_type, private=private, exist_ok=True) + logger.info(f"Repo created: {repo_url}") + repo_id = repo_url.repo_id + + # 3. List files to upload + filtered_paths_list = filter_repo_objects( + (path.relative_to(folder_path).as_posix() for path in folder_path.glob("**/*") if path.is_file()), + allow_patterns=allow_patterns, + ignore_patterns=ignore_patterns, + ) + paths_list = [get_local_upload_paths(folder_path, relpath) for relpath in filtered_paths_list] + logger.info(f"Found {len(paths_list)} candidate files to upload") + + # Read metadata for each file + items = [ + (paths, read_upload_metadata(folder_path, paths.path_in_repo)) + for paths in tqdm(paths_list, desc="Recovering from metadata files") + ] + + # 4. Start workers + status = LargeUploadStatus(items) + threads = [ + threading.Thread( + target=_worker_job, + kwargs={ + "status": status, + "api": api, + "repo_id": repo_id, + "repo_type": repo_type, + "revision": revision, + }, + ) + for _ in range(num_workers) + ] + + for thread in threads: + thread.start() + + # 5. Print regular reports + if print_report: + print("\n\n" + status.current_report()) + last_report_ts = time.time() + while True: + time.sleep(1) + if time.time() - last_report_ts >= print_report_every: + if print_report: + _print_overwrite(status.current_report()) + last_report_ts = time.time() + if status.is_done(): + logging.info("Is done: exiting main loop") + break + + for thread in threads: + thread.join() + + logger.info(status.current_report()) + logging.info("Upload is complete!") + + +#################### +# Logic to manage workers and synchronize tasks +#################### + + +class WorkerJob(enum.Enum): + SHA256 = enum.auto() + GET_UPLOAD_MODE = enum.auto() + PREUPLOAD_LFS = enum.auto() + COMMIT = enum.auto() + WAIT = enum.auto() # if no tasks are available but we don't want to exit + + +JOB_ITEM_T = Tuple[LocalUploadFilePaths, LocalUploadFileMetadata] + + +class LargeUploadStatus: + """Contains information, queues and tasks for a large upload process.""" + + def __init__(self, items: List[JOB_ITEM_T]): + self.items = items + self.queue_sha256: "queue.Queue[JOB_ITEM_T]" = queue.Queue() + self.queue_get_upload_mode: "queue.Queue[JOB_ITEM_T]" = queue.Queue() + self.queue_preupload_lfs: "queue.Queue[JOB_ITEM_T]" = queue.Queue() + self.queue_commit: "queue.Queue[JOB_ITEM_T]" = queue.Queue() + self.lock = Lock() + + self.nb_workers_sha256: int = 0 + self.nb_workers_get_upload_mode: int = 0 + self.nb_workers_preupload_lfs: int = 0 + self.nb_workers_commit: int = 0 + self.nb_workers_waiting: int = 0 + self.last_commit_attempt: Optional[float] = None + + self._started_at = datetime.now() + + # Setup queues + for item in self.items: + paths, metadata = item + if metadata.sha256 is None: + self.queue_sha256.put(item) + elif metadata.upload_mode is None: + self.queue_get_upload_mode.put(item) + elif metadata.upload_mode == "lfs" and not metadata.is_uploaded: + self.queue_preupload_lfs.put(item) + elif not metadata.is_committed: + self.queue_commit.put(item) + else: + logger.debug(f"Skipping file {paths.path_in_repo} (already uploaded and committed)") + + def current_report(self) -> str: + """Generate a report of the current status of the large upload.""" + nb_hashed = 0 + size_hashed = 0 + nb_preuploaded = 0 + nb_lfs = 0 + nb_lfs_unsure = 0 + size_preuploaded = 0 + nb_committed = 0 + size_committed = 0 + total_size = 0 + ignored_files = 0 + total_files = 0 + + with self.lock: + for _, metadata in self.items: + if metadata.should_ignore: + ignored_files += 1 + continue + total_size += metadata.size + total_files += 1 + if metadata.sha256 is not None: + nb_hashed += 1 + size_hashed += metadata.size + if metadata.upload_mode == "lfs": + nb_lfs += 1 + if metadata.upload_mode is None: + nb_lfs_unsure += 1 + if metadata.is_uploaded: + nb_preuploaded += 1 + size_preuploaded += metadata.size + if metadata.is_committed: + nb_committed += 1 + size_committed += metadata.size + total_size_str = _format_size(total_size) + + now = datetime.now() + now_str = now.strftime("%Y-%m-%d %H:%M:%S") + elapsed = now - self._started_at + elapsed_str = str(elapsed).split(".")[0] # remove milliseconds + + message = "\n" + "-" * 10 + message += f" {now_str} ({elapsed_str}) " + message += "-" * 10 + "\n" + + message += "Files: " + message += f"hashed {nb_hashed}/{total_files} ({_format_size(size_hashed)}/{total_size_str}) | " + message += f"pre-uploaded: {nb_preuploaded}/{nb_lfs} ({_format_size(size_preuploaded)}/{total_size_str})" + if nb_lfs_unsure > 0: + message += f" (+{nb_lfs_unsure} unsure)" + message += f" | committed: {nb_committed}/{total_files} ({_format_size(size_committed)}/{total_size_str})" + message += f" | ignored: {ignored_files}\n" + + message += "Workers: " + message += f"hashing: {self.nb_workers_sha256} | " + message += f"get upload mode: {self.nb_workers_get_upload_mode} | " + message += f"pre-uploading: {self.nb_workers_preupload_lfs} | " + message += f"committing: {self.nb_workers_commit} | " + message += f"waiting: {self.nb_workers_waiting}\n" + message += "-" * 51 + + return message + + def is_done(self) -> bool: + with self.lock: + return all(metadata.is_committed or metadata.should_ignore for _, metadata in self.items) + + +def _worker_job( + status: LargeUploadStatus, + api: "HfApi", + repo_id: str, + repo_type: str, + revision: str, +): + """ + Main process for a worker. The worker will perform tasks based on the priority list until all files are uploaded + and committed. If no tasks are available, the worker will wait for 10 seconds before checking again. + + If a task fails for any reason, the item(s) are put back in the queue for another worker to pick up. + + Read `upload_large_folder` docstring for more information on how tasks are prioritized. + """ + while True: + next_job: Optional[Tuple[WorkerJob, List[JOB_ITEM_T]]] = None + + # Determine next task + next_job = _determine_next_job(status) + if next_job is None: + return + job, items = next_job + + # Perform task + if job == WorkerJob.SHA256: + item = items[0] # single item + try: + _compute_sha256(item) + status.queue_get_upload_mode.put(item) + except KeyboardInterrupt: + raise + except Exception as e: + logger.error(f"Failed to compute sha256: {e}") + traceback.format_exc() + status.queue_sha256.put(item) + + with status.lock: + status.nb_workers_sha256 -= 1 + + elif job == WorkerJob.GET_UPLOAD_MODE: + try: + _get_upload_mode(items, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision) + except KeyboardInterrupt: + raise + except Exception as e: + logger.error(f"Failed to get upload mode: {e}") + traceback.format_exc() + + # Items are either: + # - dropped (if should_ignore) + # - put in LFS queue (if LFS) + # - put in commit queue (if regular) + # - or put back (if error occurred). + for item in items: + _, metadata = item + if metadata.should_ignore: + continue + if metadata.upload_mode == "lfs": + status.queue_preupload_lfs.put(item) + elif metadata.upload_mode == "regular": + status.queue_commit.put(item) + else: + status.queue_get_upload_mode.put(item) + + with status.lock: + status.nb_workers_get_upload_mode -= 1 + + elif job == WorkerJob.PREUPLOAD_LFS: + item = items[0] # single item + try: + _preupload_lfs(item, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision) + status.queue_commit.put(item) + except KeyboardInterrupt: + raise + except Exception as e: + logger.error(f"Failed to preupload LFS: {e}") + traceback.format_exc() + status.queue_preupload_lfs.put(item) + + with status.lock: + status.nb_workers_preupload_lfs -= 1 + + elif job == WorkerJob.COMMIT: + try: + _commit(items, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision) + except KeyboardInterrupt: + raise + except Exception as e: + logger.error(f"Failed to commit: {e}") + traceback.format_exc() + for item in items: + status.queue_commit.put(item) + with status.lock: + status.last_commit_attempt = time.time() + status.nb_workers_commit -= 1 + + elif job == WorkerJob.WAIT: + time.sleep(WAITING_TIME_IF_NO_TASKS) + with status.lock: + status.nb_workers_waiting -= 1 + + +def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob, List[JOB_ITEM_T]]]: + with status.lock: + # 1. Commit if more than 5 minutes since last commit attempt (and at least 1 file) + if ( + status.nb_workers_commit == 0 + and status.queue_commit.qsize() > 0 + and status.last_commit_attempt is not None + and time.time() - status.last_commit_attempt > 5 * 60 + ): + status.nb_workers_commit += 1 + logger.debug("Job: commit (more than 5 minutes since last commit attempt)") + return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit)) + + # 2. Commit if at least 100 files are ready to commit + elif status.nb_workers_commit == 0 and status.queue_commit.qsize() >= 150: + status.nb_workers_commit += 1 + logger.debug("Job: commit (>100 files ready)") + return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit)) + + # 3. Get upload mode if at least 10 files + elif status.queue_get_upload_mode.qsize() >= 10: + status.nb_workers_get_upload_mode += 1 + logger.debug("Job: get upload mode (>10 files ready)") + return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50)) + + # 4. Preupload LFS file if at least 1 file and no worker is preuploading LFS + elif status.queue_preupload_lfs.qsize() > 0 and status.nb_workers_preupload_lfs == 0: + status.nb_workers_preupload_lfs += 1 + logger.debug("Job: preupload LFS (no other worker preuploading LFS)") + return (WorkerJob.PREUPLOAD_LFS, _get_one(status.queue_preupload_lfs)) + + # 5. Compute sha256 if at least 1 file and no worker is computing sha256 + elif status.queue_sha256.qsize() > 0 and status.nb_workers_sha256 == 0: + status.nb_workers_sha256 += 1 + logger.debug("Job: sha256 (no other worker computing sha256)") + return (WorkerJob.SHA256, _get_one(status.queue_sha256)) + + # 6. Get upload mode if at least 1 file and no worker is getting upload mode + elif status.queue_get_upload_mode.qsize() > 0 and status.nb_workers_get_upload_mode == 0: + status.nb_workers_get_upload_mode += 1 + logger.debug("Job: get upload mode (no other worker getting upload mode)") + return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50)) + + # 7. Preupload LFS file if at least 1 file + # Skip if hf_transfer is enabled and there is already a worker preuploading LFS + elif status.queue_preupload_lfs.qsize() > 0 and ( + status.nb_workers_preupload_lfs == 0 or not constants.HF_HUB_ENABLE_HF_TRANSFER + ): + status.nb_workers_preupload_lfs += 1 + logger.debug("Job: preupload LFS") + return (WorkerJob.PREUPLOAD_LFS, _get_one(status.queue_preupload_lfs)) + + # 8. Compute sha256 if at least 1 file + elif status.queue_sha256.qsize() > 0: + status.nb_workers_sha256 += 1 + logger.debug("Job: sha256") + return (WorkerJob.SHA256, _get_one(status.queue_sha256)) + + # 9. Get upload mode if at least 1 file + elif status.queue_get_upload_mode.qsize() > 0: + status.nb_workers_get_upload_mode += 1 + logger.debug("Job: get upload mode") + return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50)) + + # 10. Commit if at least 1 file and 1 min since last commit attempt + elif ( + status.nb_workers_commit == 0 + and status.queue_commit.qsize() > 0 + and status.last_commit_attempt is not None + and time.time() - status.last_commit_attempt > 1 * 60 + ): + status.nb_workers_commit += 1 + logger.debug("Job: commit (1 min since last commit attempt)") + return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit)) + + # 11. Commit if at least 1 file all other queues are empty and all workers are waiting + # e.g. when it's the last commit + elif ( + status.nb_workers_commit == 0 + and status.queue_commit.qsize() > 0 + and status.queue_sha256.qsize() == 0 + and status.queue_get_upload_mode.qsize() == 0 + and status.queue_preupload_lfs.qsize() == 0 + and status.nb_workers_sha256 == 0 + and status.nb_workers_get_upload_mode == 0 + and status.nb_workers_preupload_lfs == 0 + ): + status.nb_workers_commit += 1 + logger.debug("Job: commit") + return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit)) + + # 12. If all queues are empty, exit + elif all(metadata.is_committed or metadata.should_ignore for _, metadata in status.items): + logger.info("All files have been processed! Exiting worker.") + return None + + # 13. If no task is available, wait + else: + status.nb_workers_waiting += 1 + logger.debug(f"No task available, waiting... ({WAITING_TIME_IF_NO_TASKS}s)") + return (WorkerJob.WAIT, []) + + +#################### +# Atomic jobs (sha256, get_upload_mode, preupload_lfs, commit) +#################### + + +def _compute_sha256(item: JOB_ITEM_T) -> None: + """Compute sha256 of a file and save it in metadata.""" + paths, metadata = item + if metadata.sha256 is None: + with paths.file_path.open("rb") as f: + metadata.sha256 = sha_fileobj(f).hex() + metadata.save(paths) + + +def _get_upload_mode(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None: + """Get upload mode for each file and update metadata. + + Also receive info if the file should be ignored. + """ + additions = [_build_hacky_operation(item) for item in items] + _fetch_upload_modes( + additions=additions, + repo_type=repo_type, + repo_id=repo_id, + headers=api._build_hf_headers(), + revision=revision, + ) + for item, addition in zip(items, additions): + paths, metadata = item + metadata.upload_mode = addition._upload_mode + metadata.should_ignore = addition._should_ignore + metadata.save(paths) + + +def _preupload_lfs(item: JOB_ITEM_T, api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None: + """Preupload LFS file and update metadata.""" + paths, metadata = item + addition = _build_hacky_operation(item) + api.preupload_lfs_files( + repo_id=repo_id, + repo_type=repo_type, + revision=revision, + additions=[addition], + ) + + metadata.is_uploaded = True + metadata.save(paths) + + +def _commit(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None: + """Commit files to the repo.""" + additions = [_build_hacky_operation(item) for item in items] + api.create_commit( + repo_id=repo_id, + repo_type=repo_type, + revision=revision, + operations=additions, + commit_message="Add files using upload-large-folder tool", + ) + for paths, metadata in items: + metadata.is_committed = True + metadata.save(paths) + + +#################### +# Hacks with CommitOperationAdd to bypass checks/sha256 calculation +#################### + + +class HackyCommitOperationAdd(CommitOperationAdd): + def __post_init__(self) -> None: + if isinstance(self.path_or_fileobj, Path): + self.path_or_fileobj = str(self.path_or_fileobj) + + +def _build_hacky_operation(item: JOB_ITEM_T) -> HackyCommitOperationAdd: + paths, metadata = item + operation = HackyCommitOperationAdd(path_in_repo=paths.path_in_repo, path_or_fileobj=paths.file_path) + with paths.file_path.open("rb") as file: + sample = file.peek(512)[:512] + if metadata.sha256 is None: + raise ValueError("sha256 must have been computed by now!") + operation.upload_info = UploadInfo(sha256=bytes.fromhex(metadata.sha256), size=metadata.size, sample=sample) + return operation + + +#################### +# Misc helpers +#################### + + +def _get_one(queue: "queue.Queue[JOB_ITEM_T]") -> List[JOB_ITEM_T]: + return [queue.get()] + + +def _get_n(queue: "queue.Queue[JOB_ITEM_T]", n: int) -> List[JOB_ITEM_T]: + return [queue.get() for _ in range(min(queue.qsize(), n))] + + +def _get_items_to_commit(queue: "queue.Queue[JOB_ITEM_T]") -> List[JOB_ITEM_T]: + """Special case for commit job: the number of items to commit depends on the type of files.""" + # Can take at most 50 regular files and/or 100 LFS files in a single commit + items: List[JOB_ITEM_T] = [] + nb_lfs, nb_regular = 0, 0 + while True: + # If empty queue => commit everything + if queue.qsize() == 0: + return items + + # If we have enough items => commit them + if nb_lfs >= MAX_NB_LFS_FILES_PER_COMMIT or nb_regular >= MAX_NB_REGULAR_FILES_PER_COMMIT: + return items + + # Else, get a new item and increase counter + item = queue.get() + items.append(item) + _, metadata = item + if metadata.upload_mode == "lfs": + nb_lfs += 1 + else: + nb_regular += 1 + + +def _print_overwrite(report: str) -> None: + """Print a report, overwriting the previous lines. + + Since tqdm in using `sys.stderr` to (re-)write progress bars, we need to use `sys.stdout` + to print the report. + + Note: works well only if no other process is writing to `sys.stdout`! + """ + report += "\n" + # Get terminal width + terminal_width = shutil.get_terminal_size().columns + + # Count number of lines that should be cleared + nb_lines = sum(len(line) // terminal_width + 1 for line in report.splitlines()) + + # Clear previous lines based on the number of lines in the report + for _ in range(nb_lines): + sys.stdout.write("\r\033[K") # Clear line + sys.stdout.write("\033[F") # Move cursor up one line + + # Print the new report, filling remaining space with whitespace + sys.stdout.write(report) + sys.stdout.write(" " * (terminal_width - len(report.splitlines()[-1]))) + sys.stdout.flush() diff --git a/meow/lib/python3.13/site-packages/huggingface_hub/community.py b/meow/lib/python3.13/site-packages/huggingface_hub/community.py new file mode 100644 index 0000000000000000000000000000000000000000..16f2f02428dd5c2ce6437534af0397801bda45c5 --- /dev/null +++ b/meow/lib/python3.13/site-packages/huggingface_hub/community.py @@ -0,0 +1,355 @@ +""" +Data structures to interact with Discussions and Pull Requests on the Hub. + +See [the Discussions and Pull Requests guide](https://huggingface.co/docs/hub/repositories-pull-requests-discussions) +for more information on Pull Requests, Discussions, and the community tab. +""" + +from dataclasses import dataclass +from datetime import datetime +from typing import List, Literal, Optional, Union + +from . import constants +from .utils import parse_datetime + + +DiscussionStatus = Literal["open", "closed", "merged", "draft"] + + +@dataclass +class Discussion: + """ + A Discussion or Pull Request on the Hub. + + This dataclass is not intended to be instantiated directly. + + Attributes: + title (`str`): + The title of the Discussion / Pull Request + status (`str`): + The status of the Discussion / Pull Request. + It must be one of: + * `"open"` + * `"closed"` + * `"merged"` (only for Pull Requests ) + * `"draft"` (only for Pull Requests ) + num (`int`): + The number of the Discussion / Pull Request. + repo_id (`str`): + The id (`"{namespace}/{repo_name}"`) of the repo on which + the Discussion / Pull Request was open. + repo_type (`str`): + The type of the repo on which the Discussion / Pull Request was open. + Possible values are: `"model"`, `"dataset"`, `"space"`. + author (`str`): + The username of the Discussion / Pull Request author. + Can be `"deleted"` if the user has been deleted since. + is_pull_request (`bool`): + Whether or not this is a Pull Request. + created_at (`datetime`): + The `datetime` of creation of the Discussion / Pull Request. + endpoint (`str`): + Endpoint of the Hub. Default is https://huggingface.co. + git_reference (`str`, *optional*): + (property) Git reference to which changes can be pushed if this is a Pull Request, `None` otherwise. + url (`str`): + (property) URL of the discussion on the Hub. + """ + + title: str + status: DiscussionStatus + num: int + repo_id: str + repo_type: str + author: str + is_pull_request: bool + created_at: datetime + endpoint: str + + @property + def git_reference(self) -> Optional[str]: + """ + If this is a Pull Request , returns the git reference to which changes can be pushed. + Returns `None` otherwise. + """ + if self.is_pull_request: + return f"refs/pr/{self.num}" + return None + + @property + def url(self) -> str: + """Returns the URL of the discussion on the Hub.""" + if self.repo_type is None or self.repo_type == constants.REPO_TYPE_MODEL: + return f"{self.endpoint}/{self.repo_id}/discussions/{self.num}" + return f"{self.endpoint}/{self.repo_type}s/{self.repo_id}/discussions/{self.num}" + + +@dataclass +class DiscussionWithDetails(Discussion): + """ + Subclass of [`Discussion`]. + + Attributes: + title (`str`): + The title of the Discussion / Pull Request + status (`str`): + The status of the Discussion / Pull Request. + It can be one of: + * `"open"` + * `"closed"` + * `"merged"` (only for Pull Requests ) + * `"draft"` (only for Pull Requests ) + num (`int`): + The number of the Discussion / Pull Request. + repo_id (`str`): + The id (`"{namespace}/{repo_name}"`) of the repo on which + the Discussion / Pull Request was open. + repo_type (`str`): + The type of the repo on which the Discussion / Pull Request was open. + Possible values are: `"model"`, `"dataset"`, `"space"`. + author (`str`): + The username of the Discussion / Pull Request author. + Can be `"deleted"` if the user has been deleted since. + is_pull_request (`bool`): + Whether or not this is a Pull Request. + created_at (`datetime`): + The `datetime` of creation of the Discussion / Pull Request. + events (`list` of [`DiscussionEvent`]) + The list of [`DiscussionEvents`] in this Discussion or Pull Request. + conflicting_files (`Union[List[str], bool, None]`, *optional*): + A list of conflicting files if this is a Pull Request. + `None` if `self.is_pull_request` is `False`. + `True` if there are conflicting files but the list can't be retrieved. + target_branch (`str`, *optional*): + The branch into which changes are to be merged if this is a + Pull Request . `None` if `self.is_pull_request` is `False`. + merge_commit_oid (`str`, *optional*): + If this is a merged Pull Request , this is set to the OID / SHA of + the merge commit, `None` otherwise. + diff (`str`, *optional*): + The git diff if this is a Pull Request , `None` otherwise. + endpoint (`str`): + Endpoint of the Hub. Default is https://huggingface.co. + git_reference (`str`, *optional*): + (property) Git reference to which changes can be pushed if this is a Pull Request, `None` otherwise. + url (`str`): + (property) URL of the discussion on the Hub. + """ + + events: List["DiscussionEvent"] + conflicting_files: Union[List[str], bool, None] + target_branch: Optional[str] + merge_commit_oid: Optional[str] + diff: Optional[str] + + +@dataclass +class DiscussionEvent: + """ + An event in a Discussion or Pull Request. + + Use concrete classes: + * [`DiscussionComment`] + * [`DiscussionStatusChange`] + * [`DiscussionCommit`] + * [`DiscussionTitleChange`] + + Attributes: + id (`str`): + The ID of the event. An hexadecimal string. + type (`str`): + The type of the event. + created_at (`datetime`): + A [`datetime`](https://docs.python.org/3/library/datetime.html?highlight=datetime#datetime.datetime) + object holding the creation timestamp for the event. + author (`str`): + The username of the Discussion / Pull Request author. + Can be `"deleted"` if the user has been deleted since. + """ + + id: str + type: str + created_at: datetime + author: str + + _event: dict + """Stores the original event data, in case we need to access it later.""" + + +@dataclass +class DiscussionComment(DiscussionEvent): + """A comment in a Discussion / Pull Request. + + Subclass of [`DiscussionEvent`]. + + + Attributes: + id (`str`): + The ID of the event. An hexadecimal string. + type (`str`): + The type of the event. + created_at (`datetime`): + A [`datetime`](https://docs.python.org/3/library/datetime.html?highlight=datetime#datetime.datetime) + object holding the creation timestamp for the event. + author (`str`): + The username of the Discussion / Pull Request author. + Can be `"deleted"` if the user has been deleted since. + content (`str`): + The raw markdown content of the comment. Mentions, links and images are not rendered. + edited (`bool`): + Whether or not this comment has been edited. + hidden (`bool`): + Whether or not this comment has been hidden. + """ + + content: str + edited: bool + hidden: bool + + @property + def rendered(self) -> str: + """The rendered comment, as a HTML string""" + return self._event["data"]["latest"]["html"] + + @property + def last_edited_at(self) -> datetime: + """The last edit time, as a `datetime` object.""" + return parse_datetime(self._event["data"]["latest"]["updatedAt"]) + + @property + def last_edited_by(self) -> str: + """The last edit time, as a `datetime` object.""" + return self._event["data"]["latest"].get("author", {}).get("name", "deleted") + + @property + def edit_history(self) -> List[dict]: + """The edit history of the comment""" + return self._event["data"]["history"] + + @property + def number_of_edits(self) -> int: + return len(self.edit_history) + + +@dataclass +class DiscussionStatusChange(DiscussionEvent): + """A change of status in a Discussion / Pull Request. + + Subclass of [`DiscussionEvent`]. + + Attributes: + id (`str`): + The ID of the event. An hexadecimal string. + type (`str`): + The type of the event. + created_at (`datetime`): + A [`datetime`](https://docs.python.org/3/library/datetime.html?highlight=datetime#datetime.datetime) + object holding the creation timestamp for the event. + author (`str`): + The username of the Discussion / Pull Request author. + Can be `"deleted"` if the user has been deleted since. + new_status (`str`): + The status of the Discussion / Pull Request after the change. + It can be one of: + * `"open"` + * `"closed"` + * `"merged"` (only for Pull Requests ) + """ + + new_status: str + + +@dataclass +class DiscussionCommit(DiscussionEvent): + """A commit in a Pull Request. + + Subclass of [`DiscussionEvent`]. + + Attributes: + id (`str`): + The ID of the event. An hexadecimal string. + type (`str`): + The type of the event. + created_at (`datetime`): + A [`datetime`](https://docs.python.org/3/library/datetime.html?highlight=datetime#datetime.datetime) + object holding the creation timestamp for the event. + author (`str`): + The username of the Discussion / Pull Request author. + Can be `"deleted"` if the user has been deleted since. + summary (`str`): + The summary of the commit. + oid (`str`): + The OID / SHA of the commit, as a hexadecimal string. + """ + + summary: str + oid: str + + +@dataclass +class DiscussionTitleChange(DiscussionEvent): + """A rename event in a Discussion / Pull Request. + + Subclass of [`DiscussionEvent`]. + + Attributes: + id (`str`): + The ID of the event. An hexadecimal string. + type (`str`): + The type of the event. + created_at (`datetime`): + A [`datetime`](https://docs.python.org/3/library/datetime.html?highlight=datetime#datetime.datetime) + object holding the creation timestamp for the event. + author (`str`): + The username of the Discussion / Pull Request author. + Can be `"deleted"` if the user has been deleted since. + old_title (`str`): + The previous title for the Discussion / Pull Request. + new_title (`str`): + The new title. + """ + + old_title: str + new_title: str + + +def deserialize_event(event: dict) -> DiscussionEvent: + """Instantiates a [`DiscussionEvent`] from a dict""" + event_id: str = event["id"] + event_type: str = event["type"] + created_at = parse_datetime(event["createdAt"]) + + common_args = dict( + id=event_id, + type=event_type, + created_at=created_at, + author=event.get("author", {}).get("name", "deleted"), + _event=event, + ) + + if event_type == "comment": + return DiscussionComment( + **common_args, + edited=event["data"]["edited"], + hidden=event["data"]["hidden"], + content=event["data"]["latest"]["raw"], + ) + if event_type == "status-change": + return DiscussionStatusChange( + **common_args, + new_status=event["data"]["status"], + ) + if event_type == "commit": + return DiscussionCommit( + **common_args, + summary=event["data"]["subject"], + oid=event["data"]["oid"], + ) + if event_type == "title-change": + return DiscussionTitleChange( + **common_args, + old_title=event["data"]["from"], + new_title=event["data"]["to"], + ) + + return DiscussionEvent(**common_args) diff --git a/meow/lib/python3.13/site-packages/huggingface_hub/errors.py b/meow/lib/python3.13/site-packages/huggingface_hub/errors.py new file mode 100644 index 0000000000000000000000000000000000000000..226c8bb4000420ac38f80e1df6b78e7131cef8f0 --- /dev/null +++ b/meow/lib/python3.13/site-packages/huggingface_hub/errors.py @@ -0,0 +1,329 @@ +"""Contains all custom errors.""" + +from pathlib import Path +from typing import Optional, Union + +from requests import HTTPError, Response + + +# CACHE ERRORS + + +class CacheNotFound(Exception): + """Exception thrown when the Huggingface cache is not found.""" + + cache_dir: Union[str, Path] + + def __init__(self, msg: str, cache_dir: Union[str, Path], *args, **kwargs): + super().__init__(msg, *args, **kwargs) + self.cache_dir = cache_dir + + +class CorruptedCacheException(Exception): + """Exception for any unexpected structure in the Huggingface cache-system.""" + + +# HEADERS ERRORS + + +class LocalTokenNotFoundError(EnvironmentError): + """Raised if local token is required but not found.""" + + +# HTTP ERRORS + + +class OfflineModeIsEnabled(ConnectionError): + """Raised when a request is made but `HF_HUB_OFFLINE=1` is set as environment variable.""" + + +class HfHubHTTPError(HTTPError): + """ + HTTPError to inherit from for any custom HTTP Error raised in HF Hub. + + Any HTTPError is converted at least into a `HfHubHTTPError`. If some information is + sent back by the server, it will be added to the error message. + + Added details: + - Request id from "X-Request-Id" header if exists. If not, fallback to "X-Amzn-Trace-Id" header if exists. + - Server error message from the header "X-Error-Message". + - Server error message if we can found one in the response body. + + Example: + ```py + import requests + from huggingface_hub.utils import get_session, hf_raise_for_status, HfHubHTTPError + + response = get_session().post(...) + try: + hf_raise_for_status(response) + except HfHubHTTPError as e: + print(str(e)) # formatted message + e.request_id, e.server_message # details returned by server + + # Complete the error message with additional information once it's raised + e.append_to_message("\n`create_commit` expects the repository to exist.") + raise + ``` + """ + + def __init__(self, message: str, response: Optional[Response] = None, *, server_message: Optional[str] = None): + self.request_id = ( + response.headers.get("x-request-id") or response.headers.get("X-Amzn-Trace-Id") + if response is not None + else None + ) + self.server_message = server_message + + super().__init__( + message, + response=response, # type: ignore [arg-type] + request=response.request if response is not None else None, # type: ignore [arg-type] + ) + + def append_to_message(self, additional_message: str) -> None: + """Append additional information to the `HfHubHTTPError` initial message.""" + self.args = (self.args[0] + additional_message,) + self.args[1:] + + +# INFERENCE CLIENT ERRORS + + +class InferenceTimeoutError(HTTPError, TimeoutError): + """Error raised when a model is unavailable or the request times out.""" + + +# INFERENCE ENDPOINT ERRORS + + +class InferenceEndpointError(Exception): + """Generic exception when dealing with Inference Endpoints.""" + + +class InferenceEndpointTimeoutError(InferenceEndpointError, TimeoutError): + """Exception for timeouts while waiting for Inference Endpoint.""" + + +# SAFETENSORS ERRORS + + +class SafetensorsParsingError(Exception): + """Raised when failing to parse a safetensors file metadata. + + This can be the case if the file is not a safetensors file or does not respect the specification. + """ + + +class NotASafetensorsRepoError(Exception): + """Raised when a repo is not a Safetensors repo i.e. doesn't have either a `model.safetensors` or a + `model.safetensors.index.json` file. + """ + + +# TEXT GENERATION ERRORS + + +class TextGenerationError(HTTPError): + """Generic error raised if text-generation went wrong.""" + + +# Text Generation Inference Errors +class ValidationError(TextGenerationError): + """Server-side validation error.""" + + +class GenerationError(TextGenerationError): + pass + + +class OverloadedError(TextGenerationError): + pass + + +class IncompleteGenerationError(TextGenerationError): + pass + + +class UnknownError(TextGenerationError): + pass + + +# VALIDATION ERRORS + + +class HFValidationError(ValueError): + """Generic exception thrown by `huggingface_hub` validators. + + Inherits from [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError). + """ + + +# FILE METADATA ERRORS + + +class FileMetadataError(OSError): + """Error triggered when the metadata of a file on the Hub cannot be retrieved (missing ETag or commit_hash). + + Inherits from `OSError` for backward compatibility. + """ + + +# REPOSITORY ERRORS + + +class RepositoryNotFoundError(HfHubHTTPError): + """ + Raised when trying to access a hf.co URL with an invalid repository name, or + with a private repo name the user does not have access to. + + Example: + + ```py + >>> from huggingface_hub import model_info + >>> model_info("") + (...) + huggingface_hub.utils._errors.RepositoryNotFoundError: 401 Client Error. (Request ID: PvMw_VjBMjVdMz53WKIzP) + + Repository Not Found for url: https://huggingface.co/api/models/%3Cnon_existent_repository%3E. + Please make sure you specified the correct `repo_id` and `repo_type`. + If the repo is private, make sure you are authenticated. + Invalid username or password. + ``` + """ + + +class GatedRepoError(RepositoryNotFoundError): + """ + Raised when trying to access a gated repository for which the user is not on the + authorized list. + + Note: derives from `RepositoryNotFoundError` to ensure backward compatibility. + + Example: + + ```py + >>> from huggingface_hub import model_info + >>> model_info("") + (...) + huggingface_hub.utils._errors.GatedRepoError: 403 Client Error. (Request ID: ViT1Bf7O_026LGSQuVqfa) + + Cannot access gated repo for url https://huggingface.co/api/models/ardent-figment/gated-model. + Access to model ardent-figment/gated-model is restricted and you are not in the authorized list. + Visit https://huggingface.co/ardent-figment/gated-model to ask for access. + ``` + """ + + +class DisabledRepoError(HfHubHTTPError): + """ + Raised when trying to access a repository that has been disabled by its author. + + Example: + + ```py + >>> from huggingface_hub import dataset_info + >>> dataset_info("laion/laion-art") + (...) + huggingface_hub.utils._errors.DisabledRepoError: 403 Client Error. (Request ID: Root=1-659fc3fa-3031673e0f92c71a2260dbe2;bc6f4dfb-b30a-4862-af0a-5cfe827610d8) + + Cannot access repository for url https://huggingface.co/api/datasets/laion/laion-art. + Access to this resource is disabled. + ``` + """ + + +# REVISION ERROR + + +class RevisionNotFoundError(HfHubHTTPError): + """ + Raised when trying to access a hf.co URL with a valid repository but an invalid + revision. + + Example: + + ```py + >>> from huggingface_hub import hf_hub_download + >>> hf_hub_download('bert-base-cased', 'config.json', revision='') + (...) + huggingface_hub.utils._errors.RevisionNotFoundError: 404 Client Error. (Request ID: Mwhe_c3Kt650GcdKEFomX) + + Revision Not Found for url: https://huggingface.co/bert-base-cased/resolve/%3Cnon-existent-revision%3E/config.json. + ``` + """ + + +# ENTRY ERRORS +class EntryNotFoundError(HfHubHTTPError): + """ + Raised when trying to access a hf.co URL with a valid repository and revision + but an invalid filename. + + Example: + + ```py + >>> from huggingface_hub import hf_hub_download + >>> hf_hub_download('bert-base-cased', '') + (...) + huggingface_hub.utils._errors.EntryNotFoundError: 404 Client Error. (Request ID: 53pNl6M0MxsnG5Sw8JA6x) + + Entry Not Found for url: https://huggingface.co/bert-base-cased/resolve/main/%3Cnon-existent-file%3E. + ``` + """ + + +class LocalEntryNotFoundError(EntryNotFoundError, FileNotFoundError, ValueError): + """ + Raised when trying to access a file or snapshot that is not on the disk when network is + disabled or unavailable (connection issue). The entry may exist on the Hub. + + Note: `ValueError` type is to ensure backward compatibility. + Note: `LocalEntryNotFoundError` derives from `HTTPError` because of `EntryNotFoundError` + even when it is not a network issue. + + Example: + + ```py + >>> from huggingface_hub import hf_hub_download + >>> hf_hub_download('bert-base-cased', '', local_files_only=True) + (...) + huggingface_hub.utils._errors.LocalEntryNotFoundError: Cannot find the requested files in the disk cache and outgoing traffic has been disabled. To enable hf.co look-ups and downloads online, set 'local_files_only' to False. + ``` + """ + + def __init__(self, message: str): + super().__init__(message, response=None) + + +# REQUEST ERROR +class BadRequestError(HfHubHTTPError, ValueError): + """ + Raised by `hf_raise_for_status` when the server returns a HTTP 400 error. + + Example: + + ```py + >>> resp = requests.post("hf.co/api/check", ...) + >>> hf_raise_for_status(resp, endpoint_name="check") + huggingface_hub.utils._errors.BadRequestError: Bad request for check endpoint: {details} (Request ID: XXX) + ``` + """ + + +# DDUF file format ERROR + + +class DDUFError(Exception): + """Base exception for errors related to the DDUF format.""" + + +class DDUFCorruptedFileError(DDUFError): + """Exception thrown when the DDUF file is corrupted.""" + + +class DDUFExportError(DDUFError): + """Base exception for errors during DDUF export.""" + + +class DDUFInvalidEntryNameError(DDUFExportError): + """Exception thrown when the entry name is invalid.""" diff --git a/meow/lib/python3.13/site-packages/huggingface_hub/file_download.py b/meow/lib/python3.13/site-packages/huggingface_hub/file_download.py new file mode 100644 index 0000000000000000000000000000000000000000..9e9d1ab449f7c885d8365053672a1248b2abfdda --- /dev/null +++ b/meow/lib/python3.13/site-packages/huggingface_hub/file_download.py @@ -0,0 +1,1622 @@ +import contextlib +import copy +import errno +import inspect +import os +import re +import shutil +import stat +import time +import uuid +import warnings +from dataclasses import dataclass +from pathlib import Path +from typing import Any, BinaryIO, Dict, Literal, NoReturn, Optional, Tuple, Union +from urllib.parse import quote, urlparse + +import requests + +from . import ( + __version__, # noqa: F401 # for backward compatibility + constants, +) +from ._local_folder import get_local_download_paths, read_download_metadata, write_download_metadata +from .constants import ( + HUGGINGFACE_CO_URL_TEMPLATE, # noqa: F401 # for backward compatibility + HUGGINGFACE_HUB_CACHE, # noqa: F401 # for backward compatibility +) +from .errors import ( + EntryNotFoundError, + FileMetadataError, + GatedRepoError, + LocalEntryNotFoundError, + RepositoryNotFoundError, + RevisionNotFoundError, +) +from .utils import ( + OfflineModeIsEnabled, + SoftTemporaryDirectory, + WeakFileLock, + build_hf_headers, + get_fastai_version, # noqa: F401 # for backward compatibility + get_fastcore_version, # noqa: F401 # for backward compatibility + get_graphviz_version, # noqa: F401 # for backward compatibility + get_jinja_version, # noqa: F401 # for backward compatibility + get_pydot_version, # noqa: F401 # for backward compatibility + get_session, + get_tf_version, # noqa: F401 # for backward compatibility + get_torch_version, # noqa: F401 # for backward compatibility + hf_raise_for_status, + is_fastai_available, # noqa: F401 # for backward compatibility + is_fastcore_available, # noqa: F401 # for backward compatibility + is_graphviz_available, # noqa: F401 # for backward compatibility + is_jinja_available, # noqa: F401 # for backward compatibility + is_pydot_available, # noqa: F401 # for backward compatibility + is_tf_available, # noqa: F401 # for backward compatibility + is_torch_available, # noqa: F401 # for backward compatibility + logging, + reset_sessions, + tqdm, + validate_hf_hub_args, +) +from .utils._runtime import _PY_VERSION # noqa: F401 # for backward compatibility +from .utils._typing import HTTP_METHOD_T +from .utils.sha import sha_fileobj +from .utils.tqdm import is_tqdm_disabled + + +logger = logging.get_logger(__name__) + +# Return value when trying to load a file from cache but the file does not exist in the distant repo. +_CACHED_NO_EXIST = object() +_CACHED_NO_EXIST_T = Any + +# Regex to get filename from a "Content-Disposition" header for CDN-served files +HEADER_FILENAME_PATTERN = re.compile(r'filename="(?P.*?)";') + +# Regex to check if the revision IS directly a commit_hash +REGEX_COMMIT_HASH = re.compile(r"^[0-9a-f]{40}$") + +# Regex to check if the file etag IS a valid sha256 +REGEX_SHA256 = re.compile(r"^[0-9a-f]{64}$") + +_are_symlinks_supported_in_dir: Dict[str, bool] = {} + + +def are_symlinks_supported(cache_dir: Union[str, Path, None] = None) -> bool: + """Return whether the symlinks are supported on the machine. + + Since symlinks support can change depending on the mounted disk, we need to check + on the precise cache folder. By default, the default HF cache directory is checked. + + Args: + cache_dir (`str`, `Path`, *optional*): + Path to the folder where cached files are stored. + + Returns: [bool] Whether symlinks are supported in the directory. + """ + # Defaults to HF cache + if cache_dir is None: + cache_dir = constants.HF_HUB_CACHE + cache_dir = str(Path(cache_dir).expanduser().resolve()) # make it unique + + # Check symlink compatibility only once (per cache directory) at first time use + if cache_dir not in _are_symlinks_supported_in_dir: + _are_symlinks_supported_in_dir[cache_dir] = True + + os.makedirs(cache_dir, exist_ok=True) + with SoftTemporaryDirectory(dir=cache_dir) as tmpdir: + src_path = Path(tmpdir) / "dummy_file_src" + src_path.touch() + dst_path = Path(tmpdir) / "dummy_file_dst" + + # Relative source path as in `_create_symlink`` + relative_src = os.path.relpath(src_path, start=os.path.dirname(dst_path)) + try: + os.symlink(relative_src, dst_path) + except OSError: + # Likely running on Windows + _are_symlinks_supported_in_dir[cache_dir] = False + + if not constants.HF_HUB_DISABLE_SYMLINKS_WARNING: + message = ( + "`huggingface_hub` cache-system uses symlinks by default to" + " efficiently store duplicated files but your machine does not" + f" support them in {cache_dir}. Caching files will still work" + " but in a degraded version that might require more space on" + " your disk. This warning can be disabled by setting the" + " `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For" + " more details, see" + " https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations." + ) + if os.name == "nt": + message += ( + "\nTo support symlinks on Windows, you either need to" + " activate Developer Mode or to run Python as an" + " administrator. In order to activate developer mode," + " see this article:" + " https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development" + ) + warnings.warn(message) + + return _are_symlinks_supported_in_dir[cache_dir] + + +@dataclass(frozen=True) +class HfFileMetadata: + """Data structure containing information about a file versioned on the Hub. + + Returned by [`get_hf_file_metadata`] based on a URL. + + Args: + commit_hash (`str`, *optional*): + The commit_hash related to the file. + etag (`str`, *optional*): + Etag of the file on the server. + location (`str`): + Location where to download the file. Can be a Hub url or not (CDN). + size (`size`): + Size of the file. In case of an LFS file, contains the size of the actual + LFS file, not the pointer. + """ + + commit_hash: Optional[str] + etag: Optional[str] + location: str + size: Optional[int] + + +@validate_hf_hub_args +def hf_hub_url( + repo_id: str, + filename: str, + *, + subfolder: Optional[str] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + endpoint: Optional[str] = None, +) -> str: + """Construct the URL of a file from the given information. + + The resolved address can either be a huggingface.co-hosted url, or a link to + Cloudfront (a Content Delivery Network, or CDN) for large files which are + more than a few MBs. + + Args: + repo_id (`str`): + A namespace (user or an organization) name and a repo name separated + by a `/`. + filename (`str`): + The name of the file in the repo. + subfolder (`str`, *optional*): + An optional value corresponding to a folder inside the repo. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if downloading from a dataset or space, + `None` or `"model"` if downloading from a model. Default is `None`. + revision (`str`, *optional*): + An optional Git revision id which can be a branch name, a tag, or a + commit hash. + + Example: + + ```python + >>> from huggingface_hub import hf_hub_url + + >>> hf_hub_url( + ... repo_id="julien-c/EsperBERTo-small", filename="pytorch_model.bin" + ... ) + 'https://huggingface.co/julien-c/EsperBERTo-small/resolve/main/pytorch_model.bin' + ``` + + + + Notes: + + Cloudfront is replicated over the globe so downloads are way faster for + the end user (and it also lowers our bandwidth costs). + + Cloudfront aggressively caches files by default (default TTL is 24 + hours), however this is not an issue here because we implement a + git-based versioning system on huggingface.co, which means that we store + the files on S3/Cloudfront in a content-addressable way (i.e., the file + name is its hash). Using content-addressable filenames means cache can't + ever be stale. + + In terms of client-side caching from this library, we base our caching + on the objects' entity tag (`ETag`), which is an identifier of a + specific version of a resource [1]_. An object's ETag is: its git-sha1 + if stored in git, or its sha256 if stored in git-lfs. + + + + References: + + - [1] https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag + """ + if subfolder == "": + subfolder = None + if subfolder is not None: + filename = f"{subfolder}/{filename}" + + if repo_type not in constants.REPO_TYPES: + raise ValueError("Invalid repo type") + + if repo_type in constants.REPO_TYPES_URL_PREFIXES: + repo_id = constants.REPO_TYPES_URL_PREFIXES[repo_type] + repo_id + + if revision is None: + revision = constants.DEFAULT_REVISION + url = HUGGINGFACE_CO_URL_TEMPLATE.format( + repo_id=repo_id, revision=quote(revision, safe=""), filename=quote(filename) + ) + # Update endpoint if provided + if endpoint is not None and url.startswith(constants.ENDPOINT): + url = endpoint + url[len(constants.ENDPOINT) :] + return url + + +def _request_wrapper( + method: HTTP_METHOD_T, url: str, *, follow_relative_redirects: bool = False, **params +) -> requests.Response: + """Wrapper around requests methods to follow relative redirects if `follow_relative_redirects=True` even when + `allow_redirection=False`. + + Args: + method (`str`): + HTTP method, such as 'GET' or 'HEAD'. + url (`str`): + The URL of the resource to fetch. + follow_relative_redirects (`bool`, *optional*, defaults to `False`) + If True, relative redirection (redirection to the same site) will be resolved even when `allow_redirection` + kwarg is set to False. Useful when we want to follow a redirection to a renamed repository without + following redirection to a CDN. + **params (`dict`, *optional*): + Params to pass to `requests.request`. + """ + # Recursively follow relative redirects + if follow_relative_redirects: + response = _request_wrapper( + method=method, + url=url, + follow_relative_redirects=False, + **params, + ) + + # If redirection, we redirect only relative paths. + # This is useful in case of a renamed repository. + if 300 <= response.status_code <= 399: + parsed_target = urlparse(response.headers["Location"]) + if parsed_target.netloc == "": + # This means it is a relative 'location' headers, as allowed by RFC 7231. + # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource') + # We want to follow this relative redirect ! + # + # Highly inspired by `resolve_redirects` from requests library. + # See https://github.com/psf/requests/blob/main/requests/sessions.py#L159 + next_url = urlparse(url)._replace(path=parsed_target.path).geturl() + return _request_wrapper(method=method, url=next_url, follow_relative_redirects=True, **params) + return response + + # Perform request and return if status_code is not in the retry list. + response = get_session().request(method=method, url=url, **params) + hf_raise_for_status(response) + return response + + +def http_get( + url: str, + temp_file: BinaryIO, + *, + proxies: Optional[Dict] = None, + resume_size: float = 0, + headers: Optional[Dict[str, str]] = None, + expected_size: Optional[int] = None, + displayed_filename: Optional[str] = None, + _nb_retries: int = 5, + _tqdm_bar: Optional[tqdm] = None, +) -> None: + """ + Download a remote file. Do not gobble up errors, and will return errors tailored to the Hugging Face Hub. + + If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely a + transient error (network outage?). We log a warning message and try to resume the download a few times before + giving up. The method gives up after 5 attempts if no new data has being received from the server. + + Args: + url (`str`): + The URL of the file to download. + temp_file (`BinaryIO`): + The file-like object where to save the file. + proxies (`dict`, *optional*): + Dictionary mapping protocol to the URL of the proxy passed to `requests.request`. + resume_size (`float`, *optional*): + The number of bytes already downloaded. If set to 0 (default), the whole file is download. If set to a + positive number, the download will resume at the given position. + headers (`dict`, *optional*): + Dictionary of HTTP Headers to send with the request. + expected_size (`int`, *optional*): + The expected size of the file to download. If set, the download will raise an error if the size of the + received content is different from the expected one. + displayed_filename (`str`, *optional*): + The filename of the file that is being downloaded. Value is used only to display a nice progress bar. If + not set, the filename is guessed from the URL or the `Content-Disposition` header. + """ + if expected_size is not None and resume_size == expected_size: + # If the file is already fully downloaded, we don't need to download it again. + return + + hf_transfer = None + if constants.HF_HUB_ENABLE_HF_TRANSFER: + if resume_size != 0: + warnings.warn("'hf_transfer' does not support `resume_size`: falling back to regular download method") + elif proxies is not None: + warnings.warn("'hf_transfer' does not support `proxies`: falling back to regular download method") + else: + try: + import hf_transfer # type: ignore[no-redef] + except ImportError: + raise ValueError( + "Fast download using 'hf_transfer' is enabled" + " (HF_HUB_ENABLE_HF_TRANSFER=1) but 'hf_transfer' package is not" + " available in your environment. Try `pip install hf_transfer`." + ) + + initial_headers = headers + headers = copy.deepcopy(headers) or {} + if resume_size > 0: + headers["Range"] = "bytes=%d-" % (resume_size,) + + r = _request_wrapper( + method="GET", url=url, stream=True, proxies=proxies, headers=headers, timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT + ) + hf_raise_for_status(r) + content_length = r.headers.get("Content-Length") + + # NOTE: 'total' is the total number of bytes to download, not the number of bytes in the file. + # If the file is compressed, the number of bytes in the saved file will be higher than 'total'. + total = resume_size + int(content_length) if content_length is not None else None + + if displayed_filename is None: + displayed_filename = url + content_disposition = r.headers.get("Content-Disposition") + if content_disposition is not None: + match = HEADER_FILENAME_PATTERN.search(content_disposition) + if match is not None: + # Means file is on CDN + displayed_filename = match.groupdict()["filename"] + + # Truncate filename if too long to display + if len(displayed_filename) > 40: + displayed_filename = f"(…){displayed_filename[-40:]}" + + consistency_error_message = ( + f"Consistency check failed: file should be of size {expected_size} but has size" + f" {{actual_size}} ({displayed_filename}).\nThis is usually due to network issues while downloading the file." + " Please retry with `force_download=True`." + ) + + # Stream file to buffer + progress_cm: tqdm = ( + tqdm( # type: ignore[assignment] + unit="B", + unit_scale=True, + total=total, + initial=resume_size, + desc=displayed_filename, + disable=is_tqdm_disabled(logger.getEffectiveLevel()), + name="huggingface_hub.http_get", + ) + if _tqdm_bar is None + else contextlib.nullcontext(_tqdm_bar) + # ^ `contextlib.nullcontext` mimics a context manager that does nothing + # Makes it easier to use the same code path for both cases but in the later + # case, the progress bar is not closed when exiting the context manager. + ) + + with progress_cm as progress: + if hf_transfer and total is not None and total > 5 * constants.DOWNLOAD_CHUNK_SIZE: + supports_callback = "callback" in inspect.signature(hf_transfer.download).parameters + if not supports_callback: + warnings.warn( + "You are using an outdated version of `hf_transfer`. " + "Consider upgrading to latest version to enable progress bars " + "using `pip install -U hf_transfer`." + ) + try: + hf_transfer.download( + url=url, + filename=temp_file.name, + max_files=constants.HF_TRANSFER_CONCURRENCY, + chunk_size=constants.DOWNLOAD_CHUNK_SIZE, + headers=headers, + parallel_failures=3, + max_retries=5, + **({"callback": progress.update} if supports_callback else {}), + ) + except Exception as e: + raise RuntimeError( + "An error occurred while downloading using `hf_transfer`. Consider" + " disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling." + ) from e + if not supports_callback: + progress.update(total) + if expected_size is not None and expected_size != os.path.getsize(temp_file.name): + raise EnvironmentError( + consistency_error_message.format( + actual_size=os.path.getsize(temp_file.name), + ) + ) + return + new_resume_size = resume_size + try: + for chunk in r.iter_content(chunk_size=constants.DOWNLOAD_CHUNK_SIZE): + if chunk: # filter out keep-alive new chunks + progress.update(len(chunk)) + temp_file.write(chunk) + new_resume_size += len(chunk) + # Some data has been downloaded from the server so we reset the number of retries. + _nb_retries = 5 + except (requests.ConnectionError, requests.ReadTimeout) as e: + # If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely + # a transient error (network outage?). We log a warning message and try to resume the download a few times + # before giving up. Tre retry mechanism is basic but should be enough in most cases. + if _nb_retries <= 0: + logger.warning("Error while downloading from %s: %s\nMax retries exceeded.", url, str(e)) + raise + logger.warning("Error while downloading from %s: %s\nTrying to resume download...", url, str(e)) + time.sleep(1) + reset_sessions() # In case of SSLError it's best to reset the shared requests.Session objects + return http_get( + url=url, + temp_file=temp_file, + proxies=proxies, + resume_size=new_resume_size, + headers=initial_headers, + expected_size=expected_size, + _nb_retries=_nb_retries - 1, + _tqdm_bar=_tqdm_bar, + ) + + if expected_size is not None and expected_size != temp_file.tell(): + raise EnvironmentError( + consistency_error_message.format( + actual_size=temp_file.tell(), + ) + ) + + +def _normalize_etag(etag: Optional[str]) -> Optional[str]: + """Normalize ETag HTTP header, so it can be used to create nice filepaths. + + The HTTP spec allows two forms of ETag: + ETag: W/"" + ETag: "" + + For now, we only expect the second form from the server, but we want to be future-proof so we support both. For + more context, see `TestNormalizeEtag` tests and https://github.com/huggingface/huggingface_hub/pull/1428. + + Args: + etag (`str`, *optional*): HTTP header + + Returns: + `str` or `None`: string that can be used as a nice directory name. + Returns `None` if input is None. + """ + if etag is None: + return None + return etag.lstrip("W/").strip('"') + + +def _create_relative_symlink(src: str, dst: str, new_blob: bool = False) -> None: + """Alias method used in `transformers` conversion script.""" + return _create_symlink(src=src, dst=dst, new_blob=new_blob) + + +def _create_symlink(src: str, dst: str, new_blob: bool = False) -> None: + """Create a symbolic link named dst pointing to src. + + By default, it will try to create a symlink using a relative path. Relative paths have 2 advantages: + - If the cache_folder is moved (example: back-up on a shared drive), relative paths within the cache folder will + not break. + - Relative paths seems to be better handled on Windows. Issue was reported 3 times in less than a week when + changing from relative to absolute paths. See https://github.com/huggingface/huggingface_hub/issues/1398, + https://github.com/huggingface/diffusers/issues/2729 and https://github.com/huggingface/transformers/pull/22228. + NOTE: The issue with absolute paths doesn't happen on admin mode. + When creating a symlink from the cache to a local folder, it is possible that a relative path cannot be created. + This happens when paths are not on the same volume. In that case, we use absolute paths. + + + The result layout looks something like + └── [ 128] snapshots + ├── [ 128] 2439f60ef33a0d46d85da5001d52aeda5b00ce9f + │ ├── [ 52] README.md -> ../../../blobs/d7edf6bd2a681fb0175f7735299831ee1b22b812 + │ └── [ 76] pytorch_model.bin -> ../../../blobs/403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd + + If symlinks cannot be created on this platform (most likely to be Windows), the workaround is to avoid symlinks by + having the actual file in `dst`. If it is a new file (`new_blob=True`), we move it to `dst`. If it is not a new file + (`new_blob=False`), we don't know if the blob file is already referenced elsewhere. To avoid breaking existing + cache, the file is duplicated on the disk. + + In case symlinks are not supported, a warning message is displayed to the user once when loading `huggingface_hub`. + The warning message can be disabled with the `DISABLE_SYMLINKS_WARNING` environment variable. + """ + try: + os.remove(dst) + except OSError: + pass + + abs_src = os.path.abspath(os.path.expanduser(src)) + abs_dst = os.path.abspath(os.path.expanduser(dst)) + abs_dst_folder = os.path.dirname(abs_dst) + + # Use relative_dst in priority + try: + relative_src = os.path.relpath(abs_src, abs_dst_folder) + except ValueError: + # Raised on Windows if src and dst are not on the same volume. This is the case when creating a symlink to a + # local_dir instead of within the cache directory. + # See https://docs.python.org/3/library/os.path.html#os.path.relpath + relative_src = None + + try: + commonpath = os.path.commonpath([abs_src, abs_dst]) + _support_symlinks = are_symlinks_supported(commonpath) + except ValueError: + # Raised if src and dst are not on the same volume. Symlinks will still work on Linux/Macos. + # See https://docs.python.org/3/library/os.path.html#os.path.commonpath + _support_symlinks = os.name != "nt" + except PermissionError: + # Permission error means src and dst are not in the same volume (e.g. destination path has been provided + # by the user via `local_dir`. Let's test symlink support there) + _support_symlinks = are_symlinks_supported(abs_dst_folder) + except OSError as e: + # OS error (errno=30) means that the commonpath is readonly on Linux/MacOS. + if e.errno == errno.EROFS: + _support_symlinks = are_symlinks_supported(abs_dst_folder) + else: + raise + + # Symlinks are supported => let's create a symlink. + if _support_symlinks: + src_rel_or_abs = relative_src or abs_src + logger.debug(f"Creating pointer from {src_rel_or_abs} to {abs_dst}") + try: + os.symlink(src_rel_or_abs, abs_dst) + return + except FileExistsError: + if os.path.islink(abs_dst) and os.path.realpath(abs_dst) == os.path.realpath(abs_src): + # `abs_dst` already exists and is a symlink to the `abs_src` blob. It is most likely that the file has + # been cached twice concurrently (exactly between `os.remove` and `os.symlink`). Do nothing. + return + else: + # Very unlikely to happen. Means a file `dst` has been created exactly between `os.remove` and + # `os.symlink` and is not a symlink to the `abs_src` blob file. Raise exception. + raise + except PermissionError: + # Permission error means src and dst are not in the same volume (e.g. download to local dir) and symlink + # is supported on both volumes but not between them. Let's just make a hard copy in that case. + pass + + # Symlinks are not supported => let's move or copy the file. + if new_blob: + logger.info(f"Symlink not supported. Moving file from {abs_src} to {abs_dst}") + shutil.move(abs_src, abs_dst, copy_function=_copy_no_matter_what) + else: + logger.info(f"Symlink not supported. Copying file from {abs_src} to {abs_dst}") + shutil.copyfile(abs_src, abs_dst) + + +def _cache_commit_hash_for_specific_revision(storage_folder: str, revision: str, commit_hash: str) -> None: + """Cache reference between a revision (tag, branch or truncated commit hash) and the corresponding commit hash. + + Does nothing if `revision` is already a proper `commit_hash` or reference is already cached. + """ + if revision != commit_hash: + ref_path = Path(storage_folder) / "refs" / revision + ref_path.parent.mkdir(parents=True, exist_ok=True) + if not ref_path.exists() or commit_hash != ref_path.read_text(): + # Update ref only if has been updated. Could cause useless error in case + # repo is already cached and user doesn't have write access to cache folder. + # See https://github.com/huggingface/huggingface_hub/issues/1216. + ref_path.write_text(commit_hash) + + +@validate_hf_hub_args +def repo_folder_name(*, repo_id: str, repo_type: str) -> str: + """Return a serialized version of a hf.co repo name and type, safe for disk storage + as a single non-nested folder. + + Example: models--julien-c--EsperBERTo-small + """ + # remove all `/` occurrences to correctly convert repo to directory name + parts = [f"{repo_type}s", *repo_id.split("/")] + return constants.REPO_ID_SEPARATOR.join(parts) + + +def _check_disk_space(expected_size: int, target_dir: Union[str, Path]) -> None: + """Check disk usage and log a warning if there is not enough disk space to download the file. + + Args: + expected_size (`int`): + The expected size of the file in bytes. + target_dir (`str`): + The directory where the file will be stored after downloading. + """ + + target_dir = Path(target_dir) # format as `Path` + for path in [target_dir] + list(target_dir.parents): # first check target_dir, then each parents one by one + try: + target_dir_free = shutil.disk_usage(path).free + if target_dir_free < expected_size: + warnings.warn( + "Not enough free disk space to download the file. " + f"The expected file size is: {expected_size / 1e6:.2f} MB. " + f"The target location {target_dir} only has {target_dir_free / 1e6:.2f} MB free disk space." + ) + return + except OSError: # raise on anything: file does not exist or space disk cannot be checked + pass + + +@validate_hf_hub_args +def hf_hub_download( + repo_id: str, + filename: str, + *, + subfolder: Optional[str] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + library_name: Optional[str] = None, + library_version: Optional[str] = None, + cache_dir: Union[str, Path, None] = None, + local_dir: Union[str, Path, None] = None, + user_agent: Union[Dict, str, None] = None, + force_download: bool = False, + proxies: Optional[Dict] = None, + etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT, + token: Union[bool, str, None] = None, + local_files_only: bool = False, + headers: Optional[Dict[str, str]] = None, + endpoint: Optional[str] = None, + resume_download: Optional[bool] = None, + force_filename: Optional[str] = None, + local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto", +) -> str: + """Download a given file if it's not already present in the local cache. + + The new cache file layout looks like this: + - The cache directory contains one subfolder per repo_id (namespaced by repo type) + - inside each repo folder: + - refs is a list of the latest known revision => commit_hash pairs + - blobs contains the actual file blobs (identified by their git-sha or sha256, depending on + whether they're LFS files or not) + - snapshots contains one subfolder per commit, each "commit" contains the subset of the files + that have been resolved at that particular commit. Each filename is a symlink to the blob + at that particular commit. + + ``` + [ 96] . + └── [ 160] models--julien-c--EsperBERTo-small + ├── [ 160] blobs + │ ├── [321M] 403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd + │ ├── [ 398] 7cb18dc9bafbfcf74629a4b760af1b160957a83e + │ └── [1.4K] d7edf6bd2a681fb0175f7735299831ee1b22b812 + ├── [ 96] refs + │ └── [ 40] main + └── [ 128] snapshots + ├── [ 128] 2439f60ef33a0d46d85da5001d52aeda5b00ce9f + │ ├── [ 52] README.md -> ../../blobs/d7edf6bd2a681fb0175f7735299831ee1b22b812 + │ └── [ 76] pytorch_model.bin -> ../../blobs/403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd + └── [ 128] bbc77c8132af1cc5cf678da3f1ddf2de43606d48 + ├── [ 52] README.md -> ../../blobs/7cb18dc9bafbfcf74629a4b760af1b160957a83e + └── [ 76] pytorch_model.bin -> ../../blobs/403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd + ``` + + If `local_dir` is provided, the file structure from the repo will be replicated in this location. When using this + option, the `cache_dir` will not be used and a `.cache/huggingface/` folder will be created at the root of `local_dir` + to store some metadata related to the downloaded files. While this mechanism is not as robust as the main + cache-system, it's optimized for regularly pulling the latest version of a repository. + + Args: + repo_id (`str`): + A user or an organization name and a repo name separated by a `/`. + filename (`str`): + The name of the file in the repo. + subfolder (`str`, *optional*): + An optional value corresponding to a folder inside the model repo. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if downloading from a dataset or space, + `None` or `"model"` if downloading from a model. Default is `None`. + revision (`str`, *optional*): + An optional Git revision id which can be a branch name, a tag, or a + commit hash. + library_name (`str`, *optional*): + The name of the library to which the object corresponds. + library_version (`str`, *optional*): + The version of the library. + cache_dir (`str`, `Path`, *optional*): + Path to the folder where cached files are stored. + local_dir (`str` or `Path`, *optional*): + If provided, the downloaded file will be placed under this directory. + user_agent (`dict`, `str`, *optional*): + The user-agent info in the form of a dictionary or a string. + force_download (`bool`, *optional*, defaults to `False`): + Whether the file should be downloaded even if it already exists in + the local cache. + proxies (`dict`, *optional*): + Dictionary mapping protocol to the URL of the proxy passed to + `requests.request`. + etag_timeout (`float`, *optional*, defaults to `10`): + When fetching ETag, how many seconds to wait for the server to send + data before giving up which is passed to `requests.request`. + token (`str`, `bool`, *optional*): + A token to be used for the download. + - If `True`, the token is read from the HuggingFace config + folder. + - If a string, it's used as the authentication token. + local_files_only (`bool`, *optional*, defaults to `False`): + If `True`, avoid downloading the file and return the path to the + local cached file if it exists. + headers (`dict`, *optional*): + Additional headers to be sent with the request. + + Returns: + `str`: Local path of file or if networking is off, last version of file cached on disk. + + Raises: + [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + [`~utils.RevisionNotFoundError`] + If the revision to download from cannot be found. + [`~utils.EntryNotFoundError`] + If the file to download cannot be found. + [`~utils.LocalEntryNotFoundError`] + If network is disabled or unavailable and file is not found in cache. + [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError) + If `token=True` but the token cannot be found. + [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) + If ETag cannot be determined. + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If some parameter value is invalid. + + """ + if constants.HF_HUB_ETAG_TIMEOUT != constants.DEFAULT_ETAG_TIMEOUT: + # Respect environment variable above user value + etag_timeout = constants.HF_HUB_ETAG_TIMEOUT + + if force_filename is not None: + warnings.warn( + "The `force_filename` parameter is deprecated as a new caching system, " + "which keeps the filenames as they are on the Hub, is now in place.", + FutureWarning, + ) + if resume_download is not None: + warnings.warn( + "`resume_download` is deprecated and will be removed in version 1.0.0. " + "Downloads always resume when possible. " + "If you want to force a new download, use `force_download=True`.", + FutureWarning, + ) + + if cache_dir is None: + cache_dir = constants.HF_HUB_CACHE + if revision is None: + revision = constants.DEFAULT_REVISION + if isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + if isinstance(local_dir, Path): + local_dir = str(local_dir) + + if subfolder == "": + subfolder = None + if subfolder is not None: + # This is used to create a URL, and not a local path, hence the forward slash. + filename = f"{subfolder}/{filename}" + + if repo_type is None: + repo_type = "model" + if repo_type not in constants.REPO_TYPES: + raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(constants.REPO_TYPES)}") + + hf_headers = build_hf_headers( + token=token, + library_name=library_name, + library_version=library_version, + user_agent=user_agent, + headers=headers, + ) + + if local_dir is not None: + if local_dir_use_symlinks != "auto": + warnings.warn( + "`local_dir_use_symlinks` parameter is deprecated and will be ignored. " + "The process to download files to a local folder has been updated and do " + "not rely on symlinks anymore. You only need to pass a destination folder " + "as`local_dir`.\n" + "For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder." + ) + + return _hf_hub_download_to_local_dir( + # Destination + local_dir=local_dir, + # File info + repo_id=repo_id, + repo_type=repo_type, + filename=filename, + revision=revision, + # HTTP info + endpoint=endpoint, + etag_timeout=etag_timeout, + headers=hf_headers, + proxies=proxies, + token=token, + # Additional options + cache_dir=cache_dir, + force_download=force_download, + local_files_only=local_files_only, + ) + else: + return _hf_hub_download_to_cache_dir( + # Destination + cache_dir=cache_dir, + # File info + repo_id=repo_id, + filename=filename, + repo_type=repo_type, + revision=revision, + # HTTP info + endpoint=endpoint, + etag_timeout=etag_timeout, + headers=hf_headers, + proxies=proxies, + token=token, + # Additional options + local_files_only=local_files_only, + force_download=force_download, + ) + + +def _hf_hub_download_to_cache_dir( + *, + # Destination + cache_dir: str, + # File info + repo_id: str, + filename: str, + repo_type: str, + revision: str, + # HTTP info + endpoint: Optional[str], + etag_timeout: float, + headers: Dict[str, str], + proxies: Optional[Dict], + token: Optional[Union[bool, str]], + # Additional options + local_files_only: bool, + force_download: bool, +) -> str: + """Download a given file to a cache folder, if not already present. + + Method should not be called directly. Please use `hf_hub_download` instead. + """ + locks_dir = os.path.join(cache_dir, ".locks") + storage_folder = os.path.join(cache_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type)) + + # cross platform transcription of filename, to be used as a local file path. + relative_filename = os.path.join(*filename.split("/")) + if os.name == "nt": + if relative_filename.startswith("..\\") or "\\..\\" in relative_filename: + raise ValueError( + f"Invalid filename: cannot handle filename '{relative_filename}' on Windows. Please ask the repository" + " owner to rename this file." + ) + + # if user provides a commit_hash and they already have the file on disk, shortcut everything. + if REGEX_COMMIT_HASH.match(revision): + pointer_path = _get_pointer_path(storage_folder, revision, relative_filename) + if os.path.exists(pointer_path) and not force_download: + return pointer_path + + # Try to get metadata (etag, commit_hash, url, size) from the server. + # If we can't, a HEAD request error is returned. + (url_to_download, etag, commit_hash, expected_size, head_call_error) = _get_metadata_or_catch_error( + repo_id=repo_id, + filename=filename, + repo_type=repo_type, + revision=revision, + endpoint=endpoint, + proxies=proxies, + etag_timeout=etag_timeout, + headers=headers, + token=token, + local_files_only=local_files_only, + storage_folder=storage_folder, + relative_filename=relative_filename, + ) + + # etag can be None for several reasons: + # 1. we passed local_files_only. + # 2. we don't have a connection + # 3. Hub is down (HTTP 500, 503, 504) + # 4. repo is not found -for example private or gated- and invalid/missing token sent + # 5. Hub is blocked by a firewall or proxy is not set correctly. + # => Try to get the last downloaded one from the specified revision. + # + # If the specified revision is a commit hash, look inside "snapshots". + # If the specified revision is a branch or tag, look inside "refs". + if head_call_error is not None: + # Couldn't make a HEAD call => let's try to find a local file + if not force_download: + commit_hash = None + if REGEX_COMMIT_HASH.match(revision): + commit_hash = revision + else: + ref_path = os.path.join(storage_folder, "refs", revision) + if os.path.isfile(ref_path): + with open(ref_path) as f: + commit_hash = f.read() + + # Return pointer file if exists + if commit_hash is not None: + pointer_path = _get_pointer_path(storage_folder, commit_hash, relative_filename) + if os.path.exists(pointer_path) and not force_download: + return pointer_path + + # Otherwise, raise appropriate error + _raise_on_head_call_error(head_call_error, force_download, local_files_only) + + # From now on, etag, commit_hash, url and size are not None. + assert etag is not None, "etag must have been retrieved from server" + assert commit_hash is not None, "commit_hash must have been retrieved from server" + assert url_to_download is not None, "file location must have been retrieved from server" + assert expected_size is not None, "expected_size must have been retrieved from server" + blob_path = os.path.join(storage_folder, "blobs", etag) + pointer_path = _get_pointer_path(storage_folder, commit_hash, relative_filename) + + os.makedirs(os.path.dirname(blob_path), exist_ok=True) + os.makedirs(os.path.dirname(pointer_path), exist_ok=True) + + # if passed revision is not identical to commit_hash + # then revision has to be a branch name or tag name. + # In that case store a ref. + _cache_commit_hash_for_specific_revision(storage_folder, revision, commit_hash) + + # If file already exists, return it (except if force_download=True) + if not force_download: + if os.path.exists(pointer_path): + return pointer_path + + if os.path.exists(blob_path): + # we have the blob already, but not the pointer + _create_symlink(blob_path, pointer_path, new_blob=False) + return pointer_path + + # Prevent parallel downloads of the same file with a lock. + # etag could be duplicated across repos, + lock_path = os.path.join(locks_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type), f"{etag}.lock") + + # Some Windows versions do not allow for paths longer than 255 characters. + # In this case, we must specify it as an extended path by using the "\\?\" prefix. + if os.name == "nt" and len(os.path.abspath(lock_path)) > 255: + lock_path = "\\\\?\\" + os.path.abspath(lock_path) + + if os.name == "nt" and len(os.path.abspath(blob_path)) > 255: + blob_path = "\\\\?\\" + os.path.abspath(blob_path) + + Path(lock_path).parent.mkdir(parents=True, exist_ok=True) + with WeakFileLock(lock_path): + _download_to_tmp_and_move( + incomplete_path=Path(blob_path + ".incomplete"), + destination_path=Path(blob_path), + url_to_download=url_to_download, + proxies=proxies, + headers=headers, + expected_size=expected_size, + filename=filename, + force_download=force_download, + ) + if not os.path.exists(pointer_path): + _create_symlink(blob_path, pointer_path, new_blob=True) + + return pointer_path + + +def _hf_hub_download_to_local_dir( + *, + # Destination + local_dir: Union[str, Path], + # File info + repo_id: str, + repo_type: str, + filename: str, + revision: str, + # HTTP info + endpoint: Optional[str], + etag_timeout: float, + headers: Dict[str, str], + proxies: Optional[Dict], + token: Union[bool, str, None], + # Additional options + cache_dir: str, + force_download: bool, + local_files_only: bool, +) -> str: + """Download a given file to a local folder, if not already present. + + Method should not be called directly. Please use `hf_hub_download` instead. + """ + # Some Windows versions do not allow for paths longer than 255 characters. + # In this case, we must specify it as an extended path by using the "\\?\" prefix. + if os.name == "nt" and len(os.path.abspath(local_dir)) > 255: + local_dir = "\\\\?\\" + os.path.abspath(local_dir) + local_dir = Path(local_dir) + paths = get_local_download_paths(local_dir=local_dir, filename=filename) + local_metadata = read_download_metadata(local_dir=local_dir, filename=filename) + + # Local file exists + metadata exists + commit_hash matches => return file + if ( + not force_download + and REGEX_COMMIT_HASH.match(revision) + and paths.file_path.is_file() + and local_metadata is not None + and local_metadata.commit_hash == revision + ): + return str(paths.file_path) + + # Local file doesn't exist or commit_hash doesn't match => we need the etag + (url_to_download, etag, commit_hash, expected_size, head_call_error) = _get_metadata_or_catch_error( + repo_id=repo_id, + filename=filename, + repo_type=repo_type, + revision=revision, + endpoint=endpoint, + proxies=proxies, + etag_timeout=etag_timeout, + headers=headers, + token=token, + local_files_only=local_files_only, + ) + + if head_call_error is not None: + # No HEAD call but local file exists => default to local file + if not force_download and paths.file_path.is_file(): + logger.warning( + f"Couldn't access the Hub to check for update but local file already exists. Defaulting to existing file. (error: {head_call_error})" + ) + return str(paths.file_path) + # Otherwise => raise + _raise_on_head_call_error(head_call_error, force_download, local_files_only) + + # From now on, etag, commit_hash, url and size are not None. + assert etag is not None, "etag must have been retrieved from server" + assert commit_hash is not None, "commit_hash must have been retrieved from server" + assert url_to_download is not None, "file location must have been retrieved from server" + assert expected_size is not None, "expected_size must have been retrieved from server" + + # Local file exists => check if it's up-to-date + if not force_download and paths.file_path.is_file(): + # etag matches => update metadata and return file + if local_metadata is not None and local_metadata.etag == etag: + write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag) + return str(paths.file_path) + + # metadata is outdated + etag is a sha256 + # => means it's an LFS file (large) + # => let's compute local hash and compare + # => if match, update metadata and return file + if local_metadata is None and REGEX_SHA256.match(etag) is not None: + with open(paths.file_path, "rb") as f: + file_hash = sha_fileobj(f).hex() + if file_hash == etag: + write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag) + return str(paths.file_path) + + # Local file doesn't exist or etag isn't a match => retrieve file from remote (or cache) + + # If we are lucky enough, the file is already in the cache => copy it + if not force_download: + cached_path = try_to_load_from_cache( + repo_id=repo_id, + filename=filename, + cache_dir=cache_dir, + revision=commit_hash, + repo_type=repo_type, + ) + if isinstance(cached_path, str): + with WeakFileLock(paths.lock_path): + paths.file_path.parent.mkdir(parents=True, exist_ok=True) + shutil.copyfile(cached_path, paths.file_path) + write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag) + return str(paths.file_path) + + # Otherwise, let's download the file! + with WeakFileLock(paths.lock_path): + paths.file_path.unlink(missing_ok=True) # delete outdated file first + _download_to_tmp_and_move( + incomplete_path=paths.incomplete_path(etag), + destination_path=paths.file_path, + url_to_download=url_to_download, + proxies=proxies, + headers=headers, + expected_size=expected_size, + filename=filename, + force_download=force_download, + ) + + write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag) + return str(paths.file_path) + + +@validate_hf_hub_args +def try_to_load_from_cache( + repo_id: str, + filename: str, + cache_dir: Union[str, Path, None] = None, + revision: Optional[str] = None, + repo_type: Optional[str] = None, +) -> Union[str, _CACHED_NO_EXIST_T, None]: + """ + Explores the cache to return the latest cached file for a given revision if found. + + This function will not raise any exception if the file in not cached. + + Args: + cache_dir (`str` or `os.PathLike`): + The folder where the cached files lie. + repo_id (`str`): + The ID of the repo on huggingface.co. + filename (`str`): + The filename to look for inside `repo_id`. + revision (`str`, *optional*): + The specific model version to use. Will default to `"main"` if it's not provided and no `commit_hash` is + provided either. + repo_type (`str`, *optional*): + The type of the repository. Will default to `"model"`. + + Returns: + `Optional[str]` or `_CACHED_NO_EXIST`: + Will return `None` if the file was not cached. Otherwise: + - The exact path to the cached file if it's found in the cache + - A special value `_CACHED_NO_EXIST` if the file does not exist at the given commit hash and this fact was + cached. + + Example: + + ```python + from huggingface_hub import try_to_load_from_cache, _CACHED_NO_EXIST + + filepath = try_to_load_from_cache() + if isinstance(filepath, str): + # file exists and is cached + ... + elif filepath is _CACHED_NO_EXIST: + # non-existence of file is cached + ... + else: + # file is not cached + ... + ``` + """ + if revision is None: + revision = "main" + if repo_type is None: + repo_type = "model" + if repo_type not in constants.REPO_TYPES: + raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(constants.REPO_TYPES)}") + if cache_dir is None: + cache_dir = constants.HF_HUB_CACHE + + object_id = repo_id.replace("/", "--") + repo_cache = os.path.join(cache_dir, f"{repo_type}s--{object_id}") + if not os.path.isdir(repo_cache): + # No cache for this model + return None + + refs_dir = os.path.join(repo_cache, "refs") + snapshots_dir = os.path.join(repo_cache, "snapshots") + no_exist_dir = os.path.join(repo_cache, ".no_exist") + + # Resolve refs (for instance to convert main to the associated commit sha) + if os.path.isdir(refs_dir): + revision_file = os.path.join(refs_dir, revision) + if os.path.isfile(revision_file): + with open(revision_file) as f: + revision = f.read() + + # Check if file is cached as "no_exist" + if os.path.isfile(os.path.join(no_exist_dir, revision, filename)): + return _CACHED_NO_EXIST + + # Check if revision folder exists + if not os.path.exists(snapshots_dir): + return None + cached_shas = os.listdir(snapshots_dir) + if revision not in cached_shas: + # No cache for this revision and we won't try to return a random revision + return None + + # Check if file exists in cache + cached_file = os.path.join(snapshots_dir, revision, filename) + return cached_file if os.path.isfile(cached_file) else None + + +@validate_hf_hub_args +def get_hf_file_metadata( + url: str, + token: Union[bool, str, None] = None, + proxies: Optional[Dict] = None, + timeout: Optional[float] = constants.DEFAULT_REQUEST_TIMEOUT, + library_name: Optional[str] = None, + library_version: Optional[str] = None, + user_agent: Union[Dict, str, None] = None, + headers: Optional[Dict[str, str]] = None, +) -> HfFileMetadata: + """Fetch metadata of a file versioned on the Hub for a given url. + + Args: + url (`str`): + File url, for example returned by [`hf_hub_url`]. + token (`str` or `bool`, *optional*): + A token to be used for the download. + - If `True`, the token is read from the HuggingFace config + folder. + - If `False` or `None`, no token is provided. + - If a string, it's used as the authentication token. + proxies (`dict`, *optional*): + Dictionary mapping protocol to the URL of the proxy passed to + `requests.request`. + timeout (`float`, *optional*, defaults to 10): + How many seconds to wait for the server to send metadata before giving up. + library_name (`str`, *optional*): + The name of the library to which the object corresponds. + library_version (`str`, *optional*): + The version of the library. + user_agent (`dict`, `str`, *optional*): + The user-agent info in the form of a dictionary or a string. + headers (`dict`, *optional*): + Additional headers to be sent with the request. + + Returns: + A [`HfFileMetadata`] object containing metadata such as location, etag, size and + commit_hash. + """ + hf_headers = build_hf_headers( + token=token, + library_name=library_name, + library_version=library_version, + user_agent=user_agent, + headers=headers, + ) + hf_headers["Accept-Encoding"] = "identity" # prevent any compression => we want to know the real size of the file + + # Retrieve metadata + r = _request_wrapper( + method="HEAD", + url=url, + headers=hf_headers, + allow_redirects=False, + follow_relative_redirects=True, + proxies=proxies, + timeout=timeout, + ) + hf_raise_for_status(r) + + # Return + return HfFileMetadata( + commit_hash=r.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT), + # We favor a custom header indicating the etag of the linked resource, and + # we fallback to the regular etag header. + etag=_normalize_etag(r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_ETAG) or r.headers.get("ETag")), + # Either from response headers (if redirected) or defaults to request url + # Do not use directly `url`, as `_request_wrapper` might have followed relative + # redirects. + location=r.headers.get("Location") or r.request.url, # type: ignore + size=_int_or_none( + r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_SIZE) or r.headers.get("Content-Length") + ), + ) + + +def _get_metadata_or_catch_error( + *, + repo_id: str, + filename: str, + repo_type: str, + revision: str, + endpoint: Optional[str], + proxies: Optional[Dict], + etag_timeout: Optional[float], + headers: Dict[str, str], # mutated inplace! + token: Union[bool, str, None], + local_files_only: bool, + relative_filename: Optional[str] = None, # only used to store `.no_exists` in cache + storage_folder: Optional[str] = None, # only used to store `.no_exists` in cache +) -> Union[ + # Either an exception is caught and returned + Tuple[None, None, None, None, Exception], + # Or the metadata is returned as + # `(url_to_download, etag, commit_hash, expected_size, None)` + Tuple[str, str, str, int, None], +]: + """Get metadata for a file on the Hub, safely handling network issues. + + Returns either the etag, commit_hash and expected size of the file, or the error + raised while fetching the metadata. + + NOTE: This function mutates `headers` inplace! It removes the `authorization` header + if the file is a LFS blob and the domain of the url is different from the + domain of the location (typically an S3 bucket). + """ + if local_files_only: + return ( + None, + None, + None, + None, + OfflineModeIsEnabled( + f"Cannot access file since 'local_files_only=True' as been set. (repo_id: {repo_id}, repo_type: {repo_type}, revision: {revision}, filename: {filename})" + ), + ) + + url = hf_hub_url(repo_id, filename, repo_type=repo_type, revision=revision, endpoint=endpoint) + url_to_download: str = url + etag: Optional[str] = None + commit_hash: Optional[str] = None + expected_size: Optional[int] = None + head_error_call: Optional[Exception] = None + + # Try to get metadata from the server. + # Do not raise yet if the file is not found or not accessible. + if not local_files_only: + try: + try: + metadata = get_hf_file_metadata( + url=url, proxies=proxies, timeout=etag_timeout, headers=headers, token=token + ) + except EntryNotFoundError as http_error: + if storage_folder is not None and relative_filename is not None: + # Cache the non-existence of the file + commit_hash = http_error.response.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT) + if commit_hash is not None: + no_exist_file_path = Path(storage_folder) / ".no_exist" / commit_hash / relative_filename + try: + no_exist_file_path.parent.mkdir(parents=True, exist_ok=True) + no_exist_file_path.touch() + except OSError as e: + logger.error( + f"Could not cache non-existence of file. Will ignore error and continue. Error: {e}" + ) + _cache_commit_hash_for_specific_revision(storage_folder, revision, commit_hash) + raise + + # Commit hash must exist + commit_hash = metadata.commit_hash + if commit_hash is None: + raise FileMetadataError( + "Distant resource does not seem to be on huggingface.co. It is possible that a configuration issue" + " prevents you from downloading resources from https://huggingface.co. Please check your firewall" + " and proxy settings and make sure your SSL certificates are updated." + ) + + # Etag must exist + # If we don't have any of those, raise an error. + etag = metadata.etag + if etag is None: + raise FileMetadataError( + "Distant resource does not have an ETag, we won't be able to reliably ensure reproducibility." + ) + + # Size must exist + expected_size = metadata.size + if expected_size is None: + raise FileMetadataError("Distant resource does not have a Content-Length.") + + # In case of a redirect, save an extra redirect on the request.get call, + # and ensure we download the exact atomic version even if it changed + # between the HEAD and the GET (unlikely, but hey). + # + # If url domain is different => we are downloading from a CDN => url is signed => don't send auth + # If url domain is the same => redirect due to repo rename AND downloading a regular file => keep auth + if url != metadata.location: + url_to_download = metadata.location + if urlparse(url).netloc != urlparse(metadata.location).netloc: + # Remove authorization header when downloading a LFS blob + headers.pop("authorization", None) + except (requests.exceptions.SSLError, requests.exceptions.ProxyError): + # Actually raise for those subclasses of ConnectionError + raise + except ( + requests.exceptions.ConnectionError, + requests.exceptions.Timeout, + OfflineModeIsEnabled, + ) as error: + # Otherwise, our Internet connection is down. + # etag is None + head_error_call = error + except (RevisionNotFoundError, EntryNotFoundError): + # The repo was found but the revision or entry doesn't exist on the Hub (never existed or got deleted) + raise + except requests.HTTPError as error: + # Multiple reasons for an http error: + # - Repository is private and invalid/missing token sent + # - Repository is gated and invalid/missing token sent + # - Hub is down (error 500 or 504) + # => let's switch to 'local_files_only=True' to check if the files are already cached. + # (if it's not the case, the error will be re-raised) + head_error_call = error + except FileMetadataError as error: + # Multiple reasons for a FileMetadataError: + # - Wrong network configuration (proxy, firewall, SSL certificates) + # - Inconsistency on the Hub + # => let's switch to 'local_files_only=True' to check if the files are already cached. + # (if it's not the case, the error will be re-raised) + head_error_call = error + + if not (local_files_only or etag is not None or head_error_call is not None): + raise RuntimeError("etag is empty due to uncovered problems") + + return (url_to_download, etag, commit_hash, expected_size, head_error_call) # type: ignore [return-value] + + +def _raise_on_head_call_error(head_call_error: Exception, force_download: bool, local_files_only: bool) -> NoReturn: + """Raise an appropriate error when the HEAD call failed and we cannot locate a local file.""" + + # No head call => we cannot force download. + if force_download: + if local_files_only: + raise ValueError("Cannot pass 'force_download=True' and 'local_files_only=True' at the same time.") + elif isinstance(head_call_error, OfflineModeIsEnabled): + raise ValueError("Cannot pass 'force_download=True' when offline mode is enabled.") from head_call_error + else: + raise ValueError("Force download failed due to the above error.") from head_call_error + + # No head call + couldn't find an appropriate file on disk => raise an error. + if local_files_only: + raise LocalEntryNotFoundError( + "Cannot find the requested files in the disk cache and outgoing traffic has been disabled. To enable" + " hf.co look-ups and downloads online, set 'local_files_only' to False." + ) + elif isinstance(head_call_error, RepositoryNotFoundError) or isinstance(head_call_error, GatedRepoError): + # Repo not found or gated => let's raise the actual error + raise head_call_error + else: + # Otherwise: most likely a connection issue or Hub downtime => let's warn the user + raise LocalEntryNotFoundError( + "An error happened while trying to locate the file on the Hub and we cannot find the requested files" + " in the local cache. Please check your connection and try again or make sure your Internet connection" + " is on." + ) from head_call_error + + +def _download_to_tmp_and_move( + incomplete_path: Path, + destination_path: Path, + url_to_download: str, + proxies: Optional[Dict], + headers: Dict[str, str], + expected_size: Optional[int], + filename: str, + force_download: bool, +) -> None: + """Download content from a URL to a destination path. + + Internal logic: + - return early if file is already downloaded + - resume download if possible (from incomplete file) + - do not resume download if `force_download=True` or `HF_HUB_ENABLE_HF_TRANSFER=True` + - check disk space before downloading + - download content to a temporary file + - set correct permissions on temporary file + - move the temporary file to the destination path + + Both `incomplete_path` and `destination_path` must be on the same volume to avoid a local copy. + """ + if destination_path.exists() and not force_download: + # Do nothing if already exists (except if force_download=True) + return + + if incomplete_path.exists() and (force_download or (constants.HF_HUB_ENABLE_HF_TRANSFER and not proxies)): + # By default, we will try to resume the download if possible. + # However, if the user has set `force_download=True` or if `hf_transfer` is enabled, then we should + # not resume the download => delete the incomplete file. + message = f"Removing incomplete file '{incomplete_path}'" + if force_download: + message += " (force_download=True)" + elif constants.HF_HUB_ENABLE_HF_TRANSFER and not proxies: + message += " (hf_transfer=True)" + logger.info(message) + incomplete_path.unlink(missing_ok=True) + + with incomplete_path.open("ab") as f: + resume_size = f.tell() + message = f"Downloading '{filename}' to '{incomplete_path}'" + if resume_size > 0 and expected_size is not None: + message += f" (resume from {resume_size}/{expected_size})" + logger.info(message) + + if expected_size is not None: # might be None if HTTP header not set correctly + # Check disk space in both tmp and destination path + _check_disk_space(expected_size, incomplete_path.parent) + _check_disk_space(expected_size, destination_path.parent) + + http_get( + url_to_download, + f, + proxies=proxies, + resume_size=resume_size, + headers=headers, + expected_size=expected_size, + ) + + logger.info(f"Download complete. Moving file to {destination_path}") + _chmod_and_move(incomplete_path, destination_path) + + +def _int_or_none(value: Optional[str]) -> Optional[int]: + try: + return int(value) # type: ignore + except (TypeError, ValueError): + return None + + +def _chmod_and_move(src: Path, dst: Path) -> None: + """Set correct permission before moving a blob from tmp directory to cache dir. + + Do not take into account the `umask` from the process as there is no convenient way + to get it that is thread-safe. + + See: + - About umask: https://docs.python.org/3/library/os.html#os.umask + - Thread-safety: https://stackoverflow.com/a/70343066 + - About solution: https://github.com/huggingface/huggingface_hub/pull/1220#issuecomment-1326211591 + - Fix issue: https://github.com/huggingface/huggingface_hub/issues/1141 + - Fix issue: https://github.com/huggingface/huggingface_hub/issues/1215 + """ + # Get umask by creating a temporary file in the cached repo folder. + tmp_file = dst.parent.parent / f"tmp_{uuid.uuid4()}" + try: + tmp_file.touch() + cache_dir_mode = Path(tmp_file).stat().st_mode + os.chmod(str(src), stat.S_IMODE(cache_dir_mode)) + except OSError as e: + logger.warning( + f"Could not set the permissions on the file '{src}'. " + f"Error: {e}.\nContinuing without setting permissions." + ) + finally: + try: + tmp_file.unlink() + except OSError: + # fails if `tmp_file.touch()` failed => do nothing + # See https://github.com/huggingface/huggingface_hub/issues/2359 + pass + + shutil.move(str(src), str(dst), copy_function=_copy_no_matter_what) + + +def _copy_no_matter_what(src: str, dst: str) -> None: + """Copy file from src to dst. + + If `shutil.copy2` fails, fallback to `shutil.copyfile`. + """ + try: + # Copy file with metadata and permission + # Can fail e.g. if dst is an S3 mount + shutil.copy2(src, dst) + except OSError: + # Copy only file content + shutil.copyfile(src, dst) + + +def _get_pointer_path(storage_folder: str, revision: str, relative_filename: str) -> str: + # Using `os.path.abspath` instead of `Path.resolve()` to avoid resolving symlinks + snapshot_path = os.path.join(storage_folder, "snapshots") + pointer_path = os.path.join(snapshot_path, revision, relative_filename) + if Path(os.path.abspath(snapshot_path)) not in Path(os.path.abspath(pointer_path)).parents: + raise ValueError( + "Invalid pointer path: cannot create pointer path in snapshot folder if" + f" `storage_folder='{storage_folder}'`, `revision='{revision}'` and" + f" `relative_filename='{relative_filename}'`." + ) + return pointer_path diff --git a/meow/lib/python3.13/site-packages/huggingface_hub/hf_api.py b/meow/lib/python3.13/site-packages/huggingface_hub/hf_api.py new file mode 100644 index 0000000000000000000000000000000000000000..cf0256655c8eeb2664c025d4f1d791e84a4daa52 --- /dev/null +++ b/meow/lib/python3.13/site-packages/huggingface_hub/hf_api.py @@ -0,0 +1,9634 @@ +# coding=utf-8 +# Copyright 2019-present, the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +import inspect +import json +import re +import struct +import warnings +from collections import defaultdict +from concurrent.futures import Future, ThreadPoolExecutor +from dataclasses import asdict, dataclass, field +from datetime import datetime +from functools import wraps +from itertools import islice +from pathlib import Path +from typing import ( + Any, + BinaryIO, + Callable, + Dict, + Iterable, + Iterator, + List, + Literal, + Optional, + Tuple, + TypeVar, + Union, + overload, +) +from urllib.parse import quote + +import requests +from requests.exceptions import HTTPError +from tqdm.auto import tqdm as base_tqdm +from tqdm.contrib.concurrent import thread_map + +from . import constants +from ._commit_api import ( + CommitOperation, + CommitOperationAdd, + CommitOperationCopy, + CommitOperationDelete, + _fetch_files_to_copy, + _fetch_upload_modes, + _prepare_commit_payload, + _upload_lfs_files, + _warn_on_overwriting_operations, +) +from ._inference_endpoints import InferenceEndpoint, InferenceEndpointType +from ._space_api import SpaceHardware, SpaceRuntime, SpaceStorage, SpaceVariable +from ._upload_large_folder import upload_large_folder_internal +from .community import ( + Discussion, + DiscussionComment, + DiscussionStatusChange, + DiscussionTitleChange, + DiscussionWithDetails, + deserialize_event, +) +from .constants import ( + DEFAULT_ETAG_TIMEOUT, # noqa: F401 # kept for backward compatibility + DEFAULT_REQUEST_TIMEOUT, # noqa: F401 # kept for backward compatibility + DEFAULT_REVISION, # noqa: F401 # kept for backward compatibility + DISCUSSION_STATUS, # noqa: F401 # kept for backward compatibility + DISCUSSION_TYPES, # noqa: F401 # kept for backward compatibility + ENDPOINT, # noqa: F401 # kept for backward compatibility + INFERENCE_ENDPOINTS_ENDPOINT, # noqa: F401 # kept for backward compatibility + REGEX_COMMIT_OID, # noqa: F401 # kept for backward compatibility + REPO_TYPE_MODEL, # noqa: F401 # kept for backward compatibility + REPO_TYPES, # noqa: F401 # kept for backward compatibility + REPO_TYPES_MAPPING, # noqa: F401 # kept for backward compatibility + REPO_TYPES_URL_PREFIXES, # noqa: F401 # kept for backward compatibility + SAFETENSORS_INDEX_FILE, # noqa: F401 # kept for backward compatibility + SAFETENSORS_MAX_HEADER_LENGTH, # noqa: F401 # kept for backward compatibility + SAFETENSORS_SINGLE_FILE, # noqa: F401 # kept for backward compatibility + SPACES_SDK_TYPES, # noqa: F401 # kept for backward compatibility + WEBHOOK_DOMAIN_T, # noqa: F401 # kept for backward compatibility + DiscussionStatusFilter, # noqa: F401 # kept for backward compatibility + DiscussionTypeFilter, # noqa: F401 # kept for backward compatibility +) +from .errors import ( + BadRequestError, + EntryNotFoundError, + GatedRepoError, + HfHubHTTPError, + RepositoryNotFoundError, + RevisionNotFoundError, +) +from .file_download import HfFileMetadata, get_hf_file_metadata, hf_hub_url +from .repocard_data import DatasetCardData, ModelCardData, SpaceCardData +from .utils import ( + DEFAULT_IGNORE_PATTERNS, + HfFolder, # noqa: F401 # kept for backward compatibility + LocalTokenNotFoundError, + NotASafetensorsRepoError, + SafetensorsFileMetadata, + SafetensorsParsingError, + SafetensorsRepoMetadata, + TensorInfo, + build_hf_headers, + filter_repo_objects, + fix_hf_endpoint_in_url, + get_session, + hf_raise_for_status, + logging, + paginate, + parse_datetime, + validate_hf_hub_args, +) +from .utils import tqdm as hf_tqdm +from .utils._deprecation import _deprecate_method +from .utils._typing import CallableT +from .utils.endpoint_helpers import _is_emission_within_threshold + + +R = TypeVar("R") # Return type +CollectionItemType_T = Literal["model", "dataset", "space", "paper"] + +ExpandModelProperty_T = Literal[ + "author", + "baseModels", + "cardData", + "childrenModelCount", + "config", + "createdAt", + "disabled", + "downloads", + "downloadsAllTime", + "gated", + "gguf", + "inference", + "lastModified", + "library_name", + "likes", + "mask_token", + "model-index", + "pipeline_tag", + "private", + "safetensors", + "sha", + "siblings", + "spaces", + "tags", + "transformersInfo", + "trendingScore", + "widgetData", +] + +ExpandDatasetProperty_T = Literal[ + "author", + "cardData", + "citation", + "createdAt", + "disabled", + "description", + "downloads", + "downloadsAllTime", + "gated", + "lastModified", + "likes", + "paperswithcode_id", + "private", + "siblings", + "sha", + "trendingScore", + "tags", +] + +ExpandSpaceProperty_T = Literal[ + "author", + "cardData", + "createdAt", + "datasets", + "disabled", + "lastModified", + "likes", + "models", + "private", + "runtime", + "sdk", + "siblings", + "sha", + "subdomain", + "tags", + "trendingScore", +] + +USERNAME_PLACEHOLDER = "hf_user" +_REGEX_DISCUSSION_URL = re.compile(r".*/discussions/(\d+)$") + +_CREATE_COMMIT_NO_REPO_ERROR_MESSAGE = ( + "\nNote: Creating a commit assumes that the repo already exists on the" + " Huggingface Hub. Please use `create_repo` if it's not the case." +) +_AUTH_CHECK_NO_REPO_ERROR_MESSAGE = ( + "\nNote: The repository either does not exist or you do not have access rights." + " Please check the repository ID and your access permissions." + " If this is a private repository, ensure that your token is correct." +) +logger = logging.get_logger(__name__) + + +def repo_type_and_id_from_hf_id(hf_id: str, hub_url: Optional[str] = None) -> Tuple[Optional[str], Optional[str], str]: + """ + Returns the repo type and ID from a huggingface.co URL linking to a + repository + + Args: + hf_id (`str`): + An URL or ID of a repository on the HF hub. Accepted values are: + + - https://huggingface.co/// + - https://huggingface.co// + - hf://// + - hf:/// + - // + - / + - + hub_url (`str`, *optional*): + The URL of the HuggingFace Hub, defaults to https://huggingface.co + + Returns: + A tuple with three items: repo_type (`str` or `None`), namespace (`str` or + `None`) and repo_id (`str`). + + Raises: + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If URL cannot be parsed. + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If `repo_type` is unknown. + """ + input_hf_id = hf_id + + hub_url = re.sub(r"https?://", "", hub_url if hub_url is not None else constants.ENDPOINT) + is_hf_url = hub_url in hf_id and "@" not in hf_id + + HFFS_PREFIX = "hf://" + if hf_id.startswith(HFFS_PREFIX): # Remove "hf://" prefix if exists + hf_id = hf_id[len(HFFS_PREFIX) :] + + url_segments = hf_id.split("/") + is_hf_id = len(url_segments) <= 3 + + namespace: Optional[str] + if is_hf_url: + namespace, repo_id = url_segments[-2:] + if namespace == hub_url: + namespace = None + if len(url_segments) > 2 and hub_url not in url_segments[-3]: + repo_type = url_segments[-3] + elif namespace in constants.REPO_TYPES_MAPPING: + # Mean canonical dataset or model + repo_type = constants.REPO_TYPES_MAPPING[namespace] + namespace = None + else: + repo_type = None + elif is_hf_id: + if len(url_segments) == 3: + # Passed // or // + repo_type, namespace, repo_id = url_segments[-3:] + elif len(url_segments) == 2: + if url_segments[0] in constants.REPO_TYPES_MAPPING: + # Passed '' or 'datasets/' for a canonical model or dataset + repo_type = constants.REPO_TYPES_MAPPING[url_segments[0]] + namespace = None + repo_id = hf_id.split("/")[-1] + else: + # Passed / or / + namespace, repo_id = hf_id.split("/")[-2:] + repo_type = None + else: + # Passed + repo_id = url_segments[0] + namespace, repo_type = None, None + else: + raise ValueError(f"Unable to retrieve user and repo ID from the passed HF ID: {hf_id}") + + # Check if repo type is known (mapping "spaces" => "space" + empty value => `None`) + if repo_type in constants.REPO_TYPES_MAPPING: + repo_type = constants.REPO_TYPES_MAPPING[repo_type] + if repo_type == "": + repo_type = None + if repo_type not in constants.REPO_TYPES: + raise ValueError(f"Unknown `repo_type`: '{repo_type}' ('{input_hf_id}')") + + return repo_type, namespace, repo_id + + +@dataclass +class LastCommitInfo(dict): + oid: str + title: str + date: datetime + + def __post_init__(self): # hack to make LastCommitInfo backward compatible + self.update(asdict(self)) + + +@dataclass +class BlobLfsInfo(dict): + size: int + sha256: str + pointer_size: int + + def __post_init__(self): # hack to make BlobLfsInfo backward compatible + self.update(asdict(self)) + + +@dataclass +class BlobSecurityInfo(dict): + safe: bool # duplicate information with "status" field, keeping it for backward compatibility + status: str + av_scan: Optional[Dict] + pickle_import_scan: Optional[Dict] + + def __post_init__(self): # hack to make BlogSecurityInfo backward compatible + self.update(asdict(self)) + + +@dataclass +class TransformersInfo(dict): + auto_model: str + custom_class: Optional[str] = None + # possible `pipeline_tag` values: https://github.com/huggingface/huggingface.js/blob/3ee32554b8620644a6287e786b2a83bf5caf559c/packages/tasks/src/pipelines.ts#L72 + pipeline_tag: Optional[str] = None + processor: Optional[str] = None + + def __post_init__(self): # hack to make TransformersInfo backward compatible + self.update(asdict(self)) + + +@dataclass +class SafeTensorsInfo(dict): + parameters: Dict[str, int] + total: int + + def __post_init__(self): # hack to make SafeTensorsInfo backward compatible + self.update(asdict(self)) + + +@dataclass +class CommitInfo(str): + """Data structure containing information about a newly created commit. + + Returned by any method that creates a commit on the Hub: [`create_commit`], [`upload_file`], [`upload_folder`], + [`delete_file`], [`delete_folder`]. It inherits from `str` for backward compatibility but using methods specific + to `str` is deprecated. + + Attributes: + commit_url (`str`): + Url where to find the commit. + + commit_message (`str`): + The summary (first line) of the commit that has been created. + + commit_description (`str`): + Description of the commit that has been created. Can be empty. + + oid (`str`): + Commit hash id. Example: `"91c54ad1727ee830252e457677f467be0bfd8a57"`. + + pr_url (`str`, *optional*): + Url to the PR that has been created, if any. Populated when `create_pr=True` + is passed. + + pr_revision (`str`, *optional*): + Revision of the PR that has been created, if any. Populated when + `create_pr=True` is passed. Example: `"refs/pr/1"`. + + pr_num (`int`, *optional*): + Number of the PR discussion that has been created, if any. Populated when + `create_pr=True` is passed. Can be passed as `discussion_num` in + [`get_discussion_details`]. Example: `1`. + + repo_url (`RepoUrl`): + Repo URL of the commit containing info like repo_id, repo_type, etc. + + _url (`str`, *optional*): + Legacy url for `str` compatibility. Can be the url to the uploaded file on the Hub (if returned by + [`upload_file`]), to the uploaded folder on the Hub (if returned by [`upload_folder`]) or to the commit on + the Hub (if returned by [`create_commit`]). Defaults to `commit_url`. It is deprecated to use this + attribute. Please use `commit_url` instead. + """ + + commit_url: str + commit_message: str + commit_description: str + oid: str + pr_url: Optional[str] = None + + # Computed from `commit_url` in `__post_init__` + repo_url: RepoUrl = field(init=False) + + # Computed from `pr_url` in `__post_init__` + pr_revision: Optional[str] = field(init=False) + pr_num: Optional[str] = field(init=False) + + # legacy url for `str` compatibility (ex: url to uploaded file, url to uploaded folder, url to PR, etc.) + _url: str = field(repr=False, default=None) # type: ignore # defaults to `commit_url` + + def __new__(cls, *args, commit_url: str, _url: Optional[str] = None, **kwargs): + return str.__new__(cls, _url or commit_url) + + def __post_init__(self): + """Populate pr-related fields after initialization. + + See https://docs.python.org/3.10/library/dataclasses.html#post-init-processing. + """ + # Repo info + self.repo_url = RepoUrl(self.commit_url.split("/commit/")[0]) + + # PR info + if self.pr_url is not None: + self.pr_revision = _parse_revision_from_pr_url(self.pr_url) + self.pr_num = int(self.pr_revision.split("/")[-1]) + else: + self.pr_revision = None + self.pr_num = None + + +@dataclass +class AccessRequest: + """Data structure containing information about a user access request. + + Attributes: + username (`str`): + Username of the user who requested access. + fullname (`str`): + Fullname of the user who requested access. + email (`Optional[str]`): + Email of the user who requested access. + Can only be `None` in the /accepted list if the user was granted access manually. + timestamp (`datetime`): + Timestamp of the request. + status (`Literal["pending", "accepted", "rejected"]`): + Status of the request. Can be one of `["pending", "accepted", "rejected"]`. + fields (`Dict[str, Any]`, *optional*): + Additional fields filled by the user in the gate form. + """ + + username: str + fullname: str + email: Optional[str] + timestamp: datetime + status: Literal["pending", "accepted", "rejected"] + + # Additional fields filled by the user in the gate form + fields: Optional[Dict[str, Any]] = None + + +@dataclass +class WebhookWatchedItem: + """Data structure containing information about the items watched by a webhook. + + Attributes: + type (`Literal["dataset", "model", "org", "space", "user"]`): + Type of the item to be watched. Can be one of `["dataset", "model", "org", "space", "user"]`. + name (`str`): + Name of the item to be watched. Can be the username, organization name, model name, dataset name or space name. + """ + + type: Literal["dataset", "model", "org", "space", "user"] + name: str + + +@dataclass +class WebhookInfo: + """Data structure containing information about a webhook. + + Attributes: + id (`str`): + ID of the webhook. + url (`str`): + URL of the webhook. + watched (`List[WebhookWatchedItem]`): + List of items watched by the webhook, see [`WebhookWatchedItem`]. + domains (`List[WEBHOOK_DOMAIN_T]`): + List of domains the webhook is watching. Can be one of `["repo", "discussions"]`. + secret (`str`, *optional*): + Secret of the webhook. + disabled (`bool`): + Whether the webhook is disabled or not. + """ + + id: str + url: str + watched: List[WebhookWatchedItem] + domains: List[constants.WEBHOOK_DOMAIN_T] + secret: Optional[str] + disabled: bool + + +class RepoUrl(str): + """Subclass of `str` describing a repo URL on the Hub. + + `RepoUrl` is returned by `HfApi.create_repo`. It inherits from `str` for backward + compatibility. At initialization, the URL is parsed to populate properties: + - endpoint (`str`) + - namespace (`Optional[str]`) + - repo_name (`str`) + - repo_id (`str`) + - repo_type (`Literal["model", "dataset", "space"]`) + - url (`str`) + + Args: + url (`Any`): + String value of the repo url. + endpoint (`str`, *optional*): + Endpoint of the Hub. Defaults to . + + Example: + ```py + >>> RepoUrl('https://huggingface.co/gpt2') + RepoUrl('https://huggingface.co/gpt2', endpoint='https://huggingface.co', repo_type='model', repo_id='gpt2') + + >>> RepoUrl('https://hub-ci.huggingface.co/datasets/dummy_user/dummy_dataset', endpoint='https://hub-ci.huggingface.co') + RepoUrl('https://hub-ci.huggingface.co/datasets/dummy_user/dummy_dataset', endpoint='https://hub-ci.huggingface.co', repo_type='dataset', repo_id='dummy_user/dummy_dataset') + + >>> RepoUrl('hf://datasets/my-user/my-dataset') + RepoUrl('hf://datasets/my-user/my-dataset', endpoint='https://huggingface.co', repo_type='dataset', repo_id='user/dataset') + + >>> HfApi.create_repo("dummy_model") + RepoUrl('https://huggingface.co/Wauplin/dummy_model', endpoint='https://huggingface.co', repo_type='model', repo_id='Wauplin/dummy_model') + ``` + + Raises: + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If URL cannot be parsed. + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If `repo_type` is unknown. + """ + + def __new__(cls, url: Any, endpoint: Optional[str] = None): + url = fix_hf_endpoint_in_url(url, endpoint=endpoint) + return super(RepoUrl, cls).__new__(cls, url) + + def __init__(self, url: Any, endpoint: Optional[str] = None) -> None: + super().__init__() + # Parse URL + self.endpoint = endpoint or constants.ENDPOINT + repo_type, namespace, repo_name = repo_type_and_id_from_hf_id(self, hub_url=self.endpoint) + + # Populate fields + self.namespace = namespace + self.repo_name = repo_name + self.repo_id = repo_name if namespace is None else f"{namespace}/{repo_name}" + self.repo_type = repo_type or constants.REPO_TYPE_MODEL + self.url = str(self) # just in case it's needed + + def __repr__(self) -> str: + return f"RepoUrl('{self}', endpoint='{self.endpoint}', repo_type='{self.repo_type}', repo_id='{self.repo_id}')" + + +@dataclass +class RepoSibling: + """ + Contains basic information about a repo file inside a repo on the Hub. + + + + All attributes of this class are optional except `rfilename`. This is because only the file names are returned when + listing repositories on the Hub (with [`list_models`], [`list_datasets`] or [`list_spaces`]). If you need more + information like file size, blob id or lfs details, you must request them specifically from one repo at a time + (using [`model_info`], [`dataset_info`] or [`space_info`]) as it adds more constraints on the backend server to + retrieve these. + + + + Attributes: + rfilename (str): + file name, relative to the repo root. + size (`int`, *optional*): + The file's size, in bytes. This attribute is defined when `files_metadata` argument of [`repo_info`] is set + to `True`. It's `None` otherwise. + blob_id (`str`, *optional*): + The file's git OID. This attribute is defined when `files_metadata` argument of [`repo_info`] is set to + `True`. It's `None` otherwise. + lfs (`BlobLfsInfo`, *optional*): + The file's LFS metadata. This attribute is defined when`files_metadata` argument of [`repo_info`] is set to + `True` and the file is stored with Git LFS. It's `None` otherwise. + """ + + rfilename: str + size: Optional[int] = None + blob_id: Optional[str] = None + lfs: Optional[BlobLfsInfo] = None + + +@dataclass +class RepoFile: + """ + Contains information about a file on the Hub. + + Attributes: + path (str): + file path relative to the repo root. + size (`int`): + The file's size, in bytes. + blob_id (`str`): + The file's git OID. + lfs (`BlobLfsInfo`): + The file's LFS metadata. + last_commit (`LastCommitInfo`, *optional*): + The file's last commit metadata. Only defined if [`list_repo_tree`] and [`get_paths_info`] + are called with `expand=True`. + security (`BlobSecurityInfo`, *optional*): + The file's security scan metadata. Only defined if [`list_repo_tree`] and [`get_paths_info`] + are called with `expand=True`. + """ + + path: str + size: int + blob_id: str + lfs: Optional[BlobLfsInfo] = None + last_commit: Optional[LastCommitInfo] = None + security: Optional[BlobSecurityInfo] = None + + def __init__(self, **kwargs): + self.path = kwargs.pop("path") + self.size = kwargs.pop("size") + self.blob_id = kwargs.pop("oid") + lfs = kwargs.pop("lfs", None) + if lfs is not None: + lfs = BlobLfsInfo(size=lfs["size"], sha256=lfs["oid"], pointer_size=lfs["pointerSize"]) + self.lfs = lfs + last_commit = kwargs.pop("lastCommit", None) or kwargs.pop("last_commit", None) + if last_commit is not None: + last_commit = LastCommitInfo( + oid=last_commit["id"], title=last_commit["title"], date=parse_datetime(last_commit["date"]) + ) + self.last_commit = last_commit + security = kwargs.pop("securityFileStatus", None) + if security is not None: + safe = security["status"] == "safe" + security = BlobSecurityInfo( + safe=safe, + status=security["status"], + av_scan=security["avScan"], + pickle_import_scan=security["pickleImportScan"], + ) + self.security = security + + # backwards compatibility + self.rfilename = self.path + self.lastCommit = self.last_commit + + +@dataclass +class RepoFolder: + """ + Contains information about a folder on the Hub. + + Attributes: + path (str): + folder path relative to the repo root. + tree_id (`str`): + The folder's git OID. + last_commit (`LastCommitInfo`, *optional*): + The folder's last commit metadata. Only defined if [`list_repo_tree`] and [`get_paths_info`] + are called with `expand=True`. + """ + + path: str + tree_id: str + last_commit: Optional[LastCommitInfo] = None + + def __init__(self, **kwargs): + self.path = kwargs.pop("path") + self.tree_id = kwargs.pop("oid") + last_commit = kwargs.pop("lastCommit", None) or kwargs.pop("last_commit", None) + if last_commit is not None: + last_commit = LastCommitInfo( + oid=last_commit["id"], title=last_commit["title"], date=parse_datetime(last_commit["date"]) + ) + self.last_commit = last_commit + + +@dataclass +class ModelInfo: + """ + Contains information about a model on the Hub. + + + + Most attributes of this class are optional. This is because the data returned by the Hub depends on the query made. + In general, the more specific the query, the more information is returned. On the contrary, when listing models + using [`list_models`] only a subset of the attributes are returned. + + + + Attributes: + id (`str`): + ID of model. + author (`str`, *optional*): + Author of the model. + sha (`str`, *optional*): + Repo SHA at this particular revision. + created_at (`datetime`, *optional*): + Date of creation of the repo on the Hub. Note that the lowest value is `2022-03-02T23:29:04.000Z`, + corresponding to the date when we began to store creation dates. + last_modified (`datetime`, *optional*): + Date of last commit to the repo. + private (`bool`): + Is the repo private. + disabled (`bool`, *optional*): + Is the repo disabled. + downloads (`int`): + Number of downloads of the model over the last 30 days. + downloads_all_time (`int`): + Cumulated number of downloads of the model since its creation. + gated (`Literal["auto", "manual", False]`, *optional*): + Is the repo gated. + If so, whether there is manual or automatic approval. + gguf (`Dict`, *optional*): + GGUF information of the model. + inference (`Literal["cold", "frozen", "warm"]`, *optional*): + Status of the model on the inference API. + Warm models are available for immediate use. Cold models will be loaded on first inference call. + Frozen models are not available in Inference API. + likes (`int`): + Number of likes of the model. + library_name (`str`, *optional*): + Library associated with the model. + tags (`List[str]`): + List of tags of the model. Compared to `card_data.tags`, contains extra tags computed by the Hub + (e.g. supported libraries, model's arXiv). + pipeline_tag (`str`, *optional*): + Pipeline tag associated with the model. + mask_token (`str`, *optional*): + Mask token used by the model. + widget_data (`Any`, *optional*): + Widget data associated with the model. + model_index (`Dict`, *optional*): + Model index for evaluation. + config (`Dict`, *optional*): + Model configuration. + transformers_info (`TransformersInfo`, *optional*): + Transformers-specific info (auto class, processor, etc.) associated with the model. + trending_score (`int`, *optional*): + Trending score of the model. + card_data (`ModelCardData`, *optional*): + Model Card Metadata as a [`huggingface_hub.repocard_data.ModelCardData`] object. + siblings (`List[RepoSibling]`): + List of [`huggingface_hub.hf_api.RepoSibling`] objects that constitute the model. + spaces (`List[str]`, *optional*): + List of spaces using the model. + safetensors (`SafeTensorsInfo`, *optional*): + Model's safetensors information. + security_repo_status (`Dict`, *optional*): + Model's security scan status. + """ + + id: str + author: Optional[str] + sha: Optional[str] + created_at: Optional[datetime] + last_modified: Optional[datetime] + private: Optional[bool] + disabled: Optional[bool] + downloads: Optional[int] + downloads_all_time: Optional[int] + gated: Optional[Literal["auto", "manual", False]] + gguf: Optional[Dict] + inference: Optional[Literal["warm", "cold", "frozen"]] + likes: Optional[int] + library_name: Optional[str] + tags: Optional[List[str]] + pipeline_tag: Optional[str] + mask_token: Optional[str] + card_data: Optional[ModelCardData] + widget_data: Optional[Any] + model_index: Optional[Dict] + config: Optional[Dict] + transformers_info: Optional[TransformersInfo] + trending_score: Optional[int] + siblings: Optional[List[RepoSibling]] + spaces: Optional[List[str]] + safetensors: Optional[SafeTensorsInfo] + security_repo_status: Optional[Dict] + + def __init__(self, **kwargs): + self.id = kwargs.pop("id") + self.author = kwargs.pop("author", None) + self.sha = kwargs.pop("sha", None) + last_modified = kwargs.pop("lastModified", None) or kwargs.pop("last_modified", None) + self.last_modified = parse_datetime(last_modified) if last_modified else None + created_at = kwargs.pop("createdAt", None) or kwargs.pop("created_at", None) + self.created_at = parse_datetime(created_at) if created_at else None + self.private = kwargs.pop("private", None) + self.gated = kwargs.pop("gated", None) + self.disabled = kwargs.pop("disabled", None) + self.downloads = kwargs.pop("downloads", None) + self.downloads_all_time = kwargs.pop("downloadsAllTime", None) + self.likes = kwargs.pop("likes", None) + self.library_name = kwargs.pop("library_name", None) + self.gguf = kwargs.pop("gguf", None) + self.inference = kwargs.pop("inference", None) + self.tags = kwargs.pop("tags", None) + self.pipeline_tag = kwargs.pop("pipeline_tag", None) + self.mask_token = kwargs.pop("mask_token", None) + self.trending_score = kwargs.pop("trendingScore", None) + + card_data = kwargs.pop("cardData", None) or kwargs.pop("card_data", None) + self.card_data = ( + ModelCardData(**card_data, ignore_metadata_errors=True) if isinstance(card_data, dict) else card_data + ) + + self.widget_data = kwargs.pop("widgetData", None) + self.model_index = kwargs.pop("model-index", None) or kwargs.pop("model_index", None) + self.config = kwargs.pop("config", None) + transformers_info = kwargs.pop("transformersInfo", None) or kwargs.pop("transformers_info", None) + self.transformers_info = TransformersInfo(**transformers_info) if transformers_info else None + siblings = kwargs.pop("siblings", None) + self.siblings = ( + [ + RepoSibling( + rfilename=sibling["rfilename"], + size=sibling.get("size"), + blob_id=sibling.get("blobId"), + lfs=( + BlobLfsInfo( + size=sibling["lfs"]["size"], + sha256=sibling["lfs"]["sha256"], + pointer_size=sibling["lfs"]["pointerSize"], + ) + if sibling.get("lfs") + else None + ), + ) + for sibling in siblings + ] + if siblings is not None + else None + ) + self.spaces = kwargs.pop("spaces", None) + safetensors = kwargs.pop("safetensors", None) + self.safetensors = ( + SafeTensorsInfo( + parameters=safetensors["parameters"], + total=safetensors["total"], + ) + if safetensors + else None + ) + self.security_repo_status = kwargs.pop("securityRepoStatus", None) + # backwards compatibility + self.lastModified = self.last_modified + self.cardData = self.card_data + self.transformersInfo = self.transformers_info + self.__dict__.update(**kwargs) + + +@dataclass +class DatasetInfo: + """ + Contains information about a dataset on the Hub. + + + + Most attributes of this class are optional. This is because the data returned by the Hub depends on the query made. + In general, the more specific the query, the more information is returned. On the contrary, when listing datasets + using [`list_datasets`] only a subset of the attributes are returned. + + + + Attributes: + id (`str`): + ID of dataset. + author (`str`): + Author of the dataset. + sha (`str`): + Repo SHA at this particular revision. + created_at (`datetime`, *optional*): + Date of creation of the repo on the Hub. Note that the lowest value is `2022-03-02T23:29:04.000Z`, + corresponding to the date when we began to store creation dates. + last_modified (`datetime`, *optional*): + Date of last commit to the repo. + private (`bool`): + Is the repo private. + disabled (`bool`, *optional*): + Is the repo disabled. + gated (`Literal["auto", "manual", False]`, *optional*): + Is the repo gated. + If so, whether there is manual or automatic approval. + downloads (`int`): + Number of downloads of the dataset over the last 30 days. + downloads_all_time (`int`): + Cumulated number of downloads of the model since its creation. + likes (`int`): + Number of likes of the dataset. + tags (`List[str]`): + List of tags of the dataset. + card_data (`DatasetCardData`, *optional*): + Model Card Metadata as a [`huggingface_hub.repocard_data.DatasetCardData`] object. + siblings (`List[RepoSibling]`): + List of [`huggingface_hub.hf_api.RepoSibling`] objects that constitute the dataset. + paperswithcode_id (`str`, *optional*): + Papers with code ID of the dataset. + trending_score (`int`, *optional*): + Trending score of the dataset. + """ + + id: str + author: Optional[str] + sha: Optional[str] + created_at: Optional[datetime] + last_modified: Optional[datetime] + private: Optional[bool] + gated: Optional[Literal["auto", "manual", False]] + disabled: Optional[bool] + downloads: Optional[int] + downloads_all_time: Optional[int] + likes: Optional[int] + paperswithcode_id: Optional[str] + tags: Optional[List[str]] + trending_score: Optional[int] + card_data: Optional[DatasetCardData] + siblings: Optional[List[RepoSibling]] + + def __init__(self, **kwargs): + self.id = kwargs.pop("id") + self.author = kwargs.pop("author", None) + self.sha = kwargs.pop("sha", None) + created_at = kwargs.pop("createdAt", None) or kwargs.pop("created_at", None) + self.created_at = parse_datetime(created_at) if created_at else None + last_modified = kwargs.pop("lastModified", None) or kwargs.pop("last_modified", None) + self.last_modified = parse_datetime(last_modified) if last_modified else None + self.private = kwargs.pop("private", None) + self.gated = kwargs.pop("gated", None) + self.disabled = kwargs.pop("disabled", None) + self.downloads = kwargs.pop("downloads", None) + self.downloads_all_time = kwargs.pop("downloadsAllTime", None) + self.likes = kwargs.pop("likes", None) + self.paperswithcode_id = kwargs.pop("paperswithcode_id", None) + self.tags = kwargs.pop("tags", None) + self.trending_score = kwargs.pop("trendingScore", None) + + card_data = kwargs.pop("cardData", None) or kwargs.pop("card_data", None) + self.card_data = ( + DatasetCardData(**card_data, ignore_metadata_errors=True) if isinstance(card_data, dict) else card_data + ) + siblings = kwargs.pop("siblings", None) + self.siblings = ( + [ + RepoSibling( + rfilename=sibling["rfilename"], + size=sibling.get("size"), + blob_id=sibling.get("blobId"), + lfs=( + BlobLfsInfo( + size=sibling["lfs"]["size"], + sha256=sibling["lfs"]["sha256"], + pointer_size=sibling["lfs"]["pointerSize"], + ) + if sibling.get("lfs") + else None + ), + ) + for sibling in siblings + ] + if siblings is not None + else None + ) + + # backwards compatibility + self.lastModified = self.last_modified + self.cardData = self.card_data + self.__dict__.update(**kwargs) + + +@dataclass +class SpaceInfo: + """ + Contains information about a Space on the Hub. + + + + Most attributes of this class are optional. This is because the data returned by the Hub depends on the query made. + In general, the more specific the query, the more information is returned. On the contrary, when listing spaces + using [`list_spaces`] only a subset of the attributes are returned. + + + + Attributes: + id (`str`): + ID of the Space. + author (`str`, *optional*): + Author of the Space. + sha (`str`, *optional*): + Repo SHA at this particular revision. + created_at (`datetime`, *optional*): + Date of creation of the repo on the Hub. Note that the lowest value is `2022-03-02T23:29:04.000Z`, + corresponding to the date when we began to store creation dates. + last_modified (`datetime`, *optional*): + Date of last commit to the repo. + private (`bool`): + Is the repo private. + gated (`Literal["auto", "manual", False]`, *optional*): + Is the repo gated. + If so, whether there is manual or automatic approval. + disabled (`bool`, *optional*): + Is the Space disabled. + host (`str`, *optional*): + Host URL of the Space. + subdomain (`str`, *optional*): + Subdomain of the Space. + likes (`int`): + Number of likes of the Space. + tags (`List[str]`): + List of tags of the Space. + siblings (`List[RepoSibling]`): + List of [`huggingface_hub.hf_api.RepoSibling`] objects that constitute the Space. + card_data (`SpaceCardData`, *optional*): + Space Card Metadata as a [`huggingface_hub.repocard_data.SpaceCardData`] object. + runtime (`SpaceRuntime`, *optional*): + Space runtime information as a [`huggingface_hub.hf_api.SpaceRuntime`] object. + sdk (`str`, *optional*): + SDK used by the Space. + models (`List[str]`, *optional*): + List of models used by the Space. + datasets (`List[str]`, *optional*): + List of datasets used by the Space. + trending_score (`int`, *optional*): + Trending score of the Space. + """ + + id: str + author: Optional[str] + sha: Optional[str] + created_at: Optional[datetime] + last_modified: Optional[datetime] + private: Optional[bool] + gated: Optional[Literal["auto", "manual", False]] + disabled: Optional[bool] + host: Optional[str] + subdomain: Optional[str] + likes: Optional[int] + sdk: Optional[str] + tags: Optional[List[str]] + siblings: Optional[List[RepoSibling]] + trending_score: Optional[int] + card_data: Optional[SpaceCardData] + runtime: Optional[SpaceRuntime] + models: Optional[List[str]] + datasets: Optional[List[str]] + + def __init__(self, **kwargs): + self.id = kwargs.pop("id") + self.author = kwargs.pop("author", None) + self.sha = kwargs.pop("sha", None) + created_at = kwargs.pop("createdAt", None) or kwargs.pop("created_at", None) + self.created_at = parse_datetime(created_at) if created_at else None + last_modified = kwargs.pop("lastModified", None) or kwargs.pop("last_modified", None) + self.last_modified = parse_datetime(last_modified) if last_modified else None + self.private = kwargs.pop("private", None) + self.gated = kwargs.pop("gated", None) + self.disabled = kwargs.pop("disabled", None) + self.host = kwargs.pop("host", None) + self.subdomain = kwargs.pop("subdomain", None) + self.likes = kwargs.pop("likes", None) + self.sdk = kwargs.pop("sdk", None) + self.tags = kwargs.pop("tags", None) + self.trending_score = kwargs.pop("trendingScore", None) + card_data = kwargs.pop("cardData", None) or kwargs.pop("card_data", None) + self.card_data = ( + SpaceCardData(**card_data, ignore_metadata_errors=True) if isinstance(card_data, dict) else card_data + ) + siblings = kwargs.pop("siblings", None) + self.siblings = ( + [ + RepoSibling( + rfilename=sibling["rfilename"], + size=sibling.get("size"), + blob_id=sibling.get("blobId"), + lfs=( + BlobLfsInfo( + size=sibling["lfs"]["size"], + sha256=sibling["lfs"]["sha256"], + pointer_size=sibling["lfs"]["pointerSize"], + ) + if sibling.get("lfs") + else None + ), + ) + for sibling in siblings + ] + if siblings is not None + else None + ) + runtime = kwargs.pop("runtime", None) + self.runtime = SpaceRuntime(runtime) if runtime else None + self.models = kwargs.pop("models", None) + self.datasets = kwargs.pop("datasets", None) + + # backwards compatibility + self.lastModified = self.last_modified + self.cardData = self.card_data + self.__dict__.update(**kwargs) + + +@dataclass +class CollectionItem: + """ + Contains information about an item of a Collection (model, dataset, Space or paper). + + Attributes: + item_object_id (`str`): + Unique ID of the item in the collection. + item_id (`str`): + ID of the underlying object on the Hub. Can be either a repo_id or a paper id + e.g. `"jbilcke-hf/ai-comic-factory"`, `"2307.09288"`. + item_type (`str`): + Type of the underlying object. Can be one of `"model"`, `"dataset"`, `"space"` or `"paper"`. + position (`int`): + Position of the item in the collection. + note (`str`, *optional*): + Note associated with the item, as plain text. + """ + + item_object_id: str # id in database + item_id: str # repo_id or paper id + item_type: str + position: int + note: Optional[str] = None + + def __init__( + self, _id: str, id: str, type: CollectionItemType_T, position: int, note: Optional[Dict] = None, **kwargs + ) -> None: + self.item_object_id: str = _id # id in database + self.item_id: str = id # repo_id or paper id + self.item_type: CollectionItemType_T = type + self.position: int = position + self.note: str = note["text"] if note is not None else None + + +@dataclass +class Collection: + """ + Contains information about a Collection on the Hub. + + Attributes: + slug (`str`): + Slug of the collection. E.g. `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`. + title (`str`): + Title of the collection. E.g. `"Recent models"`. + owner (`str`): + Owner of the collection. E.g. `"TheBloke"`. + items (`List[CollectionItem]`): + List of items in the collection. + last_updated (`datetime`): + Date of the last update of the collection. + position (`int`): + Position of the collection in the list of collections of the owner. + private (`bool`): + Whether the collection is private or not. + theme (`str`): + Theme of the collection. E.g. `"green"`. + upvotes (`int`): + Number of upvotes of the collection. + description (`str`, *optional*): + Description of the collection, as plain text. + url (`str`): + (property) URL of the collection on the Hub. + """ + + slug: str + title: str + owner: str + items: List[CollectionItem] + last_updated: datetime + position: int + private: bool + theme: str + upvotes: int + description: Optional[str] = None + + def __init__(self, **kwargs) -> None: + self.slug = kwargs.pop("slug") + self.title = kwargs.pop("title") + self.owner = kwargs.pop("owner") + self.items = [CollectionItem(**item) for item in kwargs.pop("items")] + self.last_updated = parse_datetime(kwargs.pop("lastUpdated")) + self.position = kwargs.pop("position") + self.private = kwargs.pop("private") + self.theme = kwargs.pop("theme") + self.upvotes = kwargs.pop("upvotes") + self.description = kwargs.pop("description", None) + endpoint = kwargs.pop("endpoint", None) + if endpoint is None: + endpoint = constants.ENDPOINT + self._url = f"{endpoint}/collections/{self.slug}" + + @property + def url(self) -> str: + """Returns the URL of the collection on the Hub.""" + return self._url + + +@dataclass +class GitRefInfo: + """ + Contains information about a git reference for a repo on the Hub. + + Attributes: + name (`str`): + Name of the reference (e.g. tag name or branch name). + ref (`str`): + Full git ref on the Hub (e.g. `"refs/heads/main"` or `"refs/tags/v1.0"`). + target_commit (`str`): + OID of the target commit for the ref (e.g. `"e7da7f221d5bf496a48136c0cd264e630fe9fcc8"`) + """ + + name: str + ref: str + target_commit: str + + +@dataclass +class GitRefs: + """ + Contains information about all git references for a repo on the Hub. + + Object is returned by [`list_repo_refs`]. + + Attributes: + branches (`List[GitRefInfo]`): + A list of [`GitRefInfo`] containing information about branches on the repo. + converts (`List[GitRefInfo]`): + A list of [`GitRefInfo`] containing information about "convert" refs on the repo. + Converts are refs used (internally) to push preprocessed data in Dataset repos. + tags (`List[GitRefInfo]`): + A list of [`GitRefInfo`] containing information about tags on the repo. + pull_requests (`List[GitRefInfo]`, *optional*): + A list of [`GitRefInfo`] containing information about pull requests on the repo. + Only returned if `include_prs=True` is set. + """ + + branches: List[GitRefInfo] + converts: List[GitRefInfo] + tags: List[GitRefInfo] + pull_requests: Optional[List[GitRefInfo]] = None + + +@dataclass +class GitCommitInfo: + """ + Contains information about a git commit for a repo on the Hub. Check out [`list_repo_commits`] for more details. + + Attributes: + commit_id (`str`): + OID of the commit (e.g. `"e7da7f221d5bf496a48136c0cd264e630fe9fcc8"`) + authors (`List[str]`): + List of authors of the commit. + created_at (`datetime`): + Datetime when the commit was created. + title (`str`): + Title of the commit. This is a free-text value entered by the authors. + message (`str`): + Description of the commit. This is a free-text value entered by the authors. + formatted_title (`str`): + Title of the commit formatted as HTML. Only returned if `formatted=True` is set. + formatted_message (`str`): + Description of the commit formatted as HTML. Only returned if `formatted=True` is set. + """ + + commit_id: str + + authors: List[str] + created_at: datetime + title: str + message: str + + formatted_title: Optional[str] + formatted_message: Optional[str] + + +@dataclass +class UserLikes: + """ + Contains information about a user likes on the Hub. + + Attributes: + user (`str`): + Name of the user for which we fetched the likes. + total (`int`): + Total number of likes. + datasets (`List[str]`): + List of datasets liked by the user (as repo_ids). + models (`List[str]`): + List of models liked by the user (as repo_ids). + spaces (`List[str]`): + List of spaces liked by the user (as repo_ids). + """ + + # Metadata + user: str + total: int + + # User likes + datasets: List[str] + models: List[str] + spaces: List[str] + + +@dataclass +class Organization: + """ + Contains information about an organization on the Hub. + + Attributes: + avatar_url (`str`): + URL of the organization's avatar. + name (`str`): + Name of the organization on the Hub (unique). + fullname (`str`): + Organization's full name. + """ + + avatar_url: str + name: str + fullname: str + + def __init__(self, **kwargs) -> None: + self.avatar_url = kwargs.pop("avatarUrl", "") + self.name = kwargs.pop("name", "") + self.fullname = kwargs.pop("fullname", "") + + # forward compatibility + self.__dict__.update(**kwargs) + + +@dataclass +class User: + """ + Contains information about a user on the Hub. + + Attributes: + username (`str`): + Name of the user on the Hub (unique). + fullname (`str`): + User's full name. + avatar_url (`str`): + URL of the user's avatar. + details (`str`, *optional*): + User's details. + is_following (`bool`, *optional*): + Whether the authenticated user is following this user. + is_pro (`bool`, *optional*): + Whether the user is a pro user. + num_models (`int`, *optional*): + Number of models created by the user. + num_datasets (`int`, *optional*): + Number of datasets created by the user. + num_spaces (`int`, *optional*): + Number of spaces created by the user. + num_discussions (`int`, *optional*): + Number of discussions initiated by the user. + num_papers (`int`, *optional*): + Number of papers authored by the user. + num_upvotes (`int`, *optional*): + Number of upvotes received by the user. + num_likes (`int`, *optional*): + Number of likes given by the user. + num_following (`int`, *optional*): + Number of users this user is following. + num_followers (`int`, *optional*): + Number of users following this user. + orgs (list of [`Organization`]): + List of organizations the user is part of. + """ + + # Metadata + username: str + fullname: str + avatar_url: str + details: Optional[str] = None + is_following: Optional[bool] = None + is_pro: Optional[bool] = None + num_models: Optional[int] = None + num_datasets: Optional[int] = None + num_spaces: Optional[int] = None + num_discussions: Optional[int] = None + num_papers: Optional[int] = None + num_upvotes: Optional[int] = None + num_likes: Optional[int] = None + num_following: Optional[int] = None + num_followers: Optional[int] = None + orgs: List[Organization] = field(default_factory=list) + + def __init__(self, **kwargs) -> None: + self.username = kwargs.pop("user", "") + self.fullname = kwargs.pop("fullname", "") + self.avatar_url = kwargs.pop("avatarUrl", "") + self.is_following = kwargs.pop("isFollowing", None) + self.is_pro = kwargs.pop("isPro", None) + self.details = kwargs.pop("details", None) + self.num_models = kwargs.pop("numModels", None) + self.num_datasets = kwargs.pop("numDatasets", None) + self.num_spaces = kwargs.pop("numSpaces", None) + self.num_discussions = kwargs.pop("numDiscussions", None) + self.num_papers = kwargs.pop("numPapers", None) + self.num_upvotes = kwargs.pop("numUpvotes", None) + self.num_likes = kwargs.pop("numLikes", None) + self.num_following = kwargs.pop("numFollowing", None) + self.num_followers = kwargs.pop("numFollowers", None) + self.user_type = kwargs.pop("type", None) + self.orgs = [Organization(**org) for org in kwargs.pop("orgs", [])] + + # forward compatibility + self.__dict__.update(**kwargs) + + +@dataclass +class PaperInfo: + """ + Contains information about a paper on the Hub. + + Attributes: + id (`str`): + arXiv paper ID. + authors (`List[str]`, **optional**): + Names of paper authors + published_at (`datetime`, **optional**): + Date paper published. + title (`str`, **optional**): + Title of the paper. + summary (`str`, **optional**): + Summary of the paper. + upvotes (`int`, **optional**): + Number of upvotes for the paper on the Hub. + discussion_id (`str`, **optional**): + Discussion ID for the paper on the Hub. + source (`str`, **optional**): + Source of the paper. + comments (`int`, **optional**): + Number of comments for the paper on the Hub. + submitted_at (`datetime`, **optional**): + Date paper appeared in daily papers on the Hub. + submitted_by (`User`, **optional**): + Information about who submitted the daily paper. + """ + + id: str + authors: Optional[List[str]] + published_at: Optional[datetime] + title: Optional[str] + summary: Optional[str] + upvotes: Optional[int] + discussion_id: Optional[str] + source: Optional[str] + comments: Optional[int] + submitted_at: Optional[datetime] + submitted_by: Optional[User] + + def __init__(self, **kwargs) -> None: + paper = kwargs.pop("paper", {}) + self.id = kwargs.pop("id", None) or paper.pop("id", None) + authors = paper.pop("authors", None) or kwargs.pop("authors", None) + self.authors = [author.pop("name", None) for author in authors] if authors else None + published_at = paper.pop("publishedAt", None) or kwargs.pop("publishedAt", None) + self.published_at = parse_datetime(published_at) if published_at else None + self.title = kwargs.pop("title", None) + self.source = kwargs.pop("source", None) + self.summary = paper.pop("summary", None) or kwargs.pop("summary", None) + self.upvotes = paper.pop("upvotes", None) or kwargs.pop("upvotes", None) + self.discussion_id = paper.pop("discussionId", None) or kwargs.pop("discussionId", None) + self.comments = kwargs.pop("numComments", 0) + submitted_at = kwargs.pop("publishedAt", None) or kwargs.pop("submittedOnDailyAt", None) + self.submitted_at = parse_datetime(submitted_at) if submitted_at else None + submitted_by = kwargs.pop("submittedBy", None) or kwargs.pop("submittedOnDailyBy", None) + self.submitted_by = User(**submitted_by) if submitted_by else None + + # forward compatibility + self.__dict__.update(**kwargs) + + +def future_compatible(fn: CallableT) -> CallableT: + """Wrap a method of `HfApi` to handle `run_as_future=True`. + + A method flagged as "future_compatible" will be called in a thread if `run_as_future=True` and return a + `concurrent.futures.Future` instance. Otherwise, it will be called normally and return the result. + """ + sig = inspect.signature(fn) + args_params = list(sig.parameters)[1:] # remove "self" from list + + @wraps(fn) + def _inner(self, *args, **kwargs): + # Get `run_as_future` value if provided (default to False) + if "run_as_future" in kwargs: + run_as_future = kwargs["run_as_future"] + kwargs["run_as_future"] = False # avoid recursion error + else: + run_as_future = False + for param, value in zip(args_params, args): + if param == "run_as_future": + run_as_future = value + break + + # Call the function in a thread if `run_as_future=True` + if run_as_future: + return self.run_as_future(fn, self, *args, **kwargs) + + # Otherwise, call the function normally + return fn(self, *args, **kwargs) + + _inner.is_future_compatible = True # type: ignore + return _inner # type: ignore + + +class HfApi: + """ + Client to interact with the Hugging Face Hub via HTTP. + + The client is initialized with some high-level settings used in all requests + made to the Hub (HF endpoint, authentication, user agents...). Using the `HfApi` + client is preferred but not mandatory as all of its public methods are exposed + directly at the root of `huggingface_hub`. + + Args: + endpoint (`str`, *optional*): + Endpoint of the Hub. Defaults to . + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + library_name (`str`, *optional*): + The name of the library that is making the HTTP request. Will be added to + the user-agent header. Example: `"transformers"`. + library_version (`str`, *optional*): + The version of the library that is making the HTTP request. Will be added + to the user-agent header. Example: `"4.24.0"`. + user_agent (`str`, `dict`, *optional*): + The user agent info in the form of a dictionary or a single string. It will + be completed with information about the installed packages. + headers (`dict`, *optional*): + Additional headers to be sent with each request. Example: `{"X-My-Header": "value"}`. + Headers passed here are taking precedence over the default headers. + """ + + def __init__( + self, + endpoint: Optional[str] = None, + token: Union[str, bool, None] = None, + library_name: Optional[str] = None, + library_version: Optional[str] = None, + user_agent: Union[Dict, str, None] = None, + headers: Optional[Dict[str, str]] = None, + ) -> None: + self.endpoint = endpoint if endpoint is not None else constants.ENDPOINT + self.token = token + self.library_name = library_name + self.library_version = library_version + self.user_agent = user_agent + self.headers = headers + self._thread_pool: Optional[ThreadPoolExecutor] = None + + def run_as_future(self, fn: Callable[..., R], *args, **kwargs) -> Future[R]: + """ + Run a method in the background and return a Future instance. + + The main goal is to run methods without blocking the main thread (e.g. to push data during a training). + Background jobs are queued to preserve order but are not ran in parallel. If you need to speed-up your scripts + by parallelizing lots of call to the API, you must setup and use your own [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor). + + Note: Most-used methods like [`upload_file`], [`upload_folder`] and [`create_commit`] have a `run_as_future: bool` + argument to directly call them in the background. This is equivalent to calling `api.run_as_future(...)` on them + but less verbose. + + Args: + fn (`Callable`): + The method to run in the background. + *args, **kwargs: + Arguments with which the method will be called. + + Return: + `Future`: a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects) instance to + get the result of the task. + + Example: + ```py + >>> from huggingface_hub import HfApi + >>> api = HfApi() + >>> future = api.run_as_future(api.whoami) # instant + >>> future.done() + False + >>> future.result() # wait until complete and return result + (...) + >>> future.done() + True + ``` + """ + if self._thread_pool is None: + self._thread_pool = ThreadPoolExecutor(max_workers=1) + self._thread_pool + return self._thread_pool.submit(fn, *args, **kwargs) + + @validate_hf_hub_args + def whoami(self, token: Union[bool, str, None] = None) -> Dict: + """ + Call HF API to know "whoami". + + Args: + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + """ + r = get_session().get( + f"{self.endpoint}/api/whoami-v2", + headers=self._build_hf_headers( + # If `token` is provided and not `None`, it will be used by default. + # Otherwise, the token must be retrieved from cache or env variable. + token=(token or self.token or True), + ), + ) + try: + hf_raise_for_status(r) + except HTTPError as e: + raise HTTPError( + "Invalid user token. If you didn't pass a user token, make sure you " + "are properly logged in by executing `huggingface-cli login`, and " + "if you did pass a user token, double-check it's correct.", + request=e.request, + response=e.response, + ) from e + return r.json() + + @_deprecate_method( + version="1.0", + message=( + "Permissions are more complex than when `get_token_permission` was first introduced. " + "OAuth and fine-grain tokens allows for more detailed permissions. " + "If you need to know the permissions associated with a token, please use `whoami` and check the `'auth'` key." + ), + ) + def get_token_permission( + self, token: Union[bool, str, None] = None + ) -> Literal["read", "write", "fineGrained", None]: + """ + Check if a given `token` is valid and return its permissions. + + + + This method is deprecated and will be removed in version 1.0. Permissions are more complex than when + `get_token_permission` was first introduced. OAuth and fine-grain tokens allows for more detailed permissions. + If you need to know the permissions associated with a token, please use `whoami` and check the `'auth'` key. + + + + For more details about tokens, please refer to https://huggingface.co/docs/hub/security-tokens#what-are-user-access-tokens. + + Args: + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `Literal["read", "write", "fineGrained", None]`: Permission granted by the token ("read" or "write"). Returns `None` if no + token passed, if token is invalid or if role is not returned by the server. This typically happens when the token is an OAuth token. + """ + try: + return self.whoami(token=token)["auth"]["accessToken"]["role"] + except (LocalTokenNotFoundError, HTTPError, KeyError): + return None + + def get_model_tags(self) -> Dict: + """ + List all valid model tags as a nested namespace object + """ + path = f"{self.endpoint}/api/models-tags-by-type" + r = get_session().get(path) + hf_raise_for_status(r) + return r.json() + + def get_dataset_tags(self) -> Dict: + """ + List all valid dataset tags as a nested namespace object. + """ + path = f"{self.endpoint}/api/datasets-tags-by-type" + r = get_session().get(path) + hf_raise_for_status(r) + return r.json() + + @validate_hf_hub_args + def list_models( + self, + *, + # Search-query parameter + filter: Union[str, Iterable[str], None] = None, + author: Optional[str] = None, + gated: Optional[bool] = None, + inference: Optional[Literal["cold", "frozen", "warm"]] = None, + library: Optional[Union[str, List[str]]] = None, + language: Optional[Union[str, List[str]]] = None, + model_name: Optional[str] = None, + task: Optional[Union[str, List[str]]] = None, + trained_dataset: Optional[Union[str, List[str]]] = None, + tags: Optional[Union[str, List[str]]] = None, + search: Optional[str] = None, + pipeline_tag: Optional[str] = None, + emissions_thresholds: Optional[Tuple[float, float]] = None, + # Sorting and pagination parameters + sort: Union[Literal["last_modified"], str, None] = None, + direction: Optional[Literal[-1]] = None, + limit: Optional[int] = None, + # Additional data to fetch + expand: Optional[List[ExpandModelProperty_T]] = None, + full: Optional[bool] = None, + cardData: bool = False, + fetch_config: bool = False, + token: Union[bool, str, None] = None, + ) -> Iterable[ModelInfo]: + """ + List models hosted on the Huggingface Hub, given some filters. + + Args: + filter (`str` or `Iterable[str]`, *optional*): + A string or list of string to filter models on the Hub. + author (`str`, *optional*): + A string which identify the author (user or organization) of the + returned models. + gated (`bool`, *optional*): + A boolean to filter models on the Hub that are gated or not. By default, all models are returned. + If `gated=True` is passed, only gated models are returned. + If `gated=False` is passed, only non-gated models are returned. + inference (`Literal["cold", "frozen", "warm"]`, *optional*): + A string to filter models on the Hub by their state on the Inference API. + Warm models are available for immediate use. Cold models will be loaded on first inference call. + Frozen models are not available in Inference API. + library (`str` or `List`, *optional*): + A string or list of strings of foundational libraries models were + originally trained from, such as pytorch, tensorflow, or allennlp. + language (`str` or `List`, *optional*): + A string or list of strings of languages, both by name and country + code, such as "en" or "English" + model_name (`str`, *optional*): + A string that contain complete or partial names for models on the + Hub, such as "bert" or "bert-base-cased" + task (`str` or `List`, *optional*): + A string or list of strings of tasks models were designed for, such + as: "fill-mask" or "automatic-speech-recognition" + trained_dataset (`str` or `List`, *optional*): + A string tag or a list of string tags of the trained dataset for a + model on the Hub. + tags (`str` or `List`, *optional*): + A string tag or a list of tags to filter models on the Hub by, such + as `text-generation` or `spacy`. + search (`str`, *optional*): + A string that will be contained in the returned model ids. + pipeline_tag (`str`, *optional*): + A string pipeline tag to filter models on the Hub by, such as `summarization`. + emissions_thresholds (`Tuple`, *optional*): + A tuple of two ints or floats representing a minimum and maximum + carbon footprint to filter the resulting models with in grams. + sort (`Literal["last_modified"]` or `str`, *optional*): + The key with which to sort the resulting models. Possible values are "last_modified", "trending_score", + "created_at", "downloads" and "likes". + direction (`Literal[-1]` or `int`, *optional*): + Direction in which to sort. The value `-1` sorts by descending + order while all other values sort by ascending order. + limit (`int`, *optional*): + The limit on the number of models fetched. Leaving this option + to `None` fetches all models. + expand (`List[ExpandModelProperty_T]`, *optional*): + List properties to return in the response. When used, only the properties in the list will be returned. + This parameter cannot be used if `full`, `cardData` or `fetch_config` are passed. + Possible values are `"author"`, `"baseModels"`, `"cardData"`, `"childrenModelCount"`, `"config"`, `"createdAt"`, `"disabled"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"gguf"`, `"inference"`, `"lastModified"`, `"library_name"`, `"likes"`, `"mask_token"`, `"model-index"`, `"pipeline_tag"`, `"private"`, `"safetensors"`, `"sha"`, `"siblings"`, `"spaces"`, `"tags"`, `"transformersInfo"`, `"trendingScore"` and `"widgetData"`. + full (`bool`, *optional*): + Whether to fetch all model data, including the `last_modified`, + the `sha`, the files and the `tags`. This is set to `True` by + default when using a filter. + cardData (`bool`, *optional*): + Whether to grab the metadata for the model as well. Can contain + useful information such as carbon emissions, metrics, and + datasets trained on. + fetch_config (`bool`, *optional*): + Whether to fetch the model configs as well. This is not included + in `full` due to its size. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + + Returns: + `Iterable[ModelInfo]`: an iterable of [`huggingface_hub.hf_api.ModelInfo`] objects. + + Example usage with the `filter` argument: + + ```python + >>> from huggingface_hub import HfApi + + >>> api = HfApi() + + # List all models + >>> api.list_models() + + # List only the text classification models + >>> api.list_models(filter="text-classification") + + # List only models from the AllenNLP library + >>> api.list_models(filter="allennlp") + ``` + + Example usage with the `search` argument: + + ```python + >>> from huggingface_hub import HfApi + + >>> api = HfApi() + + # List all models with "bert" in their name + >>> api.list_models(search="bert") + + # List all models with "bert" in their name made by google + >>> api.list_models(search="bert", author="google") + ``` + """ + if expand and (full or cardData or fetch_config): + raise ValueError("`expand` cannot be used if `full`, `cardData` or `fetch_config` are passed.") + + if emissions_thresholds is not None and cardData is None: + raise ValueError("`emissions_thresholds` were passed without setting `cardData=True`.") + + path = f"{self.endpoint}/api/models" + headers = self._build_hf_headers(token=token) + params: Dict[str, Any] = {} + + # Build the filter list + filter_list: List[str] = [] + if filter: + filter_list.extend([filter] if isinstance(filter, str) else filter) + if library: + filter_list.extend([library] if isinstance(library, str) else library) + if task: + filter_list.extend([task] if isinstance(task, str) else task) + if trained_dataset: + if isinstance(trained_dataset, str): + trained_dataset = [trained_dataset] + for dataset in trained_dataset: + if not dataset.startswith("dataset:"): + dataset = f"dataset:{dataset}" + filter_list.append(dataset) + if language: + filter_list.extend([language] if isinstance(language, str) else language) + if tags: + filter_list.extend([tags] if isinstance(tags, str) else tags) + if len(filter_list) > 0: + params["filter"] = filter_list + + # Handle other query params + if author: + params["author"] = author + if gated is not None: + params["gated"] = gated + if inference is not None: + params["inference"] = inference + if pipeline_tag: + params["pipeline_tag"] = pipeline_tag + search_list = [] + if model_name: + search_list.append(model_name) + if search: + search_list.append(search) + if len(search_list) > 0: + params["search"] = search_list + if sort is not None: + params["sort"] = ( + "lastModified" + if sort == "last_modified" + else "trendingScore" + if sort == "trending_score" + else "createdAt" + if sort == "created_at" + else sort + ) + if direction is not None: + params["direction"] = direction + if limit is not None: + params["limit"] = limit + + # Request additional data + if full: + params["full"] = True + if fetch_config: + params["config"] = True + if cardData: + params["cardData"] = True + if expand: + params["expand"] = expand + + # `items` is a generator + items = paginate(path, params=params, headers=headers) + if limit is not None: + items = islice(items, limit) # Do not iterate over all pages + for item in items: + if "siblings" not in item: + item["siblings"] = None + model_info = ModelInfo(**item) + if emissions_thresholds is None or _is_emission_within_threshold(model_info, *emissions_thresholds): + yield model_info + + @validate_hf_hub_args + def list_datasets( + self, + *, + # Search-query parameter + filter: Union[str, Iterable[str], None] = None, + author: Optional[str] = None, + benchmark: Optional[Union[str, List[str]]] = None, + dataset_name: Optional[str] = None, + gated: Optional[bool] = None, + language_creators: Optional[Union[str, List[str]]] = None, + language: Optional[Union[str, List[str]]] = None, + multilinguality: Optional[Union[str, List[str]]] = None, + size_categories: Optional[Union[str, List[str]]] = None, + tags: Optional[Union[str, List[str]]] = None, + task_categories: Optional[Union[str, List[str]]] = None, + task_ids: Optional[Union[str, List[str]]] = None, + search: Optional[str] = None, + # Sorting and pagination parameters + sort: Optional[Union[Literal["last_modified"], str]] = None, + direction: Optional[Literal[-1]] = None, + limit: Optional[int] = None, + # Additional data to fetch + expand: Optional[List[ExpandDatasetProperty_T]] = None, + full: Optional[bool] = None, + token: Union[bool, str, None] = None, + ) -> Iterable[DatasetInfo]: + """ + List datasets hosted on the Huggingface Hub, given some filters. + + Args: + filter (`str` or `Iterable[str]`, *optional*): + A string or list of string to filter datasets on the hub. + author (`str`, *optional*): + A string which identify the author of the returned datasets. + benchmark (`str` or `List`, *optional*): + A string or list of strings that can be used to identify datasets on + the Hub by their official benchmark. + dataset_name (`str`, *optional*): + A string or list of strings that can be used to identify datasets on + the Hub by its name, such as `SQAC` or `wikineural` + gated (`bool`, *optional*): + A boolean to filter datasets on the Hub that are gated or not. By default, all datasets are returned. + If `gated=True` is passed, only gated datasets are returned. + If `gated=False` is passed, only non-gated datasets are returned. + language_creators (`str` or `List`, *optional*): + A string or list of strings that can be used to identify datasets on + the Hub with how the data was curated, such as `crowdsourced` or + `machine_generated`. + language (`str` or `List`, *optional*): + A string or list of strings representing a two-character language to + filter datasets by on the Hub. + multilinguality (`str` or `List`, *optional*): + A string or list of strings representing a filter for datasets that + contain multiple languages. + size_categories (`str` or `List`, *optional*): + A string or list of strings that can be used to identify datasets on + the Hub by the size of the dataset such as `100K>> from huggingface_hub import HfApi + + >>> api = HfApi() + + # List all datasets + >>> api.list_datasets() + + + # List only the text classification datasets + >>> api.list_datasets(filter="task_categories:text-classification") + + + # List only the datasets in russian for language modeling + >>> api.list_datasets( + ... filter=("language:ru", "task_ids:language-modeling") + ... ) + + # List FiftyOne datasets (identified by the tag "fiftyone" in dataset card) + >>> api.list_datasets(tags="fiftyone") + ``` + + Example usage with the `search` argument: + + ```python + >>> from huggingface_hub import HfApi + + >>> api = HfApi() + + # List all datasets with "text" in their name + >>> api.list_datasets(search="text") + + # List all datasets with "text" in their name made by google + >>> api.list_datasets(search="text", author="google") + ``` + """ + if expand and full: + raise ValueError("`expand` cannot be used if `full` is passed.") + + path = f"{self.endpoint}/api/datasets" + headers = self._build_hf_headers(token=token) + params: Dict[str, Any] = {} + + # Build `filter` list + filter_list = [] + if filter is not None: + if isinstance(filter, str): + filter_list.append(filter) + else: + filter_list.extend(filter) + for key, value in ( + ("benchmark", benchmark), + ("language_creators", language_creators), + ("language", language), + ("multilinguality", multilinguality), + ("size_categories", size_categories), + ("task_categories", task_categories), + ("task_ids", task_ids), + ): + if value: + if isinstance(value, str): + value = [value] + for value_item in value: + if not value_item.startswith(f"{key}:"): + data = f"{key}:{value_item}" + filter_list.append(data) + if tags is not None: + filter_list.extend([tags] if isinstance(tags, str) else tags) + if len(filter_list) > 0: + params["filter"] = filter_list + + # Handle other query params + if author: + params["author"] = author + if gated is not None: + params["gated"] = gated + search_list = [] + if dataset_name: + search_list.append(dataset_name) + if search: + search_list.append(search) + if len(search_list) > 0: + params["search"] = search_list + if sort is not None: + params["sort"] = ( + "lastModified" + if sort == "last_modified" + else "trendingScore" + if sort == "trending_score" + else "createdAt" + if sort == "created_at" + else sort + ) + if direction is not None: + params["direction"] = direction + if limit is not None: + params["limit"] = limit + + # Request additional data + if expand: + params["expand"] = expand + if full: + params["full"] = True + + items = paginate(path, params=params, headers=headers) + if limit is not None: + items = islice(items, limit) # Do not iterate over all pages + for item in items: + if "siblings" not in item: + item["siblings"] = None + yield DatasetInfo(**item) + + @validate_hf_hub_args + def list_spaces( + self, + *, + # Search-query parameter + filter: Union[str, Iterable[str], None] = None, + author: Optional[str] = None, + search: Optional[str] = None, + datasets: Union[str, Iterable[str], None] = None, + models: Union[str, Iterable[str], None] = None, + linked: bool = False, + # Sorting and pagination parameters + sort: Union[Literal["last_modified"], str, None] = None, + direction: Optional[Literal[-1]] = None, + limit: Optional[int] = None, + # Additional data to fetch + expand: Optional[List[ExpandSpaceProperty_T]] = None, + full: Optional[bool] = None, + token: Union[bool, str, None] = None, + ) -> Iterable[SpaceInfo]: + """ + List spaces hosted on the Huggingface Hub, given some filters. + + Args: + filter (`str` or `Iterable`, *optional*): + A string tag or list of tags that can be used to identify Spaces on the Hub. + author (`str`, *optional*): + A string which identify the author of the returned Spaces. + search (`str`, *optional*): + A string that will be contained in the returned Spaces. + datasets (`str` or `Iterable`, *optional*): + Whether to return Spaces that make use of a dataset. + The name of a specific dataset can be passed as a string. + models (`str` or `Iterable`, *optional*): + Whether to return Spaces that make use of a model. + The name of a specific model can be passed as a string. + linked (`bool`, *optional*): + Whether to return Spaces that make use of either a model or a dataset. + sort (`Literal["last_modified"]` or `str`, *optional*): + The key with which to sort the resulting models. Possible values are "last_modified", "trending_score", + "created_at" and "likes". + direction (`Literal[-1]` or `int`, *optional*): + Direction in which to sort. The value `-1` sorts by descending + order while all other values sort by ascending order. + limit (`int`, *optional*): + The limit on the number of Spaces fetched. Leaving this option + to `None` fetches all Spaces. + expand (`List[ExpandSpaceProperty_T]`, *optional*): + List properties to return in the response. When used, only the properties in the list will be returned. + This parameter cannot be used if `full` is passed. + Possible values are `"author"`, `"cardData"`, `"datasets"`, `"disabled"`, `"lastModified"`, `"createdAt"`, `"likes"`, `"models"`, `"private"`, `"runtime"`, `"sdk"`, `"siblings"`, `"sha"`, `"subdomain"`, `"tags"` and `"trendingScore"`. + full (`bool`, *optional*): + Whether to fetch all Spaces data, including the `last_modified`, `siblings` + and `card_data` fields. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `Iterable[SpaceInfo]`: an iterable of [`huggingface_hub.hf_api.SpaceInfo`] objects. + """ + if expand and full: + raise ValueError("`expand` cannot be used if `full` is passed.") + + path = f"{self.endpoint}/api/spaces" + headers = self._build_hf_headers(token=token) + params: Dict[str, Any] = {} + if filter is not None: + params["filter"] = filter + if author is not None: + params["author"] = author + if search is not None: + params["search"] = search + if sort is not None: + params["sort"] = ( + "lastModified" + if sort == "last_modified" + else "trendingScore" + if sort == "trending_score" + else "createdAt" + if sort == "created_at" + else sort + ) + if direction is not None: + params["direction"] = direction + if limit is not None: + params["limit"] = limit + if linked: + params["linked"] = True + if datasets is not None: + params["datasets"] = datasets + if models is not None: + params["models"] = models + + # Request additional data + if expand: + params["expand"] = expand + if full: + params["full"] = True + + items = paginate(path, params=params, headers=headers) + if limit is not None: + items = islice(items, limit) # Do not iterate over all pages + for item in items: + if "siblings" not in item: + item["siblings"] = None + yield SpaceInfo(**item) + + @validate_hf_hub_args + def like( + self, + repo_id: str, + *, + token: Union[bool, str, None] = None, + repo_type: Optional[str] = None, + ) -> None: + """ + Like a given repo on the Hub (e.g. set as favorite). + + See also [`unlike`] and [`list_liked_repos`]. + + Args: + repo_id (`str`): + The repository to like. Example: `"user/my-cool-model"`. + + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if liking a dataset or space, `None` or + `"model"` if liking a model. Default is `None`. + + Raises: + [`~utils.RepositoryNotFoundError`]: + If repository is not found (error 404): wrong repo_id/repo_type, private + but not authenticated or repo does not exist. + + Example: + ```python + >>> from huggingface_hub import like, list_liked_repos, unlike + >>> like("gpt2") + >>> "gpt2" in list_liked_repos().models + True + >>> unlike("gpt2") + >>> "gpt2" in list_liked_repos().models + False + ``` + """ + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL + response = get_session().post( + url=f"{self.endpoint}/api/{repo_type}s/{repo_id}/like", + headers=self._build_hf_headers(token=token), + ) + hf_raise_for_status(response) + + @validate_hf_hub_args + def unlike( + self, + repo_id: str, + *, + token: Union[bool, str, None] = None, + repo_type: Optional[str] = None, + ) -> None: + """ + Unlike a given repo on the Hub (e.g. remove from favorite list). + + See also [`like`] and [`list_liked_repos`]. + + Args: + repo_id (`str`): + The repository to unlike. Example: `"user/my-cool-model"`. + + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if unliking a dataset or space, `None` or + `"model"` if unliking a model. Default is `None`. + + Raises: + [`~utils.RepositoryNotFoundError`]: + If repository is not found (error 404): wrong repo_id/repo_type, private + but not authenticated or repo does not exist. + + Example: + ```python + >>> from huggingface_hub import like, list_liked_repos, unlike + >>> like("gpt2") + >>> "gpt2" in list_liked_repos().models + True + >>> unlike("gpt2") + >>> "gpt2" in list_liked_repos().models + False + ``` + """ + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL + response = get_session().delete( + url=f"{self.endpoint}/api/{repo_type}s/{repo_id}/like", headers=self._build_hf_headers(token=token) + ) + hf_raise_for_status(response) + + @validate_hf_hub_args + def list_liked_repos( + self, + user: Optional[str] = None, + *, + token: Union[bool, str, None] = None, + ) -> UserLikes: + """ + List all public repos liked by a user on huggingface.co. + + This list is public so token is optional. If `user` is not passed, it defaults to + the logged in user. + + See also [`like`] and [`unlike`]. + + Args: + user (`str`, *optional*): + Name of the user for which you want to fetch the likes. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`UserLikes`]: object containing the user name and 3 lists of repo ids (1 for + models, 1 for datasets and 1 for Spaces). + + Raises: + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If `user` is not passed and no token found (either from argument or from machine). + + Example: + ```python + >>> from huggingface_hub import list_liked_repos + + >>> likes = list_liked_repos("julien-c") + + >>> likes.user + "julien-c" + + >>> likes.models + ["osanseviero/streamlit_1.15", "Xhaheen/ChatGPT_HF", ...] + ``` + """ + # User is either provided explicitly or retrieved from current token. + if user is None: + me = self.whoami(token=token) + if me["type"] == "user": + user = me["name"] + else: + raise ValueError( + "Cannot list liked repos. You must provide a 'user' as input or be logged in as a user." + ) + + path = f"{self.endpoint}/api/users/{user}/likes" + headers = self._build_hf_headers(token=token) + + likes = list(paginate(path, params={}, headers=headers)) + # Looping over a list of items similar to: + # { + # 'createdAt': '2021-09-09T21:53:27.000Z', + # 'repo': { + # 'name': 'PaddlePaddle/PaddleOCR', + # 'type': 'space' + # } + # } + # Let's loop 3 times over the received list. Less efficient but more straightforward to read. + return UserLikes( + user=user, + total=len(likes), + models=[like["repo"]["name"] for like in likes if like["repo"]["type"] == "model"], + datasets=[like["repo"]["name"] for like in likes if like["repo"]["type"] == "dataset"], + spaces=[like["repo"]["name"] for like in likes if like["repo"]["type"] == "space"], + ) + + @validate_hf_hub_args + def list_repo_likers( + self, + repo_id: str, + *, + repo_type: Optional[str] = None, + token: Union[bool, str, None] = None, + ) -> Iterable[User]: + """ + List all users who liked a given repo on the hugging Face Hub. + + See also [`like`] and [`list_liked_repos`]. + + Args: + repo_id (`str`): + The repository to retrieve . Example: `"user/my-cool-model"`. + + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if uploading to a dataset or + space, `None` or `"model"` if uploading to a model. Default is + `None`. + + Returns: + `Iterable[User]`: an iterable of [`huggingface_hub.hf_api.User`] objects. + """ + + # Construct the API endpoint + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL + path = f"{self.endpoint}/api/{repo_type}s/{repo_id}/likers" + for liker in paginate(path, params={}, headers=self._build_hf_headers(token=token)): + yield User(username=liker["user"], fullname=liker["fullname"], avatar_url=liker["avatarUrl"]) + + @validate_hf_hub_args + def model_info( + self, + repo_id: str, + *, + revision: Optional[str] = None, + timeout: Optional[float] = None, + securityStatus: Optional[bool] = None, + files_metadata: bool = False, + expand: Optional[List[ExpandModelProperty_T]] = None, + token: Union[bool, str, None] = None, + ) -> ModelInfo: + """ + Get info on one specific model on huggingface.co + + Model can be private if you pass an acceptable token or are logged in. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + revision (`str`, *optional*): + The revision of the model repository from which to get the + information. + timeout (`float`, *optional*): + Whether to set a timeout for the request to the Hub. + securityStatus (`bool`, *optional*): + Whether to retrieve the security status from the model + repository as well. The security status will be returned in the `security_repo_status` field. + files_metadata (`bool`, *optional*): + Whether or not to retrieve metadata for files in the repository + (size, LFS metadata, etc). Defaults to `False`. + expand (`List[ExpandModelProperty_T]`, *optional*): + List properties to return in the response. When used, only the properties in the list will be returned. + This parameter cannot be used if `securityStatus` or `files_metadata` are passed. + Possible values are `"author"`, `"baseModels"`, `"cardData"`, `"childrenModelCount"`, `"config"`, `"createdAt"`, `"disabled"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"gguf"`, `"inference"`, `"lastModified"`, `"library_name"`, `"likes"`, `"mask_token"`, `"model-index"`, `"pipeline_tag"`, `"private"`, `"safetensors"`, `"sha"`, `"siblings"`, `"spaces"`, `"tags"`, `"transformersInfo"`, `"trendingScore"` and `"widgetData"`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`huggingface_hub.hf_api.ModelInfo`]: The model repository information. + + + + Raises the following errors: + + - [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + - [`~utils.RevisionNotFoundError`] + If the revision to download from cannot be found. + + + """ + if expand and (securityStatus or files_metadata): + raise ValueError("`expand` cannot be used if `securityStatus` or `files_metadata` are set.") + + headers = self._build_hf_headers(token=token) + path = ( + f"{self.endpoint}/api/models/{repo_id}" + if revision is None + else (f"{self.endpoint}/api/models/{repo_id}/revision/{quote(revision, safe='')}") + ) + params: Dict = {} + if securityStatus: + params["securityStatus"] = True + if files_metadata: + params["blobs"] = True + if expand: + params["expand"] = expand + r = get_session().get(path, headers=headers, timeout=timeout, params=params) + hf_raise_for_status(r) + data = r.json() + return ModelInfo(**data) + + @validate_hf_hub_args + def dataset_info( + self, + repo_id: str, + *, + revision: Optional[str] = None, + timeout: Optional[float] = None, + files_metadata: bool = False, + expand: Optional[List[ExpandDatasetProperty_T]] = None, + token: Union[bool, str, None] = None, + ) -> DatasetInfo: + """ + Get info on one specific dataset on huggingface.co. + + Dataset can be private if you pass an acceptable token. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + revision (`str`, *optional*): + The revision of the dataset repository from which to get the + information. + timeout (`float`, *optional*): + Whether to set a timeout for the request to the Hub. + files_metadata (`bool`, *optional*): + Whether or not to retrieve metadata for files in the repository + (size, LFS metadata, etc). Defaults to `False`. + expand (`List[ExpandDatasetProperty_T]`, *optional*): + List properties to return in the response. When used, only the properties in the list will be returned. + This parameter cannot be used if `files_metadata` is passed. + Possible values are `"author"`, `"cardData"`, `"citation"`, `"createdAt"`, `"disabled"`, `"description"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"lastModified"`, `"likes"`, `"paperswithcode_id"`, `"private"`, `"siblings"`, `"sha"`, `"tags"` and `"trendingScore"`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`hf_api.DatasetInfo`]: The dataset repository information. + + + + Raises the following errors: + + - [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + - [`~utils.RevisionNotFoundError`] + If the revision to download from cannot be found. + + + """ + if expand and files_metadata: + raise ValueError("`expand` cannot be used if `files_metadata` is set.") + + headers = self._build_hf_headers(token=token) + path = ( + f"{self.endpoint}/api/datasets/{repo_id}" + if revision is None + else (f"{self.endpoint}/api/datasets/{repo_id}/revision/{quote(revision, safe='')}") + ) + params: Dict = {} + if files_metadata: + params["blobs"] = True + if expand: + params["expand"] = expand + + r = get_session().get(path, headers=headers, timeout=timeout, params=params) + hf_raise_for_status(r) + data = r.json() + return DatasetInfo(**data) + + @validate_hf_hub_args + def space_info( + self, + repo_id: str, + *, + revision: Optional[str] = None, + timeout: Optional[float] = None, + files_metadata: bool = False, + expand: Optional[List[ExpandModelProperty_T]] = None, + token: Union[bool, str, None] = None, + ) -> SpaceInfo: + """ + Get info on one specific Space on huggingface.co. + + Space can be private if you pass an acceptable token. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + revision (`str`, *optional*): + The revision of the space repository from which to get the + information. + timeout (`float`, *optional*): + Whether to set a timeout for the request to the Hub. + files_metadata (`bool`, *optional*): + Whether or not to retrieve metadata for files in the repository + (size, LFS metadata, etc). Defaults to `False`. + expand (`List[ExpandSpaceProperty_T]`, *optional*): + List properties to return in the response. When used, only the properties in the list will be returned. + This parameter cannot be used if `full` is passed. + Possible values are `"author"`, `"cardData"`, `"createdAt"`, `"datasets"`, `"disabled"`, `"lastModified"`, `"likes"`, `"models"`, `"private"`, `"runtime"`, `"sdk"`, `"siblings"`, `"sha"`, `"subdomain"`, `"tags"` and `"trendingScore"`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`~hf_api.SpaceInfo`]: The space repository information. + + + + Raises the following errors: + + - [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + - [`~utils.RevisionNotFoundError`] + If the revision to download from cannot be found. + + + """ + if expand and files_metadata: + raise ValueError("`expand` cannot be used if `files_metadata` is set.") + + headers = self._build_hf_headers(token=token) + path = ( + f"{self.endpoint}/api/spaces/{repo_id}" + if revision is None + else (f"{self.endpoint}/api/spaces/{repo_id}/revision/{quote(revision, safe='')}") + ) + params: Dict = {} + if files_metadata: + params["blobs"] = True + if expand: + params["expand"] = expand + + r = get_session().get(path, headers=headers, timeout=timeout, params=params) + hf_raise_for_status(r) + data = r.json() + return SpaceInfo(**data) + + @validate_hf_hub_args + def repo_info( + self, + repo_id: str, + *, + revision: Optional[str] = None, + repo_type: Optional[str] = None, + timeout: Optional[float] = None, + files_metadata: bool = False, + expand: Optional[Union[ExpandModelProperty_T, ExpandDatasetProperty_T, ExpandSpaceProperty_T]] = None, + token: Union[bool, str, None] = None, + ) -> Union[ModelInfo, DatasetInfo, SpaceInfo]: + """ + Get the info object for a given repo of a given type. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + revision (`str`, *optional*): + The revision of the repository from which to get the + information. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if getting repository info from a dataset or a space, + `None` or `"model"` if getting repository info from a model. Default is `None`. + timeout (`float`, *optional*): + Whether to set a timeout for the request to the Hub. + expand (`ExpandModelProperty_T` or `ExpandDatasetProperty_T` or `ExpandSpaceProperty_T`, *optional*): + List properties to return in the response. When used, only the properties in the list will be returned. + This parameter cannot be used if `files_metadata` is passed. + For an exhaustive list of available properties, check out [`model_info`], [`dataset_info`] or [`space_info`]. + files_metadata (`bool`, *optional*): + Whether or not to retrieve metadata for files in the repository + (size, LFS metadata, etc). Defaults to `False`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `Union[SpaceInfo, DatasetInfo, ModelInfo]`: The repository information, as a + [`huggingface_hub.hf_api.DatasetInfo`], [`huggingface_hub.hf_api.ModelInfo`] + or [`huggingface_hub.hf_api.SpaceInfo`] object. + + + + Raises the following errors: + + - [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + - [`~utils.RevisionNotFoundError`] + If the revision to download from cannot be found. + + + """ + if repo_type is None or repo_type == "model": + method = self.model_info + elif repo_type == "dataset": + method = self.dataset_info # type: ignore + elif repo_type == "space": + method = self.space_info # type: ignore + else: + raise ValueError("Unsupported repo type.") + return method( + repo_id, + revision=revision, + token=token, + timeout=timeout, + expand=expand, # type: ignore[arg-type] + files_metadata=files_metadata, + ) + + @validate_hf_hub_args + def repo_exists( + self, + repo_id: str, + *, + repo_type: Optional[str] = None, + token: Union[str, bool, None] = None, + ) -> bool: + """ + Checks if a repository exists on the Hugging Face Hub. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if getting repository info from a dataset or a space, + `None` or `"model"` if getting repository info from a model. Default is `None`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + True if the repository exists, False otherwise. + + Examples: + ```py + >>> from huggingface_hub import repo_exists + >>> repo_exists("google/gemma-7b") + True + >>> repo_exists("google/not-a-repo") + False + ``` + """ + try: + self.repo_info(repo_id=repo_id, repo_type=repo_type, token=token) + return True + except GatedRepoError: + return True # we don't have access but it exists + except RepositoryNotFoundError: + return False + + @validate_hf_hub_args + def revision_exists( + self, + repo_id: str, + revision: str, + *, + repo_type: Optional[str] = None, + token: Union[str, bool, None] = None, + ) -> bool: + """ + Checks if a specific revision exists on a repo on the Hugging Face Hub. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + revision (`str`): + The revision of the repository to check. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if getting repository info from a dataset or a space, + `None` or `"model"` if getting repository info from a model. Default is `None`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + True if the repository and the revision exists, False otherwise. + + Examples: + ```py + >>> from huggingface_hub import revision_exists + >>> revision_exists("google/gemma-7b", "float16") + True + >>> revision_exists("google/gemma-7b", "not-a-revision") + False + ``` + """ + try: + self.repo_info(repo_id=repo_id, revision=revision, repo_type=repo_type, token=token) + return True + except RevisionNotFoundError: + return False + except RepositoryNotFoundError: + return False + + @validate_hf_hub_args + def file_exists( + self, + repo_id: str, + filename: str, + *, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + token: Union[str, bool, None] = None, + ) -> bool: + """ + Checks if a file exists in a repository on the Hugging Face Hub. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + filename (`str`): + The name of the file to check, for example: + `"config.json"` + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if getting repository info from a dataset or a space, + `None` or `"model"` if getting repository info from a model. Default is `None`. + revision (`str`, *optional*): + The revision of the repository from which to get the information. Defaults to `"main"` branch. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + True if the file exists, False otherwise. + + Examples: + ```py + >>> from huggingface_hub import file_exists + >>> file_exists("bigcode/starcoder", "config.json") + True + >>> file_exists("bigcode/starcoder", "not-a-file") + False + >>> file_exists("bigcode/not-a-repo", "config.json") + False + ``` + """ + url = hf_hub_url( + repo_id=repo_id, repo_type=repo_type, revision=revision, filename=filename, endpoint=self.endpoint + ) + try: + if token is None: + token = self.token + get_hf_file_metadata(url, token=token) + return True + except GatedRepoError: # raise specifically on gated repo + raise + except (RepositoryNotFoundError, EntryNotFoundError, RevisionNotFoundError): + return False + + @validate_hf_hub_args + def list_repo_files( + self, + repo_id: str, + *, + revision: Optional[str] = None, + repo_type: Optional[str] = None, + token: Union[str, bool, None] = None, + ) -> List[str]: + """ + Get the list of files in a given repo. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated by a `/`. + revision (`str`, *optional*): + The revision of the model repository from which to get the information. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if uploading to a dataset or space, `None` or `"model"` if uploading to + a model. Default is `None`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `List[str]`: the list of files in a given repository. + """ + return [ + f.rfilename + for f in self.list_repo_tree( + repo_id=repo_id, recursive=True, revision=revision, repo_type=repo_type, token=token + ) + if isinstance(f, RepoFile) + ] + + @validate_hf_hub_args + def list_repo_tree( + self, + repo_id: str, + path_in_repo: Optional[str] = None, + *, + recursive: bool = False, + expand: bool = False, + revision: Optional[str] = None, + repo_type: Optional[str] = None, + token: Union[str, bool, None] = None, + ) -> Iterable[Union[RepoFile, RepoFolder]]: + """ + List a repo tree's files and folders and get information about them. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated by a `/`. + path_in_repo (`str`, *optional*): + Relative path of the tree (folder) in the repo, for example: + `"checkpoints/1fec34a/results"`. Will default to the root tree (folder) of the repository. + recursive (`bool`, *optional*, defaults to `False`): + Whether to list tree's files and folders recursively. + expand (`bool`, *optional*, defaults to `False`): + Whether to fetch more information about the tree's files and folders (e.g. last commit and files' security scan results). This + operation is more expensive for the server so only 50 results are returned per page (instead of 1000). + As pagination is implemented in `huggingface_hub`, this is transparent for you except for the time it + takes to get the results. + revision (`str`, *optional*): + The revision of the repository from which to get the tree. Defaults to `"main"` branch. + repo_type (`str`, *optional*): + The type of the repository from which to get the tree (`"model"`, `"dataset"` or `"space"`. + Defaults to `"model"`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `Iterable[Union[RepoFile, RepoFolder]]`: + The information about the tree's files and folders, as an iterable of [`RepoFile`] and [`RepoFolder`] objects. The order of the files and folders is + not guaranteed. + + Raises: + [`~utils.RepositoryNotFoundError`]: + If repository is not found (error 404): wrong repo_id/repo_type, private but not authenticated or repo + does not exist. + [`~utils.RevisionNotFoundError`]: + If revision is not found (error 404) on the repo. + [`~utils.EntryNotFoundError`]: + If the tree (folder) does not exist (error 404) on the repo. + + Examples: + + Get information about a repo's tree. + ```py + >>> from huggingface_hub import list_repo_tree + >>> repo_tree = list_repo_tree("lysandre/arxiv-nlp") + >>> repo_tree + + >>> list(repo_tree) + [ + RepoFile(path='.gitattributes', size=391, blob_id='ae8c63daedbd4206d7d40126955d4e6ab1c80f8f', lfs=None, last_commit=None, security=None), + RepoFile(path='README.md', size=391, blob_id='43bd404b159de6fba7c2f4d3264347668d43af25', lfs=None, last_commit=None, security=None), + RepoFile(path='config.json', size=554, blob_id='2f9618c3a19b9a61add74f70bfb121335aeef666', lfs=None, last_commit=None, security=None), + RepoFile( + path='flax_model.msgpack', size=497764107, blob_id='8095a62ccb4d806da7666fcda07467e2d150218e', + lfs={'size': 497764107, 'sha256': 'd88b0d6a6ff9c3f8151f9d3228f57092aaea997f09af009eefd7373a77b5abb9', 'pointer_size': 134}, last_commit=None, security=None + ), + RepoFile(path='merges.txt', size=456318, blob_id='226b0752cac7789c48f0cb3ec53eda48b7be36cc', lfs=None, last_commit=None, security=None), + RepoFile( + path='pytorch_model.bin', size=548123560, blob_id='64eaa9c526867e404b68f2c5d66fd78e27026523', + lfs={'size': 548123560, 'sha256': '9be78edb5b928eba33aa88f431551348f7466ba9f5ef3daf1d552398722a5436', 'pointer_size': 134}, last_commit=None, security=None + ), + RepoFile(path='vocab.json', size=898669, blob_id='b00361fece0387ca34b4b8b8539ed830d644dbeb', lfs=None, last_commit=None, security=None)] + ] + ``` + + Get even more information about a repo's tree (last commit and files' security scan results) + ```py + >>> from huggingface_hub import list_repo_tree + >>> repo_tree = list_repo_tree("prompthero/openjourney-v4", expand=True) + >>> list(repo_tree) + [ + RepoFolder( + path='feature_extractor', + tree_id='aa536c4ea18073388b5b0bc791057a7296a00398', + last_commit={ + 'oid': '47b62b20b20e06b9de610e840282b7e6c3d51190', + 'title': 'Upload diffusers weights (#48)', + 'date': datetime.datetime(2023, 3, 21, 9, 5, 27, tzinfo=datetime.timezone.utc) + } + ), + RepoFolder( + path='safety_checker', + tree_id='65aef9d787e5557373fdf714d6c34d4fcdd70440', + last_commit={ + 'oid': '47b62b20b20e06b9de610e840282b7e6c3d51190', + 'title': 'Upload diffusers weights (#48)', + 'date': datetime.datetime(2023, 3, 21, 9, 5, 27, tzinfo=datetime.timezone.utc) + } + ), + RepoFile( + path='model_index.json', + size=582, + blob_id='d3d7c1e8c3e78eeb1640b8e2041ee256e24c9ee1', + lfs=None, + last_commit={ + 'oid': 'b195ed2d503f3eb29637050a886d77bd81d35f0e', + 'title': 'Fix deprecation warning by changing `CLIPFeatureExtractor` to `CLIPImageProcessor`. (#54)', + 'date': datetime.datetime(2023, 5, 15, 21, 41, 59, tzinfo=datetime.timezone.utc) + }, + security={ + 'safe': True, + 'av_scan': {'virusFound': False, 'virusNames': None}, + 'pickle_import_scan': None + } + ) + ... + ] + ``` + """ + repo_type = repo_type or constants.REPO_TYPE_MODEL + revision = quote(revision, safe="") if revision is not None else constants.DEFAULT_REVISION + headers = self._build_hf_headers(token=token) + + encoded_path_in_repo = "/" + quote(path_in_repo, safe="") if path_in_repo else "" + tree_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/tree/{revision}{encoded_path_in_repo}" + for path_info in paginate(path=tree_url, headers=headers, params={"recursive": recursive, "expand": expand}): + yield (RepoFile(**path_info) if path_info["type"] == "file" else RepoFolder(**path_info)) + + @validate_hf_hub_args + def list_repo_refs( + self, + repo_id: str, + *, + repo_type: Optional[str] = None, + include_pull_requests: bool = False, + token: Union[str, bool, None] = None, + ) -> GitRefs: + """ + Get the list of refs of a given repo (both tags and branches). + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if listing refs from a dataset or a Space, + `None` or `"model"` if listing from a model. Default is `None`. + include_pull_requests (`bool`, *optional*): + Whether to include refs from pull requests in the list. Defaults to `False`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Example: + ```py + >>> from huggingface_hub import HfApi + >>> api = HfApi() + >>> api.list_repo_refs("gpt2") + GitRefs(branches=[GitRefInfo(name='main', ref='refs/heads/main', target_commit='e7da7f221d5bf496a48136c0cd264e630fe9fcc8')], converts=[], tags=[]) + + >>> api.list_repo_refs("bigcode/the-stack", repo_type='dataset') + GitRefs( + branches=[ + GitRefInfo(name='main', ref='refs/heads/main', target_commit='18edc1591d9ce72aa82f56c4431b3c969b210ae3'), + GitRefInfo(name='v1.1.a1', ref='refs/heads/v1.1.a1', target_commit='f9826b862d1567f3822d3d25649b0d6d22ace714') + ], + converts=[], + tags=[ + GitRefInfo(name='v1.0', ref='refs/tags/v1.0', target_commit='c37a8cd1e382064d8aced5e05543c5f7753834da') + ] + ) + ``` + + Returns: + [`GitRefs`]: object containing all information about branches and tags for a + repo on the Hub. + """ + repo_type = repo_type or constants.REPO_TYPE_MODEL + response = get_session().get( + f"{self.endpoint}/api/{repo_type}s/{repo_id}/refs", + headers=self._build_hf_headers(token=token), + params={"include_prs": 1} if include_pull_requests else {}, + ) + hf_raise_for_status(response) + data = response.json() + + def _format_as_git_ref_info(item: Dict) -> GitRefInfo: + return GitRefInfo(name=item["name"], ref=item["ref"], target_commit=item["targetCommit"]) + + return GitRefs( + branches=[_format_as_git_ref_info(item) for item in data["branches"]], + converts=[_format_as_git_ref_info(item) for item in data["converts"]], + tags=[_format_as_git_ref_info(item) for item in data["tags"]], + pull_requests=[_format_as_git_ref_info(item) for item in data["pullRequests"]] + if include_pull_requests + else None, + ) + + @validate_hf_hub_args + def list_repo_commits( + self, + repo_id: str, + *, + repo_type: Optional[str] = None, + token: Union[bool, str, None] = None, + revision: Optional[str] = None, + formatted: bool = False, + ) -> List[GitCommitInfo]: + """ + Get the list of commits of a given revision for a repo on the Hub. + + Commits are sorted by date (last commit first). + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated by a `/`. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if listing commits from a dataset or a Space, `None` or `"model"` if + listing from a model. Default is `None`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + revision (`str`, *optional*): + The git revision to commit from. Defaults to the head of the `"main"` branch. + formatted (`bool`): + Whether to return the HTML-formatted title and description of the commits. Defaults to False. + + Example: + ```py + >>> from huggingface_hub import HfApi + >>> api = HfApi() + + # Commits are sorted by date (last commit first) + >>> initial_commit = api.list_repo_commits("gpt2")[-1] + + # Initial commit is always a system commit containing the `.gitattributes` file. + >>> initial_commit + GitCommitInfo( + commit_id='9b865efde13a30c13e0a33e536cf3e4a5a9d71d8', + authors=['system'], + created_at=datetime.datetime(2019, 2, 18, 10, 36, 15, tzinfo=datetime.timezone.utc), + title='initial commit', + message='', + formatted_title=None, + formatted_message=None + ) + + # Create an empty branch by deriving from initial commit + >>> api.create_branch("gpt2", "new_empty_branch", revision=initial_commit.commit_id) + ``` + + Returns: + List[[`GitCommitInfo`]]: list of objects containing information about the commits for a repo on the Hub. + + Raises: + [`~utils.RepositoryNotFoundError`]: + If repository is not found (error 404): wrong repo_id/repo_type, private but not authenticated or repo + does not exist. + [`~utils.RevisionNotFoundError`]: + If revision is not found (error 404) on the repo. + """ + repo_type = repo_type or constants.REPO_TYPE_MODEL + revision = quote(revision, safe="") if revision is not None else constants.DEFAULT_REVISION + + # Paginate over results and return the list of commits. + return [ + GitCommitInfo( + commit_id=item["id"], + authors=[author["user"] for author in item["authors"]], + created_at=parse_datetime(item["date"]), + title=item["title"], + message=item["message"], + formatted_title=item.get("formatted", {}).get("title"), + formatted_message=item.get("formatted", {}).get("message"), + ) + for item in paginate( + f"{self.endpoint}/api/{repo_type}s/{repo_id}/commits/{revision}", + headers=self._build_hf_headers(token=token), + params={"expand[]": "formatted"} if formatted else {}, + ) + ] + + @validate_hf_hub_args + def get_paths_info( + self, + repo_id: str, + paths: Union[List[str], str], + *, + expand: bool = False, + revision: Optional[str] = None, + repo_type: Optional[str] = None, + token: Union[str, bool, None] = None, + ) -> List[Union[RepoFile, RepoFolder]]: + """ + Get information about a repo's paths. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated by a `/`. + paths (`Union[List[str], str]`, *optional*): + The paths to get information about. If a path do not exist, it is ignored without raising + an exception. + expand (`bool`, *optional*, defaults to `False`): + Whether to fetch more information about the paths (e.g. last commit and files' security scan results). This + operation is more expensive for the server so only 50 results are returned per page (instead of 1000). + As pagination is implemented in `huggingface_hub`, this is transparent for you except for the time it + takes to get the results. + revision (`str`, *optional*): + The revision of the repository from which to get the information. Defaults to `"main"` branch. + repo_type (`str`, *optional*): + The type of the repository from which to get the information (`"model"`, `"dataset"` or `"space"`. + Defaults to `"model"`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `List[Union[RepoFile, RepoFolder]]`: + The information about the paths, as a list of [`RepoFile`] and [`RepoFolder`] objects. + + Raises: + [`~utils.RepositoryNotFoundError`]: + If repository is not found (error 404): wrong repo_id/repo_type, private but not authenticated or repo + does not exist. + [`~utils.RevisionNotFoundError`]: + If revision is not found (error 404) on the repo. + + Example: + ```py + >>> from huggingface_hub import get_paths_info + >>> paths_info = get_paths_info("allenai/c4", ["README.md", "en"], repo_type="dataset") + >>> paths_info + [ + RepoFile(path='README.md', size=2379, blob_id='f84cb4c97182890fc1dbdeaf1a6a468fd27b4fff', lfs=None, last_commit=None, security=None), + RepoFolder(path='en', tree_id='dc943c4c40f53d02b31ced1defa7e5f438d5862e', last_commit=None) + ] + ``` + """ + repo_type = repo_type or constants.REPO_TYPE_MODEL + revision = quote(revision, safe="") if revision is not None else constants.DEFAULT_REVISION + headers = self._build_hf_headers(token=token) + + response = get_session().post( + f"{self.endpoint}/api/{repo_type}s/{repo_id}/paths-info/{revision}", + data={ + "paths": paths if isinstance(paths, list) else [paths], + "expand": expand, + }, + headers=headers, + ) + hf_raise_for_status(response) + paths_info = response.json() + return [ + RepoFile(**path_info) if path_info["type"] == "file" else RepoFolder(**path_info) + for path_info in paths_info + ] + + @validate_hf_hub_args + def super_squash_history( + self, + repo_id: str, + *, + branch: Optional[str] = None, + commit_message: Optional[str] = None, + repo_type: Optional[str] = None, + token: Union[str, bool, None] = None, + ) -> None: + """Squash commit history on a branch for a repo on the Hub. + + Squashing the repo history is useful when you know you'll make hundreds of commits and you don't want to + clutter the history. Squashing commits can only be performed from the head of a branch. + + + + Once squashed, the commit history cannot be retrieved. This is a non-revertible operation. + + + + + + Once the history of a branch has been squashed, it is not possible to merge it back into another branch since + their history will have diverged. + + + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated by a `/`. + branch (`str`, *optional*): + The branch to squash. Defaults to the head of the `"main"` branch. + commit_message (`str`, *optional*): + The commit message to use for the squashed commit. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if listing commits from a dataset or a Space, `None` or `"model"` if + listing from a model. Default is `None`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Raises: + [`~utils.RepositoryNotFoundError`]: + If repository is not found (error 404): wrong repo_id/repo_type, private but not authenticated or repo + does not exist. + [`~utils.RevisionNotFoundError`]: + If the branch to squash cannot be found. + [`~utils.BadRequestError`]: + If invalid reference for a branch. You cannot squash history on tags. + + Example: + ```py + >>> from huggingface_hub import HfApi + >>> api = HfApi() + + # Create repo + >>> repo_id = api.create_repo("test-squash").repo_id + + # Make a lot of commits. + >>> api.upload_file(repo_id=repo_id, path_in_repo="file.txt", path_or_fileobj=b"content") + >>> api.upload_file(repo_id=repo_id, path_in_repo="lfs.bin", path_or_fileobj=b"content") + >>> api.upload_file(repo_id=repo_id, path_in_repo="file.txt", path_or_fileobj=b"another_content") + + # Squash history + >>> api.super_squash_history(repo_id=repo_id) + ``` + """ + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL + if repo_type not in constants.REPO_TYPES: + raise ValueError("Invalid repo type") + if branch is None: + branch = constants.DEFAULT_REVISION + + # Prepare request + url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/super-squash/{branch}" + headers = self._build_hf_headers(token=token) + commit_message = commit_message or f"Super-squash branch '{branch}' using huggingface_hub" + + # Super-squash + response = get_session().post(url=url, headers=headers, json={"message": commit_message}) + hf_raise_for_status(response) + + @validate_hf_hub_args + def create_repo( + self, + repo_id: str, + *, + token: Union[str, bool, None] = None, + private: Optional[bool] = None, + repo_type: Optional[str] = None, + exist_ok: bool = False, + resource_group_id: Optional[str] = None, + space_sdk: Optional[str] = None, + space_hardware: Optional[SpaceHardware] = None, + space_storage: Optional[SpaceStorage] = None, + space_sleep_time: Optional[int] = None, + space_secrets: Optional[List[Dict[str, str]]] = None, + space_variables: Optional[List[Dict[str, str]]] = None, + ) -> RepoUrl: + """Create an empty repo on the HuggingFace Hub. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + private (`bool`, *optional*): + Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if uploading to a dataset or + space, `None` or `"model"` if uploading to a model. Default is + `None`. + exist_ok (`bool`, *optional*, defaults to `False`): + If `True`, do not raise an error if repo already exists. + resource_group_id (`str`, *optional*): + Resource group in which to create the repo. Resource groups is only available for organizations and + allow to define which members of the organization can access the resource. The ID of a resource group + can be found in the URL of the resource's page on the Hub (e.g. `"66670e5163145ca562cb1988"`). + To learn more about resource groups, see https://huggingface.co/docs/hub/en/security-resource-groups. + space_sdk (`str`, *optional*): + Choice of SDK to use if repo_type is "space". Can be "streamlit", "gradio", "docker", or "static". + space_hardware (`SpaceHardware` or `str`, *optional*): + Choice of Hardware if repo_type is "space". See [`SpaceHardware`] for a complete list. + space_storage (`SpaceStorage` or `str`, *optional*): + Choice of persistent storage tier. Example: `"small"`. See [`SpaceStorage`] for a complete list. + space_sleep_time (`int`, *optional*): + Number of seconds of inactivity to wait before a Space is put to sleep. Set to `-1` if you don't want + your Space to sleep (default behavior for upgraded hardware). For free hardware, you can't configure + the sleep time (value is fixed to 48 hours of inactivity). + See https://huggingface.co/docs/hub/spaces-gpus#sleep-time for more details. + space_secrets (`List[Dict[str, str]]`, *optional*): + A list of secret keys to set in your Space. Each item is in the form `{"key": ..., "value": ..., "description": ...}` where description is optional. + For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets. + space_variables (`List[Dict[str, str]]`, *optional*): + A list of public environment variables to set in your Space. Each item is in the form `{"key": ..., "value": ..., "description": ...}` where description is optional. + For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets-and-environment-variables. + + Returns: + [`RepoUrl`]: URL to the newly created repo. Value is a subclass of `str` containing + attributes like `endpoint`, `repo_type` and `repo_id`. + """ + organization, name = repo_id.split("/") if "/" in repo_id else (None, repo_id) + + path = f"{self.endpoint}/api/repos/create" + + if repo_type not in constants.REPO_TYPES: + raise ValueError("Invalid repo type") + + json: Dict[str, Any] = {"name": name, "organization": organization} + if private is not None: + json["private"] = private + if repo_type is not None: + json["type"] = repo_type + if repo_type == "space": + if space_sdk is None: + raise ValueError( + "No space_sdk provided. `create_repo` expects space_sdk to be one" + f" of {constants.SPACES_SDK_TYPES} when repo_type is 'space'`" + ) + if space_sdk not in constants.SPACES_SDK_TYPES: + raise ValueError(f"Invalid space_sdk. Please choose one of {constants.SPACES_SDK_TYPES}.") + json["sdk"] = space_sdk + + if space_sdk is not None and repo_type != "space": + warnings.warn("Ignoring provided space_sdk because repo_type is not 'space'.") + + function_args = [ + "space_hardware", + "space_storage", + "space_sleep_time", + "space_secrets", + "space_variables", + ] + json_keys = ["hardware", "storageTier", "sleepTimeSeconds", "secrets", "variables"] + values = [space_hardware, space_storage, space_sleep_time, space_secrets, space_variables] + + if repo_type == "space": + json.update({k: v for k, v in zip(json_keys, values) if v is not None}) + else: + provided_space_args = [key for key, value in zip(function_args, values) if value is not None] + + if provided_space_args: + warnings.warn(f"Ignoring provided {', '.join(provided_space_args)} because repo_type is not 'space'.") + + if getattr(self, "_lfsmultipartthresh", None): + # Testing purposes only. + # See https://github.com/huggingface/huggingface_hub/pull/733/files#r820604472 + json["lfsmultipartthresh"] = self._lfsmultipartthresh # type: ignore + + if resource_group_id is not None: + json["resourceGroupId"] = resource_group_id + + headers = self._build_hf_headers(token=token) + while True: + r = get_session().post(path, headers=headers, json=json) + if r.status_code == 409 and "Cannot create repo: another conflicting operation is in progress" in r.text: + # Since https://github.com/huggingface/moon-landing/pull/7272 (private repo), it is not possible to + # concurrently create repos on the Hub for a same user. This is rarely an issue, except when running + # tests. To avoid any inconvenience, we retry to create the repo for this specific error. + # NOTE: This could have being fixed directly in the tests but adding it here should fixed CIs for all + # dependent libraries. + # NOTE: If a fix is implemented server-side, we should be able to remove this retry mechanism. + logger.debug("Create repo failed due to a concurrency issue. Retrying...") + continue + break + + try: + hf_raise_for_status(r) + except HTTPError as err: + if exist_ok and err.response.status_code == 409: + # Repo already exists and `exist_ok=True` + pass + elif exist_ok and err.response.status_code == 403: + # No write permission on the namespace but repo might already exist + try: + self.repo_info(repo_id=repo_id, repo_type=repo_type, token=token) + if repo_type is None or repo_type == constants.REPO_TYPE_MODEL: + return RepoUrl(f"{self.endpoint}/{repo_id}") + return RepoUrl(f"{self.endpoint}/{repo_type}/{repo_id}") + except HfHubHTTPError: + raise err + else: + raise + + d = r.json() + return RepoUrl(d["url"], endpoint=self.endpoint) + + @validate_hf_hub_args + def delete_repo( + self, + repo_id: str, + *, + token: Union[str, bool, None] = None, + repo_type: Optional[str] = None, + missing_ok: bool = False, + ) -> None: + """ + Delete a repo from the HuggingFace Hub. CAUTION: this is irreversible. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if uploading to a dataset or + space, `None` or `"model"` if uploading to a model. + missing_ok (`bool`, *optional*, defaults to `False`): + If `True`, do not raise an error if repo does not exist. + + Raises: + [`~utils.RepositoryNotFoundError`] + If the repository to delete from cannot be found and `missing_ok` is set to False (default). + """ + organization, name = repo_id.split("/") if "/" in repo_id else (None, repo_id) + + path = f"{self.endpoint}/api/repos/delete" + + if repo_type not in constants.REPO_TYPES: + raise ValueError("Invalid repo type") + + json = {"name": name, "organization": organization} + if repo_type is not None: + json["type"] = repo_type + + headers = self._build_hf_headers(token=token) + r = get_session().delete(path, headers=headers, json=json) + try: + hf_raise_for_status(r) + except RepositoryNotFoundError: + if not missing_ok: + raise + + @_deprecate_method(version="0.29", message="Please use `update_repo_settings` instead.") + @validate_hf_hub_args + def update_repo_visibility( + self, + repo_id: str, + private: bool = False, + *, + token: Union[str, bool, None] = None, + repo_type: Optional[str] = None, + ) -> Dict[str, bool]: + """Update the visibility setting of a repository. + + Deprecated. Use `update_repo_settings` instead. + + Args: + repo_id (`str`, *optional*): + A namespace (user or an organization) and a repo name separated by a `/`. + private (`bool`, *optional*, defaults to `False`): + Whether the model repo should be private. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if uploading to a dataset or + space, `None` or `"model"` if uploading to a model. Default is + `None`. + + Returns: + The HTTP response in json. + + + + Raises the following errors: + + - [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + + + """ + if repo_type not in constants.REPO_TYPES: + raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL # default repo type + + r = get_session().put( + url=f"{self.endpoint}/api/{repo_type}s/{repo_id}/settings", + headers=self._build_hf_headers(token=token), + json={"private": private}, + ) + hf_raise_for_status(r) + return r.json() + + @validate_hf_hub_args + def update_repo_settings( + self, + repo_id: str, + *, + gated: Optional[Literal["auto", "manual", False]] = None, + private: Optional[bool] = None, + token: Union[str, bool, None] = None, + repo_type: Optional[str] = None, + ) -> None: + """ + Update the settings of a repository, including gated access and visibility. + + To give more control over how repos are used, the Hub allows repo authors to enable + access requests for their repos, and also to set the visibility of the repo to private. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated by a /. + gated (`Literal["auto", "manual", False]`, *optional*): + The gated status for the repository. If set to `None` (default), the `gated` setting of the repository won't be updated. + * "auto": The repository is gated, and access requests are automatically approved or denied based on predefined criteria. + * "manual": The repository is gated, and access requests require manual approval. + * False : The repository is not gated, and anyone can access it. + private (`bool`, *optional*): + Whether the model repo should be private. + token (`Union[str, bool, None]`, *optional*): + A valid user access token (string). Defaults to the locally saved token, + which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass False. + repo_type (`str`, *optional*): + The type of the repository to update settings from (`"model"`, `"dataset"` or `"space"`). + Defaults to `"model"`. + + Raises: + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If gated is not one of "auto", "manual", or False. + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If repo_type is not one of the values in constants.REPO_TYPES. + [`~utils.HfHubHTTPError`]: + If the request to the Hugging Face Hub API fails. + [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + """ + + if repo_type not in constants.REPO_TYPES: + raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL # default repo type + + # Check if both gated and private are None + if gated is None and private is None: + raise ValueError("At least one of 'gated' or 'private' must be provided.") + + # Build headers + headers = self._build_hf_headers(token=token) + + # Prepare the JSON payload for the PUT request + payload: Dict = {} + + if gated is not None: + if gated not in ["auto", "manual", False]: + raise ValueError(f"Invalid gated status, must be one of 'auto', 'manual', or False. Got '{gated}'.") + payload["gated"] = gated + + if private is not None: + payload["private"] = private + + r = get_session().put( + url=f"{self.endpoint}/api/{repo_type}s/{repo_id}/settings", + headers=headers, + json=payload, + ) + hf_raise_for_status(r) + + def move_repo( + self, + from_id: str, + to_id: str, + *, + repo_type: Optional[str] = None, + token: Union[str, bool, None] = None, + ): + """ + Moving a repository from namespace1/repo_name1 to namespace2/repo_name2 + + Note there are certain limitations. For more information about moving + repositories, please see + https://hf.co/docs/hub/repositories-settings#renaming-or-transferring-a-repo. + + Args: + from_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. Original repository identifier. + to_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. Final repository identifier. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if uploading to a dataset or + space, `None` or `"model"` if uploading to a model. Default is + `None`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + + + Raises the following errors: + + - [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + + + """ + if len(from_id.split("/")) != 2: + raise ValueError(f"Invalid repo_id: {from_id}. It should have a namespace (:namespace:/:repo_name:)") + + if len(to_id.split("/")) != 2: + raise ValueError(f"Invalid repo_id: {to_id}. It should have a namespace (:namespace:/:repo_name:)") + + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL # Hub won't accept `None`. + + json = {"fromRepo": from_id, "toRepo": to_id, "type": repo_type} + + path = f"{self.endpoint}/api/repos/move" + headers = self._build_hf_headers(token=token) + r = get_session().post(path, headers=headers, json=json) + try: + hf_raise_for_status(r) + except HfHubHTTPError as e: + e.append_to_message( + "\nFor additional documentation please see" + " https://hf.co/docs/hub/repositories-settings#renaming-or-transferring-a-repo." + ) + raise + + @overload + def create_commit( # type: ignore + self, + repo_id: str, + operations: Iterable[CommitOperation], + *, + commit_message: str, + commit_description: Optional[str] = None, + token: Union[str, bool, None] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + create_pr: Optional[bool] = None, + num_threads: int = 5, + parent_commit: Optional[str] = None, + run_as_future: Literal[False] = ..., + ) -> CommitInfo: ... + + @overload + def create_commit( + self, + repo_id: str, + operations: Iterable[CommitOperation], + *, + commit_message: str, + commit_description: Optional[str] = None, + token: Union[str, bool, None] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + create_pr: Optional[bool] = None, + num_threads: int = 5, + parent_commit: Optional[str] = None, + run_as_future: Literal[True] = ..., + ) -> Future[CommitInfo]: ... + + @validate_hf_hub_args + @future_compatible + def create_commit( + self, + repo_id: str, + operations: Iterable[CommitOperation], + *, + commit_message: str, + commit_description: Optional[str] = None, + token: Union[str, bool, None] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + create_pr: Optional[bool] = None, + num_threads: int = 5, + parent_commit: Optional[str] = None, + run_as_future: bool = False, + ) -> Union[CommitInfo, Future[CommitInfo]]: + """ + Creates a commit in the given repo, deleting & uploading files as needed. + + + + The input list of `CommitOperation` will be mutated during the commit process. Do not reuse the same objects + for multiple commits. + + + + + + `create_commit` assumes that the repo already exists on the Hub. If you get a + Client error 404, please make sure you are authenticated and that `repo_id` and + `repo_type` are set correctly. If repo does not exist, create it first using + [`~hf_api.create_repo`]. + + + + + + `create_commit` is limited to 25k LFS files and a 1GB payload for regular files. + + + + Args: + repo_id (`str`): + The repository in which the commit will be created, for example: + `"username/custom_transformers"` + + operations (`Iterable` of [`~hf_api.CommitOperation`]): + An iterable of operations to include in the commit, either: + + - [`~hf_api.CommitOperationAdd`] to upload a file + - [`~hf_api.CommitOperationDelete`] to delete a file + - [`~hf_api.CommitOperationCopy`] to copy a file + + Operation objects will be mutated to include information relative to the upload. Do not reuse the + same objects for multiple commits. + + commit_message (`str`): + The summary (first line) of the commit that will be created. + + commit_description (`str`, *optional*): + The description of the commit that will be created + + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if uploading to a dataset or + space, `None` or `"model"` if uploading to a model. Default is + `None`. + + revision (`str`, *optional*): + The git revision to commit from. Defaults to the head of the `"main"` branch. + + create_pr (`boolean`, *optional*): + Whether or not to create a Pull Request with that commit. Defaults to `False`. + If `revision` is not set, PR is opened against the `"main"` branch. If + `revision` is set and is a branch, PR is opened against this branch. If + `revision` is set and is not a branch name (example: a commit oid), an + `RevisionNotFoundError` is returned by the server. + + num_threads (`int`, *optional*): + Number of concurrent threads for uploading files. Defaults to 5. + Setting it to 2 means at most 2 files will be uploaded concurrently. + + parent_commit (`str`, *optional*): + The OID / SHA of the parent commit, as a hexadecimal string. + Shorthands (7 first characters) are also supported. If specified and `create_pr` is `False`, + the commit will fail if `revision` does not point to `parent_commit`. If specified and `create_pr` + is `True`, the pull request will be created from `parent_commit`. Specifying `parent_commit` + ensures the repo has not changed before committing the changes, and can be especially useful + if the repo is updated / committed to concurrently. + run_as_future (`bool`, *optional*): + Whether or not to run this method in the background. Background jobs are run sequentially without + blocking the main thread. Passing `run_as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects) + object. Defaults to `False`. + + Returns: + [`CommitInfo`] or `Future`: + Instance of [`CommitInfo`] containing information about the newly created commit (commit hash, commit + url, pr url, commit message,...). If `run_as_future=True` is passed, returns a Future object which will + contain the result when executed. + + Raises: + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If commit message is empty. + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If parent commit is not a valid commit OID. + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If a README.md file with an invalid metadata section is committed. In this case, the commit will fail + early, before trying to upload any file. + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If `create_pr` is `True` and revision is neither `None` nor `"main"`. + [`~utils.RepositoryNotFoundError`]: + If repository is not found (error 404): wrong repo_id/repo_type, private + but not authenticated or repo does not exist. + """ + if parent_commit is not None and not constants.REGEX_COMMIT_OID.fullmatch(parent_commit): + raise ValueError( + f"`parent_commit` is not a valid commit OID. It must match the following regex: {constants.REGEX_COMMIT_OID}" + ) + + if commit_message is None or len(commit_message) == 0: + raise ValueError("`commit_message` can't be empty, please pass a value.") + + commit_description = commit_description if commit_description is not None else "" + repo_type = repo_type if repo_type is not None else constants.REPO_TYPE_MODEL + if repo_type not in constants.REPO_TYPES: + raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") + unquoted_revision = revision or constants.DEFAULT_REVISION + revision = quote(unquoted_revision, safe="") + create_pr = create_pr if create_pr is not None else False + + headers = self._build_hf_headers(token=token) + + operations = list(operations) + additions = [op for op in operations if isinstance(op, CommitOperationAdd)] + copies = [op for op in operations if isinstance(op, CommitOperationCopy)] + nb_additions = len(additions) + nb_copies = len(copies) + nb_deletions = len(operations) - nb_additions - nb_copies + + for addition in additions: + if addition._is_committed: + raise ValueError( + f"CommitOperationAdd {addition} has already being committed and cannot be reused. Please create a" + " new CommitOperationAdd object if you want to create a new commit." + ) + + if repo_type != "dataset": + for addition in additions: + if addition.path_in_repo.endswith((".arrow", ".parquet")): + warnings.warn( + f"It seems that you are about to commit a data file ({addition.path_in_repo}) to a {repo_type}" + " repository. You are sure this is intended? If you are trying to upload a dataset, please" + " set `repo_type='dataset'` or `--repo-type=dataset` in a CLI." + ) + + logger.debug( + f"About to commit to the hub: {len(additions)} addition(s), {len(copies)} copie(s) and" + f" {nb_deletions} deletion(s)." + ) + + # If updating a README.md file, make sure the metadata format is valid + # It's better to fail early than to fail after all the files have been uploaded. + for addition in additions: + if addition.path_in_repo == "README.md": + with addition.as_file() as file: + content = file.read().decode() + self._validate_yaml(content, repo_type=repo_type, token=token) + # Skip other additions after `README.md` has been processed + break + + # If updating twice the same file or update then delete a file in a single commit + _warn_on_overwriting_operations(operations) + + self.preupload_lfs_files( + repo_id=repo_id, + additions=additions, + token=token, + repo_type=repo_type, + revision=unquoted_revision, # first-class methods take unquoted revision + create_pr=create_pr, + num_threads=num_threads, + free_memory=False, # do not remove `CommitOperationAdd.path_or_fileobj` on LFS files for "normal" users + ) + + # Remove no-op operations (files that have not changed) + operations_without_no_op = [] + for operation in operations: + if ( + isinstance(operation, CommitOperationAdd) + and operation._remote_oid is not None + and operation._remote_oid == operation._local_oid + ): + # File already exists on the Hub and has not changed: we can skip it. + logger.debug(f"Skipping upload for '{operation.path_in_repo}' as the file has not changed.") + continue + operations_without_no_op.append(operation) + if len(operations) != len(operations_without_no_op): + logger.info( + f"Removing {len(operations) - len(operations_without_no_op)} file(s) from commit that have not changed." + ) + + # Return early if empty commit + if len(operations_without_no_op) == 0: + logger.warning("No files have been modified since last commit. Skipping to prevent empty commit.") + + # Get latest commit info + try: + info = self.repo_info(repo_id=repo_id, repo_type=repo_type, revision=unquoted_revision, token=token) + except RepositoryNotFoundError as e: + e.append_to_message(_CREATE_COMMIT_NO_REPO_ERROR_MESSAGE) + raise + + # Return commit info based on latest commit + url_prefix = self.endpoint + if repo_type is not None and repo_type != constants.REPO_TYPE_MODEL: + url_prefix = f"{url_prefix}/{repo_type}s" + return CommitInfo( + commit_url=f"{url_prefix}/{repo_id}/commit/{info.sha}", + commit_message=commit_message, + commit_description=commit_description, + oid=info.sha, # type: ignore[arg-type] + ) + + files_to_copy = _fetch_files_to_copy( + copies=copies, + repo_type=repo_type, + repo_id=repo_id, + headers=headers, + revision=unquoted_revision, + endpoint=self.endpoint, + ) + commit_payload = _prepare_commit_payload( + operations=operations, + files_to_copy=files_to_copy, + commit_message=commit_message, + commit_description=commit_description, + parent_commit=parent_commit, + ) + commit_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/commit/{revision}" + + def _payload_as_ndjson() -> Iterable[bytes]: + for item in commit_payload: + yield json.dumps(item).encode() + yield b"\n" + + headers = { + # See https://github.com/huggingface/huggingface_hub/issues/1085#issuecomment-1265208073 + "Content-Type": "application/x-ndjson", + **headers, + } + data = b"".join(_payload_as_ndjson()) + params = {"create_pr": "1"} if create_pr else None + + try: + commit_resp = get_session().post(url=commit_url, headers=headers, data=data, params=params) + hf_raise_for_status(commit_resp, endpoint_name="commit") + except RepositoryNotFoundError as e: + e.append_to_message(_CREATE_COMMIT_NO_REPO_ERROR_MESSAGE) + raise + except EntryNotFoundError as e: + if nb_deletions > 0 and "A file with this name doesn't exist" in str(e): + e.append_to_message( + "\nMake sure to differentiate file and folder paths in delete" + " operations with a trailing '/' or using `is_folder=True/False`." + ) + raise + + # Mark additions as committed (cannot be reused in another commit) + for addition in additions: + addition._is_committed = True + + commit_data = commit_resp.json() + return CommitInfo( + commit_url=commit_data["commitUrl"], + commit_message=commit_message, + commit_description=commit_description, + oid=commit_data["commitOid"], + pr_url=commit_data["pullRequestUrl"] if create_pr else None, + ) + + def preupload_lfs_files( + self, + repo_id: str, + additions: Iterable[CommitOperationAdd], + *, + token: Union[str, bool, None] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + create_pr: Optional[bool] = None, + num_threads: int = 5, + free_memory: bool = True, + gitignore_content: Optional[str] = None, + ): + """Pre-upload LFS files to S3 in preparation on a future commit. + + This method is useful if you are generating the files to upload on-the-fly and you don't want to store them + in memory before uploading them all at once. + + + + This is a power-user method. You shouldn't need to call it directly to make a normal commit. + Use [`create_commit`] directly instead. + + + + + + Commit operations will be mutated during the process. In particular, the attached `path_or_fileobj` will be + removed after the upload to save memory (and replaced by an empty `bytes` object). Do not reuse the same + objects except to pass them to [`create_commit`]. If you don't want to remove the attached content from the + commit operation object, pass `free_memory=False`. + + + + Args: + repo_id (`str`): + The repository in which you will commit the files, for example: `"username/custom_transformers"`. + + operations (`Iterable` of [`CommitOperationAdd`]): + The list of files to upload. Warning: the objects in this list will be mutated to include information + relative to the upload. Do not reuse the same objects for multiple commits. + + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + repo_type (`str`, *optional*): + The type of repository to upload to (e.g. `"model"` -default-, `"dataset"` or `"space"`). + + revision (`str`, *optional*): + The git revision to commit from. Defaults to the head of the `"main"` branch. + + create_pr (`boolean`, *optional*): + Whether or not you plan to create a Pull Request with that commit. Defaults to `False`. + + num_threads (`int`, *optional*): + Number of concurrent threads for uploading files. Defaults to 5. + Setting it to 2 means at most 2 files will be uploaded concurrently. + + gitignore_content (`str`, *optional*): + The content of the `.gitignore` file to know which files should be ignored. The order of priority + is to first check if `gitignore_content` is passed, then check if the `.gitignore` file is present + in the list of files to commit and finally default to the `.gitignore` file already hosted on the Hub + (if any). + + Example: + ```py + >>> from huggingface_hub import CommitOperationAdd, preupload_lfs_files, create_commit, create_repo + + >>> repo_id = create_repo("test_preupload").repo_id + + # Generate and preupload LFS files one by one + >>> operations = [] # List of all `CommitOperationAdd` objects that will be generated + >>> for i in range(5): + ... content = ... # generate binary content + ... addition = CommitOperationAdd(path_in_repo=f"shard_{i}_of_5.bin", path_or_fileobj=content) + ... preupload_lfs_files(repo_id, additions=[addition]) # upload + free memory + ... operations.append(addition) + + # Create commit + >>> create_commit(repo_id, operations=operations, commit_message="Commit all shards") + ``` + """ + repo_type = repo_type if repo_type is not None else constants.REPO_TYPE_MODEL + if repo_type not in constants.REPO_TYPES: + raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") + revision = quote(revision, safe="") if revision is not None else constants.DEFAULT_REVISION + create_pr = create_pr if create_pr is not None else False + headers = self._build_hf_headers(token=token) + + # Check if a `gitignore` file is being committed to the Hub. + additions = list(additions) + if gitignore_content is None: + for addition in additions: + if addition.path_in_repo == ".gitignore": + with addition.as_file() as f: + gitignore_content = f.read().decode() + break + + # Filter out already uploaded files + new_additions = [addition for addition in additions if not addition._is_uploaded] + + # Check which new files are LFS + try: + _fetch_upload_modes( + additions=new_additions, + repo_type=repo_type, + repo_id=repo_id, + headers=headers, + revision=revision, + endpoint=self.endpoint, + create_pr=create_pr or False, + gitignore_content=gitignore_content, + ) + except RepositoryNotFoundError as e: + e.append_to_message(_CREATE_COMMIT_NO_REPO_ERROR_MESSAGE) + raise + + # Filter out regular files + new_lfs_additions = [addition for addition in new_additions if addition._upload_mode == "lfs"] + + # Filter out files listed in .gitignore + new_lfs_additions_to_upload = [] + for addition in new_lfs_additions: + if addition._should_ignore: + logger.debug(f"Skipping upload for LFS file '{addition.path_in_repo}' (ignored by gitignore file).") + else: + new_lfs_additions_to_upload.append(addition) + if len(new_lfs_additions) != len(new_lfs_additions_to_upload): + logger.info( + f"Skipped upload for {len(new_lfs_additions) - len(new_lfs_additions_to_upload)} LFS file(s) " + "(ignored by gitignore file)." + ) + + # Upload new LFS files + _upload_lfs_files( + additions=new_lfs_additions_to_upload, + repo_type=repo_type, + repo_id=repo_id, + headers=headers, + endpoint=self.endpoint, + num_threads=num_threads, + # If `create_pr`, we don't want to check user permission on the revision as users with read permission + # should still be able to create PRs even if they don't have write permission on the target branch of the + # PR (i.e. `revision`). + revision=revision if not create_pr else None, + ) + for addition in new_lfs_additions_to_upload: + addition._is_uploaded = True + if free_memory: + addition.path_or_fileobj = b"" + + @overload + def upload_file( # type: ignore + self, + *, + path_or_fileobj: Union[str, Path, bytes, BinaryIO], + path_in_repo: str, + repo_id: str, + token: Union[str, bool, None] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + commit_message: Optional[str] = None, + commit_description: Optional[str] = None, + create_pr: Optional[bool] = None, + parent_commit: Optional[str] = None, + run_as_future: Literal[False] = ..., + ) -> CommitInfo: ... + + @overload + def upload_file( + self, + *, + path_or_fileobj: Union[str, Path, bytes, BinaryIO], + path_in_repo: str, + repo_id: str, + token: Union[str, bool, None] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + commit_message: Optional[str] = None, + commit_description: Optional[str] = None, + create_pr: Optional[bool] = None, + parent_commit: Optional[str] = None, + run_as_future: Literal[True] = ..., + ) -> Future[CommitInfo]: ... + + @validate_hf_hub_args + @future_compatible + def upload_file( + self, + *, + path_or_fileobj: Union[str, Path, bytes, BinaryIO], + path_in_repo: str, + repo_id: str, + token: Union[str, bool, None] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + commit_message: Optional[str] = None, + commit_description: Optional[str] = None, + create_pr: Optional[bool] = None, + parent_commit: Optional[str] = None, + run_as_future: bool = False, + ) -> Union[CommitInfo, Future[CommitInfo]]: + """ + Upload a local file (up to 50 GB) to the given repo. The upload is done + through a HTTP post request, and doesn't require git or git-lfs to be + installed. + + Args: + path_or_fileobj (`str`, `Path`, `bytes`, or `IO`): + Path to a file on the local machine or binary data stream / + fileobj / buffer. + path_in_repo (`str`): + Relative filepath in the repo, for example: + `"checkpoints/1fec34a/weights.bin"` + repo_id (`str`): + The repository to which the file will be uploaded, for example: + `"username/custom_transformers"` + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if uploading to a dataset or + space, `None` or `"model"` if uploading to a model. Default is + `None`. + revision (`str`, *optional*): + The git revision to commit from. Defaults to the head of the `"main"` branch. + commit_message (`str`, *optional*): + The summary / title / first line of the generated commit + commit_description (`str` *optional*) + The description of the generated commit + create_pr (`boolean`, *optional*): + Whether or not to create a Pull Request with that commit. Defaults to `False`. + If `revision` is not set, PR is opened against the `"main"` branch. If + `revision` is set and is a branch, PR is opened against this branch. If + `revision` is set and is not a branch name (example: a commit oid), an + `RevisionNotFoundError` is returned by the server. + parent_commit (`str`, *optional*): + The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported. + If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`. + If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`. + Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be + especially useful if the repo is updated / committed to concurrently. + run_as_future (`bool`, *optional*): + Whether or not to run this method in the background. Background jobs are run sequentially without + blocking the main thread. Passing `run_as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects) + object. Defaults to `False`. + + + Returns: + [`CommitInfo`] or `Future`: + Instance of [`CommitInfo`] containing information about the newly created commit (commit hash, commit + url, pr url, commit message,...). If `run_as_future=True` is passed, returns a Future object which will + contain the result when executed. + + + Raises the following errors: + + - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) + if the HuggingFace API returned an error + - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + if some parameter value is invalid + - [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + - [`~utils.RevisionNotFoundError`] + If the revision to download from cannot be found. + + + + + + `upload_file` assumes that the repo already exists on the Hub. If you get a + Client error 404, please make sure you are authenticated and that `repo_id` and + `repo_type` are set correctly. If repo does not exist, create it first using + [`~hf_api.create_repo`]. + + + + Example: + + ```python + >>> from huggingface_hub import upload_file + + >>> with open("./local/filepath", "rb") as fobj: + ... upload_file( + ... path_or_fileobj=fileobj, + ... path_in_repo="remote/file/path.h5", + ... repo_id="username/my-dataset", + ... repo_type="dataset", + ... token="my_token", + ... ) + "https://huggingface.co/datasets/username/my-dataset/blob/main/remote/file/path.h5" + + >>> upload_file( + ... path_or_fileobj=".\\\\local\\\\file\\\\path", + ... path_in_repo="remote/file/path.h5", + ... repo_id="username/my-model", + ... token="my_token", + ... ) + "https://huggingface.co/username/my-model/blob/main/remote/file/path.h5" + + >>> upload_file( + ... path_or_fileobj=".\\\\local\\\\file\\\\path", + ... path_in_repo="remote/file/path.h5", + ... repo_id="username/my-model", + ... token="my_token", + ... create_pr=True, + ... ) + "https://huggingface.co/username/my-model/blob/refs%2Fpr%2F1/remote/file/path.h5" + ``` + """ + if repo_type not in constants.REPO_TYPES: + raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") + + commit_message = ( + commit_message if commit_message is not None else f"Upload {path_in_repo} with huggingface_hub" + ) + operation = CommitOperationAdd( + path_or_fileobj=path_or_fileobj, + path_in_repo=path_in_repo, + ) + + commit_info = self.create_commit( + repo_id=repo_id, + repo_type=repo_type, + operations=[operation], + commit_message=commit_message, + commit_description=commit_description, + token=token, + revision=revision, + create_pr=create_pr, + parent_commit=parent_commit, + ) + + if commit_info.pr_url is not None: + revision = quote(_parse_revision_from_pr_url(commit_info.pr_url), safe="") + if repo_type in constants.REPO_TYPES_URL_PREFIXES: + repo_id = constants.REPO_TYPES_URL_PREFIXES[repo_type] + repo_id + revision = revision if revision is not None else constants.DEFAULT_REVISION + + return CommitInfo( + commit_url=commit_info.commit_url, + commit_message=commit_info.commit_message, + commit_description=commit_info.commit_description, + oid=commit_info.oid, + pr_url=commit_info.pr_url, + # Similar to `hf_hub_url` but it's "blob" instead of "resolve" + # TODO: remove this in v1.0 + _url=f"{self.endpoint}/{repo_id}/blob/{revision}/{path_in_repo}", + ) + + @overload + def upload_folder( # type: ignore + self, + *, + repo_id: str, + folder_path: Union[str, Path], + path_in_repo: Optional[str] = None, + commit_message: Optional[str] = None, + commit_description: Optional[str] = None, + token: Union[str, bool, None] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + create_pr: Optional[bool] = None, + parent_commit: Optional[str] = None, + allow_patterns: Optional[Union[List[str], str]] = None, + ignore_patterns: Optional[Union[List[str], str]] = None, + delete_patterns: Optional[Union[List[str], str]] = None, + run_as_future: Literal[False] = ..., + ) -> CommitInfo: ... + + @overload + def upload_folder( # type: ignore + self, + *, + repo_id: str, + folder_path: Union[str, Path], + path_in_repo: Optional[str] = None, + commit_message: Optional[str] = None, + commit_description: Optional[str] = None, + token: Union[str, bool, None] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + create_pr: Optional[bool] = None, + parent_commit: Optional[str] = None, + allow_patterns: Optional[Union[List[str], str]] = None, + ignore_patterns: Optional[Union[List[str], str]] = None, + delete_patterns: Optional[Union[List[str], str]] = None, + run_as_future: Literal[True] = ..., + ) -> Future[CommitInfo]: ... + + @validate_hf_hub_args + @future_compatible + def upload_folder( + self, + *, + repo_id: str, + folder_path: Union[str, Path], + path_in_repo: Optional[str] = None, + commit_message: Optional[str] = None, + commit_description: Optional[str] = None, + token: Union[str, bool, None] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + create_pr: Optional[bool] = None, + parent_commit: Optional[str] = None, + allow_patterns: Optional[Union[List[str], str]] = None, + ignore_patterns: Optional[Union[List[str], str]] = None, + delete_patterns: Optional[Union[List[str], str]] = None, + run_as_future: bool = False, + ) -> Union[CommitInfo, Future[CommitInfo]]: + """ + Upload a local folder to the given repo. The upload is done through a HTTP requests, and doesn't require git or + git-lfs to be installed. + + The structure of the folder will be preserved. Files with the same name already present in the repository will + be overwritten. Others will be left untouched. + + Use the `allow_patterns` and `ignore_patterns` arguments to specify which files to upload. These parameters + accept either a single pattern or a list of patterns. Patterns are Standard Wildcards (globbing patterns) as + documented [here](https://tldp.org/LDP/GNU-Linux-Tools-Summary/html/x11655.htm). If both `allow_patterns` and + `ignore_patterns` are provided, both constraints apply. By default, all files from the folder are uploaded. + + Use the `delete_patterns` argument to specify remote files you want to delete. Input type is the same as for + `allow_patterns` (see above). If `path_in_repo` is also provided, the patterns are matched against paths + relative to this folder. For example, `upload_folder(..., path_in_repo="experiment", delete_patterns="logs/*")` + will delete any remote file under `./experiment/logs/`. Note that the `.gitattributes` file will not be deleted + even if it matches the patterns. + + Any `.git/` folder present in any subdirectory will be ignored. However, please be aware that the `.gitignore` + file is not taken into account. + + Uses `HfApi.create_commit` under the hood. + + Args: + repo_id (`str`): + The repository to which the file will be uploaded, for example: + `"username/custom_transformers"` + folder_path (`str` or `Path`): + Path to the folder to upload on the local file system + path_in_repo (`str`, *optional*): + Relative path of the directory in the repo, for example: + `"checkpoints/1fec34a/results"`. Will default to the root folder of the repository. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if uploading to a dataset or + space, `None` or `"model"` if uploading to a model. Default is + `None`. + revision (`str`, *optional*): + The git revision to commit from. Defaults to the head of the `"main"` branch. + commit_message (`str`, *optional*): + The summary / title / first line of the generated commit. Defaults to: + `f"Upload {path_in_repo} with huggingface_hub"` + commit_description (`str` *optional*): + The description of the generated commit + create_pr (`boolean`, *optional*): + Whether or not to create a Pull Request with that commit. Defaults to `False`. If `revision` is not + set, PR is opened against the `"main"` branch. If `revision` is set and is a branch, PR is opened + against this branch. If `revision` is set and is not a branch name (example: a commit oid), an + `RevisionNotFoundError` is returned by the server. + parent_commit (`str`, *optional*): + The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported. + If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`. + If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`. + Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be + especially useful if the repo is updated / committed to concurrently. + allow_patterns (`List[str]` or `str`, *optional*): + If provided, only files matching at least one pattern are uploaded. + ignore_patterns (`List[str]` or `str`, *optional*): + If provided, files matching any of the patterns are not uploaded. + delete_patterns (`List[str]` or `str`, *optional*): + If provided, remote files matching any of the patterns will be deleted from the repo while committing + new files. This is useful if you don't know which files have already been uploaded. + Note: to avoid discrepancies the `.gitattributes` file is not deleted even if it matches the pattern. + run_as_future (`bool`, *optional*): + Whether or not to run this method in the background. Background jobs are run sequentially without + blocking the main thread. Passing `run_as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects) + object. Defaults to `False`. + + Returns: + [`CommitInfo`] or `Future`: + Instance of [`CommitInfo`] containing information about the newly created commit (commit hash, commit + url, pr url, commit message,...). If `run_as_future=True` is passed, returns a Future object which will + contain the result when executed. + + + + Raises the following errors: + + - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) + if the HuggingFace API returned an error + - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + if some parameter value is invalid + + + + + + `upload_folder` assumes that the repo already exists on the Hub. If you get a Client error 404, please make + sure you are authenticated and that `repo_id` and `repo_type` are set correctly. If repo does not exist, create + it first using [`~hf_api.create_repo`]. + + + + + + When dealing with a large folder (thousands of files or hundreds of GB), we recommend using [`~hf_api.upload_large_folder`] instead. + + + + Example: + + ```python + # Upload checkpoints folder except the log files + >>> upload_folder( + ... folder_path="local/checkpoints", + ... path_in_repo="remote/experiment/checkpoints", + ... repo_id="username/my-dataset", + ... repo_type="datasets", + ... token="my_token", + ... ignore_patterns="**/logs/*.txt", + ... ) + # "https://huggingface.co/datasets/username/my-dataset/tree/main/remote/experiment/checkpoints" + + # Upload checkpoints folder including logs while deleting existing logs from the repo + # Useful if you don't know exactly which log files have already being pushed + >>> upload_folder( + ... folder_path="local/checkpoints", + ... path_in_repo="remote/experiment/checkpoints", + ... repo_id="username/my-dataset", + ... repo_type="datasets", + ... token="my_token", + ... delete_patterns="**/logs/*.txt", + ... ) + "https://huggingface.co/datasets/username/my-dataset/tree/main/remote/experiment/checkpoints" + + # Upload checkpoints folder while creating a PR + >>> upload_folder( + ... folder_path="local/checkpoints", + ... path_in_repo="remote/experiment/checkpoints", + ... repo_id="username/my-dataset", + ... repo_type="datasets", + ... token="my_token", + ... create_pr=True, + ... ) + "https://huggingface.co/datasets/username/my-dataset/tree/refs%2Fpr%2F1/remote/experiment/checkpoints" + + ``` + """ + if repo_type not in constants.REPO_TYPES: + raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") + + # By default, upload folder to the root directory in repo. + if path_in_repo is None: + path_in_repo = "" + + # Do not upload .git folder + if ignore_patterns is None: + ignore_patterns = [] + elif isinstance(ignore_patterns, str): + ignore_patterns = [ignore_patterns] + ignore_patterns += DEFAULT_IGNORE_PATTERNS + + delete_operations = self._prepare_folder_deletions( + repo_id=repo_id, + repo_type=repo_type, + revision=constants.DEFAULT_REVISION if create_pr else revision, + token=token, + path_in_repo=path_in_repo, + delete_patterns=delete_patterns, + ) + add_operations = self._prepare_upload_folder_additions( + folder_path, + path_in_repo, + allow_patterns=allow_patterns, + ignore_patterns=ignore_patterns, + token=token, + repo_type=repo_type, + ) + + # Optimize operations: if some files will be overwritten, we don't need to delete them first + if len(add_operations) > 0: + added_paths = set(op.path_in_repo for op in add_operations) + delete_operations = [ + delete_op for delete_op in delete_operations if delete_op.path_in_repo not in added_paths + ] + commit_operations = delete_operations + add_operations + + commit_message = commit_message or "Upload folder using huggingface_hub" + + commit_info = self.create_commit( + repo_type=repo_type, + repo_id=repo_id, + operations=commit_operations, + commit_message=commit_message, + commit_description=commit_description, + token=token, + revision=revision, + create_pr=create_pr, + parent_commit=parent_commit, + ) + + # Create url to uploaded folder (for legacy return value) + if create_pr and commit_info.pr_url is not None: + revision = quote(_parse_revision_from_pr_url(commit_info.pr_url), safe="") + if repo_type in constants.REPO_TYPES_URL_PREFIXES: + repo_id = constants.REPO_TYPES_URL_PREFIXES[repo_type] + repo_id + revision = revision if revision is not None else constants.DEFAULT_REVISION + + return CommitInfo( + commit_url=commit_info.commit_url, + commit_message=commit_info.commit_message, + commit_description=commit_info.commit_description, + oid=commit_info.oid, + pr_url=commit_info.pr_url, + # Similar to `hf_hub_url` but it's "tree" instead of "resolve" + # TODO: remove this in v1.0 + _url=f"{self.endpoint}/{repo_id}/tree/{revision}/{path_in_repo}", + ) + + @validate_hf_hub_args + def delete_file( + self, + path_in_repo: str, + repo_id: str, + *, + token: Union[str, bool, None] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + commit_message: Optional[str] = None, + commit_description: Optional[str] = None, + create_pr: Optional[bool] = None, + parent_commit: Optional[str] = None, + ) -> CommitInfo: + """ + Deletes a file in the given repo. + + Args: + path_in_repo (`str`): + Relative filepath in the repo, for example: + `"checkpoints/1fec34a/weights.bin"` + repo_id (`str`): + The repository from which the file will be deleted, for example: + `"username/custom_transformers"` + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if the file is in a dataset or + space, `None` or `"model"` if in a model. Default is `None`. + revision (`str`, *optional*): + The git revision to commit from. Defaults to the head of the `"main"` branch. + commit_message (`str`, *optional*): + The summary / title / first line of the generated commit. Defaults to + `f"Delete {path_in_repo} with huggingface_hub"`. + commit_description (`str` *optional*) + The description of the generated commit + create_pr (`boolean`, *optional*): + Whether or not to create a Pull Request with that commit. Defaults to `False`. + If `revision` is not set, PR is opened against the `"main"` branch. If + `revision` is set and is a branch, PR is opened against this branch. If + `revision` is set and is not a branch name (example: a commit oid), an + `RevisionNotFoundError` is returned by the server. + parent_commit (`str`, *optional*): + The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported. + If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`. + If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`. + Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be + especially useful if the repo is updated / committed to concurrently. + + + + + Raises the following errors: + + - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) + if the HuggingFace API returned an error + - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + if some parameter value is invalid + - [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + - [`~utils.RevisionNotFoundError`] + If the revision to download from cannot be found. + - [`~utils.EntryNotFoundError`] + If the file to download cannot be found. + + + + """ + commit_message = ( + commit_message if commit_message is not None else f"Delete {path_in_repo} with huggingface_hub" + ) + + operations = [CommitOperationDelete(path_in_repo=path_in_repo)] + + return self.create_commit( + repo_id=repo_id, + repo_type=repo_type, + token=token, + operations=operations, + revision=revision, + commit_message=commit_message, + commit_description=commit_description, + create_pr=create_pr, + parent_commit=parent_commit, + ) + + @validate_hf_hub_args + def delete_files( + self, + repo_id: str, + delete_patterns: List[str], + *, + token: Union[bool, str, None] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + commit_message: Optional[str] = None, + commit_description: Optional[str] = None, + create_pr: Optional[bool] = None, + parent_commit: Optional[str] = None, + ) -> CommitInfo: + """ + Delete files from a repository on the Hub. + + If a folder path is provided, the entire folder is deleted as well as + all files it contained. + + Args: + repo_id (`str`): + The repository from which the folder will be deleted, for example: + `"username/custom_transformers"` + delete_patterns (`List[str]`): + List of files or folders to delete. Each string can either be + a file path, a folder path or a Unix shell-style wildcard. + E.g. `["file.txt", "folder/", "data/*.parquet"]` + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + to the stored token. + repo_type (`str`, *optional*): + Type of the repo to delete files from. Can be `"model"`, + `"dataset"` or `"space"`. Defaults to `"model"`. + revision (`str`, *optional*): + The git revision to commit from. Defaults to the head of the `"main"` branch. + commit_message (`str`, *optional*): + The summary (first line) of the generated commit. Defaults to + `f"Delete files using huggingface_hub"`. + commit_description (`str` *optional*) + The description of the generated commit. + create_pr (`boolean`, *optional*): + Whether or not to create a Pull Request with that commit. Defaults to `False`. + If `revision` is not set, PR is opened against the `"main"` branch. If + `revision` is set and is a branch, PR is opened against this branch. If + `revision` is set and is not a branch name (example: a commit oid), an + `RevisionNotFoundError` is returned by the server. + parent_commit (`str`, *optional*): + The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported. + If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`. + If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`. + Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be + especially useful if the repo is updated / committed to concurrently. + """ + operations = self._prepare_folder_deletions( + repo_id=repo_id, repo_type=repo_type, delete_patterns=delete_patterns, path_in_repo="", revision=revision + ) + + if commit_message is None: + commit_message = f"Delete files {' '.join(delete_patterns)} with huggingface_hub" + + return self.create_commit( + repo_id=repo_id, + repo_type=repo_type, + token=token, + operations=operations, + revision=revision, + commit_message=commit_message, + commit_description=commit_description, + create_pr=create_pr, + parent_commit=parent_commit, + ) + + @validate_hf_hub_args + def delete_folder( + self, + path_in_repo: str, + repo_id: str, + *, + token: Union[bool, str, None] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + commit_message: Optional[str] = None, + commit_description: Optional[str] = None, + create_pr: Optional[bool] = None, + parent_commit: Optional[str] = None, + ) -> CommitInfo: + """ + Deletes a folder in the given repo. + + Simple wrapper around [`create_commit`] method. + + Args: + path_in_repo (`str`): + Relative folder path in the repo, for example: `"checkpoints/1fec34a"`. + repo_id (`str`): + The repository from which the folder will be deleted, for example: + `"username/custom_transformers"` + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + to the stored token. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if the folder is in a dataset or + space, `None` or `"model"` if in a model. Default is `None`. + revision (`str`, *optional*): + The git revision to commit from. Defaults to the head of the `"main"` branch. + commit_message (`str`, *optional*): + The summary / title / first line of the generated commit. Defaults to + `f"Delete folder {path_in_repo} with huggingface_hub"`. + commit_description (`str` *optional*) + The description of the generated commit. + create_pr (`boolean`, *optional*): + Whether or not to create a Pull Request with that commit. Defaults to `False`. + If `revision` is not set, PR is opened against the `"main"` branch. If + `revision` is set and is a branch, PR is opened against this branch. If + `revision` is set and is not a branch name (example: a commit oid), an + `RevisionNotFoundError` is returned by the server. + parent_commit (`str`, *optional*): + The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported. + If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`. + If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`. + Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be + especially useful if the repo is updated / committed to concurrently. + """ + return self.create_commit( + repo_id=repo_id, + repo_type=repo_type, + token=token, + operations=[CommitOperationDelete(path_in_repo=path_in_repo, is_folder=True)], + revision=revision, + commit_message=( + commit_message if commit_message is not None else f"Delete folder {path_in_repo} with huggingface_hub" + ), + commit_description=commit_description, + create_pr=create_pr, + parent_commit=parent_commit, + ) + + def upload_large_folder( + self, + repo_id: str, + folder_path: Union[str, Path], + *, + repo_type: str, # Repo type is required! + revision: Optional[str] = None, + private: Optional[bool] = None, + allow_patterns: Optional[Union[List[str], str]] = None, + ignore_patterns: Optional[Union[List[str], str]] = None, + num_workers: Optional[int] = None, + print_report: bool = True, + print_report_every: int = 60, + ) -> None: + """Upload a large folder to the Hub in the most resilient way possible. + + Several workers are started to upload files in an optimized way. Before being committed to a repo, files must be + hashed and be pre-uploaded if they are LFS files. Workers will perform these tasks for each file in the folder. + At each step, some metadata information about the upload process is saved in the folder under `.cache/.huggingface/` + to be able to resume the process if interrupted. The whole process might result in several commits. + + Args: + repo_id (`str`): + The repository to which the file will be uploaded. + E.g. `"HuggingFaceTB/smollm-corpus"`. + folder_path (`str` or `Path`): + Path to the folder to upload on the local file system. + repo_type (`str`): + Type of the repository. Must be one of `"model"`, `"dataset"` or `"space"`. + Unlike in all other `HfApi` methods, `repo_type` is explicitly required here. This is to avoid + any mistake when uploading a large folder to the Hub, and therefore prevent from having to re-upload + everything. + revision (`str`, `optional`): + The branch to commit to. If not provided, the `main` branch will be used. + private (`bool`, `optional`): + Whether the repository should be private. + If `None` (default), the repo will be public unless the organization's default is private. + allow_patterns (`List[str]` or `str`, *optional*): + If provided, only files matching at least one pattern are uploaded. + ignore_patterns (`List[str]` or `str`, *optional*): + If provided, files matching any of the patterns are not uploaded. + num_workers (`int`, *optional*): + Number of workers to start. Defaults to `os.cpu_count() - 2` (minimum 2). + A higher number of workers may speed up the process if your machine allows it. However, on machines with a + slower connection, it is recommended to keep the number of workers low to ensure better resumability. + Indeed, partially uploaded files will have to be completely re-uploaded if the process is interrupted. + print_report (`bool`, *optional*): + Whether to print a report of the upload progress. Defaults to True. + Report is printed to `sys.stdout` every X seconds (60 by defaults) and overwrites the previous report. + print_report_every (`int`, *optional*): + Frequency at which the report is printed. Defaults to 60 seconds. + + + + A few things to keep in mind: + - Repository limits still apply: https://huggingface.co/docs/hub/repositories-recommendations + - Do not start several processes in parallel. + - You can interrupt and resume the process at any time. + - Do not upload the same folder to several repositories. If you need to do so, you must delete the local `.cache/.huggingface/` folder first. + + + + + + While being much more robust to upload large folders, `upload_large_folder` is more limited than [`upload_folder`] feature-wise. In practice: + - you cannot set a custom `path_in_repo`. If you want to upload to a subfolder, you need to set the proper structure locally. + - you cannot set a custom `commit_message` and `commit_description` since multiple commits are created. + - you cannot delete from the repo while uploading. Please make a separate commit first. + - you cannot create a PR directly. Please create a PR first (from the UI or using [`create_pull_request`]) and then commit to it by passing `revision`. + + + + **Technical details:** + + `upload_large_folder` process is as follow: + 1. (Check parameters and setup.) + 2. Create repo if missing. + 3. List local files to upload. + 4. Start workers. Workers can perform the following tasks: + - Hash a file. + - Get upload mode (regular or LFS) for a list of files. + - Pre-upload an LFS file. + - Commit a bunch of files. + Once a worker finishes a task, it will move on to the next task based on the priority list (see below) until + all files are uploaded and committed. + 5. While workers are up, regularly print a report to sys.stdout. + + Order of priority: + 1. Commit if more than 5 minutes since last commit attempt (and at least 1 file). + 2. Commit if at least 150 files are ready to commit. + 3. Get upload mode if at least 10 files have been hashed. + 4. Pre-upload LFS file if at least 1 file and no worker is pre-uploading. + 5. Hash file if at least 1 file and no worker is hashing. + 6. Get upload mode if at least 1 file and no worker is getting upload mode. + 7. Pre-upload LFS file if at least 1 file (exception: if hf_transfer is enabled, only 1 worker can preupload LFS at a time). + 8. Hash file if at least 1 file to hash. + 9. Get upload mode if at least 1 file to get upload mode. + 10. Commit if at least 1 file to commit and at least 1 min since last commit attempt. + 11. Commit if at least 1 file to commit and all other queues are empty. + + Special rules: + - If `hf_transfer` is enabled, only 1 LFS uploader at a time. Otherwise the CPU would be bloated by `hf_transfer`. + - Only one worker can commit at a time. + - If no tasks are available, the worker waits for 10 seconds before checking again. + """ + return upload_large_folder_internal( + self, + repo_id=repo_id, + folder_path=folder_path, + repo_type=repo_type, + revision=revision, + private=private, + allow_patterns=allow_patterns, + ignore_patterns=ignore_patterns, + num_workers=num_workers, + print_report=print_report, + print_report_every=print_report_every, + ) + + @validate_hf_hub_args + def get_hf_file_metadata( + self, + *, + url: str, + token: Union[bool, str, None] = None, + proxies: Optional[Dict] = None, + timeout: Optional[float] = constants.DEFAULT_REQUEST_TIMEOUT, + ) -> HfFileMetadata: + """Fetch metadata of a file versioned on the Hub for a given url. + + Args: + url (`str`): + File url, for example returned by [`hf_hub_url`]. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + proxies (`dict`, *optional*): + Dictionary mapping protocol to the URL of the proxy passed to `requests.request`. + timeout (`float`, *optional*, defaults to 10): + How many seconds to wait for the server to send metadata before giving up. + + Returns: + A [`HfFileMetadata`] object containing metadata such as location, etag, size and commit_hash. + """ + if token is None: + # Cannot do `token = token or self.token` as token can be `False`. + token = self.token + + return get_hf_file_metadata( + url=url, + token=token, + proxies=proxies, + timeout=timeout, + library_name=self.library_name, + library_version=self.library_version, + user_agent=self.user_agent, + ) + + @validate_hf_hub_args + def hf_hub_download( + self, + repo_id: str, + filename: str, + *, + subfolder: Optional[str] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + cache_dir: Union[str, Path, None] = None, + local_dir: Union[str, Path, None] = None, + force_download: bool = False, + proxies: Optional[Dict] = None, + etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT, + token: Union[bool, str, None] = None, + local_files_only: bool = False, + # Deprecated args + resume_download: Optional[bool] = None, + force_filename: Optional[str] = None, + local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto", + ) -> str: + """Download a given file if it's not already present in the local cache. + + The new cache file layout looks like this: + - The cache directory contains one subfolder per repo_id (namespaced by repo type) + - inside each repo folder: + - refs is a list of the latest known revision => commit_hash pairs + - blobs contains the actual file blobs (identified by their git-sha or sha256, depending on + whether they're LFS files or not) + - snapshots contains one subfolder per commit, each "commit" contains the subset of the files + that have been resolved at that particular commit. Each filename is a symlink to the blob + at that particular commit. + + ``` + [ 96] . + └── [ 160] models--julien-c--EsperBERTo-small + ├── [ 160] blobs + │ ├── [321M] 403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd + │ ├── [ 398] 7cb18dc9bafbfcf74629a4b760af1b160957a83e + │ └── [1.4K] d7edf6bd2a681fb0175f7735299831ee1b22b812 + ├── [ 96] refs + │ └── [ 40] main + └── [ 128] snapshots + ├── [ 128] 2439f60ef33a0d46d85da5001d52aeda5b00ce9f + │ ├── [ 52] README.md -> ../../blobs/d7edf6bd2a681fb0175f7735299831ee1b22b812 + │ └── [ 76] pytorch_model.bin -> ../../blobs/403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd + └── [ 128] bbc77c8132af1cc5cf678da3f1ddf2de43606d48 + ├── [ 52] README.md -> ../../blobs/7cb18dc9bafbfcf74629a4b760af1b160957a83e + └── [ 76] pytorch_model.bin -> ../../blobs/403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd + ``` + + If `local_dir` is provided, the file structure from the repo will be replicated in this location. When using this + option, the `cache_dir` will not be used and a `.cache/huggingface/` folder will be created at the root of `local_dir` + to store some metadata related to the downloaded files. While this mechanism is not as robust as the main + cache-system, it's optimized for regularly pulling the latest version of a repository. + + Args: + repo_id (`str`): + A user or an organization name and a repo name separated by a `/`. + filename (`str`): + The name of the file in the repo. + subfolder (`str`, *optional*): + An optional value corresponding to a folder inside the model repo. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if downloading from a dataset or space, + `None` or `"model"` if downloading from a model. Default is `None`. + revision (`str`, *optional*): + An optional Git revision id which can be a branch name, a tag, or a + commit hash. + cache_dir (`str`, `Path`, *optional*): + Path to the folder where cached files are stored. + local_dir (`str` or `Path`, *optional*): + If provided, the downloaded file will be placed under this directory. + force_download (`bool`, *optional*, defaults to `False`): + Whether the file should be downloaded even if it already exists in + the local cache. + proxies (`dict`, *optional*): + Dictionary mapping protocol to the URL of the proxy passed to + `requests.request`. + etag_timeout (`float`, *optional*, defaults to `10`): + When fetching ETag, how many seconds to wait for the server to send + data before giving up which is passed to `requests.request`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + local_files_only (`bool`, *optional*, defaults to `False`): + If `True`, avoid downloading the file and return the path to the + local cached file if it exists. + + Returns: + `str`: Local path of file or if networking is off, last version of file cached on disk. + + Raises: + [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + [`~utils.RevisionNotFoundError`] + If the revision to download from cannot be found. + [`~utils.EntryNotFoundError`] + If the file to download cannot be found. + [`~utils.LocalEntryNotFoundError`] + If network is disabled or unavailable and file is not found in cache. + [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError) + If `token=True` but the token cannot be found. + [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) + If ETag cannot be determined. + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If some parameter value is invalid. + """ + from .file_download import hf_hub_download + + if token is None: + # Cannot do `token = token or self.token` as token can be `False`. + token = self.token + + return hf_hub_download( + repo_id=repo_id, + filename=filename, + subfolder=subfolder, + repo_type=repo_type, + revision=revision, + endpoint=self.endpoint, + library_name=self.library_name, + library_version=self.library_version, + cache_dir=cache_dir, + local_dir=local_dir, + local_dir_use_symlinks=local_dir_use_symlinks, + user_agent=self.user_agent, + force_download=force_download, + force_filename=force_filename, + proxies=proxies, + etag_timeout=etag_timeout, + resume_download=resume_download, + token=token, + headers=self.headers, + local_files_only=local_files_only, + ) + + @validate_hf_hub_args + def snapshot_download( + self, + repo_id: str, + *, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + cache_dir: Union[str, Path, None] = None, + local_dir: Union[str, Path, None] = None, + proxies: Optional[Dict] = None, + etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT, + force_download: bool = False, + token: Union[bool, str, None] = None, + local_files_only: bool = False, + allow_patterns: Optional[Union[List[str], str]] = None, + ignore_patterns: Optional[Union[List[str], str]] = None, + max_workers: int = 8, + tqdm_class: Optional[base_tqdm] = None, + # Deprecated args + local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto", + resume_download: Optional[bool] = None, + ) -> str: + """Download repo files. + + Download a whole snapshot of a repo's files at the specified revision. This is useful when you want all files from + a repo, because you don't know which ones you will need a priori. All files are nested inside a folder in order + to keep their actual filename relative to that folder. You can also filter which files to download using + `allow_patterns` and `ignore_patterns`. + + If `local_dir` is provided, the file structure from the repo will be replicated in this location. When using this + option, the `cache_dir` will not be used and a `.cache/huggingface/` folder will be created at the root of `local_dir` + to store some metadata related to the downloaded files.While this mechanism is not as robust as the main + cache-system, it's optimized for regularly pulling the latest version of a repository. + + An alternative would be to clone the repo but this requires git and git-lfs to be installed and properly + configured. It is also not possible to filter which files to download when cloning a repository using git. + + Args: + repo_id (`str`): + A user or an organization name and a repo name separated by a `/`. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if downloading from a dataset or space, + `None` or `"model"` if downloading from a model. Default is `None`. + revision (`str`, *optional*): + An optional Git revision id which can be a branch name, a tag, or a + commit hash. + cache_dir (`str`, `Path`, *optional*): + Path to the folder where cached files are stored. + local_dir (`str` or `Path`, *optional*): + If provided, the downloaded files will be placed under this directory. + proxies (`dict`, *optional*): + Dictionary mapping protocol to the URL of the proxy passed to + `requests.request`. + etag_timeout (`float`, *optional*, defaults to `10`): + When fetching ETag, how many seconds to wait for the server to send + data before giving up which is passed to `requests.request`. + force_download (`bool`, *optional*, defaults to `False`): + Whether the file should be downloaded even if it already exists in the local cache. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + local_files_only (`bool`, *optional*, defaults to `False`): + If `True`, avoid downloading the file and return the path to the + local cached file if it exists. + allow_patterns (`List[str]` or `str`, *optional*): + If provided, only files matching at least one pattern are downloaded. + ignore_patterns (`List[str]` or `str`, *optional*): + If provided, files matching any of the patterns are not downloaded. + max_workers (`int`, *optional*): + Number of concurrent threads to download files (1 thread = 1 file download). + Defaults to 8. + tqdm_class (`tqdm`, *optional*): + If provided, overwrites the default behavior for the progress bar. Passed + argument must inherit from `tqdm.auto.tqdm` or at least mimic its behavior. + Note that the `tqdm_class` is not passed to each individual download. + Defaults to the custom HF progress bar that can be disabled by setting + `HF_HUB_DISABLE_PROGRESS_BARS` environment variable. + + Returns: + `str`: folder path of the repo snapshot. + + Raises: + [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + [`~utils.RevisionNotFoundError`] + If the revision to download from cannot be found. + [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError) + If `token=True` and the token cannot be found. + [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) if + ETag cannot be determined. + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + if some parameter value is invalid. + """ + from ._snapshot_download import snapshot_download + + if token is None: + # Cannot do `token = token or self.token` as token can be `False`. + token = self.token + + return snapshot_download( + repo_id=repo_id, + repo_type=repo_type, + revision=revision, + endpoint=self.endpoint, + cache_dir=cache_dir, + local_dir=local_dir, + local_dir_use_symlinks=local_dir_use_symlinks, + library_name=self.library_name, + library_version=self.library_version, + user_agent=self.user_agent, + proxies=proxies, + etag_timeout=etag_timeout, + resume_download=resume_download, + force_download=force_download, + token=token, + local_files_only=local_files_only, + allow_patterns=allow_patterns, + ignore_patterns=ignore_patterns, + max_workers=max_workers, + tqdm_class=tqdm_class, + ) + + def get_safetensors_metadata( + self, + repo_id: str, + *, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + token: Union[bool, str, None] = None, + ) -> SafetensorsRepoMetadata: + """ + Parse metadata for a safetensors repo on the Hub. + + We first check if the repo has a single safetensors file or a sharded safetensors repo. If it's a single + safetensors file, we parse the metadata from this file. If it's a sharded safetensors repo, we parse the + metadata from the index file and then parse the metadata from each shard. + + To parse metadata from a single safetensors file, use [`parse_safetensors_file_metadata`]. + + For more details regarding the safetensors format, check out https://huggingface.co/docs/safetensors/index#format. + + Args: + repo_id (`str`): + A user or an organization name and a repo name separated by a `/`. + filename (`str`): + The name of the file in the repo. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if the file is in a dataset or space, `None` or `"model"` if in a + model. Default is `None`. + revision (`str`, *optional*): + The git revision to fetch the file from. Can be a branch name, a tag, or a commit hash. Defaults to the + head of the `"main"` branch. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`SafetensorsRepoMetadata`]: information related to safetensors repo. + + Raises: + [`NotASafetensorsRepoError`] + If the repo is not a safetensors repo i.e. doesn't have either a + `model.safetensors` or a `model.safetensors.index.json` file. + [`SafetensorsParsingError`] + If a safetensors file header couldn't be parsed correctly. + + Example: + ```py + # Parse repo with single weights file + >>> metadata = get_safetensors_metadata("bigscience/bloomz-560m") + >>> metadata + SafetensorsRepoMetadata( + metadata=None, + sharded=False, + weight_map={'h.0.input_layernorm.bias': 'model.safetensors', ...}, + files_metadata={'model.safetensors': SafetensorsFileMetadata(...)} + ) + >>> metadata.files_metadata["model.safetensors"].metadata + {'format': 'pt'} + + # Parse repo with sharded model + >>> metadata = get_safetensors_metadata("bigscience/bloom") + Parse safetensors files: 100%|██████████████████████████████████████████| 72/72 [00:12<00:00, 5.78it/s] + >>> metadata + SafetensorsRepoMetadata(metadata={'total_size': 352494542848}, sharded=True, weight_map={...}, files_metadata={...}) + >>> len(metadata.files_metadata) + 72 # All safetensors files have been fetched + + # Parse repo with sharded model + >>> get_safetensors_metadata("runwayml/stable-diffusion-v1-5") + NotASafetensorsRepoError: 'runwayml/stable-diffusion-v1-5' is not a safetensors repo. Couldn't find 'model.safetensors.index.json' or 'model.safetensors' files. + ``` + """ + if self.file_exists( # Single safetensors file => non-sharded model + repo_id=repo_id, + filename=constants.SAFETENSORS_SINGLE_FILE, + repo_type=repo_type, + revision=revision, + token=token, + ): + file_metadata = self.parse_safetensors_file_metadata( + repo_id=repo_id, + filename=constants.SAFETENSORS_SINGLE_FILE, + repo_type=repo_type, + revision=revision, + token=token, + ) + return SafetensorsRepoMetadata( + metadata=None, + sharded=False, + weight_map={ + tensor_name: constants.SAFETENSORS_SINGLE_FILE for tensor_name in file_metadata.tensors.keys() + }, + files_metadata={constants.SAFETENSORS_SINGLE_FILE: file_metadata}, + ) + elif self.file_exists( # Multiple safetensors files => sharded with index + repo_id=repo_id, + filename=constants.SAFETENSORS_INDEX_FILE, + repo_type=repo_type, + revision=revision, + token=token, + ): + # Fetch index + index_file = self.hf_hub_download( + repo_id=repo_id, + filename=constants.SAFETENSORS_INDEX_FILE, + repo_type=repo_type, + revision=revision, + token=token, + ) + with open(index_file) as f: + index = json.load(f) + + weight_map = index.get("weight_map", {}) + + # Fetch metadata per shard + files_metadata = {} + + def _parse(filename: str) -> None: + files_metadata[filename] = self.parse_safetensors_file_metadata( + repo_id=repo_id, filename=filename, repo_type=repo_type, revision=revision, token=token + ) + + thread_map( + _parse, + set(weight_map.values()), + desc="Parse safetensors files", + tqdm_class=hf_tqdm, + ) + + return SafetensorsRepoMetadata( + metadata=index.get("metadata", None), + sharded=True, + weight_map=weight_map, + files_metadata=files_metadata, + ) + else: + # Not a safetensors repo + raise NotASafetensorsRepoError( + f"'{repo_id}' is not a safetensors repo. Couldn't find '{constants.SAFETENSORS_INDEX_FILE}' or '{constants.SAFETENSORS_SINGLE_FILE}' files." + ) + + def parse_safetensors_file_metadata( + self, + repo_id: str, + filename: str, + *, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + token: Union[bool, str, None] = None, + ) -> SafetensorsFileMetadata: + """ + Parse metadata from a safetensors file on the Hub. + + To parse metadata from all safetensors files in a repo at once, use [`get_safetensors_metadata`]. + + For more details regarding the safetensors format, check out https://huggingface.co/docs/safetensors/index#format. + + Args: + repo_id (`str`): + A user or an organization name and a repo name separated by a `/`. + filename (`str`): + The name of the file in the repo. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if the file is in a dataset or space, `None` or `"model"` if in a + model. Default is `None`. + revision (`str`, *optional*): + The git revision to fetch the file from. Can be a branch name, a tag, or a commit hash. Defaults to the + head of the `"main"` branch. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`SafetensorsFileMetadata`]: information related to a safetensors file. + + Raises: + [`NotASafetensorsRepoError`]: + If the repo is not a safetensors repo i.e. doesn't have either a + `model.safetensors` or a `model.safetensors.index.json` file. + [`SafetensorsParsingError`]: + If a safetensors file header couldn't be parsed correctly. + """ + url = hf_hub_url( + repo_id=repo_id, filename=filename, repo_type=repo_type, revision=revision, endpoint=self.endpoint + ) + _headers = self._build_hf_headers(token=token) + + # 1. Fetch first 100kb + # Empirically, 97% of safetensors files have a metadata size < 100kb (over the top 1000 models on the Hub). + # We assume fetching 100kb is faster than making 2 GET requests. Therefore we always fetch the first 100kb to + # avoid the 2nd GET in most cases. + # See https://github.com/huggingface/huggingface_hub/pull/1855#discussion_r1404286419. + response = get_session().get(url, headers={**_headers, "range": "bytes=0-100000"}) + hf_raise_for_status(response) + + # 2. Parse metadata size + metadata_size = struct.unpack(" constants.SAFETENSORS_MAX_HEADER_LENGTH: + raise SafetensorsParsingError( + f"Failed to parse safetensors header for '{filename}' (repo '{repo_id}', revision " + f"'{revision or constants.DEFAULT_REVISION}'): safetensors header is too big. Maximum supported size is " + f"{constants.SAFETENSORS_MAX_HEADER_LENGTH} bytes (got {metadata_size})." + ) + + # 3.a. Get metadata from payload + if metadata_size <= 100000: + metadata_as_bytes = response.content[8 : 8 + metadata_size] + else: # 3.b. Request full metadata + response = get_session().get(url, headers={**_headers, "range": f"bytes=8-{metadata_size+7}"}) + hf_raise_for_status(response) + metadata_as_bytes = response.content + + # 4. Parse json header + try: + metadata_as_dict = json.loads(metadata_as_bytes.decode(errors="ignore")) + except json.JSONDecodeError as e: + raise SafetensorsParsingError( + f"Failed to parse safetensors header for '{filename}' (repo '{repo_id}', revision " + f"'{revision or constants.DEFAULT_REVISION}'): header is not json-encoded string. Please make sure this is a " + "correctly formatted safetensors file." + ) from e + + try: + return SafetensorsFileMetadata( + metadata=metadata_as_dict.get("__metadata__", {}), + tensors={ + key: TensorInfo( + dtype=tensor["dtype"], + shape=tensor["shape"], + data_offsets=tuple(tensor["data_offsets"]), # type: ignore + ) + for key, tensor in metadata_as_dict.items() + if key != "__metadata__" + }, + ) + except (KeyError, IndexError) as e: + raise SafetensorsParsingError( + f"Failed to parse safetensors header for '{filename}' (repo '{repo_id}', revision " + f"'{revision or constants.DEFAULT_REVISION}'): header format not recognized. Please make sure this is a correctly" + " formatted safetensors file." + ) from e + + @validate_hf_hub_args + def create_branch( + self, + repo_id: str, + *, + branch: str, + revision: Optional[str] = None, + token: Union[bool, str, None] = None, + repo_type: Optional[str] = None, + exist_ok: bool = False, + ) -> None: + """ + Create a new branch for a repo on the Hub, starting from the specified revision (defaults to `main`). + To find a revision suiting your needs, you can use [`list_repo_refs`] or [`list_repo_commits`]. + + Args: + repo_id (`str`): + The repository in which the branch will be created. + Example: `"user/my-cool-model"`. + + branch (`str`): + The name of the branch to create. + + revision (`str`, *optional*): + The git revision to create the branch from. It can be a branch name or + the OID/SHA of a commit, as a hexadecimal string. Defaults to the head + of the `"main"` branch. + + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if creating a branch on a dataset or + space, `None` or `"model"` if tagging a model. Default is `None`. + + exist_ok (`bool`, *optional*, defaults to `False`): + If `True`, do not raise an error if branch already exists. + + Raises: + [`~utils.RepositoryNotFoundError`]: + If repository is not found (error 404): wrong repo_id/repo_type, private + but not authenticated or repo does not exist. + [`~utils.BadRequestError`]: + If invalid reference for a branch. Ex: `refs/pr/5` or 'refs/foo/bar'. + [`~utils.HfHubHTTPError`]: + If the branch already exists on the repo (error 409) and `exist_ok` is + set to `False`. + """ + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL + branch = quote(branch, safe="") + + # Prepare request + branch_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/branch/{branch}" + headers = self._build_hf_headers(token=token) + payload = {} + if revision is not None: + payload["startingPoint"] = revision + + # Create branch + response = get_session().post(url=branch_url, headers=headers, json=payload) + try: + hf_raise_for_status(response) + except HfHubHTTPError as e: + if exist_ok and e.response.status_code == 409: + return + elif exist_ok and e.response.status_code == 403: + # No write permission on the namespace but branch might already exist + try: + refs = self.list_repo_refs(repo_id=repo_id, repo_type=repo_type, token=token) + for branch_ref in refs.branches: + if branch_ref.name == branch: + return # Branch already exists => do not raise + except HfHubHTTPError: + pass # We raise the original error if the branch does not exist + raise + + @validate_hf_hub_args + def delete_branch( + self, + repo_id: str, + *, + branch: str, + token: Union[bool, str, None] = None, + repo_type: Optional[str] = None, + ) -> None: + """ + Delete a branch from a repo on the Hub. + + Args: + repo_id (`str`): + The repository in which a branch will be deleted. + Example: `"user/my-cool-model"`. + + branch (`str`): + The name of the branch to delete. + + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if creating a branch on a dataset or + space, `None` or `"model"` if tagging a model. Default is `None`. + + Raises: + [`~utils.RepositoryNotFoundError`]: + If repository is not found (error 404): wrong repo_id/repo_type, private + but not authenticated or repo does not exist. + [`~utils.HfHubHTTPError`]: + If trying to delete a protected branch. Ex: `main` cannot be deleted. + [`~utils.HfHubHTTPError`]: + If trying to delete a branch that does not exist. + + """ + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL + branch = quote(branch, safe="") + + # Prepare request + branch_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/branch/{branch}" + headers = self._build_hf_headers(token=token) + + # Delete branch + response = get_session().delete(url=branch_url, headers=headers) + hf_raise_for_status(response) + + @validate_hf_hub_args + def create_tag( + self, + repo_id: str, + *, + tag: str, + tag_message: Optional[str] = None, + revision: Optional[str] = None, + token: Union[bool, str, None] = None, + repo_type: Optional[str] = None, + exist_ok: bool = False, + ) -> None: + """ + Tag a given commit of a repo on the Hub. + + Args: + repo_id (`str`): + The repository in which a commit will be tagged. + Example: `"user/my-cool-model"`. + + tag (`str`): + The name of the tag to create. + + tag_message (`str`, *optional*): + The description of the tag to create. + + revision (`str`, *optional*): + The git revision to tag. It can be a branch name or the OID/SHA of a + commit, as a hexadecimal string. Shorthands (7 first characters) are + also supported. Defaults to the head of the `"main"` branch. + + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if tagging a dataset or + space, `None` or `"model"` if tagging a model. Default is + `None`. + + exist_ok (`bool`, *optional*, defaults to `False`): + If `True`, do not raise an error if tag already exists. + + Raises: + [`~utils.RepositoryNotFoundError`]: + If repository is not found (error 404): wrong repo_id/repo_type, private + but not authenticated or repo does not exist. + [`~utils.RevisionNotFoundError`]: + If revision is not found (error 404) on the repo. + [`~utils.HfHubHTTPError`]: + If the branch already exists on the repo (error 409) and `exist_ok` is + set to `False`. + """ + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL + revision = quote(revision, safe="") if revision is not None else constants.DEFAULT_REVISION + + # Prepare request + tag_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/tag/{revision}" + headers = self._build_hf_headers(token=token) + payload = {"tag": tag} + if tag_message is not None: + payload["message"] = tag_message + + # Tag + response = get_session().post(url=tag_url, headers=headers, json=payload) + try: + hf_raise_for_status(response) + except HfHubHTTPError as e: + if not (e.response.status_code == 409 and exist_ok): + raise + + @validate_hf_hub_args + def delete_tag( + self, + repo_id: str, + *, + tag: str, + token: Union[bool, str, None] = None, + repo_type: Optional[str] = None, + ) -> None: + """ + Delete a tag from a repo on the Hub. + + Args: + repo_id (`str`): + The repository in which a tag will be deleted. + Example: `"user/my-cool-model"`. + + tag (`str`): + The name of the tag to delete. + + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if tagging a dataset or space, `None` or + `"model"` if tagging a model. Default is `None`. + + Raises: + [`~utils.RepositoryNotFoundError`]: + If repository is not found (error 404): wrong repo_id/repo_type, private + but not authenticated or repo does not exist. + [`~utils.RevisionNotFoundError`]: + If tag is not found. + """ + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL + tag = quote(tag, safe="") + + # Prepare request + tag_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/tag/{tag}" + headers = self._build_hf_headers(token=token) + + # Un-tag + response = get_session().delete(url=tag_url, headers=headers) + hf_raise_for_status(response) + + @validate_hf_hub_args + def get_full_repo_name( + self, + model_id: str, + *, + organization: Optional[str] = None, + token: Union[bool, str, None] = None, + ): + """ + Returns the repository name for a given model ID and optional + organization. + + Args: + model_id (`str`): + The name of the model. + organization (`str`, *optional*): + If passed, the repository name will be in the organization + namespace instead of the user namespace. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `str`: The repository name in the user's namespace + ({username}/{model_id}) if no organization is passed, and under the + organization namespace ({organization}/{model_id}) otherwise. + """ + if organization is None: + if "/" in model_id: + username = model_id.split("/")[0] + else: + username = self.whoami(token=token)["name"] # type: ignore + return f"{username}/{model_id}" + else: + return f"{organization}/{model_id}" + + @validate_hf_hub_args + def get_repo_discussions( + self, + repo_id: str, + *, + author: Optional[str] = None, + discussion_type: Optional[constants.DiscussionTypeFilter] = None, + discussion_status: Optional[constants.DiscussionStatusFilter] = None, + repo_type: Optional[str] = None, + token: Union[bool, str, None] = None, + ) -> Iterator[Discussion]: + """ + Fetches Discussions and Pull Requests for the given repo. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + author (`str`, *optional*): + Pass a value to filter by discussion author. `None` means no filter. + Default is `None`. + discussion_type (`str`, *optional*): + Set to `"pull_request"` to fetch only pull requests, `"discussion"` + to fetch only discussions. Set to `"all"` or `None` to fetch both. + Default is `None`. + discussion_status (`str`, *optional*): + Set to `"open"` (respectively `"closed"`) to fetch only open + (respectively closed) discussions. Set to `"all"` or `None` + to fetch both. + Default is `None`. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if fetching from a dataset or + space, `None` or `"model"` if fetching from a model. Default is + `None`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `Iterator[Discussion]`: An iterator of [`Discussion`] objects. + + Example: + Collecting all discussions of a repo in a list: + + ```python + >>> from huggingface_hub import get_repo_discussions + >>> discussions_list = list(get_repo_discussions(repo_id="bert-base-uncased")) + ``` + + Iterating over discussions of a repo: + + ```python + >>> from huggingface_hub import get_repo_discussions + >>> for discussion in get_repo_discussions(repo_id="bert-base-uncased"): + ... print(discussion.num, discussion.title) + ``` + """ + if repo_type not in constants.REPO_TYPES: + raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL + if discussion_type is not None and discussion_type not in constants.DISCUSSION_TYPES: + raise ValueError(f"Invalid discussion_type, must be one of {constants.DISCUSSION_TYPES}") + if discussion_status is not None and discussion_status not in constants.DISCUSSION_STATUS: + raise ValueError(f"Invalid discussion_status, must be one of {constants.DISCUSSION_STATUS}") + + headers = self._build_hf_headers(token=token) + path = f"{self.endpoint}/api/{repo_type}s/{repo_id}/discussions" + + params: Dict[str, Union[str, int]] = {} + if discussion_type is not None: + params["type"] = discussion_type + if discussion_status is not None: + params["status"] = discussion_status + if author is not None: + params["author"] = author + + def _fetch_discussion_page(page_index: int): + params["p"] = page_index + resp = get_session().get(path, headers=headers, params=params) + hf_raise_for_status(resp) + paginated_discussions = resp.json() + total = paginated_discussions["count"] + start = paginated_discussions["start"] + discussions = paginated_discussions["discussions"] + has_next = (start + len(discussions)) < total + return discussions, has_next + + has_next, page_index = True, 0 + + while has_next: + discussions, has_next = _fetch_discussion_page(page_index=page_index) + for discussion in discussions: + yield Discussion( + title=discussion["title"], + num=discussion["num"], + author=discussion.get("author", {}).get("name", "deleted"), + created_at=parse_datetime(discussion["createdAt"]), + status=discussion["status"], + repo_id=discussion["repo"]["name"], + repo_type=discussion["repo"]["type"], + is_pull_request=discussion["isPullRequest"], + endpoint=self.endpoint, + ) + page_index = page_index + 1 + + @validate_hf_hub_args + def get_discussion_details( + self, + repo_id: str, + discussion_num: int, + *, + repo_type: Optional[str] = None, + token: Union[bool, str, None] = None, + ) -> DiscussionWithDetails: + """Fetches a Discussion's / Pull Request 's details from the Hub. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + discussion_num (`int`): + The number of the Discussion or Pull Request . Must be a strictly positive integer. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if uploading to a dataset or + space, `None` or `"model"` if uploading to a model. Default is + `None`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: [`DiscussionWithDetails`] + + + + Raises the following errors: + + - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) + if the HuggingFace API returned an error + - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + if some parameter value is invalid + - [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + + + """ + if not isinstance(discussion_num, int) or discussion_num <= 0: + raise ValueError("Invalid discussion_num, must be a positive integer") + if repo_type not in constants.REPO_TYPES: + raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL + + path = f"{self.endpoint}/api/{repo_type}s/{repo_id}/discussions/{discussion_num}" + headers = self._build_hf_headers(token=token) + resp = get_session().get(path, params={"diff": "1"}, headers=headers) + hf_raise_for_status(resp) + + discussion_details = resp.json() + is_pull_request = discussion_details["isPullRequest"] + + target_branch = discussion_details["changes"]["base"] if is_pull_request else None + conflicting_files = discussion_details["filesWithConflicts"] if is_pull_request else None + merge_commit_oid = discussion_details["changes"].get("mergeCommitId", None) if is_pull_request else None + + return DiscussionWithDetails( + title=discussion_details["title"], + num=discussion_details["num"], + author=discussion_details.get("author", {}).get("name", "deleted"), + created_at=parse_datetime(discussion_details["createdAt"]), + status=discussion_details["status"], + repo_id=discussion_details["repo"]["name"], + repo_type=discussion_details["repo"]["type"], + is_pull_request=discussion_details["isPullRequest"], + events=[deserialize_event(evt) for evt in discussion_details["events"]], + conflicting_files=conflicting_files, + target_branch=target_branch, + merge_commit_oid=merge_commit_oid, + diff=discussion_details.get("diff"), + endpoint=self.endpoint, + ) + + @validate_hf_hub_args + def create_discussion( + self, + repo_id: str, + title: str, + *, + token: Union[bool, str, None] = None, + description: Optional[str] = None, + repo_type: Optional[str] = None, + pull_request: bool = False, + ) -> DiscussionWithDetails: + """Creates a Discussion or Pull Request. + + Pull Requests created programmatically will be in `"draft"` status. + + Creating a Pull Request with changes can also be done at once with [`HfApi.create_commit`]. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + title (`str`): + The title of the discussion. It can be up to 200 characters long, + and must be at least 3 characters long. Leading and trailing whitespaces + will be stripped. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + description (`str`, *optional*): + An optional description for the Pull Request. + Defaults to `"Discussion opened with the huggingface_hub Python library"` + pull_request (`bool`, *optional*): + Whether to create a Pull Request or discussion. If `True`, creates a Pull Request. + If `False`, creates a discussion. Defaults to `False`. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if uploading to a dataset or + space, `None` or `"model"` if uploading to a model. Default is + `None`. + + Returns: [`DiscussionWithDetails`] + + + + Raises the following errors: + + - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) + if the HuggingFace API returned an error + - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + if some parameter value is invalid + - [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + + """ + if repo_type not in constants.REPO_TYPES: + raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL + + if description is not None: + description = description.strip() + description = ( + description + if description + else ( + f"{'Pull Request' if pull_request else 'Discussion'} opened with the" + " [huggingface_hub Python" + " library](https://huggingface.co/docs/huggingface_hub)" + ) + ) + + headers = self._build_hf_headers(token=token) + resp = get_session().post( + f"{self.endpoint}/api/{repo_type}s/{repo_id}/discussions", + json={ + "title": title.strip(), + "description": description, + "pullRequest": pull_request, + }, + headers=headers, + ) + hf_raise_for_status(resp) + num = resp.json()["num"] + return self.get_discussion_details( + repo_id=repo_id, + repo_type=repo_type, + discussion_num=num, + token=token, + ) + + @validate_hf_hub_args + def create_pull_request( + self, + repo_id: str, + title: str, + *, + token: Union[bool, str, None] = None, + description: Optional[str] = None, + repo_type: Optional[str] = None, + ) -> DiscussionWithDetails: + """Creates a Pull Request . Pull Requests created programmatically will be in `"draft"` status. + + Creating a Pull Request with changes can also be done at once with [`HfApi.create_commit`]; + + This is a wrapper around [`HfApi.create_discussion`]. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + title (`str`): + The title of the discussion. It can be up to 200 characters long, + and must be at least 3 characters long. Leading and trailing whitespaces + will be stripped. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + description (`str`, *optional*): + An optional description for the Pull Request. + Defaults to `"Discussion opened with the huggingface_hub Python library"` + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if uploading to a dataset or + space, `None` or `"model"` if uploading to a model. Default is + `None`. + + Returns: [`DiscussionWithDetails`] + + + + Raises the following errors: + + - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) + if the HuggingFace API returned an error + - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + if some parameter value is invalid + - [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + + """ + return self.create_discussion( + repo_id=repo_id, + title=title, + token=token, + description=description, + repo_type=repo_type, + pull_request=True, + ) + + def _post_discussion_changes( + self, + *, + repo_id: str, + discussion_num: int, + resource: str, + body: Optional[dict] = None, + token: Union[bool, str, None] = None, + repo_type: Optional[str] = None, + ) -> requests.Response: + """Internal utility to POST changes to a Discussion or Pull Request""" + if not isinstance(discussion_num, int) or discussion_num <= 0: + raise ValueError("Invalid discussion_num, must be a positive integer") + if repo_type not in constants.REPO_TYPES: + raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL + repo_id = f"{repo_type}s/{repo_id}" + + path = f"{self.endpoint}/api/{repo_id}/discussions/{discussion_num}/{resource}" + + headers = self._build_hf_headers(token=token) + resp = requests.post(path, headers=headers, json=body) + hf_raise_for_status(resp) + return resp + + @validate_hf_hub_args + def comment_discussion( + self, + repo_id: str, + discussion_num: int, + comment: str, + *, + token: Union[bool, str, None] = None, + repo_type: Optional[str] = None, + ) -> DiscussionComment: + """Creates a new comment on the given Discussion. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + discussion_num (`int`): + The number of the Discussion or Pull Request . Must be a strictly positive integer. + comment (`str`): + The content of the comment to create. Comments support markdown formatting. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if uploading to a dataset or + space, `None` or `"model"` if uploading to a model. Default is + `None`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`DiscussionComment`]: the newly created comment + + + Examples: + ```python + + >>> comment = \"\"\" + ... Hello @otheruser! + ... + ... # This is a title + ... + ... **This is bold**, *this is italic* and ~this is strikethrough~ + ... And [this](http://url) is a link + ... \"\"\" + + >>> HfApi().comment_discussion( + ... repo_id="username/repo_name", + ... discussion_num=34 + ... comment=comment + ... ) + # DiscussionComment(id='deadbeef0000000', type='comment', ...) + + ``` + + + + Raises the following errors: + + - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) + if the HuggingFace API returned an error + - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + if some parameter value is invalid + - [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + + + """ + resp = self._post_discussion_changes( + repo_id=repo_id, + repo_type=repo_type, + discussion_num=discussion_num, + token=token, + resource="comment", + body={"comment": comment}, + ) + return deserialize_event(resp.json()["newMessage"]) # type: ignore + + @validate_hf_hub_args + def rename_discussion( + self, + repo_id: str, + discussion_num: int, + new_title: str, + *, + token: Union[bool, str, None] = None, + repo_type: Optional[str] = None, + ) -> DiscussionTitleChange: + """Renames a Discussion. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + discussion_num (`int`): + The number of the Discussion or Pull Request . Must be a strictly positive integer. + new_title (`str`): + The new title for the discussion + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if uploading to a dataset or + space, `None` or `"model"` if uploading to a model. Default is + `None`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`DiscussionTitleChange`]: the title change event + + + Examples: + ```python + >>> new_title = "New title, fixing a typo" + >>> HfApi().rename_discussion( + ... repo_id="username/repo_name", + ... discussion_num=34 + ... new_title=new_title + ... ) + # DiscussionTitleChange(id='deadbeef0000000', type='title-change', ...) + + ``` + + + + Raises the following errors: + + - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) + if the HuggingFace API returned an error + - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + if some parameter value is invalid + - [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + + + """ + resp = self._post_discussion_changes( + repo_id=repo_id, + repo_type=repo_type, + discussion_num=discussion_num, + token=token, + resource="title", + body={"title": new_title}, + ) + return deserialize_event(resp.json()["newTitle"]) # type: ignore + + @validate_hf_hub_args + def change_discussion_status( + self, + repo_id: str, + discussion_num: int, + new_status: Literal["open", "closed"], + *, + token: Union[bool, str, None] = None, + comment: Optional[str] = None, + repo_type: Optional[str] = None, + ) -> DiscussionStatusChange: + """Closes or re-opens a Discussion or Pull Request. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + discussion_num (`int`): + The number of the Discussion or Pull Request . Must be a strictly positive integer. + new_status (`str`): + The new status for the discussion, either `"open"` or `"closed"`. + comment (`str`, *optional*): + An optional comment to post with the status change. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if uploading to a dataset or + space, `None` or `"model"` if uploading to a model. Default is + `None`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`DiscussionStatusChange`]: the status change event + + + Examples: + ```python + >>> new_title = "New title, fixing a typo" + >>> HfApi().rename_discussion( + ... repo_id="username/repo_name", + ... discussion_num=34 + ... new_title=new_title + ... ) + # DiscussionStatusChange(id='deadbeef0000000', type='status-change', ...) + + ``` + + + + Raises the following errors: + + - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) + if the HuggingFace API returned an error + - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + if some parameter value is invalid + - [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + + + """ + if new_status not in ["open", "closed"]: + raise ValueError("Invalid status, valid statuses are: 'open' and 'closed'") + body: Dict[str, str] = {"status": new_status} + if comment and comment.strip(): + body["comment"] = comment.strip() + resp = self._post_discussion_changes( + repo_id=repo_id, + repo_type=repo_type, + discussion_num=discussion_num, + token=token, + resource="status", + body=body, + ) + return deserialize_event(resp.json()["newStatus"]) # type: ignore + + @validate_hf_hub_args + def merge_pull_request( + self, + repo_id: str, + discussion_num: int, + *, + token: Union[bool, str, None] = None, + comment: Optional[str] = None, + repo_type: Optional[str] = None, + ): + """Merges a Pull Request. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + discussion_num (`int`): + The number of the Discussion or Pull Request . Must be a strictly positive integer. + comment (`str`, *optional*): + An optional comment to post with the status change. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if uploading to a dataset or + space, `None` or `"model"` if uploading to a model. Default is + `None`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`DiscussionStatusChange`]: the status change event + + + + Raises the following errors: + + - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) + if the HuggingFace API returned an error + - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + if some parameter value is invalid + - [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + + + """ + self._post_discussion_changes( + repo_id=repo_id, + repo_type=repo_type, + discussion_num=discussion_num, + token=token, + resource="merge", + body={"comment": comment.strip()} if comment and comment.strip() else None, + ) + + @validate_hf_hub_args + def edit_discussion_comment( + self, + repo_id: str, + discussion_num: int, + comment_id: str, + new_content: str, + *, + token: Union[bool, str, None] = None, + repo_type: Optional[str] = None, + ) -> DiscussionComment: + """Edits a comment on a Discussion / Pull Request. + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + discussion_num (`int`): + The number of the Discussion or Pull Request . Must be a strictly positive integer. + comment_id (`str`): + The ID of the comment to edit. + new_content (`str`): + The new content of the comment. Comments support markdown formatting. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if uploading to a dataset or + space, `None` or `"model"` if uploading to a model. Default is + `None`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`DiscussionComment`]: the edited comment + + + + Raises the following errors: + + - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) + if the HuggingFace API returned an error + - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + if some parameter value is invalid + - [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + + + """ + resp = self._post_discussion_changes( + repo_id=repo_id, + repo_type=repo_type, + discussion_num=discussion_num, + token=token, + resource=f"comment/{comment_id.lower()}/edit", + body={"content": new_content}, + ) + return deserialize_event(resp.json()["updatedComment"]) # type: ignore + + @validate_hf_hub_args + def hide_discussion_comment( + self, + repo_id: str, + discussion_num: int, + comment_id: str, + *, + token: Union[bool, str, None] = None, + repo_type: Optional[str] = None, + ) -> DiscussionComment: + """Hides a comment on a Discussion / Pull Request. + + + Hidden comments' content cannot be retrieved anymore. Hiding a comment is irreversible. + + + Args: + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + discussion_num (`int`): + The number of the Discussion or Pull Request . Must be a strictly positive integer. + comment_id (`str`): + The ID of the comment to edit. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if uploading to a dataset or + space, `None` or `"model"` if uploading to a model. Default is + `None`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`DiscussionComment`]: the hidden comment + + + + Raises the following errors: + + - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) + if the HuggingFace API returned an error + - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + if some parameter value is invalid + - [`~utils.RepositoryNotFoundError`] + If the repository to download from cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + + + """ + warnings.warn( + "Hidden comments' content cannot be retrieved anymore. Hiding a comment is irreversible.", + UserWarning, + ) + resp = self._post_discussion_changes( + repo_id=repo_id, + repo_type=repo_type, + discussion_num=discussion_num, + token=token, + resource=f"comment/{comment_id.lower()}/hide", + ) + return deserialize_event(resp.json()["updatedComment"]) # type: ignore + + @validate_hf_hub_args + def add_space_secret( + self, + repo_id: str, + key: str, + value: str, + *, + description: Optional[str] = None, + token: Union[bool, str, None] = None, + ) -> None: + """Adds or updates a secret in a Space. + + Secrets allow to set secret keys or tokens to a Space without hardcoding them. + For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets. + + Args: + repo_id (`str`): + ID of the repo to update. Example: `"bigcode/in-the-stack"`. + key (`str`): + Secret key. Example: `"GITHUB_API_KEY"` + value (`str`): + Secret value. Example: `"your_github_api_key"`. + description (`str`, *optional*): + Secret description. Example: `"Github API key to access the Github API"`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + """ + payload = {"key": key, "value": value} + if description is not None: + payload["description"] = description + r = get_session().post( + f"{self.endpoint}/api/spaces/{repo_id}/secrets", + headers=self._build_hf_headers(token=token), + json=payload, + ) + hf_raise_for_status(r) + + @validate_hf_hub_args + def delete_space_secret(self, repo_id: str, key: str, *, token: Union[bool, str, None] = None) -> None: + """Deletes a secret from a Space. + + Secrets allow to set secret keys or tokens to a Space without hardcoding them. + For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets. + + Args: + repo_id (`str`): + ID of the repo to update. Example: `"bigcode/in-the-stack"`. + key (`str`): + Secret key. Example: `"GITHUB_API_KEY"`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + """ + r = get_session().delete( + f"{self.endpoint}/api/spaces/{repo_id}/secrets", + headers=self._build_hf_headers(token=token), + json={"key": key}, + ) + hf_raise_for_status(r) + + @validate_hf_hub_args + def get_space_variables(self, repo_id: str, *, token: Union[bool, str, None] = None) -> Dict[str, SpaceVariable]: + """Gets all variables from a Space. + + Variables allow to set environment variables to a Space without hardcoding them. + For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets-and-environment-variables + + Args: + repo_id (`str`): + ID of the repo to query. Example: `"bigcode/in-the-stack"`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + """ + r = get_session().get( + f"{self.endpoint}/api/spaces/{repo_id}/variables", + headers=self._build_hf_headers(token=token), + ) + hf_raise_for_status(r) + return {k: SpaceVariable(k, v) for k, v in r.json().items()} + + @validate_hf_hub_args + def add_space_variable( + self, + repo_id: str, + key: str, + value: str, + *, + description: Optional[str] = None, + token: Union[bool, str, None] = None, + ) -> Dict[str, SpaceVariable]: + """Adds or updates a variable in a Space. + + Variables allow to set environment variables to a Space without hardcoding them. + For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets-and-environment-variables + + Args: + repo_id (`str`): + ID of the repo to update. Example: `"bigcode/in-the-stack"`. + key (`str`): + Variable key. Example: `"MODEL_REPO_ID"` + value (`str`): + Variable value. Example: `"the_model_repo_id"`. + description (`str`): + Description of the variable. Example: `"Model Repo ID of the implemented model"`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + """ + payload = {"key": key, "value": value} + if description is not None: + payload["description"] = description + r = get_session().post( + f"{self.endpoint}/api/spaces/{repo_id}/variables", + headers=self._build_hf_headers(token=token), + json=payload, + ) + hf_raise_for_status(r) + return {k: SpaceVariable(k, v) for k, v in r.json().items()} + + @validate_hf_hub_args + def delete_space_variable( + self, repo_id: str, key: str, *, token: Union[bool, str, None] = None + ) -> Dict[str, SpaceVariable]: + """Deletes a variable from a Space. + + Variables allow to set environment variables to a Space without hardcoding them. + For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets-and-environment-variables + + Args: + repo_id (`str`): + ID of the repo to update. Example: `"bigcode/in-the-stack"`. + key (`str`): + Variable key. Example: `"MODEL_REPO_ID"` + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + """ + r = get_session().delete( + f"{self.endpoint}/api/spaces/{repo_id}/variables", + headers=self._build_hf_headers(token=token), + json={"key": key}, + ) + hf_raise_for_status(r) + return {k: SpaceVariable(k, v) for k, v in r.json().items()} + + @validate_hf_hub_args + def get_space_runtime(self, repo_id: str, *, token: Union[bool, str, None] = None) -> SpaceRuntime: + """Gets runtime information about a Space. + + Args: + repo_id (`str`): + ID of the repo to update. Example: `"bigcode/in-the-stack"`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + Returns: + [`SpaceRuntime`]: Runtime information about a Space including Space stage and hardware. + """ + r = get_session().get( + f"{self.endpoint}/api/spaces/{repo_id}/runtime", headers=self._build_hf_headers(token=token) + ) + hf_raise_for_status(r) + return SpaceRuntime(r.json()) + + @validate_hf_hub_args + def request_space_hardware( + self, + repo_id: str, + hardware: SpaceHardware, + *, + token: Union[bool, str, None] = None, + sleep_time: Optional[int] = None, + ) -> SpaceRuntime: + """Request new hardware for a Space. + + Args: + repo_id (`str`): + ID of the repo to update. Example: `"bigcode/in-the-stack"`. + hardware (`str` or [`SpaceHardware`]): + Hardware on which to run the Space. Example: `"t4-medium"`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + sleep_time (`int`, *optional*): + Number of seconds of inactivity to wait before a Space is put to sleep. Set to `-1` if you don't want + your Space to sleep (default behavior for upgraded hardware). For free hardware, you can't configure + the sleep time (value is fixed to 48 hours of inactivity). + See https://huggingface.co/docs/hub/spaces-gpus#sleep-time for more details. + Returns: + [`SpaceRuntime`]: Runtime information about a Space including Space stage and hardware. + + + + It is also possible to request hardware directly when creating the Space repo! See [`create_repo`] for details. + + + """ + if sleep_time is not None and hardware == SpaceHardware.CPU_BASIC: + warnings.warn( + "If your Space runs on the default 'cpu-basic' hardware, it will go to sleep if inactive for more" + " than 48 hours. This value is not configurable. If you don't want your Space to deactivate or if" + " you want to set a custom sleep time, you need to upgrade to a paid Hardware.", + UserWarning, + ) + payload: Dict[str, Any] = {"flavor": hardware} + if sleep_time is not None: + payload["sleepTimeSeconds"] = sleep_time + r = get_session().post( + f"{self.endpoint}/api/spaces/{repo_id}/hardware", + headers=self._build_hf_headers(token=token), + json=payload, + ) + hf_raise_for_status(r) + return SpaceRuntime(r.json()) + + @validate_hf_hub_args + def set_space_sleep_time( + self, repo_id: str, sleep_time: int, *, token: Union[bool, str, None] = None + ) -> SpaceRuntime: + """Set a custom sleep time for a Space running on upgraded hardware.. + + Your Space will go to sleep after X seconds of inactivity. You are not billed when your Space is in "sleep" + mode. If a new visitor lands on your Space, it will "wake it up". Only upgraded hardware can have a + configurable sleep time. To know more about the sleep stage, please refer to + https://huggingface.co/docs/hub/spaces-gpus#sleep-time. + + Args: + repo_id (`str`): + ID of the repo to update. Example: `"bigcode/in-the-stack"`. + sleep_time (`int`, *optional*): + Number of seconds of inactivity to wait before a Space is put to sleep. Set to `-1` if you don't want + your Space to pause (default behavior for upgraded hardware). For free hardware, you can't configure + the sleep time (value is fixed to 48 hours of inactivity). + See https://huggingface.co/docs/hub/spaces-gpus#sleep-time for more details. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + Returns: + [`SpaceRuntime`]: Runtime information about a Space including Space stage and hardware. + + + + It is also possible to set a custom sleep time when requesting hardware with [`request_space_hardware`]. + + + """ + r = get_session().post( + f"{self.endpoint}/api/spaces/{repo_id}/sleeptime", + headers=self._build_hf_headers(token=token), + json={"seconds": sleep_time}, + ) + hf_raise_for_status(r) + runtime = SpaceRuntime(r.json()) + + hardware = runtime.requested_hardware or runtime.hardware + if hardware == SpaceHardware.CPU_BASIC: + warnings.warn( + "If your Space runs on the default 'cpu-basic' hardware, it will go to sleep if inactive for more" + " than 48 hours. This value is not configurable. If you don't want your Space to deactivate or if" + " you want to set a custom sleep time, you need to upgrade to a paid Hardware.", + UserWarning, + ) + return runtime + + @validate_hf_hub_args + def pause_space(self, repo_id: str, *, token: Union[bool, str, None] = None) -> SpaceRuntime: + """Pause your Space. + + A paused Space stops executing until manually restarted by its owner. This is different from the sleeping + state in which free Spaces go after 48h of inactivity. Paused time is not billed to your account, no matter the + hardware you've selected. To restart your Space, use [`restart_space`] and go to your Space settings page. + + For more details, please visit [the docs](https://huggingface.co/docs/hub/spaces-gpus#pause). + + Args: + repo_id (`str`): + ID of the Space to pause. Example: `"Salesforce/BLIP2"`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`SpaceRuntime`]: Runtime information about your Space including `stage=PAUSED` and requested hardware. + + Raises: + [`~utils.RepositoryNotFoundError`]: + If your Space is not found (error 404). Most probably wrong repo_id or your space is private but you + are not authenticated. + [`~utils.HfHubHTTPError`]: + 403 Forbidden: only the owner of a Space can pause it. If you want to manage a Space that you don't + own, either ask the owner by opening a Discussion or duplicate the Space. + [`~utils.BadRequestError`]: + If your Space is a static Space. Static Spaces are always running and never billed. If you want to hide + a static Space, you can set it to private. + """ + r = get_session().post( + f"{self.endpoint}/api/spaces/{repo_id}/pause", headers=self._build_hf_headers(token=token) + ) + hf_raise_for_status(r) + return SpaceRuntime(r.json()) + + @validate_hf_hub_args + def restart_space( + self, repo_id: str, *, token: Union[bool, str, None] = None, factory_reboot: bool = False + ) -> SpaceRuntime: + """Restart your Space. + + This is the only way to programmatically restart a Space if you've put it on Pause (see [`pause_space`]). You + must be the owner of the Space to restart it. If you are using an upgraded hardware, your account will be + billed as soon as the Space is restarted. You can trigger a restart no matter the current state of a Space. + + For more details, please visit [the docs](https://huggingface.co/docs/hub/spaces-gpus#pause). + + Args: + repo_id (`str`): + ID of the Space to restart. Example: `"Salesforce/BLIP2"`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + factory_reboot (`bool`, *optional*): + If `True`, the Space will be rebuilt from scratch without caching any requirements. + + Returns: + [`SpaceRuntime`]: Runtime information about your Space. + + Raises: + [`~utils.RepositoryNotFoundError`]: + If your Space is not found (error 404). Most probably wrong repo_id or your space is private but you + are not authenticated. + [`~utils.HfHubHTTPError`]: + 403 Forbidden: only the owner of a Space can restart it. If you want to restart a Space that you don't + own, either ask the owner by opening a Discussion or duplicate the Space. + [`~utils.BadRequestError`]: + If your Space is a static Space. Static Spaces are always running and never billed. If you want to hide + a static Space, you can set it to private. + """ + params = {} + if factory_reboot: + params["factory"] = "true" + r = get_session().post( + f"{self.endpoint}/api/spaces/{repo_id}/restart", headers=self._build_hf_headers(token=token), params=params + ) + hf_raise_for_status(r) + return SpaceRuntime(r.json()) + + @validate_hf_hub_args + def duplicate_space( + self, + from_id: str, + to_id: Optional[str] = None, + *, + private: Optional[bool] = None, + token: Union[bool, str, None] = None, + exist_ok: bool = False, + hardware: Optional[SpaceHardware] = None, + storage: Optional[SpaceStorage] = None, + sleep_time: Optional[int] = None, + secrets: Optional[List[Dict[str, str]]] = None, + variables: Optional[List[Dict[str, str]]] = None, + ) -> RepoUrl: + """Duplicate a Space. + + Programmatically duplicate a Space. The new Space will be created in your account and will be in the same state + as the original Space (running or paused). You can duplicate a Space no matter the current state of a Space. + + Args: + from_id (`str`): + ID of the Space to duplicate. Example: `"pharma/CLIP-Interrogator"`. + to_id (`str`, *optional*): + ID of the new Space. Example: `"dog/CLIP-Interrogator"`. If not provided, the new Space will have the same + name as the original Space, but in your account. + private (`bool`, *optional*): + Whether the new Space should be private or not. Defaults to the same privacy as the original Space. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + exist_ok (`bool`, *optional*, defaults to `False`): + If `True`, do not raise an error if repo already exists. + hardware (`SpaceHardware` or `str`, *optional*): + Choice of Hardware. Example: `"t4-medium"`. See [`SpaceHardware`] for a complete list. + storage (`SpaceStorage` or `str`, *optional*): + Choice of persistent storage tier. Example: `"small"`. See [`SpaceStorage`] for a complete list. + sleep_time (`int`, *optional*): + Number of seconds of inactivity to wait before a Space is put to sleep. Set to `-1` if you don't want + your Space to sleep (default behavior for upgraded hardware). For free hardware, you can't configure + the sleep time (value is fixed to 48 hours of inactivity). + See https://huggingface.co/docs/hub/spaces-gpus#sleep-time for more details. + secrets (`List[Dict[str, str]]`, *optional*): + A list of secret keys to set in your Space. Each item is in the form `{"key": ..., "value": ..., "description": ...}` where description is optional. + For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets. + variables (`List[Dict[str, str]]`, *optional*): + A list of public environment variables to set in your Space. Each item is in the form `{"key": ..., "value": ..., "description": ...}` where description is optional. + For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets-and-environment-variables. + + Returns: + [`RepoUrl`]: URL to the newly created repo. Value is a subclass of `str` containing + attributes like `endpoint`, `repo_type` and `repo_id`. + + Raises: + [`~utils.RepositoryNotFoundError`]: + If one of `from_id` or `to_id` cannot be found. This may be because it doesn't exist, + or because it is set to `private` and you do not have access. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + If the HuggingFace API returned an error + + Example: + ```python + >>> from huggingface_hub import duplicate_space + + # Duplicate a Space to your account + >>> duplicate_space("multimodalart/dreambooth-training") + RepoUrl('https://huggingface.co/spaces/nateraw/dreambooth-training',...) + + # Can set custom destination id and visibility flag. + >>> duplicate_space("multimodalart/dreambooth-training", to_id="my-dreambooth", private=True) + RepoUrl('https://huggingface.co/spaces/nateraw/my-dreambooth',...) + ``` + """ + # Parse to_id if provided + parsed_to_id = RepoUrl(to_id) if to_id is not None else None + + # Infer target repo_id + to_namespace = ( # set namespace manually or default to username + parsed_to_id.namespace + if parsed_to_id is not None and parsed_to_id.namespace is not None + else self.whoami(token)["name"] + ) + to_repo_name = parsed_to_id.repo_name if to_id is not None else RepoUrl(from_id).repo_name # type: ignore + + # repository must be a valid repo_id (namespace/repo_name). + payload: Dict[str, Any] = {"repository": f"{to_namespace}/{to_repo_name}"} + + keys = ["private", "hardware", "storageTier", "sleepTimeSeconds", "secrets", "variables"] + values = [private, hardware, storage, sleep_time, secrets, variables] + payload.update({k: v for k, v in zip(keys, values) if v is not None}) + + if sleep_time is not None and hardware == SpaceHardware.CPU_BASIC: + warnings.warn( + "If your Space runs on the default 'cpu-basic' hardware, it will go to sleep if inactive for more" + " than 48 hours. This value is not configurable. If you don't want your Space to deactivate or if" + " you want to set a custom sleep time, you need to upgrade to a paid Hardware.", + UserWarning, + ) + + r = get_session().post( + f"{self.endpoint}/api/spaces/{from_id}/duplicate", + headers=self._build_hf_headers(token=token), + json=payload, + ) + + try: + hf_raise_for_status(r) + except HTTPError as err: + if exist_ok and err.response.status_code == 409: + # Repo already exists and `exist_ok=True` + pass + else: + raise + + return RepoUrl(r.json()["url"], endpoint=self.endpoint) + + @validate_hf_hub_args + def request_space_storage( + self, + repo_id: str, + storage: SpaceStorage, + *, + token: Union[bool, str, None] = None, + ) -> SpaceRuntime: + """Request persistent storage for a Space. + + Args: + repo_id (`str`): + ID of the Space to update. Example: `"open-llm-leaderboard/open_llm_leaderboard"`. + storage (`str` or [`SpaceStorage`]): + Storage tier. Either 'small', 'medium', or 'large'. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + Returns: + [`SpaceRuntime`]: Runtime information about a Space including Space stage and hardware. + + + + It is not possible to decrease persistent storage after its granted. To do so, you must delete it + via [`delete_space_storage`]. + + + """ + payload: Dict[str, SpaceStorage] = {"tier": storage} + r = get_session().post( + f"{self.endpoint}/api/spaces/{repo_id}/storage", + headers=self._build_hf_headers(token=token), + json=payload, + ) + hf_raise_for_status(r) + return SpaceRuntime(r.json()) + + @validate_hf_hub_args + def delete_space_storage( + self, + repo_id: str, + *, + token: Union[bool, str, None] = None, + ) -> SpaceRuntime: + """Delete persistent storage for a Space. + + Args: + repo_id (`str`): + ID of the Space to update. Example: `"open-llm-leaderboard/open_llm_leaderboard"`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + Returns: + [`SpaceRuntime`]: Runtime information about a Space including Space stage and hardware. + Raises: + [`BadRequestError`] + If space has no persistent storage. + + """ + r = get_session().delete( + f"{self.endpoint}/api/spaces/{repo_id}/storage", + headers=self._build_hf_headers(token=token), + ) + hf_raise_for_status(r) + return SpaceRuntime(r.json()) + + ####################### + # Inference Endpoints # + ####################### + + def list_inference_endpoints( + self, namespace: Optional[str] = None, *, token: Union[bool, str, None] = None + ) -> List[InferenceEndpoint]: + """Lists all inference endpoints for the given namespace. + + Args: + namespace (`str`, *optional*): + The namespace to list endpoints for. Defaults to the current user. Set to `"*"` to list all endpoints + from all namespaces (i.e. personal namespace and all orgs the user belongs to). + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + List[`InferenceEndpoint`]: A list of all inference endpoints for the given namespace. + + Example: + ```python + >>> from huggingface_hub import HfApi + >>> api = HfApi() + >>> api.list_inference_endpoints() + [InferenceEndpoint(name='my-endpoint', ...), ...] + ``` + """ + # Special case: list all endpoints for all namespaces the user has access to + if namespace == "*": + user = self.whoami(token=token) + + # List personal endpoints first + endpoints: List[InferenceEndpoint] = list_inference_endpoints(namespace=self._get_namespace(token=token)) + + # Then list endpoints for all orgs the user belongs to and ignore 401 errors (no billing or no access) + for org in user.get("orgs", []): + try: + endpoints += list_inference_endpoints(namespace=org["name"], token=token) + except HfHubHTTPError as error: + if error.response.status_code == 401: # Either no billing or user don't have access) + logger.debug("Cannot list Inference Endpoints for org '%s': %s", org["name"], error) + pass + + return endpoints + + # Normal case: list endpoints for a specific namespace + namespace = namespace or self._get_namespace(token=token) + + response = get_session().get( + f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}", + headers=self._build_hf_headers(token=token), + ) + hf_raise_for_status(response) + + return [ + InferenceEndpoint.from_raw(endpoint, namespace=namespace, token=token) + for endpoint in response.json()["items"] + ] + + def create_inference_endpoint( + self, + name: str, + *, + repository: str, + framework: str, + accelerator: str, + instance_size: str, + instance_type: str, + region: str, + vendor: str, + account_id: Optional[str] = None, + min_replica: int = 0, + max_replica: int = 1, + scale_to_zero_timeout: int = 15, + revision: Optional[str] = None, + task: Optional[str] = None, + custom_image: Optional[Dict] = None, + secrets: Optional[Dict[str, str]] = None, + type: InferenceEndpointType = InferenceEndpointType.PROTECTED, + namespace: Optional[str] = None, + token: Union[bool, str, None] = None, + ) -> InferenceEndpoint: + """Create a new Inference Endpoint. + + Args: + name (`str`): + The unique name for the new Inference Endpoint. + repository (`str`): + The name of the model repository associated with the Inference Endpoint (e.g. `"gpt2"`). + framework (`str`): + The machine learning framework used for the model (e.g. `"custom"`). + accelerator (`str`): + The hardware accelerator to be used for inference (e.g. `"cpu"`). + instance_size (`str`): + The size or type of the instance to be used for hosting the model (e.g. `"x4"`). + instance_type (`str`): + The cloud instance type where the Inference Endpoint will be deployed (e.g. `"intel-icl"`). + region (`str`): + The cloud region in which the Inference Endpoint will be created (e.g. `"us-east-1"`). + vendor (`str`): + The cloud provider or vendor where the Inference Endpoint will be hosted (e.g. `"aws"`). + account_id (`str`, *optional*): + The account ID used to link a VPC to a private Inference Endpoint (if applicable). + min_replica (`int`, *optional*): + The minimum number of replicas (instances) to keep running for the Inference Endpoint. Defaults to 0. + max_replica (`int`, *optional*): + The maximum number of replicas (instances) to scale to for the Inference Endpoint. Defaults to 1. + scale_to_zero_timeout (`int`, *optional*): + The duration in minutes before an inactive endpoint is scaled to zero. Defaults to 15. + revision (`str`, *optional*): + The specific model revision to deploy on the Inference Endpoint (e.g. `"6c0e6080953db56375760c0471a8c5f2929baf11"`). + task (`str`, *optional*): + The task on which to deploy the model (e.g. `"text-classification"`). + custom_image (`Dict`, *optional*): + A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an + Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples). + secrets (`Dict[str, str]`, *optional*): + Secret values to inject in the container environment. + type ([`InferenceEndpointType]`, *optional*): + The type of the Inference Endpoint, which can be `"protected"` (default), `"public"` or `"private"`. + namespace (`str`, *optional*): + The namespace where the Inference Endpoint will be created. Defaults to the current user's namespace. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`InferenceEndpoint`]: information about the updated Inference Endpoint. + + Example: + ```python + >>> from huggingface_hub import HfApi + >>> api = HfApi() + >>> endpoint = api.create_inference_endpoint( + ... "my-endpoint-name", + ... repository="gpt2", + ... framework="pytorch", + ... task="text-generation", + ... accelerator="cpu", + ... vendor="aws", + ... region="us-east-1", + ... type="protected", + ... instance_size="x2", + ... instance_type="intel-icl", + ... ) + >>> endpoint + InferenceEndpoint(name='my-endpoint-name', status="pending",...) + + # Run inference on the endpoint + >>> endpoint.client.text_generation(...) + "..." + ``` + + ```python + # Start an Inference Endpoint running Zephyr-7b-beta on TGI + >>> from huggingface_hub import HfApi + >>> api = HfApi() + >>> endpoint = api.create_inference_endpoint( + ... "aws-zephyr-7b-beta-0486", + ... repository="HuggingFaceH4/zephyr-7b-beta", + ... framework="pytorch", + ... task="text-generation", + ... accelerator="gpu", + ... vendor="aws", + ... region="us-east-1", + ... type="protected", + ... instance_size="x1", + ... instance_type="nvidia-a10g", + ... custom_image={ + ... "health_route": "/health", + ... "env": { + ... "MAX_BATCH_PREFILL_TOKENS": "2048", + ... "MAX_INPUT_LENGTH": "1024", + ... "MAX_TOTAL_TOKENS": "1512", + ... "MODEL_ID": "/repository" + ... }, + ... "url": "ghcr.io/huggingface/text-generation-inference:1.1.0", + ... }, + ... secrets={"MY_SECRET_KEY": "secret_value"}, + ... ) + + ``` + """ + namespace = namespace or self._get_namespace(token=token) + + image = {"custom": custom_image} if custom_image is not None else {"huggingface": {}} + payload: Dict = { + "accountId": account_id, + "compute": { + "accelerator": accelerator, + "instanceSize": instance_size, + "instanceType": instance_type, + "scaling": { + "maxReplica": max_replica, + "minReplica": min_replica, + "scaleToZeroTimeout": scale_to_zero_timeout, + }, + }, + "model": { + "framework": framework, + "repository": repository, + "revision": revision, + "task": task, + "image": image, + }, + "name": name, + "provider": { + "region": region, + "vendor": vendor, + }, + "type": type, + } + if secrets: + payload["model"]["secrets"] = secrets + response = get_session().post( + f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}", + headers=self._build_hf_headers(token=token), + json=payload, + ) + hf_raise_for_status(response) + + return InferenceEndpoint.from_raw(response.json(), namespace=namespace, token=token) + + def get_inference_endpoint( + self, name: str, *, namespace: Optional[str] = None, token: Union[bool, str, None] = None + ) -> InferenceEndpoint: + """Get information about an Inference Endpoint. + + Args: + name (`str`): + The name of the Inference Endpoint to retrieve information about. + namespace (`str`, *optional*): + The namespace in which the Inference Endpoint is located. Defaults to the current user. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`InferenceEndpoint`]: information about the requested Inference Endpoint. + + Example: + ```python + >>> from huggingface_hub import HfApi + >>> api = HfApi() + >>> endpoint = api.get_inference_endpoint("my-text-to-image") + >>> endpoint + InferenceEndpoint(name='my-text-to-image', ...) + + # Get status + >>> endpoint.status + 'running' + >>> endpoint.url + 'https://my-text-to-image.region.vendor.endpoints.huggingface.cloud' + + # Run inference + >>> endpoint.client.text_to_image(...) + ``` + """ + namespace = namespace or self._get_namespace(token=token) + + response = get_session().get( + f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}", + headers=self._build_hf_headers(token=token), + ) + hf_raise_for_status(response) + + return InferenceEndpoint.from_raw(response.json(), namespace=namespace, token=token) + + def update_inference_endpoint( + self, + name: str, + *, + # Compute update + accelerator: Optional[str] = None, + instance_size: Optional[str] = None, + instance_type: Optional[str] = None, + min_replica: Optional[int] = None, + max_replica: Optional[int] = None, + scale_to_zero_timeout: Optional[int] = None, + # Model update + repository: Optional[str] = None, + framework: Optional[str] = None, + revision: Optional[str] = None, + task: Optional[str] = None, + custom_image: Optional[Dict] = None, + secrets: Optional[Dict[str, str]] = None, + # Other + namespace: Optional[str] = None, + token: Union[bool, str, None] = None, + ) -> InferenceEndpoint: + """Update an Inference Endpoint. + + This method allows the update of either the compute configuration, the deployed model, or both. All arguments are + optional but at least one must be provided. + + For convenience, you can also update an Inference Endpoint using [`InferenceEndpoint.update`]. + + Args: + name (`str`): + The name of the Inference Endpoint to update. + + accelerator (`str`, *optional*): + The hardware accelerator to be used for inference (e.g. `"cpu"`). + instance_size (`str`, *optional*): + The size or type of the instance to be used for hosting the model (e.g. `"x4"`). + instance_type (`str`, *optional*): + The cloud instance type where the Inference Endpoint will be deployed (e.g. `"intel-icl"`). + min_replica (`int`, *optional*): + The minimum number of replicas (instances) to keep running for the Inference Endpoint. + max_replica (`int`, *optional*): + The maximum number of replicas (instances) to scale to for the Inference Endpoint. + scale_to_zero_timeout (`int`, *optional*): + The duration in minutes before an inactive endpoint is scaled to zero. + + repository (`str`, *optional*): + The name of the model repository associated with the Inference Endpoint (e.g. `"gpt2"`). + framework (`str`, *optional*): + The machine learning framework used for the model (e.g. `"custom"`). + revision (`str`, *optional*): + The specific model revision to deploy on the Inference Endpoint (e.g. `"6c0e6080953db56375760c0471a8c5f2929baf11"`). + task (`str`, *optional*): + The task on which to deploy the model (e.g. `"text-classification"`). + custom_image (`Dict`, *optional*): + A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an + Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples). + secrets (`Dict[str, str]`, *optional*): + Secret values to inject in the container environment. + namespace (`str`, *optional*): + The namespace where the Inference Endpoint will be updated. Defaults to the current user's namespace. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`InferenceEndpoint`]: information about the updated Inference Endpoint. + """ + namespace = namespace or self._get_namespace(token=token) + + # Populate only the fields that are not None + payload: Dict = defaultdict(lambda: defaultdict(dict)) + if accelerator is not None: + payload["compute"]["accelerator"] = accelerator + if instance_size is not None: + payload["compute"]["instanceSize"] = instance_size + if instance_type is not None: + payload["compute"]["instanceType"] = instance_type + if max_replica is not None: + payload["compute"]["scaling"]["maxReplica"] = max_replica + if min_replica is not None: + payload["compute"]["scaling"]["minReplica"] = min_replica + if scale_to_zero_timeout is not None: + payload["compute"]["scaling"]["scaleToZeroTimeout"] = scale_to_zero_timeout + if repository is not None: + payload["model"]["repository"] = repository + if framework is not None: + payload["model"]["framework"] = framework + if revision is not None: + payload["model"]["revision"] = revision + if task is not None: + payload["model"]["task"] = task + if custom_image is not None: + payload["model"]["image"] = {"custom": custom_image} + if secrets is not None: + payload["model"]["secrets"] = secrets + + response = get_session().put( + f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}", + headers=self._build_hf_headers(token=token), + json=payload, + ) + hf_raise_for_status(response) + + return InferenceEndpoint.from_raw(response.json(), namespace=namespace, token=token) + + def delete_inference_endpoint( + self, name: str, *, namespace: Optional[str] = None, token: Union[bool, str, None] = None + ) -> None: + """Delete an Inference Endpoint. + + This operation is not reversible. If you don't want to be charged for an Inference Endpoint, it is preferable + to pause it with [`pause_inference_endpoint`] or scale it to zero with [`scale_to_zero_inference_endpoint`]. + + For convenience, you can also delete an Inference Endpoint using [`InferenceEndpoint.delete`]. + + Args: + name (`str`): + The name of the Inference Endpoint to delete. + namespace (`str`, *optional*): + The namespace in which the Inference Endpoint is located. Defaults to the current user. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + """ + namespace = namespace or self._get_namespace(token=token) + response = get_session().delete( + f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}", + headers=self._build_hf_headers(token=token), + ) + hf_raise_for_status(response) + + def pause_inference_endpoint( + self, name: str, *, namespace: Optional[str] = None, token: Union[bool, str, None] = None + ) -> InferenceEndpoint: + """Pause an Inference Endpoint. + + A paused Inference Endpoint will not be charged. It can be resumed at any time using [`resume_inference_endpoint`]. + This is different than scaling the Inference Endpoint to zero with [`scale_to_zero_inference_endpoint`], which + would be automatically restarted when a request is made to it. + + For convenience, you can also pause an Inference Endpoint using [`pause_inference_endpoint`]. + + Args: + name (`str`): + The name of the Inference Endpoint to pause. + namespace (`str`, *optional*): + The namespace in which the Inference Endpoint is located. Defaults to the current user. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`InferenceEndpoint`]: information about the paused Inference Endpoint. + """ + namespace = namespace or self._get_namespace(token=token) + + response = get_session().post( + f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}/pause", + headers=self._build_hf_headers(token=token), + ) + hf_raise_for_status(response) + + return InferenceEndpoint.from_raw(response.json(), namespace=namespace, token=token) + + def resume_inference_endpoint( + self, + name: str, + *, + namespace: Optional[str] = None, + running_ok: bool = True, + token: Union[bool, str, None] = None, + ) -> InferenceEndpoint: + """Resume an Inference Endpoint. + + For convenience, you can also resume an Inference Endpoint using [`InferenceEndpoint.resume`]. + + Args: + name (`str`): + The name of the Inference Endpoint to resume. + namespace (`str`, *optional*): + The namespace in which the Inference Endpoint is located. Defaults to the current user. + running_ok (`bool`, *optional*): + If `True`, the method will not raise an error if the Inference Endpoint is already running. Defaults to + `True`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`InferenceEndpoint`]: information about the resumed Inference Endpoint. + """ + namespace = namespace or self._get_namespace(token=token) + + response = get_session().post( + f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}/resume", + headers=self._build_hf_headers(token=token), + ) + try: + hf_raise_for_status(response) + except HfHubHTTPError as error: + # If already running (and it's ok), then fetch current status and return + if running_ok and error.response.status_code == 400 and "already running" in error.response.text: + return self.get_inference_endpoint(name, namespace=namespace, token=token) + # Otherwise, raise the error + raise + + return InferenceEndpoint.from_raw(response.json(), namespace=namespace, token=token) + + def scale_to_zero_inference_endpoint( + self, name: str, *, namespace: Optional[str] = None, token: Union[bool, str, None] = None + ) -> InferenceEndpoint: + """Scale Inference Endpoint to zero. + + An Inference Endpoint scaled to zero will not be charged. It will be resume on the next request to it, with a + cold start delay. This is different than pausing the Inference Endpoint with [`pause_inference_endpoint`], which + would require a manual resume with [`resume_inference_endpoint`]. + + For convenience, you can also scale an Inference Endpoint to zero using [`InferenceEndpoint.scale_to_zero`]. + + Args: + name (`str`): + The name of the Inference Endpoint to scale to zero. + namespace (`str`, *optional*): + The namespace in which the Inference Endpoint is located. Defaults to the current user. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`InferenceEndpoint`]: information about the scaled-to-zero Inference Endpoint. + """ + namespace = namespace or self._get_namespace(token=token) + + response = get_session().post( + f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}/scale-to-zero", + headers=self._build_hf_headers(token=token), + ) + hf_raise_for_status(response) + + return InferenceEndpoint.from_raw(response.json(), namespace=namespace, token=token) + + def _get_namespace(self, token: Union[bool, str, None] = None) -> str: + """Get the default namespace for the current user.""" + me = self.whoami(token=token) + if me["type"] == "user": + return me["name"] + else: + raise ValueError( + "Cannot determine default namespace. You must provide a 'namespace' as input or be logged in as a" + " user." + ) + + ######################## + # Collection Endpoints # + ######################## + @validate_hf_hub_args + def list_collections( + self, + *, + owner: Union[List[str], str, None] = None, + item: Union[List[str], str, None] = None, + sort: Optional[Literal["lastModified", "trending", "upvotes"]] = None, + limit: Optional[int] = None, + token: Union[bool, str, None] = None, + ) -> Iterable[Collection]: + """List collections on the Huggingface Hub, given some filters. + + + + When listing collections, the item list per collection is truncated to 4 items maximum. To retrieve all items + from a collection, you must use [`get_collection`]. + + + + Args: + owner (`List[str]` or `str`, *optional*): + Filter by owner's username. + item (`List[str]` or `str`, *optional*): + Filter collections containing a particular items. Example: `"models/teknium/OpenHermes-2.5-Mistral-7B"`, `"datasets/squad"` or `"papers/2311.12983"`. + sort (`Literal["lastModified", "trending", "upvotes"]`, *optional*): + Sort collections by last modified, trending or upvotes. + limit (`int`, *optional*): + Maximum number of collections to be returned. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `Iterable[Collection]`: an iterable of [`Collection`] objects. + """ + # Construct the API endpoint + path = f"{self.endpoint}/api/collections" + headers = self._build_hf_headers(token=token) + params: Dict = {} + if owner is not None: + params.update({"owner": owner}) + if item is not None: + params.update({"item": item}) + if sort is not None: + params.update({"sort": sort}) + if limit is not None: + params.update({"limit": limit}) + + # Paginate over the results until limit is reached + items = paginate(path, headers=headers, params=params) + if limit is not None: + items = islice(items, limit) # Do not iterate over all pages + + # Parse as Collection and return + for position, collection_data in enumerate(items): + yield Collection(position=position, **collection_data) + + def get_collection(self, collection_slug: str, *, token: Union[bool, str, None] = None) -> Collection: + """Gets information about a Collection on the Hub. + + Args: + collection_slug (`str`): + Slug of the collection of the Hub. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: [`Collection`] + + Example: + + ```py + >>> from huggingface_hub import get_collection + >>> collection = get_collection("TheBloke/recent-models-64f9a55bb3115b4f513ec026") + >>> collection.title + 'Recent models' + >>> len(collection.items) + 37 + >>> collection.items[0] + CollectionItem( + item_object_id='651446103cd773a050bf64c2', + item_id='TheBloke/U-Amethyst-20B-AWQ', + item_type='model', + position=88, + note=None + ) + ``` + """ + r = get_session().get( + f"{self.endpoint}/api/collections/{collection_slug}", headers=self._build_hf_headers(token=token) + ) + hf_raise_for_status(r) + return Collection(**{**r.json(), "endpoint": self.endpoint}) + + def create_collection( + self, + title: str, + *, + namespace: Optional[str] = None, + description: Optional[str] = None, + private: bool = False, + exists_ok: bool = False, + token: Union[bool, str, None] = None, + ) -> Collection: + """Create a new Collection on the Hub. + + Args: + title (`str`): + Title of the collection to create. Example: `"Recent models"`. + namespace (`str`, *optional*): + Namespace of the collection to create (username or org). Will default to the owner name. + description (`str`, *optional*): + Description of the collection to create. + private (`bool`, *optional*): + Whether the collection should be private or not. Defaults to `False` (i.e. public collection). + exists_ok (`bool`, *optional*): + If `True`, do not raise an error if collection already exists. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: [`Collection`] + + Example: + + ```py + >>> from huggingface_hub import create_collection + >>> collection = create_collection( + ... title="ICCV 2023", + ... description="Portfolio of models, papers and demos I presented at ICCV 2023", + ... ) + >>> collection.slug + "username/iccv-2023-64f9a55bb3115b4f513ec026" + ``` + """ + if namespace is None: + namespace = self.whoami(token)["name"] + + payload = { + "title": title, + "namespace": namespace, + "private": private, + } + if description is not None: + payload["description"] = description + + r = get_session().post( + f"{self.endpoint}/api/collections", headers=self._build_hf_headers(token=token), json=payload + ) + try: + hf_raise_for_status(r) + except HTTPError as err: + if exists_ok and err.response.status_code == 409: + # Collection already exists and `exists_ok=True` + slug = r.json()["slug"] + return self.get_collection(slug, token=token) + else: + raise + return Collection(**{**r.json(), "endpoint": self.endpoint}) + + def update_collection_metadata( + self, + collection_slug: str, + *, + title: Optional[str] = None, + description: Optional[str] = None, + position: Optional[int] = None, + private: Optional[bool] = None, + theme: Optional[str] = None, + token: Union[bool, str, None] = None, + ) -> Collection: + """Update metadata of a collection on the Hub. + + All arguments are optional. Only provided metadata will be updated. + + Args: + collection_slug (`str`): + Slug of the collection to update. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`. + title (`str`): + Title of the collection to update. + description (`str`, *optional*): + Description of the collection to update. + position (`int`, *optional*): + New position of the collection in the list of collections of the user. + private (`bool`, *optional*): + Whether the collection should be private or not. + theme (`str`, *optional*): + Theme of the collection on the Hub. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: [`Collection`] + + Example: + + ```py + >>> from huggingface_hub import update_collection_metadata + >>> collection = update_collection_metadata( + ... collection_slug="username/iccv-2023-64f9a55bb3115b4f513ec026", + ... title="ICCV Oct. 2023" + ... description="Portfolio of models, datasets, papers and demos I presented at ICCV Oct. 2023", + ... private=False, + ... theme="pink", + ... ) + >>> collection.slug + "username/iccv-oct-2023-64f9a55bb3115b4f513ec026" + # ^collection slug got updated but not the trailing ID + ``` + """ + payload = { + "position": position, + "private": private, + "theme": theme, + "title": title, + "description": description, + } + r = get_session().patch( + f"{self.endpoint}/api/collections/{collection_slug}", + headers=self._build_hf_headers(token=token), + # Only send not-none values to the API + json={key: value for key, value in payload.items() if value is not None}, + ) + hf_raise_for_status(r) + return Collection(**{**r.json()["data"], "endpoint": self.endpoint}) + + def delete_collection( + self, collection_slug: str, *, missing_ok: bool = False, token: Union[bool, str, None] = None + ) -> None: + """Delete a collection on the Hub. + + Args: + collection_slug (`str`): + Slug of the collection to delete. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`. + missing_ok (`bool`, *optional*): + If `True`, do not raise an error if collection doesn't exists. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Example: + + ```py + >>> from huggingface_hub import delete_collection + >>> collection = delete_collection("username/useless-collection-64f9a55bb3115b4f513ec026", missing_ok=True) + ``` + + + + This is a non-revertible action. A deleted collection cannot be restored. + + + """ + r = get_session().delete( + f"{self.endpoint}/api/collections/{collection_slug}", headers=self._build_hf_headers(token=token) + ) + try: + hf_raise_for_status(r) + except HTTPError as err: + if missing_ok and err.response.status_code == 404: + # Collection doesn't exists and `missing_ok=True` + return + else: + raise + + def add_collection_item( + self, + collection_slug: str, + item_id: str, + item_type: CollectionItemType_T, + *, + note: Optional[str] = None, + exists_ok: bool = False, + token: Union[bool, str, None] = None, + ) -> Collection: + """Add an item to a collection on the Hub. + + Args: + collection_slug (`str`): + Slug of the collection to update. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`. + item_id (`str`): + ID of the item to add to the collection. It can be the ID of a repo on the Hub (e.g. `"facebook/bart-large-mnli"`) + or a paper id (e.g. `"2307.09288"`). + item_type (`str`): + Type of the item to add. Can be one of `"model"`, `"dataset"`, `"space"` or `"paper"`. + note (`str`, *optional*): + A note to attach to the item in the collection. The maximum size for a note is 500 characters. + exists_ok (`bool`, *optional*): + If `True`, do not raise an error if item already exists. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: [`Collection`] + + Raises: + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write` + or `admin` role in the organization the repo belongs to or if you passed a `read` token. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 404 if the item you try to add to the collection does not exist on the Hub. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 409 if the item you try to add to the collection is already in the collection (and exists_ok=False) + + Example: + + ```py + >>> from huggingface_hub import add_collection_item + >>> collection = add_collection_item( + ... collection_slug="davanstrien/climate-64f99dc2a5067f6b65531bab", + ... item_id="pierre-loic/climate-news-articles", + ... item_type="dataset" + ... ) + >>> collection.items[-1].item_id + "pierre-loic/climate-news-articles" + # ^item got added to the collection on last position + + # Add item with a note + >>> add_collection_item( + ... collection_slug="davanstrien/climate-64f99dc2a5067f6b65531bab", + ... item_id="datasets/climate_fever", + ... item_type="dataset" + ... note="This dataset adopts the FEVER methodology that consists of 1,535 real-world claims regarding climate-change collected on the internet." + ... ) + (...) + ``` + """ + payload: Dict[str, Any] = {"item": {"id": item_id, "type": item_type}} + if note is not None: + payload["note"] = note + r = get_session().post( + f"{self.endpoint}/api/collections/{collection_slug}/items", + headers=self._build_hf_headers(token=token), + json=payload, + ) + try: + hf_raise_for_status(r) + except HTTPError as err: + if exists_ok and err.response.status_code == 409: + # Item already exists and `exists_ok=True` + return self.get_collection(collection_slug, token=token) + else: + raise + return Collection(**{**r.json(), "endpoint": self.endpoint}) + + def update_collection_item( + self, + collection_slug: str, + item_object_id: str, + *, + note: Optional[str] = None, + position: Optional[int] = None, + token: Union[bool, str, None] = None, + ) -> None: + """Update an item in a collection. + + Args: + collection_slug (`str`): + Slug of the collection to update. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`. + item_object_id (`str`): + ID of the item in the collection. This is not the id of the item on the Hub (repo_id or paper id). + It must be retrieved from a [`CollectionItem`] object. Example: `collection.items[0].item_object_id`. + note (`str`, *optional*): + A note to attach to the item in the collection. The maximum size for a note is 500 characters. + position (`int`, *optional*): + New position of the item in the collection. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Example: + + ```py + >>> from huggingface_hub import get_collection, update_collection_item + + # Get collection first + >>> collection = get_collection("TheBloke/recent-models-64f9a55bb3115b4f513ec026") + + # Update item based on its ID (add note + update position) + >>> update_collection_item( + ... collection_slug="TheBloke/recent-models-64f9a55bb3115b4f513ec026", + ... item_object_id=collection.items[-1].item_object_id, + ... note="Newly updated model!" + ... position=0, + ... ) + ``` + """ + payload = {"position": position, "note": note} + r = get_session().patch( + f"{self.endpoint}/api/collections/{collection_slug}/items/{item_object_id}", + headers=self._build_hf_headers(token=token), + # Only send not-none values to the API + json={key: value for key, value in payload.items() if value is not None}, + ) + hf_raise_for_status(r) + + def delete_collection_item( + self, + collection_slug: str, + item_object_id: str, + *, + missing_ok: bool = False, + token: Union[bool, str, None] = None, + ) -> None: + """Delete an item from a collection. + + Args: + collection_slug (`str`): + Slug of the collection to update. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`. + item_object_id (`str`): + ID of the item in the collection. This is not the id of the item on the Hub (repo_id or paper id). + It must be retrieved from a [`CollectionItem`] object. Example: `collection.items[0].item_object_id`. + missing_ok (`bool`, *optional*): + If `True`, do not raise an error if item doesn't exists. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Example: + + ```py + >>> from huggingface_hub import get_collection, delete_collection_item + + # Get collection first + >>> collection = get_collection("TheBloke/recent-models-64f9a55bb3115b4f513ec026") + + # Delete item based on its ID + >>> delete_collection_item( + ... collection_slug="TheBloke/recent-models-64f9a55bb3115b4f513ec026", + ... item_object_id=collection.items[-1].item_object_id, + ... ) + ``` + """ + r = get_session().delete( + f"{self.endpoint}/api/collections/{collection_slug}/items/{item_object_id}", + headers=self._build_hf_headers(token=token), + ) + try: + hf_raise_for_status(r) + except HTTPError as err: + if missing_ok and err.response.status_code == 404: + # Item already deleted and `missing_ok=True` + return + else: + raise + + ########################## + # Manage access requests # + ########################## + + @validate_hf_hub_args + def list_pending_access_requests( + self, repo_id: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None + ) -> List[AccessRequest]: + """ + Get pending access requests for a given gated repo. + + A pending request means the user has requested access to the repo but the request has not been processed yet. + If the approval mode is automatic, this list should be empty. Pending requests can be accepted or rejected + using [`accept_access_request`] and [`reject_access_request`]. + + For more info about gated repos, see https://huggingface.co/docs/hub/models-gated. + + Args: + repo_id (`str`): + The id of the repo to get access requests for. + repo_type (`str`, *optional*): + The type of the repo to get access requests for. Must be one of `model`, `dataset` or `space`. + Defaults to `model`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `List[AccessRequest]`: A list of [`AccessRequest`] objects. Each time contains a `username`, `email`, + `status` and `timestamp` attribute. If the gated repo has a custom form, the `fields` attribute will + be populated with user's answers. + + Raises: + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 400 if the repo is not gated. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write` + or `admin` role in the organization the repo belongs to or if you passed a `read` token. + + Example: + ```py + >>> from huggingface_hub import list_pending_access_requests, accept_access_request + + # List pending requests + >>> requests = list_pending_access_requests("meta-llama/Llama-2-7b") + >>> len(requests) + 411 + >>> requests[0] + [ + AccessRequest( + username='clem', + fullname='Clem 🤗', + email='***', + timestamp=datetime.datetime(2023, 11, 23, 18, 4, 53, 828000, tzinfo=datetime.timezone.utc), + status='pending', + fields=None, + ), + ... + ] + + # Accept Clem's request + >>> accept_access_request("meta-llama/Llama-2-7b", "clem") + ``` + """ + return self._list_access_requests(repo_id, "pending", repo_type=repo_type, token=token) + + @validate_hf_hub_args + def list_accepted_access_requests( + self, repo_id: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None + ) -> List[AccessRequest]: + """ + Get accepted access requests for a given gated repo. + + An accepted request means the user has requested access to the repo and the request has been accepted. The user + can download any file of the repo. If the approval mode is automatic, this list should contains by default all + requests. Accepted requests can be cancelled or rejected at any time using [`cancel_access_request`] and + [`reject_access_request`]. A cancelled request will go back to the pending list while a rejected request will + go to the rejected list. In both cases, the user will lose access to the repo. + + For more info about gated repos, see https://huggingface.co/docs/hub/models-gated. + + Args: + repo_id (`str`): + The id of the repo to get access requests for. + repo_type (`str`, *optional*): + The type of the repo to get access requests for. Must be one of `model`, `dataset` or `space`. + Defaults to `model`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `List[AccessRequest]`: A list of [`AccessRequest`] objects. Each time contains a `username`, `email`, + `status` and `timestamp` attribute. If the gated repo has a custom form, the `fields` attribute will + be populated with user's answers. + + Raises: + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 400 if the repo is not gated. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write` + or `admin` role in the organization the repo belongs to or if you passed a `read` token. + + Example: + ```py + >>> from huggingface_hub import list_accepted_access_requests + + >>> requests = list_accepted_access_requests("meta-llama/Llama-2-7b") + >>> len(requests) + 411 + >>> requests[0] + [ + AccessRequest( + username='clem', + fullname='Clem 🤗', + email='***', + timestamp=datetime.datetime(2023, 11, 23, 18, 4, 53, 828000, tzinfo=datetime.timezone.utc), + status='accepted', + fields=None, + ), + ... + ] + ``` + """ + return self._list_access_requests(repo_id, "accepted", repo_type=repo_type, token=token) + + @validate_hf_hub_args + def list_rejected_access_requests( + self, repo_id: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None + ) -> List[AccessRequest]: + """ + Get rejected access requests for a given gated repo. + + A rejected request means the user has requested access to the repo and the request has been explicitly rejected + by a repo owner (either you or another user from your organization). The user cannot download any file of the + repo. Rejected requests can be accepted or cancelled at any time using [`accept_access_request`] and + [`cancel_access_request`]. A cancelled request will go back to the pending list while an accepted request will + go to the accepted list. + + For more info about gated repos, see https://huggingface.co/docs/hub/models-gated. + + Args: + repo_id (`str`): + The id of the repo to get access requests for. + repo_type (`str`, *optional*): + The type of the repo to get access requests for. Must be one of `model`, `dataset` or `space`. + Defaults to `model`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `List[AccessRequest]`: A list of [`AccessRequest`] objects. Each time contains a `username`, `email`, + `status` and `timestamp` attribute. If the gated repo has a custom form, the `fields` attribute will + be populated with user's answers. + + Raises: + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 400 if the repo is not gated. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write` + or `admin` role in the organization the repo belongs to or if you passed a `read` token. + + Example: + ```py + >>> from huggingface_hub import list_rejected_access_requests + + >>> requests = list_rejected_access_requests("meta-llama/Llama-2-7b") + >>> len(requests) + 411 + >>> requests[0] + [ + AccessRequest( + username='clem', + fullname='Clem 🤗', + email='***', + timestamp=datetime.datetime(2023, 11, 23, 18, 4, 53, 828000, tzinfo=datetime.timezone.utc), + status='rejected', + fields=None, + ), + ... + ] + ``` + """ + return self._list_access_requests(repo_id, "rejected", repo_type=repo_type, token=token) + + def _list_access_requests( + self, + repo_id: str, + status: Literal["accepted", "rejected", "pending"], + repo_type: Optional[str] = None, + token: Union[bool, str, None] = None, + ) -> List[AccessRequest]: + if repo_type not in constants.REPO_TYPES: + raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL + + response = get_session().get( + f"{constants.ENDPOINT}/api/{repo_type}s/{repo_id}/user-access-request/{status}", + headers=self._build_hf_headers(token=token), + ) + hf_raise_for_status(response) + return [ + AccessRequest( + username=request["user"]["user"], + fullname=request["user"]["fullname"], + email=request["user"].get("email"), + status=request["status"], + timestamp=parse_datetime(request["timestamp"]), + fields=request.get("fields"), # only if custom fields in form + ) + for request in response.json() + ] + + @validate_hf_hub_args + def cancel_access_request( + self, repo_id: str, user: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None + ) -> None: + """ + Cancel an access request from a user for a given gated repo. + + A cancelled request will go back to the pending list and the user will lose access to the repo. + + For more info about gated repos, see https://huggingface.co/docs/hub/models-gated. + + Args: + repo_id (`str`): + The id of the repo to cancel access request for. + user (`str`): + The username of the user which access request should be cancelled. + repo_type (`str`, *optional*): + The type of the repo to cancel access request for. Must be one of `model`, `dataset` or `space`. + Defaults to `model`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Raises: + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 400 if the repo is not gated. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write` + or `admin` role in the organization the repo belongs to or if you passed a `read` token. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 404 if the user does not exist on the Hub. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 404 if the user access request cannot be found. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 404 if the user access request is already in the pending list. + """ + self._handle_access_request(repo_id, user, "pending", repo_type=repo_type, token=token) + + @validate_hf_hub_args + def accept_access_request( + self, repo_id: str, user: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None + ) -> None: + """ + Accept an access request from a user for a given gated repo. + + Once the request is accepted, the user will be able to download any file of the repo and access the community + tab. If the approval mode is automatic, you don't have to accept requests manually. An accepted request can be + cancelled or rejected at any time using [`cancel_access_request`] and [`reject_access_request`]. + + For more info about gated repos, see https://huggingface.co/docs/hub/models-gated. + + Args: + repo_id (`str`): + The id of the repo to accept access request for. + user (`str`): + The username of the user which access request should be accepted. + repo_type (`str`, *optional*): + The type of the repo to accept access request for. Must be one of `model`, `dataset` or `space`. + Defaults to `model`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Raises: + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 400 if the repo is not gated. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write` + or `admin` role in the organization the repo belongs to or if you passed a `read` token. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 404 if the user does not exist on the Hub. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 404 if the user access request cannot be found. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 404 if the user access request is already in the accepted list. + """ + self._handle_access_request(repo_id, user, "accepted", repo_type=repo_type, token=token) + + @validate_hf_hub_args + def reject_access_request( + self, repo_id: str, user: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None + ) -> None: + """ + Reject an access request from a user for a given gated repo. + + A rejected request will go to the rejected list. The user cannot download any file of the repo. Rejected + requests can be accepted or cancelled at any time using [`accept_access_request`] and [`cancel_access_request`]. + A cancelled request will go back to the pending list while an accepted request will go to the accepted list. + + For more info about gated repos, see https://huggingface.co/docs/hub/models-gated. + + Args: + repo_id (`str`): + The id of the repo to reject access request for. + user (`str`): + The username of the user which access request should be rejected. + repo_type (`str`, *optional*): + The type of the repo to reject access request for. Must be one of `model`, `dataset` or `space`. + Defaults to `model`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Raises: + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 400 if the repo is not gated. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write` + or `admin` role in the organization the repo belongs to or if you passed a `read` token. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 404 if the user does not exist on the Hub. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 404 if the user access request cannot be found. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 404 if the user access request is already in the rejected list. + """ + self._handle_access_request(repo_id, user, "rejected", repo_type=repo_type, token=token) + + @validate_hf_hub_args + def _handle_access_request( + self, + repo_id: str, + user: str, + status: Literal["accepted", "rejected", "pending"], + repo_type: Optional[str] = None, + token: Union[bool, str, None] = None, + ) -> None: + if repo_type not in constants.REPO_TYPES: + raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL + + response = get_session().post( + f"{constants.ENDPOINT}/api/{repo_type}s/{repo_id}/user-access-request/handle", + headers=self._build_hf_headers(token=token), + json={"user": user, "status": status}, + ) + hf_raise_for_status(response) + + @validate_hf_hub_args + def grant_access( + self, repo_id: str, user: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None + ) -> None: + """ + Grant access to a user for a given gated repo. + + Granting access don't require for the user to send an access request by themselves. The user is automatically + added to the accepted list meaning they can download the files You can revoke the granted access at any time + using [`cancel_access_request`] or [`reject_access_request`]. + + For more info about gated repos, see https://huggingface.co/docs/hub/models-gated. + + Args: + repo_id (`str`): + The id of the repo to grant access to. + user (`str`): + The username of the user to grant access. + repo_type (`str`, *optional*): + The type of the repo to grant access to. Must be one of `model`, `dataset` or `space`. + Defaults to `model`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Raises: + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 400 if the repo is not gated. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 400 if the user already has access to the repo. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write` + or `admin` role in the organization the repo belongs to or if you passed a `read` token. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 404 if the user does not exist on the Hub. + """ + if repo_type not in constants.REPO_TYPES: + raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL + + response = get_session().post( + f"{constants.ENDPOINT}/api/{repo_type}s/{repo_id}/user-access-request/grant", + headers=self._build_hf_headers(token=token), + json={"user": user}, + ) + hf_raise_for_status(response) + return response.json() + + ################### + # Manage webhooks # + ################### + + @validate_hf_hub_args + def get_webhook(self, webhook_id: str, *, token: Union[bool, str, None] = None) -> WebhookInfo: + """Get a webhook by its id. + + Args: + webhook_id (`str`): + The unique identifier of the webhook to get. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved token, which is the recommended + method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`WebhookInfo`]: + Info about the webhook. + + Example: + ```python + >>> from huggingface_hub import get_webhook + >>> webhook = get_webhook("654bbbc16f2ec14d77f109cc") + >>> print(webhook) + WebhookInfo( + id="654bbbc16f2ec14d77f109cc", + watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")], + url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548", + secret="my-secret", + domains=["repo", "discussion"], + disabled=False, + ) + ``` + """ + response = get_session().get( + f"{constants.ENDPOINT}/api/settings/webhooks/{webhook_id}", + headers=self._build_hf_headers(token=token), + ) + hf_raise_for_status(response) + webhook_data = response.json()["webhook"] + + watched_items = [WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook_data["watched"]] + + webhook = WebhookInfo( + id=webhook_data["id"], + url=webhook_data["url"], + watched=watched_items, + domains=webhook_data["domains"], + secret=webhook_data.get("secret"), + disabled=webhook_data["disabled"], + ) + + return webhook + + @validate_hf_hub_args + def list_webhooks(self, *, token: Union[bool, str, None] = None) -> List[WebhookInfo]: + """List all configured webhooks. + + Args: + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved token, which is the recommended + method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `List[WebhookInfo]`: + List of webhook info objects. + + Example: + ```python + >>> from huggingface_hub import list_webhooks + >>> webhooks = list_webhooks() + >>> len(webhooks) + 2 + >>> webhooks[0] + WebhookInfo( + id="654bbbc16f2ec14d77f109cc", + watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")], + url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548", + secret="my-secret", + domains=["repo", "discussion"], + disabled=False, + ) + ``` + """ + response = get_session().get( + f"{constants.ENDPOINT}/api/settings/webhooks", + headers=self._build_hf_headers(token=token), + ) + hf_raise_for_status(response) + webhooks_data = response.json() + + return [ + WebhookInfo( + id=webhook["id"], + url=webhook["url"], + watched=[WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook["watched"]], + domains=webhook["domains"], + secret=webhook.get("secret"), + disabled=webhook["disabled"], + ) + for webhook in webhooks_data + ] + + @validate_hf_hub_args + def create_webhook( + self, + *, + url: str, + watched: List[Union[Dict, WebhookWatchedItem]], + domains: Optional[List[constants.WEBHOOK_DOMAIN_T]] = None, + secret: Optional[str] = None, + token: Union[bool, str, None] = None, + ) -> WebhookInfo: + """Create a new webhook. + + Args: + url (`str`): + URL to send the payload to. + watched (`List[WebhookWatchedItem]`): + List of [`WebhookWatchedItem`] to be watched by the webhook. It can be users, orgs, models, datasets or spaces. + Watched items can also be provided as plain dictionaries. + domains (`List[Literal["repo", "discussion"]]`, optional): + List of domains to watch. It can be "repo", "discussion" or both. + secret (`str`, optional): + A secret to sign the payload with. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved token, which is the recommended + method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`WebhookInfo`]: + Info about the newly created webhook. + + Example: + ```python + >>> from huggingface_hub import create_webhook + >>> payload = create_webhook( + ... watched=[{"type": "user", "name": "julien-c"}, {"type": "org", "name": "HuggingFaceH4"}], + ... url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548", + ... domains=["repo", "discussion"], + ... secret="my-secret", + ... ) + >>> print(payload) + WebhookInfo( + id="654bbbc16f2ec14d77f109cc", + url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548", + watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")], + domains=["repo", "discussion"], + secret="my-secret", + disabled=False, + ) + ``` + """ + watched_dicts = [asdict(item) if isinstance(item, WebhookWatchedItem) else item for item in watched] + + response = get_session().post( + f"{constants.ENDPOINT}/api/settings/webhooks", + json={"watched": watched_dicts, "url": url, "domains": domains, "secret": secret}, + headers=self._build_hf_headers(token=token), + ) + hf_raise_for_status(response) + webhook_data = response.json()["webhook"] + watched_items = [WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook_data["watched"]] + + webhook = WebhookInfo( + id=webhook_data["id"], + url=webhook_data["url"], + watched=watched_items, + domains=webhook_data["domains"], + secret=webhook_data.get("secret"), + disabled=webhook_data["disabled"], + ) + + return webhook + + @validate_hf_hub_args + def update_webhook( + self, + webhook_id: str, + *, + url: Optional[str] = None, + watched: Optional[List[Union[Dict, WebhookWatchedItem]]] = None, + domains: Optional[List[constants.WEBHOOK_DOMAIN_T]] = None, + secret: Optional[str] = None, + token: Union[bool, str, None] = None, + ) -> WebhookInfo: + """Update an existing webhook. + + Args: + webhook_id (`str`): + The unique identifier of the webhook to be updated. + url (`str`, optional): + The URL to which the payload will be sent. + watched (`List[WebhookWatchedItem]`, optional): + List of items to watch. It can be users, orgs, models, datasets, or spaces. + Refer to [`WebhookWatchedItem`] for more details. Watched items can also be provided as plain dictionaries. + domains (`List[Literal["repo", "discussion"]]`, optional): + The domains to watch. This can include "repo", "discussion", or both. + secret (`str`, optional): + A secret to sign the payload with, providing an additional layer of security. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved token, which is the recommended + method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`WebhookInfo`]: + Info about the updated webhook. + + Example: + ```python + >>> from huggingface_hub import update_webhook + >>> updated_payload = update_webhook( + ... webhook_id="654bbbc16f2ec14d77f109cc", + ... url="https://new.webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548", + ... watched=[{"type": "user", "name": "julien-c"}, {"type": "org", "name": "HuggingFaceH4"}], + ... domains=["repo"], + ... secret="my-secret", + ... ) + >>> print(updated_payload) + WebhookInfo( + id="654bbbc16f2ec14d77f109cc", + url="https://new.webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548", + watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")], + domains=["repo"], + secret="my-secret", + disabled=False, + ``` + """ + if watched is None: + watched = [] + watched_dicts = [asdict(item) if isinstance(item, WebhookWatchedItem) else item for item in watched] + + response = get_session().post( + f"{constants.ENDPOINT}/api/settings/webhooks/{webhook_id}", + json={"watched": watched_dicts, "url": url, "domains": domains, "secret": secret}, + headers=self._build_hf_headers(token=token), + ) + hf_raise_for_status(response) + webhook_data = response.json()["webhook"] + + watched_items = [WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook_data["watched"]] + + webhook = WebhookInfo( + id=webhook_data["id"], + url=webhook_data["url"], + watched=watched_items, + domains=webhook_data["domains"], + secret=webhook_data.get("secret"), + disabled=webhook_data["disabled"], + ) + + return webhook + + @validate_hf_hub_args + def enable_webhook(self, webhook_id: str, *, token: Union[bool, str, None] = None) -> WebhookInfo: + """Enable a webhook (makes it "active"). + + Args: + webhook_id (`str`): + The unique identifier of the webhook to enable. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved token, which is the recommended + method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`WebhookInfo`]: + Info about the enabled webhook. + + Example: + ```python + >>> from huggingface_hub import enable_webhook + >>> enabled_webhook = enable_webhook("654bbbc16f2ec14d77f109cc") + >>> enabled_webhook + WebhookInfo( + id="654bbbc16f2ec14d77f109cc", + url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548", + watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")], + domains=["repo", "discussion"], + secret="my-secret", + disabled=False, + ) + ``` + """ + response = get_session().post( + f"{constants.ENDPOINT}/api/settings/webhooks/{webhook_id}/enable", + headers=self._build_hf_headers(token=token), + ) + hf_raise_for_status(response) + webhook_data = response.json()["webhook"] + + watched_items = [WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook_data["watched"]] + + webhook = WebhookInfo( + id=webhook_data["id"], + url=webhook_data["url"], + watched=watched_items, + domains=webhook_data["domains"], + secret=webhook_data.get("secret"), + disabled=webhook_data["disabled"], + ) + + return webhook + + @validate_hf_hub_args + def disable_webhook(self, webhook_id: str, *, token: Union[bool, str, None] = None) -> WebhookInfo: + """Disable a webhook (makes it "disabled"). + + Args: + webhook_id (`str`): + The unique identifier of the webhook to disable. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved token, which is the recommended + method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + [`WebhookInfo`]: + Info about the disabled webhook. + + Example: + ```python + >>> from huggingface_hub import disable_webhook + >>> disabled_webhook = disable_webhook("654bbbc16f2ec14d77f109cc") + >>> disabled_webhook + WebhookInfo( + id="654bbbc16f2ec14d77f109cc", + url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548", + watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")], + domains=["repo", "discussion"], + secret="my-secret", + disabled=True, + ) + ``` + """ + response = get_session().post( + f"{constants.ENDPOINT}/api/settings/webhooks/{webhook_id}/disable", + headers=self._build_hf_headers(token=token), + ) + hf_raise_for_status(response) + webhook_data = response.json()["webhook"] + + watched_items = [WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook_data["watched"]] + + webhook = WebhookInfo( + id=webhook_data["id"], + url=webhook_data["url"], + watched=watched_items, + domains=webhook_data["domains"], + secret=webhook_data.get("secret"), + disabled=webhook_data["disabled"], + ) + + return webhook + + @validate_hf_hub_args + def delete_webhook(self, webhook_id: str, *, token: Union[bool, str, None] = None) -> None: + """Delete a webhook. + + Args: + webhook_id (`str`): + The unique identifier of the webhook to delete. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved token, which is the recommended + method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `None` + + Example: + ```python + >>> from huggingface_hub import delete_webhook + >>> delete_webhook("654bbbc16f2ec14d77f109cc") + ``` + """ + response = get_session().delete( + f"{constants.ENDPOINT}/api/settings/webhooks/{webhook_id}", + headers=self._build_hf_headers(token=token), + ) + hf_raise_for_status(response) + + ############# + # Internals # + ############# + + def _build_hf_headers( + self, + token: Union[bool, str, None] = None, + library_name: Optional[str] = None, + library_version: Optional[str] = None, + user_agent: Union[Dict, str, None] = None, + ) -> Dict[str, str]: + """ + Alias for [`build_hf_headers`] that uses the token from [`HfApi`] client + when `token` is not provided. + """ + if token is None: + # Cannot do `token = token or self.token` as token can be `False`. + token = self.token + return build_hf_headers( + token=token, + library_name=library_name or self.library_name, + library_version=library_version or self.library_version, + user_agent=user_agent or self.user_agent, + headers=self.headers, + ) + + def _prepare_folder_deletions( + self, + repo_id: str, + repo_type: Optional[str], + revision: Optional[str], + path_in_repo: str, + delete_patterns: Optional[Union[List[str], str]], + token: Union[bool, str, None] = None, + ) -> List[CommitOperationDelete]: + """Generate the list of Delete operations for a commit to delete files from a repo. + + List remote files and match them against the `delete_patterns` constraints. Returns a list of [`CommitOperationDelete`] + with the matching items. + + Note: `.gitattributes` file is essential to make a repo work properly on the Hub. This file will always be + kept even if it matches the `delete_patterns` constraints. + """ + if delete_patterns is None: + # If no delete patterns, no need to list and filter remote files + return [] + + # List remote files + filenames = self.list_repo_files(repo_id=repo_id, revision=revision, repo_type=repo_type, token=token) + + # Compute relative path in repo + if path_in_repo and path_in_repo not in (".", "./"): + path_in_repo = path_in_repo.strip("/") + "/" # harmonize + relpath_to_abspath = { + file[len(path_in_repo) :]: file for file in filenames if file.startswith(path_in_repo) + } + else: + relpath_to_abspath = {file: file for file in filenames} + + # Apply filter on relative paths and return + return [ + CommitOperationDelete(path_in_repo=relpath_to_abspath[relpath], is_folder=False) + for relpath in filter_repo_objects(relpath_to_abspath.keys(), allow_patterns=delete_patterns) + if relpath_to_abspath[relpath] != ".gitattributes" + ] + + def _prepare_upload_folder_additions( + self, + folder_path: Union[str, Path], + path_in_repo: str, + allow_patterns: Optional[Union[List[str], str]] = None, + ignore_patterns: Optional[Union[List[str], str]] = None, + repo_type: Optional[str] = None, + token: Union[bool, str, None] = None, + ) -> List[CommitOperationAdd]: + """Generate the list of Add operations for a commit to upload a folder. + + Files not matching the `allow_patterns` (allowlist) and `ignore_patterns` (denylist) + constraints are discarded. + """ + + folder_path = Path(folder_path).expanduser().resolve() + if not folder_path.is_dir(): + raise ValueError(f"Provided path: '{folder_path}' is not a directory") + + # List files from folder + relpath_to_abspath = { + path.relative_to(folder_path).as_posix(): path + for path in sorted(folder_path.glob("**/*")) # sorted to be deterministic + if path.is_file() + } + + # Filter files + # Patterns are applied on the path relative to `folder_path`. `path_in_repo` is prefixed after the filtering. + filtered_repo_objects = list( + filter_repo_objects( + relpath_to_abspath.keys(), allow_patterns=allow_patterns, ignore_patterns=ignore_patterns + ) + ) + + prefix = f"{path_in_repo.strip('/')}/" if path_in_repo else "" + + # If updating a README.md file, make sure the metadata format is valid + # It's better to fail early than to fail after all the files have been hashed. + if "README.md" in filtered_repo_objects: + self._validate_yaml( + content=relpath_to_abspath["README.md"].read_text(encoding="utf8"), + repo_type=repo_type, + token=token, + ) + if len(filtered_repo_objects) > 30: + log = logger.warning if len(filtered_repo_objects) > 200 else logger.info + log( + "It seems you are trying to upload a large folder at once. This might take some time and then fail if " + "the folder is too large. For such cases, it is recommended to upload in smaller batches or to use " + "`HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, " + "check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder." + ) + + logger.info(f"Start hashing {len(filtered_repo_objects)} files.") + operations = [ + CommitOperationAdd( + path_or_fileobj=relpath_to_abspath[relpath], # absolute path on disk + path_in_repo=prefix + relpath, # "absolute" path in repo + ) + for relpath in filtered_repo_objects + ] + logger.info(f"Finished hashing {len(filtered_repo_objects)} files.") + return operations + + def _validate_yaml(self, content: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None): + """ + Validate YAML from `README.md`, used before file hashing and upload. + + Args: + content (`str`): + Content of `README.md` to validate. + repo_type (`str`, *optional*): + The type of the repo to grant access to. Must be one of `model`, `dataset` or `space`. + Defaults to `model`. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Raises: + - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + if YAML is invalid + """ + repo_type = repo_type if repo_type is not None else constants.REPO_TYPE_MODEL + headers = self._build_hf_headers(token=token) + + response = get_session().post( + f"{self.endpoint}/api/validate-yaml", + json={"content": content, "repoType": repo_type}, + headers=headers, + ) + # Handle warnings (example: empty metadata) + response_content = response.json() + message = "\n".join([f"- {warning.get('message')}" for warning in response_content.get("warnings", [])]) + if message: + warnings.warn(f"Warnings while validating metadata in README.md:\n{message}") + + # Raise on errors + try: + hf_raise_for_status(response) + except BadRequestError as e: + errors = response_content.get("errors", []) + message = "\n".join([f"- {error.get('message')}" for error in errors]) + raise ValueError(f"Invalid metadata in README.md.\n{message}") from e + + def get_user_overview(self, username: str, token: Union[bool, str, None] = None) -> User: + """ + Get an overview of a user on the Hub. + + Args: + username (`str`): + Username of the user to get an overview of. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `User`: A [`User`] object with the user's overview. + + Raises: + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 404 If the user does not exist on the Hub. + """ + r = get_session().get( + f"{constants.ENDPOINT}/api/users/{username}/overview", headers=self._build_hf_headers(token=token) + ) + hf_raise_for_status(r) + return User(**r.json()) + + def list_organization_members(self, organization: str, token: Union[bool, str, None] = None) -> Iterable[User]: + """ + List of members of an organization on the Hub. + + Args: + organization (`str`): + Name of the organization to get the members of. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `Iterable[User]`: A list of [`User`] objects with the members of the organization. + + Raises: + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 404 If the organization does not exist on the Hub. + + """ + for member in paginate( + path=f"{constants.ENDPOINT}/api/organizations/{organization}/members", + params={}, + headers=self._build_hf_headers(token=token), + ): + yield User(**member) + + def list_user_followers(self, username: str, token: Union[bool, str, None] = None) -> Iterable[User]: + """ + Get the list of followers of a user on the Hub. + + Args: + username (`str`): + Username of the user to get the followers of. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `Iterable[User]`: A list of [`User`] objects with the followers of the user. + + Raises: + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 404 If the user does not exist on the Hub. + + """ + for follower in paginate( + path=f"{constants.ENDPOINT}/api/users/{username}/followers", + params={}, + headers=self._build_hf_headers(token=token), + ): + yield User(**follower) + + def list_user_following(self, username: str, token: Union[bool, str, None] = None) -> Iterable[User]: + """ + Get the list of users followed by a user on the Hub. + + Args: + username (`str`): + Username of the user to get the users followed by. + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `Iterable[User]`: A list of [`User`] objects with the users followed by the user. + + Raises: + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 404 If the user does not exist on the Hub. + + """ + for followed_user in paginate( + path=f"{constants.ENDPOINT}/api/users/{username}/following", + params={}, + headers=self._build_hf_headers(token=token), + ): + yield User(**followed_user) + + def list_papers( + self, + *, + query: Optional[str] = None, + token: Union[bool, str, None] = None, + ) -> Iterable[PaperInfo]: + """ + List daily papers on the Hugging Face Hub given a search query. + + Args: + query (`str`, *optional*): + A search query string to find papers. + If provided, returns papers that match the query. + token (Union[bool, str, None], *optional*): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + To disable authentication, pass `False`. + + Returns: + `Iterable[PaperInfo]`: an iterable of [`huggingface_hub.hf_api.PaperInfo`] objects. + + Example: + + ```python + >>> from huggingface_hub import HfApi + + >>> api = HfApi() + + # List all papers with "attention" in their title + >>> api.list_papers(query="attention") + ``` + """ + path = f"{self.endpoint}/api/papers/search" + params = {} + if query: + params["q"] = query + r = get_session().get( + path, + params=params, + headers=self._build_hf_headers(token=token), + ) + hf_raise_for_status(r) + for paper in r.json(): + yield PaperInfo(**paper) + + def paper_info(self, id: str) -> PaperInfo: + """ + Get information for a paper on the Hub. + + Args: + id (`str`, **optional**): + ArXiv id of the paper. + + Returns: + `PaperInfo`: A `PaperInfo` object. + + Raises: + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): + HTTP 404 If the paper does not exist on the Hub. + """ + path = f"{self.endpoint}/api/papers/{id}" + r = get_session().get(path) + hf_raise_for_status(r) + return PaperInfo(**r.json()) + + def auth_check( + self, repo_id: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None + ) -> None: + """ + Check if the provided user token has access to a specific repository on the Hugging Face Hub. + + This method verifies whether the user, authenticated via the provided token, has access to the specified + repository. If the repository is not found or if the user lacks the required permissions to access it, + the method raises an appropriate exception. + + Args: + repo_id (`str`): + The repository to check for access. Format should be `"user/repo_name"`. + Example: `"user/my-cool-model"`. + + repo_type (`str`, *optional*): + The type of the repository. Should be one of `"model"`, `"dataset"`, or `"space"`. + If not specified, the default is `"model"`. + + token `(Union[bool, str, None]`, *optional*): + A valid user access token. If not provided, the locally saved token will be used, which is the + recommended authentication method. Set to `False` to disable authentication. + Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. + + Raises: + [`~utils.RepositoryNotFoundError`]: + Raised if the repository does not exist, is private, or the user does not have access. This can + occur if the `repo_id` or `repo_type` is incorrect or if the repository is private but the user + is not authenticated. + + [`~utils.GatedRepoError`]: + Raised if the repository exists but is gated and the user is not authorized to access it. + + Example: + Check if the user has access to a repository: + + ```python + >>> from huggingface_hub import auth_check + >>> from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError + + try: + auth_check("user/my-cool-model") + except GatedRepoError: + # Handle gated repository error + print("You do not have permission to access this gated repository.") + except RepositoryNotFoundError: + # Handle repository not found error + print("The repository was not found or you do not have access.") + ``` + + In this example: + - If the user has access, the method completes successfully. + - If the repository is gated or does not exist, appropriate exceptions are raised, allowing the user + to handle them accordingly. + """ + headers = self._build_hf_headers(token=token) + if repo_type is None: + repo_type = constants.REPO_TYPE_MODEL + if repo_type not in constants.REPO_TYPES: + raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") + path = f"{self.endpoint}/api/{repo_type}s/{repo_id}/auth-check" + r = get_session().get(path, headers=headers) + hf_raise_for_status(r) + + +def _parse_revision_from_pr_url(pr_url: str) -> str: + """Safely parse revision number from a PR url. + + Example: + ```py + >>> _parse_revision_from_pr_url("https://huggingface.co/bigscience/bloom/discussions/2") + "refs/pr/2" + ``` + """ + re_match = re.match(_REGEX_DISCUSSION_URL, pr_url) + if re_match is None: + raise RuntimeError(f"Unexpected response from the hub, expected a Pull Request URL but got: '{pr_url}'") + return f"refs/pr/{re_match[1]}" + + +api = HfApi() + +whoami = api.whoami +auth_check = api.auth_check +get_token_permission = api.get_token_permission + +list_models = api.list_models +model_info = api.model_info + +list_datasets = api.list_datasets +dataset_info = api.dataset_info + +list_spaces = api.list_spaces +space_info = api.space_info + +list_papers = api.list_papers +paper_info = api.paper_info + +repo_exists = api.repo_exists +revision_exists = api.revision_exists +file_exists = api.file_exists +repo_info = api.repo_info +list_repo_files = api.list_repo_files +list_repo_refs = api.list_repo_refs +list_repo_commits = api.list_repo_commits +list_repo_tree = api.list_repo_tree +get_paths_info = api.get_paths_info + +get_model_tags = api.get_model_tags +get_dataset_tags = api.get_dataset_tags + +create_commit = api.create_commit +create_repo = api.create_repo +delete_repo = api.delete_repo +update_repo_visibility = api.update_repo_visibility +update_repo_settings = api.update_repo_settings +super_squash_history = api.super_squash_history +move_repo = api.move_repo +upload_file = api.upload_file +upload_folder = api.upload_folder +delete_file = api.delete_file +delete_folder = api.delete_folder +delete_files = api.delete_files +upload_large_folder = api.upload_large_folder +preupload_lfs_files = api.preupload_lfs_files +create_branch = api.create_branch +delete_branch = api.delete_branch +create_tag = api.create_tag +delete_tag = api.delete_tag +get_full_repo_name = api.get_full_repo_name + +# Safetensors helpers +get_safetensors_metadata = api.get_safetensors_metadata +parse_safetensors_file_metadata = api.parse_safetensors_file_metadata + +# Background jobs +run_as_future = api.run_as_future + +# Activity API +list_liked_repos = api.list_liked_repos +list_repo_likers = api.list_repo_likers +like = api.like +unlike = api.unlike + +# Community API +get_discussion_details = api.get_discussion_details +get_repo_discussions = api.get_repo_discussions +create_discussion = api.create_discussion +create_pull_request = api.create_pull_request +change_discussion_status = api.change_discussion_status +comment_discussion = api.comment_discussion +edit_discussion_comment = api.edit_discussion_comment +rename_discussion = api.rename_discussion +merge_pull_request = api.merge_pull_request + +# Space API +add_space_secret = api.add_space_secret +delete_space_secret = api.delete_space_secret +get_space_variables = api.get_space_variables +add_space_variable = api.add_space_variable +delete_space_variable = api.delete_space_variable +get_space_runtime = api.get_space_runtime +request_space_hardware = api.request_space_hardware +set_space_sleep_time = api.set_space_sleep_time +pause_space = api.pause_space +restart_space = api.restart_space +duplicate_space = api.duplicate_space +request_space_storage = api.request_space_storage +delete_space_storage = api.delete_space_storage + +# Inference Endpoint API +list_inference_endpoints = api.list_inference_endpoints +create_inference_endpoint = api.create_inference_endpoint +get_inference_endpoint = api.get_inference_endpoint +update_inference_endpoint = api.update_inference_endpoint +delete_inference_endpoint = api.delete_inference_endpoint +pause_inference_endpoint = api.pause_inference_endpoint +resume_inference_endpoint = api.resume_inference_endpoint +scale_to_zero_inference_endpoint = api.scale_to_zero_inference_endpoint + +# Collections API +get_collection = api.get_collection +list_collections = api.list_collections +create_collection = api.create_collection +update_collection_metadata = api.update_collection_metadata +delete_collection = api.delete_collection +add_collection_item = api.add_collection_item +update_collection_item = api.update_collection_item +delete_collection_item = api.delete_collection_item +delete_collection_item = api.delete_collection_item + +# Access requests API +list_pending_access_requests = api.list_pending_access_requests +list_accepted_access_requests = api.list_accepted_access_requests +list_rejected_access_requests = api.list_rejected_access_requests +cancel_access_request = api.cancel_access_request +accept_access_request = api.accept_access_request +reject_access_request = api.reject_access_request +grant_access = api.grant_access + +# Webhooks API +create_webhook = api.create_webhook +disable_webhook = api.disable_webhook +delete_webhook = api.delete_webhook +enable_webhook = api.enable_webhook +get_webhook = api.get_webhook +list_webhooks = api.list_webhooks +update_webhook = api.update_webhook + + +# User API +get_user_overview = api.get_user_overview +list_organization_members = api.list_organization_members +list_user_followers = api.list_user_followers +list_user_following = api.list_user_following diff --git a/meow/lib/python3.13/site-packages/huggingface_hub/inference_api.py b/meow/lib/python3.13/site-packages/huggingface_hub/inference_api.py new file mode 100644 index 0000000000000000000000000000000000000000..f895fcc61c3867838b013ecd3f6789cbc010b5b3 --- /dev/null +++ b/meow/lib/python3.13/site-packages/huggingface_hub/inference_api.py @@ -0,0 +1,217 @@ +import io +from typing import Any, Dict, List, Optional, Union + +from . import constants +from .hf_api import HfApi +from .utils import build_hf_headers, get_session, is_pillow_available, logging, validate_hf_hub_args +from .utils._deprecation import _deprecate_method + + +logger = logging.get_logger(__name__) + + +ALL_TASKS = [ + # NLP + "text-classification", + "token-classification", + "table-question-answering", + "question-answering", + "zero-shot-classification", + "translation", + "summarization", + "conversational", + "feature-extraction", + "text-generation", + "text2text-generation", + "fill-mask", + "sentence-similarity", + # Audio + "text-to-speech", + "automatic-speech-recognition", + "audio-to-audio", + "audio-classification", + "voice-activity-detection", + # Computer vision + "image-classification", + "object-detection", + "image-segmentation", + "text-to-image", + "image-to-image", + # Others + "tabular-classification", + "tabular-regression", +] + + +class InferenceApi: + """Client to configure requests and make calls to the HuggingFace Inference API. + + Example: + + ```python + >>> from huggingface_hub.inference_api import InferenceApi + + >>> # Mask-fill example + >>> inference = InferenceApi("bert-base-uncased") + >>> inference(inputs="The goal of life is [MASK].") + [{'sequence': 'the goal of life is life.', 'score': 0.10933292657136917, 'token': 2166, 'token_str': 'life'}] + + >>> # Question Answering example + >>> inference = InferenceApi("deepset/roberta-base-squad2") + >>> inputs = { + ... "question": "What's my name?", + ... "context": "My name is Clara and I live in Berkeley.", + ... } + >>> inference(inputs) + {'score': 0.9326569437980652, 'start': 11, 'end': 16, 'answer': 'Clara'} + + >>> # Zero-shot example + >>> inference = InferenceApi("typeform/distilbert-base-uncased-mnli") + >>> inputs = "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!" + >>> params = {"candidate_labels": ["refund", "legal", "faq"]} + >>> inference(inputs, params) + {'sequence': 'Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!', 'labels': ['refund', 'faq', 'legal'], 'scores': [0.9378499388694763, 0.04914155602455139, 0.013008488342165947]} + + >>> # Overriding configured task + >>> inference = InferenceApi("bert-base-uncased", task="feature-extraction") + + >>> # Text-to-image + >>> inference = InferenceApi("stabilityai/stable-diffusion-2-1") + >>> inference("cat") + + + >>> # Return as raw response to parse the output yourself + >>> inference = InferenceApi("mio/amadeus") + >>> response = inference("hello world", raw_response=True) + >>> response.headers + {"Content-Type": "audio/flac", ...} + >>> response.content # raw bytes from server + b'(...)' + ``` + """ + + @validate_hf_hub_args + @_deprecate_method( + version="1.0", + message=( + "`InferenceApi` client is deprecated in favor of the more feature-complete `InferenceClient`. Check out" + " this guide to learn how to convert your script to use it:" + " https://huggingface.co/docs/huggingface_hub/guides/inference#legacy-inferenceapi-client." + ), + ) + def __init__( + self, + repo_id: str, + task: Optional[str] = None, + token: Optional[str] = None, + gpu: bool = False, + ): + """Inits headers and API call information. + + Args: + repo_id (``str``): + Id of repository (e.g. `user/bert-base-uncased`). + task (``str``, `optional`, defaults ``None``): + Whether to force a task instead of using task specified in the + repository. + token (`str`, `optional`): + The API token to use as HTTP bearer authorization. This is not + the authentication token. You can find the token in + https://huggingface.co/settings/token. Alternatively, you can + find both your organizations and personal API tokens using + `HfApi().whoami(token)`. + gpu (`bool`, `optional`, defaults `False`): + Whether to use GPU instead of CPU for inference(requires Startup + plan at least). + """ + self.options = {"wait_for_model": True, "use_gpu": gpu} + self.headers = build_hf_headers(token=token) + + # Configure task + model_info = HfApi(token=token).model_info(repo_id=repo_id) + if not model_info.pipeline_tag and not task: + raise ValueError( + "Task not specified in the repository. Please add it to the model card" + " using pipeline_tag" + " (https://huggingface.co/docs#how-is-a-models-type-of-inference-api-and-widget-determined)" + ) + + if task and task != model_info.pipeline_tag: + if task not in ALL_TASKS: + raise ValueError(f"Invalid task {task}. Make sure it's valid.") + + logger.warning( + "You're using a different task than the one specified in the" + " repository. Be sure to know what you're doing :)" + ) + self.task = task + else: + assert model_info.pipeline_tag is not None, "Pipeline tag cannot be None" + self.task = model_info.pipeline_tag + + self.api_url = f"{constants.INFERENCE_ENDPOINT}/pipeline/{self.task}/{repo_id}" + + def __repr__(self): + # Do not add headers to repr to avoid leaking token. + return f"InferenceAPI(api_url='{self.api_url}', task='{self.task}', options={self.options})" + + def __call__( + self, + inputs: Optional[Union[str, Dict, List[str], List[List[str]]]] = None, + params: Optional[Dict] = None, + data: Optional[bytes] = None, + raw_response: bool = False, + ) -> Any: + """Make a call to the Inference API. + + Args: + inputs (`str` or `Dict` or `List[str]` or `List[List[str]]`, *optional*): + Inputs for the prediction. + params (`Dict`, *optional*): + Additional parameters for the models. Will be sent as `parameters` in the + payload. + data (`bytes`, *optional*): + Bytes content of the request. In this case, leave `inputs` and `params` empty. + raw_response (`bool`, defaults to `False`): + If `True`, the raw `Response` object is returned. You can parse its content + as preferred. By default, the content is parsed into a more practical format + (json dictionary or PIL Image for example). + """ + # Build payload + payload: Dict[str, Any] = { + "options": self.options, + } + if inputs: + payload["inputs"] = inputs + if params: + payload["parameters"] = params + + # Make API call + response = get_session().post(self.api_url, headers=self.headers, json=payload, data=data) + + # Let the user handle the response + if raw_response: + return response + + # By default, parse the response for the user. + content_type = response.headers.get("Content-Type") or "" + if content_type.startswith("image"): + if not is_pillow_available(): + raise ImportError( + f"Task '{self.task}' returned as image but Pillow is not installed." + " Please install it (`pip install Pillow`) or pass" + " `raw_response=True` to get the raw `Response` object and parse" + " the image by yourself." + ) + + from PIL import Image + + return Image.open(io.BytesIO(response.content)) + elif content_type == "application/json": + return response.json() + else: + raise NotImplementedError( + f"{content_type} output type is not implemented yet. You can pass" + " `raw_response=True` to get the raw `Response` object and parse the" + " output by yourself." + ) diff --git a/meow/lib/python3.13/site-packages/huggingface_hub/lfs.py b/meow/lib/python3.13/site-packages/huggingface_hub/lfs.py new file mode 100644 index 0000000000000000000000000000000000000000..c2d4f36829dfe941ce60c8b711c1cc912e8c324a --- /dev/null +++ b/meow/lib/python3.13/site-packages/huggingface_hub/lfs.py @@ -0,0 +1,460 @@ +# coding=utf-8 +# Copyright 2019-present, the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Git LFS related type definitions and utilities""" + +import inspect +import io +import re +import warnings +from dataclasses import dataclass +from math import ceil +from os.path import getsize +from pathlib import Path +from typing import TYPE_CHECKING, BinaryIO, Dict, Iterable, List, Optional, Tuple, TypedDict +from urllib.parse import unquote + +from huggingface_hub import constants + +from .utils import ( + build_hf_headers, + fix_hf_endpoint_in_url, + get_session, + hf_raise_for_status, + http_backoff, + logging, + tqdm, + validate_hf_hub_args, +) +from .utils._lfs import SliceFileObj +from .utils.sha import sha256, sha_fileobj +from .utils.tqdm import is_tqdm_disabled + + +if TYPE_CHECKING: + from ._commit_api import CommitOperationAdd + +logger = logging.get_logger(__name__) + +OID_REGEX = re.compile(r"^[0-9a-f]{40}$") + +LFS_MULTIPART_UPLOAD_COMMAND = "lfs-multipart-upload" + +LFS_HEADERS = { + "Accept": "application/vnd.git-lfs+json", + "Content-Type": "application/vnd.git-lfs+json", +} + + +@dataclass +class UploadInfo: + """ + Dataclass holding required information to determine whether a blob + should be uploaded to the hub using the LFS protocol or the regular protocol + + Args: + sha256 (`bytes`): + SHA256 hash of the blob + size (`int`): + Size in bytes of the blob + sample (`bytes`): + First 512 bytes of the blob + """ + + sha256: bytes + size: int + sample: bytes + + @classmethod + def from_path(cls, path: str): + size = getsize(path) + with io.open(path, "rb") as file: + sample = file.peek(512)[:512] + sha = sha_fileobj(file) + return cls(size=size, sha256=sha, sample=sample) + + @classmethod + def from_bytes(cls, data: bytes): + sha = sha256(data).digest() + return cls(size=len(data), sample=data[:512], sha256=sha) + + @classmethod + def from_fileobj(cls, fileobj: BinaryIO): + sample = fileobj.read(512) + fileobj.seek(0, io.SEEK_SET) + sha = sha_fileobj(fileobj) + size = fileobj.tell() + fileobj.seek(0, io.SEEK_SET) + return cls(size=size, sha256=sha, sample=sample) + + +@validate_hf_hub_args +def post_lfs_batch_info( + upload_infos: Iterable[UploadInfo], + token: Optional[str], + repo_type: str, + repo_id: str, + revision: Optional[str] = None, + endpoint: Optional[str] = None, + headers: Optional[Dict[str, str]] = None, +) -> Tuple[List[dict], List[dict]]: + """ + Requests the LFS batch endpoint to retrieve upload instructions + + Learn more: https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md + + Args: + upload_infos (`Iterable` of `UploadInfo`): + `UploadInfo` for the files that are being uploaded, typically obtained + from `CommitOperationAdd.upload_info` + repo_type (`str`): + Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`. + repo_id (`str`): + A namespace (user or an organization) and a repo name separated + by a `/`. + revision (`str`, *optional*): + The git revision to upload to. + headers (`dict`, *optional*): + Additional headers to include in the request + + Returns: + `LfsBatchInfo`: 2-tuple: + - First element is the list of upload instructions from the server + - Second element is an list of errors, if any + + Raises: + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If an argument is invalid or the server response is malformed. + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) + If the server returned an error. + """ + endpoint = endpoint if endpoint is not None else constants.ENDPOINT + url_prefix = "" + if repo_type in constants.REPO_TYPES_URL_PREFIXES: + url_prefix = constants.REPO_TYPES_URL_PREFIXES[repo_type] + batch_url = f"{endpoint}/{url_prefix}{repo_id}.git/info/lfs/objects/batch" + payload: Dict = { + "operation": "upload", + "transfers": ["basic", "multipart"], + "objects": [ + { + "oid": upload.sha256.hex(), + "size": upload.size, + } + for upload in upload_infos + ], + "hash_algo": "sha256", + } + if revision is not None: + payload["ref"] = {"name": unquote(revision)} # revision has been previously 'quoted' + + headers = { + **LFS_HEADERS, + **build_hf_headers(token=token), + **(headers or {}), + } + resp = get_session().post(batch_url, headers=headers, json=payload) + hf_raise_for_status(resp) + batch_info = resp.json() + + objects = batch_info.get("objects", None) + if not isinstance(objects, list): + raise ValueError("Malformed response from server") + + return ( + [_validate_batch_actions(obj) for obj in objects if "error" not in obj], + [_validate_batch_error(obj) for obj in objects if "error" in obj], + ) + + +class PayloadPartT(TypedDict): + partNumber: int + etag: str + + +class CompletionPayloadT(TypedDict): + """Payload that will be sent to the Hub when uploading multi-part.""" + + oid: str + parts: List[PayloadPartT] + + +def lfs_upload( + operation: "CommitOperationAdd", + lfs_batch_action: Dict, + token: Optional[str] = None, + headers: Optional[Dict[str, str]] = None, + endpoint: Optional[str] = None, +) -> None: + """ + Handles uploading a given object to the Hub with the LFS protocol. + + Can be a No-op if the content of the file is already present on the hub large file storage. + + Args: + operation (`CommitOperationAdd`): + The add operation triggering this upload. + lfs_batch_action (`dict`): + Upload instructions from the LFS batch endpoint for this object. See [`~utils.lfs.post_lfs_batch_info`] for + more details. + headers (`dict`, *optional*): + Headers to include in the request, including authentication and user agent headers. + + Raises: + [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) + If `lfs_batch_action` is improperly formatted + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) + If the upload resulted in an error + """ + # 0. If LFS file is already present, skip upload + _validate_batch_actions(lfs_batch_action) + actions = lfs_batch_action.get("actions") + if actions is None: + # The file was already uploaded + logger.debug(f"Content of file {operation.path_in_repo} is already present upstream - skipping upload") + return + + # 1. Validate server response (check required keys in dict) + upload_action = lfs_batch_action["actions"]["upload"] + _validate_lfs_action(upload_action) + verify_action = lfs_batch_action["actions"].get("verify") + if verify_action is not None: + _validate_lfs_action(verify_action) + + # 2. Upload file (either single part or multi-part) + header = upload_action.get("header", {}) + chunk_size = header.get("chunk_size") + upload_url = fix_hf_endpoint_in_url(upload_action["href"], endpoint=endpoint) + if chunk_size is not None: + try: + chunk_size = int(chunk_size) + except (ValueError, TypeError): + raise ValueError( + f"Malformed response from LFS batch endpoint: `chunk_size` should be an integer. Got '{chunk_size}'." + ) + _upload_multi_part(operation=operation, header=header, chunk_size=chunk_size, upload_url=upload_url) + else: + _upload_single_part(operation=operation, upload_url=upload_url) + + # 3. Verify upload went well + if verify_action is not None: + _validate_lfs_action(verify_action) + verify_url = fix_hf_endpoint_in_url(verify_action["href"], endpoint) + verify_resp = get_session().post( + verify_url, + headers=build_hf_headers(token=token, headers=headers), + json={"oid": operation.upload_info.sha256.hex(), "size": operation.upload_info.size}, + ) + hf_raise_for_status(verify_resp) + logger.debug(f"{operation.path_in_repo}: Upload successful") + + +def _validate_lfs_action(lfs_action: dict): + """validates response from the LFS batch endpoint""" + if not ( + isinstance(lfs_action.get("href"), str) + and (lfs_action.get("header") is None or isinstance(lfs_action.get("header"), dict)) + ): + raise ValueError("lfs_action is improperly formatted") + return lfs_action + + +def _validate_batch_actions(lfs_batch_actions: dict): + """validates response from the LFS batch endpoint""" + if not (isinstance(lfs_batch_actions.get("oid"), str) and isinstance(lfs_batch_actions.get("size"), int)): + raise ValueError("lfs_batch_actions is improperly formatted") + + upload_action = lfs_batch_actions.get("actions", {}).get("upload") + verify_action = lfs_batch_actions.get("actions", {}).get("verify") + if upload_action is not None: + _validate_lfs_action(upload_action) + if verify_action is not None: + _validate_lfs_action(verify_action) + return lfs_batch_actions + + +def _validate_batch_error(lfs_batch_error: dict): + """validates response from the LFS batch endpoint""" + if not (isinstance(lfs_batch_error.get("oid"), str) and isinstance(lfs_batch_error.get("size"), int)): + raise ValueError("lfs_batch_error is improperly formatted") + error_info = lfs_batch_error.get("error") + if not ( + isinstance(error_info, dict) + and isinstance(error_info.get("message"), str) + and isinstance(error_info.get("code"), int) + ): + raise ValueError("lfs_batch_error is improperly formatted") + return lfs_batch_error + + +def _upload_single_part(operation: "CommitOperationAdd", upload_url: str) -> None: + """ + Uploads `fileobj` as a single PUT HTTP request (basic LFS transfer protocol) + + Args: + upload_url (`str`): + The URL to PUT the file to. + fileobj: + The file-like object holding the data to upload. + + Returns: `requests.Response` + + Raises: + [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) + If the upload resulted in an error. + """ + with operation.as_file(with_tqdm=True) as fileobj: + # S3 might raise a transient 500 error -> let's retry if that happens + response = http_backoff("PUT", upload_url, data=fileobj, retry_on_status_codes=(500, 502, 503, 504)) + hf_raise_for_status(response) + + +def _upload_multi_part(operation: "CommitOperationAdd", header: Dict, chunk_size: int, upload_url: str) -> None: + """ + Uploads file using HF multipart LFS transfer protocol. + """ + # 1. Get upload URLs for each part + sorted_parts_urls = _get_sorted_parts_urls(header=header, upload_info=operation.upload_info, chunk_size=chunk_size) + + # 2. Upload parts (either with hf_transfer or in pure Python) + use_hf_transfer = constants.HF_HUB_ENABLE_HF_TRANSFER + if ( + constants.HF_HUB_ENABLE_HF_TRANSFER + and not isinstance(operation.path_or_fileobj, str) + and not isinstance(operation.path_or_fileobj, Path) + ): + warnings.warn( + "hf_transfer is enabled but does not support uploading from bytes or BinaryIO, falling back to regular" + " upload" + ) + use_hf_transfer = False + + response_headers = ( + _upload_parts_hf_transfer(operation=operation, sorted_parts_urls=sorted_parts_urls, chunk_size=chunk_size) + if use_hf_transfer + else _upload_parts_iteratively(operation=operation, sorted_parts_urls=sorted_parts_urls, chunk_size=chunk_size) + ) + + # 3. Send completion request + completion_res = get_session().post( + upload_url, + json=_get_completion_payload(response_headers, operation.upload_info.sha256.hex()), + headers=LFS_HEADERS, + ) + hf_raise_for_status(completion_res) + + +def _get_sorted_parts_urls(header: Dict, upload_info: UploadInfo, chunk_size: int) -> List[str]: + sorted_part_upload_urls = [ + upload_url + for _, upload_url in sorted( + [ + (int(part_num, 10), upload_url) + for part_num, upload_url in header.items() + if part_num.isdigit() and len(part_num) > 0 + ], + key=lambda t: t[0], + ) + ] + num_parts = len(sorted_part_upload_urls) + if num_parts != ceil(upload_info.size / chunk_size): + raise ValueError("Invalid server response to upload large LFS file") + return sorted_part_upload_urls + + +def _get_completion_payload(response_headers: List[Dict], oid: str) -> CompletionPayloadT: + parts: List[PayloadPartT] = [] + for part_number, header in enumerate(response_headers): + etag = header.get("etag") + if etag is None or etag == "": + raise ValueError(f"Invalid etag (`{etag}`) returned for part {part_number + 1}") + parts.append( + { + "partNumber": part_number + 1, + "etag": etag, + } + ) + return {"oid": oid, "parts": parts} + + +def _upload_parts_iteratively( + operation: "CommitOperationAdd", sorted_parts_urls: List[str], chunk_size: int +) -> List[Dict]: + headers = [] + with operation.as_file(with_tqdm=True) as fileobj: + for part_idx, part_upload_url in enumerate(sorted_parts_urls): + with SliceFileObj( + fileobj, + seek_from=chunk_size * part_idx, + read_limit=chunk_size, + ) as fileobj_slice: + # S3 might raise a transient 500 error -> let's retry if that happens + part_upload_res = http_backoff( + "PUT", part_upload_url, data=fileobj_slice, retry_on_status_codes=(500, 502, 503, 504) + ) + hf_raise_for_status(part_upload_res) + headers.append(part_upload_res.headers) + return headers # type: ignore + + +def _upload_parts_hf_transfer( + operation: "CommitOperationAdd", sorted_parts_urls: List[str], chunk_size: int +) -> List[Dict]: + # Upload file using an external Rust-based package. Upload is faster but support less features (no progress bars). + try: + from hf_transfer import multipart_upload + except ImportError: + raise ValueError( + "Fast uploading using 'hf_transfer' is enabled (HF_HUB_ENABLE_HF_TRANSFER=1) but 'hf_transfer' package is" + " not available in your environment. Try `pip install hf_transfer`." + ) + + supports_callback = "callback" in inspect.signature(multipart_upload).parameters + if not supports_callback: + warnings.warn( + "You are using an outdated version of `hf_transfer`. Consider upgrading to latest version to enable progress bars using `pip install -U hf_transfer`." + ) + + total = operation.upload_info.size + desc = operation.path_in_repo + if len(desc) > 40: + desc = f"(…){desc[-40:]}" + + with tqdm( + unit="B", + unit_scale=True, + total=total, + initial=0, + desc=desc, + disable=is_tqdm_disabled(logger.getEffectiveLevel()), + name="huggingface_hub.lfs_upload", + ) as progress: + try: + output = multipart_upload( + file_path=operation.path_or_fileobj, + parts_urls=sorted_parts_urls, + chunk_size=chunk_size, + max_files=128, + parallel_failures=127, # could be removed + max_retries=5, + **({"callback": progress.update} if supports_callback else {}), + ) + except Exception as e: + raise RuntimeError( + "An error occurred while uploading using `hf_transfer`. Consider disabling HF_HUB_ENABLE_HF_TRANSFER for" + " better error handling." + ) from e + if not supports_callback: + progress.update(total) + return output diff --git a/meow/lib/python3.13/site-packages/idna/__init__.py b/meow/lib/python3.13/site-packages/idna/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cfdc030a751b089fc7e38fc88093b791605d501d --- /dev/null +++ b/meow/lib/python3.13/site-packages/idna/__init__.py @@ -0,0 +1,45 @@ +from .core import ( + IDNABidiError, + IDNAError, + InvalidCodepoint, + InvalidCodepointContext, + alabel, + check_bidi, + check_hyphen_ok, + check_initial_combiner, + check_label, + check_nfc, + decode, + encode, + ulabel, + uts46_remap, + valid_contextj, + valid_contexto, + valid_label_length, + valid_string_length, +) +from .intranges import intranges_contain +from .package_data import __version__ + +__all__ = [ + "__version__", + "IDNABidiError", + "IDNAError", + "InvalidCodepoint", + "InvalidCodepointContext", + "alabel", + "check_bidi", + "check_hyphen_ok", + "check_initial_combiner", + "check_label", + "check_nfc", + "decode", + "encode", + "intranges_contain", + "ulabel", + "uts46_remap", + "valid_contextj", + "valid_contexto", + "valid_label_length", + "valid_string_length", +] diff --git a/meow/lib/python3.13/site-packages/idna/compat.py b/meow/lib/python3.13/site-packages/idna/compat.py new file mode 100644 index 0000000000000000000000000000000000000000..1df9f2a70e6815908f2784e88897a9a359eef84c --- /dev/null +++ b/meow/lib/python3.13/site-packages/idna/compat.py @@ -0,0 +1,15 @@ +from typing import Any, Union + +from .core import decode, encode + + +def ToASCII(label: str) -> bytes: + return encode(label) + + +def ToUnicode(label: Union[bytes, bytearray]) -> str: + return decode(label) + + +def nameprep(s: Any) -> None: + raise NotImplementedError("IDNA 2008 does not utilise nameprep protocol") diff --git a/meow/lib/python3.13/site-packages/idna/core.py b/meow/lib/python3.13/site-packages/idna/core.py new file mode 100644 index 0000000000000000000000000000000000000000..9115f123f0274832af5ba1cf3c5481cc5353eecd --- /dev/null +++ b/meow/lib/python3.13/site-packages/idna/core.py @@ -0,0 +1,437 @@ +import bisect +import re +import unicodedata +from typing import Optional, Union + +from . import idnadata +from .intranges import intranges_contain + +_virama_combining_class = 9 +_alabel_prefix = b"xn--" +_unicode_dots_re = re.compile("[\u002e\u3002\uff0e\uff61]") + + +class IDNAError(UnicodeError): + """Base exception for all IDNA-encoding related problems""" + + pass + + +class IDNABidiError(IDNAError): + """Exception when bidirectional requirements are not satisfied""" + + pass + + +class InvalidCodepoint(IDNAError): + """Exception when a disallowed or unallocated codepoint is used""" + + pass + + +class InvalidCodepointContext(IDNAError): + """Exception when the codepoint is not valid in the context it is used""" + + pass + + +def _combining_class(cp: int) -> int: + v = unicodedata.combining(chr(cp)) + if v == 0: + if not unicodedata.name(chr(cp)): + raise ValueError("Unknown character in unicodedata") + return v + + +def _is_script(cp: str, script: str) -> bool: + return intranges_contain(ord(cp), idnadata.scripts[script]) + + +def _punycode(s: str) -> bytes: + return s.encode("punycode") + + +def _unot(s: int) -> str: + return "U+{:04X}".format(s) + + +def valid_label_length(label: Union[bytes, str]) -> bool: + if len(label) > 63: + return False + return True + + +def valid_string_length(label: Union[bytes, str], trailing_dot: bool) -> bool: + if len(label) > (254 if trailing_dot else 253): + return False + return True + + +def check_bidi(label: str, check_ltr: bool = False) -> bool: + # Bidi rules should only be applied if string contains RTL characters + bidi_label = False + for idx, cp in enumerate(label, 1): + direction = unicodedata.bidirectional(cp) + if direction == "": + # String likely comes from a newer version of Unicode + raise IDNABidiError("Unknown directionality in label {} at position {}".format(repr(label), idx)) + if direction in ["R", "AL", "AN"]: + bidi_label = True + if not bidi_label and not check_ltr: + return True + + # Bidi rule 1 + direction = unicodedata.bidirectional(label[0]) + if direction in ["R", "AL"]: + rtl = True + elif direction == "L": + rtl = False + else: + raise IDNABidiError("First codepoint in label {} must be directionality L, R or AL".format(repr(label))) + + valid_ending = False + number_type: Optional[str] = None + for idx, cp in enumerate(label, 1): + direction = unicodedata.bidirectional(cp) + + if rtl: + # Bidi rule 2 + if direction not in [ + "R", + "AL", + "AN", + "EN", + "ES", + "CS", + "ET", + "ON", + "BN", + "NSM", + ]: + raise IDNABidiError("Invalid direction for codepoint at position {} in a right-to-left label".format(idx)) + # Bidi rule 3 + if direction in ["R", "AL", "EN", "AN"]: + valid_ending = True + elif direction != "NSM": + valid_ending = False + # Bidi rule 4 + if direction in ["AN", "EN"]: + if not number_type: + number_type = direction + else: + if number_type != direction: + raise IDNABidiError("Can not mix numeral types in a right-to-left label") + else: + # Bidi rule 5 + if direction not in ["L", "EN", "ES", "CS", "ET", "ON", "BN", "NSM"]: + raise IDNABidiError("Invalid direction for codepoint at position {} in a left-to-right label".format(idx)) + # Bidi rule 6 + if direction in ["L", "EN"]: + valid_ending = True + elif direction != "NSM": + valid_ending = False + + if not valid_ending: + raise IDNABidiError("Label ends with illegal codepoint directionality") + + return True + + +def check_initial_combiner(label: str) -> bool: + if unicodedata.category(label[0])[0] == "M": + raise IDNAError("Label begins with an illegal combining character") + return True + + +def check_hyphen_ok(label: str) -> bool: + if label[2:4] == "--": + raise IDNAError("Label has disallowed hyphens in 3rd and 4th position") + if label[0] == "-" or label[-1] == "-": + raise IDNAError("Label must not start or end with a hyphen") + return True + + +def check_nfc(label: str) -> None: + if unicodedata.normalize("NFC", label) != label: + raise IDNAError("Label must be in Normalization Form C") + + +def valid_contextj(label: str, pos: int) -> bool: + cp_value = ord(label[pos]) + + if cp_value == 0x200C: + if pos > 0: + if _combining_class(ord(label[pos - 1])) == _virama_combining_class: + return True + + ok = False + for i in range(pos - 1, -1, -1): + joining_type = idnadata.joining_types.get(ord(label[i])) + if joining_type == ord("T"): + continue + elif joining_type in [ord("L"), ord("D")]: + ok = True + break + else: + break + + if not ok: + return False + + ok = False + for i in range(pos + 1, len(label)): + joining_type = idnadata.joining_types.get(ord(label[i])) + if joining_type == ord("T"): + continue + elif joining_type in [ord("R"), ord("D")]: + ok = True + break + else: + break + return ok + + if cp_value == 0x200D: + if pos > 0: + if _combining_class(ord(label[pos - 1])) == _virama_combining_class: + return True + return False + + else: + return False + + +def valid_contexto(label: str, pos: int, exception: bool = False) -> bool: + cp_value = ord(label[pos]) + + if cp_value == 0x00B7: + if 0 < pos < len(label) - 1: + if ord(label[pos - 1]) == 0x006C and ord(label[pos + 1]) == 0x006C: + return True + return False + + elif cp_value == 0x0375: + if pos < len(label) - 1 and len(label) > 1: + return _is_script(label[pos + 1], "Greek") + return False + + elif cp_value == 0x05F3 or cp_value == 0x05F4: + if pos > 0: + return _is_script(label[pos - 1], "Hebrew") + return False + + elif cp_value == 0x30FB: + for cp in label: + if cp == "\u30fb": + continue + if _is_script(cp, "Hiragana") or _is_script(cp, "Katakana") or _is_script(cp, "Han"): + return True + return False + + elif 0x660 <= cp_value <= 0x669: + for cp in label: + if 0x6F0 <= ord(cp) <= 0x06F9: + return False + return True + + elif 0x6F0 <= cp_value <= 0x6F9: + for cp in label: + if 0x660 <= ord(cp) <= 0x0669: + return False + return True + + return False + + +def check_label(label: Union[str, bytes, bytearray]) -> None: + if isinstance(label, (bytes, bytearray)): + label = label.decode("utf-8") + if len(label) == 0: + raise IDNAError("Empty Label") + + check_nfc(label) + check_hyphen_ok(label) + check_initial_combiner(label) + + for pos, cp in enumerate(label): + cp_value = ord(cp) + if intranges_contain(cp_value, idnadata.codepoint_classes["PVALID"]): + continue + elif intranges_contain(cp_value, idnadata.codepoint_classes["CONTEXTJ"]): + try: + if not valid_contextj(label, pos): + raise InvalidCodepointContext( + "Joiner {} not allowed at position {} in {}".format(_unot(cp_value), pos + 1, repr(label)) + ) + except ValueError: + raise IDNAError( + "Unknown codepoint adjacent to joiner {} at position {} in {}".format( + _unot(cp_value), pos + 1, repr(label) + ) + ) + elif intranges_contain(cp_value, idnadata.codepoint_classes["CONTEXTO"]): + if not valid_contexto(label, pos): + raise InvalidCodepointContext( + "Codepoint {} not allowed at position {} in {}".format(_unot(cp_value), pos + 1, repr(label)) + ) + else: + raise InvalidCodepoint( + "Codepoint {} at position {} of {} not allowed".format(_unot(cp_value), pos + 1, repr(label)) + ) + + check_bidi(label) + + +def alabel(label: str) -> bytes: + try: + label_bytes = label.encode("ascii") + ulabel(label_bytes) + if not valid_label_length(label_bytes): + raise IDNAError("Label too long") + return label_bytes + except UnicodeEncodeError: + pass + + check_label(label) + label_bytes = _alabel_prefix + _punycode(label) + + if not valid_label_length(label_bytes): + raise IDNAError("Label too long") + + return label_bytes + + +def ulabel(label: Union[str, bytes, bytearray]) -> str: + if not isinstance(label, (bytes, bytearray)): + try: + label_bytes = label.encode("ascii") + except UnicodeEncodeError: + check_label(label) + return label + else: + label_bytes = label + + label_bytes = label_bytes.lower() + if label_bytes.startswith(_alabel_prefix): + label_bytes = label_bytes[len(_alabel_prefix) :] + if not label_bytes: + raise IDNAError("Malformed A-label, no Punycode eligible content found") + if label_bytes.decode("ascii")[-1] == "-": + raise IDNAError("A-label must not end with a hyphen") + else: + check_label(label_bytes) + return label_bytes.decode("ascii") + + try: + label = label_bytes.decode("punycode") + except UnicodeError: + raise IDNAError("Invalid A-label") + check_label(label) + return label + + +def uts46_remap(domain: str, std3_rules: bool = True, transitional: bool = False) -> str: + """Re-map the characters in the string according to UTS46 processing.""" + from .uts46data import uts46data + + output = "" + + for pos, char in enumerate(domain): + code_point = ord(char) + try: + uts46row = uts46data[code_point if code_point < 256 else bisect.bisect_left(uts46data, (code_point, "Z")) - 1] + status = uts46row[1] + replacement: Optional[str] = None + if len(uts46row) == 3: + replacement = uts46row[2] + if ( + status == "V" + or (status == "D" and not transitional) + or (status == "3" and not std3_rules and replacement is None) + ): + output += char + elif replacement is not None and ( + status == "M" or (status == "3" and not std3_rules) or (status == "D" and transitional) + ): + output += replacement + elif status != "I": + raise IndexError() + except IndexError: + raise InvalidCodepoint( + "Codepoint {} not allowed at position {} in {}".format(_unot(code_point), pos + 1, repr(domain)) + ) + + return unicodedata.normalize("NFC", output) + + +def encode( + s: Union[str, bytes, bytearray], + strict: bool = False, + uts46: bool = False, + std3_rules: bool = False, + transitional: bool = False, +) -> bytes: + if not isinstance(s, str): + try: + s = str(s, "ascii") + except UnicodeDecodeError: + raise IDNAError("should pass a unicode string to the function rather than a byte string.") + if uts46: + s = uts46_remap(s, std3_rules, transitional) + trailing_dot = False + result = [] + if strict: + labels = s.split(".") + else: + labels = _unicode_dots_re.split(s) + if not labels or labels == [""]: + raise IDNAError("Empty domain") + if labels[-1] == "": + del labels[-1] + trailing_dot = True + for label in labels: + s = alabel(label) + if s: + result.append(s) + else: + raise IDNAError("Empty label") + if trailing_dot: + result.append(b"") + s = b".".join(result) + if not valid_string_length(s, trailing_dot): + raise IDNAError("Domain too long") + return s + + +def decode( + s: Union[str, bytes, bytearray], + strict: bool = False, + uts46: bool = False, + std3_rules: bool = False, +) -> str: + try: + if not isinstance(s, str): + s = str(s, "ascii") + except UnicodeDecodeError: + raise IDNAError("Invalid ASCII in A-label") + if uts46: + s = uts46_remap(s, std3_rules, False) + trailing_dot = False + result = [] + if not strict: + labels = _unicode_dots_re.split(s) + else: + labels = s.split(".") + if not labels or labels == [""]: + raise IDNAError("Empty domain") + if not labels[-1]: + del labels[-1] + trailing_dot = True + for label in labels: + s = ulabel(label) + if s: + result.append(s) + else: + raise IDNAError("Empty label") + if trailing_dot: + result.append("") + return ".".join(result) diff --git a/meow/lib/python3.13/site-packages/idna/idnadata.py b/meow/lib/python3.13/site-packages/idna/idnadata.py new file mode 100644 index 0000000000000000000000000000000000000000..4be6004622efcdc36a8d15efc0ac3e138a4bae02 --- /dev/null +++ b/meow/lib/python3.13/site-packages/idna/idnadata.py @@ -0,0 +1,4243 @@ +# This file is automatically generated by tools/idna-data + +__version__ = "15.1.0" +scripts = { + "Greek": ( + 0x37000000374, + 0x37500000378, + 0x37A0000037E, + 0x37F00000380, + 0x38400000385, + 0x38600000387, + 0x3880000038B, + 0x38C0000038D, + 0x38E000003A2, + 0x3A3000003E2, + 0x3F000000400, + 0x1D2600001D2B, + 0x1D5D00001D62, + 0x1D6600001D6B, + 0x1DBF00001DC0, + 0x1F0000001F16, + 0x1F1800001F1E, + 0x1F2000001F46, + 0x1F4800001F4E, + 0x1F5000001F58, + 0x1F5900001F5A, + 0x1F5B00001F5C, + 0x1F5D00001F5E, + 0x1F5F00001F7E, + 0x1F8000001FB5, + 0x1FB600001FC5, + 0x1FC600001FD4, + 0x1FD600001FDC, + 0x1FDD00001FF0, + 0x1FF200001FF5, + 0x1FF600001FFF, + 0x212600002127, + 0xAB650000AB66, + 0x101400001018F, + 0x101A0000101A1, + 0x1D2000001D246, + ), + "Han": ( + 0x2E8000002E9A, + 0x2E9B00002EF4, + 0x2F0000002FD6, + 0x300500003006, + 0x300700003008, + 0x30210000302A, + 0x30380000303C, + 0x340000004DC0, + 0x4E000000A000, + 0xF9000000FA6E, + 0xFA700000FADA, + 0x16FE200016FE4, + 0x16FF000016FF2, + 0x200000002A6E0, + 0x2A7000002B73A, + 0x2B7400002B81E, + 0x2B8200002CEA2, + 0x2CEB00002EBE1, + 0x2EBF00002EE5E, + 0x2F8000002FA1E, + 0x300000003134B, + 0x31350000323B0, + ), + "Hebrew": ( + 0x591000005C8, + 0x5D0000005EB, + 0x5EF000005F5, + 0xFB1D0000FB37, + 0xFB380000FB3D, + 0xFB3E0000FB3F, + 0xFB400000FB42, + 0xFB430000FB45, + 0xFB460000FB50, + ), + "Hiragana": ( + 0x304100003097, + 0x309D000030A0, + 0x1B0010001B120, + 0x1B1320001B133, + 0x1B1500001B153, + 0x1F2000001F201, + ), + "Katakana": ( + 0x30A1000030FB, + 0x30FD00003100, + 0x31F000003200, + 0x32D0000032FF, + 0x330000003358, + 0xFF660000FF70, + 0xFF710000FF9E, + 0x1AFF00001AFF4, + 0x1AFF50001AFFC, + 0x1AFFD0001AFFF, + 0x1B0000001B001, + 0x1B1200001B123, + 0x1B1550001B156, + 0x1B1640001B168, + ), +} +joining_types = { + 0xAD: 84, + 0x300: 84, + 0x301: 84, + 0x302: 84, + 0x303: 84, + 0x304: 84, + 0x305: 84, + 0x306: 84, + 0x307: 84, + 0x308: 84, + 0x309: 84, + 0x30A: 84, + 0x30B: 84, + 0x30C: 84, + 0x30D: 84, + 0x30E: 84, + 0x30F: 84, + 0x310: 84, + 0x311: 84, + 0x312: 84, + 0x313: 84, + 0x314: 84, + 0x315: 84, + 0x316: 84, + 0x317: 84, + 0x318: 84, + 0x319: 84, + 0x31A: 84, + 0x31B: 84, + 0x31C: 84, + 0x31D: 84, + 0x31E: 84, + 0x31F: 84, + 0x320: 84, + 0x321: 84, + 0x322: 84, + 0x323: 84, + 0x324: 84, + 0x325: 84, + 0x326: 84, + 0x327: 84, + 0x328: 84, + 0x329: 84, + 0x32A: 84, + 0x32B: 84, + 0x32C: 84, + 0x32D: 84, + 0x32E: 84, + 0x32F: 84, + 0x330: 84, + 0x331: 84, + 0x332: 84, + 0x333: 84, + 0x334: 84, + 0x335: 84, + 0x336: 84, + 0x337: 84, + 0x338: 84, + 0x339: 84, + 0x33A: 84, + 0x33B: 84, + 0x33C: 84, + 0x33D: 84, + 0x33E: 84, + 0x33F: 84, + 0x340: 84, + 0x341: 84, + 0x342: 84, + 0x343: 84, + 0x344: 84, + 0x345: 84, + 0x346: 84, + 0x347: 84, + 0x348: 84, + 0x349: 84, + 0x34A: 84, + 0x34B: 84, + 0x34C: 84, + 0x34D: 84, + 0x34E: 84, + 0x34F: 84, + 0x350: 84, + 0x351: 84, + 0x352: 84, + 0x353: 84, + 0x354: 84, + 0x355: 84, + 0x356: 84, + 0x357: 84, + 0x358: 84, + 0x359: 84, + 0x35A: 84, + 0x35B: 84, + 0x35C: 84, + 0x35D: 84, + 0x35E: 84, + 0x35F: 84, + 0x360: 84, + 0x361: 84, + 0x362: 84, + 0x363: 84, + 0x364: 84, + 0x365: 84, + 0x366: 84, + 0x367: 84, + 0x368: 84, + 0x369: 84, + 0x36A: 84, + 0x36B: 84, + 0x36C: 84, + 0x36D: 84, + 0x36E: 84, + 0x36F: 84, + 0x483: 84, + 0x484: 84, + 0x485: 84, + 0x486: 84, + 0x487: 84, + 0x488: 84, + 0x489: 84, + 0x591: 84, + 0x592: 84, + 0x593: 84, + 0x594: 84, + 0x595: 84, + 0x596: 84, + 0x597: 84, + 0x598: 84, + 0x599: 84, + 0x59A: 84, + 0x59B: 84, + 0x59C: 84, + 0x59D: 84, + 0x59E: 84, + 0x59F: 84, + 0x5A0: 84, + 0x5A1: 84, + 0x5A2: 84, + 0x5A3: 84, + 0x5A4: 84, + 0x5A5: 84, + 0x5A6: 84, + 0x5A7: 84, + 0x5A8: 84, + 0x5A9: 84, + 0x5AA: 84, + 0x5AB: 84, + 0x5AC: 84, + 0x5AD: 84, + 0x5AE: 84, + 0x5AF: 84, + 0x5B0: 84, + 0x5B1: 84, + 0x5B2: 84, + 0x5B3: 84, + 0x5B4: 84, + 0x5B5: 84, + 0x5B6: 84, + 0x5B7: 84, + 0x5B8: 84, + 0x5B9: 84, + 0x5BA: 84, + 0x5BB: 84, + 0x5BC: 84, + 0x5BD: 84, + 0x5BF: 84, + 0x5C1: 84, + 0x5C2: 84, + 0x5C4: 84, + 0x5C5: 84, + 0x5C7: 84, + 0x610: 84, + 0x611: 84, + 0x612: 84, + 0x613: 84, + 0x614: 84, + 0x615: 84, + 0x616: 84, + 0x617: 84, + 0x618: 84, + 0x619: 84, + 0x61A: 84, + 0x61C: 84, + 0x620: 68, + 0x622: 82, + 0x623: 82, + 0x624: 82, + 0x625: 82, + 0x626: 68, + 0x627: 82, + 0x628: 68, + 0x629: 82, + 0x62A: 68, + 0x62B: 68, + 0x62C: 68, + 0x62D: 68, + 0x62E: 68, + 0x62F: 82, + 0x630: 82, + 0x631: 82, + 0x632: 82, + 0x633: 68, + 0x634: 68, + 0x635: 68, + 0x636: 68, + 0x637: 68, + 0x638: 68, + 0x639: 68, + 0x63A: 68, + 0x63B: 68, + 0x63C: 68, + 0x63D: 68, + 0x63E: 68, + 0x63F: 68, + 0x640: 67, + 0x641: 68, + 0x642: 68, + 0x643: 68, + 0x644: 68, + 0x645: 68, + 0x646: 68, + 0x647: 68, + 0x648: 82, + 0x649: 68, + 0x64A: 68, + 0x64B: 84, + 0x64C: 84, + 0x64D: 84, + 0x64E: 84, + 0x64F: 84, + 0x650: 84, + 0x651: 84, + 0x652: 84, + 0x653: 84, + 0x654: 84, + 0x655: 84, + 0x656: 84, + 0x657: 84, + 0x658: 84, + 0x659: 84, + 0x65A: 84, + 0x65B: 84, + 0x65C: 84, + 0x65D: 84, + 0x65E: 84, + 0x65F: 84, + 0x66E: 68, + 0x66F: 68, + 0x670: 84, + 0x671: 82, + 0x672: 82, + 0x673: 82, + 0x675: 82, + 0x676: 82, + 0x677: 82, + 0x678: 68, + 0x679: 68, + 0x67A: 68, + 0x67B: 68, + 0x67C: 68, + 0x67D: 68, + 0x67E: 68, + 0x67F: 68, + 0x680: 68, + 0x681: 68, + 0x682: 68, + 0x683: 68, + 0x684: 68, + 0x685: 68, + 0x686: 68, + 0x687: 68, + 0x688: 82, + 0x689: 82, + 0x68A: 82, + 0x68B: 82, + 0x68C: 82, + 0x68D: 82, + 0x68E: 82, + 0x68F: 82, + 0x690: 82, + 0x691: 82, + 0x692: 82, + 0x693: 82, + 0x694: 82, + 0x695: 82, + 0x696: 82, + 0x697: 82, + 0x698: 82, + 0x699: 82, + 0x69A: 68, + 0x69B: 68, + 0x69C: 68, + 0x69D: 68, + 0x69E: 68, + 0x69F: 68, + 0x6A0: 68, + 0x6A1: 68, + 0x6A2: 68, + 0x6A3: 68, + 0x6A4: 68, + 0x6A5: 68, + 0x6A6: 68, + 0x6A7: 68, + 0x6A8: 68, + 0x6A9: 68, + 0x6AA: 68, + 0x6AB: 68, + 0x6AC: 68, + 0x6AD: 68, + 0x6AE: 68, + 0x6AF: 68, + 0x6B0: 68, + 0x6B1: 68, + 0x6B2: 68, + 0x6B3: 68, + 0x6B4: 68, + 0x6B5: 68, + 0x6B6: 68, + 0x6B7: 68, + 0x6B8: 68, + 0x6B9: 68, + 0x6BA: 68, + 0x6BB: 68, + 0x6BC: 68, + 0x6BD: 68, + 0x6BE: 68, + 0x6BF: 68, + 0x6C0: 82, + 0x6C1: 68, + 0x6C2: 68, + 0x6C3: 82, + 0x6C4: 82, + 0x6C5: 82, + 0x6C6: 82, + 0x6C7: 82, + 0x6C8: 82, + 0x6C9: 82, + 0x6CA: 82, + 0x6CB: 82, + 0x6CC: 68, + 0x6CD: 82, + 0x6CE: 68, + 0x6CF: 82, + 0x6D0: 68, + 0x6D1: 68, + 0x6D2: 82, + 0x6D3: 82, + 0x6D5: 82, + 0x6D6: 84, + 0x6D7: 84, + 0x6D8: 84, + 0x6D9: 84, + 0x6DA: 84, + 0x6DB: 84, + 0x6DC: 84, + 0x6DF: 84, + 0x6E0: 84, + 0x6E1: 84, + 0x6E2: 84, + 0x6E3: 84, + 0x6E4: 84, + 0x6E7: 84, + 0x6E8: 84, + 0x6EA: 84, + 0x6EB: 84, + 0x6EC: 84, + 0x6ED: 84, + 0x6EE: 82, + 0x6EF: 82, + 0x6FA: 68, + 0x6FB: 68, + 0x6FC: 68, + 0x6FF: 68, + 0x70F: 84, + 0x710: 82, + 0x711: 84, + 0x712: 68, + 0x713: 68, + 0x714: 68, + 0x715: 82, + 0x716: 82, + 0x717: 82, + 0x718: 82, + 0x719: 82, + 0x71A: 68, + 0x71B: 68, + 0x71C: 68, + 0x71D: 68, + 0x71E: 82, + 0x71F: 68, + 0x720: 68, + 0x721: 68, + 0x722: 68, + 0x723: 68, + 0x724: 68, + 0x725: 68, + 0x726: 68, + 0x727: 68, + 0x728: 82, + 0x729: 68, + 0x72A: 82, + 0x72B: 68, + 0x72C: 82, + 0x72D: 68, + 0x72E: 68, + 0x72F: 82, + 0x730: 84, + 0x731: 84, + 0x732: 84, + 0x733: 84, + 0x734: 84, + 0x735: 84, + 0x736: 84, + 0x737: 84, + 0x738: 84, + 0x739: 84, + 0x73A: 84, + 0x73B: 84, + 0x73C: 84, + 0x73D: 84, + 0x73E: 84, + 0x73F: 84, + 0x740: 84, + 0x741: 84, + 0x742: 84, + 0x743: 84, + 0x744: 84, + 0x745: 84, + 0x746: 84, + 0x747: 84, + 0x748: 84, + 0x749: 84, + 0x74A: 84, + 0x74D: 82, + 0x74E: 68, + 0x74F: 68, + 0x750: 68, + 0x751: 68, + 0x752: 68, + 0x753: 68, + 0x754: 68, + 0x755: 68, + 0x756: 68, + 0x757: 68, + 0x758: 68, + 0x759: 82, + 0x75A: 82, + 0x75B: 82, + 0x75C: 68, + 0x75D: 68, + 0x75E: 68, + 0x75F: 68, + 0x760: 68, + 0x761: 68, + 0x762: 68, + 0x763: 68, + 0x764: 68, + 0x765: 68, + 0x766: 68, + 0x767: 68, + 0x768: 68, + 0x769: 68, + 0x76A: 68, + 0x76B: 82, + 0x76C: 82, + 0x76D: 68, + 0x76E: 68, + 0x76F: 68, + 0x770: 68, + 0x771: 82, + 0x772: 68, + 0x773: 82, + 0x774: 82, + 0x775: 68, + 0x776: 68, + 0x777: 68, + 0x778: 82, + 0x779: 82, + 0x77A: 68, + 0x77B: 68, + 0x77C: 68, + 0x77D: 68, + 0x77E: 68, + 0x77F: 68, + 0x7A6: 84, + 0x7A7: 84, + 0x7A8: 84, + 0x7A9: 84, + 0x7AA: 84, + 0x7AB: 84, + 0x7AC: 84, + 0x7AD: 84, + 0x7AE: 84, + 0x7AF: 84, + 0x7B0: 84, + 0x7CA: 68, + 0x7CB: 68, + 0x7CC: 68, + 0x7CD: 68, + 0x7CE: 68, + 0x7CF: 68, + 0x7D0: 68, + 0x7D1: 68, + 0x7D2: 68, + 0x7D3: 68, + 0x7D4: 68, + 0x7D5: 68, + 0x7D6: 68, + 0x7D7: 68, + 0x7D8: 68, + 0x7D9: 68, + 0x7DA: 68, + 0x7DB: 68, + 0x7DC: 68, + 0x7DD: 68, + 0x7DE: 68, + 0x7DF: 68, + 0x7E0: 68, + 0x7E1: 68, + 0x7E2: 68, + 0x7E3: 68, + 0x7E4: 68, + 0x7E5: 68, + 0x7E6: 68, + 0x7E7: 68, + 0x7E8: 68, + 0x7E9: 68, + 0x7EA: 68, + 0x7EB: 84, + 0x7EC: 84, + 0x7ED: 84, + 0x7EE: 84, + 0x7EF: 84, + 0x7F0: 84, + 0x7F1: 84, + 0x7F2: 84, + 0x7F3: 84, + 0x7FA: 67, + 0x7FD: 84, + 0x816: 84, + 0x817: 84, + 0x818: 84, + 0x819: 84, + 0x81B: 84, + 0x81C: 84, + 0x81D: 84, + 0x81E: 84, + 0x81F: 84, + 0x820: 84, + 0x821: 84, + 0x822: 84, + 0x823: 84, + 0x825: 84, + 0x826: 84, + 0x827: 84, + 0x829: 84, + 0x82A: 84, + 0x82B: 84, + 0x82C: 84, + 0x82D: 84, + 0x840: 82, + 0x841: 68, + 0x842: 68, + 0x843: 68, + 0x844: 68, + 0x845: 68, + 0x846: 82, + 0x847: 82, + 0x848: 68, + 0x849: 82, + 0x84A: 68, + 0x84B: 68, + 0x84C: 68, + 0x84D: 68, + 0x84E: 68, + 0x84F: 68, + 0x850: 68, + 0x851: 68, + 0x852: 68, + 0x853: 68, + 0x854: 82, + 0x855: 68, + 0x856: 82, + 0x857: 82, + 0x858: 82, + 0x859: 84, + 0x85A: 84, + 0x85B: 84, + 0x860: 68, + 0x862: 68, + 0x863: 68, + 0x864: 68, + 0x865: 68, + 0x867: 82, + 0x868: 68, + 0x869: 82, + 0x86A: 82, + 0x870: 82, + 0x871: 82, + 0x872: 82, + 0x873: 82, + 0x874: 82, + 0x875: 82, + 0x876: 82, + 0x877: 82, + 0x878: 82, + 0x879: 82, + 0x87A: 82, + 0x87B: 82, + 0x87C: 82, + 0x87D: 82, + 0x87E: 82, + 0x87F: 82, + 0x880: 82, + 0x881: 82, + 0x882: 82, + 0x883: 67, + 0x884: 67, + 0x885: 67, + 0x886: 68, + 0x889: 68, + 0x88A: 68, + 0x88B: 68, + 0x88C: 68, + 0x88D: 68, + 0x88E: 82, + 0x898: 84, + 0x899: 84, + 0x89A: 84, + 0x89B: 84, + 0x89C: 84, + 0x89D: 84, + 0x89E: 84, + 0x89F: 84, + 0x8A0: 68, + 0x8A1: 68, + 0x8A2: 68, + 0x8A3: 68, + 0x8A4: 68, + 0x8A5: 68, + 0x8A6: 68, + 0x8A7: 68, + 0x8A8: 68, + 0x8A9: 68, + 0x8AA: 82, + 0x8AB: 82, + 0x8AC: 82, + 0x8AE: 82, + 0x8AF: 68, + 0x8B0: 68, + 0x8B1: 82, + 0x8B2: 82, + 0x8B3: 68, + 0x8B4: 68, + 0x8B5: 68, + 0x8B6: 68, + 0x8B7: 68, + 0x8B8: 68, + 0x8B9: 82, + 0x8BA: 68, + 0x8BB: 68, + 0x8BC: 68, + 0x8BD: 68, + 0x8BE: 68, + 0x8BF: 68, + 0x8C0: 68, + 0x8C1: 68, + 0x8C2: 68, + 0x8C3: 68, + 0x8C4: 68, + 0x8C5: 68, + 0x8C6: 68, + 0x8C7: 68, + 0x8C8: 68, + 0x8CA: 84, + 0x8CB: 84, + 0x8CC: 84, + 0x8CD: 84, + 0x8CE: 84, + 0x8CF: 84, + 0x8D0: 84, + 0x8D1: 84, + 0x8D2: 84, + 0x8D3: 84, + 0x8D4: 84, + 0x8D5: 84, + 0x8D6: 84, + 0x8D7: 84, + 0x8D8: 84, + 0x8D9: 84, + 0x8DA: 84, + 0x8DB: 84, + 0x8DC: 84, + 0x8DD: 84, + 0x8DE: 84, + 0x8DF: 84, + 0x8E0: 84, + 0x8E1: 84, + 0x8E3: 84, + 0x8E4: 84, + 0x8E5: 84, + 0x8E6: 84, + 0x8E7: 84, + 0x8E8: 84, + 0x8E9: 84, + 0x8EA: 84, + 0x8EB: 84, + 0x8EC: 84, + 0x8ED: 84, + 0x8EE: 84, + 0x8EF: 84, + 0x8F0: 84, + 0x8F1: 84, + 0x8F2: 84, + 0x8F3: 84, + 0x8F4: 84, + 0x8F5: 84, + 0x8F6: 84, + 0x8F7: 84, + 0x8F8: 84, + 0x8F9: 84, + 0x8FA: 84, + 0x8FB: 84, + 0x8FC: 84, + 0x8FD: 84, + 0x8FE: 84, + 0x8FF: 84, + 0x900: 84, + 0x901: 84, + 0x902: 84, + 0x93A: 84, + 0x93C: 84, + 0x941: 84, + 0x942: 84, + 0x943: 84, + 0x944: 84, + 0x945: 84, + 0x946: 84, + 0x947: 84, + 0x948: 84, + 0x94D: 84, + 0x951: 84, + 0x952: 84, + 0x953: 84, + 0x954: 84, + 0x955: 84, + 0x956: 84, + 0x957: 84, + 0x962: 84, + 0x963: 84, + 0x981: 84, + 0x9BC: 84, + 0x9C1: 84, + 0x9C2: 84, + 0x9C3: 84, + 0x9C4: 84, + 0x9CD: 84, + 0x9E2: 84, + 0x9E3: 84, + 0x9FE: 84, + 0xA01: 84, + 0xA02: 84, + 0xA3C: 84, + 0xA41: 84, + 0xA42: 84, + 0xA47: 84, + 0xA48: 84, + 0xA4B: 84, + 0xA4C: 84, + 0xA4D: 84, + 0xA51: 84, + 0xA70: 84, + 0xA71: 84, + 0xA75: 84, + 0xA81: 84, + 0xA82: 84, + 0xABC: 84, + 0xAC1: 84, + 0xAC2: 84, + 0xAC3: 84, + 0xAC4: 84, + 0xAC5: 84, + 0xAC7: 84, + 0xAC8: 84, + 0xACD: 84, + 0xAE2: 84, + 0xAE3: 84, + 0xAFA: 84, + 0xAFB: 84, + 0xAFC: 84, + 0xAFD: 84, + 0xAFE: 84, + 0xAFF: 84, + 0xB01: 84, + 0xB3C: 84, + 0xB3F: 84, + 0xB41: 84, + 0xB42: 84, + 0xB43: 84, + 0xB44: 84, + 0xB4D: 84, + 0xB55: 84, + 0xB56: 84, + 0xB62: 84, + 0xB63: 84, + 0xB82: 84, + 0xBC0: 84, + 0xBCD: 84, + 0xC00: 84, + 0xC04: 84, + 0xC3C: 84, + 0xC3E: 84, + 0xC3F: 84, + 0xC40: 84, + 0xC46: 84, + 0xC47: 84, + 0xC48: 84, + 0xC4A: 84, + 0xC4B: 84, + 0xC4C: 84, + 0xC4D: 84, + 0xC55: 84, + 0xC56: 84, + 0xC62: 84, + 0xC63: 84, + 0xC81: 84, + 0xCBC: 84, + 0xCBF: 84, + 0xCC6: 84, + 0xCCC: 84, + 0xCCD: 84, + 0xCE2: 84, + 0xCE3: 84, + 0xD00: 84, + 0xD01: 84, + 0xD3B: 84, + 0xD3C: 84, + 0xD41: 84, + 0xD42: 84, + 0xD43: 84, + 0xD44: 84, + 0xD4D: 84, + 0xD62: 84, + 0xD63: 84, + 0xD81: 84, + 0xDCA: 84, + 0xDD2: 84, + 0xDD3: 84, + 0xDD4: 84, + 0xDD6: 84, + 0xE31: 84, + 0xE34: 84, + 0xE35: 84, + 0xE36: 84, + 0xE37: 84, + 0xE38: 84, + 0xE39: 84, + 0xE3A: 84, + 0xE47: 84, + 0xE48: 84, + 0xE49: 84, + 0xE4A: 84, + 0xE4B: 84, + 0xE4C: 84, + 0xE4D: 84, + 0xE4E: 84, + 0xEB1: 84, + 0xEB4: 84, + 0xEB5: 84, + 0xEB6: 84, + 0xEB7: 84, + 0xEB8: 84, + 0xEB9: 84, + 0xEBA: 84, + 0xEBB: 84, + 0xEBC: 84, + 0xEC8: 84, + 0xEC9: 84, + 0xECA: 84, + 0xECB: 84, + 0xECC: 84, + 0xECD: 84, + 0xECE: 84, + 0xF18: 84, + 0xF19: 84, + 0xF35: 84, + 0xF37: 84, + 0xF39: 84, + 0xF71: 84, + 0xF72: 84, + 0xF73: 84, + 0xF74: 84, + 0xF75: 84, + 0xF76: 84, + 0xF77: 84, + 0xF78: 84, + 0xF79: 84, + 0xF7A: 84, + 0xF7B: 84, + 0xF7C: 84, + 0xF7D: 84, + 0xF7E: 84, + 0xF80: 84, + 0xF81: 84, + 0xF82: 84, + 0xF83: 84, + 0xF84: 84, + 0xF86: 84, + 0xF87: 84, + 0xF8D: 84, + 0xF8E: 84, + 0xF8F: 84, + 0xF90: 84, + 0xF91: 84, + 0xF92: 84, + 0xF93: 84, + 0xF94: 84, + 0xF95: 84, + 0xF96: 84, + 0xF97: 84, + 0xF99: 84, + 0xF9A: 84, + 0xF9B: 84, + 0xF9C: 84, + 0xF9D: 84, + 0xF9E: 84, + 0xF9F: 84, + 0xFA0: 84, + 0xFA1: 84, + 0xFA2: 84, + 0xFA3: 84, + 0xFA4: 84, + 0xFA5: 84, + 0xFA6: 84, + 0xFA7: 84, + 0xFA8: 84, + 0xFA9: 84, + 0xFAA: 84, + 0xFAB: 84, + 0xFAC: 84, + 0xFAD: 84, + 0xFAE: 84, + 0xFAF: 84, + 0xFB0: 84, + 0xFB1: 84, + 0xFB2: 84, + 0xFB3: 84, + 0xFB4: 84, + 0xFB5: 84, + 0xFB6: 84, + 0xFB7: 84, + 0xFB8: 84, + 0xFB9: 84, + 0xFBA: 84, + 0xFBB: 84, + 0xFBC: 84, + 0xFC6: 84, + 0x102D: 84, + 0x102E: 84, + 0x102F: 84, + 0x1030: 84, + 0x1032: 84, + 0x1033: 84, + 0x1034: 84, + 0x1035: 84, + 0x1036: 84, + 0x1037: 84, + 0x1039: 84, + 0x103A: 84, + 0x103D: 84, + 0x103E: 84, + 0x1058: 84, + 0x1059: 84, + 0x105E: 84, + 0x105F: 84, + 0x1060: 84, + 0x1071: 84, + 0x1072: 84, + 0x1073: 84, + 0x1074: 84, + 0x1082: 84, + 0x1085: 84, + 0x1086: 84, + 0x108D: 84, + 0x109D: 84, + 0x135D: 84, + 0x135E: 84, + 0x135F: 84, + 0x1712: 84, + 0x1713: 84, + 0x1714: 84, + 0x1732: 84, + 0x1733: 84, + 0x1752: 84, + 0x1753: 84, + 0x1772: 84, + 0x1773: 84, + 0x17B4: 84, + 0x17B5: 84, + 0x17B7: 84, + 0x17B8: 84, + 0x17B9: 84, + 0x17BA: 84, + 0x17BB: 84, + 0x17BC: 84, + 0x17BD: 84, + 0x17C6: 84, + 0x17C9: 84, + 0x17CA: 84, + 0x17CB: 84, + 0x17CC: 84, + 0x17CD: 84, + 0x17CE: 84, + 0x17CF: 84, + 0x17D0: 84, + 0x17D1: 84, + 0x17D2: 84, + 0x17D3: 84, + 0x17DD: 84, + 0x1807: 68, + 0x180A: 67, + 0x180B: 84, + 0x180C: 84, + 0x180D: 84, + 0x180F: 84, + 0x1820: 68, + 0x1821: 68, + 0x1822: 68, + 0x1823: 68, + 0x1824: 68, + 0x1825: 68, + 0x1826: 68, + 0x1827: 68, + 0x1828: 68, + 0x1829: 68, + 0x182A: 68, + 0x182B: 68, + 0x182C: 68, + 0x182D: 68, + 0x182E: 68, + 0x182F: 68, + 0x1830: 68, + 0x1831: 68, + 0x1832: 68, + 0x1833: 68, + 0x1834: 68, + 0x1835: 68, + 0x1836: 68, + 0x1837: 68, + 0x1838: 68, + 0x1839: 68, + 0x183A: 68, + 0x183B: 68, + 0x183C: 68, + 0x183D: 68, + 0x183E: 68, + 0x183F: 68, + 0x1840: 68, + 0x1841: 68, + 0x1842: 68, + 0x1843: 68, + 0x1844: 68, + 0x1845: 68, + 0x1846: 68, + 0x1847: 68, + 0x1848: 68, + 0x1849: 68, + 0x184A: 68, + 0x184B: 68, + 0x184C: 68, + 0x184D: 68, + 0x184E: 68, + 0x184F: 68, + 0x1850: 68, + 0x1851: 68, + 0x1852: 68, + 0x1853: 68, + 0x1854: 68, + 0x1855: 68, + 0x1856: 68, + 0x1857: 68, + 0x1858: 68, + 0x1859: 68, + 0x185A: 68, + 0x185B: 68, + 0x185C: 68, + 0x185D: 68, + 0x185E: 68, + 0x185F: 68, + 0x1860: 68, + 0x1861: 68, + 0x1862: 68, + 0x1863: 68, + 0x1864: 68, + 0x1865: 68, + 0x1866: 68, + 0x1867: 68, + 0x1868: 68, + 0x1869: 68, + 0x186A: 68, + 0x186B: 68, + 0x186C: 68, + 0x186D: 68, + 0x186E: 68, + 0x186F: 68, + 0x1870: 68, + 0x1871: 68, + 0x1872: 68, + 0x1873: 68, + 0x1874: 68, + 0x1875: 68, + 0x1876: 68, + 0x1877: 68, + 0x1878: 68, + 0x1885: 84, + 0x1886: 84, + 0x1887: 68, + 0x1888: 68, + 0x1889: 68, + 0x188A: 68, + 0x188B: 68, + 0x188C: 68, + 0x188D: 68, + 0x188E: 68, + 0x188F: 68, + 0x1890: 68, + 0x1891: 68, + 0x1892: 68, + 0x1893: 68, + 0x1894: 68, + 0x1895: 68, + 0x1896: 68, + 0x1897: 68, + 0x1898: 68, + 0x1899: 68, + 0x189A: 68, + 0x189B: 68, + 0x189C: 68, + 0x189D: 68, + 0x189E: 68, + 0x189F: 68, + 0x18A0: 68, + 0x18A1: 68, + 0x18A2: 68, + 0x18A3: 68, + 0x18A4: 68, + 0x18A5: 68, + 0x18A6: 68, + 0x18A7: 68, + 0x18A8: 68, + 0x18A9: 84, + 0x18AA: 68, + 0x1920: 84, + 0x1921: 84, + 0x1922: 84, + 0x1927: 84, + 0x1928: 84, + 0x1932: 84, + 0x1939: 84, + 0x193A: 84, + 0x193B: 84, + 0x1A17: 84, + 0x1A18: 84, + 0x1A1B: 84, + 0x1A56: 84, + 0x1A58: 84, + 0x1A59: 84, + 0x1A5A: 84, + 0x1A5B: 84, + 0x1A5C: 84, + 0x1A5D: 84, + 0x1A5E: 84, + 0x1A60: 84, + 0x1A62: 84, + 0x1A65: 84, + 0x1A66: 84, + 0x1A67: 84, + 0x1A68: 84, + 0x1A69: 84, + 0x1A6A: 84, + 0x1A6B: 84, + 0x1A6C: 84, + 0x1A73: 84, + 0x1A74: 84, + 0x1A75: 84, + 0x1A76: 84, + 0x1A77: 84, + 0x1A78: 84, + 0x1A79: 84, + 0x1A7A: 84, + 0x1A7B: 84, + 0x1A7C: 84, + 0x1A7F: 84, + 0x1AB0: 84, + 0x1AB1: 84, + 0x1AB2: 84, + 0x1AB3: 84, + 0x1AB4: 84, + 0x1AB5: 84, + 0x1AB6: 84, + 0x1AB7: 84, + 0x1AB8: 84, + 0x1AB9: 84, + 0x1ABA: 84, + 0x1ABB: 84, + 0x1ABC: 84, + 0x1ABD: 84, + 0x1ABE: 84, + 0x1ABF: 84, + 0x1AC0: 84, + 0x1AC1: 84, + 0x1AC2: 84, + 0x1AC3: 84, + 0x1AC4: 84, + 0x1AC5: 84, + 0x1AC6: 84, + 0x1AC7: 84, + 0x1AC8: 84, + 0x1AC9: 84, + 0x1ACA: 84, + 0x1ACB: 84, + 0x1ACC: 84, + 0x1ACD: 84, + 0x1ACE: 84, + 0x1B00: 84, + 0x1B01: 84, + 0x1B02: 84, + 0x1B03: 84, + 0x1B34: 84, + 0x1B36: 84, + 0x1B37: 84, + 0x1B38: 84, + 0x1B39: 84, + 0x1B3A: 84, + 0x1B3C: 84, + 0x1B42: 84, + 0x1B6B: 84, + 0x1B6C: 84, + 0x1B6D: 84, + 0x1B6E: 84, + 0x1B6F: 84, + 0x1B70: 84, + 0x1B71: 84, + 0x1B72: 84, + 0x1B73: 84, + 0x1B80: 84, + 0x1B81: 84, + 0x1BA2: 84, + 0x1BA3: 84, + 0x1BA4: 84, + 0x1BA5: 84, + 0x1BA8: 84, + 0x1BA9: 84, + 0x1BAB: 84, + 0x1BAC: 84, + 0x1BAD: 84, + 0x1BE6: 84, + 0x1BE8: 84, + 0x1BE9: 84, + 0x1BED: 84, + 0x1BEF: 84, + 0x1BF0: 84, + 0x1BF1: 84, + 0x1C2C: 84, + 0x1C2D: 84, + 0x1C2E: 84, + 0x1C2F: 84, + 0x1C30: 84, + 0x1C31: 84, + 0x1C32: 84, + 0x1C33: 84, + 0x1C36: 84, + 0x1C37: 84, + 0x1CD0: 84, + 0x1CD1: 84, + 0x1CD2: 84, + 0x1CD4: 84, + 0x1CD5: 84, + 0x1CD6: 84, + 0x1CD7: 84, + 0x1CD8: 84, + 0x1CD9: 84, + 0x1CDA: 84, + 0x1CDB: 84, + 0x1CDC: 84, + 0x1CDD: 84, + 0x1CDE: 84, + 0x1CDF: 84, + 0x1CE0: 84, + 0x1CE2: 84, + 0x1CE3: 84, + 0x1CE4: 84, + 0x1CE5: 84, + 0x1CE6: 84, + 0x1CE7: 84, + 0x1CE8: 84, + 0x1CED: 84, + 0x1CF4: 84, + 0x1CF8: 84, + 0x1CF9: 84, + 0x1DC0: 84, + 0x1DC1: 84, + 0x1DC2: 84, + 0x1DC3: 84, + 0x1DC4: 84, + 0x1DC5: 84, + 0x1DC6: 84, + 0x1DC7: 84, + 0x1DC8: 84, + 0x1DC9: 84, + 0x1DCA: 84, + 0x1DCB: 84, + 0x1DCC: 84, + 0x1DCD: 84, + 0x1DCE: 84, + 0x1DCF: 84, + 0x1DD0: 84, + 0x1DD1: 84, + 0x1DD2: 84, + 0x1DD3: 84, + 0x1DD4: 84, + 0x1DD5: 84, + 0x1DD6: 84, + 0x1DD7: 84, + 0x1DD8: 84, + 0x1DD9: 84, + 0x1DDA: 84, + 0x1DDB: 84, + 0x1DDC: 84, + 0x1DDD: 84, + 0x1DDE: 84, + 0x1DDF: 84, + 0x1DE0: 84, + 0x1DE1: 84, + 0x1DE2: 84, + 0x1DE3: 84, + 0x1DE4: 84, + 0x1DE5: 84, + 0x1DE6: 84, + 0x1DE7: 84, + 0x1DE8: 84, + 0x1DE9: 84, + 0x1DEA: 84, + 0x1DEB: 84, + 0x1DEC: 84, + 0x1DED: 84, + 0x1DEE: 84, + 0x1DEF: 84, + 0x1DF0: 84, + 0x1DF1: 84, + 0x1DF2: 84, + 0x1DF3: 84, + 0x1DF4: 84, + 0x1DF5: 84, + 0x1DF6: 84, + 0x1DF7: 84, + 0x1DF8: 84, + 0x1DF9: 84, + 0x1DFA: 84, + 0x1DFB: 84, + 0x1DFC: 84, + 0x1DFD: 84, + 0x1DFE: 84, + 0x1DFF: 84, + 0x200B: 84, + 0x200D: 67, + 0x200E: 84, + 0x200F: 84, + 0x202A: 84, + 0x202B: 84, + 0x202C: 84, + 0x202D: 84, + 0x202E: 84, + 0x2060: 84, + 0x2061: 84, + 0x2062: 84, + 0x2063: 84, + 0x2064: 84, + 0x206A: 84, + 0x206B: 84, + 0x206C: 84, + 0x206D: 84, + 0x206E: 84, + 0x206F: 84, + 0x20D0: 84, + 0x20D1: 84, + 0x20D2: 84, + 0x20D3: 84, + 0x20D4: 84, + 0x20D5: 84, + 0x20D6: 84, + 0x20D7: 84, + 0x20D8: 84, + 0x20D9: 84, + 0x20DA: 84, + 0x20DB: 84, + 0x20DC: 84, + 0x20DD: 84, + 0x20DE: 84, + 0x20DF: 84, + 0x20E0: 84, + 0x20E1: 84, + 0x20E2: 84, + 0x20E3: 84, + 0x20E4: 84, + 0x20E5: 84, + 0x20E6: 84, + 0x20E7: 84, + 0x20E8: 84, + 0x20E9: 84, + 0x20EA: 84, + 0x20EB: 84, + 0x20EC: 84, + 0x20ED: 84, + 0x20EE: 84, + 0x20EF: 84, + 0x20F0: 84, + 0x2CEF: 84, + 0x2CF0: 84, + 0x2CF1: 84, + 0x2D7F: 84, + 0x2DE0: 84, + 0x2DE1: 84, + 0x2DE2: 84, + 0x2DE3: 84, + 0x2DE4: 84, + 0x2DE5: 84, + 0x2DE6: 84, + 0x2DE7: 84, + 0x2DE8: 84, + 0x2DE9: 84, + 0x2DEA: 84, + 0x2DEB: 84, + 0x2DEC: 84, + 0x2DED: 84, + 0x2DEE: 84, + 0x2DEF: 84, + 0x2DF0: 84, + 0x2DF1: 84, + 0x2DF2: 84, + 0x2DF3: 84, + 0x2DF4: 84, + 0x2DF5: 84, + 0x2DF6: 84, + 0x2DF7: 84, + 0x2DF8: 84, + 0x2DF9: 84, + 0x2DFA: 84, + 0x2DFB: 84, + 0x2DFC: 84, + 0x2DFD: 84, + 0x2DFE: 84, + 0x2DFF: 84, + 0x302A: 84, + 0x302B: 84, + 0x302C: 84, + 0x302D: 84, + 0x3099: 84, + 0x309A: 84, + 0xA66F: 84, + 0xA670: 84, + 0xA671: 84, + 0xA672: 84, + 0xA674: 84, + 0xA675: 84, + 0xA676: 84, + 0xA677: 84, + 0xA678: 84, + 0xA679: 84, + 0xA67A: 84, + 0xA67B: 84, + 0xA67C: 84, + 0xA67D: 84, + 0xA69E: 84, + 0xA69F: 84, + 0xA6F0: 84, + 0xA6F1: 84, + 0xA802: 84, + 0xA806: 84, + 0xA80B: 84, + 0xA825: 84, + 0xA826: 84, + 0xA82C: 84, + 0xA840: 68, + 0xA841: 68, + 0xA842: 68, + 0xA843: 68, + 0xA844: 68, + 0xA845: 68, + 0xA846: 68, + 0xA847: 68, + 0xA848: 68, + 0xA849: 68, + 0xA84A: 68, + 0xA84B: 68, + 0xA84C: 68, + 0xA84D: 68, + 0xA84E: 68, + 0xA84F: 68, + 0xA850: 68, + 0xA851: 68, + 0xA852: 68, + 0xA853: 68, + 0xA854: 68, + 0xA855: 68, + 0xA856: 68, + 0xA857: 68, + 0xA858: 68, + 0xA859: 68, + 0xA85A: 68, + 0xA85B: 68, + 0xA85C: 68, + 0xA85D: 68, + 0xA85E: 68, + 0xA85F: 68, + 0xA860: 68, + 0xA861: 68, + 0xA862: 68, + 0xA863: 68, + 0xA864: 68, + 0xA865: 68, + 0xA866: 68, + 0xA867: 68, + 0xA868: 68, + 0xA869: 68, + 0xA86A: 68, + 0xA86B: 68, + 0xA86C: 68, + 0xA86D: 68, + 0xA86E: 68, + 0xA86F: 68, + 0xA870: 68, + 0xA871: 68, + 0xA872: 76, + 0xA8C4: 84, + 0xA8C5: 84, + 0xA8E0: 84, + 0xA8E1: 84, + 0xA8E2: 84, + 0xA8E3: 84, + 0xA8E4: 84, + 0xA8E5: 84, + 0xA8E6: 84, + 0xA8E7: 84, + 0xA8E8: 84, + 0xA8E9: 84, + 0xA8EA: 84, + 0xA8EB: 84, + 0xA8EC: 84, + 0xA8ED: 84, + 0xA8EE: 84, + 0xA8EF: 84, + 0xA8F0: 84, + 0xA8F1: 84, + 0xA8FF: 84, + 0xA926: 84, + 0xA927: 84, + 0xA928: 84, + 0xA929: 84, + 0xA92A: 84, + 0xA92B: 84, + 0xA92C: 84, + 0xA92D: 84, + 0xA947: 84, + 0xA948: 84, + 0xA949: 84, + 0xA94A: 84, + 0xA94B: 84, + 0xA94C: 84, + 0xA94D: 84, + 0xA94E: 84, + 0xA94F: 84, + 0xA950: 84, + 0xA951: 84, + 0xA980: 84, + 0xA981: 84, + 0xA982: 84, + 0xA9B3: 84, + 0xA9B6: 84, + 0xA9B7: 84, + 0xA9B8: 84, + 0xA9B9: 84, + 0xA9BC: 84, + 0xA9BD: 84, + 0xA9E5: 84, + 0xAA29: 84, + 0xAA2A: 84, + 0xAA2B: 84, + 0xAA2C: 84, + 0xAA2D: 84, + 0xAA2E: 84, + 0xAA31: 84, + 0xAA32: 84, + 0xAA35: 84, + 0xAA36: 84, + 0xAA43: 84, + 0xAA4C: 84, + 0xAA7C: 84, + 0xAAB0: 84, + 0xAAB2: 84, + 0xAAB3: 84, + 0xAAB4: 84, + 0xAAB7: 84, + 0xAAB8: 84, + 0xAABE: 84, + 0xAABF: 84, + 0xAAC1: 84, + 0xAAEC: 84, + 0xAAED: 84, + 0xAAF6: 84, + 0xABE5: 84, + 0xABE8: 84, + 0xABED: 84, + 0xFB1E: 84, + 0xFE00: 84, + 0xFE01: 84, + 0xFE02: 84, + 0xFE03: 84, + 0xFE04: 84, + 0xFE05: 84, + 0xFE06: 84, + 0xFE07: 84, + 0xFE08: 84, + 0xFE09: 84, + 0xFE0A: 84, + 0xFE0B: 84, + 0xFE0C: 84, + 0xFE0D: 84, + 0xFE0E: 84, + 0xFE0F: 84, + 0xFE20: 84, + 0xFE21: 84, + 0xFE22: 84, + 0xFE23: 84, + 0xFE24: 84, + 0xFE25: 84, + 0xFE26: 84, + 0xFE27: 84, + 0xFE28: 84, + 0xFE29: 84, + 0xFE2A: 84, + 0xFE2B: 84, + 0xFE2C: 84, + 0xFE2D: 84, + 0xFE2E: 84, + 0xFE2F: 84, + 0xFEFF: 84, + 0xFFF9: 84, + 0xFFFA: 84, + 0xFFFB: 84, + 0x101FD: 84, + 0x102E0: 84, + 0x10376: 84, + 0x10377: 84, + 0x10378: 84, + 0x10379: 84, + 0x1037A: 84, + 0x10A01: 84, + 0x10A02: 84, + 0x10A03: 84, + 0x10A05: 84, + 0x10A06: 84, + 0x10A0C: 84, + 0x10A0D: 84, + 0x10A0E: 84, + 0x10A0F: 84, + 0x10A38: 84, + 0x10A39: 84, + 0x10A3A: 84, + 0x10A3F: 84, + 0x10AC0: 68, + 0x10AC1: 68, + 0x10AC2: 68, + 0x10AC3: 68, + 0x10AC4: 68, + 0x10AC5: 82, + 0x10AC7: 82, + 0x10AC9: 82, + 0x10ACA: 82, + 0x10ACD: 76, + 0x10ACE: 82, + 0x10ACF: 82, + 0x10AD0: 82, + 0x10AD1: 82, + 0x10AD2: 82, + 0x10AD3: 68, + 0x10AD4: 68, + 0x10AD5: 68, + 0x10AD6: 68, + 0x10AD7: 76, + 0x10AD8: 68, + 0x10AD9: 68, + 0x10ADA: 68, + 0x10ADB: 68, + 0x10ADC: 68, + 0x10ADD: 82, + 0x10ADE: 68, + 0x10ADF: 68, + 0x10AE0: 68, + 0x10AE1: 82, + 0x10AE4: 82, + 0x10AE5: 84, + 0x10AE6: 84, + 0x10AEB: 68, + 0x10AEC: 68, + 0x10AED: 68, + 0x10AEE: 68, + 0x10AEF: 82, + 0x10B80: 68, + 0x10B81: 82, + 0x10B82: 68, + 0x10B83: 82, + 0x10B84: 82, + 0x10B85: 82, + 0x10B86: 68, + 0x10B87: 68, + 0x10B88: 68, + 0x10B89: 82, + 0x10B8A: 68, + 0x10B8B: 68, + 0x10B8C: 82, + 0x10B8D: 68, + 0x10B8E: 82, + 0x10B8F: 82, + 0x10B90: 68, + 0x10B91: 82, + 0x10BA9: 82, + 0x10BAA: 82, + 0x10BAB: 82, + 0x10BAC: 82, + 0x10BAD: 68, + 0x10BAE: 68, + 0x10D00: 76, + 0x10D01: 68, + 0x10D02: 68, + 0x10D03: 68, + 0x10D04: 68, + 0x10D05: 68, + 0x10D06: 68, + 0x10D07: 68, + 0x10D08: 68, + 0x10D09: 68, + 0x10D0A: 68, + 0x10D0B: 68, + 0x10D0C: 68, + 0x10D0D: 68, + 0x10D0E: 68, + 0x10D0F: 68, + 0x10D10: 68, + 0x10D11: 68, + 0x10D12: 68, + 0x10D13: 68, + 0x10D14: 68, + 0x10D15: 68, + 0x10D16: 68, + 0x10D17: 68, + 0x10D18: 68, + 0x10D19: 68, + 0x10D1A: 68, + 0x10D1B: 68, + 0x10D1C: 68, + 0x10D1D: 68, + 0x10D1E: 68, + 0x10D1F: 68, + 0x10D20: 68, + 0x10D21: 68, + 0x10D22: 82, + 0x10D23: 68, + 0x10D24: 84, + 0x10D25: 84, + 0x10D26: 84, + 0x10D27: 84, + 0x10EAB: 84, + 0x10EAC: 84, + 0x10EFD: 84, + 0x10EFE: 84, + 0x10EFF: 84, + 0x10F30: 68, + 0x10F31: 68, + 0x10F32: 68, + 0x10F33: 82, + 0x10F34: 68, + 0x10F35: 68, + 0x10F36: 68, + 0x10F37: 68, + 0x10F38: 68, + 0x10F39: 68, + 0x10F3A: 68, + 0x10F3B: 68, + 0x10F3C: 68, + 0x10F3D: 68, + 0x10F3E: 68, + 0x10F3F: 68, + 0x10F40: 68, + 0x10F41: 68, + 0x10F42: 68, + 0x10F43: 68, + 0x10F44: 68, + 0x10F46: 84, + 0x10F47: 84, + 0x10F48: 84, + 0x10F49: 84, + 0x10F4A: 84, + 0x10F4B: 84, + 0x10F4C: 84, + 0x10F4D: 84, + 0x10F4E: 84, + 0x10F4F: 84, + 0x10F50: 84, + 0x10F51: 68, + 0x10F52: 68, + 0x10F53: 68, + 0x10F54: 82, + 0x10F70: 68, + 0x10F71: 68, + 0x10F72: 68, + 0x10F73: 68, + 0x10F74: 82, + 0x10F75: 82, + 0x10F76: 68, + 0x10F77: 68, + 0x10F78: 68, + 0x10F79: 68, + 0x10F7A: 68, + 0x10F7B: 68, + 0x10F7C: 68, + 0x10F7D: 68, + 0x10F7E: 68, + 0x10F7F: 68, + 0x10F80: 68, + 0x10F81: 68, + 0x10F82: 84, + 0x10F83: 84, + 0x10F84: 84, + 0x10F85: 84, + 0x10FB0: 68, + 0x10FB2: 68, + 0x10FB3: 68, + 0x10FB4: 82, + 0x10FB5: 82, + 0x10FB6: 82, + 0x10FB8: 68, + 0x10FB9: 82, + 0x10FBA: 82, + 0x10FBB: 68, + 0x10FBC: 68, + 0x10FBD: 82, + 0x10FBE: 68, + 0x10FBF: 68, + 0x10FC1: 68, + 0x10FC2: 82, + 0x10FC3: 82, + 0x10FC4: 68, + 0x10FC9: 82, + 0x10FCA: 68, + 0x10FCB: 76, + 0x11001: 84, + 0x11038: 84, + 0x11039: 84, + 0x1103A: 84, + 0x1103B: 84, + 0x1103C: 84, + 0x1103D: 84, + 0x1103E: 84, + 0x1103F: 84, + 0x11040: 84, + 0x11041: 84, + 0x11042: 84, + 0x11043: 84, + 0x11044: 84, + 0x11045: 84, + 0x11046: 84, + 0x11070: 84, + 0x11073: 84, + 0x11074: 84, + 0x1107F: 84, + 0x11080: 84, + 0x11081: 84, + 0x110B3: 84, + 0x110B4: 84, + 0x110B5: 84, + 0x110B6: 84, + 0x110B9: 84, + 0x110BA: 84, + 0x110C2: 84, + 0x11100: 84, + 0x11101: 84, + 0x11102: 84, + 0x11127: 84, + 0x11128: 84, + 0x11129: 84, + 0x1112A: 84, + 0x1112B: 84, + 0x1112D: 84, + 0x1112E: 84, + 0x1112F: 84, + 0x11130: 84, + 0x11131: 84, + 0x11132: 84, + 0x11133: 84, + 0x11134: 84, + 0x11173: 84, + 0x11180: 84, + 0x11181: 84, + 0x111B6: 84, + 0x111B7: 84, + 0x111B8: 84, + 0x111B9: 84, + 0x111BA: 84, + 0x111BB: 84, + 0x111BC: 84, + 0x111BD: 84, + 0x111BE: 84, + 0x111C9: 84, + 0x111CA: 84, + 0x111CB: 84, + 0x111CC: 84, + 0x111CF: 84, + 0x1122F: 84, + 0x11230: 84, + 0x11231: 84, + 0x11234: 84, + 0x11236: 84, + 0x11237: 84, + 0x1123E: 84, + 0x11241: 84, + 0x112DF: 84, + 0x112E3: 84, + 0x112E4: 84, + 0x112E5: 84, + 0x112E6: 84, + 0x112E7: 84, + 0x112E8: 84, + 0x112E9: 84, + 0x112EA: 84, + 0x11300: 84, + 0x11301: 84, + 0x1133B: 84, + 0x1133C: 84, + 0x11340: 84, + 0x11366: 84, + 0x11367: 84, + 0x11368: 84, + 0x11369: 84, + 0x1136A: 84, + 0x1136B: 84, + 0x1136C: 84, + 0x11370: 84, + 0x11371: 84, + 0x11372: 84, + 0x11373: 84, + 0x11374: 84, + 0x11438: 84, + 0x11439: 84, + 0x1143A: 84, + 0x1143B: 84, + 0x1143C: 84, + 0x1143D: 84, + 0x1143E: 84, + 0x1143F: 84, + 0x11442: 84, + 0x11443: 84, + 0x11444: 84, + 0x11446: 84, + 0x1145E: 84, + 0x114B3: 84, + 0x114B4: 84, + 0x114B5: 84, + 0x114B6: 84, + 0x114B7: 84, + 0x114B8: 84, + 0x114BA: 84, + 0x114BF: 84, + 0x114C0: 84, + 0x114C2: 84, + 0x114C3: 84, + 0x115B2: 84, + 0x115B3: 84, + 0x115B4: 84, + 0x115B5: 84, + 0x115BC: 84, + 0x115BD: 84, + 0x115BF: 84, + 0x115C0: 84, + 0x115DC: 84, + 0x115DD: 84, + 0x11633: 84, + 0x11634: 84, + 0x11635: 84, + 0x11636: 84, + 0x11637: 84, + 0x11638: 84, + 0x11639: 84, + 0x1163A: 84, + 0x1163D: 84, + 0x1163F: 84, + 0x11640: 84, + 0x116AB: 84, + 0x116AD: 84, + 0x116B0: 84, + 0x116B1: 84, + 0x116B2: 84, + 0x116B3: 84, + 0x116B4: 84, + 0x116B5: 84, + 0x116B7: 84, + 0x1171D: 84, + 0x1171E: 84, + 0x1171F: 84, + 0x11722: 84, + 0x11723: 84, + 0x11724: 84, + 0x11725: 84, + 0x11727: 84, + 0x11728: 84, + 0x11729: 84, + 0x1172A: 84, + 0x1172B: 84, + 0x1182F: 84, + 0x11830: 84, + 0x11831: 84, + 0x11832: 84, + 0x11833: 84, + 0x11834: 84, + 0x11835: 84, + 0x11836: 84, + 0x11837: 84, + 0x11839: 84, + 0x1183A: 84, + 0x1193B: 84, + 0x1193C: 84, + 0x1193E: 84, + 0x11943: 84, + 0x119D4: 84, + 0x119D5: 84, + 0x119D6: 84, + 0x119D7: 84, + 0x119DA: 84, + 0x119DB: 84, + 0x119E0: 84, + 0x11A01: 84, + 0x11A02: 84, + 0x11A03: 84, + 0x11A04: 84, + 0x11A05: 84, + 0x11A06: 84, + 0x11A07: 84, + 0x11A08: 84, + 0x11A09: 84, + 0x11A0A: 84, + 0x11A33: 84, + 0x11A34: 84, + 0x11A35: 84, + 0x11A36: 84, + 0x11A37: 84, + 0x11A38: 84, + 0x11A3B: 84, + 0x11A3C: 84, + 0x11A3D: 84, + 0x11A3E: 84, + 0x11A47: 84, + 0x11A51: 84, + 0x11A52: 84, + 0x11A53: 84, + 0x11A54: 84, + 0x11A55: 84, + 0x11A56: 84, + 0x11A59: 84, + 0x11A5A: 84, + 0x11A5B: 84, + 0x11A8A: 84, + 0x11A8B: 84, + 0x11A8C: 84, + 0x11A8D: 84, + 0x11A8E: 84, + 0x11A8F: 84, + 0x11A90: 84, + 0x11A91: 84, + 0x11A92: 84, + 0x11A93: 84, + 0x11A94: 84, + 0x11A95: 84, + 0x11A96: 84, + 0x11A98: 84, + 0x11A99: 84, + 0x11C30: 84, + 0x11C31: 84, + 0x11C32: 84, + 0x11C33: 84, + 0x11C34: 84, + 0x11C35: 84, + 0x11C36: 84, + 0x11C38: 84, + 0x11C39: 84, + 0x11C3A: 84, + 0x11C3B: 84, + 0x11C3C: 84, + 0x11C3D: 84, + 0x11C3F: 84, + 0x11C92: 84, + 0x11C93: 84, + 0x11C94: 84, + 0x11C95: 84, + 0x11C96: 84, + 0x11C97: 84, + 0x11C98: 84, + 0x11C99: 84, + 0x11C9A: 84, + 0x11C9B: 84, + 0x11C9C: 84, + 0x11C9D: 84, + 0x11C9E: 84, + 0x11C9F: 84, + 0x11CA0: 84, + 0x11CA1: 84, + 0x11CA2: 84, + 0x11CA3: 84, + 0x11CA4: 84, + 0x11CA5: 84, + 0x11CA6: 84, + 0x11CA7: 84, + 0x11CAA: 84, + 0x11CAB: 84, + 0x11CAC: 84, + 0x11CAD: 84, + 0x11CAE: 84, + 0x11CAF: 84, + 0x11CB0: 84, + 0x11CB2: 84, + 0x11CB3: 84, + 0x11CB5: 84, + 0x11CB6: 84, + 0x11D31: 84, + 0x11D32: 84, + 0x11D33: 84, + 0x11D34: 84, + 0x11D35: 84, + 0x11D36: 84, + 0x11D3A: 84, + 0x11D3C: 84, + 0x11D3D: 84, + 0x11D3F: 84, + 0x11D40: 84, + 0x11D41: 84, + 0x11D42: 84, + 0x11D43: 84, + 0x11D44: 84, + 0x11D45: 84, + 0x11D47: 84, + 0x11D90: 84, + 0x11D91: 84, + 0x11D95: 84, + 0x11D97: 84, + 0x11EF3: 84, + 0x11EF4: 84, + 0x11F00: 84, + 0x11F01: 84, + 0x11F36: 84, + 0x11F37: 84, + 0x11F38: 84, + 0x11F39: 84, + 0x11F3A: 84, + 0x11F40: 84, + 0x11F42: 84, + 0x13430: 84, + 0x13431: 84, + 0x13432: 84, + 0x13433: 84, + 0x13434: 84, + 0x13435: 84, + 0x13436: 84, + 0x13437: 84, + 0x13438: 84, + 0x13439: 84, + 0x1343A: 84, + 0x1343B: 84, + 0x1343C: 84, + 0x1343D: 84, + 0x1343E: 84, + 0x1343F: 84, + 0x13440: 84, + 0x13447: 84, + 0x13448: 84, + 0x13449: 84, + 0x1344A: 84, + 0x1344B: 84, + 0x1344C: 84, + 0x1344D: 84, + 0x1344E: 84, + 0x1344F: 84, + 0x13450: 84, + 0x13451: 84, + 0x13452: 84, + 0x13453: 84, + 0x13454: 84, + 0x13455: 84, + 0x16AF0: 84, + 0x16AF1: 84, + 0x16AF2: 84, + 0x16AF3: 84, + 0x16AF4: 84, + 0x16B30: 84, + 0x16B31: 84, + 0x16B32: 84, + 0x16B33: 84, + 0x16B34: 84, + 0x16B35: 84, + 0x16B36: 84, + 0x16F4F: 84, + 0x16F8F: 84, + 0x16F90: 84, + 0x16F91: 84, + 0x16F92: 84, + 0x16FE4: 84, + 0x1BC9D: 84, + 0x1BC9E: 84, + 0x1BCA0: 84, + 0x1BCA1: 84, + 0x1BCA2: 84, + 0x1BCA3: 84, + 0x1CF00: 84, + 0x1CF01: 84, + 0x1CF02: 84, + 0x1CF03: 84, + 0x1CF04: 84, + 0x1CF05: 84, + 0x1CF06: 84, + 0x1CF07: 84, + 0x1CF08: 84, + 0x1CF09: 84, + 0x1CF0A: 84, + 0x1CF0B: 84, + 0x1CF0C: 84, + 0x1CF0D: 84, + 0x1CF0E: 84, + 0x1CF0F: 84, + 0x1CF10: 84, + 0x1CF11: 84, + 0x1CF12: 84, + 0x1CF13: 84, + 0x1CF14: 84, + 0x1CF15: 84, + 0x1CF16: 84, + 0x1CF17: 84, + 0x1CF18: 84, + 0x1CF19: 84, + 0x1CF1A: 84, + 0x1CF1B: 84, + 0x1CF1C: 84, + 0x1CF1D: 84, + 0x1CF1E: 84, + 0x1CF1F: 84, + 0x1CF20: 84, + 0x1CF21: 84, + 0x1CF22: 84, + 0x1CF23: 84, + 0x1CF24: 84, + 0x1CF25: 84, + 0x1CF26: 84, + 0x1CF27: 84, + 0x1CF28: 84, + 0x1CF29: 84, + 0x1CF2A: 84, + 0x1CF2B: 84, + 0x1CF2C: 84, + 0x1CF2D: 84, + 0x1CF30: 84, + 0x1CF31: 84, + 0x1CF32: 84, + 0x1CF33: 84, + 0x1CF34: 84, + 0x1CF35: 84, + 0x1CF36: 84, + 0x1CF37: 84, + 0x1CF38: 84, + 0x1CF39: 84, + 0x1CF3A: 84, + 0x1CF3B: 84, + 0x1CF3C: 84, + 0x1CF3D: 84, + 0x1CF3E: 84, + 0x1CF3F: 84, + 0x1CF40: 84, + 0x1CF41: 84, + 0x1CF42: 84, + 0x1CF43: 84, + 0x1CF44: 84, + 0x1CF45: 84, + 0x1CF46: 84, + 0x1D167: 84, + 0x1D168: 84, + 0x1D169: 84, + 0x1D173: 84, + 0x1D174: 84, + 0x1D175: 84, + 0x1D176: 84, + 0x1D177: 84, + 0x1D178: 84, + 0x1D179: 84, + 0x1D17A: 84, + 0x1D17B: 84, + 0x1D17C: 84, + 0x1D17D: 84, + 0x1D17E: 84, + 0x1D17F: 84, + 0x1D180: 84, + 0x1D181: 84, + 0x1D182: 84, + 0x1D185: 84, + 0x1D186: 84, + 0x1D187: 84, + 0x1D188: 84, + 0x1D189: 84, + 0x1D18A: 84, + 0x1D18B: 84, + 0x1D1AA: 84, + 0x1D1AB: 84, + 0x1D1AC: 84, + 0x1D1AD: 84, + 0x1D242: 84, + 0x1D243: 84, + 0x1D244: 84, + 0x1DA00: 84, + 0x1DA01: 84, + 0x1DA02: 84, + 0x1DA03: 84, + 0x1DA04: 84, + 0x1DA05: 84, + 0x1DA06: 84, + 0x1DA07: 84, + 0x1DA08: 84, + 0x1DA09: 84, + 0x1DA0A: 84, + 0x1DA0B: 84, + 0x1DA0C: 84, + 0x1DA0D: 84, + 0x1DA0E: 84, + 0x1DA0F: 84, + 0x1DA10: 84, + 0x1DA11: 84, + 0x1DA12: 84, + 0x1DA13: 84, + 0x1DA14: 84, + 0x1DA15: 84, + 0x1DA16: 84, + 0x1DA17: 84, + 0x1DA18: 84, + 0x1DA19: 84, + 0x1DA1A: 84, + 0x1DA1B: 84, + 0x1DA1C: 84, + 0x1DA1D: 84, + 0x1DA1E: 84, + 0x1DA1F: 84, + 0x1DA20: 84, + 0x1DA21: 84, + 0x1DA22: 84, + 0x1DA23: 84, + 0x1DA24: 84, + 0x1DA25: 84, + 0x1DA26: 84, + 0x1DA27: 84, + 0x1DA28: 84, + 0x1DA29: 84, + 0x1DA2A: 84, + 0x1DA2B: 84, + 0x1DA2C: 84, + 0x1DA2D: 84, + 0x1DA2E: 84, + 0x1DA2F: 84, + 0x1DA30: 84, + 0x1DA31: 84, + 0x1DA32: 84, + 0x1DA33: 84, + 0x1DA34: 84, + 0x1DA35: 84, + 0x1DA36: 84, + 0x1DA3B: 84, + 0x1DA3C: 84, + 0x1DA3D: 84, + 0x1DA3E: 84, + 0x1DA3F: 84, + 0x1DA40: 84, + 0x1DA41: 84, + 0x1DA42: 84, + 0x1DA43: 84, + 0x1DA44: 84, + 0x1DA45: 84, + 0x1DA46: 84, + 0x1DA47: 84, + 0x1DA48: 84, + 0x1DA49: 84, + 0x1DA4A: 84, + 0x1DA4B: 84, + 0x1DA4C: 84, + 0x1DA4D: 84, + 0x1DA4E: 84, + 0x1DA4F: 84, + 0x1DA50: 84, + 0x1DA51: 84, + 0x1DA52: 84, + 0x1DA53: 84, + 0x1DA54: 84, + 0x1DA55: 84, + 0x1DA56: 84, + 0x1DA57: 84, + 0x1DA58: 84, + 0x1DA59: 84, + 0x1DA5A: 84, + 0x1DA5B: 84, + 0x1DA5C: 84, + 0x1DA5D: 84, + 0x1DA5E: 84, + 0x1DA5F: 84, + 0x1DA60: 84, + 0x1DA61: 84, + 0x1DA62: 84, + 0x1DA63: 84, + 0x1DA64: 84, + 0x1DA65: 84, + 0x1DA66: 84, + 0x1DA67: 84, + 0x1DA68: 84, + 0x1DA69: 84, + 0x1DA6A: 84, + 0x1DA6B: 84, + 0x1DA6C: 84, + 0x1DA75: 84, + 0x1DA84: 84, + 0x1DA9B: 84, + 0x1DA9C: 84, + 0x1DA9D: 84, + 0x1DA9E: 84, + 0x1DA9F: 84, + 0x1DAA1: 84, + 0x1DAA2: 84, + 0x1DAA3: 84, + 0x1DAA4: 84, + 0x1DAA5: 84, + 0x1DAA6: 84, + 0x1DAA7: 84, + 0x1DAA8: 84, + 0x1DAA9: 84, + 0x1DAAA: 84, + 0x1DAAB: 84, + 0x1DAAC: 84, + 0x1DAAD: 84, + 0x1DAAE: 84, + 0x1DAAF: 84, + 0x1E000: 84, + 0x1E001: 84, + 0x1E002: 84, + 0x1E003: 84, + 0x1E004: 84, + 0x1E005: 84, + 0x1E006: 84, + 0x1E008: 84, + 0x1E009: 84, + 0x1E00A: 84, + 0x1E00B: 84, + 0x1E00C: 84, + 0x1E00D: 84, + 0x1E00E: 84, + 0x1E00F: 84, + 0x1E010: 84, + 0x1E011: 84, + 0x1E012: 84, + 0x1E013: 84, + 0x1E014: 84, + 0x1E015: 84, + 0x1E016: 84, + 0x1E017: 84, + 0x1E018: 84, + 0x1E01B: 84, + 0x1E01C: 84, + 0x1E01D: 84, + 0x1E01E: 84, + 0x1E01F: 84, + 0x1E020: 84, + 0x1E021: 84, + 0x1E023: 84, + 0x1E024: 84, + 0x1E026: 84, + 0x1E027: 84, + 0x1E028: 84, + 0x1E029: 84, + 0x1E02A: 84, + 0x1E08F: 84, + 0x1E130: 84, + 0x1E131: 84, + 0x1E132: 84, + 0x1E133: 84, + 0x1E134: 84, + 0x1E135: 84, + 0x1E136: 84, + 0x1E2AE: 84, + 0x1E2EC: 84, + 0x1E2ED: 84, + 0x1E2EE: 84, + 0x1E2EF: 84, + 0x1E4EC: 84, + 0x1E4ED: 84, + 0x1E4EE: 84, + 0x1E4EF: 84, + 0x1E8D0: 84, + 0x1E8D1: 84, + 0x1E8D2: 84, + 0x1E8D3: 84, + 0x1E8D4: 84, + 0x1E8D5: 84, + 0x1E8D6: 84, + 0x1E900: 68, + 0x1E901: 68, + 0x1E902: 68, + 0x1E903: 68, + 0x1E904: 68, + 0x1E905: 68, + 0x1E906: 68, + 0x1E907: 68, + 0x1E908: 68, + 0x1E909: 68, + 0x1E90A: 68, + 0x1E90B: 68, + 0x1E90C: 68, + 0x1E90D: 68, + 0x1E90E: 68, + 0x1E90F: 68, + 0x1E910: 68, + 0x1E911: 68, + 0x1E912: 68, + 0x1E913: 68, + 0x1E914: 68, + 0x1E915: 68, + 0x1E916: 68, + 0x1E917: 68, + 0x1E918: 68, + 0x1E919: 68, + 0x1E91A: 68, + 0x1E91B: 68, + 0x1E91C: 68, + 0x1E91D: 68, + 0x1E91E: 68, + 0x1E91F: 68, + 0x1E920: 68, + 0x1E921: 68, + 0x1E922: 68, + 0x1E923: 68, + 0x1E924: 68, + 0x1E925: 68, + 0x1E926: 68, + 0x1E927: 68, + 0x1E928: 68, + 0x1E929: 68, + 0x1E92A: 68, + 0x1E92B: 68, + 0x1E92C: 68, + 0x1E92D: 68, + 0x1E92E: 68, + 0x1E92F: 68, + 0x1E930: 68, + 0x1E931: 68, + 0x1E932: 68, + 0x1E933: 68, + 0x1E934: 68, + 0x1E935: 68, + 0x1E936: 68, + 0x1E937: 68, + 0x1E938: 68, + 0x1E939: 68, + 0x1E93A: 68, + 0x1E93B: 68, + 0x1E93C: 68, + 0x1E93D: 68, + 0x1E93E: 68, + 0x1E93F: 68, + 0x1E940: 68, + 0x1E941: 68, + 0x1E942: 68, + 0x1E943: 68, + 0x1E944: 84, + 0x1E945: 84, + 0x1E946: 84, + 0x1E947: 84, + 0x1E948: 84, + 0x1E949: 84, + 0x1E94A: 84, + 0x1E94B: 84, + 0xE0001: 84, + 0xE0020: 84, + 0xE0021: 84, + 0xE0022: 84, + 0xE0023: 84, + 0xE0024: 84, + 0xE0025: 84, + 0xE0026: 84, + 0xE0027: 84, + 0xE0028: 84, + 0xE0029: 84, + 0xE002A: 84, + 0xE002B: 84, + 0xE002C: 84, + 0xE002D: 84, + 0xE002E: 84, + 0xE002F: 84, + 0xE0030: 84, + 0xE0031: 84, + 0xE0032: 84, + 0xE0033: 84, + 0xE0034: 84, + 0xE0035: 84, + 0xE0036: 84, + 0xE0037: 84, + 0xE0038: 84, + 0xE0039: 84, + 0xE003A: 84, + 0xE003B: 84, + 0xE003C: 84, + 0xE003D: 84, + 0xE003E: 84, + 0xE003F: 84, + 0xE0040: 84, + 0xE0041: 84, + 0xE0042: 84, + 0xE0043: 84, + 0xE0044: 84, + 0xE0045: 84, + 0xE0046: 84, + 0xE0047: 84, + 0xE0048: 84, + 0xE0049: 84, + 0xE004A: 84, + 0xE004B: 84, + 0xE004C: 84, + 0xE004D: 84, + 0xE004E: 84, + 0xE004F: 84, + 0xE0050: 84, + 0xE0051: 84, + 0xE0052: 84, + 0xE0053: 84, + 0xE0054: 84, + 0xE0055: 84, + 0xE0056: 84, + 0xE0057: 84, + 0xE0058: 84, + 0xE0059: 84, + 0xE005A: 84, + 0xE005B: 84, + 0xE005C: 84, + 0xE005D: 84, + 0xE005E: 84, + 0xE005F: 84, + 0xE0060: 84, + 0xE0061: 84, + 0xE0062: 84, + 0xE0063: 84, + 0xE0064: 84, + 0xE0065: 84, + 0xE0066: 84, + 0xE0067: 84, + 0xE0068: 84, + 0xE0069: 84, + 0xE006A: 84, + 0xE006B: 84, + 0xE006C: 84, + 0xE006D: 84, + 0xE006E: 84, + 0xE006F: 84, + 0xE0070: 84, + 0xE0071: 84, + 0xE0072: 84, + 0xE0073: 84, + 0xE0074: 84, + 0xE0075: 84, + 0xE0076: 84, + 0xE0077: 84, + 0xE0078: 84, + 0xE0079: 84, + 0xE007A: 84, + 0xE007B: 84, + 0xE007C: 84, + 0xE007D: 84, + 0xE007E: 84, + 0xE007F: 84, + 0xE0100: 84, + 0xE0101: 84, + 0xE0102: 84, + 0xE0103: 84, + 0xE0104: 84, + 0xE0105: 84, + 0xE0106: 84, + 0xE0107: 84, + 0xE0108: 84, + 0xE0109: 84, + 0xE010A: 84, + 0xE010B: 84, + 0xE010C: 84, + 0xE010D: 84, + 0xE010E: 84, + 0xE010F: 84, + 0xE0110: 84, + 0xE0111: 84, + 0xE0112: 84, + 0xE0113: 84, + 0xE0114: 84, + 0xE0115: 84, + 0xE0116: 84, + 0xE0117: 84, + 0xE0118: 84, + 0xE0119: 84, + 0xE011A: 84, + 0xE011B: 84, + 0xE011C: 84, + 0xE011D: 84, + 0xE011E: 84, + 0xE011F: 84, + 0xE0120: 84, + 0xE0121: 84, + 0xE0122: 84, + 0xE0123: 84, + 0xE0124: 84, + 0xE0125: 84, + 0xE0126: 84, + 0xE0127: 84, + 0xE0128: 84, + 0xE0129: 84, + 0xE012A: 84, + 0xE012B: 84, + 0xE012C: 84, + 0xE012D: 84, + 0xE012E: 84, + 0xE012F: 84, + 0xE0130: 84, + 0xE0131: 84, + 0xE0132: 84, + 0xE0133: 84, + 0xE0134: 84, + 0xE0135: 84, + 0xE0136: 84, + 0xE0137: 84, + 0xE0138: 84, + 0xE0139: 84, + 0xE013A: 84, + 0xE013B: 84, + 0xE013C: 84, + 0xE013D: 84, + 0xE013E: 84, + 0xE013F: 84, + 0xE0140: 84, + 0xE0141: 84, + 0xE0142: 84, + 0xE0143: 84, + 0xE0144: 84, + 0xE0145: 84, + 0xE0146: 84, + 0xE0147: 84, + 0xE0148: 84, + 0xE0149: 84, + 0xE014A: 84, + 0xE014B: 84, + 0xE014C: 84, + 0xE014D: 84, + 0xE014E: 84, + 0xE014F: 84, + 0xE0150: 84, + 0xE0151: 84, + 0xE0152: 84, + 0xE0153: 84, + 0xE0154: 84, + 0xE0155: 84, + 0xE0156: 84, + 0xE0157: 84, + 0xE0158: 84, + 0xE0159: 84, + 0xE015A: 84, + 0xE015B: 84, + 0xE015C: 84, + 0xE015D: 84, + 0xE015E: 84, + 0xE015F: 84, + 0xE0160: 84, + 0xE0161: 84, + 0xE0162: 84, + 0xE0163: 84, + 0xE0164: 84, + 0xE0165: 84, + 0xE0166: 84, + 0xE0167: 84, + 0xE0168: 84, + 0xE0169: 84, + 0xE016A: 84, + 0xE016B: 84, + 0xE016C: 84, + 0xE016D: 84, + 0xE016E: 84, + 0xE016F: 84, + 0xE0170: 84, + 0xE0171: 84, + 0xE0172: 84, + 0xE0173: 84, + 0xE0174: 84, + 0xE0175: 84, + 0xE0176: 84, + 0xE0177: 84, + 0xE0178: 84, + 0xE0179: 84, + 0xE017A: 84, + 0xE017B: 84, + 0xE017C: 84, + 0xE017D: 84, + 0xE017E: 84, + 0xE017F: 84, + 0xE0180: 84, + 0xE0181: 84, + 0xE0182: 84, + 0xE0183: 84, + 0xE0184: 84, + 0xE0185: 84, + 0xE0186: 84, + 0xE0187: 84, + 0xE0188: 84, + 0xE0189: 84, + 0xE018A: 84, + 0xE018B: 84, + 0xE018C: 84, + 0xE018D: 84, + 0xE018E: 84, + 0xE018F: 84, + 0xE0190: 84, + 0xE0191: 84, + 0xE0192: 84, + 0xE0193: 84, + 0xE0194: 84, + 0xE0195: 84, + 0xE0196: 84, + 0xE0197: 84, + 0xE0198: 84, + 0xE0199: 84, + 0xE019A: 84, + 0xE019B: 84, + 0xE019C: 84, + 0xE019D: 84, + 0xE019E: 84, + 0xE019F: 84, + 0xE01A0: 84, + 0xE01A1: 84, + 0xE01A2: 84, + 0xE01A3: 84, + 0xE01A4: 84, + 0xE01A5: 84, + 0xE01A6: 84, + 0xE01A7: 84, + 0xE01A8: 84, + 0xE01A9: 84, + 0xE01AA: 84, + 0xE01AB: 84, + 0xE01AC: 84, + 0xE01AD: 84, + 0xE01AE: 84, + 0xE01AF: 84, + 0xE01B0: 84, + 0xE01B1: 84, + 0xE01B2: 84, + 0xE01B3: 84, + 0xE01B4: 84, + 0xE01B5: 84, + 0xE01B6: 84, + 0xE01B7: 84, + 0xE01B8: 84, + 0xE01B9: 84, + 0xE01BA: 84, + 0xE01BB: 84, + 0xE01BC: 84, + 0xE01BD: 84, + 0xE01BE: 84, + 0xE01BF: 84, + 0xE01C0: 84, + 0xE01C1: 84, + 0xE01C2: 84, + 0xE01C3: 84, + 0xE01C4: 84, + 0xE01C5: 84, + 0xE01C6: 84, + 0xE01C7: 84, + 0xE01C8: 84, + 0xE01C9: 84, + 0xE01CA: 84, + 0xE01CB: 84, + 0xE01CC: 84, + 0xE01CD: 84, + 0xE01CE: 84, + 0xE01CF: 84, + 0xE01D0: 84, + 0xE01D1: 84, + 0xE01D2: 84, + 0xE01D3: 84, + 0xE01D4: 84, + 0xE01D5: 84, + 0xE01D6: 84, + 0xE01D7: 84, + 0xE01D8: 84, + 0xE01D9: 84, + 0xE01DA: 84, + 0xE01DB: 84, + 0xE01DC: 84, + 0xE01DD: 84, + 0xE01DE: 84, + 0xE01DF: 84, + 0xE01E0: 84, + 0xE01E1: 84, + 0xE01E2: 84, + 0xE01E3: 84, + 0xE01E4: 84, + 0xE01E5: 84, + 0xE01E6: 84, + 0xE01E7: 84, + 0xE01E8: 84, + 0xE01E9: 84, + 0xE01EA: 84, + 0xE01EB: 84, + 0xE01EC: 84, + 0xE01ED: 84, + 0xE01EE: 84, + 0xE01EF: 84, +} +codepoint_classes = { + "PVALID": ( + 0x2D0000002E, + 0x300000003A, + 0x610000007B, + 0xDF000000F7, + 0xF800000100, + 0x10100000102, + 0x10300000104, + 0x10500000106, + 0x10700000108, + 0x1090000010A, + 0x10B0000010C, + 0x10D0000010E, + 0x10F00000110, + 0x11100000112, + 0x11300000114, + 0x11500000116, + 0x11700000118, + 0x1190000011A, + 0x11B0000011C, + 0x11D0000011E, + 0x11F00000120, + 0x12100000122, + 0x12300000124, + 0x12500000126, + 0x12700000128, + 0x1290000012A, + 0x12B0000012C, + 0x12D0000012E, + 0x12F00000130, + 0x13100000132, + 0x13500000136, + 0x13700000139, + 0x13A0000013B, + 0x13C0000013D, + 0x13E0000013F, + 0x14200000143, + 0x14400000145, + 0x14600000147, + 0x14800000149, + 0x14B0000014C, + 0x14D0000014E, + 0x14F00000150, + 0x15100000152, + 0x15300000154, + 0x15500000156, + 0x15700000158, + 0x1590000015A, + 0x15B0000015C, + 0x15D0000015E, + 0x15F00000160, + 0x16100000162, + 0x16300000164, + 0x16500000166, + 0x16700000168, + 0x1690000016A, + 0x16B0000016C, + 0x16D0000016E, + 0x16F00000170, + 0x17100000172, + 0x17300000174, + 0x17500000176, + 0x17700000178, + 0x17A0000017B, + 0x17C0000017D, + 0x17E0000017F, + 0x18000000181, + 0x18300000184, + 0x18500000186, + 0x18800000189, + 0x18C0000018E, + 0x19200000193, + 0x19500000196, + 0x1990000019C, + 0x19E0000019F, + 0x1A1000001A2, + 0x1A3000001A4, + 0x1A5000001A6, + 0x1A8000001A9, + 0x1AA000001AC, + 0x1AD000001AE, + 0x1B0000001B1, + 0x1B4000001B5, + 0x1B6000001B7, + 0x1B9000001BC, + 0x1BD000001C4, + 0x1CE000001CF, + 0x1D0000001D1, + 0x1D2000001D3, + 0x1D4000001D5, + 0x1D6000001D7, + 0x1D8000001D9, + 0x1DA000001DB, + 0x1DC000001DE, + 0x1DF000001E0, + 0x1E1000001E2, + 0x1E3000001E4, + 0x1E5000001E6, + 0x1E7000001E8, + 0x1E9000001EA, + 0x1EB000001EC, + 0x1ED000001EE, + 0x1EF000001F1, + 0x1F5000001F6, + 0x1F9000001FA, + 0x1FB000001FC, + 0x1FD000001FE, + 0x1FF00000200, + 0x20100000202, + 0x20300000204, + 0x20500000206, + 0x20700000208, + 0x2090000020A, + 0x20B0000020C, + 0x20D0000020E, + 0x20F00000210, + 0x21100000212, + 0x21300000214, + 0x21500000216, + 0x21700000218, + 0x2190000021A, + 0x21B0000021C, + 0x21D0000021E, + 0x21F00000220, + 0x22100000222, + 0x22300000224, + 0x22500000226, + 0x22700000228, + 0x2290000022A, + 0x22B0000022C, + 0x22D0000022E, + 0x22F00000230, + 0x23100000232, + 0x2330000023A, + 0x23C0000023D, + 0x23F00000241, + 0x24200000243, + 0x24700000248, + 0x2490000024A, + 0x24B0000024C, + 0x24D0000024E, + 0x24F000002B0, + 0x2B9000002C2, + 0x2C6000002D2, + 0x2EC000002ED, + 0x2EE000002EF, + 0x30000000340, + 0x34200000343, + 0x3460000034F, + 0x35000000370, + 0x37100000372, + 0x37300000374, + 0x37700000378, + 0x37B0000037E, + 0x39000000391, + 0x3AC000003CF, + 0x3D7000003D8, + 0x3D9000003DA, + 0x3DB000003DC, + 0x3DD000003DE, + 0x3DF000003E0, + 0x3E1000003E2, + 0x3E3000003E4, + 0x3E5000003E6, + 0x3E7000003E8, + 0x3E9000003EA, + 0x3EB000003EC, + 0x3ED000003EE, + 0x3EF000003F0, + 0x3F3000003F4, + 0x3F8000003F9, + 0x3FB000003FD, + 0x43000000460, + 0x46100000462, + 0x46300000464, + 0x46500000466, + 0x46700000468, + 0x4690000046A, + 0x46B0000046C, + 0x46D0000046E, + 0x46F00000470, + 0x47100000472, + 0x47300000474, + 0x47500000476, + 0x47700000478, + 0x4790000047A, + 0x47B0000047C, + 0x47D0000047E, + 0x47F00000480, + 0x48100000482, + 0x48300000488, + 0x48B0000048C, + 0x48D0000048E, + 0x48F00000490, + 0x49100000492, + 0x49300000494, + 0x49500000496, + 0x49700000498, + 0x4990000049A, + 0x49B0000049C, + 0x49D0000049E, + 0x49F000004A0, + 0x4A1000004A2, + 0x4A3000004A4, + 0x4A5000004A6, + 0x4A7000004A8, + 0x4A9000004AA, + 0x4AB000004AC, + 0x4AD000004AE, + 0x4AF000004B0, + 0x4B1000004B2, + 0x4B3000004B4, + 0x4B5000004B6, + 0x4B7000004B8, + 0x4B9000004BA, + 0x4BB000004BC, + 0x4BD000004BE, + 0x4BF000004C0, + 0x4C2000004C3, + 0x4C4000004C5, + 0x4C6000004C7, + 0x4C8000004C9, + 0x4CA000004CB, + 0x4CC000004CD, + 0x4CE000004D0, + 0x4D1000004D2, + 0x4D3000004D4, + 0x4D5000004D6, + 0x4D7000004D8, + 0x4D9000004DA, + 0x4DB000004DC, + 0x4DD000004DE, + 0x4DF000004E0, + 0x4E1000004E2, + 0x4E3000004E4, + 0x4E5000004E6, + 0x4E7000004E8, + 0x4E9000004EA, + 0x4EB000004EC, + 0x4ED000004EE, + 0x4EF000004F0, + 0x4F1000004F2, + 0x4F3000004F4, + 0x4F5000004F6, + 0x4F7000004F8, + 0x4F9000004FA, + 0x4FB000004FC, + 0x4FD000004FE, + 0x4FF00000500, + 0x50100000502, + 0x50300000504, + 0x50500000506, + 0x50700000508, + 0x5090000050A, + 0x50B0000050C, + 0x50D0000050E, + 0x50F00000510, + 0x51100000512, + 0x51300000514, + 0x51500000516, + 0x51700000518, + 0x5190000051A, + 0x51B0000051C, + 0x51D0000051E, + 0x51F00000520, + 0x52100000522, + 0x52300000524, + 0x52500000526, + 0x52700000528, + 0x5290000052A, + 0x52B0000052C, + 0x52D0000052E, + 0x52F00000530, + 0x5590000055A, + 0x56000000587, + 0x58800000589, + 0x591000005BE, + 0x5BF000005C0, + 0x5C1000005C3, + 0x5C4000005C6, + 0x5C7000005C8, + 0x5D0000005EB, + 0x5EF000005F3, + 0x6100000061B, + 0x62000000640, + 0x64100000660, + 0x66E00000675, + 0x679000006D4, + 0x6D5000006DD, + 0x6DF000006E9, + 0x6EA000006F0, + 0x6FA00000700, + 0x7100000074B, + 0x74D000007B2, + 0x7C0000007F6, + 0x7FD000007FE, + 0x8000000082E, + 0x8400000085C, + 0x8600000086B, + 0x87000000888, + 0x8890000088F, + 0x898000008E2, + 0x8E300000958, + 0x96000000964, + 0x96600000970, + 0x97100000984, + 0x9850000098D, + 0x98F00000991, + 0x993000009A9, + 0x9AA000009B1, + 0x9B2000009B3, + 0x9B6000009BA, + 0x9BC000009C5, + 0x9C7000009C9, + 0x9CB000009CF, + 0x9D7000009D8, + 0x9E0000009E4, + 0x9E6000009F2, + 0x9FC000009FD, + 0x9FE000009FF, + 0xA0100000A04, + 0xA0500000A0B, + 0xA0F00000A11, + 0xA1300000A29, + 0xA2A00000A31, + 0xA3200000A33, + 0xA3500000A36, + 0xA3800000A3A, + 0xA3C00000A3D, + 0xA3E00000A43, + 0xA4700000A49, + 0xA4B00000A4E, + 0xA5100000A52, + 0xA5C00000A5D, + 0xA6600000A76, + 0xA8100000A84, + 0xA8500000A8E, + 0xA8F00000A92, + 0xA9300000AA9, + 0xAAA00000AB1, + 0xAB200000AB4, + 0xAB500000ABA, + 0xABC00000AC6, + 0xAC700000ACA, + 0xACB00000ACE, + 0xAD000000AD1, + 0xAE000000AE4, + 0xAE600000AF0, + 0xAF900000B00, + 0xB0100000B04, + 0xB0500000B0D, + 0xB0F00000B11, + 0xB1300000B29, + 0xB2A00000B31, + 0xB3200000B34, + 0xB3500000B3A, + 0xB3C00000B45, + 0xB4700000B49, + 0xB4B00000B4E, + 0xB5500000B58, + 0xB5F00000B64, + 0xB6600000B70, + 0xB7100000B72, + 0xB8200000B84, + 0xB8500000B8B, + 0xB8E00000B91, + 0xB9200000B96, + 0xB9900000B9B, + 0xB9C00000B9D, + 0xB9E00000BA0, + 0xBA300000BA5, + 0xBA800000BAB, + 0xBAE00000BBA, + 0xBBE00000BC3, + 0xBC600000BC9, + 0xBCA00000BCE, + 0xBD000000BD1, + 0xBD700000BD8, + 0xBE600000BF0, + 0xC0000000C0D, + 0xC0E00000C11, + 0xC1200000C29, + 0xC2A00000C3A, + 0xC3C00000C45, + 0xC4600000C49, + 0xC4A00000C4E, + 0xC5500000C57, + 0xC5800000C5B, + 0xC5D00000C5E, + 0xC6000000C64, + 0xC6600000C70, + 0xC8000000C84, + 0xC8500000C8D, + 0xC8E00000C91, + 0xC9200000CA9, + 0xCAA00000CB4, + 0xCB500000CBA, + 0xCBC00000CC5, + 0xCC600000CC9, + 0xCCA00000CCE, + 0xCD500000CD7, + 0xCDD00000CDF, + 0xCE000000CE4, + 0xCE600000CF0, + 0xCF100000CF4, + 0xD0000000D0D, + 0xD0E00000D11, + 0xD1200000D45, + 0xD4600000D49, + 0xD4A00000D4F, + 0xD5400000D58, + 0xD5F00000D64, + 0xD6600000D70, + 0xD7A00000D80, + 0xD8100000D84, + 0xD8500000D97, + 0xD9A00000DB2, + 0xDB300000DBC, + 0xDBD00000DBE, + 0xDC000000DC7, + 0xDCA00000DCB, + 0xDCF00000DD5, + 0xDD600000DD7, + 0xDD800000DE0, + 0xDE600000DF0, + 0xDF200000DF4, + 0xE0100000E33, + 0xE3400000E3B, + 0xE4000000E4F, + 0xE5000000E5A, + 0xE8100000E83, + 0xE8400000E85, + 0xE8600000E8B, + 0xE8C00000EA4, + 0xEA500000EA6, + 0xEA700000EB3, + 0xEB400000EBE, + 0xEC000000EC5, + 0xEC600000EC7, + 0xEC800000ECF, + 0xED000000EDA, + 0xEDE00000EE0, + 0xF0000000F01, + 0xF0B00000F0C, + 0xF1800000F1A, + 0xF2000000F2A, + 0xF3500000F36, + 0xF3700000F38, + 0xF3900000F3A, + 0xF3E00000F43, + 0xF4400000F48, + 0xF4900000F4D, + 0xF4E00000F52, + 0xF5300000F57, + 0xF5800000F5C, + 0xF5D00000F69, + 0xF6A00000F6D, + 0xF7100000F73, + 0xF7400000F75, + 0xF7A00000F81, + 0xF8200000F85, + 0xF8600000F93, + 0xF9400000F98, + 0xF9900000F9D, + 0xF9E00000FA2, + 0xFA300000FA7, + 0xFA800000FAC, + 0xFAD00000FB9, + 0xFBA00000FBD, + 0xFC600000FC7, + 0x10000000104A, + 0x10500000109E, + 0x10D0000010FB, + 0x10FD00001100, + 0x120000001249, + 0x124A0000124E, + 0x125000001257, + 0x125800001259, + 0x125A0000125E, + 0x126000001289, + 0x128A0000128E, + 0x1290000012B1, + 0x12B2000012B6, + 0x12B8000012BF, + 0x12C0000012C1, + 0x12C2000012C6, + 0x12C8000012D7, + 0x12D800001311, + 0x131200001316, + 0x13180000135B, + 0x135D00001360, + 0x138000001390, + 0x13A0000013F6, + 0x14010000166D, + 0x166F00001680, + 0x16810000169B, + 0x16A0000016EB, + 0x16F1000016F9, + 0x170000001716, + 0x171F00001735, + 0x174000001754, + 0x17600000176D, + 0x176E00001771, + 0x177200001774, + 0x1780000017B4, + 0x17B6000017D4, + 0x17D7000017D8, + 0x17DC000017DE, + 0x17E0000017EA, + 0x18100000181A, + 0x182000001879, + 0x1880000018AB, + 0x18B0000018F6, + 0x19000000191F, + 0x19200000192C, + 0x19300000193C, + 0x19460000196E, + 0x197000001975, + 0x1980000019AC, + 0x19B0000019CA, + 0x19D0000019DA, + 0x1A0000001A1C, + 0x1A2000001A5F, + 0x1A6000001A7D, + 0x1A7F00001A8A, + 0x1A9000001A9A, + 0x1AA700001AA8, + 0x1AB000001ABE, + 0x1ABF00001ACF, + 0x1B0000001B4D, + 0x1B5000001B5A, + 0x1B6B00001B74, + 0x1B8000001BF4, + 0x1C0000001C38, + 0x1C4000001C4A, + 0x1C4D00001C7E, + 0x1CD000001CD3, + 0x1CD400001CFB, + 0x1D0000001D2C, + 0x1D2F00001D30, + 0x1D3B00001D3C, + 0x1D4E00001D4F, + 0x1D6B00001D78, + 0x1D7900001D9B, + 0x1DC000001E00, + 0x1E0100001E02, + 0x1E0300001E04, + 0x1E0500001E06, + 0x1E0700001E08, + 0x1E0900001E0A, + 0x1E0B00001E0C, + 0x1E0D00001E0E, + 0x1E0F00001E10, + 0x1E1100001E12, + 0x1E1300001E14, + 0x1E1500001E16, + 0x1E1700001E18, + 0x1E1900001E1A, + 0x1E1B00001E1C, + 0x1E1D00001E1E, + 0x1E1F00001E20, + 0x1E2100001E22, + 0x1E2300001E24, + 0x1E2500001E26, + 0x1E2700001E28, + 0x1E2900001E2A, + 0x1E2B00001E2C, + 0x1E2D00001E2E, + 0x1E2F00001E30, + 0x1E3100001E32, + 0x1E3300001E34, + 0x1E3500001E36, + 0x1E3700001E38, + 0x1E3900001E3A, + 0x1E3B00001E3C, + 0x1E3D00001E3E, + 0x1E3F00001E40, + 0x1E4100001E42, + 0x1E4300001E44, + 0x1E4500001E46, + 0x1E4700001E48, + 0x1E4900001E4A, + 0x1E4B00001E4C, + 0x1E4D00001E4E, + 0x1E4F00001E50, + 0x1E5100001E52, + 0x1E5300001E54, + 0x1E5500001E56, + 0x1E5700001E58, + 0x1E5900001E5A, + 0x1E5B00001E5C, + 0x1E5D00001E5E, + 0x1E5F00001E60, + 0x1E6100001E62, + 0x1E6300001E64, + 0x1E6500001E66, + 0x1E6700001E68, + 0x1E6900001E6A, + 0x1E6B00001E6C, + 0x1E6D00001E6E, + 0x1E6F00001E70, + 0x1E7100001E72, + 0x1E7300001E74, + 0x1E7500001E76, + 0x1E7700001E78, + 0x1E7900001E7A, + 0x1E7B00001E7C, + 0x1E7D00001E7E, + 0x1E7F00001E80, + 0x1E8100001E82, + 0x1E8300001E84, + 0x1E8500001E86, + 0x1E8700001E88, + 0x1E8900001E8A, + 0x1E8B00001E8C, + 0x1E8D00001E8E, + 0x1E8F00001E90, + 0x1E9100001E92, + 0x1E9300001E94, + 0x1E9500001E9A, + 0x1E9C00001E9E, + 0x1E9F00001EA0, + 0x1EA100001EA2, + 0x1EA300001EA4, + 0x1EA500001EA6, + 0x1EA700001EA8, + 0x1EA900001EAA, + 0x1EAB00001EAC, + 0x1EAD00001EAE, + 0x1EAF00001EB0, + 0x1EB100001EB2, + 0x1EB300001EB4, + 0x1EB500001EB6, + 0x1EB700001EB8, + 0x1EB900001EBA, + 0x1EBB00001EBC, + 0x1EBD00001EBE, + 0x1EBF00001EC0, + 0x1EC100001EC2, + 0x1EC300001EC4, + 0x1EC500001EC6, + 0x1EC700001EC8, + 0x1EC900001ECA, + 0x1ECB00001ECC, + 0x1ECD00001ECE, + 0x1ECF00001ED0, + 0x1ED100001ED2, + 0x1ED300001ED4, + 0x1ED500001ED6, + 0x1ED700001ED8, + 0x1ED900001EDA, + 0x1EDB00001EDC, + 0x1EDD00001EDE, + 0x1EDF00001EE0, + 0x1EE100001EE2, + 0x1EE300001EE4, + 0x1EE500001EE6, + 0x1EE700001EE8, + 0x1EE900001EEA, + 0x1EEB00001EEC, + 0x1EED00001EEE, + 0x1EEF00001EF0, + 0x1EF100001EF2, + 0x1EF300001EF4, + 0x1EF500001EF6, + 0x1EF700001EF8, + 0x1EF900001EFA, + 0x1EFB00001EFC, + 0x1EFD00001EFE, + 0x1EFF00001F08, + 0x1F1000001F16, + 0x1F2000001F28, + 0x1F3000001F38, + 0x1F4000001F46, + 0x1F5000001F58, + 0x1F6000001F68, + 0x1F7000001F71, + 0x1F7200001F73, + 0x1F7400001F75, + 0x1F7600001F77, + 0x1F7800001F79, + 0x1F7A00001F7B, + 0x1F7C00001F7D, + 0x1FB000001FB2, + 0x1FB600001FB7, + 0x1FC600001FC7, + 0x1FD000001FD3, + 0x1FD600001FD8, + 0x1FE000001FE3, + 0x1FE400001FE8, + 0x1FF600001FF7, + 0x214E0000214F, + 0x218400002185, + 0x2C3000002C60, + 0x2C6100002C62, + 0x2C6500002C67, + 0x2C6800002C69, + 0x2C6A00002C6B, + 0x2C6C00002C6D, + 0x2C7100002C72, + 0x2C7300002C75, + 0x2C7600002C7C, + 0x2C8100002C82, + 0x2C8300002C84, + 0x2C8500002C86, + 0x2C8700002C88, + 0x2C8900002C8A, + 0x2C8B00002C8C, + 0x2C8D00002C8E, + 0x2C8F00002C90, + 0x2C9100002C92, + 0x2C9300002C94, + 0x2C9500002C96, + 0x2C9700002C98, + 0x2C9900002C9A, + 0x2C9B00002C9C, + 0x2C9D00002C9E, + 0x2C9F00002CA0, + 0x2CA100002CA2, + 0x2CA300002CA4, + 0x2CA500002CA6, + 0x2CA700002CA8, + 0x2CA900002CAA, + 0x2CAB00002CAC, + 0x2CAD00002CAE, + 0x2CAF00002CB0, + 0x2CB100002CB2, + 0x2CB300002CB4, + 0x2CB500002CB6, + 0x2CB700002CB8, + 0x2CB900002CBA, + 0x2CBB00002CBC, + 0x2CBD00002CBE, + 0x2CBF00002CC0, + 0x2CC100002CC2, + 0x2CC300002CC4, + 0x2CC500002CC6, + 0x2CC700002CC8, + 0x2CC900002CCA, + 0x2CCB00002CCC, + 0x2CCD00002CCE, + 0x2CCF00002CD0, + 0x2CD100002CD2, + 0x2CD300002CD4, + 0x2CD500002CD6, + 0x2CD700002CD8, + 0x2CD900002CDA, + 0x2CDB00002CDC, + 0x2CDD00002CDE, + 0x2CDF00002CE0, + 0x2CE100002CE2, + 0x2CE300002CE5, + 0x2CEC00002CED, + 0x2CEE00002CF2, + 0x2CF300002CF4, + 0x2D0000002D26, + 0x2D2700002D28, + 0x2D2D00002D2E, + 0x2D3000002D68, + 0x2D7F00002D97, + 0x2DA000002DA7, + 0x2DA800002DAF, + 0x2DB000002DB7, + 0x2DB800002DBF, + 0x2DC000002DC7, + 0x2DC800002DCF, + 0x2DD000002DD7, + 0x2DD800002DDF, + 0x2DE000002E00, + 0x2E2F00002E30, + 0x300500003008, + 0x302A0000302E, + 0x303C0000303D, + 0x304100003097, + 0x30990000309B, + 0x309D0000309F, + 0x30A1000030FB, + 0x30FC000030FF, + 0x310500003130, + 0x31A0000031C0, + 0x31F000003200, + 0x340000004DC0, + 0x4E000000A48D, + 0xA4D00000A4FE, + 0xA5000000A60D, + 0xA6100000A62C, + 0xA6410000A642, + 0xA6430000A644, + 0xA6450000A646, + 0xA6470000A648, + 0xA6490000A64A, + 0xA64B0000A64C, + 0xA64D0000A64E, + 0xA64F0000A650, + 0xA6510000A652, + 0xA6530000A654, + 0xA6550000A656, + 0xA6570000A658, + 0xA6590000A65A, + 0xA65B0000A65C, + 0xA65D0000A65E, + 0xA65F0000A660, + 0xA6610000A662, + 0xA6630000A664, + 0xA6650000A666, + 0xA6670000A668, + 0xA6690000A66A, + 0xA66B0000A66C, + 0xA66D0000A670, + 0xA6740000A67E, + 0xA67F0000A680, + 0xA6810000A682, + 0xA6830000A684, + 0xA6850000A686, + 0xA6870000A688, + 0xA6890000A68A, + 0xA68B0000A68C, + 0xA68D0000A68E, + 0xA68F0000A690, + 0xA6910000A692, + 0xA6930000A694, + 0xA6950000A696, + 0xA6970000A698, + 0xA6990000A69A, + 0xA69B0000A69C, + 0xA69E0000A6E6, + 0xA6F00000A6F2, + 0xA7170000A720, + 0xA7230000A724, + 0xA7250000A726, + 0xA7270000A728, + 0xA7290000A72A, + 0xA72B0000A72C, + 0xA72D0000A72E, + 0xA72F0000A732, + 0xA7330000A734, + 0xA7350000A736, + 0xA7370000A738, + 0xA7390000A73A, + 0xA73B0000A73C, + 0xA73D0000A73E, + 0xA73F0000A740, + 0xA7410000A742, + 0xA7430000A744, + 0xA7450000A746, + 0xA7470000A748, + 0xA7490000A74A, + 0xA74B0000A74C, + 0xA74D0000A74E, + 0xA74F0000A750, + 0xA7510000A752, + 0xA7530000A754, + 0xA7550000A756, + 0xA7570000A758, + 0xA7590000A75A, + 0xA75B0000A75C, + 0xA75D0000A75E, + 0xA75F0000A760, + 0xA7610000A762, + 0xA7630000A764, + 0xA7650000A766, + 0xA7670000A768, + 0xA7690000A76A, + 0xA76B0000A76C, + 0xA76D0000A76E, + 0xA76F0000A770, + 0xA7710000A779, + 0xA77A0000A77B, + 0xA77C0000A77D, + 0xA77F0000A780, + 0xA7810000A782, + 0xA7830000A784, + 0xA7850000A786, + 0xA7870000A789, + 0xA78C0000A78D, + 0xA78E0000A790, + 0xA7910000A792, + 0xA7930000A796, + 0xA7970000A798, + 0xA7990000A79A, + 0xA79B0000A79C, + 0xA79D0000A79E, + 0xA79F0000A7A0, + 0xA7A10000A7A2, + 0xA7A30000A7A4, + 0xA7A50000A7A6, + 0xA7A70000A7A8, + 0xA7A90000A7AA, + 0xA7AF0000A7B0, + 0xA7B50000A7B6, + 0xA7B70000A7B8, + 0xA7B90000A7BA, + 0xA7BB0000A7BC, + 0xA7BD0000A7BE, + 0xA7BF0000A7C0, + 0xA7C10000A7C2, + 0xA7C30000A7C4, + 0xA7C80000A7C9, + 0xA7CA0000A7CB, + 0xA7D10000A7D2, + 0xA7D30000A7D4, + 0xA7D50000A7D6, + 0xA7D70000A7D8, + 0xA7D90000A7DA, + 0xA7F60000A7F8, + 0xA7FA0000A828, + 0xA82C0000A82D, + 0xA8400000A874, + 0xA8800000A8C6, + 0xA8D00000A8DA, + 0xA8E00000A8F8, + 0xA8FB0000A8FC, + 0xA8FD0000A92E, + 0xA9300000A954, + 0xA9800000A9C1, + 0xA9CF0000A9DA, + 0xA9E00000A9FF, + 0xAA000000AA37, + 0xAA400000AA4E, + 0xAA500000AA5A, + 0xAA600000AA77, + 0xAA7A0000AAC3, + 0xAADB0000AADE, + 0xAAE00000AAF0, + 0xAAF20000AAF7, + 0xAB010000AB07, + 0xAB090000AB0F, + 0xAB110000AB17, + 0xAB200000AB27, + 0xAB280000AB2F, + 0xAB300000AB5B, + 0xAB600000AB69, + 0xABC00000ABEB, + 0xABEC0000ABEE, + 0xABF00000ABFA, + 0xAC000000D7A4, + 0xFA0E0000FA10, + 0xFA110000FA12, + 0xFA130000FA15, + 0xFA1F0000FA20, + 0xFA210000FA22, + 0xFA230000FA25, + 0xFA270000FA2A, + 0xFB1E0000FB1F, + 0xFE200000FE30, + 0xFE730000FE74, + 0x100000001000C, + 0x1000D00010027, + 0x100280001003B, + 0x1003C0001003E, + 0x1003F0001004E, + 0x100500001005E, + 0x10080000100FB, + 0x101FD000101FE, + 0x102800001029D, + 0x102A0000102D1, + 0x102E0000102E1, + 0x1030000010320, + 0x1032D00010341, + 0x103420001034A, + 0x103500001037B, + 0x103800001039E, + 0x103A0000103C4, + 0x103C8000103D0, + 0x104280001049E, + 0x104A0000104AA, + 0x104D8000104FC, + 0x1050000010528, + 0x1053000010564, + 0x10597000105A2, + 0x105A3000105B2, + 0x105B3000105BA, + 0x105BB000105BD, + 0x1060000010737, + 0x1074000010756, + 0x1076000010768, + 0x1078000010781, + 0x1080000010806, + 0x1080800010809, + 0x1080A00010836, + 0x1083700010839, + 0x1083C0001083D, + 0x1083F00010856, + 0x1086000010877, + 0x108800001089F, + 0x108E0000108F3, + 0x108F4000108F6, + 0x1090000010916, + 0x109200001093A, + 0x10980000109B8, + 0x109BE000109C0, + 0x10A0000010A04, + 0x10A0500010A07, + 0x10A0C00010A14, + 0x10A1500010A18, + 0x10A1900010A36, + 0x10A3800010A3B, + 0x10A3F00010A40, + 0x10A6000010A7D, + 0x10A8000010A9D, + 0x10AC000010AC8, + 0x10AC900010AE7, + 0x10B0000010B36, + 0x10B4000010B56, + 0x10B6000010B73, + 0x10B8000010B92, + 0x10C0000010C49, + 0x10CC000010CF3, + 0x10D0000010D28, + 0x10D3000010D3A, + 0x10E8000010EAA, + 0x10EAB00010EAD, + 0x10EB000010EB2, + 0x10EFD00010F1D, + 0x10F2700010F28, + 0x10F3000010F51, + 0x10F7000010F86, + 0x10FB000010FC5, + 0x10FE000010FF7, + 0x1100000011047, + 0x1106600011076, + 0x1107F000110BB, + 0x110C2000110C3, + 0x110D0000110E9, + 0x110F0000110FA, + 0x1110000011135, + 0x1113600011140, + 0x1114400011148, + 0x1115000011174, + 0x1117600011177, + 0x11180000111C5, + 0x111C9000111CD, + 0x111CE000111DB, + 0x111DC000111DD, + 0x1120000011212, + 0x1121300011238, + 0x1123E00011242, + 0x1128000011287, + 0x1128800011289, + 0x1128A0001128E, + 0x1128F0001129E, + 0x1129F000112A9, + 0x112B0000112EB, + 0x112F0000112FA, + 0x1130000011304, + 0x113050001130D, + 0x1130F00011311, + 0x1131300011329, + 0x1132A00011331, + 0x1133200011334, + 0x113350001133A, + 0x1133B00011345, + 0x1134700011349, + 0x1134B0001134E, + 0x1135000011351, + 0x1135700011358, + 0x1135D00011364, + 0x113660001136D, + 0x1137000011375, + 0x114000001144B, + 0x114500001145A, + 0x1145E00011462, + 0x11480000114C6, + 0x114C7000114C8, + 0x114D0000114DA, + 0x11580000115B6, + 0x115B8000115C1, + 0x115D8000115DE, + 0x1160000011641, + 0x1164400011645, + 0x116500001165A, + 0x11680000116B9, + 0x116C0000116CA, + 0x117000001171B, + 0x1171D0001172C, + 0x117300001173A, + 0x1174000011747, + 0x118000001183B, + 0x118C0000118EA, + 0x118FF00011907, + 0x119090001190A, + 0x1190C00011914, + 0x1191500011917, + 0x1191800011936, + 0x1193700011939, + 0x1193B00011944, + 0x119500001195A, + 0x119A0000119A8, + 0x119AA000119D8, + 0x119DA000119E2, + 0x119E3000119E5, + 0x11A0000011A3F, + 0x11A4700011A48, + 0x11A5000011A9A, + 0x11A9D00011A9E, + 0x11AB000011AF9, + 0x11C0000011C09, + 0x11C0A00011C37, + 0x11C3800011C41, + 0x11C5000011C5A, + 0x11C7200011C90, + 0x11C9200011CA8, + 0x11CA900011CB7, + 0x11D0000011D07, + 0x11D0800011D0A, + 0x11D0B00011D37, + 0x11D3A00011D3B, + 0x11D3C00011D3E, + 0x11D3F00011D48, + 0x11D5000011D5A, + 0x11D6000011D66, + 0x11D6700011D69, + 0x11D6A00011D8F, + 0x11D9000011D92, + 0x11D9300011D99, + 0x11DA000011DAA, + 0x11EE000011EF7, + 0x11F0000011F11, + 0x11F1200011F3B, + 0x11F3E00011F43, + 0x11F5000011F5A, + 0x11FB000011FB1, + 0x120000001239A, + 0x1248000012544, + 0x12F9000012FF1, + 0x1300000013430, + 0x1344000013456, + 0x1440000014647, + 0x1680000016A39, + 0x16A4000016A5F, + 0x16A6000016A6A, + 0x16A7000016ABF, + 0x16AC000016ACA, + 0x16AD000016AEE, + 0x16AF000016AF5, + 0x16B0000016B37, + 0x16B4000016B44, + 0x16B5000016B5A, + 0x16B6300016B78, + 0x16B7D00016B90, + 0x16E6000016E80, + 0x16F0000016F4B, + 0x16F4F00016F88, + 0x16F8F00016FA0, + 0x16FE000016FE2, + 0x16FE300016FE5, + 0x16FF000016FF2, + 0x17000000187F8, + 0x1880000018CD6, + 0x18D0000018D09, + 0x1AFF00001AFF4, + 0x1AFF50001AFFC, + 0x1AFFD0001AFFF, + 0x1B0000001B123, + 0x1B1320001B133, + 0x1B1500001B153, + 0x1B1550001B156, + 0x1B1640001B168, + 0x1B1700001B2FC, + 0x1BC000001BC6B, + 0x1BC700001BC7D, + 0x1BC800001BC89, + 0x1BC900001BC9A, + 0x1BC9D0001BC9F, + 0x1CF000001CF2E, + 0x1CF300001CF47, + 0x1DA000001DA37, + 0x1DA3B0001DA6D, + 0x1DA750001DA76, + 0x1DA840001DA85, + 0x1DA9B0001DAA0, + 0x1DAA10001DAB0, + 0x1DF000001DF1F, + 0x1DF250001DF2B, + 0x1E0000001E007, + 0x1E0080001E019, + 0x1E01B0001E022, + 0x1E0230001E025, + 0x1E0260001E02B, + 0x1E08F0001E090, + 0x1E1000001E12D, + 0x1E1300001E13E, + 0x1E1400001E14A, + 0x1E14E0001E14F, + 0x1E2900001E2AF, + 0x1E2C00001E2FA, + 0x1E4D00001E4FA, + 0x1E7E00001E7E7, + 0x1E7E80001E7EC, + 0x1E7ED0001E7EF, + 0x1E7F00001E7FF, + 0x1E8000001E8C5, + 0x1E8D00001E8D7, + 0x1E9220001E94C, + 0x1E9500001E95A, + 0x200000002A6E0, + 0x2A7000002B73A, + 0x2B7400002B81E, + 0x2B8200002CEA2, + 0x2CEB00002EBE1, + 0x2EBF00002EE5E, + 0x300000003134B, + 0x31350000323B0, + ), + "CONTEXTJ": (0x200C0000200E,), + "CONTEXTO": ( + 0xB7000000B8, + 0x37500000376, + 0x5F3000005F5, + 0x6600000066A, + 0x6F0000006FA, + 0x30FB000030FC, + ), +} diff --git a/meow/lib/python3.13/site-packages/idna/intranges.py b/meow/lib/python3.13/site-packages/idna/intranges.py new file mode 100644 index 0000000000000000000000000000000000000000..7bfaa8d80d7dc471d572db0f949460901126e8bd --- /dev/null +++ b/meow/lib/python3.13/site-packages/idna/intranges.py @@ -0,0 +1,57 @@ +""" +Given a list of integers, made up of (hopefully) a small number of long runs +of consecutive integers, compute a representation of the form +((start1, end1), (start2, end2) ...). Then answer the question "was x present +in the original list?" in time O(log(# runs)). +""" + +import bisect +from typing import List, Tuple + + +def intranges_from_list(list_: List[int]) -> Tuple[int, ...]: + """Represent a list of integers as a sequence of ranges: + ((start_0, end_0), (start_1, end_1), ...), such that the original + integers are exactly those x such that start_i <= x < end_i for some i. + + Ranges are encoded as single integers (start << 32 | end), not as tuples. + """ + + sorted_list = sorted(list_) + ranges = [] + last_write = -1 + for i in range(len(sorted_list)): + if i + 1 < len(sorted_list): + if sorted_list[i] == sorted_list[i + 1] - 1: + continue + current_range = sorted_list[last_write + 1 : i + 1] + ranges.append(_encode_range(current_range[0], current_range[-1] + 1)) + last_write = i + + return tuple(ranges) + + +def _encode_range(start: int, end: int) -> int: + return (start << 32) | end + + +def _decode_range(r: int) -> Tuple[int, int]: + return (r >> 32), (r & ((1 << 32) - 1)) + + +def intranges_contain(int_: int, ranges: Tuple[int, ...]) -> bool: + """Determine if `int_` falls into one of the ranges in `ranges`.""" + tuple_ = _encode_range(int_, 0) + pos = bisect.bisect_left(ranges, tuple_) + # we could be immediately ahead of a tuple (start, end) + # with start < int_ <= end + if pos > 0: + left, right = _decode_range(ranges[pos - 1]) + if left <= int_ < right: + return True + # or we could be immediately behind a tuple (int_, end) + if pos < len(ranges): + left, _ = _decode_range(ranges[pos]) + if left == int_: + return True + return False diff --git a/meow/lib/python3.13/site-packages/idna/package_data.py b/meow/lib/python3.13/site-packages/idna/package_data.py new file mode 100644 index 0000000000000000000000000000000000000000..514ff7e2e68b65f309d30a0b06e6b290d2c353a8 --- /dev/null +++ b/meow/lib/python3.13/site-packages/idna/package_data.py @@ -0,0 +1 @@ +__version__ = "3.10" diff --git a/meow/lib/python3.13/site-packages/idna/py.typed b/meow/lib/python3.13/site-packages/idna/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/meow/lib/python3.13/site-packages/requests-2.32.3.dist-info/INSTALLER b/meow/lib/python3.13/site-packages/requests-2.32.3.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/meow/lib/python3.13/site-packages/requests-2.32.3.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/meow/lib/python3.13/site-packages/requests-2.32.3.dist-info/METADATA b/meow/lib/python3.13/site-packages/requests-2.32.3.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..72d9dc5313849bf6c7322781ef6bbe2400d02c45 --- /dev/null +++ b/meow/lib/python3.13/site-packages/requests-2.32.3.dist-info/METADATA @@ -0,0 +1,119 @@ +Metadata-Version: 2.1 +Name: requests +Version: 2.32.3 +Summary: Python HTTP for Humans. +Home-page: https://requests.readthedocs.io +Author: Kenneth Reitz +Author-email: me@kennethreitz.org +License: Apache-2.0 +Project-URL: Documentation, https://requests.readthedocs.io +Project-URL: Source, https://github.com/psf/requests +Classifier: Development Status :: 5 - Production/Stable +Classifier: Environment :: Web Environment +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: Apache Software License +Classifier: Natural Language :: English +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Topic :: Internet :: WWW/HTTP +Classifier: Topic :: Software Development :: Libraries +Requires-Python: >=3.8 +Description-Content-Type: text/markdown +License-File: LICENSE +Requires-Dist: charset-normalizer <4,>=2 +Requires-Dist: idna <4,>=2.5 +Requires-Dist: urllib3 <3,>=1.21.1 +Requires-Dist: certifi >=2017.4.17 +Provides-Extra: security +Provides-Extra: socks +Requires-Dist: PySocks !=1.5.7,>=1.5.6 ; extra == 'socks' +Provides-Extra: use_chardet_on_py3 +Requires-Dist: chardet <6,>=3.0.2 ; extra == 'use_chardet_on_py3' + +# Requests + +**Requests** is a simple, yet elegant, HTTP library. + +```python +>>> import requests +>>> r = requests.get('https://httpbin.org/basic-auth/user/pass', auth=('user', 'pass')) +>>> r.status_code +200 +>>> r.headers['content-type'] +'application/json; charset=utf8' +>>> r.encoding +'utf-8' +>>> r.text +'{"authenticated": true, ...' +>>> r.json() +{'authenticated': True, ...} +``` + +Requests allows you to send HTTP/1.1 requests extremely easily. There’s no need to manually add query strings to your URLs, or to form-encode your `PUT` & `POST` data — but nowadays, just use the `json` method! + +Requests is one of the most downloaded Python packages today, pulling in around `30M downloads / week`— according to GitHub, Requests is currently [depended upon](https://github.com/psf/requests/network/dependents?package_id=UGFja2FnZS01NzA4OTExNg%3D%3D) by `1,000,000+` repositories. You may certainly put your trust in this code. + +[![Downloads](https://static.pepy.tech/badge/requests/month)](https://pepy.tech/project/requests) +[![Supported Versions](https://img.shields.io/pypi/pyversions/requests.svg)](https://pypi.org/project/requests) +[![Contributors](https://img.shields.io/github/contributors/psf/requests.svg)](https://github.com/psf/requests/graphs/contributors) + +## Installing Requests and Supported Versions + +Requests is available on PyPI: + +```console +$ python -m pip install requests +``` + +Requests officially supports Python 3.8+. + +## Supported Features & Best–Practices + +Requests is ready for the demands of building robust and reliable HTTP–speaking applications, for the needs of today. + +- Keep-Alive & Connection Pooling +- International Domains and URLs +- Sessions with Cookie Persistence +- Browser-style TLS/SSL Verification +- Basic & Digest Authentication +- Familiar `dict`–like Cookies +- Automatic Content Decompression and Decoding +- Multi-part File Uploads +- SOCKS Proxy Support +- Connection Timeouts +- Streaming Downloads +- Automatic honoring of `.netrc` +- Chunked HTTP Requests + +## API Reference and User Guide available on [Read the Docs](https://requests.readthedocs.io) + +[![Read the Docs](https://raw.githubusercontent.com/psf/requests/main/ext/ss.png)](https://requests.readthedocs.io) + +## Cloning the repository + +When cloning the Requests repository, you may need to add the `-c +fetch.fsck.badTimezone=ignore` flag to avoid an error about a bad commit (see +[this issue](https://github.com/psf/requests/issues/2690) for more background): + +```shell +git clone -c fetch.fsck.badTimezone=ignore https://github.com/psf/requests.git +``` + +You can also apply this setting to your global Git config: + +```shell +git config --global fetch.fsck.badTimezone ignore +``` + +--- + +[![Kenneth Reitz](https://raw.githubusercontent.com/psf/requests/main/ext/kr.png)](https://kennethreitz.org) [![Python Software Foundation](https://raw.githubusercontent.com/psf/requests/main/ext/psf.png)](https://www.python.org/psf) diff --git a/meow/lib/python3.13/site-packages/tqdm-4.67.1.dist-info/INSTALLER b/meow/lib/python3.13/site-packages/tqdm-4.67.1.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/meow/lib/python3.13/site-packages/tqdm-4.67.1.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/meow/lib/python3.13/site-packages/tqdm-4.67.1.dist-info/LICENCE b/meow/lib/python3.13/site-packages/tqdm-4.67.1.dist-info/LICENCE new file mode 100644 index 0000000000000000000000000000000000000000..a8922b182e80d9bcb955e8b8ae2bd9a017d72977 --- /dev/null +++ b/meow/lib/python3.13/site-packages/tqdm-4.67.1.dist-info/LICENCE @@ -0,0 +1,49 @@ +`tqdm` is a product of collaborative work. +Unless otherwise stated, all authors (see commit logs) retain copyright +for their respective work, and release the work under the MIT licence +(text below). + +Exceptions or notable authors are listed below +in reverse chronological order: + +* files: * + MPL-2.0 2015-2024 (c) Casper da Costa-Luis + [casperdcl](https://github.com/casperdcl). +* files: tqdm/_tqdm.py + MIT 2016 (c) [PR #96] on behalf of Google Inc. +* files: tqdm/_tqdm.py README.rst .gitignore + MIT 2013 (c) Noam Yorav-Raphael, original author. + +[PR #96]: https://github.com/tqdm/tqdm/pull/96 + + +Mozilla Public Licence (MPL) v. 2.0 - Exhibit A +----------------------------------------------- + +This Source Code Form is subject to the terms of the +Mozilla Public License, v. 2.0. +If a copy of the MPL was not distributed with this project, +You can obtain one at https://mozilla.org/MPL/2.0/. + + +MIT License (MIT) +----------------- + +Copyright (c) 2013 noamraph + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/meow/lib/python3.13/site-packages/tqdm-4.67.1.dist-info/METADATA b/meow/lib/python3.13/site-packages/tqdm-4.67.1.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..181b4dc8b2f8697d1c0374a612ffd8b2f2db346a --- /dev/null +++ b/meow/lib/python3.13/site-packages/tqdm-4.67.1.dist-info/METADATA @@ -0,0 +1,1594 @@ +Metadata-Version: 2.1 +Name: tqdm +Version: 4.67.1 +Summary: Fast, Extensible Progress Meter +Maintainer-email: tqdm developers +License: MPL-2.0 AND MIT +Project-URL: homepage, https://tqdm.github.io +Project-URL: repository, https://github.com/tqdm/tqdm +Project-URL: changelog, https://tqdm.github.io/releases +Project-URL: wiki, https://github.com/tqdm/tqdm/wiki +Keywords: progressbar,progressmeter,progress,bar,meter,rate,eta,console,terminal,time +Classifier: Development Status :: 5 - Production/Stable +Classifier: Environment :: Console +Classifier: Environment :: MacOS X +Classifier: Environment :: Other Environment +Classifier: Environment :: Win32 (MS Windows) +Classifier: Environment :: X11 Applications +Classifier: Framework :: IPython +Classifier: Framework :: Jupyter +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: Education +Classifier: Intended Audience :: End Users/Desktop +Classifier: Intended Audience :: Other Audience +Classifier: Intended Audience :: System Administrators +Classifier: License :: OSI Approved :: MIT License +Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0) +Classifier: Operating System :: MacOS +Classifier: Operating System :: MacOS :: MacOS X +Classifier: Operating System :: Microsoft +Classifier: Operating System :: Microsoft :: MS-DOS +Classifier: Operating System :: Microsoft :: Windows +Classifier: Operating System :: POSIX +Classifier: Operating System :: POSIX :: BSD +Classifier: Operating System :: POSIX :: BSD :: FreeBSD +Classifier: Operating System :: POSIX :: Linux +Classifier: Operating System :: POSIX :: SunOS/Solaris +Classifier: Operating System :: Unix +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Programming Language :: Python :: Implementation +Classifier: Programming Language :: Python :: Implementation :: IronPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Programming Language :: Unix Shell +Classifier: Topic :: Desktop Environment +Classifier: Topic :: Education :: Computer Aided Instruction (CAI) +Classifier: Topic :: Education :: Testing +Classifier: Topic :: Office/Business +Classifier: Topic :: Other/Nonlisted Topic +Classifier: Topic :: Software Development :: Build Tools +Classifier: Topic :: Software Development :: Libraries +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Topic :: Software Development :: Pre-processors +Classifier: Topic :: Software Development :: User Interfaces +Classifier: Topic :: System :: Installation/Setup +Classifier: Topic :: System :: Logging +Classifier: Topic :: System :: Monitoring +Classifier: Topic :: System :: Shells +Classifier: Topic :: Terminals +Classifier: Topic :: Utilities +Requires-Python: >=3.7 +Description-Content-Type: text/x-rst +License-File: LICENCE +Requires-Dist: colorama; platform_system == "Windows" +Provides-Extra: dev +Requires-Dist: pytest>=6; extra == "dev" +Requires-Dist: pytest-cov; extra == "dev" +Requires-Dist: pytest-timeout; extra == "dev" +Requires-Dist: pytest-asyncio>=0.24; extra == "dev" +Requires-Dist: nbval; extra == "dev" +Provides-Extra: discord +Requires-Dist: requests; extra == "discord" +Provides-Extra: slack +Requires-Dist: slack-sdk; extra == "slack" +Provides-Extra: telegram +Requires-Dist: requests; extra == "telegram" +Provides-Extra: notebook +Requires-Dist: ipywidgets>=6; extra == "notebook" + +|Logo| + +tqdm +==== + +|Py-Versions| |Versions| |Conda-Forge-Status| |Docker| |Snapcraft| + +|Build-Status| |Coverage-Status| |Branch-Coverage-Status| |Codacy-Grade| |Libraries-Rank| |PyPI-Downloads| + +|LICENCE| |OpenHub-Status| |binder-demo| |awesome-python| + +``tqdm`` derives from the Arabic word *taqaddum* (تقدّم) which can mean "progress," +and is an abbreviation for "I love you so much" in Spanish (*te quiero demasiado*). + +Instantly make your loops show a smart progress meter - just wrap any +iterable with ``tqdm(iterable)``, and you're done! + +.. code:: python + + from tqdm import tqdm + for i in tqdm(range(10000)): + ... + +``76%|████████████████████████        | 7568/10000 [00:33<00:10, 229.00it/s]`` + +``trange(N)`` can be also used as a convenient shortcut for +``tqdm(range(N))``. + +|Screenshot| + |Video| |Slides| |Merch| + +It can also be executed as a module with pipes: + +.. code:: sh + + $ seq 9999999 | tqdm --bytes | wc -l + 75.2MB [00:00, 217MB/s] + 9999999 + + $ tar -zcf - docs/ | tqdm --bytes --total `du -sb docs/ | cut -f1` \ + > backup.tgz + 32%|██████████▍ | 8.89G/27.9G [00:42<01:31, 223MB/s] + +Overhead is low -- about 60ns per iteration (80ns with ``tqdm.gui``), and is +unit tested against performance regression. +By comparison, the well-established +`ProgressBar `__ has +an 800ns/iter overhead. + +In addition to its low overhead, ``tqdm`` uses smart algorithms to predict +the remaining time and to skip unnecessary iteration displays, which allows +for a negligible overhead in most cases. + +``tqdm`` works on any platform +(Linux, Windows, Mac, FreeBSD, NetBSD, Solaris/SunOS), +in any console or in a GUI, and is also friendly with IPython/Jupyter notebooks. + +``tqdm`` does not require any dependencies (not even ``curses``!), just +Python and an environment supporting ``carriage return \r`` and +``line feed \n`` control characters. + +------------------------------------------ + +.. contents:: Table of contents + :backlinks: top + :local: + + +Installation +------------ + +Latest PyPI stable release +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +|Versions| |PyPI-Downloads| |Libraries-Dependents| + +.. code:: sh + + pip install tqdm + +Latest development release on GitHub +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +|GitHub-Status| |GitHub-Stars| |GitHub-Commits| |GitHub-Forks| |GitHub-Updated| + +Pull and install pre-release ``devel`` branch: + +.. code:: sh + + pip install "git+https://github.com/tqdm/tqdm.git@devel#egg=tqdm" + +Latest Conda release +~~~~~~~~~~~~~~~~~~~~ + +|Conda-Forge-Status| + +.. code:: sh + + conda install -c conda-forge tqdm + +Latest Snapcraft release +~~~~~~~~~~~~~~~~~~~~~~~~ + +|Snapcraft| + +There are 3 channels to choose from: + +.. code:: sh + + snap install tqdm # implies --stable, i.e. latest tagged release + snap install tqdm --candidate # master branch + snap install tqdm --edge # devel branch + +Note that ``snap`` binaries are purely for CLI use (not ``import``-able), and +automatically set up ``bash`` tab-completion. + +Latest Docker release +~~~~~~~~~~~~~~~~~~~~~ + +|Docker| + +.. code:: sh + + docker pull tqdm/tqdm + docker run -i --rm tqdm/tqdm --help + +Other +~~~~~ + +There are other (unofficial) places where ``tqdm`` may be downloaded, particularly for CLI use: + +|Repology| + +.. |Repology| image:: https://repology.org/badge/tiny-repos/python:tqdm.svg + :target: https://repology.org/project/python:tqdm/versions + +Changelog +--------- + +The list of all changes is available either on GitHub's Releases: +|GitHub-Status|, on the +`wiki `__, or on the +`website `__. + + +Usage +----- + +``tqdm`` is very versatile and can be used in a number of ways. +The three main ones are given below. + +Iterable-based +~~~~~~~~~~~~~~ + +Wrap ``tqdm()`` around any iterable: + +.. code:: python + + from tqdm import tqdm + from time import sleep + + text = "" + for char in tqdm(["a", "b", "c", "d"]): + sleep(0.25) + text = text + char + +``trange(i)`` is a special optimised instance of ``tqdm(range(i))``: + +.. code:: python + + from tqdm import trange + + for i in trange(100): + sleep(0.01) + +Instantiation outside of the loop allows for manual control over ``tqdm()``: + +.. code:: python + + pbar = tqdm(["a", "b", "c", "d"]) + for char in pbar: + sleep(0.25) + pbar.set_description("Processing %s" % char) + +Manual +~~~~~~ + +Manual control of ``tqdm()`` updates using a ``with`` statement: + +.. code:: python + + with tqdm(total=100) as pbar: + for i in range(10): + sleep(0.1) + pbar.update(10) + +If the optional variable ``total`` (or an iterable with ``len()``) is +provided, predictive stats are displayed. + +``with`` is also optional (you can just assign ``tqdm()`` to a variable, +but in this case don't forget to ``del`` or ``close()`` at the end: + +.. code:: python + + pbar = tqdm(total=100) + for i in range(10): + sleep(0.1) + pbar.update(10) + pbar.close() + +Module +~~~~~~ + +Perhaps the most wonderful use of ``tqdm`` is in a script or on the command +line. Simply inserting ``tqdm`` (or ``python -m tqdm``) between pipes will pass +through all ``stdin`` to ``stdout`` while printing progress to ``stderr``. + +The example below demonstrate counting the number of lines in all Python files +in the current directory, with timing information included. + +.. code:: sh + + $ time find . -name '*.py' -type f -exec cat \{} \; | wc -l + 857365 + + real 0m3.458s + user 0m0.274s + sys 0m3.325s + + $ time find . -name '*.py' -type f -exec cat \{} \; | tqdm | wc -l + 857366it [00:03, 246471.31it/s] + 857365 + + real 0m3.585s + user 0m0.862s + sys 0m3.358s + +Note that the usual arguments for ``tqdm`` can also be specified. + +.. code:: sh + + $ find . -name '*.py' -type f -exec cat \{} \; | + tqdm --unit loc --unit_scale --total 857366 >> /dev/null + 100%|█████████████████████████████████| 857K/857K [00:04<00:00, 246Kloc/s] + +Backing up a large directory? + +.. code:: sh + + $ tar -zcf - docs/ | tqdm --bytes --total `du -sb docs/ | cut -f1` \ + > backup.tgz + 44%|██████████████▊ | 153M/352M [00:14<00:18, 11.0MB/s] + +This can be beautified further: + +.. code:: sh + + $ BYTES=$(du -sb docs/ | cut -f1) + $ tar -cf - docs/ \ + | tqdm --bytes --total "$BYTES" --desc Processing | gzip \ + | tqdm --bytes --total "$BYTES" --desc Compressed --position 1 \ + > ~/backup.tgz + Processing: 100%|██████████████████████| 352M/352M [00:14<00:00, 30.2MB/s] + Compressed: 42%|█████████▎ | 148M/352M [00:14<00:19, 10.9MB/s] + +Or done on a file level using 7-zip: + +.. code:: sh + + $ 7z a -bd -r backup.7z docs/ | grep Compressing \ + | tqdm --total $(find docs/ -type f | wc -l) --unit files \ + | grep -v Compressing + 100%|██████████████████████████▉| 15327/15327 [01:00<00:00, 712.96files/s] + +Pre-existing CLI programs already outputting basic progress information will +benefit from ``tqdm``'s ``--update`` and ``--update_to`` flags: + +.. code:: sh + + $ seq 3 0.1 5 | tqdm --total 5 --update_to --null + 100%|████████████████████████████████████| 5.0/5 [00:00<00:00, 9673.21it/s] + $ seq 10 | tqdm --update --null # 1 + 2 + ... + 10 = 55 iterations + 55it [00:00, 90006.52it/s] + +FAQ and Known Issues +-------------------- + +|GitHub-Issues| + +The most common issues relate to excessive output on multiple lines, instead +of a neat one-line progress bar. + +- Consoles in general: require support for carriage return (``CR``, ``\r``). + + * Some cloud logging consoles which don't support ``\r`` properly + (`cloudwatch `__, + `K8s `__) may benefit from + ``export TQDM_POSITION=-1``. + +- Nested progress bars: + + * Consoles in general: require support for moving cursors up to the + previous line. For example, + `IDLE `__, + `ConEmu `__ and + `PyCharm `__ (also + `here `__, + `here `__, and + `here `__) + lack full support. + * Windows: additionally may require the Python module ``colorama`` + to ensure nested bars stay within their respective lines. + +- Unicode: + + * Environments which report that they support unicode will have solid smooth + progressbars. The fallback is an ``ascii``-only bar. + * Windows consoles often only partially support unicode and thus + `often require explicit ascii=True `__ + (also `here `__). This is due to + either normal-width unicode characters being incorrectly displayed as + "wide", or some unicode characters not rendering. + +- Wrapping generators: + + * Generator wrapper functions tend to hide the length of iterables. + ``tqdm`` does not. + * Replace ``tqdm(enumerate(...))`` with ``enumerate(tqdm(...))`` or + ``tqdm(enumerate(x), total=len(x), ...)``. + The same applies to ``numpy.ndenumerate``. + * Replace ``tqdm(zip(a, b))`` with ``zip(tqdm(a), b)`` or even + ``zip(tqdm(a), tqdm(b))``. + * The same applies to ``itertools``. + * Some useful convenience functions can be found under ``tqdm.contrib``. + +- `No intermediate output in docker-compose `__: + use ``docker-compose run`` instead of ``docker-compose up`` and ``tty: true``. + +- Overriding defaults via environment variables: + e.g. in CI/cloud jobs, ``export TQDM_MININTERVAL=5`` to avoid log spam. + This override logic is handled by the ``tqdm.utils.envwrap`` decorator + (useful independent of ``tqdm``). + +If you come across any other difficulties, browse and file |GitHub-Issues|. + +Documentation +------------- + +|Py-Versions| |README-Hits| (Since 19 May 2016) + +.. code:: python + + class tqdm(): + """ + Decorate an iterable object, returning an iterator which acts exactly + like the original iterable, but prints a dynamically updating + progressbar every time a value is requested. + """ + + @envwrap("TQDM_") # override defaults via env vars + def __init__(self, iterable=None, desc=None, total=None, leave=True, + file=None, ncols=None, mininterval=0.1, + maxinterval=10.0, miniters=None, ascii=None, disable=False, + unit='it', unit_scale=False, dynamic_ncols=False, + smoothing=0.3, bar_format=None, initial=0, position=None, + postfix=None, unit_divisor=1000, write_bytes=False, + lock_args=None, nrows=None, colour=None, delay=0): + +Parameters +~~~~~~~~~~ + +* iterable : iterable, optional + Iterable to decorate with a progressbar. + Leave blank to manually manage the updates. +* desc : str, optional + Prefix for the progressbar. +* total : int or float, optional + The number of expected iterations. If unspecified, + len(iterable) is used if possible. If float("inf") or as a last + resort, only basic progress statistics are displayed + (no ETA, no progressbar). + If ``gui`` is True and this parameter needs subsequent updating, + specify an initial arbitrary large positive number, + e.g. 9e9. +* leave : bool, optional + If [default: True], keeps all traces of the progressbar + upon termination of iteration. + If ``None``, will leave only if ``position`` is ``0``. +* file : ``io.TextIOWrapper`` or ``io.StringIO``, optional + Specifies where to output the progress messages + (default: sys.stderr). Uses ``file.write(str)`` and ``file.flush()`` + methods. For encoding, see ``write_bytes``. +* ncols : int, optional + The width of the entire output message. If specified, + dynamically resizes the progressbar to stay within this bound. + If unspecified, attempts to use environment width. The + fallback is a meter width of 10 and no limit for the counter and + statistics. If 0, will not print any meter (only stats). +* mininterval : float, optional + Minimum progress display update interval [default: 0.1] seconds. +* maxinterval : float, optional + Maximum progress display update interval [default: 10] seconds. + Automatically adjusts ``miniters`` to correspond to ``mininterval`` + after long display update lag. Only works if ``dynamic_miniters`` + or monitor thread is enabled. +* miniters : int or float, optional + Minimum progress display update interval, in iterations. + If 0 and ``dynamic_miniters``, will automatically adjust to equal + ``mininterval`` (more CPU efficient, good for tight loops). + If > 0, will skip display of specified number of iterations. + Tweak this and ``mininterval`` to get very efficient loops. + If your progress is erratic with both fast and slow iterations + (network, skipping items, etc) you should set miniters=1. +* ascii : bool or str, optional + If unspecified or False, use unicode (smooth blocks) to fill + the meter. The fallback is to use ASCII characters " 123456789#". +* disable : bool, optional + Whether to disable the entire progressbar wrapper + [default: False]. If set to None, disable on non-TTY. +* unit : str, optional + String that will be used to define the unit of each iteration + [default: it]. +* unit_scale : bool or int or float, optional + If 1 or True, the number of iterations will be reduced/scaled + automatically and a metric prefix following the + International System of Units standard will be added + (kilo, mega, etc.) [default: False]. If any other non-zero + number, will scale ``total`` and ``n``. +* dynamic_ncols : bool, optional + If set, constantly alters ``ncols`` and ``nrows`` to the + environment (allowing for window resizes) [default: False]. +* smoothing : float, optional + Exponential moving average smoothing factor for speed estimates + (ignored in GUI mode). Ranges from 0 (average speed) to 1 + (current/instantaneous speed) [default: 0.3]. +* bar_format : str, optional + Specify a custom bar string formatting. May impact performance. + [default: '{l_bar}{bar}{r_bar}'], where + l_bar='{desc}: {percentage:3.0f}%|' and + r_bar='| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, ' + '{rate_fmt}{postfix}]' + Possible vars: l_bar, bar, r_bar, n, n_fmt, total, total_fmt, + percentage, elapsed, elapsed_s, ncols, nrows, desc, unit, + rate, rate_fmt, rate_noinv, rate_noinv_fmt, + rate_inv, rate_inv_fmt, postfix, unit_divisor, + remaining, remaining_s, eta. + Note that a trailing ": " is automatically removed after {desc} + if the latter is empty. +* initial : int or float, optional + The initial counter value. Useful when restarting a progress + bar [default: 0]. If using float, consider specifying ``{n:.3f}`` + or similar in ``bar_format``, or specifying ``unit_scale``. +* position : int, optional + Specify the line offset to print this bar (starting from 0) + Automatic if unspecified. + Useful to manage multiple bars at once (eg, from threads). +* postfix : dict or ``*``, optional + Specify additional stats to display at the end of the bar. + Calls ``set_postfix(**postfix)`` if possible (dict). +* unit_divisor : float, optional + [default: 1000], ignored unless ``unit_scale`` is True. +* write_bytes : bool, optional + Whether to write bytes. If (default: False) will write unicode. +* lock_args : tuple, optional + Passed to ``refresh`` for intermediate output + (initialisation, iterating, and updating). +* nrows : int, optional + The screen height. If specified, hides nested bars outside this + bound. If unspecified, attempts to use environment height. + The fallback is 20. +* colour : str, optional + Bar colour (e.g. 'green', '#00ff00'). +* delay : float, optional + Don't display until [default: 0] seconds have elapsed. + +Extra CLI Options +~~~~~~~~~~~~~~~~~ + +* delim : chr, optional + Delimiting character [default: '\n']. Use '\0' for null. + N.B.: on Windows systems, Python converts '\n' to '\r\n'. +* buf_size : int, optional + String buffer size in bytes [default: 256] + used when ``delim`` is specified. +* bytes : bool, optional + If true, will count bytes, ignore ``delim``, and default + ``unit_scale`` to True, ``unit_divisor`` to 1024, and ``unit`` to 'B'. +* tee : bool, optional + If true, passes ``stdin`` to both ``stderr`` and ``stdout``. +* update : bool, optional + If true, will treat input as newly elapsed iterations, + i.e. numbers to pass to ``update()``. Note that this is slow + (~2e5 it/s) since every input must be decoded as a number. +* update_to : bool, optional + If true, will treat input as total elapsed iterations, + i.e. numbers to assign to ``self.n``. Note that this is slow + (~2e5 it/s) since every input must be decoded as a number. +* null : bool, optional + If true, will discard input (no stdout). +* manpath : str, optional + Directory in which to install tqdm man pages. +* comppath : str, optional + Directory in which to place tqdm completion. +* log : str, optional + CRITICAL|FATAL|ERROR|WARN(ING)|[default: 'INFO']|DEBUG|NOTSET. + +Returns +~~~~~~~ + +* out : decorated iterator. + +.. code:: python + + class tqdm(): + def update(self, n=1): + """ + Manually update the progress bar, useful for streams + such as reading files. + E.g.: + >>> t = tqdm(total=filesize) # Initialise + >>> for current_buffer in stream: + ... ... + ... t.update(len(current_buffer)) + >>> t.close() + The last line is highly recommended, but possibly not necessary if + ``t.update()`` will be called in such a way that ``filesize`` will be + exactly reached and printed. + + Parameters + ---------- + n : int or float, optional + Increment to add to the internal counter of iterations + [default: 1]. If using float, consider specifying ``{n:.3f}`` + or similar in ``bar_format``, or specifying ``unit_scale``. + + Returns + ------- + out : bool or None + True if a ``display()`` was triggered. + """ + + def close(self): + """Cleanup and (if leave=False) close the progressbar.""" + + def clear(self, nomove=False): + """Clear current bar display.""" + + def refresh(self): + """ + Force refresh the display of this bar. + + Parameters + ---------- + nolock : bool, optional + If ``True``, does not lock. + If [default: ``False``]: calls ``acquire()`` on internal lock. + lock_args : tuple, optional + Passed to internal lock's ``acquire()``. + If specified, will only ``display()`` if ``acquire()`` returns ``True``. + """ + + def unpause(self): + """Restart tqdm timer from last print time.""" + + def reset(self, total=None): + """ + Resets to 0 iterations for repeated use. + + Consider combining with ``leave=True``. + + Parameters + ---------- + total : int or float, optional. Total to use for the new bar. + """ + + def set_description(self, desc=None, refresh=True): + """ + Set/modify description of the progress bar. + + Parameters + ---------- + desc : str, optional + refresh : bool, optional + Forces refresh [default: True]. + """ + + def set_postfix(self, ordered_dict=None, refresh=True, **tqdm_kwargs): + """ + Set/modify postfix (additional stats) + with automatic formatting based on datatype. + + Parameters + ---------- + ordered_dict : dict or OrderedDict, optional + refresh : bool, optional + Forces refresh [default: True]. + kwargs : dict, optional + """ + + @classmethod + def write(cls, s, file=sys.stdout, end="\n"): + """Print a message via tqdm (without overlap with bars).""" + + @property + def format_dict(self): + """Public API for read-only member access.""" + + def display(self, msg=None, pos=None): + """ + Use ``self.sp`` to display ``msg`` in the specified ``pos``. + + Consider overloading this function when inheriting to use e.g.: + ``self.some_frontend(**self.format_dict)`` instead of ``self.sp``. + + Parameters + ---------- + msg : str, optional. What to display (default: ``repr(self)``). + pos : int, optional. Position to ``moveto`` + (default: ``abs(self.pos)``). + """ + + @classmethod + @contextmanager + def wrapattr(cls, stream, method, total=None, bytes=True, **tqdm_kwargs): + """ + stream : file-like object. + method : str, "read" or "write". The result of ``read()`` and + the first argument of ``write()`` should have a ``len()``. + + >>> with tqdm.wrapattr(file_obj, "read", total=file_obj.size) as fobj: + ... while True: + ... chunk = fobj.read(chunk_size) + ... if not chunk: + ... break + """ + + @classmethod + def pandas(cls, *targs, **tqdm_kwargs): + """Registers the current `tqdm` class with `pandas`.""" + + def trange(*args, **tqdm_kwargs): + """Shortcut for `tqdm(range(*args), **tqdm_kwargs)`.""" + +Convenience Functions +~~~~~~~~~~~~~~~~~~~~~ + +.. code:: python + + def tqdm.contrib.tenumerate(iterable, start=0, total=None, + tqdm_class=tqdm.auto.tqdm, **tqdm_kwargs): + """Equivalent of `numpy.ndenumerate` or builtin `enumerate`.""" + + def tqdm.contrib.tzip(iter1, *iter2plus, **tqdm_kwargs): + """Equivalent of builtin `zip`.""" + + def tqdm.contrib.tmap(function, *sequences, **tqdm_kwargs): + """Equivalent of builtin `map`.""" + +Submodules +~~~~~~~~~~ + +.. code:: python + + class tqdm.notebook.tqdm(tqdm.tqdm): + """IPython/Jupyter Notebook widget.""" + + class tqdm.auto.tqdm(tqdm.tqdm): + """Automatically chooses beween `tqdm.notebook` and `tqdm.tqdm`.""" + + class tqdm.asyncio.tqdm(tqdm.tqdm): + """Asynchronous version.""" + @classmethod + def as_completed(cls, fs, *, loop=None, timeout=None, total=None, + **tqdm_kwargs): + """Wrapper for `asyncio.as_completed`.""" + + class tqdm.gui.tqdm(tqdm.tqdm): + """Matplotlib GUI version.""" + + class tqdm.tk.tqdm(tqdm.tqdm): + """Tkinter GUI version.""" + + class tqdm.rich.tqdm(tqdm.tqdm): + """`rich.progress` version.""" + + class tqdm.keras.TqdmCallback(keras.callbacks.Callback): + """Keras callback for epoch and batch progress.""" + + class tqdm.dask.TqdmCallback(dask.callbacks.Callback): + """Dask callback for task progress.""" + + +``contrib`` ++++++++++++ + +The ``tqdm.contrib`` package also contains experimental modules: + +- ``tqdm.contrib.itertools``: Thin wrappers around ``itertools`` +- ``tqdm.contrib.concurrent``: Thin wrappers around ``concurrent.futures`` +- ``tqdm.contrib.slack``: Posts to `Slack `__ bots +- ``tqdm.contrib.discord``: Posts to `Discord `__ bots +- ``tqdm.contrib.telegram``: Posts to `Telegram `__ bots +- ``tqdm.contrib.bells``: Automagically enables all optional features + + * ``auto``, ``pandas``, ``slack``, ``discord``, ``telegram`` + +Examples and Advanced Usage +--------------------------- + +- See the `examples `__ + folder; +- import the module and run ``help()``; +- consult the `wiki `__; + + * this has an + `excellent article `__ + on how to make a **great** progressbar; + +- check out the `slides from PyData London `__, or +- run the |binder-demo|. + +Description and additional stats +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Custom information can be displayed and updated dynamically on ``tqdm`` bars +with the ``desc`` and ``postfix`` arguments: + +.. code:: python + + from tqdm import tqdm, trange + from random import random, randint + from time import sleep + + with trange(10) as t: + for i in t: + # Description will be displayed on the left + t.set_description('GEN %i' % i) + # Postfix will be displayed on the right, + # formatted automatically based on argument's datatype + t.set_postfix(loss=random(), gen=randint(1,999), str='h', + lst=[1, 2]) + sleep(0.1) + + with tqdm(total=10, bar_format="{postfix[0]} {postfix[1][value]:>8.2g}", + postfix=["Batch", {"value": 0}]) as t: + for i in range(10): + sleep(0.1) + t.postfix[1]["value"] = i / 2 + t.update() + +Points to remember when using ``{postfix[...]}`` in the ``bar_format`` string: + +- ``postfix`` also needs to be passed as an initial argument in a compatible + format, and +- ``postfix`` will be auto-converted to a string if it is a ``dict``-like + object. To prevent this behaviour, insert an extra item into the dictionary + where the key is not a string. + +Additional ``bar_format`` parameters may also be defined by overriding +``format_dict``, and the bar itself may be modified using ``ascii``: + +.. code:: python + + from tqdm import tqdm + class TqdmExtraFormat(tqdm): + """Provides a `total_time` format parameter""" + @property + def format_dict(self): + d = super().format_dict + total_time = d["elapsed"] * (d["total"] or 0) / max(d["n"], 1) + d.update(total_time=self.format_interval(total_time) + " in total") + return d + + for i in TqdmExtraFormat( + range(9), ascii=" .oO0", + bar_format="{total_time}: {percentage:.0f}%|{bar}{r_bar}"): + if i == 4: + break + +.. code:: + + 00:00 in total: 44%|0000. | 4/9 [00:00<00:00, 962.93it/s] + +Note that ``{bar}`` also supports a format specifier ``[width][type]``. + +- ``width`` + + * unspecified (default): automatic to fill ``ncols`` + * ``int >= 0``: fixed width overriding ``ncols`` logic + * ``int < 0``: subtract from the automatic default + +- ``type`` + + * ``a``: ascii (``ascii=True`` override) + * ``u``: unicode (``ascii=False`` override) + * ``b``: blank (``ascii=" "`` override) + +This means a fixed bar with right-justified text may be created by using: +``bar_format="{l_bar}{bar:10}|{bar:-10b}right-justified"`` + +Nested progress bars +~~~~~~~~~~~~~~~~~~~~ + +``tqdm`` supports nested progress bars. Here's an example: + +.. code:: python + + from tqdm.auto import trange + from time import sleep + + for i in trange(4, desc='1st loop'): + for j in trange(5, desc='2nd loop'): + for k in trange(50, desc='3rd loop', leave=False): + sleep(0.01) + +For manual control over positioning (e.g. for multi-processing use), +you may specify ``position=n`` where ``n=0`` for the outermost bar, +``n=1`` for the next, and so on. +However, it's best to check if ``tqdm`` can work without manual ``position`` +first. + +.. code:: python + + from time import sleep + from tqdm import trange, tqdm + from multiprocessing import Pool, RLock, freeze_support + + L = list(range(9)) + + def progresser(n): + interval = 0.001 / (n + 2) + total = 5000 + text = f"#{n}, est. {interval * total:<04.2}s" + for _ in trange(total, desc=text, position=n): + sleep(interval) + + if __name__ == '__main__': + freeze_support() # for Windows support + tqdm.set_lock(RLock()) # for managing output contention + p = Pool(initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),)) + p.map(progresser, L) + +Note that in Python 3, ``tqdm.write`` is thread-safe: + +.. code:: python + + from time import sleep + from tqdm import tqdm, trange + from concurrent.futures import ThreadPoolExecutor + + L = list(range(9)) + + def progresser(n): + interval = 0.001 / (n + 2) + total = 5000 + text = f"#{n}, est. {interval * total:<04.2}s" + for _ in trange(total, desc=text): + sleep(interval) + if n == 6: + tqdm.write("n == 6 completed.") + tqdm.write("`tqdm.write()` is thread-safe in py3!") + + if __name__ == '__main__': + with ThreadPoolExecutor() as p: + p.map(progresser, L) + +Hooks and callbacks +~~~~~~~~~~~~~~~~~~~ + +``tqdm`` can easily support callbacks/hooks and manual updates. +Here's an example with ``urllib``: + +**``urllib.urlretrieve`` documentation** + + | [...] + | If present, the hook function will be called once + | on establishment of the network connection and once after each block read + | thereafter. The hook will be passed three arguments; a count of blocks + | transferred so far, a block size in bytes, and the total size of the file. + | [...] + +.. code:: python + + import urllib, os + from tqdm import tqdm + urllib = getattr(urllib, 'request', urllib) + + class TqdmUpTo(tqdm): + """Provides `update_to(n)` which uses `tqdm.update(delta_n)`.""" + def update_to(self, b=1, bsize=1, tsize=None): + """ + b : int, optional + Number of blocks transferred so far [default: 1]. + bsize : int, optional + Size of each block (in tqdm units) [default: 1]. + tsize : int, optional + Total size (in tqdm units). If [default: None] remains unchanged. + """ + if tsize is not None: + self.total = tsize + return self.update(b * bsize - self.n) # also sets self.n = b * bsize + + eg_link = "https://caspersci.uk.to/matryoshka.zip" + with TqdmUpTo(unit='B', unit_scale=True, unit_divisor=1024, miniters=1, + desc=eg_link.split('/')[-1]) as t: # all optional kwargs + urllib.urlretrieve(eg_link, filename=os.devnull, + reporthook=t.update_to, data=None) + t.total = t.n + +Inspired by `twine#242 `__. +Functional alternative in +`examples/tqdm_wget.py `__. + +It is recommend to use ``miniters=1`` whenever there is potentially +large differences in iteration speed (e.g. downloading a file over +a patchy connection). + +**Wrapping read/write methods** + +To measure throughput through a file-like object's ``read`` or ``write`` +methods, use ``CallbackIOWrapper``: + +.. code:: python + + from tqdm.auto import tqdm + from tqdm.utils import CallbackIOWrapper + + with tqdm(total=file_obj.size, + unit='B', unit_scale=True, unit_divisor=1024) as t: + fobj = CallbackIOWrapper(t.update, file_obj, "read") + while True: + chunk = fobj.read(chunk_size) + if not chunk: + break + t.reset() + # ... continue to use `t` for something else + +Alternatively, use the even simpler ``wrapattr`` convenience function, +which would condense both the ``urllib`` and ``CallbackIOWrapper`` examples +down to: + +.. code:: python + + import urllib, os + from tqdm import tqdm + + eg_link = "https://caspersci.uk.to/matryoshka.zip" + response = getattr(urllib, 'request', urllib).urlopen(eg_link) + with tqdm.wrapattr(open(os.devnull, "wb"), "write", + miniters=1, desc=eg_link.split('/')[-1], + total=getattr(response, 'length', None)) as fout: + for chunk in response: + fout.write(chunk) + +The ``requests`` equivalent is nearly identical: + +.. code:: python + + import requests, os + from tqdm import tqdm + + eg_link = "https://caspersci.uk.to/matryoshka.zip" + response = requests.get(eg_link, stream=True) + with tqdm.wrapattr(open(os.devnull, "wb"), "write", + miniters=1, desc=eg_link.split('/')[-1], + total=int(response.headers.get('content-length', 0))) as fout: + for chunk in response.iter_content(chunk_size=4096): + fout.write(chunk) + +**Custom callback** + +``tqdm`` is known for intelligently skipping unnecessary displays. To make a +custom callback take advantage of this, simply use the return value of +``update()``. This is set to ``True`` if a ``display()`` was triggered. + +.. code:: python + + from tqdm.auto import tqdm as std_tqdm + + def external_callback(*args, **kwargs): + ... + + class TqdmExt(std_tqdm): + def update(self, n=1): + displayed = super().update(n) + if displayed: + external_callback(**self.format_dict) + return displayed + +``asyncio`` +~~~~~~~~~~~ + +Note that ``break`` isn't currently caught by asynchronous iterators. +This means that ``tqdm`` cannot clean up after itself in this case: + +.. code:: python + + from tqdm.asyncio import tqdm + + async for i in tqdm(range(9)): + if i == 2: + break + +Instead, either call ``pbar.close()`` manually or use the context manager syntax: + +.. code:: python + + from tqdm.asyncio import tqdm + + with tqdm(range(9)) as pbar: + async for i in pbar: + if i == 2: + break + +Pandas Integration +~~~~~~~~~~~~~~~~~~ + +Due to popular demand we've added support for ``pandas`` -- here's an example +for ``DataFrame.progress_apply`` and ``DataFrameGroupBy.progress_apply``: + +.. code:: python + + import pandas as pd + import numpy as np + from tqdm import tqdm + + df = pd.DataFrame(np.random.randint(0, 100, (100000, 6))) + + # Register `pandas.progress_apply` and `pandas.Series.map_apply` with `tqdm` + # (can use `tqdm.gui.tqdm`, `tqdm.notebook.tqdm`, optional kwargs, etc.) + tqdm.pandas(desc="my bar!") + + # Now you can use `progress_apply` instead of `apply` + # and `progress_map` instead of `map` + df.progress_apply(lambda x: x**2) + # can also groupby: + # df.groupby(0).progress_apply(lambda x: x**2) + +In case you're interested in how this works (and how to modify it for your +own callbacks), see the +`examples `__ +folder or import the module and run ``help()``. + +Keras Integration +~~~~~~~~~~~~~~~~~ + +A ``keras`` callback is also available: + +.. code:: python + + from tqdm.keras import TqdmCallback + + ... + + model.fit(..., verbose=0, callbacks=[TqdmCallback()]) + +Dask Integration +~~~~~~~~~~~~~~~~ + +A ``dask`` callback is also available: + +.. code:: python + + from tqdm.dask import TqdmCallback + + with TqdmCallback(desc="compute"): + ... + arr.compute() + + # or use callback globally + cb = TqdmCallback(desc="global") + cb.register() + arr.compute() + +IPython/Jupyter Integration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +IPython/Jupyter is supported via the ``tqdm.notebook`` submodule: + +.. code:: python + + from tqdm.notebook import trange, tqdm + from time import sleep + + for i in trange(3, desc='1st loop'): + for j in tqdm(range(100), desc='2nd loop'): + sleep(0.01) + +In addition to ``tqdm`` features, the submodule provides a native Jupyter +widget (compatible with IPython v1-v4 and Jupyter), fully working nested bars +and colour hints (blue: normal, green: completed, red: error/interrupt, +light blue: no ETA); as demonstrated below. + +|Screenshot-Jupyter1| +|Screenshot-Jupyter2| +|Screenshot-Jupyter3| + +The ``notebook`` version supports percentage or pixels for overall width +(e.g.: ``ncols='100%'`` or ``ncols='480px'``). + +It is also possible to let ``tqdm`` automatically choose between +console or notebook versions by using the ``autonotebook`` submodule: + +.. code:: python + + from tqdm.autonotebook import tqdm + tqdm.pandas() + +Note that this will issue a ``TqdmExperimentalWarning`` if run in a notebook +since it is not meant to be possible to distinguish between ``jupyter notebook`` +and ``jupyter console``. Use ``auto`` instead of ``autonotebook`` to suppress +this warning. + +Note that notebooks will display the bar in the cell where it was created. +This may be a different cell from the one where it is used. +If this is not desired, either + +- delay the creation of the bar to the cell where it must be displayed, or +- create the bar with ``display=False``, and in a later cell call + ``display(bar.container)``: + +.. code:: python + + from tqdm.notebook import tqdm + pbar = tqdm(..., display=False) + +.. code:: python + + # different cell + display(pbar.container) + +The ``keras`` callback has a ``display()`` method which can be used likewise: + +.. code:: python + + from tqdm.keras import TqdmCallback + cbk = TqdmCallback(display=False) + +.. code:: python + + # different cell + cbk.display() + model.fit(..., verbose=0, callbacks=[cbk]) + +Another possibility is to have a single bar (near the top of the notebook) +which is constantly re-used (using ``reset()`` rather than ``close()``). +For this reason, the notebook version (unlike the CLI version) does not +automatically call ``close()`` upon ``Exception``. + +.. code:: python + + from tqdm.notebook import tqdm + pbar = tqdm() + +.. code:: python + + # different cell + iterable = range(100) + pbar.reset(total=len(iterable)) # initialise with new `total` + for i in iterable: + pbar.update() + pbar.refresh() # force print final status but don't `close()` + +Custom Integration +~~~~~~~~~~~~~~~~~~ + +To change the default arguments (such as making ``dynamic_ncols=True``), +simply use built-in Python magic: + +.. code:: python + + from functools import partial + from tqdm import tqdm as std_tqdm + tqdm = partial(std_tqdm, dynamic_ncols=True) + +For further customisation, +``tqdm`` may be inherited from to create custom callbacks (as with the +``TqdmUpTo`` example `above <#hooks-and-callbacks>`__) or for custom frontends +(e.g. GUIs such as notebook or plotting packages). In the latter case: + +1. ``def __init__()`` to call ``super().__init__(..., gui=True)`` to disable + terminal ``status_printer`` creation. +2. Redefine: ``close()``, ``clear()``, ``display()``. + +Consider overloading ``display()`` to use e.g. +``self.frontend(**self.format_dict)`` instead of ``self.sp(repr(self))``. + +Some submodule examples of inheritance: + +- `tqdm/notebook.py `__ +- `tqdm/gui.py `__ +- `tqdm/tk.py `__ +- `tqdm/contrib/slack.py `__ +- `tqdm/contrib/discord.py `__ +- `tqdm/contrib/telegram.py `__ + +Dynamic Monitor/Meter +~~~~~~~~~~~~~~~~~~~~~ + +You can use a ``tqdm`` as a meter which is not monotonically increasing. +This could be because ``n`` decreases (e.g. a CPU usage monitor) or ``total`` +changes. + +One example would be recursively searching for files. The ``total`` is the +number of objects found so far, while ``n`` is the number of those objects which +are files (rather than folders): + +.. code:: python + + from tqdm import tqdm + import os.path + + def find_files_recursively(path, show_progress=True): + files = [] + # total=1 assumes `path` is a file + t = tqdm(total=1, unit="file", disable=not show_progress) + if not os.path.exists(path): + raise IOError("Cannot find:" + path) + + def append_found_file(f): + files.append(f) + t.update() + + def list_found_dir(path): + """returns os.listdir(path) assuming os.path.isdir(path)""" + listing = os.listdir(path) + # subtract 1 since a "file" we found was actually this directory + t.total += len(listing) - 1 + # fancy way to give info without forcing a refresh + t.set_postfix(dir=path[-10:], refresh=False) + t.update(0) # may trigger a refresh + return listing + + def recursively_search(path): + if os.path.isdir(path): + for f in list_found_dir(path): + recursively_search(os.path.join(path, f)) + else: + append_found_file(path) + + recursively_search(path) + t.set_postfix(dir=path) + t.close() + return files + +Using ``update(0)`` is a handy way to let ``tqdm`` decide when to trigger a +display refresh to avoid console spamming. + +Writing messages +~~~~~~~~~~~~~~~~ + +This is a work in progress (see +`#737 `__). + +Since ``tqdm`` uses a simple printing mechanism to display progress bars, +you should not write any message in the terminal using ``print()`` while +a progressbar is open. + +To write messages in the terminal without any collision with ``tqdm`` bar +display, a ``.write()`` method is provided: + +.. code:: python + + from tqdm.auto import tqdm, trange + from time import sleep + + bar = trange(10) + for i in bar: + # Print using tqdm class method .write() + sleep(0.1) + if not (i % 3): + tqdm.write("Done task %i" % i) + # Can also use bar.write() + +By default, this will print to standard output ``sys.stdout``. but you can +specify any file-like object using the ``file`` argument. For example, this +can be used to redirect the messages writing to a log file or class. + +Redirecting writing +~~~~~~~~~~~~~~~~~~~ + +If using a library that can print messages to the console, editing the library +by replacing ``print()`` with ``tqdm.write()`` may not be desirable. +In that case, redirecting ``sys.stdout`` to ``tqdm.write()`` is an option. + +To redirect ``sys.stdout``, create a file-like class that will write +any input string to ``tqdm.write()``, and supply the arguments +``file=sys.stdout, dynamic_ncols=True``. + +A reusable canonical example is given below: + +.. code:: python + + from time import sleep + import contextlib + import sys + from tqdm import tqdm + from tqdm.contrib import DummyTqdmFile + + + @contextlib.contextmanager + def std_out_err_redirect_tqdm(): + orig_out_err = sys.stdout, sys.stderr + try: + sys.stdout, sys.stderr = map(DummyTqdmFile, orig_out_err) + yield orig_out_err[0] + # Relay exceptions + except Exception as exc: + raise exc + # Always restore sys.stdout/err if necessary + finally: + sys.stdout, sys.stderr = orig_out_err + + def some_fun(i): + print("Fee, fi, fo,".split()[i]) + + # Redirect stdout to tqdm.write() (don't forget the `as save_stdout`) + with std_out_err_redirect_tqdm() as orig_stdout: + # tqdm needs the original stdout + # and dynamic_ncols=True to autodetect console width + for i in tqdm(range(3), file=orig_stdout, dynamic_ncols=True): + sleep(.5) + some_fun(i) + + # After the `with`, printing is restored + print("Done!") + +Redirecting ``logging`` +~~~~~~~~~~~~~~~~~~~~~~~ + +Similar to ``sys.stdout``/``sys.stderr`` as detailed above, console ``logging`` +may also be redirected to ``tqdm.write()``. + +Warning: if also redirecting ``sys.stdout``/``sys.stderr``, make sure to +redirect ``logging`` first if needed. + +Helper methods are available in ``tqdm.contrib.logging``. For example: + +.. code:: python + + import logging + from tqdm import trange + from tqdm.contrib.logging import logging_redirect_tqdm + + LOG = logging.getLogger(__name__) + + if __name__ == '__main__': + logging.basicConfig(level=logging.INFO) + with logging_redirect_tqdm(): + for i in trange(9): + if i == 4: + LOG.info("console logging redirected to `tqdm.write()`") + # logging restored + +Monitoring thread, intervals and miniters +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``tqdm`` implements a few tricks to increase efficiency and reduce overhead. + +- Avoid unnecessary frequent bar refreshing: ``mininterval`` defines how long + to wait between each refresh. ``tqdm`` always gets updated in the background, + but it will display only every ``mininterval``. +- Reduce number of calls to check system clock/time. +- ``mininterval`` is more intuitive to configure than ``miniters``. + A clever adjustment system ``dynamic_miniters`` will automatically adjust + ``miniters`` to the amount of iterations that fit into time ``mininterval``. + Essentially, ``tqdm`` will check if it's time to print without actually + checking time. This behaviour can be still be bypassed by manually setting + ``miniters``. + +However, consider a case with a combination of fast and slow iterations. +After a few fast iterations, ``dynamic_miniters`` will set ``miniters`` to a +large number. When iteration rate subsequently slows, ``miniters`` will +remain large and thus reduce display update frequency. To address this: + +- ``maxinterval`` defines the maximum time between display refreshes. + A concurrent monitoring thread checks for overdue updates and forces one + where necessary. + +The monitoring thread should not have a noticeable overhead, and guarantees +updates at least every 10 seconds by default. +This value can be directly changed by setting the ``monitor_interval`` of +any ``tqdm`` instance (i.e. ``t = tqdm.tqdm(...); t.monitor_interval = 2``). +The monitor thread may be disabled application-wide by setting +``tqdm.tqdm.monitor_interval = 0`` before instantiation of any ``tqdm`` bar. + + +Merch +----- + +You can buy `tqdm branded merch `__ now! + +Contributions +------------- + +|GitHub-Commits| |GitHub-Issues| |GitHub-PRs| |OpenHub-Status| |GitHub-Contributions| |CII Best Practices| + +All source code is hosted on `GitHub `__. +Contributions are welcome. + +See the +`CONTRIBUTING `__ +file for more information. + +Developers who have made significant contributions, ranked by *SLoC* +(surviving lines of code, +`git fame `__ ``-wMC --excl '\.(png|gif|jpg)$'``), +are: + +==================== ======================================================== ==== ================================ +Name ID SLoC Notes +==================== ======================================================== ==== ================================ +Casper da Costa-Luis `casperdcl `__ ~80% primary maintainer |Gift-Casper| +Stephen Larroque `lrq3000 `__ ~9% team member +Martin Zugnoni `martinzugnoni `__ ~3% +Daniel Ecer `de-code `__ ~2% +Richard Sheridan `richardsheridan `__ ~1% +Guangshuo Chen `chengs `__ ~1% +Helio Machado `0x2b3bfa0 `__ ~1% +Kyle Altendorf `altendky `__ <1% +Noam Yorav-Raphael `noamraph `__ <1% original author +Matthew Stevens `mjstevens777 `__ <1% +Hadrien Mary `hadim `__ <1% team member +Mikhail Korobov `kmike `__ <1% team member +==================== ======================================================== ==== ================================ + +Ports to Other Languages +~~~~~~~~~~~~~~~~~~~~~~~~ + +A list is available on +`this wiki page `__. + + +LICENCE +------- + +Open Source (OSI approved): |LICENCE| + +Citation information: |DOI| + +|README-Hits| (Since 19 May 2016) + +.. |Logo| image:: https://tqdm.github.io/img/logo.gif +.. |Screenshot| image:: https://tqdm.github.io/img/tqdm.gif +.. |Video| image:: https://tqdm.github.io/img/video.jpg + :target: https://tqdm.github.io/video +.. |Slides| image:: https://tqdm.github.io/img/slides.jpg + :target: https://tqdm.github.io/PyData2019/slides.html +.. |Merch| image:: https://tqdm.github.io/img/merch.jpg + :target: https://tqdm.github.io/merch +.. |Build-Status| image:: https://img.shields.io/github/actions/workflow/status/tqdm/tqdm/test.yml?branch=master&label=tqdm&logo=GitHub + :target: https://github.com/tqdm/tqdm/actions/workflows/test.yml +.. |Coverage-Status| image:: https://img.shields.io/coveralls/github/tqdm/tqdm/master?logo=coveralls + :target: https://coveralls.io/github/tqdm/tqdm +.. |Branch-Coverage-Status| image:: https://codecov.io/gh/tqdm/tqdm/branch/master/graph/badge.svg + :target: https://codecov.io/gh/tqdm/tqdm +.. |Codacy-Grade| image:: https://app.codacy.com/project/badge/Grade/3f965571598f44549c7818f29cdcf177 + :target: https://www.codacy.com/gh/tqdm/tqdm/dashboard +.. |CII Best Practices| image:: https://bestpractices.coreinfrastructure.org/projects/3264/badge + :target: https://bestpractices.coreinfrastructure.org/projects/3264 +.. |GitHub-Status| image:: https://img.shields.io/github/tag/tqdm/tqdm.svg?maxAge=86400&logo=github&logoColor=white + :target: https://github.com/tqdm/tqdm/releases +.. |GitHub-Forks| image:: https://img.shields.io/github/forks/tqdm/tqdm.svg?logo=github&logoColor=white + :target: https://github.com/tqdm/tqdm/network +.. |GitHub-Stars| image:: https://img.shields.io/github/stars/tqdm/tqdm.svg?logo=github&logoColor=white + :target: https://github.com/tqdm/tqdm/stargazers +.. |GitHub-Commits| image:: https://img.shields.io/github/commit-activity/y/tqdm/tqdm.svg?logo=git&logoColor=white + :target: https://github.com/tqdm/tqdm/graphs/commit-activity +.. |GitHub-Issues| image:: https://img.shields.io/github/issues-closed/tqdm/tqdm.svg?logo=github&logoColor=white + :target: https://github.com/tqdm/tqdm/issues?q= +.. |GitHub-PRs| image:: https://img.shields.io/github/issues-pr-closed/tqdm/tqdm.svg?logo=github&logoColor=white + :target: https://github.com/tqdm/tqdm/pulls +.. |GitHub-Contributions| image:: https://img.shields.io/github/contributors/tqdm/tqdm.svg?logo=github&logoColor=white + :target: https://github.com/tqdm/tqdm/graphs/contributors +.. |GitHub-Updated| image:: https://img.shields.io/github/last-commit/tqdm/tqdm/master.svg?logo=github&logoColor=white&label=pushed + :target: https://github.com/tqdm/tqdm/pulse +.. |Gift-Casper| image:: https://img.shields.io/badge/dynamic/json.svg?color=ff69b4&label=gifts%20received&prefix=%C2%A3&query=%24..sum&url=https%3A%2F%2Fcaspersci.uk.to%2Fgifts.json + :target: https://cdcl.ml/sponsor +.. |Versions| image:: https://img.shields.io/pypi/v/tqdm.svg + :target: https://tqdm.github.io/releases +.. |PyPI-Downloads| image:: https://img.shields.io/pypi/dm/tqdm.svg?label=pypi%20downloads&logo=PyPI&logoColor=white + :target: https://pepy.tech/project/tqdm +.. |Py-Versions| image:: https://img.shields.io/pypi/pyversions/tqdm.svg?logo=python&logoColor=white + :target: https://pypi.org/project/tqdm +.. |Conda-Forge-Status| image:: https://img.shields.io/conda/v/conda-forge/tqdm.svg?label=conda-forge&logo=conda-forge + :target: https://anaconda.org/conda-forge/tqdm +.. |Snapcraft| image:: https://img.shields.io/badge/snap-install-82BEA0.svg?logo=snapcraft + :target: https://snapcraft.io/tqdm +.. |Docker| image:: https://img.shields.io/badge/docker-pull-blue.svg?logo=docker&logoColor=white + :target: https://hub.docker.com/r/tqdm/tqdm +.. |Libraries-Rank| image:: https://img.shields.io/librariesio/sourcerank/pypi/tqdm.svg?logo=koding&logoColor=white + :target: https://libraries.io/pypi/tqdm +.. |Libraries-Dependents| image:: https://img.shields.io/librariesio/dependent-repos/pypi/tqdm.svg?logo=koding&logoColor=white + :target: https://github.com/tqdm/tqdm/network/dependents +.. |OpenHub-Status| image:: https://www.openhub.net/p/tqdm/widgets/project_thin_badge?format=gif + :target: https://www.openhub.net/p/tqdm?ref=Thin+badge +.. |awesome-python| image:: https://awesome.re/mentioned-badge.svg + :target: https://github.com/vinta/awesome-python +.. |LICENCE| image:: https://img.shields.io/pypi/l/tqdm.svg + :target: https://raw.githubusercontent.com/tqdm/tqdm/master/LICENCE +.. |DOI| image:: https://img.shields.io/badge/DOI-10.5281/zenodo.595120-blue.svg + :target: https://doi.org/10.5281/zenodo.595120 +.. |binder-demo| image:: https://mybinder.org/badge_logo.svg + :target: https://mybinder.org/v2/gh/tqdm/tqdm/master?filepath=DEMO.ipynb +.. |Screenshot-Jupyter1| image:: https://tqdm.github.io/img/jupyter-1.gif +.. |Screenshot-Jupyter2| image:: https://tqdm.github.io/img/jupyter-2.gif +.. |Screenshot-Jupyter3| image:: https://tqdm.github.io/img/jupyter-3.gif +.. |README-Hits| image:: https://cgi.cdcl.ml/hits?q=tqdm&style=social&r=https://github.com/tqdm/tqdm&l=https://tqdm.github.io/img/favicon.png&f=https://tqdm.github.io/img/logo.gif + :target: https://cgi.cdcl.ml/hits?q=tqdm&a=plot&r=https://github.com/tqdm/tqdm&l=https://tqdm.github.io/img/favicon.png&f=https://tqdm.github.io/img/logo.gif&style=social diff --git a/meow/lib/python3.13/site-packages/tqdm-4.67.1.dist-info/RECORD b/meow/lib/python3.13/site-packages/tqdm-4.67.1.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..009eab6dc153b8ba0d25bd469d60653350ccef4f --- /dev/null +++ b/meow/lib/python3.13/site-packages/tqdm-4.67.1.dist-info/RECORD @@ -0,0 +1,74 @@ +../../../bin/tqdm,sha256=1pQl4yWSuZeZRvAmARQ0Gvkv_swBIYMQN4sjsnzHQp0,235 +tqdm-4.67.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +tqdm-4.67.1.dist-info/LICENCE,sha256=3DMlLoKQFeOxUAhvubOkD2rW-zLC9GEM6BL6Z301mGo,1985 +tqdm-4.67.1.dist-info/METADATA,sha256=aIoWMt9SWhmP7FLc_vsSRtMerO6cA1qsrC1-r42P9mk,57675 +tqdm-4.67.1.dist-info/RECORD,, +tqdm-4.67.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91 +tqdm-4.67.1.dist-info/entry_points.txt,sha256=ReJCH7Ui3Zyh6M16E4OhsZ1oU7WtMXCfbtoyBhGO29Y,39 +tqdm-4.67.1.dist-info/top_level.txt,sha256=NLiUJNfmc9At15s7JURiwvqMEjUi9G5PMGRrmMYzNSM,5 +tqdm/__init__.py,sha256=9mQNYSSqP99JasubEC1POJLMmhkkBH6cJZxPIR5G2pQ,1572 +tqdm/__main__.py,sha256=bYt9eEaoRQWdejEHFD8REx9jxVEdZptECFsV7F49Ink,30 +tqdm/__pycache__/__init__.cpython-313.pyc,, +tqdm/__pycache__/__main__.cpython-313.pyc,, +tqdm/__pycache__/_dist_ver.cpython-313.pyc,, +tqdm/__pycache__/_main.cpython-313.pyc,, +tqdm/__pycache__/_monitor.cpython-313.pyc,, +tqdm/__pycache__/_tqdm.cpython-313.pyc,, +tqdm/__pycache__/_tqdm_gui.cpython-313.pyc,, +tqdm/__pycache__/_tqdm_notebook.cpython-313.pyc,, +tqdm/__pycache__/_tqdm_pandas.cpython-313.pyc,, +tqdm/__pycache__/_utils.cpython-313.pyc,, +tqdm/__pycache__/asyncio.cpython-313.pyc,, +tqdm/__pycache__/auto.cpython-313.pyc,, +tqdm/__pycache__/autonotebook.cpython-313.pyc,, +tqdm/__pycache__/cli.cpython-313.pyc,, +tqdm/__pycache__/dask.cpython-313.pyc,, +tqdm/__pycache__/gui.cpython-313.pyc,, +tqdm/__pycache__/keras.cpython-313.pyc,, +tqdm/__pycache__/notebook.cpython-313.pyc,, +tqdm/__pycache__/rich.cpython-313.pyc,, +tqdm/__pycache__/std.cpython-313.pyc,, +tqdm/__pycache__/tk.cpython-313.pyc,, +tqdm/__pycache__/utils.cpython-313.pyc,, +tqdm/__pycache__/version.cpython-313.pyc,, +tqdm/_dist_ver.py,sha256=m5AdYI-jB-v6P0VJ_70isH_p24EzSOGSwVvuAZmkmKY,23 +tqdm/_main.py,sha256=9ySvgmi_2Sw4CAo5UDW0Q2dxfTryboEWGHohfCJz0sA,283 +tqdm/_monitor.py,sha256=Uku-DPWgzJ7dO5CK08xKJK-E_F6qQ-JB3ksuXczSYR0,3699 +tqdm/_tqdm.py,sha256=LfLCuJ6bpsVo9xilmtBXyEm1vGnUCFrliW85j3J-nD4,283 +tqdm/_tqdm_gui.py,sha256=03Hc8KayxJveieI5-0-2NGiDpLvw9jZekofJUV7CCwk,287 +tqdm/_tqdm_notebook.py,sha256=BuHiLuxu6uEfZFaPJW3RPpPaxaVctEQA3kdSJSDL1hw,307 +tqdm/_tqdm_pandas.py,sha256=c9jptUgigN6axRDhRd4Rif98Tmxeopc1nFNFhIpbFUE,888 +tqdm/_utils.py,sha256=_4E73bfDj4f1s3sM42NLHNrZDOkijZoWq-n6xWLkdZ8,553 +tqdm/asyncio.py,sha256=Kp2rSkNRf9KRqa3d9YpgeZQ7L7EZf2Ki4bSc7UPIyoo,2757 +tqdm/auto.py,sha256=nDZflj6p2zKkjBCNBourrhS81zYfZy1_dQvbckrdW8o,871 +tqdm/autonotebook.py,sha256=Yb9F5uaiBPhfbDDFpbtoG8I2YUw3uQJ89rUDLbfR6ws,956 +tqdm/cli.py,sha256=SbKlN8QyZ2ogenqt-wT_p6_sx2OOdCjCyhoZBFnlmyI,11010 +tqdm/completion.sh,sha256=j79KbSmpIj_E11jfTfBXrGnUTzKXVpQ1vGVQvsyDRl4,946 +tqdm/contrib/__init__.py,sha256=OgSwVXm-vlDJ-2imtoQ9z8qdom4snMSRztH72KMA82A,2494 +tqdm/contrib/__pycache__/__init__.cpython-313.pyc,, +tqdm/contrib/__pycache__/bells.cpython-313.pyc,, +tqdm/contrib/__pycache__/concurrent.cpython-313.pyc,, +tqdm/contrib/__pycache__/discord.cpython-313.pyc,, +tqdm/contrib/__pycache__/itertools.cpython-313.pyc,, +tqdm/contrib/__pycache__/logging.cpython-313.pyc,, +tqdm/contrib/__pycache__/slack.cpython-313.pyc,, +tqdm/contrib/__pycache__/telegram.cpython-313.pyc,, +tqdm/contrib/__pycache__/utils_worker.cpython-313.pyc,, +tqdm/contrib/bells.py,sha256=Yx1HqGCmHrESCAO700j5wE__JCleNODJxedh1ijPLD0,837 +tqdm/contrib/concurrent.py,sha256=K1yjloKS5WRNFyjLRth0DmU5PAnDbF0A-GD27N-J4a8,3986 +tqdm/contrib/discord.py,sha256=MtVIL1s_dxH21G4sL8FBgQ4Wei23ho9Ek5T-AommvNc,5243 +tqdm/contrib/itertools.py,sha256=WdKKQU5eSzsqHu29SN_oH12huYZo0Jihqoi9-nVhwz4,774 +tqdm/contrib/logging.py,sha256=NsYtnKttj2mMrGm58mEdo5a9DP_2vv8pZyrimSuWulA,3760 +tqdm/contrib/slack.py,sha256=eP_Mr5sQonYniHxxQNGue3jk2JkIPmPWFZqIYxnOui0,4007 +tqdm/contrib/telegram.py,sha256=vn_9SATMbbwn2PAbzSDyOX6av3eBB01QBug11P4H-Og,5008 +tqdm/contrib/utils_worker.py,sha256=HJP5Mz1S1xyzEke2JaqJ2sYLHXADYoo2epT5AzQ38eA,1207 +tqdm/dask.py,sha256=9Ei58eVqTossRLhAfWyUFCduXYKjmLmwkaXIy-CHYfs,1319 +tqdm/gui.py,sha256=STIB3K8iDzDgkNUqWIpvcI_u0OGtbGNy5NwpALXhfWs,5479 +tqdm/keras.py,sha256=op9sBkb6q6c6dw2wJ0SD2ZwpPK7yM1Vbg4l1Qiy3MIo,4373 +tqdm/notebook.py,sha256=GtZ3IapLL1v8WNDaTSvPw0bJGTyfp71Vfz5HDnAzx1M,10895 +tqdm/rich.py,sha256=YyMPkEHVyYUVUR3adJKbVX26iTmNKpNMf3DEqmm-m60,5021 +tqdm/std.py,sha256=tWjz6-QCa92aqYjz7PIdkLUCAfiy-lJZheBtZyIIyO0,57461 +tqdm/tk.py,sha256=Gu0uwXwLCGPRGHORdi3WvBLGiseUp_xxX_h_gp9VpK0,6701 +tqdm/tqdm.1,sha256=aILyUPk2S4OPe_uWy2P4AMjUf0oQ6PUW0nLYXB-BWwI,7889 +tqdm/utils.py,sha256=6E0BQw3Sg7uGWKBM_cDn3P42tXswRhzkggbhBgLDjl8,11821 +tqdm/version.py,sha256=-1yWjfu3P0eghVsysHH07fbzdiADNRdzRtYPqOaqR2A,333 diff --git a/meow/lib/python3.13/site-packages/tqdm-4.67.1.dist-info/WHEEL b/meow/lib/python3.13/site-packages/tqdm-4.67.1.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..ae527e7d64811439e61b93aa375defb30e06edfe --- /dev/null +++ b/meow/lib/python3.13/site-packages/tqdm-4.67.1.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: setuptools (75.6.0) +Root-Is-Purelib: true +Tag: py3-none-any + diff --git a/meow/lib/python3.13/site-packages/tqdm-4.67.1.dist-info/entry_points.txt b/meow/lib/python3.13/site-packages/tqdm-4.67.1.dist-info/entry_points.txt new file mode 100644 index 0000000000000000000000000000000000000000..540e60f4e073bc53a5f0a521a3639e0d80780af4 --- /dev/null +++ b/meow/lib/python3.13/site-packages/tqdm-4.67.1.dist-info/entry_points.txt @@ -0,0 +1,2 @@ +[console_scripts] +tqdm = tqdm.cli:main diff --git a/meow/lib/python3.13/site-packages/tqdm-4.67.1.dist-info/top_level.txt b/meow/lib/python3.13/site-packages/tqdm-4.67.1.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..78620c472c9d799a14ccb02a0233f4669b3bcdcb --- /dev/null +++ b/meow/lib/python3.13/site-packages/tqdm-4.67.1.dist-info/top_level.txt @@ -0,0 +1 @@ +tqdm diff --git a/meow/lib/python3.13/site-packages/urllib3-2.3.0.dist-info/INSTALLER b/meow/lib/python3.13/site-packages/urllib3-2.3.0.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/meow/lib/python3.13/site-packages/urllib3-2.3.0.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/meow/lib/python3.13/site-packages/urllib3-2.3.0.dist-info/METADATA b/meow/lib/python3.13/site-packages/urllib3-2.3.0.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..d2064b9253f9c1d53d31096c87ea880b433f2e20 --- /dev/null +++ b/meow/lib/python3.13/site-packages/urllib3-2.3.0.dist-info/METADATA @@ -0,0 +1,154 @@ +Metadata-Version: 2.4 +Name: urllib3 +Version: 2.3.0 +Summary: HTTP library with thread-safe connection pooling, file post, and more. +Project-URL: Changelog, https://github.com/urllib3/urllib3/blob/main/CHANGES.rst +Project-URL: Documentation, https://urllib3.readthedocs.io +Project-URL: Code, https://github.com/urllib3/urllib3 +Project-URL: Issue tracker, https://github.com/urllib3/urllib3/issues +Author-email: Andrey Petrov +Maintainer-email: Seth Michael Larson , Quentin Pradet , Illia Volochii +License-File: LICENSE.txt +Keywords: filepost,http,httplib,https,pooling,ssl,threadsafe,urllib +Classifier: Environment :: Web Environment +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Topic :: Internet :: WWW/HTTP +Classifier: Topic :: Software Development :: Libraries +Requires-Python: >=3.9 +Provides-Extra: brotli +Requires-Dist: brotli>=1.0.9; (platform_python_implementation == 'CPython') and extra == 'brotli' +Requires-Dist: brotlicffi>=0.8.0; (platform_python_implementation != 'CPython') and extra == 'brotli' +Provides-Extra: h2 +Requires-Dist: h2<5,>=4; extra == 'h2' +Provides-Extra: socks +Requires-Dist: pysocks!=1.5.7,<2.0,>=1.5.6; extra == 'socks' +Provides-Extra: zstd +Requires-Dist: zstandard>=0.18.0; extra == 'zstd' +Description-Content-Type: text/markdown + +

+ +![urllib3](https://github.com/urllib3/urllib3/raw/main/docs/_static/banner_github.svg) + +

+ +

+ PyPI Version + Python Versions + Join our Discord + Coverage Status + Build Status on GitHub + Documentation Status
+ OpenSSF Scorecard + SLSA 3 + CII Best Practices +

+ +urllib3 is a powerful, *user-friendly* HTTP client for Python. Much of the +Python ecosystem already uses urllib3 and you should too. +urllib3 brings many critical features that are missing from the Python +standard libraries: + +- Thread safety. +- Connection pooling. +- Client-side SSL/TLS verification. +- File uploads with multipart encoding. +- Helpers for retrying requests and dealing with HTTP redirects. +- Support for gzip, deflate, brotli, and zstd encoding. +- Proxy support for HTTP and SOCKS. +- 100% test coverage. + +urllib3 is powerful and easy to use: + +```python3 +>>> import urllib3 +>>> resp = urllib3.request("GET", "http://httpbin.org/robots.txt") +>>> resp.status +200 +>>> resp.data +b"User-agent: *\nDisallow: /deny\n" +``` + +## Installing + +urllib3 can be installed with [pip](https://pip.pypa.io): + +```bash +$ python -m pip install urllib3 +``` + +Alternatively, you can grab the latest source code from [GitHub](https://github.com/urllib3/urllib3): + +```bash +$ git clone https://github.com/urllib3/urllib3.git +$ cd urllib3 +$ pip install . +``` + + +## Documentation + +urllib3 has usage and reference documentation at [urllib3.readthedocs.io](https://urllib3.readthedocs.io). + + +## Community + +urllib3 has a [community Discord channel](https://discord.gg/urllib3) for asking questions and +collaborating with other contributors. Drop by and say hello 👋 + + +## Contributing + +urllib3 happily accepts contributions. Please see our +[contributing documentation](https://urllib3.readthedocs.io/en/latest/contributing.html) +for some tips on getting started. + + +## Security Disclosures + +To report a security vulnerability, please use the +[Tidelift security contact](https://tidelift.com/security). +Tidelift will coordinate the fix and disclosure with maintainers. + + +## Maintainers + +- [@sethmlarson](https://github.com/sethmlarson) (Seth M. Larson) +- [@pquentin](https://github.com/pquentin) (Quentin Pradet) +- [@illia-v](https://github.com/illia-v) (Illia Volochii) +- [@theacodes](https://github.com/theacodes) (Thea Flowers) +- [@haikuginger](https://github.com/haikuginger) (Jess Shapiro) +- [@lukasa](https://github.com/lukasa) (Cory Benfield) +- [@sigmavirus24](https://github.com/sigmavirus24) (Ian Stapleton Cordasco) +- [@shazow](https://github.com/shazow) (Andrey Petrov) + +👋 + + +## Sponsorship + +If your company benefits from this library, please consider [sponsoring its +development](https://urllib3.readthedocs.io/en/latest/sponsors.html). + + +## For Enterprise + +Professional support for urllib3 is available as part of the [Tidelift +Subscription][1]. Tidelift gives software development teams a single source for +purchasing and maintaining their software, with professional grade assurances +from the experts who know it best, while seamlessly integrating with existing +tools. + +[1]: https://tidelift.com/subscription/pkg/pypi-urllib3?utm_source=pypi-urllib3&utm_medium=referral&utm_campaign=readme diff --git a/meow/lib/python3.13/site-packages/urllib3-2.3.0.dist-info/RECORD b/meow/lib/python3.13/site-packages/urllib3-2.3.0.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..2a63e5aaa78693998ba44460d0e72ca4ce51bc12 --- /dev/null +++ b/meow/lib/python3.13/site-packages/urllib3-2.3.0.dist-info/RECORD @@ -0,0 +1,79 @@ +urllib3-2.3.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +urllib3-2.3.0.dist-info/METADATA,sha256=quSdNDoWoaG7idKpqr1_2N52fUBfxTtifpP3sofNwAY,6488 +urllib3-2.3.0.dist-info/RECORD,, +urllib3-2.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87 +urllib3-2.3.0.dist-info/licenses/LICENSE.txt,sha256=Ew46ZNX91dCWp1JpRjSn2d8oRGnehuVzIQAmgEHj1oY,1093 +urllib3/__init__.py,sha256=JMo1tg1nIV1AeJ2vENC_Txfl0e5h6Gzl9DGVk1rWRbo,6979 +urllib3/__pycache__/__init__.cpython-313.pyc,, +urllib3/__pycache__/_base_connection.cpython-313.pyc,, +urllib3/__pycache__/_collections.cpython-313.pyc,, +urllib3/__pycache__/_request_methods.cpython-313.pyc,, +urllib3/__pycache__/_version.cpython-313.pyc,, +urllib3/__pycache__/connection.cpython-313.pyc,, +urllib3/__pycache__/connectionpool.cpython-313.pyc,, +urllib3/__pycache__/exceptions.cpython-313.pyc,, +urllib3/__pycache__/fields.cpython-313.pyc,, +urllib3/__pycache__/filepost.cpython-313.pyc,, +urllib3/__pycache__/poolmanager.cpython-313.pyc,, +urllib3/__pycache__/response.cpython-313.pyc,, +urllib3/_base_connection.py,sha256=T1cwH3RhzsrBh6Bz3AOGVDboRsE7veijqZPXXQTR2Rg,5568 +urllib3/_collections.py,sha256=tM7c6J1iKtWZYV_QGYb8-r7Nr1524Dehnsa0Ufh6_mU,17295 +urllib3/_request_methods.py,sha256=gCeF85SO_UU4WoPwYHIoz_tw-eM_EVOkLFp8OFsC7DA,9931 +urllib3/_version.py,sha256=ChsIHG8bRc-eXUbXOgv4Fm4DstSKLq9FpsTAsaMeR08,411 +urllib3/connection.py,sha256=dsVIUaPrOdATuO9OGnF5GzMhlVKlAw-2qH9FWYuic4s,39875 +urllib3/connectionpool.py,sha256=ZEhudsa8BIubD2M0XoxBBsjxbsXwMgUScH7oQ9i-j1Y,43371 +urllib3/contrib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +urllib3/contrib/__pycache__/__init__.cpython-313.pyc,, +urllib3/contrib/__pycache__/pyopenssl.cpython-313.pyc,, +urllib3/contrib/__pycache__/socks.cpython-313.pyc,, +urllib3/contrib/emscripten/__init__.py,sha256=u6KNgzjlFZbuAAXa_ybCR7gQ71VJESnF-IIdDA73brw,733 +urllib3/contrib/emscripten/__pycache__/__init__.cpython-313.pyc,, +urllib3/contrib/emscripten/__pycache__/connection.cpython-313.pyc,, +urllib3/contrib/emscripten/__pycache__/fetch.cpython-313.pyc,, +urllib3/contrib/emscripten/__pycache__/request.cpython-313.pyc,, +urllib3/contrib/emscripten/__pycache__/response.cpython-313.pyc,, +urllib3/contrib/emscripten/connection.py,sha256=j8DR_flE7hsoFhNfiqHLiaPaCsVbzG44jgahwvsQ52A,8771 +urllib3/contrib/emscripten/emscripten_fetch_worker.js,sha256=CDfYF_9CDobtx2lGidyJ1zjDEvwNT5F-dchmVWXDh0E,3655 +urllib3/contrib/emscripten/fetch.py,sha256=Li6sUnFuFog0fBiahk1Y0C0tdn5fxmj4ucDofPWFiXc,22867 +urllib3/contrib/emscripten/request.py,sha256=mL28szy1KvE3NJhWor5jNmarp8gwplDU-7gwGZY5g0Q,566 +urllib3/contrib/emscripten/response.py,sha256=sc0CJCEV_3t_HTadOmvWNuO-WkYbI1YfQWnBBmaeriE,9981 +urllib3/contrib/pyopenssl.py,sha256=uMNfy0e-wT-WNrTF-4oOQ17-I3w0NPrREm1t5AC9wxg,19398 +urllib3/contrib/socks.py,sha256=-iardc61GypsJzD6W6yuRS7KVCyfowcQrl_719H7lIM,7549 +urllib3/exceptions.py,sha256=VSkjQkvw3iolMKP_ZWfiiXumAj2VCvhmz2B3W-MP4BA,9633 +urllib3/fields.py,sha256=FCf7UULSkf10cuTRUWTQESzxgl1WT8e2aCy3kfyZins,10829 +urllib3/filepost.py,sha256=U8eNZ-mpKKHhrlbHEEiTxxgK16IejhEa7uz42yqA_dI,2388 +urllib3/http2/__init__.py,sha256=xzrASH7R5ANRkPJOot5lGnATOq3KKuyXzI42rcnwmqs,1741 +urllib3/http2/__pycache__/__init__.cpython-313.pyc,, +urllib3/http2/__pycache__/connection.cpython-313.pyc,, +urllib3/http2/__pycache__/probe.cpython-313.pyc,, +urllib3/http2/connection.py,sha256=GNlp9BjI3DmfSKe1W0b9IqRBeM8Q13xd2MA3ROcJ3dY,12668 +urllib3/http2/probe.py,sha256=nnAkqbhAakOiF75rz7W0udZ38Eeh_uD8fjV74N73FEI,3014 +urllib3/poolmanager.py,sha256=2_L2AjVDgoQ0qBmYbX9u9QqyU1u5J37zQbtv_-ueZQA,22913 +urllib3/py.typed,sha256=UaCuPFa3H8UAakbt-5G8SPacldTOGvJv18pPjUJ5gDY,93 +urllib3/response.py,sha256=PBW5ZnK3kYeq0fqeUYywyASKQWK7uRzSa-HgDBzZedU,45190 +urllib3/util/__init__.py,sha256=-qeS0QceivazvBEKDNFCAI-6ACcdDOE4TMvo7SLNlAQ,1001 +urllib3/util/__pycache__/__init__.cpython-313.pyc,, +urllib3/util/__pycache__/connection.cpython-313.pyc,, +urllib3/util/__pycache__/proxy.cpython-313.pyc,, +urllib3/util/__pycache__/request.cpython-313.pyc,, +urllib3/util/__pycache__/response.cpython-313.pyc,, +urllib3/util/__pycache__/retry.cpython-313.pyc,, +urllib3/util/__pycache__/ssl_.cpython-313.pyc,, +urllib3/util/__pycache__/ssl_match_hostname.cpython-313.pyc,, +urllib3/util/__pycache__/ssltransport.cpython-313.pyc,, +urllib3/util/__pycache__/timeout.cpython-313.pyc,, +urllib3/util/__pycache__/url.cpython-313.pyc,, +urllib3/util/__pycache__/util.cpython-313.pyc,, +urllib3/util/__pycache__/wait.cpython-313.pyc,, +urllib3/util/connection.py,sha256=JjO722lzHlzLXPTkr9ZWBdhseXnMVjMSb1DJLVrXSnQ,4444 +urllib3/util/proxy.py,sha256=seP8-Q5B6bB0dMtwPj-YcZZQ30vHuLqRu-tI0JZ2fzs,1148 +urllib3/util/request.py,sha256=qSwxEsJJ-9DUfFDEAZIgQe8sgyywKY0o3faH3ixi3i8,8218 +urllib3/util/response.py,sha256=vQE639uoEhj1vpjEdxu5lNIhJCSUZkd7pqllUI0BZOA,3374 +urllib3/util/retry.py,sha256=bj-2YUqblxLlv8THg5fxww-DM54XCbjgZXIQ71XioCY,18459 +urllib3/util/ssl_.py,sha256=CcYPnFKqJDn58Us2J5AFBQLMthIVVMjb69HwvdmPgoQ,18884 +urllib3/util/ssl_match_hostname.py,sha256=gaWqixoYtQ_GKO8fcRGFj3VXeMoqyxQQuUTPgWeiL_M,5812 +urllib3/util/ssltransport.py,sha256=Ez4O8pR_vT8dan_FvqBYS6dgDfBXEMfVfrzcdUoWfi4,8847 +urllib3/util/timeout.py,sha256=4eT1FVeZZU7h7mYD1Jq2OXNe4fxekdNvhoWUkZusRpA,10346 +urllib3/util/url.py,sha256=WRh-TMYXosmgp8m8lT4H5spoHw5yUjlcMCfU53AkoAs,15205 +urllib3/util/util.py,sha256=j3lbZK1jPyiwD34T8IgJzdWEZVT-4E-0vYIJi9UjeNA,1146 +urllib3/util/wait.py,sha256=_ph8IrUR3sqPqi0OopQgJUlH4wzkGeM5CiyA7XGGtmI,4423 diff --git a/meow/lib/python3.13/site-packages/urllib3-2.3.0.dist-info/WHEEL b/meow/lib/python3.13/site-packages/urllib3-2.3.0.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..12228d414b6cfed7c39d3781c85c63256a1d7fb5 --- /dev/null +++ b/meow/lib/python3.13/site-packages/urllib3-2.3.0.dist-info/WHEEL @@ -0,0 +1,4 @@ +Wheel-Version: 1.0 +Generator: hatchling 1.27.0 +Root-Is-Purelib: true +Tag: py3-none-any