diff --git "a/README.md" "b/README.md" --- "a/README.md" +++ "b/README.md" @@ -1,1919 +1,191 @@ ---- -datasets: -- sentence-transformers/all-nli -- sentence-transformers/nli-for-simcse -- sentence-transformers/msmarco-bm25 -- sentence-transformers/mr-tydi -- sentence-transformers/sentence-compression -- sentence-transformers/simple-wiki -- sentence-transformers/agnews -- sentence-transformers/ccnews -- sentence-transformers/npr -- sentence-transformers/gooaq -- sentence-transformers/yahoo-answers -- sentence-transformers/eli5 -- sentence-transformers/amazon-qa -- sentence-transformers/squad -- sentence-transformers/natural-questions -- sentence-transformers/hotpotqa -- sentence-transformers/quora-duplicates -- sentence-transformers/specter -- sentence-transformers/stackexchange-duplicates -- sentence-transformers/altlex -- sentence-transformers/stsb -language: -- en -- ar -- bn -- fi -- id -- ja -- ko -- ru -- sw -- te -- th -library_name: sentence-transformers -metrics: -- pearson_cosine -- spearman_cosine -- pearson_manhattan -- spearman_manhattan -- pearson_euclidean -- spearman_euclidean -- pearson_dot -- spearman_dot -- pearson_max -- spearman_max -pipeline_tag: sentence-similarity -tags: -- sentence-transformers -- sentence-similarity -- feature-extraction -- generated_from_trainer -- dataset_size:11551354 -- loss:MatryoshkaLoss -- loss:MultipleNegativesRankingLoss -widget: -- source_sentence: It depends on what they are. - sentences: - - Guatemala's former leader found guilty of genocide - - The men are jogging on the beach. - - It depends on what they are evaluating, and how. -- source_sentence: A woman is slicing an onion. - sentences: - - A woman is slicing lemons. - - two motorbikes on a dirt track. - - US drone kills 16 in Pakistan -- source_sentence: okay i'll keep that in mind yeah you serve that yourself or the - for a family - sentences: - - I will never forget that. You can have that on your own or share with a family. - - After approval is granted, the FCC will publish a notice in the Federal Register. - - I think I will forget about that. You will need to remind me. -- source_sentence: A little girl in brown is playing with two hula-hoops. - sentences: - - Animals are near each other. - - The girl has several toys. - - The person playing with hula-hoops is male. -- source_sentence: In some cases, members initially participated because of an existing - trust relationship with individual leaders or sponsors, and it was a challenge - to keep them returning until they saw value in participating and had built trust - with other members. - sentences: - - toddler is walking - - Trust is important to not only getting members, but keeping them as well. - - Trust is never important for recruiting or retaining any of the members. 
-model-index: -- name: SentenceTransformer - results: - - task: - type: semantic-similarity - name: Semantic Similarity - dataset: - name: sts dev 1024 - type: sts-dev-1024 - metrics: - - type: pearson_cosine - value: 0.8416643676223297 - name: Pearson Cosine - - type: spearman_cosine - value: 0.8392363927275444 - name: Spearman Cosine - - type: pearson_manhattan - value: 0.5415092010794227 - name: Pearson Manhattan - - type: spearman_manhattan - value: 0.5608363558940672 - name: Spearman Manhattan - - type: pearson_euclidean - value: 0.5418581801539682 - name: Pearson Euclidean - - type: spearman_euclidean - value: 0.5613244762714833 - name: Spearman Euclidean - - type: pearson_dot - value: 0.42045087316490104 - name: Pearson Dot - - type: spearman_dot - value: 0.5630110188234851 - name: Spearman Dot - - type: pearson_max - value: 0.8416643676223297 - name: Pearson Max - - type: spearman_max - value: 0.8392363927275444 - name: Spearman Max - - task: - type: semantic-similarity - name: Semantic Similarity - dataset: - name: sts dev 512 - type: sts-dev-512 - metrics: - - type: pearson_cosine - value: 0.8409052758628516 - name: Pearson Cosine - - type: spearman_cosine - value: 0.8384080261351986 - name: Spearman Cosine - - type: pearson_manhattan - value: 0.5408476868456095 - name: Pearson Manhattan - - type: spearman_manhattan - value: 0.5602888315874278 - name: Spearman Manhattan - - type: pearson_euclidean - value: 0.5419293810729837 - name: Pearson Euclidean - - type: spearman_euclidean - value: 0.5615048812468355 - name: Spearman Euclidean - - type: pearson_dot - value: 0.4169981584167454 - name: Pearson Dot - - type: spearman_dot - value: 0.5593212146808568 - name: Spearman Dot - - type: pearson_max - value: 0.8409052758628516 - name: Pearson Max - - type: spearman_max - value: 0.8384080261351986 - name: Spearman Max - - task: - type: semantic-similarity - name: Semantic Similarity - dataset: - name: sts dev 256 - type: sts-dev-256 - metrics: - - type: pearson_cosine - value: 0.8388423982166929 - name: Pearson Cosine - - type: spearman_cosine - value: 0.8369296147769071 - name: Spearman Cosine - - type: pearson_manhattan - value: 0.5409063572965348 - name: Pearson Manhattan - - type: spearman_manhattan - value: 0.558644667120197 - name: Spearman Manhattan - - type: pearson_euclidean - value: 0.5431382317962441 - name: Pearson Euclidean - - type: spearman_euclidean - value: 0.5608197975305511 - name: Spearman Euclidean - - type: pearson_dot - value: 0.41251450457591626 - name: Pearson Dot - - type: spearman_dot - value: 0.5517091844573435 - name: Spearman Dot - - type: pearson_max - value: 0.8388423982166929 - name: Pearson Max - - type: spearman_max - value: 0.8369296147769071 - name: Spearman Max - - task: - type: semantic-similarity - name: Semantic Similarity - dataset: - name: sts dev 128 - type: sts-dev-128 - metrics: - - type: pearson_cosine - value: 0.8339216550535867 - name: Pearson Cosine - - type: spearman_cosine - value: 0.8343811514627291 - name: Spearman Cosine - - type: pearson_manhattan - value: 0.5469608261339967 - name: Pearson Manhattan - - type: spearman_manhattan - value: 0.5625872469765152 - name: Spearman Manhattan - - type: pearson_euclidean - value: 0.5497573545533265 - name: Pearson Euclidean - - type: spearman_euclidean - value: 0.5657471105972286 - name: Spearman Euclidean - - type: pearson_dot - value: 0.3990592867510328 - name: Pearson Dot - - type: spearman_dot - value: 0.5380297530229515 - name: Spearman Dot - - type: pearson_max - value: 
0.8339216550535867 - name: Pearson Max - - type: spearman_max - value: 0.8343811514627291 - name: Spearman Max - - task: - type: semantic-similarity - name: Semantic Similarity - dataset: - name: sts dev 64 - type: sts-dev-64 - metrics: - - type: pearson_cosine - value: 0.8182971695928284 - name: Pearson Cosine - - type: spearman_cosine - value: 0.8253866217421698 - name: Spearman Cosine - - type: pearson_manhattan - value: 0.5541003526214051 - name: Pearson Manhattan - - type: spearman_manhattan - value: 0.5696855805942952 - name: Spearman Manhattan - - type: pearson_euclidean - value: 0.5570543576011492 - name: Pearson Euclidean - - type: spearman_euclidean - value: 0.5732623446168936 - name: Spearman Euclidean - - type: pearson_dot - value: 0.38310330813055016 - name: Pearson Dot - - type: spearman_dot - value: 0.5151495623270115 - name: Spearman Dot - - type: pearson_max - value: 0.8182971695928284 - name: Pearson Max - - type: spearman_max - value: 0.8253866217421698 - name: Spearman Max ---- -# SentenceTransformer +# WordLlama -This is a [sentence-transformers](https://www.SBERT.net) model trained on the [all-nli](https://huggingface.co/datasets/sentence-transformers/all-nli), [nli-for-simcse](https://huggingface.co/datasets/sentence-transformers/nli-for-simcse), [msmarco](https://huggingface.co/datasets/sentence-transformers/msmarco-bm25), [mr-tydi](https://huggingface.co/datasets/sentence-transformers/mr-tydi), [compression](https://huggingface.co/datasets/sentence-transformers/sentence-compression), [simple-wiki](https://huggingface.co/datasets/sentence-transformers/simple-wiki), [agnews](https://huggingface.co/datasets/sentence-transformers/agnews), [ccnews](https://huggingface.co/datasets/sentence-transformers/ccnews), [npr](https://huggingface.co/datasets/sentence-transformers/npr), [gooaq](https://huggingface.co/datasets/sentence-transformers/gooaq), [yahoo-answers](https://huggingface.co/datasets/sentence-transformers/yahoo-answers), [eli5](https://huggingface.co/datasets/sentence-transformers/eli5), [amazon-qa](https://huggingface.co/datasets/sentence-transformers/amazon-qa), [squad](https://huggingface.co/datasets/sentence-transformers/squad), [natural_questions](https://huggingface.co/datasets/sentence-transformers/natural-questions), [hotpotqa](https://huggingface.co/datasets/sentence-transformers/hotpotqa), [quora_duplicates](https://huggingface.co/datasets/sentence-transformers/quora-duplicates), [quora_triplets](https://huggingface.co/datasets/sentence-transformers/quora-duplicates), [specter](https://huggingface.co/datasets/sentence-transformers/specter), [stackexchange_bbp](https://huggingface.co/datasets/sentence-transformers/stackexchange-duplicates), [stackexchange_ttp](https://huggingface.co/datasets/sentence-transformers/stackexchange-duplicates), [stackexchange_ppp](https://huggingface.co/datasets/sentence-transformers/stackexchange-duplicates) and [altlex](https://huggingface.co/datasets/sentence-transformers/altlex) datasets. It maps sentences & paragraphs to a None-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more. +The power of 15 trillion tokens of training, extracted, flogged and minimized into a cute little package for word embedding. -## Model Details +

+*Word Llama logo*

-### Model Description -- **Model Type:** Sentence Transformer - -- **Maximum Sequence Length:** None tokens -- **Output Dimensionality:** None tokens -- **Similarity Function:** Cosine Similarity -- **Training Datasets:** - - [all-nli](https://huggingface.co/datasets/sentence-transformers/all-nli) - - [nli-for-simcse](https://huggingface.co/datasets/sentence-transformers/nli-for-simcse) - - [msmarco](https://huggingface.co/datasets/sentence-transformers/msmarco-bm25) - - [mr-tydi](https://huggingface.co/datasets/sentence-transformers/mr-tydi) - - [compression](https://huggingface.co/datasets/sentence-transformers/sentence-compression) - - [simple-wiki](https://huggingface.co/datasets/sentence-transformers/simple-wiki) - - [agnews](https://huggingface.co/datasets/sentence-transformers/agnews) - - [ccnews](https://huggingface.co/datasets/sentence-transformers/ccnews) - - [npr](https://huggingface.co/datasets/sentence-transformers/npr) - - [gooaq](https://huggingface.co/datasets/sentence-transformers/gooaq) - - [yahoo-answers](https://huggingface.co/datasets/sentence-transformers/yahoo-answers) - - [eli5](https://huggingface.co/datasets/sentence-transformers/eli5) - - [amazon-qa](https://huggingface.co/datasets/sentence-transformers/amazon-qa) - - [squad](https://huggingface.co/datasets/sentence-transformers/squad) - - [natural_questions](https://huggingface.co/datasets/sentence-transformers/natural-questions) - - [hotpotqa](https://huggingface.co/datasets/sentence-transformers/hotpotqa) - - [quora_duplicates](https://huggingface.co/datasets/sentence-transformers/quora-duplicates) - - [quora_triplets](https://huggingface.co/datasets/sentence-transformers/quora-duplicates) - - [specter](https://huggingface.co/datasets/sentence-transformers/specter) - - [stackexchange_bbp](https://huggingface.co/datasets/sentence-transformers/stackexchange-duplicates) - - [stackexchange_ttp](https://huggingface.co/datasets/sentence-transformers/stackexchange-duplicates) - - [stackexchange_ppp](https://huggingface.co/datasets/sentence-transformers/stackexchange-duplicates) - - [altlex](https://huggingface.co/datasets/sentence-transformers/altlex) -- **Languages:** en, ar, bn, fi, id, ja, ko, ru, sw, te, th - -### Model Sources +## Table of Contents +- [Quick Start](#quick-start) +- [What is it?](#what-is-it) +- [MTEB Results](#mteb-results-l2_supercat) +- [Embed Text](#embed-text) +- [Training Notes](#training-notes) +- [Roadmap](#roadmap) +- [Extracting Token Embeddings](#extracting-token-embeddings) +- [Citations](#citations) +- [License](#license) -- **Documentation:** [Sentence Transformers Documentation](https://sbert.net) -- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers) -- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers) - -### Full Model Architecture - -``` -SentenceTransformer( - (0): WordLlamaEmbedding( - (embedding): Embedding(128256, 28672) - ) - (1): WeightedProjector( - (proj): Linear(in_features=28672, out_features=1024, bias=True) - ) - (2): AvgPool() -) -``` - -## Usage - -### Direct Usage (Sentence Transformers) - -First install the Sentence Transformers library: +## Quick Start +Install: ```bash -pip install -U sentence-transformers +pip install wordllama ``` -Then you can load this model and run inference. +Load the 256-dim model. 
```python -from sentence_transformers import SentenceTransformer - -# Download from the 🤗 Hub -model = SentenceTransformer("sentence_transformers_model_id") -# Run inference -sentences = [ - 'In some cases, members initially participated because of an existing trust relationship with individual leaders or sponsors, and it was a challenge to keep them returning until they saw value in participating and had built trust with other members.', - 'Trust is important to not only getting members, but keeping them as well.', - 'Trust is never important for recruiting or retaining any of the members.', -] -embeddings = model.encode(sentences) -print(embeddings.shape) -# [3, 1024] - -# Get the similarity scores for the embeddings -similarities = model.similarity(embeddings, embeddings) -print(similarities.shape) -# [3, 3] +from wordllama import WordLlama + +# Load the default WordLlama model +wl = WordLlama.load() + +# Calculate similarity between two sentences +similarity_score = wl.similarity("i went to the car", "i went to the pawn shop") +print(similarity_score) # Output: 0.06641249096796882 + +# Rank documents based on their similarity to a query +query = "i went to the car" +candidates = ["i went to the park", "i went to the shop", "i went to the truck", "i went to the vehicle"] +ranked_docs = wl.rank(query, candidates) +print(ranked_docs) +# Output: +# [ +# ('i went to the vehicle', 0.7441646856486314), +# ('i went to the truck', 0.2832691551894259), +# ('i went to the shop', 0.19732814982305436), +# ('i went to the park', 0.15101404519322253) +# ] + +# additional inference methods +wl.deduplicate(candidates, threshold=0.8) # fuzzy deduplication +wl.cluster(docs, k=5, max_iterations=100, tolerance=1e-4) # labels using kmeans/kmeans++ init +wl.filter(query, candidates, threshold=0.3) # filter candidates based on query +wl.topk(query, candidates, k=3) # return topk strings based on query ``` - - - - - - -## Evaluation - -### Metrics - -#### Semantic Similarity -* Dataset: `sts-dev-1024` -* Evaluated with [EmbeddingSimilarityEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator) - -| Metric | Value | -|:--------------------|:-----------| -| pearson_cosine | 0.8417 | -| **spearman_cosine** | **0.8392** | -| pearson_manhattan | 0.5415 | -| spearman_manhattan | 0.5608 | -| pearson_euclidean | 0.5419 | -| spearman_euclidean | 0.5613 | -| pearson_dot | 0.4205 | -| spearman_dot | 0.563 | -| pearson_max | 0.8417 | -| spearman_max | 0.8392 | - -#### Semantic Similarity -* Dataset: `sts-dev-512` -* Evaluated with [EmbeddingSimilarityEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator) - -| Metric | Value | -|:--------------------|:-----------| -| pearson_cosine | 0.8409 | -| **spearman_cosine** | **0.8384** | -| pearson_manhattan | 0.5408 | -| spearman_manhattan | 0.5603 | -| pearson_euclidean | 0.5419 | -| spearman_euclidean | 0.5615 | -| pearson_dot | 0.417 | -| spearman_dot | 0.5593 | -| pearson_max | 0.8409 | -| spearman_max | 0.8384 | - -#### Semantic Similarity -* Dataset: `sts-dev-256` -* Evaluated with [EmbeddingSimilarityEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator) - -| Metric | Value | -|:--------------------|:-----------| -| pearson_cosine | 0.8388 | -| **spearman_cosine** | **0.8369** | -| 
pearson_manhattan | 0.5409 | -| spearman_manhattan | 0.5586 | -| pearson_euclidean | 0.5431 | -| spearman_euclidean | 0.5608 | -| pearson_dot | 0.4125 | -| spearman_dot | 0.5517 | -| pearson_max | 0.8388 | -| spearman_max | 0.8369 | - -#### Semantic Similarity -* Dataset: `sts-dev-128` -* Evaluated with [EmbeddingSimilarityEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator) - -| Metric | Value | -|:--------------------|:-----------| -| pearson_cosine | 0.8339 | -| **spearman_cosine** | **0.8344** | -| pearson_manhattan | 0.547 | -| spearman_manhattan | 0.5626 | -| pearson_euclidean | 0.5498 | -| spearman_euclidean | 0.5657 | -| pearson_dot | 0.3991 | -| spearman_dot | 0.538 | -| pearson_max | 0.8339 | -| spearman_max | 0.8344 | - -#### Semantic Similarity -* Dataset: `sts-dev-64` -* Evaluated with [EmbeddingSimilarityEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator) - -| Metric | Value | -|:--------------------|:-----------| -| pearson_cosine | 0.8183 | -| **spearman_cosine** | **0.8254** | -| pearson_manhattan | 0.5541 | -| spearman_manhattan | 0.5697 | -| pearson_euclidean | 0.5571 | -| spearman_euclidean | 0.5733 | -| pearson_dot | 0.3831 | -| spearman_dot | 0.5151 | -| pearson_max | 0.8183 | -| spearman_max | 0.8254 | - - - - - -## Training Details - -### Training Datasets - -#### all-nli - -* Dataset: [all-nli](https://huggingface.co/datasets/sentence-transformers/all-nli) at [d482672](https://huggingface.co/datasets/sentence-transformers/all-nli/tree/d482672c8e74ce18da116f430137434ba2e52fab) -* Size: 557,850 training samples -* Columns: anchor, positive, and negative -* Approximate statistics based on the first 1000 samples: - | | anchor | positive | negative | - |:--------|:-----------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------| - | type | string | string | string | - | details | | | | -* Samples: - | anchor | positive | negative | - |:------------------------------------------------------|:---------------------------------------------------------------------------|:----------------------------------------------------| - | An older man riding a bike. | An elderly man is biking | an old man is sleeping | - | The man is on a skateboard. | A shirtless man is doing a skateboard trick over a bike rail. | A man performs a bike trick on a ramp. | - | The Episcopalians are all going to hell. | The Episcopalians will not be going to heaven. | All Episcopalians will go to heaven. 
| -* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: - ```json - { - "loss": "MultipleNegativesRankingLoss", - "matryoshka_dims": [ - 1024, - 512, - 256, - 128, - 64 - ], - "matryoshka_weights": [ - 1, - 1, - 1, - 1, - 1 - ], - "n_dims_per_step": -1 - } - ``` - -#### nli-for-simcse - -* Dataset: [nli-for-simcse](https://huggingface.co/datasets/sentence-transformers/nli-for-simcse) at [926cae4](https://huggingface.co/datasets/sentence-transformers/nli-for-simcse/tree/926cae4af15a99b5cc2b053212bb52a4b377c418) -* Size: 274,951 training samples -* Columns: anchor, positive, and negative -* Approximate statistics based on the first 1000 samples: - | | anchor | positive | negative | - |:--------|:----------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------| - | type | string | string | string | - | details | | | | -* Samples: - | anchor | positive | negative | - |:-----------------------------------------------------------------------------------------|:-------------------------------------------------------------------|:-----------------------------------------------------------| - | Bicycler performing entertaining stunts while the sun sets in the distance. | A person on a bicycle performs under the setting sun. | A person performs stunts with a skateboard. | - | A man with a jackhammer demolishing cement | A worker is using a jackhammer. | A man is using a drill on a nail | - | 'The labourer is worthy of his hire', you know. | They are worth hiring. | I would never recommend him for the position. 
| -* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: - ```json - { - "loss": "MultipleNegativesRankingLoss", - "matryoshka_dims": [ - 1024, - 512, - 256, - 128, - 64 - ], - "matryoshka_weights": [ - 1, - 1, - 1, - 1, - 1 - ], - "n_dims_per_step": -1 - } - ``` - -#### msmarco - -* Dataset: [msmarco](https://huggingface.co/datasets/sentence-transformers/msmarco-bm25) at [ce8a493](https://huggingface.co/datasets/sentence-transformers/msmarco-bm25/tree/ce8a493a65af5e872c3c92f72a89e2e99e175f02) -* Size: 502,931 training samples -* Columns: query, positive, and negative -* Approximate statistics based on the first 1000 samples: - | | query | positive | negative | - |:--------|:------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------| - | type | string | string | string | - | details | | | | -* Samples: - | query | positive | negative | - |:------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| - | what is nakoa | Description. Nakoa Trail is a 2.5 mile loop that traverses the mid-elevation rainforest of Kahana Valley. Named after the native koa tree, the trail meanders through a mixed forest of native plants such as koa, hala (pandanus), and ferns, as well as exotic species such as ink berry, guava and octopus trees.escription. Nakoa Trail is a 2.5 mile loop that traverses the mid-elevation rainforest of Kahana Valley. Named after the native koa tree, the trail meanders through a mixed forest of native plants such as koa, hala (pandanus), and ferns, as well as exotic species such as ink berry, guava and octopus trees. | Nakoa meaning, origin, history | Meaning of name Name: Nakoa No. of characters: 5 Gender: Boy Origin: Unknown How to pronounce Nakoa na-koa, nak-oa , What does my name mean? | - | cost for install recessed lights s | more than IC. but to retrofit to AT with trim. kits and inserts will cost about $15 per light. the stickers inside the recessed lights can. be decieving. halo for instance has a red sticker. that says in big letters AIR TIGHT..in smaller. letters ..when used with the following inserts/trims. | Find here detailed information about recessed lighting installation costs. 
Average cost to install recessed lighting is about $780 (6 CFL lights installed in an existing ceiling with baffle trim). Find here detailed information about recessed lighting installation costs. Average cost to install recessed lighting is about $780 (6 CFL lights installed in an existing ceiling with baffle trim). | - | what is the cause of leg cramping | Most of the time, no apparent cause for night leg cramps can be identified. In general, night leg cramps are likely to be related to muscle fatigue and nerve problems. The risk of having night leg cramps increases with age. Pregnant women also have a higher likelihood of having night leg cramps. Several conditions, such as kidney failure and diabetic nerve damage, are known to cause night leg cramps. But if you have one of these, you're most likely aware of it and have symptoms other than night leg cramps. | Cramping leg pain: 1 Causes: Cramping leg pain. 2 Introduction: Cramping leg pain. 3 Cramping leg pain: Add a 3rd symptom. 4 Cramping leg pain: Remove a symptom. | -* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: - ```json - { - "loss": "MultipleNegativesRankingLoss", - "matryoshka_dims": [ - 1024, - 512, - 256, - 128, - 64 - ], - "matryoshka_weights": [ - 1, - 1, - 1, - 1, - 1 - ], - "n_dims_per_step": -1 - } - ``` 
+## What is it?
-#### mr-tydi
+WordLlama is an NLP utility and word embedding model that recycles components from large language models (LLMs) to create efficient and compact word representations (similar to GloVe, Word2Vec or FastText).
+WordLlama begins by extracting the token embedding codebook from a state-of-the-art LLM (e.g., Llama 3 70B) and training a small context-less model in a general-purpose embedding framework.
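A minimal sketch of that idea, not the project's training code: pull the input-embedding codebook out of an LLM checkpoint with `transformers`, then build a crude text vector by average pooling the token rows. The model id below is an illustrative stand-in, and the learned weighting/projection step that WordLlama actually trains is omitted.

```python
# Illustrative sketch only -- not WordLlama's pipeline. Assumes access to a
# Hugging Face causal LM checkpoint (the model id below is a stand-in).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "meta-llama/Meta-Llama-3-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)

# The "codebook": one row per vocabulary entry, no attention or context involved.
codebook = model.get_input_embeddings().weight.detach()  # (vocab_size, hidden_dim)

def naive_embed(text: str) -> torch.Tensor:
    # Tokenize, look up rows of the codebook, and average pool them.
    ids = tokenizer(text, add_special_tokens=False, return_tensors="pt").input_ids[0]
    return codebook[ids].float().mean(dim=0)

print(naive_embed("a quick brown fox").shape)  # torch.Size([hidden_dim])
```

WordLlama then learns a weighting and projection of this codebook down to the 64-1024 dimension range, which is the small matrix that actually gets shipped.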
-* Dataset: [mr-tydi](https://huggingface.co/datasets/sentence-transformers/mr-tydi) at [abbdf55](https://huggingface.co/datasets/sentence-transformers/mr-tydi/tree/abbdf55c630352da943f779610c3ce6268118351) -* Size: 3,547 training samples -* Columns: anchor, positive, and negative -* Approximate statistics based on the first 1000 samples: - | | anchor | positive | negative | - |:--------|:------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------| - | type | string | string | string | - | details | | | | -* Samples: - | anchor | positive | negative | - |:------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| - | When was the video game Rock Band first introduced? | Rock Band (video game)
Rock Band is a music video game developed by Harmonix, published by MTV Games and distributed by Electronic Arts. It is the first title in the Rock Band series. The Xbox 360 and PlayStation 3 versions were released in North America on November 20, 2007,[2] while the PlayStation 2 version was released on December 18, 2007[6] and the Wii version on June 22, 2008. Harmonix previously developed the first two games in the Guitar Hero series, which popularized gameplay of rock music with guitar-shaped controllers. After development of the series was shifted to Neversoft, Harmonix conceived Rock Band as a new title that would offer multi-instrument gameplay.
| Rock Band
The first "Rock Band" features a more traditional "Career" mode for a single player on either lead guitar, drums, or vocals, dividing the on-disk songs into nine tiers arranged by difficulty of the song for that instrument. Each subsequent tier would only be unlocked after completing the songs in the previous tier. The player would earn in-game money based on their performance. This was removed in "Rock Band 2" for the improved "Tour" mode.
| - | Where did the Alavi Bohras originate? | Alavi Bohras
The Alavi Bohras (Arabic: علوي بھرۃ‎) are a Taiyebi Musta'alavi Isma'ili Shi'i Muslim community from Gujarat, India.[2] In India, during the time of the 18th Fatimid Imam Ma'ad al-Mustansir Billah around 1093 AD in Egypt, the designated learned people (wulaat) who were sent from Yemen by missionaries (du'aat) under the guidance of the Imam established a da'wah in Khambhat (Gujarat, India).
| Alawites
There are also about 3,900 Alawites living in the village of Ghajar, which is located on the border between Lebanon and the Israeli-occupied Golan Heights. In 1932 the residents of Ghajar were given the option of choosing their nationality, and overwhelmingly chose to be a part of Syria, which has a sizable Alawite minority. Before the 1967 Arab-Israeli War, the residents of Ghajar were counted in the 1960 Syrian census. Israel captured the Golan Heights from Syria, and after implementing Israeli civil law in 1981, the Alawite community chose to become Israeli citizens.
| - | What is the scientific name of the european polecat? | European polecat
The European polecat (Mustela putorius)– also known as the common ferret, black or forest polecat, or fitch (as well as some other names)– is a species of mustelid native to western Eurasia and north Morocco. It is of a generally dark brown colour, with a pale underbelly and a dark mask across the face. Occasionally, colour mutations, including albinos and erythrists, occur.[2] Compared to minks and other weasels– fellow members of the genus Mustela– the polecat has a shorter, more compact body;[3] a more powerfully built skull and dentition;[4] is less agile;[5] and it is well known for having the characteristic ability to secrete a particularly foul-smelling liquid to mark its territory.
| European mink
Fossil finds of the European mink are very rare, thus indicating the species is either a relative newcomer to Europe, probably having originated in North America, or a recent speciation caused by hybridization. It likely first arose in the Middle Pleistocene, with several fossils in Europe dated to the Late Pleistocene being found in caves and some suggesting early exploitation by humans. Genetic analyses indicate, rather than being closely related to the American mink, the European mink's closest relative is the European polecat (perhaps due to past hybridization) and the Siberian weasel, being intermediate in form between true polecats and other members of the genus. The closeness between the mink and polecat is emphasized because the species can hybridize.
| -* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: - ```json - { - "loss": "MultipleNegativesRankingLoss", - "matryoshka_dims": [ - 1024, - 512, - 256, - 128, - 64 - ], - "matryoshka_weights": [ - 1, - 1, - 1, - 1, - 1 - ], - "n_dims_per_step": -1 - } - ``` +WordLlama improves on all MTEB benchmarks above word models like GloVe 300d, while being substantially smaller in size (**16MB default model** @ 256-dim vs >2GB). -#### compression +Features of WordLlama include: -* Dataset: [compression](https://huggingface.co/datasets/sentence-transformers/sentence-compression) at [605bc91](https://huggingface.co/datasets/sentence-transformers/sentence-compression/tree/605bc91d95631895ba25b6eda51a3cb596976c90) -* Size: 180,000 training samples -* Columns: text and simplified -* Approximate statistics based on the first 1000 samples: - | | text | simplified | - |:--------|:--------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------| - | type | string | string | - | details | | | -* Samples: - | text | simplified | - |:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------| - | It was a disappointment that later turned into a big celebration in this nondescript village. | It was disappointment that turned into celebrations: | - | The European Union on late Thursday adopted new measures to simplify management rules for the structural and cohesion funds in order to counter the economic crisis that is affecting its member states. | EU adopts measures to simplify management of fund to counter the economic crisis | - | The European Union on Friday called on Syria to release political prisoners, following the January 28 arrest of a leading activist. | EU calls on Syria to release political prisoners | -* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: - ```json - { - "loss": "MultipleNegativesRankingLoss", - "matryoshka_dims": [ - 1024, - 512, - 256, - 128, - 64 - ], - "matryoshka_weights": [ - 1, - 1, - 1, - 1, - 1 - ], - "n_dims_per_step": -1 - } - ``` +1. **Matryoshka Representations**: Truncate embedding dimension as needed. +2. **Low Resource Requirements**: A simple token lookup with average pooling, enables this to operate fast on CPU. +3. **Binarization**: Models trained using the straight through estimator can be packed to small integer arrays for even faster hamming distance calculations. (coming soon) +4. **Numpy-only inference**: Lightweight and simple. -#### simple-wiki +For flexibility, WordLlama employs the Matryoshka representation learning training technique. The largest model (1024-dim) can be truncated to 64, 128, 256 or 512. +For binary embedding models, we implement straight-through estimators during training. For dense embeddings, 256 dimensions sufficiently captures most of the performance, while for binary embeddings validation accuracy is close to saturation at 512-dimensions (64 bytes packed). 
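Both tricks are easy to picture in NumPy. A toy illustration with random vectors standing in for real embeddings: Matryoshka truncation keeps the leading dimensions and renormalizes, while binarization keeps only the sign bits, packs them into bytes (512 dims -> 64 bytes), and compares with Hamming distance.

```python
# Toy illustration of Matryoshka truncation and sign-bit binarization.
import numpy as np

rng = np.random.default_rng(0)
emb = rng.normal(size=(2, 1024))  # stand-ins for two 1024-dim embeddings

def truncate(x: np.ndarray, dim: int) -> np.ndarray:
    # Matryoshka property: the leading `dim` coordinates carry most of the signal.
    x = x[:, :dim]
    return x / np.linalg.norm(x, axis=1, keepdims=True)

a, b = truncate(emb, 256)
print(float(a @ b))  # cosine similarity at 256 dims

def binarize(x: np.ndarray) -> np.ndarray:
    # Keep sign bits only and pack 8 per byte: 512 dims -> 64 bytes per vector.
    return np.packbits(x > 0, axis=1)

pa, pb = binarize(truncate(emb, 512))
print(int(np.unpackbits(pa ^ pb).sum()))  # Hamming distance: XOR, then popcount
```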
-* Dataset: [simple-wiki](https://huggingface.co/datasets/sentence-transformers/simple-wiki) at [60fd9b4](https://huggingface.co/datasets/sentence-transformers/simple-wiki/tree/60fd9b4680642ace0e2604cc2de44d376df419a7) -* Size: 102,225 training samples -* Columns: text and simplified -* Approximate statistics based on the first 1000 samples: | | text | simplified | |:--------|:--------|:--------| | type | string | string | | details | | | -* Samples: | text | simplified | |:--------|:--------| | Stephen , was created in 1969 because residents wanted to educate their children locally . | Stephen , was created in 1969 . | | In some usages , the term dinner has continued to refer to the largest meal of the day , even when this meal is eaten at the end of the day and is preceded by two other meals . | Sometimes dinner can mean a meal eaten in the middle of the day . | | At the UN , it was present under the name `` China '' until it lost its seat to the People 's Republic of China . Since then , the name `` China '' has been commonly used to refer only to the People 's Republic of China . | Now it is owned by the Communist People 's Republic of China see History and Political problems of China . | -* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: - ```json - { - "loss": "MultipleNegativesRankingLoss", - "matryoshka_dims": [ - 1024, - 512, - 256, - 128, - 64 - ], - "matryoshka_weights": [ - 1, - 1, - 1, - 1, - 1 - ], - "n_dims_per_step": -1 - } - ``` 
+The final weights are saved *after* weighting, projection and truncation of the entire tokenizer vocabulary. Thus, WordLlama becomes a single embedding matrix (nn.Embedding) that is considerably smaller than the gigabyte-sized LLM codebooks we start with. The original tokenizer is still used to preprocess the text into tokens, and the reduced-size token embeddings are average pooled. There is very little computation required, and the resulting model sizes range from 16MB to 250MB for the 128k Llama 3 vocabulary.
-#### agnews
+It's a good option for many NLP-lite tasks. You can train sklearn classifiers on it, perform basic semantic matching, fuzzy deduplication, ranking and clustering.
+I think it should work well for creating LLM output evaluators or other preparatory tasks involved in multi-hop or agentic workflows.
+You can perform your own LLM surgery and train your own model on consumer GPUs in a few hours.
+Because it is fast and compact, it makes a good "Swiss-Army Knife" utility for exploratory analysis and utility applications.
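For example, the sklearn-classifier workflow looks roughly like this. The texts and labels are made up, and it assumes `wl.embed()` returns a 2-D NumPy array (one row per input string):

```python
# Toy sketch: WordLlama vectors as features for a scikit-learn classifier.
from sklearn.linear_model import LogisticRegression
from wordllama import WordLlama

wl = WordLlama.load()

texts = [
    "refund my order please", "the package arrived broken",   # complaints
    "love this product", "great value, works perfectly",      # praise
]
labels = [0, 0, 1, 1]

clf = LogisticRegression(max_iter=1000).fit(wl.embed(texts), labels)
print(clf.predict(wl.embed(["item showed up damaged", "really happy with it"])))
# e.g. [0 1]
```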
-* Dataset: [agnews](https://huggingface.co/datasets/sentence-transformers/agnews) at [e8a1e3d](https://huggingface.co/datasets/sentence-transformers/agnews/tree/e8a1e3d19a3c0c6da937ceaebc94a120b5003b9d) -* Size: 1,157,745 training samples -* Columns: title and description -* Approximate statistics based on the first 1000 samples: - | | title | description | - |:--------|:------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------| - | type | string | string | - | details | | | -* Samples: - | title | description | - |:-----------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| - | Ue: procedura infrazione per Italia | Nel mirino la tassazione dei fondi pensione stranieri | - | Letterman, Leno return Wednesday night | The return of TV's late-night funnymen after a two-month strike hiatus turned into a bizarre mix of picketing and presidential politics Wednesday as Mike Huckabee headed for Jay Leno's show and Hillary Clinton turned to David Letterman. | - | PSA After Prostate Surgery Not Always Ominous | Men with prostate cancer who undergo removal of the prostate (ie, radical prostatectomy) hope to see their PSA fall to zero, but sometimes it remains detectable in their blood. | -* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: - ```json - { - "loss": "MultipleNegativesRankingLoss", - "matryoshka_dims": [ - 1024, - 512, - 256, - 128, - 64 - ], - "matryoshka_weights": [ - 1, - 1, - 1, - 1, - 1 - ], - "n_dims_per_step": -1 - } - ``` +## MTEB Results (l2_supercat) -#### ccnews +| Metric | WL64 | WL128 | WL256 (X) | WL512 | WL1024 | GloVe 300d | Komninos | all-MiniLM-L6-v2 | +|------------------------|-------------|--------------|--------------|--------------|---------------|------------|----------|------------------| +| Clustering | 30.27 | 32.20 | 33.25 | 33.40 | 33.62 | 27.73 | 26.57 | 42.35 | +| Reranking | 50.38 | 51.52 | 52.03 | 52.32 | 52.39 | 43.29 | 44.75 | 58.04 | +| Classification | 53.14 | 56.25 | 58.21 | 59.13 | 59.50 | 57.29 | 57.65 | 63.05 | +| Pair Classification | 75.80 | 77.59 | 78.22 | 78.50 | 78.60 | 70.92 | 72.94 | 82.37 | +| STS | 66.24 | 67.53 | 67.91 | 68.22 | 68.27 | 61.85 | 62.46 | 78.90 | +| CQA DupStack | 18.76 | 22.54 | 24.12 | 24.59 | 24.83 | 15.47 | 16.79 | 41.32 | +| SummEval | 30.79 | 29.99 | 30.99 | 29.56 | 29.39 | 28.87 | 30.49 | 30.81 | -* Dataset: [ccnews](https://huggingface.co/datasets/sentence-transformers/ccnews) at [6118cc0](https://huggingface.co/datasets/sentence-transformers/ccnews/tree/6118cc09daf7977d6dddef2c6e4b7a4c92db9f57) -* Size: 614,664 training samples -* Columns: title and article -* Approximate statistics based on the first 1000 samples: - | | title | article | - |:--------|:------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------| - | type | string | string | - | details | | | -* Samples: - | title | article | - 
|:------|:------| | Angelina Jolie To Premiere New Film For Cambodian Royalty | Angelina Jolie will premiere her new Khmer Rouge survivor movie for a royal audience, including Cambodia’s King Norodom Sihamoni.
The Oscar-winning actress-turned-director is set to debut First They Killed My Father at a special screening at the historic Angkor Wat temple on Saturday, and the event will be attended by the country’s ruler and the Queen Mother, Norodom Monineath Sihanouk.
A number of top government representatives will also check out the film, according to officials overseeing the archaeological site.
Two other screens will also be set up near the area for a free public viewing, reports the Associated Press.
First They Killed My Father, Jolie’s latest film as a director, is based on Cambodian author and human rights activist Loung Ung’s tale of survival during the Khmer Rouge genocide in the late 1970s.
The star returned to Cambodia, where she previously adopted her 15-year-old son Maddox from an orphanage in 2002, to prepare for the drama in 2015, when she received the support of Cambodian premier Hun Sen.
Jolie met the Prime Minister, a former mid-ranking Khmer Rouge member, in Phnom Penh to discuss the importance of adapting the story. After he defected, he became an outspoken member of then-leader Pol Pot’s opposition.
Following their meeting, the Prime Minister’s spokesman, Eang Sophalleth, told reporters, “The film reflects facts in Cambodia and will be a way for younger generations to understand the country.”
First They Killed My Father will be released via online streaming site Netflix later this year.
The movie’s launch will provide a welcome distraction for Jolie, who is currently adjusting to life as a single mother of six, following her split from husband Brad Pitt in September.
Source:: WENN – Blog
| - | Paris Police Report for Tuesday April 18th | 47 year old Bryan Evan Williams was arrested by Paris police in the 800 block of Deshong on a NISI judgement. He was charged with sexual assault of a child.
28 year old Patsy Denise Mason was arrested by Paris police in the 100 block of West Provine. She was booked into the Lamar County jail for felony theft.
Paris police arrested 22 year old Santavious Deontre Gray after a traffic stop in the 800 block of MLK after they found him in possession of marijuana and a controlled prescription medication. He was also allegedly in possession of materials commonly used to package narcotics for sale.
38 year old Bradley Heath Blankenship of Paris was arrested on an outstanding Hunt County felony warrant. He was charged with tampering or fabricating physical evidence.
30 year old Krystal Elaine Finch was stopped by Paris police for a traffic violation and a computer check showed she was wanted on an outstanding Lamar County warrant. She was booked into the Lamar County jail for theft.
| - | Mario Batali Accused Of Misconduct By Seven Additional Women - Report | Seven additional women are accusing disgraced chef Mario Batali, the former co-host of ABC’s The Chew, of alleged inappropriate touching in public, according to an exclusive story on food site Eater.
According to the story, Batali groped fans while posing for photos. One of the women, Natali Tene, a 28-year-old from Boston, says that in April 2017, Batali groped her at the Boston bar Towne Stove and Spirits. “She claims that Batali, who seemed intoxicated, suggested they take selfies together, then rubbed her breasts, grabbed her backside, put his hands between her legs, and kept squeezing her face into his as he kissed her,” Eater writes.
The resulting photos were provided to Eater. The site has posted short video clips of Batali kissing and groping women. Deadline has not independently varified the site’s claims.
“Eater is choosing to publish clips of two of these encounters,” the article states, “with the permission of the women in them. We realize the clips may be disturbing to readers, but the reporting around these issues can often seem abstracted; the women have agreed to make them public to offer a fuller view into what they experienced.”
The latest accusations arrive after New York police confirmed an investigation of two sexual assault accusations against Batali. The chef has denied the allegations.
Three of Batali’s Las Vegas restaurants recently announced their pending closures as the NYPD ramps up its investigation.
Eater says that the seven additional women who shared their Batali experiences with the site brings the total number of women alleging sexual misconduct against the chef to “at least 18.”
The additional incidents, Eater says, “did not escalate to the rape allegations currently being investigated by police,” but they “paint a broader picture of the chef’s alleged misconduct, behavior that started at least as early as the mid-’90s and continued until as recently October 2017.”
Another of the women in the Eater story is Jenny McCoy, a pastry chef and cookbook author who said Batali, “a chef she had long admired,” vigorously “rubbed her breasts when wine was spilled on her chest” at an event in New Orleans in 2007.
Eater says Batali declined comment the allegations.
| -* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: - ```json - { - "loss": "MultipleNegativesRankingLoss", - "matryoshka_dims": [ - 1024, - 512, - 256, - 128, - 64 - ], - "matryoshka_weights": [ - 1, - 1, - 1, - 1, - 1 - ], - "n_dims_per_step": -1 - } - ``` +The [l2_supercat](https://huggingface.co/dleemiller/word-llama-l2-supercat) is a Llama2-vocabulary model. To train this model, I concatenated codebooks from several models, including Llama2 70B and phi3 medium (after removing additional special tokens). +Because several models have used the Llama2 tokenizer, their codebooks can be concatenated and trained together. Performance of the resulting model is comparable to training the Llama3 70B codebook, while being 4x smaller (32k vs 128k vocabulary). -#### npr +## Embed Text -* Dataset: [npr](https://huggingface.co/datasets/sentence-transformers/npr) at [6089338](https://huggingface.co/datasets/sentence-transformers/npr/tree/6089338c366623d24681654565832a6296c003d1) -* Size: 594,384 training samples -* Columns: title and body -* Approximate statistics based on the first 1000 samples: - | | title | body | - |:--------|:-----------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------| - | type | string | string | - | details | | | -* Samples: - | title | body | - |:----------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------| | The Real Story Behind 'The Soloist' | The unlikely friendship between Steve Lopez, a Los Angeles Times columnist, and Nathaniel Ayers, a homeless musician, has inspired newspaper columns, a book and now a movie starring Robert Downey Jr. and Jamie Foxx. Lopez met Ayers four years ago, when Ayers was a homeless musician on Skid Row in Los Angeles. Lopez learned Ayers had been a promising violinist, and that he had left the prestigious music program at the Juilliard School because of his struggle with mental illness. 
Lopez chronicled Ayers' struggle in several columns at the Los Angeles Times. These columns inspired readers to send instruments to Ayers through Lopez. The friendship that Lopez formed with Ayers and eventually helped the musician get off the street, settle into an apartment and find treatment for his schizophrenia. Lopez says his friendship with Ayers has "always been a two-way street, it's not just me doing for him." The writer explains that the musician re-ignited his passion for journalism and gave him a sense of well-being: "You know, there's this humility, there's this good feeling I have from giving something," Lopez says. Lopez published a book about Ayers called The Soloist: A Lost Dream, and Unlikely Friendship, and the Redemptive Power of Music. The book inspired the recently released movie The Soloist starring Jamie Foxx and Robert Downey Jr. This interview originally aired April 22, 2008. I can't get the image out of my head, this odd picture of grubby refinement. But when I go back to look for the violinist in Pershing Square, I come up empty. His disappearance only makes the mystery more provocative. Who was he? Where did he go? What is his story? Three weeks later, he's back, reappearing in the same spot, and I watch from across the street for a while before approaching. His playing is a little scratchy and tentative, but just like before, it's clear this is no beginner. There'd been some serious training in there, somewhere along the way. He doesn't appear to be playing for money, which seems strange for a homeless guy. He plays as if he's a student, oblivious to everyone around him, and this is a practice session. Strange place to practice. The ground shakes when buses roar by, and his strings are barely audible in the orchestra of horns, trucks and sirens. I gaze at the tops of buildings adorned with gargoyles and grand cornices. Men and women move about, duty-bound, ignoring him for the most part as they disappear around corners and into entryways. The man plays on, a lone fiddler. He throws his head back, closes his eyes, drifts. A portrait of tortured bliss. When he pauses, I move in. "Hello," I say. He jumps back, startled just as before. "Do you remember me?" I ask. "I remember your voice." He's still suspicious of me, suspicious of everything around him, it seems. He says he was trying to remember a Tchaikovsky piece he once knew quite well, but now it is as elusive as the meaning of a dream. It's obvious that he's troubled in some way, like so many others who wander the streets as if they inhabit a different planet than the rest of us, wrapped in many-layered outfits to keep from coming unraveled. He's wearing a ratty blue sweater with a light brown T-shirt over it and the collar of a shirt spilling out over the top of it all. Wrapped around his neck, like a scarf, is a yellow terrycloth towel. His pants hang low on his waist, fitted for a man three sizes bigger, and his grimy white sneakers have no laces. He tells me his name is Nathaniel Anthony Ayers. From Cleveland. He's going to keep practicing until he's proud of what he hears, he says, and I tell him I might like to write about him for the L.A. Times. "Seriously?" he asks. "You'd really want to write about me?" "Why not?" I ask. He's a handsome guy, lean and fit-looking, with a strong jaw and clean white teeth. He reminds me a little of Miles Davis. I ask where he lives and he says at the Midnight Mission, one of the biggest rescue operations on nearby Skid Row. Not inside, he specifies. 
But on the street, though he showers and takes some meals inside. "Why not sleep inside?" "Oh, no," he says. "I wouldn't want to do that." I wonder how safe it can be for a man trying to reconnect with Tchaikovsky as drug dealers, prostitutes and hustlers work streets teeming with the lame and the afflicted. Skid Row is a dumping ground for inmates released from the nearby county jail, and it's a place where the sirens never stop screaming. "Maybe I'll come by and visit you at the mission," I tell him. He nods, but I can see he doesn't trust me. He tucks the violin back under his chin, eager to get back to his music, and I know that if this one ever pans out, it's going to take some time. I'll have to check back with him now and again until he's comfortable enough to open up. Maybe I could go on his rounds with him over the course of a day or so, see if anyone can help fill in the blanks in his story or explain his condition. As he begins to play, I wave good-by | - | Commander: Troops Flush Al-Qaida from Baquba | It's been about two weeks since U.S. forces launched an offensive against al-Qaida fighters who held control of the city of Baquba, about 35 miles northeast of Baghdad, the capital of Diyala province in Iraq. About 10,000 U.S. troops are now posted in and around Baquba, along with about 1,500 Iraqi army forces. Brig. Gen. Mick Bednarek is commanding the operation, and he says al-Qaida fighters have been cleared — or have fled — from the western part of the city. | - | Man And Machines: Pat Metheny's Orchestrion | Among music composers, there's always been a desire to hear their works performed exactly as conceived. It's an impulse that helped popularize the player piano 100 years ago, synthesizers in the 1960s and digital software today. Jazz guitarist Pat Metheny recently revived the idea of automated instruments to perform a new series of compositions. He calls it an orchestrion -- a collection of real, self-playing acoustic instruments. He's recorded an album with it and is taking it on the road, playing his guitar while his computer triggers a full range of acoustic instruments at the same time. There's no studio trickery, no overdubs. "It's an odd combination, because everything is me," Metheny says. "Every little bit of it, from the compositions to the way the ride cymbal is played, is filtered through my sense of things. It's absolutely the most personal thing I've ever done." "It" is Metheny's 21st-century variation on the orchestrion -- an instrument that dates back to the turn of the last century. The Early Orchestrion Metheny pulls out a book. "This will blow your mind, actually," he says. "Pretty much what's going on there, right. You know, I think that's, like, 1890." It actually looks very different from Metheny's orchestrion, but many of the parts are there. At the Morris Museum in Morristown, N.J., conservator Jeremie Ryder cranks up the Popper's Rex, an orchestrion built in 1913 in Leipzig, Germany. "There's a full piano inside a keyboard-less piano that doesn't have a keyboard, so you can't play it," Ryder says. "There's also just under 100 organ pipes of different varieties to create different flavors and textures of sounds. There's also a full wooden bar xylophone inside of it. There's a set of orchestra bells that's similar to a glockenspiel inside of it, as well as a bass drum, snare drum, crash cymbal and triangle." Not that far off from Metheny's conception of the instrument. 
"It's not synthesizers, it's not samples; it's actual physical, living, breathing, smacking and getting-hit-around stuff," he says. "A whole ensemble of stuff that I can then write for and improvise with. And that's essentially what the Orchestrion Project is." Santa's Workshop In a small former church in Brooklyn, Metheny rehearses his orchestrion -- working out the technical kinks, fine-tuning the music he composed for it. Seeing the orchestrion in action is like watching Santa's workshop, if Santa were a jazz cat. In addition to two pianos, two robotic guitars, a vibraphone, drums and bells, a mechanism blows air across the top of liquid-filled jugs. They're all linked by wires to an Apple hard drive. Metheny triggers some instruments into action while he plays the guitar; many sequences are pre-programmed. "I mean, you can't help but sort of laugh when you see it all going," Metheny says. It took Metheny four years to create his orchestrion. For it, he employed the help of experts in the fields of robotics and solenoids -- electromagnetic triggers that can move a drumstick or shake a tambourine. "This is easily the most expensive undertaking I've ever attempted, and I'll be really happy to break even on this one," he says. "It's not cheap -- we're into the six figures, we can say that." Irritating 'Those People' Metheny's Orchestrion Project is both an album -- just released -- and a concert tour of Europe and the U.S. Performing with the orchestrion is a primary motivation behind the endeavor. "We've been able to make records by ourselves for many years," he says. "They were kind of dead-end ways, though, because you couldn't do that live. ... It was a studio experience. This, I can go out and do that live." Exploring musical styles and new technologies is nothing new to Metheny. "I mean, I often joke my first musical act was to plug it in," he says. In the 1980s, he caused more than a little controversy playing jazz on a guitar synthesizer. How the orchestrion will be received by critics remains to be seen -- the jazz world is not known for being especially warm to technological innovations. "I've always felt a big part of my job is to irritate those people," Metheny says. "When I'm not doing that, I feel like I'm not doing my job." The jazz world has mostly come to terms with electronic instruments and digital technology. If there's anything about Metheny's orchestrion that might raise eyebrows, it's the lack of human interaction -- an essential element of jazz performance. "Because I'm a jazz guy, there's going to be this question of interactivity and blah blah blah," he says. "The thing is, with this, I've created a world for myself where I can interact with dozens of different things a dozen different ways." Most listeners probably won't know that they're hearing automated instruments on Metheny's new album. But when they see it in concert, they'll know. "I expect it to be a character-building experience when things don't work," he says. 
"Which is going to happen -- I mean, we're talking about hundreds of movi | -* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: - ```json - { - "loss": "MultipleNegativesRankingLoss", - "matryoshka_dims": [ - 1024, - 512, - 256, - 128, - 64 - ], - "matryoshka_weights": [ - 1, - 1, - 1, - 1, - 1 - ], - "n_dims_per_step": -1 - } - ``` +Here’s how you can load pre-trained embeddings and use them to embed text: -#### gooaq - -* Dataset: [gooaq](https://huggingface.co/datasets/sentence-transformers/gooaq) at [b089f72](https://huggingface.co/datasets/sentence-transformers/gooaq/tree/b089f728748a068b7bc5234e5bcf5b25e3c8279c) -* Size: 3,012,496 training samples -* Columns: question and answer -* Approximate statistics based on the first 1000 samples: - | | question | answer | - |:--------|:------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------| - | type | string | string | - | details | | | -* Samples: - | question | answer | - |:--------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| - | what are the four main controls on a noise gate? | Noise gates usually have five main parameters: threshold, ratio (reduction), attack, hold and release. Threshold – this sets the level at which the gate opens to let the sound through. | - | is uncut gems on amazon prime? | Excited for the international release of Uncut Gems on Netflix tomorrow. ... Generally, Amazon Prime Video gets A24 movies between six and seven months after they are released in cinemas, which would fit the Safdie brothers' statement that Uncut Gems will be streaming in summer 2020. | - | how many days a week do you workout? | You need to be hitting the weights at least three days per week. The research says that at the very least, training a minimum of two days per week is needed to maximize muscle growth. How you structure your workouts and the amount of days you devote to strength training depends on your current fitness level. 
| -* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: - ```json - { - "loss": "MultipleNegativesRankingLoss", - "matryoshka_dims": [ - 1024, - 512, - 256, - 128, - 64 - ], - "matryoshka_weights": [ - 1, - 1, - 1, - 1, - 1 - ], - "n_dims_per_step": -1 - } - ``` - -#### yahoo-answers - -* Dataset: [yahoo-answers](https://huggingface.co/datasets/sentence-transformers/yahoo-answers) at [93b3605](https://huggingface.co/datasets/sentence-transformers/yahoo-answers/tree/93b3605c508cf93e3666c9d3e34640b5fe62b507) -* Size: 599,417 training samples -* Columns: question and answer -* Approximate statistics based on the first 1000 samples: - | | question | answer | - |:--------|:-------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------| - | type | string | string | - | details | | | -* Samples: - | question | answer | - |:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| - | Nursery rhyme 
- "down by the river where the green grass grows..." Do you know of any other variations? I first heard this rhyme while watching one of the episodes in series five (2003) of "Cold Feet", a Granada Television production in the UK. It occured when Adam Williams (James Nesbitt) takes his baby son Matthew to a parent and baby singing circle. The words (as I remember them) are as follows:\n\nDown by the river where the green grass grows,\nthere sits XXX washing his clothes,\nhe sings, he sings, a song so sweet,\ncrying to his playmates up and down the street.\n\nThe song seemed to be intended for group singing; the same verse being sung repeatedly around the group like a game, using different group member's names where the XXX is.\n\nThe reason why I remembered it, and still recall it now, is that's it's probably the most absurd rhyme I've ever heard - and because of that it's also the most hilarious(!) The "Cold Feet" presentation of the song was all the more hilarious owing to the seriousness by which the group's singing leader enforced the rules of this singing game. | Nursery rhyme - "down by the river where the green grass grows..." Do you know of any other variations? Down by the river where the green grass grows\nThere sits _______ sweet as a rose\nShe sings, she sings, she she sings so sweet\nAlong came ______ and kissed her on the cheek.\n\nWhen I was a child we jumped rope to this rhyme, | - | Who believes in Buddishm? We are learning about ancient India in my class. The book started talking about Buddishm, and the religion sounds intersting. I want to follow the religion, but I'm not sure. Can anyone tell me some tips about Buddihsm or meditatoin, or yoga. And can anyone tell me if I'm doing something wrong? | Who believes in Buddishm? Since you are interested in learning, you are obviously not doing something wrong. There is a great book called "Buddhism: plain and simple" by Steve Hagen, it costs about 7 bucks on amazon and is a good introduction to Buddhism.\n\nAfter reading this short and easy book, you will realize that buddhism is not really a religion to follow, but simply a way of looking at the world that makes a lot of sense and can easily be applied to your everyday life, if you choose. Buddhists don't worship the buddha and he is not a God, just a person who lived once and no longer does. \n\nUnlike most religions, There is no reason for anyone to ever know that you regard buddhist teachings as useful, and you can practice other religions while studying buddhism and engaging and buddhist practices. Buddhists don't usually come out and say "I'm a Buddhist" except if they are trying to make their parents mad or something, which of course is not exactly part of the teachings.\n\nAlso, buddhism does involve the concept of karma, but it's not like do good things so good things will happen to you, it's more like, try to want good things a little less and fear bad things a little less because being trapped in desire and fear can prevent you from seeing certain truths. | - | I need the lyrics for Tenn. sat. night by red foley please can"t find them on the web? Some of the lyrics go like this listen while i tell you about a place i know down in tenn. where the tall corn grow thank you Earl | I need the lyrics for Tenn. sat. night by red foley please can"t find them on the web? 
TENNESSEE SATURDAY NIGHT\nRecorded by Red Foley\nWritten by Billy Hughes\n\n[A] Now, listen while I tell you 'bout a place I know\nDown in Tennessee where the [A7] tall corn grows\n[D] Hidden from the world in a bunch of pines\nWhere the [A] moon's a little bashful and it seldom shines\n[E] Civilized people live there alright\nAh, but they [*A] [P] all go native on Saturday Night. [A]\n[A*] = Strum once.\n\nOh, well the music is a fiddle and a cracked guitar\nThey get their kicks from an old fruit jar\nThey do the boogie to an old square dance\nThe woods are full of couples lookin' for romance\nThey struggle and they shuffle till broad daylight\nYes, they all go native on Saturday night.\n\nWhen they really get together there's a lot of fun\nThey all know the other fellow packs a gun\nEv'rybody does his best to act just right\n'Cause there's gonna be funeral if you start a fight\nSomebody takes a brogan and knocks out the light\nYes, they all go native on Saturday night.\n\nWell, now you've heard my story 'bout a place I know\nDown in Tennessee where the tall corn grows\nHidden from the world in a bunch of pines\nWhere the moon's a little bashful and it seldom shines\nCivilized people live there alright\nAh, but they all go native on Saturday Night. | -* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: - ```json - { - "loss": "MultipleNegativesRankingLoss", - "matryoshka_dims": [ - 1024, - 512, - 256, - 128, - 64 - ], - "matryoshka_weights": [ - 1, - 1, - 1, - 1, - 1 - ], - "n_dims_per_step": -1 - } - ``` - -#### eli5 - -* Dataset: [eli5](https://huggingface.co/datasets/sentence-transformers/eli5) at [6e3b20a](https://huggingface.co/datasets/sentence-transformers/eli5/tree/6e3b20a7a427560845779f217c3ff71493374bc0) -* Size: 325,475 training samples -* Columns: question and answer -* Approximate statistics based on the first 1000 samples: - | | question | answer | - |:--------|:------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------| - | type | string | string | - | details | | | -* Samples: - | question | answer | - |:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| - | Why does consuming alcohol have a more voluminous history than that of marijuana, when one is a naturally occurring plant while 
the other takes more of a process? | Water was often considered unsafe to drink depending on how prevalent things like cholera, dysentery, cryptosporidium, etc... were. When you ferment a mash you have to boil it first if you want to make alcohol, otherwise you'll be brewing vinegar most of the time thanks to bacteria that eat the alcohol and excrete acetic acid. Boiling the water sterilizes it, and the alcohol itself is slightly antiseptic even in low concentrations. Another factor is that alcohol has caloric content. You can't subsist on cannabis, whereas although inadvisable you can keep alive on beer and whiskey for quite some time. Once the opium trade started laudanum, opium in any form was cheap and easy to get. Opium dens were common throughout much of the 17th and 18th century. Most medicine contained opium. Snake oil salesmen didn't often sell snake oil most of their elixirs were mostly opium. So for recreational drug use there were cheaper, stronger options. | - | Why have we found so many more dinosaur bones from hundreds of millions of years ago than our ancestors from over 40,000 years ago? | Dinosaurs were around for about 165 million years, and a lot of them were *huge*. The genus *Homo* is only a few million years old, and *Homo Sapiens* have only been kicking around for about 200,000 years. Compared to dinosaurs we are tiny, and haven't been around for very long at all. Which makes our fossils a lot scarcer than theirs. | - | How does a song get stuck in your head? | I've read (do not recall where) that the task-oriented part of your brain interprets the partial song stuck in your head as something that needs to be accomplished, and bugs the shit out of the rest of your brain until it forgets. | -* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: - ```json - { - "loss": "MultipleNegativesRankingLoss", - "matryoshka_dims": [ - 1024, - 512, - 256, - 128, - 64 - ], - "matryoshka_weights": [ - 1, - 1, - 1, - 1, - 1 - ], - "n_dims_per_step": -1 - } - ``` - -#### amazon-qa - -* Dataset: [amazon-qa](https://huggingface.co/datasets/sentence-transformers/amazon-qa) at [614a800](https://huggingface.co/datasets/sentence-transformers/amazon-qa/tree/614a80086ee434a4fdbb8629f07c6c2374df840b) -* Size: 1,500,000 training samples -* Columns: query and answer -* Approximate statistics based on the first 1000 samples: - | | query | answer | - |:--------|:-------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------| - | type | string | string | - | details | | | -* Samples: - | query | answer | - |:---------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| - | Is Thiis Compatable woth Windows 7?? | I only use it now to recharge the battery. I just use the SD card to transfer the pictures. so I don't have the software installed . It should work fine. 
| - | I want to buy this for a 13.5 amp electric snowblower and i will be plugging it in to a 15 amp plug outside my house. Will this work? | This is why I bought it and used it last winter. It work fine for me. Before this I was using three 25 foot cords that I had for general use. They were always unplugging It is heavy as there is a lot of copper in it. | - | Are these flowers fresh or dried | These are fresh. They come in a styro container with a cold pack. They are almost like a dandelion. You bite into them. About 3-4 bites per button. They stay refrigerated. They last about 3 weeks. After that they grow mold on them. Definitely a party hit. Bring them to a bar and it's definitely a crazy experiance. You definitely are the talk of the party. | -* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: - ```json - { - "loss": "MultipleNegativesRankingLoss", - "matryoshka_dims": [ - 1024, - 512, - 256, - 128, - 64 - ], - "matryoshka_weights": [ - 1, - 1, - 1, - 1, - 1 - ], - "n_dims_per_step": -1 - } - ``` - -#### squad - -* Dataset: [squad](https://huggingface.co/datasets/sentence-transformers/squad) at [d84c8c2](https://huggingface.co/datasets/sentence-transformers/squad/tree/d84c8c2ef64693264c890bb242d2e73fc0a46c40) -* Size: 87,599 training samples -* Columns: question and answer -* Approximate statistics based on the first 1000 samples: - | | question | answer | - |:--------|:------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------| - | type | string | string | - | details | | | -* Samples: - | question | answer | - |:------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| - | What percentage of Egyptians polled support death penalty for those leaving Islam? | The Pew Forum on Religion & Public Life ranks Egypt as the fifth worst country in the world for religious freedom. The United States Commission on International Religious Freedom, a bipartisan independent agency of the US government, has placed Egypt on its watch list of countries that require close monitoring due to the nature and extent of violations of religious freedom engaged in or tolerated by the government. According to a 2010 Pew Global Attitudes survey, 84% of Egyptians polled supported the death penalty for those who leave Islam; 77% supported whippings and cutting off of hands for theft and robbery; and 82% support stoning a person who commits adultery. | - | Ann Arbor ranks 1st among what goods sold? 
| The Ann Arbor Hands-On Museum is located in a renovated and expanded historic downtown fire station. Multiple art galleries exist in the city, notably in the downtown area and around the University of Michigan campus. Aside from a large restaurant scene in the Main Street, South State Street, and South University Avenue areas, Ann Arbor ranks first among U.S. cities in the number of booksellers and books sold per capita. The Ann Arbor District Library maintains four branch outlets in addition to its main downtown building. The city is also home to the Gerald R. Ford Presidential Library. | - | In developing countries, who makes most of the spending decisions? | One important aspect of the rule-of-law initiatives is the study and analysis of the rule of law’s impact on economic development. The rule-of-law movement cannot be fully successful in transitional and developing countries without an answer to the question: does the rule of law matter for economic development or not? Constitutional economics is the study of the compatibility of economic and financial decisions within existing constitutional law frameworks, and such a framework includes government spending on the judiciary, which, in many transitional and developing countries, is completely controlled by the executive. It is useful to distinguish between the two methods of corruption of the judiciary: corruption by the executive branch, in contrast to corruption by private actors. | -* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: - ```json - { - "loss": "MultipleNegativesRankingLoss", - "matryoshka_dims": [ - 1024, - 512, - 256, - 128, - 64 - ], - "matryoshka_weights": [ - 1, - 1, - 1, - 1, - 1 - ], - "n_dims_per_step": -1 - } - ``` - -#### natural_questions - -* Dataset: [natural_questions](https://huggingface.co/datasets/sentence-transformers/natural-questions) at [f9e894e](https://huggingface.co/datasets/sentence-transformers/natural-questions/tree/f9e894e1081e206e577b4eaa9ee6de2b06ae6f17) -* Size: 100,231 training samples -* Columns: query and answer -* Approximate statistics based on the first 1000 samples: - | | query | answer | - |:--------|:-----------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------| - | type | string | string | - | details | | | -* Samples: - | query | answer | - 
|:--------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| - | who are the basques and where do they live | Basques The Basques (/bɑːsks/ or /bæsks/; Basque: euskaldunak [eus̺kaldunak]; Spanish: vascos [ˈbaskos]; French: basques [bask]) are an indigenous ethnic group[5][6][7] characterised by the Basque language, a common culture and shared ancestry to the ancient Vascones and Aquitanians.[8] Basques are indigenous to and primarily inhabit an area traditionally known as the Basque Country (Basque: Euskal Herria), a region that is located around the western end of the Pyrenees on the coast of the Bay of Biscay and straddles parts of north-central Spain and south-western France. | - | where does the name led zeppelin come from | Led Zeppelin The band completed the Scandinavian tour as the New Yardbirds, playing together for the first time in front of a live audience at Gladsaxe Teen Clubs in Gladsaxe, Denmark, on 7 September 1968.[13] Later that month, they began recording their first album, which was based on their live set. The album was recorded and mixed in nine days, and Page covered the costs.[14] After the album's completion, the band were forced to change their name after Dreja issued a cease and desist letter, stating that Page was allowed to use the New Yardbirds moniker for the Scandinavian dates only.[15] One account of how the new band's name was chosen held that Moon and Entwistle had suggested that a supergroup with Page and Beck would go down like a "lead balloon", an idiom for disastrous results.[16] The group dropped the 'a' in lead at the suggestion of their manager, Peter Grant, so that those unfamiliar with the term would not pronounce it "leed".[17] The word "balloon" was replaced by "zeppelin", a word which, according to music journalist Keith Shadwick, brought "the perfect combination of heavy and light, combustibility and grace" to Page's mind.[16] | - | where is the heart located in the rib cage | Thorax The anatomy of the chest can also be described through the use of anatomical landmarks. The nipple in the male is situated in front of the fourth rib or a little below; vertically it lies a little external to a line drawn down from the middle of the clavicle; in the female it is not so constant. 
A little below it the lower limit of the great pectoral muscle is seen running upward and outward to the axilla; in the female this is obscured by the breast, which extends from the second to the sixth rib vertically and from the edge of the sternum to the mid-axillary line laterally. The female nipple is surrounded for half an inch by a more or less pigmented disc, the areola. The apex of a normal heart is in the fifth left intercostal space, three and a half inches from the mid-line. | -* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: - ```json - { - "loss": "MultipleNegativesRankingLoss", - "matryoshka_dims": [ - 1024, - 512, - 256, - 128, - 64 - ], - "matryoshka_weights": [ - 1, - 1, - 1, - 1, - 1 - ], - "n_dims_per_step": -1 - } - ``` - -#### hotpotqa - -* Dataset: [hotpotqa](https://huggingface.co/datasets/sentence-transformers/hotpotqa) at [f07d3cd](https://huggingface.co/datasets/sentence-transformers/hotpotqa/tree/f07d3cd2d290ea2e83ed35e33d67d6a4658b8786) -* Size: 84,516 training samples -* Columns: anchor, positive, and negative -* Approximate statistics based on the first 1000 samples: - | | anchor | positive | negative | - |:--------|:-------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------| - | type | string | string | string | - | details | | | | -* Samples: - | anchor | positive | negative | - |:------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| - | What was the occupation of the man who built a small, fast boat designed with a long narrow platform that could reach high speeds? | Donald Aronow Donald Joel Aronow (March 3, 1927 – February 3, 1987) was an American designer, builder and racer of the famous Magnum Marine, Cary, Cigarette, Donzi, and Formula speedboats. He built speedboats for the Shah of Iran, Charles Keating, Robert Vesco, Malcolm Forbes, and George H. W. Bush. Retired President Lyndon Johnson owned several 16 ft. Donzi speedboats on his Texas ranch with which he would race his Secret Service agents. | Charles Brady King Charles Brady King (February 2, 1868 – June 22, 1957) was an American engineer and entrepreneur remembered as an automotive pioneer, artist, etcher, musician, poet, architect, mystic, industrialist and inventor. | - | The Netherlands were defeated in the 2010 FIFA World Cup Final with a goal from a footballer who plays what position? 
| 2010 FIFA World Cup Final The 2010 FIFA World Cup Final (also known as the Battle of Johannesburg) was a football match that took place on 11 July 2010 at Soccer City in Johannesburg, South Africa, to determine the winner of the 2010 FIFA World Cup. Spain defeated the Netherlands 1–0 with a goal from Andrés Iniesta four minutes from the end of extra time. English referee Howard Webb was selected to officiate the match, which was marked by an unusually high number of yellow cards. | Ronald de Boer Ronaldus "Ronald" de Boer (] ; born 15 May 1970) is a Dutch former football midfielder who played for the Netherlands national team as well as a host of professional clubs in Europe. He is the older twin brother of Frank de Boer. | - | What county is WJMF-LP locacted? | WJMF-LP WJMF-LP is a low-power television station in Jackson, Mississippi. The station, which currently operates on Channel 6, is owned by Rainey Radio. The station currently acts as a radio station broadcasting a Oldies & Classic Hits format as "EZ 87.7", taking advantage of that station's audio signal on 87.75 MHz FM. | WFBF WFBF (89.9 FM) is a radio station broadcasting a Religious format. Licensed to Buffalo, New York, United States, the station serves the greater Buffalo area. The station is owned by Family Stations, Inc.. | -* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: - ```json - { - "loss": "MultipleNegativesRankingLoss", - "matryoshka_dims": [ - 1024, - 512, - 256, - 128, - 64 - ], - "matryoshka_weights": [ - 1, - 1, - 1, - 1, - 1 - ], - "n_dims_per_step": -1 - } - ``` - -#### quora_duplicates - -* Dataset: [quora_duplicates](https://huggingface.co/datasets/sentence-transformers/quora-duplicates) at [451a485](https://huggingface.co/datasets/sentence-transformers/quora-duplicates/tree/451a4850bd141edb44ade1b5828c259abd762cdb) -* Size: 149,263 training samples -* Columns: anchor and positive -* Approximate statistics based on the first 1000 samples: - | | anchor | positive | - |:--------|:------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------| - | type | string | string | - | details | | | -* Samples: - | anchor | positive | - |:----------------------------------------------------------------------|:--------------------------------------------------------------------------| - | How do I promote my youtube videos? | What is the best way to promote your YouTube Video? | - | Why do some people think the Earth is flat? | Why are there still people who think that the Earth is flat? | - | How can I solve rubix cube? Is there any formula for it? | How do you solve a Rubik's Cube? 
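Whether a section supplies (anchor, positive) pairs or (anchor, positive, negative) triplets, MultipleNegativesRankingLoss treats every other positive in the batch as an additional negative for a given anchor. The snippet below is only a rough illustration of that objective, not the library implementation, with random tensors standing in for model outputs:

```python
import torch
import torch.nn.functional as F

def in_batch_negatives_loss(anchors: torch.Tensor, positives: torch.Tensor,
                            scale: float = 20.0) -> torch.Tensor:
    # Cosine-similarity matrix between every anchor and every positive in the batch.
    a = F.normalize(anchors, dim=-1)
    p = F.normalize(positives, dim=-1)
    scores = a @ p.T * scale
    # Anchor i should rank its own positive i above all other in-batch positives.
    labels = torch.arange(scores.size(0))
    return F.cross_entropy(scores, labels)

loss = in_batch_negatives_loss(torch.randn(8, 64), torch.randn(8, 64))
print(loss.item())
```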
| -* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: - ```json - { - "loss": "MultipleNegativesRankingLoss", - "matryoshka_dims": [ - 1024, - 512, - 256, - 128, - 64 - ], - "matryoshka_weights": [ - 1, - 1, - 1, - 1, - 1 - ], - "n_dims_per_step": -1 - } - ``` - -#### quora_triplets - -* Dataset: [quora_triplets](https://huggingface.co/datasets/sentence-transformers/quora-duplicates) at [451a485](https://huggingface.co/datasets/sentence-transformers/quora-duplicates/tree/451a4850bd141edb44ade1b5828c259abd762cdb) -* Size: 101,762 training samples -* Columns: anchor, positive, and negative -* Approximate statistics based on the first 1000 samples: - | | anchor | positive | negative | - |:--------|:------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------| - | type | string | string | string | - | details | | | | -* Samples: - | anchor | positive | negative | - |:---------------------------------------------------|:---------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| - | What are the best ways to meet boys? | What's the best way to meet boys? | What is the best way to meet girls? | - | Do Chinese people eat babies/fetuses? | Is it true that people eat baby fetuses in China? If so, why do they do such a thing? | Chinese people: Do Chinese newborn babies look different? | - | Is world war 3 likely? | How likely is World War III in the future? | Since the UN is unable to control terrorism and groups like ISIS, al-Qaeda and countries that promote terrorism (even though it consumed those countries), can we assume that the world is heading towards World War III? 
| -* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: - ```json - { - "loss": "MultipleNegativesRankingLoss", - "matryoshka_dims": [ - 1024, - 512, - 256, - 128, - 64 - ], - "matryoshka_weights": [ - 1, - 1, - 1, - 1, - 1 - ], - "n_dims_per_step": -1 - } - ``` - -#### specter - -* Dataset: [specter](https://huggingface.co/datasets/sentence-transformers/specter) at [9a12e84](https://huggingface.co/datasets/sentence-transformers/specter/tree/9a12e84c5dfd3b358fea6d7db3aaf0b70b1afe7b) -* Size: 684,098 training samples -* Columns: anchor, positive, and negative -* Approximate statistics based on the first 1000 samples: - | | anchor | positive | negative | - |:--------|:------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------| - | type | string | string | string | - | details | | | | -* Samples: - | anchor | positive | negative | - |:--------------------------------------------------------------------------------|:-------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------| - | an introduction to privacy - preserving data mining . | Personalized privacy preservation | an integrated e - recruitment system for automated personality mining and applicant ranking . | - | Exploiting Semantic Information and Deep Matching for Optical Flow | Object scene flow for autonomous vehicles | Sequential Attention | - | The Uses of Big Data in Cities | Smart Cities in Europe | Channel Selection for Network-assisted D2D Communication via No-Regret Bandit Learning with Calibrated Forecasting | -* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: - ```json - { - "loss": "MultipleNegativesRankingLoss", - "matryoshka_dims": [ - 1024, - 512, - 256, - 128, - 64 - ], - "matryoshka_weights": [ - 1, - 1, - 1, - 1, - 1 - ], - "n_dims_per_step": -1 - } - ``` - -#### stackexchange_bbp - -* Dataset: [stackexchange_bbp](https://huggingface.co/datasets/sentence-transformers/stackexchange-duplicates) at [1c9657a](https://huggingface.co/datasets/sentence-transformers/stackexchange-duplicates/tree/1c9657aec12d9e101667bb9593efcc623c4a68ff) -* Size: 250,460 training samples -* Columns: body1 and body2 -* Approximate statistics based on the first 1000 samples: - | | body1 | body2 | - |:--------|:--------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------| - | type | string | string | - | details | | | -* Samples: - | body1 | body2 | - 
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| - | Is there a multi-threaded cp command on Linux? I know how to do this on Windows, but I don't know how this is approached in a Linux environment. | I'm currently having trouble with dd invoked with a sparse file as input (if) and a file as output (of) with conv=sparse. dd seems to be using one core of the CPU (Intel(R) Core(TM) i7-3632QM CPU @ 2.20GHz 4 cores + 4 Intel Hyperthreads) only (100 % of 1 core), so I've been wondering whether it's possible to parallelize dd. I've been looking into info dd and man dd and there seems to built-in function in the version of corutils 8.23 checking sgp_dd from sg3-utils package (without understanding whether it suits my needs), but it doesn't seem to be able to handle sparse files dcfldd doesn't seems to have parallelization capabilities AFAIK an enhanced version/fork with internal handling of program parts in multiple threads (avoid context changes killing I/O performance) is preferred over a solution with GNU parallel running locally is preferred over a custom (possibly untested) code sniplet How to avoid CPU being the bottleneck of an I/O intensive operation? I'd like to run the command on Ubuntu 14.04 with Linux 3.13 and handle sparse file disk images with it on any filesystem supporting sparse file (at least the solution shouldn't be bound to one specific file system). 
Background: I'm trying to create a copy of 11TB sparse file (containing about 2TB data) on a zfs (zfsonlinux 0.6.4 unstable version, possibly buggy and the cause for the CPU bottleneck (eventually slow hole search)). That shouldn't change anything for the question of how to parallelize dd (in a very generic way). | - | I remember watching this as a child, it was very scary for me, I had nightmares. I think it's around 25-30 years old. It's happening on an airport, there is some time traveling included I think, and then some huge round creatures with mouths full of scary teeth like saws appear and eat everything. | I thought it was The Tommyknockers by Stephen King, but after a check with Wikipedia, it appears that's not the case. What I remember about the story (a TV movie I think, but definitely based on a book) was that the world is created anew every night, and that the old, used up, Earth was eaten nightly by some scary creatures. The major plot point involved some protagonists not getting onto the new world at midnight, and having to escape from the monsters that were eating up yesterday's planet. Does this ring a bell for anyone? I'm finding it very hard to Google for. | - | $\mid\sum_{i=1}^{n} x_i\mid\leq \sum_{i=1}^{n}\mid x_i\mid$ If $n$ is even we will divide the sum into groups of $2$ $x$'s namely $\mid x+x \mid \leq \mid x\mid+\mid x \mid$ and will repeat the process to get $\mid\sum_{i=1}^{n} x_i\mid\leq \sum_{i=1}^{n}\mid x_i\mid$ If $n$ is odd, we will divide the sum into an even number of $x$'s called $a$ and the leftover $x$ called $b$, by using the proof for even $n$ and the triangle inequality $\mid a+b\mid\leq \mid a \mid +\mid b \mid= \mid\sum_{i=1}^{n} x_i\mid\leq \sum_{i=1}^{n}\mid x_i\mid$ Is the proof valid? | I am trying to prove: $P(n): |x_1| + \cdots + |x_n| \leq |x_1 + \cdots +x_n|$ for all natural numbers $n$. The $x_i$ are real numbers. Base: Let $n =1$: we have $|x_1| \leq |x_1|$ which is clearly true Step: Let $k$ exist in the integers such that $k \geq 1$ and assume $P(k)$ is true. This is where I am lost. I do not see how to leverage the induction hypothesis. Here is my latest approach: Can you do the following in the induction step: Let $Y$ = |$x_1$ +...+$x_n$| and Let $Z$ = |$x_1$| +...+ |$x_n$| Then we have: |$Y$ + $x_n+1$| $\leq$ $Z$ + |$x_n+1$|. $Y$ $\leq$ $Z$ from the induction step, and then from the base case this is just another triangle inequality. End of proof. 
| -* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: - ```json - { - "loss": "MultipleNegativesRankingLoss", - "matryoshka_dims": [ - 1024, - 512, - 256, - 128, - 64 - ], - "matryoshka_weights": [ - 1, - 1, - 1, - 1, - 1 - ], - "n_dims_per_step": -1 - } - ``` - -#### stackexchange_ttp - -* Dataset: [stackexchange_ttp](https://huggingface.co/datasets/sentence-transformers/stackexchange-duplicates) at [1c9657a](https://huggingface.co/datasets/sentence-transformers/stackexchange-duplicates/tree/1c9657aec12d9e101667bb9593efcc623c4a68ff) -* Size: 304,525 training samples -* Columns: title1 and title2 -* Approximate statistics based on the first 1000 samples: - | | title1 | title2 | - |:--------|:-----------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------| - | type | string | string | - | details | | | -* Samples: - | title1 | title2 | - |:-----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------| - | The speed of the outer stars of galaxies | Relativity and Galaxy Rotation Speed | - | How to add signature to Preview on a Mac Mini? | How can I use the "signature" function in Preview on Mac Mini with no camera? | - | The double pointer points to the int variable to overwrite the memory | What is the strict aliasing rule? | -* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: - ```json - { - "loss": "MultipleNegativesRankingLoss", - "matryoshka_dims": [ - 1024, - 512, - 256, - 128, - 64 - ], - "matryoshka_weights": [ - 1, - 1, - 1, - 1, - 1 - ], - "n_dims_per_step": -1 - } - ``` - -#### stackexchange_ppp +```python +from wordllama import WordLlama -* Dataset: [stackexchange_ppp](https://huggingface.co/datasets/sentence-transformers/stackexchange-duplicates) at [1c9657a](https://huggingface.co/datasets/sentence-transformers/stackexchange-duplicates/tree/1c9657aec12d9e101667bb9593efcc623c4a68ff) -* Size: 250,519 training samples -* Columns: post1 and post2 -* Approximate statistics based on the first 1000 samples: - | | post1 | post2 | - |:--------|:--------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------| - | type | string | string | - | details | | | -* Samples: - | post1 | post2 | - 
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| - | Electrostatic force between 2 shells. How to calculate it? I`ve been looking for this for some time now: I have two thin shells (S1 and S2) of radii R1 and R2, charged negatively C1 and C2 respectively, separated in space by a distance D. What is the force exerted by S1 on S2? (For my purposes, the charge can remain constantly distributed on the shell. Non-conductor, I'm guessing.) What do I have to read to learn this? I know Coulomb's equation (F=kQ1Q2/d^2). (If relevant, I need this to emulate repulsion in a very crude ion simulation, where the attraction is emulated by considering the ions point charges. Considering the electron shells as points would be great, but obviously doesn't work... that's when the sphere approximation should do the trick.) Thank you in advance. | What's the exact gravitational force between spherically symmetric masses? Consider spherical symmetric$^1$ masses of radii $R_1$ and $R_2$, with spherical symmetric density distributions $\rho_1(r_1)$ and $\rho_2(r_2)$, and with a distance between the centers of the spheres $d$. What is the exact force between them? I know point masses are a good approximation, but I'm looking for an exact formula. This would be useful for a gravity-simulation toy software. -- $^1$ Assume for simplicity the idealization where or centrifugal forces do not deform the spherical symmetric, i.e., the various mass parts are held in place by infinitely strong and rigid bonds. | - | Perform PCA. Extract PCs. Can one then tell what the most important _original_ features were, from the PCs? Suppose that you have 1000 features, and a data set made up of say, 50,000 points. Suppose then that we perform PCA, and we extract the top 5 PCs, since they explain 99.99 percent of the variance, and thats all we care about. From those top 5 PCs, can we 'go backwards', and be able to decipher, what the most 'important' features were from the original 1000? For example, can we answers the question, "What combination of my original 1000 features were responsible for my top PC?" Thank you. 
| Using principal component analysis (PCA) for feature selection I'm new to feature selection and I was wondering how you would use PCA to perform feature selection. Does PCA compute a relative score for each input variable that you can use to filter out noninformative input variables? Basically, I want to be able to order the original features in the data by variance or amount of information contained. | - | Where should I exchange euros to rubles? Where should I exchange euros to rubles once I arrive in Russia (Example: Moscow (at any of the three airports))? Is there a special place directly at/near the airport that takes care of this? Or Should I exchange euros to rubles while in home country (France). It was mentioned on site that: It's better to change USD or Euro in banks, it's more safer. At the airport there`s not best rates. Is that even true? Or Can my visa card withdraw rubbles at an ATM in Russia? | What is the best way to change money in Moscow? I have Euros. I will be in Russia along the Trans-siberian railway for about 12 days. I might go to Moscow by plane or by train. So, what is the best way to change money? I do not know how much I will spend. | -* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: - ```json - { - "loss": "MultipleNegativesRankingLoss", - "matryoshka_dims": [ - 1024, - 512, - 256, - 128, - 64 - ], - "matryoshka_weights": [ - 1, - 1, - 1, - 1, - 1 - ], - "n_dims_per_step": -1 - } - ``` +# Load pre-trained embeddings +# truncate dimension to 64 +wl = WordLlama.load(trunc_dim=64) -#### altlex +# Embed text +embeddings = wl.embed(["the quick brown fox jumps over the lazy dog", "and all that jazz"]) +print(embeddings.shape) # (2, 64) +``` -* Dataset: [altlex](https://huggingface.co/datasets/sentence-transformers/altlex) at [97eb209](https://huggingface.co/datasets/sentence-transformers/altlex/tree/97eb20963455c361d5a81c107c3596cff9e0cd82) -* Size: 112,696 training samples -* Columns: text and simplified -* Approximate statistics based on the first 1000 samples: - | | text | simplified | - |:--------|:-------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------| - | type | string | string | - | details | | | -* Samples: - | text | simplified | - |:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| - | It is also the `` arrondissement '' with more people living there , with 192,432 inhabitants and a population density of inhabitants/km ² . | It is also the `` arrondissement '' with more people living there , with 192,432 inhabitants and a population density of 63.5886590443 / km2 inhabitants/km ² . | - | He was drafted by the Giants in the third round of the 2008 NFL Draft . | He was drafted by the New York Giants in the third round of the 2008 NFL Draft . 
| - | By September 1 a cyclonic circulation with a diameter of 690 mile ( 1115 km ) was evident on satellite imagery , well to the southeast of the Cape Verde Islands , and several hurricane forecast models anticipated development and strengthening . | By September 1 a cyclonic circulation with a diameter of 690 mile ( 1115 km ) was seen on satellite imagery , well to the southeast of the Cape Verde Islands , and several hurricane forecast models expected development and strengthening . | -* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: - ```json - { - "loss": "MultipleNegativesRankingLoss", - "matryoshka_dims": [ - 1024, - 512, - 256, - 128, - 64 - ], - "matryoshka_weights": [ - 1, - 1, - 1, - 1, - 1 - ], - "n_dims_per_step": -1 - } - ``` +Binary embedding models can be used like this: -### Evaluation Datasets +```python +# Binary embeddings are packed into uint32 +# 64-dims => array of 2x uint32 +wl = WordLlama.load(trunc_dim=64, binary=True) # this will download the binary model from huggingface +wl.embed("I went to the car") # Output: array([[3029168104, 2427562626]], dtype=uint32) -#### all-nli +# load binary trained model trained with straight through estimator +wl = WordLlama.load(dim=1024, binary=True) -* Dataset: [all-nli](https://huggingface.co/datasets/sentence-transformers/all-nli) at [d482672](https://huggingface.co/datasets/sentence-transformers/all-nli/tree/d482672c8e74ce18da116f430137434ba2e52fab) -* Size: 6,584 evaluation samples -* Columns: anchor, positive, and negative -* Approximate statistics based on the first 1000 samples: - | | anchor | positive | negative | - |:--------|:-----------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------| - | type | string | string | string | - | details | | | | -* Samples: - | anchor | positive | negative | - |:---------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------|:------------------------------------------------| - | So is the salt, drying in the huge, square pans at Las Salinas in the south. | Salt dries in pans at Las Salinas. | Pepper is made wet in Las Salinas. | - | A group of young men in sports uniforms from two opposing teams are in vigorous play on a grassy field. | Men in a sports team play on the field | Women play sports in the field | - | A man standing on a street in the city playing the accordion. | A street performer with his accordion. | A man is sitting on a bench. 
| -* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: - ```json - { - "loss": "MultipleNegativesRankingLoss", - "matryoshka_dims": [ - 1024, - 512, - 256, - 128, - 64 - ], - "matryoshka_weights": [ - 1, - 1, - 1, - 1, - 1 - ], - "n_dims_per_step": -1 - } - ``` +# Uses the hamming similarity to binarize +similarity_score = wl.similarity("i went to the car", "i went to the pawn shop") +print(similarity_score) # Output: 0.57421875 -#### stsb +ranked_docs = wl.rank("i went to the car", ["van", "truck"]) -* Dataset: [stsb](https://huggingface.co/datasets/sentence-transformers/stsb) at [ab7a5ac](https://huggingface.co/datasets/sentence-transformers/stsb/tree/ab7a5ac0e35aa22088bdcf23e7fd99b220e53308) -* Size: 1,379 evaluation samples -* Columns: sentence1, sentence2, and score -* Approximate statistics based on the first 1000 samples: - | | sentence1 | sentence2 | score | - |:--------|:------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------|:---------------------------------------------------------------| - | type | string | string | float | - | details | | | | -* Samples: - | sentence1 | sentence2 | score | - |:----------------------------------------------------------------------------------|:------------------------------------------------------|:-----------------| - | Two men are dancing together. | A woman opens a closet. | 0.0 | - | It depends on what you want to do next, and where you want to do it. | It depends on what you want to achieve. | 0.6 | - | You may have to experiment and find what you like. | You have to find out what works for you. | 1.0 | -* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: - ```json - { - "loss": "MultipleNegativesRankingLoss", - "matryoshka_dims": [ - 1024, - 512, - 256, - 128, - 64 - ], - "matryoshka_weights": [ - 1, - 1, - 1, - 1, - 1 - ], - "n_dims_per_step": -1 - } - ``` +wl.binary = False # turn off hamming and use cosine -### Training Hyperparameters -#### Non-Default Hyperparameters +# load a different model class +wl = WordLlama.load(config="l3_supercat", dim=1024) # downloads model from HF +``` -- `eval_strategy`: steps -- `per_device_train_batch_size`: 512 -- `learning_rate`: 0.0003 -- `warmup_steps`: 256 -- `fp16`: True +## Training Notes -#### All Hyperparameters -
Click to expand +Binary embedding models showed more pronounced improvement at higher dimensions, and either 512 or 1024 is recommended for binary embedding. -- `overwrite_output_dir`: False -- `do_predict`: False -- `eval_strategy`: steps -- `prediction_loss_only`: True -- `per_device_train_batch_size`: 512 -- `per_device_eval_batch_size`: 8 -- `per_gpu_train_batch_size`: None -- `per_gpu_eval_batch_size`: None -- `gradient_accumulation_steps`: 1 -- `eval_accumulation_steps`: None -- `torch_empty_cache_steps`: None -- `learning_rate`: 0.0003 -- `weight_decay`: 0.0 -- `adam_beta1`: 0.9 -- `adam_beta2`: 0.999 -- `adam_epsilon`: 1e-08 -- `max_grad_norm`: 1.0 -- `num_train_epochs`: 3 -- `max_steps`: -1 -- `lr_scheduler_type`: linear -- `lr_scheduler_kwargs`: {} -- `warmup_ratio`: 0.0 -- `warmup_steps`: 256 -- `log_level`: passive -- `log_level_replica`: warning -- `log_on_each_node`: True -- `logging_nan_inf_filter`: True -- `save_safetensors`: True -- `save_on_each_node`: False -- `save_only_model`: False -- `restore_callback_states_from_checkpoint`: False -- `no_cuda`: False -- `use_cpu`: False -- `use_mps_device`: False -- `seed`: 42 -- `data_seed`: None -- `jit_mode_eval`: False -- `use_ipex`: False -- `bf16`: False -- `fp16`: True -- `fp16_opt_level`: O1 -- `half_precision_backend`: auto -- `bf16_full_eval`: False -- `fp16_full_eval`: False -- `tf32`: None -- `local_rank`: 0 -- `ddp_backend`: None -- `tpu_num_cores`: None -- `tpu_metrics_debug`: False -- `debug`: [] -- `dataloader_drop_last`: False -- `dataloader_num_workers`: 0 -- `dataloader_prefetch_factor`: None -- `past_index`: -1 -- `disable_tqdm`: False -- `remove_unused_columns`: True -- `label_names`: None -- `load_best_model_at_end`: False -- `ignore_data_skip`: False -- `fsdp`: [] -- `fsdp_min_num_params`: 0 -- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False} -- `fsdp_transformer_layer_cls_to_wrap`: None -- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None} -- `deepspeed`: None -- `label_smoothing_factor`: 0.0 -- `optim`: adamw_torch -- `optim_args`: None -- `adafactor`: False -- `group_by_length`: False -- `length_column_name`: length -- `ddp_find_unused_parameters`: None -- `ddp_bucket_cap_mb`: None -- `ddp_broadcast_buffers`: False -- `dataloader_pin_memory`: True -- `dataloader_persistent_workers`: False -- `skip_memory_metrics`: True -- `use_legacy_prediction_loop`: False -- `push_to_hub`: False -- `resume_from_checkpoint`: None -- `hub_model_id`: None -- `hub_strategy`: every_save -- `hub_private_repo`: False -- `hub_always_push`: False -- `gradient_checkpointing`: False -- `gradient_checkpointing_kwargs`: None -- `include_inputs_for_metrics`: False -- `eval_do_concat_batches`: True -- `fp16_backend`: auto -- `push_to_hub_model_id`: None -- `push_to_hub_organization`: None -- `mp_parameters`: -- `auto_find_batch_size`: False -- `full_determinism`: False -- `torchdynamo`: None -- `ray_scope`: last -- `ddp_timeout`: 1800 -- `torch_compile`: False -- `torch_compile_backend`: None -- `torch_compile_mode`: None -- `dispatch_batches`: None -- `split_batches`: None -- `include_tokens_per_second`: False -- `include_num_input_tokens_seen`: False -- `neftune_noise_alpha`: None -- `optim_target_modules`: None -- `batch_eval_metrics`: False -- `eval_on_start`: False -- `eval_use_gather_object`: False -- `batch_sampler`: batch_sampler -- 
`multi_dataset_batch_sampler`: proportional +L2 Supercat was trained using a batch size of 512 on a single A100 for 12 hours. -
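+
+As a quick illustration of the note above, the calls already shown in the usage examples can be combined to compare a high-dimensional binary load against a low-dimensional dense truncation. This is only a sketch, not part of the library's documented workflow, and the sentence pair is arbitrary:
+
+```python
+from wordllama import WordLlama
+
+# Illustrative sketch: contrast a 1024-dim binary model (hamming similarity)
+# with a 64-dim dense truncation (cosine similarity) on the same sentence pair.
+wl_binary = WordLlama.load(dim=1024, binary=True)
+wl_dense = WordLlama.load(trunc_dim=64)
+
+pair = ("i went to the car", "i went to the pawn shop")
+print("binary @ 1024 dims:", wl_binary.similarity(*pair))
+print("dense @ 64 dims:", wl_dense.similarity(*pair))
+```
+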
+## Roadmap -### Training Logs -
Click to expand +- Working on adding inference features: + - Semantic text splitting +- Add example notebooks + - DSPy evaluators + - RAG pipelines -| Epoch | Step | Training Loss | all-nli loss | stsb loss | sts-dev-1024_spearman_cosine | sts-dev-128_spearman_cosine | sts-dev-256_spearman_cosine | sts-dev-512_spearman_cosine | sts-dev-64_spearman_cosine | -|:------:|:-----:|:-------------:|:------------:|:---------:|:----------------------------:|:---------------------------:|:---------------------------:|:---------------------------:|:--------------------------:| -| 0.0111 | 250 | - | 8.7574 | 8.9801 | 0.7731 | 0.7643 | 0.7676 | 0.7709 | 0.7575 | -| 0.0222 | 500 | 12.8607 | 7.8157 | 9.4145 | 0.8051 | 0.8010 | 0.8027 | 0.8037 | 0.7947 | -| 0.0332 | 750 | - | 7.6363 | 9.3489 | 0.8115 | 0.8105 | 0.8112 | 0.8099 | 0.8031 | -| 0.0443 | 1000 | 8.5262 | 7.1231 | 9.6446 | 0.8134 | 0.8099 | 0.8116 | 0.8124 | 0.8016 | -| 0.0554 | 1250 | - | 7.0482 | 9.5721 | 0.8178 | 0.8156 | 0.8183 | 0.8176 | 0.8083 | -| 0.0665 | 1500 | 7.9474 | 7.0570 | 9.6487 | 0.8183 | 0.8178 | 0.8197 | 0.8180 | 0.8124 | -| 0.0775 | 1750 | - | 6.9422 | 9.9069 | 0.8222 | 0.8178 | 0.8208 | 0.8207 | 0.8116 | -| 0.0886 | 2000 | 7.4807 | 6.8832 | 9.7163 | 0.8222 | 0.8185 | 0.8215 | 0.8211 | 0.8132 | -| 0.0997 | 2250 | - | 6.7792 | 9.8574 | 0.8268 | 0.8235 | 0.8265 | 0.8268 | 0.8168 | -| 0.1108 | 2500 | 7.1821 | 6.7096 | 9.8983 | 0.8258 | 0.8233 | 0.8252 | 0.8248 | 0.8180 | -| 0.1218 | 2750 | - | 6.7428 | 10.1828 | 0.8285 | 0.8282 | 0.8282 | 0.8280 | 0.8203 | -| 0.1329 | 3000 | 7.0128 | 6.6502 | 10.1087 | 0.8274 | 0.8268 | 0.8273 | 0.8272 | 0.8194 | -| 0.1440 | 3250 | - | 6.7202 | 9.9754 | 0.8255 | 0.8257 | 0.8260 | 0.8254 | 0.8179 | -| 0.1551 | 3500 | 7.0972 | 6.6875 | 10.0659 | 0.8307 | 0.8295 | 0.8297 | 0.8299 | 0.8218 | -| 0.1661 | 3750 | - | 6.7054 | 9.8682 | 0.8291 | 0.8276 | 0.8282 | 0.8287 | 0.8218 | -| 0.1772 | 4000 | 7.4621 | 6.5801 | 10.0931 | 0.8302 | 0.8290 | 0.8297 | 0.8297 | 0.8219 | -| 0.1883 | 4250 | - | 6.6060 | 10.2889 | 0.8275 | 0.8256 | 0.8268 | 0.8269 | 0.8197 | -| 0.1994 | 4500 | 6.554 | 6.5631 | 10.0066 | 0.8288 | 0.8276 | 0.8282 | 0.8279 | 0.8233 | -| 0.2104 | 4750 | - | 6.5525 | 10.1221 | 0.8297 | 0.8266 | 0.8275 | 0.8288 | 0.8216 | -| 0.2215 | 5000 | 7.0091 | 6.5190 | 10.1191 | 0.8306 | 0.8294 | 0.8292 | 0.8302 | 0.8236 | -| 0.2326 | 5250 | - | 6.4706 | 10.2124 | 0.8339 | 0.8313 | 0.8321 | 0.8330 | 0.8248 | -| 0.2437 | 5500 | 6.5367 | 6.4550 | 10.0243 | 0.8300 | 0.8281 | 0.8285 | 0.8294 | 0.8203 | -| 0.2547 | 5750 | - | 6.3705 | 10.1453 | 0.8339 | 0.8320 | 0.8326 | 0.8332 | 0.8245 | -| 0.2658 | 6000 | 6.6695 | 6.4325 | 10.1527 | 0.8316 | 0.8298 | 0.8304 | 0.8306 | 0.8227 | -| 0.2769 | 6250 | - | 6.4018 | 10.1263 | 0.8324 | 0.8292 | 0.8302 | 0.8313 | 0.8227 | -| 0.2880 | 6500 | 7.0238 | 6.4006 | 10.0713 | 0.8330 | 0.8296 | 0.8305 | 0.8319 | 0.8223 | -| 0.2990 | 6750 | - | 6.3488 | 10.0695 | 0.8344 | 0.8317 | 0.8331 | 0.8334 | 0.8248 | -| 0.3101 | 7000 | 6.6325 | 6.3070 | 10.2803 | 0.8331 | 0.8289 | 0.8314 | 0.8323 | 0.8206 | -| 0.3212 | 7250 | - | 6.3835 | 10.1106 | 0.8353 | 0.8318 | 0.8340 | 0.8346 | 0.8228 | -| 0.3323 | 7500 | 6.6243 | 6.3751 | 10.1996 | 0.8348 | 0.8312 | 0.8336 | 0.8343 | 0.8248 | -| 0.3433 | 7750 | - | 6.3372 | 10.1291 | 0.8358 | 0.8320 | 0.8349 | 0.8351 | 0.8252 | -| 0.3544 | 8000 | 6.1741 | 6.4914 | 9.9826 | 0.8353 | 0.8319 | 0.8350 | 0.8348 | 0.8253 | -| 0.3655 | 8250 | - | 6.3994 | 10.2739 | 0.8351 | 0.8311 | 0.8338 | 0.8343 | 0.8230 | -| 0.3766 | 8500 | 6.6296 | 6.3337 | 10.2497 | 
0.8314 | 0.8270 | 0.8296 | 0.8305 | 0.8198 | -| 0.3876 | 8750 | - | 6.3480 | 10.3990 | 0.8346 | 0.8309 | 0.8334 | 0.8339 | 0.8230 | -| 0.3987 | 9000 | 6.4285 | 6.3466 | 10.1777 | 0.8357 | 0.8312 | 0.8340 | 0.8347 | 0.8227 | -| 0.4098 | 9250 | - | 6.3256 | 10.2523 | 0.8381 | 0.8340 | 0.8368 | 0.8370 | 0.8253 | -| 0.4209 | 9500 | 6.4325 | 6.3792 | 10.3296 | 0.8365 | 0.8320 | 0.8357 | 0.8355 | 0.8253 | -| 0.4319 | 9750 | - | 6.3483 | 10.2153 | 0.8376 | 0.8310 | 0.8362 | 0.8366 | 0.8239 | -| 0.4430 | 10000 | 6.2276 | 6.3036 | 10.2832 | 0.8357 | 0.8312 | 0.8344 | 0.8348 | 0.8236 | -| 0.4541 | 10250 | - | 6.3055 | 10.2404 | 0.8367 | 0.8318 | 0.8360 | 0.8361 | 0.8222 | -| 0.4652 | 10500 | 6.6378 | 6.2523 | 10.2437 | 0.8343 | 0.8305 | 0.8342 | 0.8336 | 0.8224 | -| 0.4762 | 10750 | - | 6.3410 | 10.2337 | 0.8344 | 0.8307 | 0.8340 | 0.8337 | 0.8211 | -| 0.4873 | 11000 | 6.4274 | 6.2573 | 10.1813 | 0.8347 | 0.8313 | 0.8342 | 0.8343 | 0.8204 | -| 0.4984 | 11250 | - | 6.2849 | 10.2498 | 0.8371 | 0.8335 | 0.8371 | 0.8366 | 0.8227 | -| 0.5095 | 11500 | 6.225 | 6.2989 | 10.3207 | 0.8367 | 0.8315 | 0.8359 | 0.8363 | 0.8210 | -| 0.5205 | 11750 | - | 6.2877 | 10.2188 | 0.8353 | 0.8320 | 0.8354 | 0.8346 | 0.8194 | -| 0.5316 | 12000 | 6.2047 | 6.2832 | 10.2869 | 0.8366 | 0.8324 | 0.8360 | 0.8362 | 0.8209 | -| 0.5427 | 12250 | - | 6.2881 | 10.3640 | 0.8369 | 0.8327 | 0.8360 | 0.8364 | 0.8211 | -| 0.5538 | 12500 | 6.0971 | 6.2958 | 10.3717 | 0.8361 | 0.8321 | 0.8347 | 0.8357 | 0.8206 | -| 0.5648 | 12750 | - | 6.2646 | 10.2213 | 0.8358 | 0.8326 | 0.8354 | 0.8354 | 0.8214 | -| 0.5759 | 13000 | 6.3137 | 6.2153 | 10.3320 | 0.8358 | 0.8320 | 0.8351 | 0.8354 | 0.8220 | -| 0.5870 | 13250 | - | 6.2665 | 10.2855 | 0.8351 | 0.8311 | 0.8342 | 0.8347 | 0.8208 | -| 0.5981 | 13500 | 6.0677 | 6.2485 | 10.3355 | 0.8392 | 0.8350 | 0.8384 | 0.8386 | 0.8256 | -| 0.6091 | 13750 | - | 6.2862 | 10.3997 | 0.8378 | 0.8330 | 0.8363 | 0.8376 | 0.8243 | -| 0.6202 | 14000 | 5.8764 | 6.2351 | 10.1755 | 0.8372 | 0.8326 | 0.8365 | 0.8369 | 0.8244 | -| 0.6313 | 14250 | - | 6.2763 | 10.1765 | 0.8343 | 0.8314 | 0.8337 | 0.8343 | 0.8219 | -| 0.6424 | 14500 | 5.9394 | 6.2387 | 10.2561 | 0.8375 | 0.8336 | 0.8365 | 0.8372 | 0.8228 | -| 0.6534 | 14750 | - | 6.2517 | 10.3297 | 0.8364 | 0.8330 | 0.8359 | 0.8366 | 0.8231 | -| 0.6645 | 15000 | 5.6786 | 6.2833 | 10.3062 | 0.8368 | 0.8325 | 0.8359 | 0.8366 | 0.8217 | -| 0.6756 | 15250 | - | 6.3199 | 10.3065 | 0.8377 | 0.8331 | 0.8365 | 0.8373 | 0.8223 | -| 0.6867 | 15500 | 6.1093 | 6.2769 | 10.3470 | 0.8370 | 0.8334 | 0.8363 | 0.8368 | 0.8219 | -| 0.6977 | 15750 | - | 6.2986 | 10.2187 | 0.8386 | 0.8341 | 0.8370 | 0.8383 | 0.8231 | -| 0.7088 | 16000 | 6.2673 | 6.2542 | 10.3047 | 0.8381 | 0.8348 | 0.8372 | 0.8378 | 0.8225 | -| 0.7199 | 16250 | - | 6.2422 | 10.2005 | 0.8370 | 0.8334 | 0.8359 | 0.8365 | 0.8219 | -| 0.7310 | 16500 | 5.9434 | 6.2799 | 10.3385 | 0.8404 | 0.8357 | 0.8388 | 0.8398 | 0.8252 | -| 0.7420 | 16750 | - | 6.2538 | 10.3160 | 0.8384 | 0.8346 | 0.8369 | 0.8379 | 0.8248 | -| 0.7531 | 17000 | 5.9462 | 6.2650 | 10.2309 | 0.8384 | 0.8348 | 0.8373 | 0.8378 | 0.8251 | -| 0.7642 | 17250 | - | 6.2432 | 10.2839 | 0.8390 | 0.8348 | 0.8376 | 0.8387 | 0.8247 | -| 0.7753 | 17500 | 5.7999 | 6.2306 | 10.2509 | 0.8381 | 0.8343 | 0.8370 | 0.8376 | 0.8253 | -| 0.7863 | 17750 | - | 6.2274 | 10.3353 | 0.8387 | 0.8346 | 0.8373 | 0.8380 | 0.8247 | -| 0.7974 | 18000 | 6.1981 | 6.2429 | 10.2562 | 0.8388 | 0.8351 | 0.8378 | 0.8383 | 0.8259 | -| 0.8085 | 18250 | - | 6.2782 | 10.2582 | 0.8374 | 0.8324 | 0.8355 | 0.8367 
| 0.8225 | -| 0.8196 | 18500 | 5.9797 | 6.2686 | 10.3103 | 0.8388 | 0.8341 | 0.8373 | 0.8384 | 0.8231 | -| 0.8306 | 18750 | - | 6.2913 | 10.2339 | 0.8386 | 0.8345 | 0.8367 | 0.8380 | 0.8233 | -| 0.8417 | 19000 | 6.1658 | 6.2897 | 10.3179 | 0.8375 | 0.8335 | 0.8356 | 0.8372 | 0.8231 | -| 0.8528 | 19250 | - | 6.2447 | 10.3506 | 0.8388 | 0.8352 | 0.8371 | 0.8383 | 0.8256 | -| 0.8639 | 19500 | 6.1131 | 6.2416 | 10.3443 | 0.8382 | 0.8339 | 0.8367 | 0.8380 | 0.8238 | -| 0.8749 | 19750 | - | 6.2145 | 10.3473 | 0.8379 | 0.8332 | 0.8357 | 0.8374 | 0.8240 | -| 0.8860 | 20000 | 6.1083 | 6.2246 | 10.4573 | 0.8404 | 0.8367 | 0.8386 | 0.8400 | 0.8265 | -| 0.8971 | 20250 | - | 6.2439 | 10.3609 | 0.8392 | 0.8347 | 0.8373 | 0.8389 | 0.8240 | -| 0.9082 | 20500 | 6.0657 | 6.2278 | 10.3957 | 0.8377 | 0.8336 | 0.8359 | 0.8373 | 0.8236 | -| 0.9192 | 20750 | - | 6.2022 | 10.3552 | 0.8380 | 0.8344 | 0.8366 | 0.8374 | 0.8236 | -| 0.9303 | 21000 | 5.9456 | 6.2159 | 10.4432 | 0.8377 | 0.8341 | 0.8358 | 0.8371 | 0.8229 | -| 0.9414 | 21250 | - | 6.1910 | 10.3497 | 0.8401 | 0.8359 | 0.8384 | 0.8394 | 0.8250 | -| 0.9525 | 21500 | 5.847 | 6.1918 | 10.3565 | 0.8370 | 0.8337 | 0.8353 | 0.8363 | 0.8237 | -| 0.9635 | 21750 | - | 6.1766 | 10.3773 | 0.8396 | 0.8360 | 0.8384 | 0.8390 | 0.8259 | -| 0.9746 | 22000 | 6.0172 | 6.1944 | 10.3657 | 0.8399 | 0.8357 | 0.8382 | 0.8394 | 0.8261 | -| 0.9857 | 22250 | - | 6.1842 | 10.4263 | 0.8402 | 0.8352 | 0.8390 | 0.8395 | 0.8259 | -| 0.9968 | 22500 | 5.8241 | 6.1852 | 10.3078 | 0.8389 | 0.8348 | 0.8381 | 0.8386 | 0.8255 | -| 1.0078 | 22750 | - | 6.1843 | 10.3573 | 0.8382 | 0.8339 | 0.8369 | 0.8377 | 0.8254 | -| 1.0189 | 23000 | 5.9069 | 6.1715 | 10.3335 | 0.8399 | 0.8358 | 0.8383 | 0.8395 | 0.8261 | -| 1.0300 | 23250 | - | 6.1612 | 10.3623 | 0.8389 | 0.8338 | 0.8374 | 0.8384 | 0.8254 | -| 1.0411 | 23500 | 5.7351 | 6.1785 | 10.3616 | 0.8390 | 0.8343 | 0.8369 | 0.8384 | 0.8254 | -| 1.0521 | 23750 | - | 6.1766 | 10.4426 | 0.8390 | 0.8336 | 0.8368 | 0.8382 | 0.8249 | -| 1.0632 | 24000 | 5.5553 | 6.1539 | 10.4702 | 0.8398 | 0.8343 | 0.8379 | 0.8393 | 0.8264 | -| 1.0743 | 24250 | - | 6.1759 | 10.4107 | 0.8388 | 0.8338 | 0.8372 | 0.8386 | 0.8253 | -| 1.0854 | 24500 | 5.7973 | 6.1994 | 10.3729 | 0.8376 | 0.8323 | 0.8362 | 0.8371 | 0.8240 | -| 1.0964 | 24750 | - | 6.1884 | 10.4265 | 0.8387 | 0.8339 | 0.8373 | 0.8382 | 0.8261 | -| 1.1075 | 25000 | 5.4409 | 6.1684 | 10.4698 | 0.8400 | 0.8351 | 0.8385 | 0.8394 | 0.8263 | -| 1.1186 | 25250 | - | 6.1650 | 10.4908 | 0.8397 | 0.8340 | 0.8380 | 0.8388 | 0.8261 | -| 1.1297 | 25500 | 5.3584 | 6.1547 | 10.4694 | 0.8399 | 0.8346 | 0.8381 | 0.8392 | 0.8259 | -| 1.1407 | 25750 | - | 6.2010 | 10.5110 | 0.8385 | 0.8344 | 0.8372 | 0.8379 | 0.8266 | -| 1.1518 | 26000 | 5.8368 | 6.2242 | 10.5004 | 0.8372 | 0.8325 | 0.8354 | 0.8365 | 0.8235 | -| 1.1629 | 26250 | - | 6.1876 | 10.4605 | 0.8389 | 0.8344 | 0.8374 | 0.8384 | 0.8269 | -| 1.1740 | 26500 | 5.9622 | 6.1911 | 10.4587 | 0.8393 | 0.8353 | 0.8378 | 0.8387 | 0.8269 | -| 1.1850 | 26750 | - | 6.1746 | 10.5848 | 0.8397 | 0.8354 | 0.8380 | 0.8389 | 0.8270 | -| 1.1961 | 27000 | 5.5691 | 6.2476 | 10.4958 | 0.8382 | 0.8346 | 0.8367 | 0.8375 | 0.8266 | -| 1.2072 | 27250 | - | 6.2107 | 10.4690 | 0.8382 | 0.8342 | 0.8365 | 0.8377 | 0.8270 | -| 1.2183 | 27500 | 5.6493 | 6.2484 | 10.5176 | 0.8374 | 0.8332 | 0.8351 | 0.8368 | 0.8264 | -| 1.2293 | 27750 | - | 6.2320 | 10.5484 | 0.8379 | 0.8339 | 0.8362 | 0.8375 | 0.8270 | -| 1.2404 | 28000 | 5.6868 | 6.1667 | 10.5633 | 0.8398 | 0.8355 | 0.8377 | 0.8390 | 0.8273 | -| 1.2515 | 
28250 | - | 6.1830 | 10.5349 | 0.8390 | 0.8346 | 0.8365 | 0.8384 | 0.8262 | -| 1.2626 | 28500 | 5.6246 | 6.1708 | 10.4139 | 0.8383 | 0.8336 | 0.8360 | 0.8377 | 0.8267 | -| 1.2736 | 28750 | - | 6.1475 | 10.5089 | 0.8394 | 0.8351 | 0.8370 | 0.8388 | 0.8278 | -| 1.2847 | 29000 | 5.8284 | 6.1499 | 10.4673 | 0.8415 | 0.8378 | 0.8395 | 0.8409 | 0.8303 | -| 1.2958 | 29250 | - | 6.1569 | 10.5153 | 0.8405 | 0.8355 | 0.8382 | 0.8398 | 0.8281 | -| 1.3069 | 29500 | 5.7342 | 6.1392 | 10.4570 | 0.8402 | 0.8363 | 0.8383 | 0.8396 | 0.8275 | -| 1.3179 | 29750 | - | 6.1423 | 10.5317 | 0.8386 | 0.8337 | 0.8368 | 0.8382 | 0.8256 | -| 1.3290 | 30000 | 5.7179 | 6.1855 | 10.5171 | 0.8403 | 0.8354 | 0.8383 | 0.8399 | 0.8285 | -| 1.3401 | 30250 | - | 6.1828 | 10.4912 | 0.8383 | 0.8333 | 0.8366 | 0.8376 | 0.8254 | -| 1.3512 | 30500 | 5.2595 | 6.1808 | 10.5231 | 0.8387 | 0.8333 | 0.8364 | 0.8379 | 0.8260 | -| 1.3622 | 30750 | - | 6.2069 | 10.5407 | 0.8388 | 0.8334 | 0.8366 | 0.8381 | 0.8261 | -| 1.3733 | 31000 | 5.7271 | 6.1302 | 10.5953 | 0.8392 | 0.8336 | 0.8370 | 0.8386 | 0.8271 | -| 1.3844 | 31250 | - | 6.1720 | 10.6090 | 0.8390 | 0.8341 | 0.8367 | 0.8381 | 0.8258 | -| 1.3955 | 31500 | 5.6218 | 6.1746 | 10.5158 | 0.8373 | 0.8327 | 0.8353 | 0.8366 | 0.8252 | -| 1.4065 | 31750 | - | 6.1499 | 10.5364 | 0.8403 | 0.8347 | 0.8384 | 0.8394 | 0.8266 | -| 1.4176 | 32000 | 5.7631 | 6.2063 | 10.5460 | 0.8381 | 0.8331 | 0.8362 | 0.8372 | 0.8249 | -| 1.4287 | 32250 | - | 6.1868 | 10.5440 | 0.8390 | 0.8345 | 0.8377 | 0.8383 | 0.8270 | -| 1.4398 | 32500 | 5.4005 | 6.1774 | 10.5254 | 0.8397 | 0.8344 | 0.8381 | 0.8389 | 0.8265 | -| 1.4508 | 32750 | - | 6.1585 | 10.6074 | 0.8405 | 0.8348 | 0.8384 | 0.8395 | 0.8265 | -| 1.4619 | 33000 | 5.8908 | 6.1445 | 10.5644 | 0.8391 | 0.8338 | 0.8373 | 0.8384 | 0.8259 | -| 1.4730 | 33250 | - | 6.1387 | 10.5574 | 0.8393 | 0.8340 | 0.8378 | 0.8385 | 0.8250 | -| 1.4841 | 33500 | 5.6176 | 6.1424 | 10.5612 | 0.8381 | 0.8334 | 0.8364 | 0.8372 | 0.8250 | -| 1.4951 | 33750 | - | 6.1447 | 10.5670 | 0.8382 | 0.8334 | 0.8366 | 0.8372 | 0.8249 | -| 1.5062 | 34000 | 5.5828 | 6.1719 | 10.5706 | 0.8404 | 0.8356 | 0.8391 | 0.8397 | 0.8263 | -| 1.5173 | 34250 | - | 6.1907 | 10.5121 | 0.8392 | 0.8346 | 0.8376 | 0.8385 | 0.8247 | -| 1.5284 | 34500 | 5.4422 | 6.1765 | 10.5447 | 0.8400 | 0.8351 | 0.8386 | 0.8394 | 0.8251 | -| 1.5394 | 34750 | - | 6.2061 | 10.5390 | 0.8393 | 0.8346 | 0.8377 | 0.8388 | 0.8250 | -| 1.5505 | 35000 | 5.4882 | 6.1762 | 10.6035 | 0.8391 | 0.8339 | 0.8370 | 0.8385 | 0.8240 | -| 1.5616 | 35250 | - | 6.2041 | 10.5827 | 0.8400 | 0.8348 | 0.8382 | 0.8394 | 0.8240 | -| 1.5727 | 35500 | 5.5079 | 6.1635 | 10.5512 | 0.8385 | 0.8341 | 0.8370 | 0.8381 | 0.8241 | -| 1.5838 | 35750 | - | 6.1614 | 10.5011 | 0.8380 | 0.8328 | 0.8361 | 0.8374 | 0.8229 | -| 1.5948 | 36000 | 5.5339 | 6.1668 | 10.5051 | 0.8384 | 0.8341 | 0.8368 | 0.8376 | 0.8245 | -| 1.6059 | 36250 | - | 6.1807 | 10.6138 | 0.8388 | 0.8338 | 0.8369 | 0.8383 | 0.8247 | -| 1.6170 | 36500 | 5.2415 | 6.1481 | 10.5237 | 0.8396 | 0.8347 | 0.8379 | 0.8392 | 0.8256 | -| 1.6281 | 36750 | - | 6.1373 | 10.4843 | 0.8390 | 0.8343 | 0.8375 | 0.8385 | 0.8251 | -| 1.6391 | 37000 | 5.2894 | 6.1462 | 10.5398 | 0.8391 | 0.8345 | 0.8376 | 0.8386 | 0.8249 | -| 1.6502 | 37250 | - | 6.1953 | 10.5063 | 0.8385 | 0.8341 | 0.8369 | 0.8380 | 0.8245 | -| 1.6613 | 37500 | 5.1845 | 6.1740 | 10.5670 | 0.8386 | 0.8342 | 0.8371 | 0.8383 | 0.8245 | -| 1.6724 | 37750 | - | 6.1945 | 10.5550 | 0.8390 | 0.8345 | 0.8372 | 0.8385 | 0.8244 | -| 1.6834 | 38000 | 5.4936 | 6.1875 | 
10.5304 | 0.8389 | 0.8343 | 0.8373 | 0.8383 | 0.8238 | -| 1.6945 | 38250 | - | 6.2066 | 10.4893 | 0.8385 | 0.8340 | 0.8369 | 0.8382 | 0.8236 | -| 1.7056 | 38500 | 5.6004 | 6.1983 | 10.5106 | 0.8387 | 0.8346 | 0.8373 | 0.8384 | 0.8241 | -| 1.7167 | 38750 | - | 6.1578 | 10.5475 | 0.8394 | 0.8351 | 0.8376 | 0.8388 | 0.8244 | -| 1.7277 | 39000 | 5.5001 | 6.2003 | 10.4501 | 0.8381 | 0.8337 | 0.8366 | 0.8375 | 0.8239 | -| 1.7388 | 39250 | - | 6.1990 | 10.5068 | 0.8393 | 0.8345 | 0.8372 | 0.8385 | 0.8250 | -| 1.7499 | 39500 | 5.4752 | 6.1837 | 10.4946 | 0.8391 | 0.8345 | 0.8376 | 0.8386 | 0.8247 | -| 1.7610 | 39750 | - | 6.1724 | 10.5385 | 0.8409 | 0.8364 | 0.8389 | 0.8402 | 0.8264 | -| 1.7720 | 40000 | 4.9596 | 6.1814 | 10.4667 | 0.8390 | 0.8347 | 0.8372 | 0.8383 | 0.8251 | -| 1.7831 | 40250 | - | 6.1890 | 10.4678 | 0.8379 | 0.8332 | 0.8359 | 0.8373 | 0.8237 | -| 1.7942 | 40500 | 5.728 | 6.1968 | 10.4825 | 0.8380 | 0.8336 | 0.8363 | 0.8375 | 0.8248 | -| 1.8053 | 40750 | - | 6.1927 | 10.5030 | 0.8399 | 0.8354 | 0.8379 | 0.8394 | 0.8254 | -| 1.8163 | 41000 | 5.5311 | 6.1913 | 10.4360 | 0.8394 | 0.8346 | 0.8375 | 0.8389 | 0.8249 | -| 1.8274 | 41250 | - | 6.2222 | 10.5528 | 0.8396 | 0.8348 | 0.8376 | 0.8391 | 0.8246 | -| 1.8385 | 41500 | 5.4588 | 6.2265 | 10.4623 | 0.8386 | 0.8342 | 0.8367 | 0.8381 | 0.8245 | -| 1.8496 | 41750 | - | 6.1881 | 10.5119 | 0.8390 | 0.8348 | 0.8372 | 0.8385 | 0.8256 | -| 1.8606 | 42000 | 5.6759 | 6.2088 | 10.5318 | 0.8387 | 0.8342 | 0.8366 | 0.8381 | 0.8250 | -| 1.8717 | 42250 | - | 6.1824 | 10.5468 | 0.8393 | 0.8352 | 0.8373 | 0.8389 | 0.8263 | -| 1.8828 | 42500 | 5.5916 | 6.1802 | 10.5495 | 0.8392 | 0.8346 | 0.8370 | 0.8388 | 0.8254 | -| 1.8939 | 42750 | - | 6.2055 | 10.5538 | 0.8390 | 0.8345 | 0.8369 | 0.8385 | 0.8250 | -| 1.9049 | 43000 | 5.545 | 6.1772 | 10.5902 | 0.8395 | 0.8354 | 0.8376 | 0.8390 | 0.8259 | -| 1.9160 | 43250 | - | 6.1812 | 10.5770 | 0.8392 | 0.8343 | 0.8369 | 0.8384 | 0.8254 | -| 1.9271 | 43500 | 5.4838 | 6.1927 | 10.5240 | 0.8396 | 0.8351 | 0.8375 | 0.8388 | 0.8252 | -| 1.9382 | 43750 | - | 6.1763 | 10.5798 | 0.8397 | 0.8356 | 0.8376 | 0.8390 | 0.8255 | -| 1.9492 | 44000 | 5.4396 | 6.1614 | 10.5465 | 0.8399 | 0.8357 | 0.8381 | 0.8391 | 0.8265 | -| 1.9603 | 44250 | - | 6.1659 | 10.5451 | 0.8398 | 0.8350 | 0.8376 | 0.8390 | 0.8254 | -| 1.9714 | 44500 | 5.4562 | 6.1601 | 10.5673 | 0.8407 | 0.8361 | 0.8390 | 0.8402 | 0.8266 | -| 1.9825 | 44750 | - | 6.1767 | 10.5592 | 0.8397 | 0.8349 | 0.8377 | 0.8389 | 0.8250 | -| 1.9935 | 45000 | 5.3376 | 6.1634 | 10.5610 | 0.8388 | 0.8340 | 0.8368 | 0.8380 | 0.8253 | -| 2.0046 | 45250 | - | 6.1679 | 10.5099 | 0.8392 | 0.8349 | 0.8377 | 0.8385 | 0.8261 | -| 2.0157 | 45500 | 5.6151 | 6.1376 | 10.5323 | 0.8390 | 0.8349 | 0.8372 | 0.8384 | 0.8263 | -| 2.0268 | 45750 | - | 6.1331 | 10.5563 | 0.8398 | 0.8349 | 0.8381 | 0.8391 | 0.8255 | -| 2.0378 | 46000 | 5.2304 | 6.1532 | 10.5037 | 0.8392 | 0.8344 | 0.8371 | 0.8385 | 0.8253 | -| 2.0489 | 46250 | - | 6.1473 | 10.5770 | 0.8393 | 0.8346 | 0.8371 | 0.8385 | 0.8252 | -| 2.0600 | 46500 | 5.2302 | 6.1273 | 10.5759 | 0.8398 | 0.8345 | 0.8374 | 0.8390 | 0.8260 | -| 2.0711 | 46750 | - | 6.1555 | 10.5763 | 0.8396 | 0.8344 | 0.8371 | 0.8387 | 0.8255 | -| 2.0821 | 47000 | 5.2761 | 6.1623 | 10.5645 | 0.8393 | 0.8339 | 0.8371 | 0.8386 | 0.8252 | -| 2.0932 | 47250 | - | 6.1632 | 10.5292 | 0.8389 | 0.8338 | 0.8368 | 0.8381 | 0.8257 | -| 2.1043 | 47500 | 5.2959 | 6.1410 | 10.5922 | 0.8402 | 0.8351 | 0.8380 | 0.8396 | 0.8264 | -| 2.1154 | 47750 | - | 6.1510 | 10.6167 | 0.8400 | 0.8347 | 
0.8375 | 0.8390 | 0.8263 | -| 2.1264 | 48000 | 4.9302 | 6.1376 | 10.5731 | 0.8404 | 0.8351 | 0.8383 | 0.8396 | 0.8271 | -| 2.1375 | 48250 | - | 6.1582 | 10.6687 | 0.8396 | 0.8349 | 0.8375 | 0.8390 | 0.8267 | -| 2.1486 | 48500 | 5.414 | 6.1680 | 10.5798 | 0.8386 | 0.8341 | 0.8367 | 0.8380 | 0.8256 | -| 2.1597 | 48750 | - | 6.1564 | 10.6162 | 0.8386 | 0.8336 | 0.8365 | 0.8379 | 0.8257 | -| 2.1707 | 49000 | 5.3711 | 6.1731 | 10.5879 | 0.8393 | 0.8344 | 0.8373 | 0.8386 | 0.8264 | -| 2.1818 | 49250 | - | 6.1668 | 10.6035 | 0.8400 | 0.8351 | 0.8378 | 0.8390 | 0.8271 | -| 2.1929 | 49500 | 5.4593 | 6.2015 | 10.6357 | 0.8386 | 0.8337 | 0.8364 | 0.8377 | 0.8253 | -| 2.2040 | 49750 | - | 6.1767 | 10.6484 | 0.8388 | 0.8337 | 0.8364 | 0.8379 | 0.8257 | -| 2.2150 | 50000 | 5.1728 | 6.1918 | 10.6098 | 0.8393 | 0.8345 | 0.8370 | 0.8384 | 0.8266 | -| 2.2261 | 50250 | - | 6.1932 | 10.6157 | 0.8389 | 0.8339 | 0.8365 | 0.8381 | 0.8265 | -| 2.2372 | 50500 | 5.4751 | 6.1709 | 10.6850 | 0.8397 | 0.8345 | 0.8373 | 0.8389 | 0.8266 | -| 2.2483 | 50750 | - | 6.1526 | 10.6373 | 0.8394 | 0.8347 | 0.8371 | 0.8388 | 0.8265 | -| 2.2593 | 51000 | 5.2245 | 6.1406 | 10.5769 | 0.8402 | 0.8353 | 0.8379 | 0.8395 | 0.8278 | -| 2.2704 | 51250 | - | 6.1494 | 10.5849 | 0.8395 | 0.8348 | 0.8370 | 0.8387 | 0.8274 | -| 2.2815 | 51500 | 5.4315 | 6.1443 | 10.6092 | 0.8399 | 0.8348 | 0.8372 | 0.8391 | 0.8271 | -| 2.2926 | 51750 | - | 6.1425 | 10.6344 | 0.8406 | 0.8353 | 0.8378 | 0.8396 | 0.8268 | -| 2.3036 | 52000 | 5.4402 | 6.1439 | 10.6235 | 0.8401 | 0.8350 | 0.8374 | 0.8391 | 0.8268 | -| 2.3147 | 52250 | - | 6.1489 | 10.6129 | 0.8395 | 0.8344 | 0.8368 | 0.8385 | 0.8260 | -| 2.3258 | 52500 | 5.3059 | 6.1530 | 10.5902 | 0.8398 | 0.8348 | 0.8374 | 0.8390 | 0.8267 | -| 2.3369 | 52750 | - | 6.1629 | 10.6417 | 0.8393 | 0.8345 | 0.8369 | 0.8385 | 0.8264 | -| 2.3479 | 53000 | 5.03 | 6.1630 | 10.5881 | 0.8389 | 0.8338 | 0.8363 | 0.8382 | 0.8260 | -| 2.3590 | 53250 | - | 6.1797 | 10.6016 | 0.8389 | 0.8336 | 0.8363 | 0.8380 | 0.8258 | -| 2.3701 | 53500 | 5.2762 | 6.1476 | 10.6432 | 0.8394 | 0.8341 | 0.8369 | 0.8388 | 0.8264 | -| 2.3812 | 53750 | - | 6.1510 | 10.6257 | 0.8391 | 0.8341 | 0.8367 | 0.8383 | 0.8266 | -| 2.3922 | 54000 | 5.3996 | 6.1548 | 10.6080 | 0.8392 | 0.8340 | 0.8366 | 0.8382 | 0.8269 | -| 2.4033 | 54250 | - | 6.1542 | 10.6725 | 0.8393 | 0.8340 | 0.8367 | 0.8382 | 0.8260 | -| 2.4144 | 54500 | 5.3477 | 6.1601 | 10.6436 | 0.8395 | 0.8344 | 0.8373 | 0.8387 | 0.8267 | -| 2.4255 | 54750 | - | 6.1712 | 10.6230 | 0.8393 | 0.8343 | 0.8370 | 0.8385 | 0.8264 | -| 2.4365 | 55000 | 5.1706 | 6.1744 | 10.6099 | 0.8394 | 0.8339 | 0.8369 | 0.8386 | 0.8261 | -| 2.4476 | 55250 | - | 6.1521 | 10.6314 | 0.8402 | 0.8348 | 0.8377 | 0.8391 | 0.8271 | -| 2.4587 | 55500 | 5.4692 | 6.1470 | 10.6595 | 0.8404 | 0.8350 | 0.8379 | 0.8393 | 0.8270 | -| 2.4698 | 55750 | - | 6.1338 | 10.6269 | 0.8393 | 0.8342 | 0.8371 | 0.8384 | 0.8261 | -| 2.4808 | 56000 | 5.3056 | 6.1456 | 10.6481 | 0.8388 | 0.8334 | 0.8364 | 0.8377 | 0.8255 | -| 2.4919 | 56250 | - | 6.1403 | 10.6158 | 0.8390 | 0.8338 | 0.8367 | 0.8380 | 0.8257 | -| 2.5030 | 56500 | 5.2841 | 6.1547 | 10.6582 | 0.8390 | 0.8337 | 0.8367 | 0.8380 | 0.8255 | -| 2.5141 | 56750 | - | 6.1556 | 10.6549 | 0.8394 | 0.8341 | 0.8370 | 0.8384 | 0.8258 | -| 2.5251 | 57000 | 5.2041 | 6.1559 | 10.6457 | 0.8395 | 0.8345 | 0.8373 | 0.8385 | 0.8258 | -| 2.5362 | 57250 | - | 6.1577 | 10.6402 | 0.8394 | 0.8342 | 0.8371 | 0.8384 | 0.8257 | -| 2.5473 | 57500 | 5.0816 | 6.1535 | 10.6806 | 0.8400 | 0.8349 | 0.8377 | 0.8392 | 0.8263 
| -| 2.5584 | 57750 | - | 6.1585 | 10.6697 | 0.8399 | 0.8347 | 0.8377 | 0.8390 | 0.8258 | -| 2.5694 | 58000 | 5.1907 | 6.1657 | 10.6445 | 0.8397 | 0.8348 | 0.8376 | 0.8388 | 0.8259 | -| 2.5805 | 58250 | - | 6.1512 | 10.6668 | 0.8395 | 0.8344 | 0.8372 | 0.8386 | 0.8255 | -| 2.5916 | 58500 | 5.3302 | 6.1640 | 10.6342 | 0.8388 | 0.8339 | 0.8364 | 0.8380 | 0.8250 | -| 2.6027 | 58750 | - | 6.1583 | 10.6381 | 0.8393 | 0.8342 | 0.8370 | 0.8385 | 0.8253 | -| 2.6137 | 59000 | 4.9311 | 6.1381 | 10.6685 | 0.8397 | 0.8347 | 0.8374 | 0.8388 | 0.8262 | -| 2.6248 | 59250 | - | 6.1465 | 10.6300 | 0.8394 | 0.8343 | 0.8371 | 0.8385 | 0.8259 | -| 2.6359 | 59500 | 4.9207 | 6.1440 | 10.6577 | 0.8389 | 0.8341 | 0.8367 | 0.8382 | 0.8253 | -| 2.6470 | 59750 | - | 6.1451 | 10.6594 | 0.8392 | 0.8340 | 0.8370 | 0.8385 | 0.8253 | -| 2.6580 | 60000 | 4.8968 | 6.1546 | 10.6325 | 0.8396 | 0.8344 | 0.8374 | 0.8388 | 0.8257 | -| 2.6691 | 60250 | - | 6.1565 | 10.6669 | 0.8396 | 0.8345 | 0.8374 | 0.8388 | 0.8262 | -| 2.6802 | 60500 | 5.1467 | 6.1706 | 10.6671 | 0.8392 | 0.8341 | 0.8371 | 0.8384 | 0.8254 | -| 2.6913 | 60750 | - | 6.1614 | 10.6516 | 0.8394 | 0.8345 | 0.8371 | 0.8385 | 0.8257 | -| 2.7023 | 61000 | 5.2112 | 6.1510 | 10.6544 | 0.8400 | 0.8349 | 0.8378 | 0.8392 | 0.8261 | -| 2.7134 | 61250 | - | 6.1570 | 10.6674 | 0.8393 | 0.8342 | 0.8371 | 0.8384 | 0.8256 | -| 2.7245 | 61500 | 5.3295 | 6.1543 | 10.6219 | 0.8394 | 0.8347 | 0.8374 | 0.8387 | 0.8257 | -| 2.7356 | 61750 | - | 6.1559 | 10.6441 | 0.8394 | 0.8344 | 0.8372 | 0.8385 | 0.8257 | -| 2.7466 | 62000 | 5.2325 | 6.1590 | 10.6511 | 0.8394 | 0.8344 | 0.8370 | 0.8385 | 0.8257 | -| 2.7577 | 62250 | - | 6.1589 | 10.6223 | 0.8396 | 0.8347 | 0.8373 | 0.8388 | 0.8261 | -| 2.7688 | 62500 | 4.7465 | 6.1546 | 10.6706 | 0.8396 | 0.8348 | 0.8375 | 0.8389 | 0.8260 | -| 2.7799 | 62750 | - | 6.1576 | 10.6073 | 0.8395 | 0.8346 | 0.8373 | 0.8388 | 0.8261 | -| 2.7909 | 63000 | 5.1445 | 6.1613 | 10.6317 | 0.8392 | 0.8343 | 0.8370 | 0.8384 | 0.8257 | -| 2.8020 | 63250 | - | 6.1619 | 10.6241 | 0.8392 | 0.8343 | 0.8369 | 0.8384 | 0.8255 | -| 2.8131 | 63500 | 5.4504 | 6.1602 | 10.6056 | 0.8391 | 0.8341 | 0.8368 | 0.8383 | 0.8254 | -| 2.8242 | 63750 | - | 6.1664 | 10.6452 | 0.8392 | 0.8340 | 0.8368 | 0.8383 | 0.8251 | -| 2.8352 | 64000 | 5.1417 | 6.1701 | 10.6432 | 0.8390 | 0.8342 | 0.8366 | 0.8382 | 0.8250 | -| 2.8463 | 64250 | - | 6.1631 | 10.6367 | 0.8390 | 0.8342 | 0.8368 | 0.8384 | 0.8252 | -| 2.8574 | 64500 | 5.3774 | 6.1674 | 10.6292 | 0.8390 | 0.8343 | 0.8368 | 0.8382 | 0.8253 | -| 2.8685 | 64750 | - | 6.1572 | 10.6353 | 0.8393 | 0.8345 | 0.8371 | 0.8385 | 0.8258 | -| 2.8795 | 65000 | 5.1903 | 6.1596 | 10.6405 | 0.8390 | 0.8342 | 0.8366 | 0.8383 | 0.8254 | -| 2.8906 | 65250 | - | 6.1654 | 10.6502 | 0.8390 | 0.8341 | 0.8367 | 0.8383 | 0.8253 | -| 2.9017 | 65500 | 5.2905 | 6.1624 | 10.6473 | 0.8391 | 0.8342 | 0.8368 | 0.8383 | 0.8253 | -| 2.9128 | 65750 | - | 6.1598 | 10.6436 | 0.8392 | 0.8342 | 0.8369 | 0.8383 | 0.8255 | -| 2.9238 | 66000 | 5.2441 | 6.1604 | 10.6527 | 0.8392 | 0.8342 | 0.8368 | 0.8383 | 0.8254 | -| 2.9349 | 66250 | - | 6.1606 | 10.6449 | 0.8393 | 0.8343 | 0.8369 | 0.8384 | 0.8254 | -| 2.9460 | 66500 | 5.3131 | 6.1585 | 10.6427 | 0.8392 | 0.8343 | 0.8370 | 0.8384 | 0.8255 | -| 2.9571 | 66750 | - | 6.1585 | 10.6419 | 0.8392 | 0.8344 | 0.8370 | 0.8384 | 0.8256 | -| 2.9681 | 67000 | 5.1176 | 6.1566 | 10.6439 | 0.8393 | 0.8344 | 0.8370 | 0.8384 | 0.8254 | -| 2.9792 | 67250 | - | 6.1581 | 10.6403 | 0.8392 | 0.8343 | 0.8369 | 0.8384 | 0.8253 | -| 2.9903 | 67500 | 
5.1405 | 6.1582 | 10.6427 | 0.8392 | 0.8344 | 0.8369 | 0.8384 | 0.8254 | +## Extracting Token Embeddings -
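+
+For context, the token embeddings extracted in this section are simply the input embedding matrix of the source model. A rough way to inspect that matrix with `transformers` is sketched here; this is not the library's extraction path (that snippet follows below), the model id is only an example, and llama3 checkpoints require accepting the license on Hugging Face first:
+
+```python
+from transformers import AutoModelForCausalLM
+
+# Example only: load a llama3 checkpoint and look at its input embedding matrix.
+# This loads the full model weights, so prefer the safetensors extraction below
+# for large models.
+model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B")
+embeddings = model.get_input_embeddings().weight  # shape: (vocab_size, hidden_dim)
+print(embeddings.shape)
+```
+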
+To extract token embeddings from a model, ensure you have agreed to the user agreement and logged in using the Hugging Face CLI (for llama3 models). You can then use the following snippet: -### Framework Versions -- Python: 3.9.5 -- Sentence Transformers: 3.0.1 -- Transformers: 4.43.1 -- PyTorch: 2.3.1+cu121 -- Accelerate: 0.33.0 -- Datasets: 2.20.0 -- Tokenizers: 0.19.1 +```python +from wordllama.extract import extract_safetensors -## Citation +# Extract embeddings for the specified configuration +extract_safetensors("llama3_70B", "path/to/saved/model-0001-of-00XX.safetensors") +``` -### BibTeX +HINT: Embeddings are usually in the first safetensors file, but not always. Sometimes there is a manifest, sometimes you have to snoop around and figure it out. -#### Sentence Transformers -```bibtex -@inproceedings{reimers-2019-sentence-bert, - title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks", - author = "Reimers, Nils and Gurevych, Iryna", - booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing", - month = "11", - year = "2019", - publisher = "Association for Computational Linguistics", - url = "https://arxiv.org/abs/1908.10084", -} +For training, use the scripts in the github repo. You have to add a configuration file (copy/modify an existing one into the folder). ``` - -#### MatryoshkaLoss -```bibtex -@misc{kusupati2024matryoshka, - title={Matryoshka Representation Learning}, - author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi}, - year={2024}, - eprint={2205.13147}, - archivePrefix={arXiv}, - primaryClass={cs.LG} -} +$ pip install wordllama[train] +$ python train.py train --config your_new_config +(training stuff happens) +$ python train.py save --config your_new_config --checkpoint ... --outdir /path/to/weights/ +(saves 1 model per matryoshka dim) ``` -#### MultipleNegativesRankingLoss +## Citations + +If you use WordLlama in your research or project, please consider citing it as follows: + ```bibtex -@misc{henderson2017efficient, - title={Efficient Natural Language Response Suggestion for Smart Reply}, - author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil}, - year={2017}, - eprint={1705.00652}, - archivePrefix={arXiv}, - primaryClass={cs.CL} +@software{miller2024wordllama, + author = {Miller, D. Lee}, + title = {WordLlama: Recycled Token Embeddings from Large Language Models}, + year = {2024}, + url = {https://github.com/dleemiller/wordllama}, + version = {0.2.3} } ``` - - - +## License - \ No newline at end of file