{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Pipeline\n", "\n", "This is the most basic object in the Hugging Face Transformers library. It is a one-stop object that does everything under the hood, abstracting away much of the complexity of the task at hand, such as `tokenization`, `preprocessing`, `postprocessing`, etc." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/huggingface/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n", "No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).\n", "Using a pipeline without specifying a model name and revision in production is not recommended.\n", "/home/huggingface/lib/python3.10/site-packages/huggingface_hub/file_download.py:1150: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n" ] } ], "source": [ "from transformers import pipeline\n", "classifier = pipeline(task = \"sentiment-analysis\")" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "sentences = [\n", " \"I have been sleeping a lot lately. 
Wish I could do more and procrastinate less\",\n", " \"It is a wonderful day today\",\n", " \"What the heck, this software sucks!!\"\n", "]" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'label': 'NEGATIVE', 'score': 0.9991617202758789},\n", " {'label': 'POSITIVE', 'score': 0.999890923500061},\n", " {'label': 'NEGATIVE', 'score': 0.9995805621147156}]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "classifier(sentences)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Zero Shot Classification" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "sentences = [\n", " \"Rahul Dravid was a great coach and led India to win the world cup in 2024\",\n", " \"What is a transformer? It is a black box neural network model which can be used to do stuff with sequences\",\n", " \"How can one understand the meaning of life? It is not so simple\",\n", " \"Shaun had a great insight right in the middle of a surgery\"\n", "]\n", "\n", "labels = [\"Sports\", \"Education\", \"Other\"]" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "No model was supplied, defaulted to facebook/bart-large-mnli and revision c626438 (https://huggingface.co/facebook/bart-large-mnli).\n", "Using a pipeline without specifying a model name and revision in production is not recommended.\n", "/home/huggingface/lib/python3.10/site-packages/huggingface_hub/file_download.py:1150: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n" ] } ], "source": [ "classifier = pipeline(\"zero-shot-classification\")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'sequence': 'Rahul Dravid was a great coach and led India to win the world cup in 2024',\n", " 'labels': ['Sports', 'Other', 'Education'],\n", " 'scores': [0.967433512210846, 0.025695420801639557, 0.006871006917208433]},\n", " {'sequence': 'What is a transformer? It is a black box neural network model which can be used to do stuff with sequences',\n", " 'labels': ['Other', 'Education', 'Sports'],\n", " 'scores': [0.776347279548645, 0.11728236079216003, 0.10637037456035614]},\n", " {'sequence': 'How can one understand the meaning of life? It is not so simple',\n", " 'labels': ['Other', 'Education', 'Sports'],\n", " 'scores': [0.8647233247756958, 0.08910410851240158, 0.046172577887773514]},\n", " {'sequence': 'Shaun had a great insight right in the middle of a surgery',\n", " 'labels': ['Other', 'Sports', 'Education'],\n", " 'scores': [0.7419394850730896, 0.18247079849243164, 0.07558975368738174]}]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "classifier(sequences = sentences, candidate_labels = labels)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Text Generation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Using default model" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "No model was supplied, defaulted to gpt2 and revision 6c0e608 (https://huggingface.co/gpt2).\n", "Using a pipeline without specifying a model name and revision in production is not recommended.\n", "/home/huggingface/lib/python3.10/site-packages/huggingface_hub/file_download.py:1150: FutureWarning: `resume_download` is deprecated and will be removed in 
version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n" ] } ], "source": [ "generator = pipeline(task = \"text-generation\")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "seed_text = \"Dhoni finishes off in style and the entire Indian team\"" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/huggingface/lib/python3.10/site-packages/transformers/generation/utils.py:1201: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use a generation configuration file (see https://huggingface.co/docs/transformers/main_classes/text_generation)\n", " warnings.warn(\n", "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n", "/home/huggingface/lib/python3.10/site-packages/transformers/generation/utils.py:1288: UserWarning: Using `max_length`'s default (50) to control the generation length. 
This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.\n", " warnings.warn(\n" ] }, { "data": { "text/plain": [ "[{'generated_text': 'Dhoni finishes off in style and the entire Indian team look forward to meeting him at home to continue their efforts towards an unbeaten run in this World Cup.'}]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "generator(text_inputs = seed_text)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n" ] }, { "data": { "text/plain": [ "[{'generated_text': \"Dhoni finishes off in style and the entire Indian team is delighted with his victory\\n\\nIndia have failed to impress Pakistan's Ranji Trophy winner\"},\n", " {'generated_text': \"Dhoni finishes off in style and the entire Indian team goes to great lengths to make him comfortable. It's a very important decision for the first\"},\n", " {'generated_text': 'Dhoni finishes off in style and the entire Indian team is immediately in a good position to secure victory.\\n\\nA few weeks from now,'}]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "generator(text_inputs = seed_text, num_return_sequences = 3, max_length = 30)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Using specific model from huggingface hub" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/huggingface/lib/python3.10/site-packages/huggingface_hub/file_download.py:1150: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "/home/huggingface/lib/python3.10/site-packages/transformers/generation/utils.py:1201: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use a generation configuration file (see https://huggingface.co/docs/transformers/main_classes/text_generation)\n", " warnings.warn(\n", "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n" ] }, { "data": { "text/plain": [ "[{'generated_text': 'Dhoni finishes off in style and the entire Indian team has their legs.\\n\\n\\nThe match between the West Indian and the Americans was the'},\n", " {'generated_text': 'Dhoni finishes off in style and the entire Indian team is preparing to compete on October 31st.\\n\\nThe squad of India is made up'},\n", " {'generated_text': 'Dhoni finishes off in style and the entire Indian team looks happy to be back as usual this term,\" he added.'}]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "generator = pipeline(\"text-generation\", model = \"distilgpt2\")\n", "\n", "generator(text_inputs= seed_text, num_return_sequences = 3, max_length = 30)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Mask Filling" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "No model was supplied, defaulted to distilroberta-base and revision ec58a5b (https://huggingface.co/distilroberta-base).\n", "Using a pipeline without specifying a model name and revision in production is not recommended.\n", "/home/huggingface/lib/python3.10/site-packages/huggingface_hub/file_download.py:1150: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n" ] } ], "source": [ "filler = pipeline(\"fill-mask\")" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'score': 0.07598453760147095,\n", " 'token': 6943,\n", " 'token_str': ' depression',\n", " 'sequence': 'How deep is your depression?'},\n", " {'score': 0.035246096551418304,\n", " 'token': 12172,\n", " 'token_str': ' bubble',\n", " 'sequence': 'How deep is your bubble?'},\n", " {'score': 0.027820784598588943,\n", " 'token': 7530,\n", " 'token_str': ' addiction',\n", " 'sequence': 'How deep is your addiction?'},\n", " {'score': 0.014877567999064922,\n", " 'token': 4683,\n", " 'token_str': ' hole',\n", " 'sequence': 'How deep is your hole?'},\n", " {'score': 0.013593271374702454,\n", " 'token': 1144,\n", " 'token_str': ' heart',\n", " 'sequence': 'How deep is your heart?'}]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Take the mask token from the tokenizer: a prompt without one cannot be filled,\n", "# and the token differs between checkpoints.\n", "filler(f\"How deep is your {filler.tokenizer.mask_token}?\", top_k = 5)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/huggingface/lib/python3.10/site-packages/huggingface_hub/file_download.py:1150: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']\n", "- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n" ] } ], "source": [ "filler = pipeline(\"fill-mask\", model = \"bert-base-cased\")" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'score': 0.0551474466919899,\n", " 'token': 1762,\n", " 'token_str': 'heart',\n", " 'sequence': 'How deep is your heart?'},\n", " {'score': 0.04252220690250397,\n", " 'token': 5785,\n", " 'token_str': 'wound',\n", " 'sequence': 'How deep is your wound?'},\n", " {'score': 0.038988541811704636,\n", " 'token': 3960,\n", " 'token_str': 'soul',\n", " 'sequence': 'How deep is your soul?'},\n", " {'score': 0.03589598089456558,\n", " 'token': 2922,\n", " 'token_str': 'throat',\n", " 'sequence': 'How deep is your throat?'},\n", " {'score': 0.0302369873970747,\n", " 'token': 1567,\n", " 'token_str': 'love',\n", " 'sequence': 'How deep is your love?'}]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filler(\"How deep is your [MASK]?\", top_k = 5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Named Entity Recognition (NER)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english and revision f2482bf (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english).\n", "Using a pipeline without specifying a model name and revision in production is not recommended.\n", "/home/huggingface/lib/python3.10/site-packages/huggingface_hub/file_download.py:1150: FutureWarning: `resume_download` is deprecated and will be removed in version 
1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n" ] } ], "source": [ "# aggregation_strategy=\"simple\" is the replacement for the deprecated grouped_entities=True:\n", "# word pieces are merged and reported as whole entities.\n", "ner = pipeline(task = \"ner\", aggregation_strategy = \"simple\")" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'entity_group': 'PER',\n", " 'score': 0.9884488,\n", " 'word': 'Sachin Tendulkar',\n", " 'start': 63,\n", " 'end': 79},\n", " {'entity_group': 'ORG',\n", " 'score': 0.9564063,\n", " 'word': 'Indian Cricket Team',\n", " 'start': 89,\n", " 'end': 108}]" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ner(\"Hey everyone, please welcome, the chief guest for tonight: Mr. Sachin Tendulkar from the Indian Cricket Team\")" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english and revision f2482bf (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english).\n", "Using a pipeline without specifying a model name and revision in production is not recommended.\n", "/home/huggingface/lib/python3.10/site-packages/huggingface_hub/file_download.py:1150: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n" ] }, { "data": { "text/plain": [ "[{'entity': 'I-PER',\n", " 'score': 0.9995166,\n", " 'index': 15,\n", " 'word': 'Sa',\n", " 'start': 63,\n", " 'end': 65},\n", " {'entity': 'I-PER',\n", " 'score': 0.9992397,\n", " 'index': 16,\n", " 'word': '##chin',\n", " 'start': 65,\n", " 'end': 69},\n", " {'entity': 'I-PER',\n", " 'score': 0.99916065,\n", " 'index': 17,\n", " 'word': 'Ten',\n", " 'start': 70,\n", " 'end': 73},\n", " {'entity': 'I-PER',\n", " 'score': 0.9957129,\n", " 'index': 18,\n", " 'word': '##du',\n", " 'start': 73,\n", " 'end': 75},\n", " {'entity': 'I-PER',\n", " 'score': 0.9410511,\n", " 'index': 19,\n", " 'word': '##lk',\n", " 'start': 75,\n", " 'end': 77},\n", " {'entity': 'I-PER',\n", " 'score': 0.99601185,\n", " 'index': 20,\n", " 'word': '##ar',\n", " 'start': 77,\n", " 'end': 79},\n", " {'entity': 'I-ORG',\n", " 'score': 0.9637556,\n", " 'index': 23,\n", " 'word': 'Indian',\n", " 'start': 89,\n", " 'end': 95},\n", " {'entity': 'I-ORG',\n", " 'score': 0.9248884,\n", " 'index': 24,\n", " 'word': 'Cricket',\n", " 'start': 96,\n", " 'end': 103},\n", " {'entity': 'I-ORG',\n", " 'score': 0.98057497,\n", " 'index': 25,\n", " 'word': 'Team',\n", " 'start': 104,\n", " 'end': 108}]" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# aggregation_strategy=\"none\" is the replacement for the deprecated grouped_entities=False:\n", "# every tagged word piece is reported separately.\n", "ner = pipeline(task = \"ner\", aggregation_strategy = \"none\")\n", "ner(\"Hey everyone, please welcome, the chief guest for tonight: Mr. 
Sachin Tendulkar from the Indian Cricket Team\")" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english and revision f2482bf (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english).\n", "Using a pipeline without specifying a model name and revision in production is not recommended.\n", "/home/huggingface/lib/python3.10/site-packages/huggingface_hub/file_download.py:1150: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n" ] } ], "source": [ "pos = pipeline(task = \"token-classification\")" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'entity': 'I-PER',\n", " 'score': 0.99938285,\n", " 'index': 4,\n", " 'word': 'S',\n", " 'start': 11,\n", " 'end': 12},\n", " {'entity': 'I-PER',\n", " 'score': 0.99815494,\n", " 'index': 5,\n", " 'word': '##yl',\n", " 'start': 12,\n", " 'end': 14},\n", " {'entity': 'I-PER',\n", " 'score': 0.9959072,\n", " 'index': 6,\n", " 'word': '##va',\n", " 'start': 14,\n", " 'end': 16},\n", " {'entity': 'I-PER',\n", " 'score': 0.99923277,\n", " 'index': 7,\n", " 'word': '##in',\n", " 'start': 16,\n", " 'end': 18},\n", " {'entity': 'I-ORG',\n", " 'score': 0.9738931,\n", " 'index': 12,\n", " 'word': 'Hu',\n", " 'start': 33,\n", " 'end': 35},\n", " {'entity': 'I-ORG',\n", " 'score': 0.97611505,\n", " 'index': 13,\n", " 'word': '##gging',\n", " 'start': 35,\n", " 'end': 40},\n", " {'entity': 'I-ORG',\n", " 'score': 0.9887976,\n", " 'index': 14,\n", " 'word': 'Face',\n", " 'start': 41,\n", " 'end': 45},\n", " {'entity': 'I-LOC',\n", " 'score': 0.9932106,\n", " 'index': 16,\n", " 'word': 'Brooklyn',\n", " 'start': 49,\n", " 'end': 57}]" ] }, 
"execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pos(\"My name is Sylvain and I work at Hugging Face in Brooklyn.\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Question Answering" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "No model was supplied, defaulted to distilbert-base-cased-distilled-squad and revision 626af31 (https://huggingface.co/distilbert-base-cased-distilled-squad).\n", "Using a pipeline without specifying a model name and revision in production is not recommended.\n", "/home/huggingface/lib/python3.10/site-packages/huggingface_hub/file_download.py:1150: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n" ] }, { "data": { "text/plain": [ "{'score': 0.21678458154201508,\n", " 'start': 48,\n", " 'end': 76,\n", " 'answer': 'I wish I could get some rest'}" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bot = pipeline(\"question-answering\")\n", "bot(\n", " question = \"How am I doing?\",\n", " context = \"I have just came back from a very busy trip and I wish I could get some rest.\"\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This is an extractive question-answering model: it selects the phrase from the given text that is most likely to be the answer; it does not generate an answer of its own." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Summarization" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).\n", "Using a pipeline without specifying a model name and revision in production is not recommended.\n", "/home/huggingface/lib/python3.10/site-packages/huggingface_hub/file_download.py:1150: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n" ] }, { "data": { "text/plain": [ "[{'summary_text': ' America has changed dramatically during recent years . The number of engineering graduates in the U.S. has declined in traditional engineering disciplines such as mechanical, civil, electrical, chemical, and aeronautical engineering . Rapidly developing economies such as China and India continue to encourage and advance the teaching of engineering .'}]" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "summary = pipeline(\"summarization\")\n", "\n", "summary(\n", "\"\"\"\n", " America has changed dramatically during recent years. Not only has the number of \n", " graduates in traditional engineering disciplines such as mechanical, civil, \n", " electrical, chemical, and aeronautical engineering declined, but in most of \n", " the premier American universities engineering curricula now concentrate on \n", " and encourage largely the study of engineering science. 
As a result, there \n", " are declining offerings in engineering subjects dealing with infrastructure, \n", " the environment, and related issues, and greater concentration on high \n", " technology subjects, largely supporting increasingly complex scientific \n", " developments. While the latter is important, it should not be at the expense \n", " of more traditional engineering.\n", "\n", " Rapidly developing economies such as China and India, as well as other \n", " industrial countries in Europe and Asia, continue to encourage and advance \n", " the teaching of engineering. Both China and India, respectively, graduate \n", " six and eight times as many traditional engineers as does the United States. \n", " Other industrial countries at minimum maintain their output, while America \n", " suffers an increasingly serious decline in the number of engineering graduates \n", " and a lack of well-educated engineers.\n", "\"\"\"\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Translation" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# English-to-Marathi MarianMT checkpoint. The checkpoint used here previously could not be\n", "# loaded: AutoConfig raised KeyError: 'translation' for its unsupported model_type.\n", "translator = pipeline(\"translation\", model = \"Helsinki-NLP/opus-mt-en-mr\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Translate a real sentence rather than an empty string.\n", "translator(\"It is a wonderful day today\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.14" } }, "nbformat": 4, "nbformat_minor": 2 }