{ "cells": [ { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# ! pip install --upgrade azure-ai-textanalytics" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "key = \"198414c4d7e54bde91ec77bf776d5211\"\n", "endpoint = \"https://new-entity.cognitiveservices.azure.com/\"\n", "# endpoint = \"https://eastus.api.cognitive.microsoft.com/\"\n", "\n", "from azure.ai.textanalytics import TextAnalyticsClient\n", "from azure.core.credentials import AzureKeyCredential\n", "\n", "# Authenticate the client using your key and endpoint \n", "def authenticate_client():\n", " ta_credential = AzureKeyCredential(key)\n", " text_analytics_client = TextAnalyticsClient(\n", " endpoint=endpoint, \n", " credential=ta_credential)\n", " return text_analytics_client\n", "\n", "client = authenticate_client()\n" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Named Entities:\n", "\n", "\tText: \t razor kraken \tCategory: \t Organization \tSubCategory: \t None \n", "\tConfidence Score: \t 0.54 \tLength: \t 12 \tOffset: \t 0 \n", "\n", "\tText: \t headphones \tCategory: \t Product \tSubCategory: \t None \n", "\tConfidence Score: \t 0.5 \tLength: \t 10 \tOffset: \t 13 \n", "\n" ] } ], "source": [ "key = \"2fd114e7967a4da58854be231fd766a3\"\n", "endpoint = \"https://entity-collection.cognitiveservices.azure.com/\"\n", "# endpoint = \"https://eastus.api.cognitive.microsoft.com/\"\n", "\n", "from azure.ai.textanalytics import TextAnalyticsClient\n", "from azure.core.credentials import AzureKeyCredential\n", "\n", "# Authenticate the client using your key and endpoint \n", "def authenticate_client():\n", " ta_credential = AzureKeyCredential(key)\n", " text_analytics_client = TextAnalyticsClient(\n", " endpoint=endpoint, \n", " credential=ta_credential)\n", " return text_analytics_client\n", "\n", "client = authenticate_client()\n", "\n", "# Example function for recognizing entities from text\n", "def entity_recognition_example(client):\n", "\n", " try:\n", " documents = [\"razor kraken headphones\"]\n", " result = client.recognize_entities(documents = documents)[0]\n", "\n", " print(\"Named Entities:\\n\")\n", " for entity in result.entities:\n", " print(\"\\tText: \\t\", entity.text, \"\\tCategory: \\t\", entity.category, \"\\tSubCategory: \\t\", entity.subcategory,\n", " \"\\n\\tConfidence Score: \\t\", round(entity.confidence_score, 2), \"\\tLength: \\t\", entity.length, \"\\tOffset: \\t\", entity.offset, \"\\n\")\n", "\n", " except Exception as err:\n", " print(\"Encountered exception. {}\".format(err))\n", "entity_recognition_example(client)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "def replace_original_text(original_text:str):\n", " try:\n", " result = client.recognize_entities(documents = [original_text])[0]\n", "\n", " for entity in result.entities:\n", " # print(\"\\tText: \\t\", entity.text, \"\\tCategory: \\t\", entity.category, \"\\tSubCategory: \\t\", entity.subcategory,\n", " # \"\\n\\tConfidence Score: \\t\", round(entity.confidence_score, 2), \"\\tLength: \\t\", entity.length, \"\\tOffset: \\t\", entity.offset, \"\\n\")\n", " original_text= original_text.replace(\n", " entity.text, \n", " entity.text+ f' ({entity.category}) '\n", " )\n", " return original_text\n", "\n", " except Exception as err:\n", " \n", " print(\"Encountered exception. {}\".format(err))\n", " return original_text\n", " " ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'best cat ear headphones (Product) '" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "replace_original_text(original_text=\"best cat ear headphones\")" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Barack Obama (Person) in the White House (Location) '" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "replace_original_text(\n", " 'Barack Obama in the White House'\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from azure.core.credentials import AzureKeyCredential\n", "from azure.ai.textanalytics import TextAnalyticsClient\n", "\n", "credential = AzureKeyCredential(\"c8b849064d6649ea87cbd8fbbd39f708\")\n", "text_analytics_client = TextAnalyticsClient(endpoint=\"https://entity-retrieval.cognitiveservices.azure.com/\", credential=credential)\n", "# text_analytics_client = TextAnalyticsClient(endpoint=\"https://ktitji5.eastus.cognitiveservices.azure.com/\", credential=credential)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Get the endpoint for the Language service resource\n", "# ! az cognitiveservices account show --name \"resource-name\" --resource-group \"resource-group-name\" --query \"properties.endpoint\"" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "documents = [\n", " {\"id\": \"1\", \"language\": \"en\", \"text\": \"I hated the movie. It was so slow!\"},\n", " {\"id\": \"2\", \"language\": \"en\", \"text\": \"The movie made it into my top ten favorites. What a great movie!\"},\n", "]" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "ename": "ClientAuthenticationError", "evalue": "(401) Access denied due to invalid subscription key or wrong API endpoint. Make sure to provide a valid key for an active subscription and use a correct regional API endpoint for your resource.\nCode: 401\nMessage: Access denied due to invalid subscription key or wrong API endpoint. Make sure to provide a valid key for an active subscription and use a correct regional API endpoint for your resource.", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mClientAuthenticationError\u001b[0m Traceback (most recent call last)", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/azure/ai/textanalytics/_text_analytics_client.py:991\u001b[0m, in \u001b[0;36mTextAnalyticsClient.analyze_sentiment\u001b[0;34m(self, documents, **kwargs)\u001b[0m\n\u001b[1;32m 988\u001b[0m models \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_client\u001b[39m.\u001b[39mmodels(api_version\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_api_version)\n\u001b[1;32m 989\u001b[0m \u001b[39mreturn\u001b[39;00m cast(\n\u001b[1;32m 990\u001b[0m List[Union[AnalyzeSentimentResult, DocumentError]],\n\u001b[0;32m--> 991\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_client\u001b[39m.\u001b[39;49manalyze_text(\n\u001b[1;32m 992\u001b[0m body\u001b[39m=\u001b[39;49mmodels\u001b[39m.\u001b[39;49mAnalyzeTextSentimentAnalysisInput(\n\u001b[1;32m 993\u001b[0m analysis_input\u001b[39m=\u001b[39;49m{\u001b[39m\"\u001b[39;49m\u001b[39mdocuments\u001b[39;49m\u001b[39m\"\u001b[39;49m: docs},\n\u001b[1;32m 994\u001b[0m parameters\u001b[39m=\u001b[39;49mmodels\u001b[39m.\u001b[39;49mSentimentAnalysisTaskParameters(\n\u001b[1;32m 995\u001b[0m logging_opt_out\u001b[39m=\u001b[39;49mdisable_service_logs,\n\u001b[1;32m 996\u001b[0m model_version\u001b[39m=\u001b[39;49mmodel_version,\n\u001b[1;32m 997\u001b[0m string_index_type\u001b[39m=\u001b[39;49mstring_index_type_compatibility(string_index_type),\n\u001b[1;32m 998\u001b[0m opinion_mining\u001b[39m=\u001b[39;49mshow_opinion_mining,\n\u001b[1;32m 999\u001b[0m )\n\u001b[1;32m 1000\u001b[0m ),\n\u001b[1;32m 1001\u001b[0m show_stats\u001b[39m=\u001b[39;49mshow_stats,\n\u001b[1;32m 1002\u001b[0m \u001b[39mcls\u001b[39;49m\u001b[39m=\u001b[39;49mkwargs\u001b[39m.\u001b[39;49mpop(\u001b[39m\"\u001b[39;49m\u001b[39mcls\u001b[39;49m\u001b[39m\"\u001b[39;49m, sentiment_result),\n\u001b[1;32m 1003\u001b[0m \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs\n\u001b[1;32m 1004\u001b[0m )\n\u001b[1;32m 1005\u001b[0m )\n\u001b[1;32m 1007\u001b[0m \u001b[39m# api_versions 3.0, 3.1\u001b[39;00m\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/azure/ai/textanalytics/_generated/_operations_mixin.py:109\u001b[0m, in \u001b[0;36mTextAnalyticsClientOperationsMixin.analyze_text\u001b[0;34m(self, body, show_stats, **kwargs)\u001b[0m\n\u001b[1;32m 108\u001b[0m mixin_instance\u001b[39m.\u001b[39m_deserialize \u001b[39m=\u001b[39m Deserializer(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_models_dict(api_version))\n\u001b[0;32m--> 109\u001b[0m \u001b[39mreturn\u001b[39;00m mixin_instance\u001b[39m.\u001b[39;49manalyze_text(body, show_stats, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/azure/core/tracing/decorator.py:78\u001b[0m, in \u001b[0;36mdistributed_trace..decorator..wrapper_use_tracer\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 77\u001b[0m \u001b[39mif\u001b[39;00m span_impl_type \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m---> 78\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 80\u001b[0m \u001b[39m# Merge span is parameter is set, but only if no explicit parent are passed\u001b[39;00m\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/azure/ai/textanalytics/_generated/v2022_05_01/operations/_text_analytics_client_operations.py:299\u001b[0m, in \u001b[0;36mTextAnalyticsClientOperationsMixin.analyze_text\u001b[0;34m(self, body, show_stats, **kwargs)\u001b[0m\n\u001b[1;32m 298\u001b[0m \u001b[39mif\u001b[39;00m response\u001b[39m.\u001b[39mstatus_code \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m [\u001b[39m200\u001b[39m]:\n\u001b[0;32m--> 299\u001b[0m map_error(status_code\u001b[39m=\u001b[39;49mresponse\u001b[39m.\u001b[39;49mstatus_code, response\u001b[39m=\u001b[39;49mresponse, error_map\u001b[39m=\u001b[39;49merror_map)\n\u001b[1;32m 300\u001b[0m error \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_deserialize\u001b[39m.\u001b[39mfailsafe_deserialize(_models\u001b[39m.\u001b[39mErrorResponse, pipeline_response)\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/azure/core/exceptions.py:165\u001b[0m, in \u001b[0;36mmap_error\u001b[0;34m(status_code, response, error_map)\u001b[0m\n\u001b[1;32m 164\u001b[0m error \u001b[39m=\u001b[39m error_type(response\u001b[39m=\u001b[39mresponse)\n\u001b[0;32m--> 165\u001b[0m \u001b[39mraise\u001b[39;00m error\n", "\u001b[0;31mClientAuthenticationError\u001b[0m: (401) Access denied due to invalid subscription key or wrong API endpoint. Make sure to provide a valid key for an active subscription and use a correct regional API endpoint for your resource.\nCode: 401\nMessage: Access denied due to invalid subscription key or wrong API endpoint. Make sure to provide a valid key for an active subscription and use a correct regional API endpoint for your resource.", "\nThe above exception was the direct cause of the following exception:\n", "\u001b[0;31mClientAuthenticationError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m/home/ubuntu/SentenceStructureComparision/research/12_text_analytics_using_azure.ipynb Cell 12\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> 1\u001b[0m response \u001b[39m=\u001b[39m text_analytics_client\u001b[39m.\u001b[39;49manalyze_sentiment(documents)\n\u001b[1;32m 2\u001b[0m successful_responses \u001b[39m=\u001b[39m [doc \u001b[39mfor\u001b[39;00m doc \u001b[39min\u001b[39;00m response \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m doc\u001b[39m.\u001b[39mis_error]\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/azure/core/tracing/decorator.py:78\u001b[0m, in \u001b[0;36mdistributed_trace..decorator..wrapper_use_tracer\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 76\u001b[0m span_impl_type \u001b[39m=\u001b[39m settings\u001b[39m.\u001b[39mtracing_implementation()\n\u001b[1;32m 77\u001b[0m \u001b[39mif\u001b[39;00m span_impl_type \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m---> 78\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 80\u001b[0m \u001b[39m# Merge span is parameter is set, but only if no explicit parent are passed\u001b[39;00m\n\u001b[1;32m 81\u001b[0m \u001b[39mif\u001b[39;00m merge_span \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m passed_in_parent:\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/azure/ai/textanalytics/_validate.py:74\u001b[0m, in \u001b[0;36mvalidate_multiapi_args..decorator..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[39m# the latest version is selected, we assume all features supported\u001b[39;00m\n\u001b[1;32m 73\u001b[0m \u001b[39mif\u001b[39;00m selected_api_version \u001b[39m==\u001b[39m VERSIONS_SUPPORTED[\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m]:\n\u001b[0;32m---> 74\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 76\u001b[0m \u001b[39mif\u001b[39;00m version_method_added \u001b[39mand\u001b[39;00m version_method_added \u001b[39m!=\u001b[39m selected_api_version \u001b[39mand\u001b[39;00m \\\n\u001b[1;32m 77\u001b[0m VERSIONS_SUPPORTED\u001b[39m.\u001b[39mindex(selected_api_version) \u001b[39m<\u001b[39m VERSIONS_SUPPORTED\u001b[39m.\u001b[39mindex(version_method_added):\n\u001b[1;32m 78\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m 79\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00mclient\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m\u001b[39m}\u001b[39;00m\u001b[39m.\u001b[39m\u001b[39m{\u001b[39;00mfunc\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m is not available in API version \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 80\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mselected_api_version\u001b[39m}\u001b[39;00m\u001b[39m. Use service API version \u001b[39m\u001b[39m{\u001b[39;00mversion_method_added\u001b[39m}\u001b[39;00m\u001b[39m or newer.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 81\u001b[0m )\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/azure/ai/textanalytics/_text_analytics_client.py:1022\u001b[0m, in \u001b[0;36mTextAnalyticsClient.analyze_sentiment\u001b[0;34m(self, documents, **kwargs)\u001b[0m\n\u001b[1;32m 1008\u001b[0m \u001b[39mreturn\u001b[39;00m cast(\n\u001b[1;32m 1009\u001b[0m List[Union[AnalyzeSentimentResult, DocumentError]],\n\u001b[1;32m 1010\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_client\u001b[39m.\u001b[39msentiment(\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1019\u001b[0m )\n\u001b[1;32m 1020\u001b[0m )\n\u001b[1;32m 1021\u001b[0m \u001b[39mexcept\u001b[39;00m HttpResponseError \u001b[39mas\u001b[39;00m error:\n\u001b[0;32m-> 1022\u001b[0m \u001b[39mreturn\u001b[39;00m process_http_response_error(error)\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/azure/ai/textanalytics/_response_handlers.py:60\u001b[0m, in \u001b[0;36mprocess_http_response_error\u001b[0;34m(error)\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[39mif\u001b[39;00m error\u001b[39m.\u001b[39mstatus_code \u001b[39m==\u001b[39m \u001b[39m404\u001b[39m:\n\u001b[1;32m 59\u001b[0m raise_error \u001b[39m=\u001b[39m ResourceNotFoundError\n\u001b[0;32m---> 60\u001b[0m \u001b[39mraise\u001b[39;00m raise_error(response\u001b[39m=\u001b[39merror\u001b[39m.\u001b[39mresponse, error_format\u001b[39m=\u001b[39mCSODataV4Format) \u001b[39mfrom\u001b[39;00m \u001b[39merror\u001b[39;00m\n", "\u001b[0;31mClientAuthenticationError\u001b[0m: (401) Access denied due to invalid subscription key or wrong API endpoint. Make sure to provide a valid key for an active subscription and use a correct regional API endpoint for your resource.\nCode: 401\nMessage: Access denied due to invalid subscription key or wrong API endpoint. Make sure to provide a valid key for an active subscription and use a correct regional API endpoint for your resource." ] } ], "source": [ "response = text_analytics_client.analyze_sentiment(documents)\n", "successful_responses = [doc for doc in response if not doc.is_error]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "In this sample, we want to find the articles that mention Microsoft to read.\n" ] }, { "ename": "ClientAuthenticationError", "evalue": "(401) Access denied due to invalid subscription key or wrong API endpoint. Make sure to provide a valid key for an active subscription and use a correct regional API endpoint for your resource.\nCode: 401\nMessage: Access denied due to invalid subscription key or wrong API endpoint. Make sure to provide a valid key for an active subscription and use a correct regional API endpoint for your resource.", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mClientAuthenticationError\u001b[0m Traceback (most recent call last)", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/azure/ai/textanalytics/_text_analytics_client.py:900\u001b[0m, in \u001b[0;36mTextAnalyticsClient.extract_key_phrases\u001b[0;34m(self, documents, disable_service_logs, language, model_version, show_stats, **kwargs)\u001b[0m\n\u001b[1;32m 897\u001b[0m models \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_client\u001b[39m.\u001b[39mmodels(api_version\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_api_version)\n\u001b[1;32m 898\u001b[0m \u001b[39mreturn\u001b[39;00m cast(\n\u001b[1;32m 899\u001b[0m List[Union[ExtractKeyPhrasesResult, DocumentError]],\n\u001b[0;32m--> 900\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_client\u001b[39m.\u001b[39;49manalyze_text(\n\u001b[1;32m 901\u001b[0m body\u001b[39m=\u001b[39;49mmodels\u001b[39m.\u001b[39;49mAnalyzeTextKeyPhraseExtractionInput(\n\u001b[1;32m 902\u001b[0m analysis_input\u001b[39m=\u001b[39;49m{\u001b[39m\"\u001b[39;49m\u001b[39mdocuments\u001b[39;49m\u001b[39m\"\u001b[39;49m: docs},\n\u001b[1;32m 903\u001b[0m parameters\u001b[39m=\u001b[39;49mmodels\u001b[39m.\u001b[39;49mKeyPhraseTaskParameters(\n\u001b[1;32m 904\u001b[0m logging_opt_out\u001b[39m=\u001b[39;49mdisable_service_logs,\n\u001b[1;32m 905\u001b[0m model_version\u001b[39m=\u001b[39;49mmodel_version,\n\u001b[1;32m 906\u001b[0m )\n\u001b[1;32m 907\u001b[0m ),\n\u001b[1;32m 908\u001b[0m show_stats\u001b[39m=\u001b[39;49mshow_stats,\n\u001b[1;32m 909\u001b[0m \u001b[39mcls\u001b[39;49m\u001b[39m=\u001b[39;49mkwargs\u001b[39m.\u001b[39;49mpop(\u001b[39m\"\u001b[39;49m\u001b[39mcls\u001b[39;49m\u001b[39m\"\u001b[39;49m, key_phrases_result),\n\u001b[1;32m 910\u001b[0m \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs\n\u001b[1;32m 911\u001b[0m )\n\u001b[1;32m 912\u001b[0m )\n\u001b[1;32m 914\u001b[0m \u001b[39m# api_versions 3.0, 3.1\u001b[39;00m\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/azure/ai/textanalytics/_generated/_operations_mixin.py:111\u001b[0m, in \u001b[0;36mTextAnalyticsClientOperationsMixin.analyze_text\u001b[0;34m(self, body, show_stats, **kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m mixin_instance\u001b[39m.\u001b[39m_deserialize \u001b[39m=\u001b[39m Deserializer(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_models_dict(api_version))\n\u001b[0;32m--> 111\u001b[0m \u001b[39mreturn\u001b[39;00m mixin_instance\u001b[39m.\u001b[39;49manalyze_text(body, show_stats, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/azure/core/tracing/decorator.py:78\u001b[0m, in \u001b[0;36mdistributed_trace..decorator..wrapper_use_tracer\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 77\u001b[0m \u001b[39mif\u001b[39;00m span_impl_type \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m---> 78\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 80\u001b[0m \u001b[39m# Merge span is parameter is set, but only if no explicit parent are passed\u001b[39;00m\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/azure/ai/textanalytics/_generated/v2023_04_01/operations/_text_analytics_client_operations.py:299\u001b[0m, in \u001b[0;36mTextAnalyticsClientOperationsMixin.analyze_text\u001b[0;34m(self, body, show_stats, **kwargs)\u001b[0m\n\u001b[1;32m 298\u001b[0m \u001b[39mif\u001b[39;00m response\u001b[39m.\u001b[39mstatus_code \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m [\u001b[39m200\u001b[39m]:\n\u001b[0;32m--> 299\u001b[0m map_error(status_code\u001b[39m=\u001b[39;49mresponse\u001b[39m.\u001b[39;49mstatus_code, response\u001b[39m=\u001b[39;49mresponse, error_map\u001b[39m=\u001b[39;49merror_map)\n\u001b[1;32m 300\u001b[0m error \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_deserialize\u001b[39m.\u001b[39mfailsafe_deserialize(_models\u001b[39m.\u001b[39mErrorResponse, pipeline_response)\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/azure/core/exceptions.py:165\u001b[0m, in \u001b[0;36mmap_error\u001b[0;34m(status_code, response, error_map)\u001b[0m\n\u001b[1;32m 164\u001b[0m error \u001b[39m=\u001b[39m error_type(response\u001b[39m=\u001b[39mresponse)\n\u001b[0;32m--> 165\u001b[0m \u001b[39mraise\u001b[39;00m error\n", "\u001b[0;31mClientAuthenticationError\u001b[0m: (401) Access denied due to invalid subscription key or wrong API endpoint. Make sure to provide a valid key for an active subscription and use a correct regional API endpoint for your resource.\nCode: 401\nMessage: Access denied due to invalid subscription key or wrong API endpoint. Make sure to provide a valid key for an active subscription and use a correct regional API endpoint for your resource.", "\nThe above exception was the direct cause of the following exception:\n", "\u001b[0;31mClientAuthenticationError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m/home/ubuntu/SentenceStructureComparision/research/12_text_analytics_using_azure.ipynb Cell 8\u001b[0m line \u001b[0;36m7\n\u001b[1;32m 66\u001b[0m \u001b[39mprint\u001b[39m(\n\u001b[1;32m 67\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mThe articles that mention Microsoft are articles number: \u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m. Those are the ones I\u001b[39m\u001b[39m'\u001b[39m\u001b[39mm interested in reading.\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mformat(\n\u001b[1;32m 68\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m, \u001b[39m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mjoin(articles_that_mention_microsoft)\n\u001b[1;32m 69\u001b[0m )\n\u001b[1;32m 70\u001b[0m )\n\u001b[1;32m 73\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39m__name__\u001b[39m \u001b[39m==\u001b[39m \u001b[39m'\u001b[39m\u001b[39m__main__\u001b[39m\u001b[39m'\u001b[39m:\n\u001b[0;32m---> 74\u001b[0m sample_extract_key_phrases()\n", "\u001b[1;32m/home/ubuntu/SentenceStructureComparision/research/12_text_analytics_using_azure.ipynb Cell 8\u001b[0m line \u001b[0;36m5\n\u001b[1;32m 38\u001b[0m text_analytics_client \u001b[39m=\u001b[39m TextAnalyticsClient(endpoint\u001b[39m=\u001b[39mendpoint, credential\u001b[39m=\u001b[39mAzureKeyCredential(key))\n\u001b[1;32m 39\u001b[0m articles \u001b[39m=\u001b[39m [\n\u001b[1;32m 40\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 41\u001b[0m \u001b[39m Washington, D.C. Autumn in DC is a uniquely beautiful season. The leaves fall from the trees\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m 53\u001b[0m ]\n\u001b[0;32m---> 55\u001b[0m result \u001b[39m=\u001b[39m text_analytics_client\u001b[39m.\u001b[39;49mextract_key_phrases(articles)\n\u001b[1;32m 56\u001b[0m \u001b[39mfor\u001b[39;00m idx, doc \u001b[39min\u001b[39;00m \u001b[39menumerate\u001b[39m(result):\n\u001b[1;32m 57\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m doc\u001b[39m.\u001b[39mis_error:\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/azure/core/tracing/decorator.py:78\u001b[0m, in \u001b[0;36mdistributed_trace..decorator..wrapper_use_tracer\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 76\u001b[0m span_impl_type \u001b[39m=\u001b[39m settings\u001b[39m.\u001b[39mtracing_implementation()\n\u001b[1;32m 77\u001b[0m \u001b[39mif\u001b[39;00m span_impl_type \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m---> 78\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 80\u001b[0m \u001b[39m# Merge span is parameter is set, but only if no explicit parent are passed\u001b[39;00m\n\u001b[1;32m 81\u001b[0m \u001b[39mif\u001b[39;00m merge_span \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m passed_in_parent:\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/azure/ai/textanalytics/_validate.py:79\u001b[0m, in \u001b[0;36mvalidate_multiapi_args..decorator..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 77\u001b[0m \u001b[39m# the latest version is selected, we assume all features supported\u001b[39;00m\n\u001b[1;32m 78\u001b[0m \u001b[39mif\u001b[39;00m selected_api_version \u001b[39m==\u001b[39m VERSIONS_SUPPORTED[\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m]:\n\u001b[0;32m---> 79\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 81\u001b[0m \u001b[39mif\u001b[39;00m version_method_added \u001b[39mand\u001b[39;00m version_method_added \u001b[39m!=\u001b[39m selected_api_version \u001b[39mand\u001b[39;00m \\\n\u001b[1;32m 82\u001b[0m VERSIONS_SUPPORTED\u001b[39m.\u001b[39mindex(selected_api_version) \u001b[39m<\u001b[39m VERSIONS_SUPPORTED\u001b[39m.\u001b[39mindex(version_method_added):\n\u001b[1;32m 83\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m 84\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00mclient\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m\u001b[39m}\u001b[39;00m\u001b[39m.\u001b[39m\u001b[39m{\u001b[39;00mfunc\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m is not available in API version \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 85\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mselected_api_version\u001b[39m}\u001b[39;00m\u001b[39m. Use service API version \u001b[39m\u001b[39m{\u001b[39;00mversion_method_added\u001b[39m}\u001b[39;00m\u001b[39m or newer.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 86\u001b[0m )\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/azure/ai/textanalytics/_text_analytics_client.py:927\u001b[0m, in \u001b[0;36mTextAnalyticsClient.extract_key_phrases\u001b[0;34m(self, documents, disable_service_logs, language, model_version, show_stats, **kwargs)\u001b[0m\n\u001b[1;32m 915\u001b[0m \u001b[39mreturn\u001b[39;00m cast(\n\u001b[1;32m 916\u001b[0m List[Union[ExtractKeyPhrasesResult, DocumentError]],\n\u001b[1;32m 917\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_client\u001b[39m.\u001b[39mkey_phrases(\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 924\u001b[0m )\n\u001b[1;32m 925\u001b[0m )\n\u001b[1;32m 926\u001b[0m \u001b[39mexcept\u001b[39;00m HttpResponseError \u001b[39mas\u001b[39;00m error:\n\u001b[0;32m--> 927\u001b[0m \u001b[39mreturn\u001b[39;00m process_http_response_error(error)\n", "File \u001b[0;32m~/SentenceStructureComparision/venv/lib/python3.10/site-packages/azure/ai/textanalytics/_response_handlers.py:63\u001b[0m, in \u001b[0;36mprocess_http_response_error\u001b[0;34m(error)\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[39mif\u001b[39;00m error\u001b[39m.\u001b[39mstatus_code \u001b[39m==\u001b[39m \u001b[39m404\u001b[39m:\n\u001b[1;32m 62\u001b[0m raise_error \u001b[39m=\u001b[39m ResourceNotFoundError\n\u001b[0;32m---> 63\u001b[0m \u001b[39mraise\u001b[39;00m raise_error(response\u001b[39m=\u001b[39merror\u001b[39m.\u001b[39mresponse, error_format\u001b[39m=\u001b[39mCSODataV4Format) \u001b[39mfrom\u001b[39;00m \u001b[39merror\u001b[39;00m\n", "\u001b[0;31mClientAuthenticationError\u001b[0m: (401) Access denied due to invalid subscription key or wrong API endpoint. Make sure to provide a valid key for an active subscription and use a correct regional API endpoint for your resource.\nCode: 401\nMessage: Access denied due to invalid subscription key or wrong API endpoint. Make sure to provide a valid key for an active subscription and use a correct regional API endpoint for your resource." ] } ], "source": [ "# -------------------------------------------------------------------------\n", "# Copyright (c) Microsoft Corporation. All rights reserved.\n", "# Licensed under the MIT License. See License.txt in the project root for\n", "# license information.\n", "# --------------------------------------------------------------------------\n", "\n", "\"\"\"\n", "FILE: sample_extract_key_phrases.py\n", "\n", "DESCRIPTION:\n", " This sample demonstrates how to extract key talking points from a batch of documents.\n", "\n", " In this sample, we want to go over articles and read the ones that mention Microsoft.\n", " We're going to use the SDK to create a rudimentary search algorithm to find these articles.\n", "\n", "USAGE:\n", " python sample_extract_key_phrases.py\n", "\n", " Set the environment variables with your own values before running the sample:\n", " 1) AZURE_LANGUAGE_ENDPOINT - the endpoint to your Language resource.\n", " 2) AZURE_LANGUAGE_KEY - your Language subscription key\n", "\"\"\"\n", "\n", "\n", "def sample_extract_key_phrases() -> None:\n", " print(\n", " \"In this sample, we want to find the articles that mention Microsoft to read.\"\n", " )\n", " articles_that_mention_microsoft = []\n", " # [START extract_key_phrases]\n", " import os\n", " from azure.core.credentials import AzureKeyCredential\n", " from azure.ai.textanalytics import TextAnalyticsClient\n", "\n", " endpoint = \"https://xouhou-1234.cognitiveservices.azure.com/\"\n", " key = \"d7fcbf17455647adbca355b021334c83\"\n", "\n", " text_analytics_client = TextAnalyticsClient(endpoint=endpoint, credential=AzureKeyCredential(key))\n", " articles = [\n", " \"\"\"\n", " Washington, D.C. Autumn in DC is a uniquely beautiful season. The leaves fall from the trees\n", " in a city chock-full of forests, leaving yellow leaves on the ground and a clearer view of the\n", " blue sky above...\n", " \"\"\",\n", " \"\"\"\n", " Redmond, WA. In the past few days, Microsoft has decided to further postpone the start date of\n", " its United States workers, due to the pandemic that rages with no end in sight...\n", " \"\"\",\n", " \"\"\"\n", " Redmond, WA. Employees at Microsoft can be excited about the new coffee shop that will open on campus\n", " once workers no longer have to work remotely...\n", " \"\"\"\n", " ]\n", "\n", " result = text_analytics_client.extract_key_phrases(articles)\n", " for idx, doc in enumerate(result):\n", " if not doc.is_error:\n", " print(\"Key phrases in article #{}: {}\".format(\n", " idx + 1,\n", " \", \".join(doc.key_phrases)\n", " ))\n", " # [END extract_key_phrases]\n", " if \"Microsoft\" in doc.key_phrases:\n", " articles_that_mention_microsoft.append(str(idx + 1))\n", "\n", " print(\n", " \"The articles that mention Microsoft are articles number: {}. Those are the ones I'm interested in reading.\".format(\n", " \", \".join(articles_that_mention_microsoft)\n", " )\n", " )\n", "\n", "\n", "if __name__ == '__main__':\n", " sample_extract_key_phrases()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 2 }