File size: 5,978 Bytes
95f7ff3
5880c96
9861c09
50639ab
2e937f5
5880c96
 
2e73823
5880c96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9861c09
5880c96
 
 
 
 
 
 
 
 
 
 
9861c09
5880c96
9861c09
5880c96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9861c09
5880c96
 
 
9861c09
5880c96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9861c09
 
 
 
 
 
 
 
 
 
5880c96
 
 
 
 
 
 
 
 
 
 
 
9861c09
5880c96
9861c09
5880c96
9861c09
5880c96
9861c09
5880c96
9861c09
5880c96
9861c09
5880c96
9861c09
5880c96
9861c09
5880c96
9861c09
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import requests
import json
import os

# Hugging Face Inference API configuration.  The token is read from the
# environment so it never lives in source control.
api_token = os.environ.get("TOKEN")
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"
headers = {"Authorization": f"Bearer {api_token}"}

def query(payload, timeout=30):
    """POST *payload* to the HF Inference API and return the decoded JSON.

    Args:
        payload: JSON-serializable request body, e.g. ``{"inputs": prompt}``.
        timeout: seconds to wait for the request (new parameter with a
            default, so existing callers are unaffected).

    Returns:
        The parsed JSON response — a list on success, or a dict (typically
        with an ``"error"`` key) when the API reports a failure.

    Raises:
        requests.RequestException: on network failure or timeout.
        ValueError: if the response body is not valid JSON.
    """
    # The original call had no timeout, so a hung connection would block
    # the whole script indefinitely.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    return response.json()

def analyze_sentiment(text):
    """Classify *text* into one of the post categories via the LLM.

    The prompt instructs the model to answer with exactly one category
    keyword.  The original code checks ``response.count(keyword) == 2``;
    NOTE(review): this presumably works because ``generated_text`` echoes
    the prompt (one occurrence) plus the model's answer (second
    occurrence) — confirm against the deployed model's output format.

    Args:
        text: the post text to classify.

    Returns:
        A human-readable category label on success, or an error string
        when the API response is invalid or ambiguous.
    """
    output = query({
        "inputs": f'''
system
You're going to deeply analyze the texts I'm going to give you and you're only going to tell me which category they belong to by answering only the words that correspond to the following categories:
For posts that talk about chat models/LLM, return "Chatmodel/LLM"
For posts that talk about image generation models, return "image_generation"
For texts that ask for information from the community, return "questions"
For posts about fine-tuning or model adjustment, return "fine_tuning"
For posts related to ethics and bias in AI, return "ethics_bias"
For posts about datasets and data preparation, return "datasets"
For posts about tools and libraries, return "tools_libraries"
For posts containing tutorials and guides, return "tutorials_guides"
For posts about debugging and problem-solving, return "debugging"
Respond only with the category name, without any additional explanation or text.

user
{text}

assistant
'''
    })

    # API errors come back as a dict (e.g. {"error": ...}); the original
    # code silently fell through and returned None in that case.
    if not isinstance(output, list) or not output:
        return f"Erreur: Réponse invalide - '{output}'"

    response = output[0].get('generated_text', '').strip().lower()

    # (search keyword, returned label) pairs.  Order is significant and
    # preserved from the original if/elif chain: the first keyword whose
    # count is exactly 2 wins.
    categories = [
        ('questions', 'questions'),
        ('chatmodel/llm', 'Chat Model/LLM'),
        ('other', 'Other'),
        ('image_generation', 'Image Generation'),
        ('fine_tuning', 'Fine-tuning'),
        ('ethics_bias', 'Ethics and Bias'),
        ('datasets', 'Datasets'),
        ('tools_libraries', 'Tools and Libraries'),
        ('tutorials_guides', 'Tutorials and Guides'),
        ('debugging', 'Debugging'),
    ]
    for keyword, label in categories:
        if response.count(keyword) == 2:
            return label
    return f"Erreur: Réponse ambiguë - '{response}'"

# Base URL of the Hugging Face posts API.
base_url = "https://huggingface.co/api/posts"

# Pagination parameters.
skip = 0     # number of items to skip
limit = 100  # maximum number of items per request

# Accumulates every post together with its concatenated text fragments.
all_posts_with_text = []

while True:
    # Let requests build the query string; the timeout prevents the
    # scrape from hanging forever on a dead connection (the original
    # hand-built the URL and had no timeout).
    response = requests.get(
        base_url,
        params={"skip": skip, "limit": limit, "sort": "recent"},
        timeout=30,
    )

    if response.status_code == 200:
        data = response.json()

        # Stop when the API returns an empty page.
        if not data["socialPosts"]:
            break

        # Join all "text" fragments of each post with single spaces
        # (equivalent to the original append-then-strip loop, without
        # the quadratic string concatenation).
        for post in data["socialPosts"]:
            post_text = " ".join(
                item["value"] for item in post["content"] if item["type"] == "text"
            )
            all_posts_with_text.append({"slug": post["slug"], "text": post_text.strip()})

        # Advance to the next page.
        skip += limit

    else:
        print(f"Erreur lors de la récupération des données: {response.status_code}")
        break

# all_posts_with_text now holds every retrieved post with its text.

# Per-category tallies, keyed by the exact label strings that
# analyze_sentiment() returns.  This replaces ten parallel counter
# variables and a ten-branch elif chain.
category_counts = {
    "questions": 0,
    "Chat Model/LLM": 0,
    "Other": 0,
    "Image Generation": 0,
    "Fine-tuning": 0,
    "Ethics and Bias": 0,
    "Datasets": 0,
    "Tools and Libraries": 0,
    "Tutorials and Guides": 0,
    "Debugging": 0,
}

# Classify every post and tally the result.  Labels not in the table
# (e.g. the "Erreur: ..." strings) are ignored, exactly as the original
# elif chain ignored them.  The unused `i`/`slug` locals are gone.
for post in all_posts_with_text:
    resultat = analyze_sentiment(post["text"])
    if resultat in category_counts:
        category_counts[resultat] += 1

# Report — output is byte-identical to the original prints.
print(f"Questions: {category_counts['questions']}")
print(f"Chat Model/LLM: {category_counts['Chat Model/LLM']}")
print(f"Other: {category_counts['Other']}")
print(f"Image Generation: {category_counts['Image Generation']}")
print(f"Fine-tuning: {category_counts['Fine-tuning']}")
print(f"Ethics and Bias: {category_counts['Ethics and Bias']}")
print(f"Datasets: {category_counts['Datasets']}")
print(f"Tools and Libraries: {category_counts['Tools and Libraries']}")
print(f"Tutorials and Guides: {category_counts['Tutorials and Guides']}")
print(f"Debugging: {category_counts['Debugging']}")