File size: 12,380 Bytes
78dea1a
db21dc3
434f798
db21dc3
b0918e1
78dea1a
b7eca85
 
ad9d692
8dd1108
676f51e
ad9d692
cbe51b9
 
c6565e2
b7eca85
4b7b722
56509cf
d3ba82c
 
 
09d6ac2
ad9d692
09d6ac2
 
b8997d2
 
df5822e
 
51315e7
d3ba82c
b7eca85
623cc98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a6eddc
51315e7
 
 
 
 
b7eca85
 
ad9d692
 
78dea1a
 
 
b7eca85
75d2f81
b7eca85
b80bf81
09d6ac2
b80bf81
d3ba82c
78dea1a
 
 
b7eca85
78dea1a
 
 
 
b7eca85
9177836
4ba6ad1
5d8559e
e57e72d
b91f995
623cc98
 
b3ba3b3
 
623cc98
 
 
b3ba3b3
623cc98
 
 
 
 
 
 
 
 
b3ba3b3
568a224
b80bf81
 
 
 
d3ba82c
b80bf81
 
d3ba82c
b0cdaf1
90c7959
c4bbe7e
51315e7
 
be053da
 
90c7959
51315e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e3fa325
e4e85e6
8fec8ef
e4e85e6
e4f9732
 
8c2219a
e4e85e6
 
e4f9732
e4e85e6
 
5d8559e
 
b80bf81
 
d3ba82c
df5822e
b80bf81
952362b
b0cdaf1
5021c90
952362b
d3ba82c
b80bf81
 
0654423
90c7959
37b0b26
 
6fca786
 
 
d589312
3c830fa
b80bf81
 
952362b
b80bf81
3c830fa
b80bf81
3c830fa
b80bf81
 
0654423
952362b
 
 
 
 
 
df083d0
 
 
952362b
 
 
 
4eb50e7
 
 
 
 
 
 
 
 
3a5f6f2
4eb50e7
952362b
761cff7
 
ca7a70a
761cff7
 
 
0022591
 
 
 
 
 
 
 
 
 
 
 
761cff7
 
0022591
 
 
687dc78
 
 
 
b0cdaf1
687dc78
 
 
 
 
 
0022591
 
 
 
 
 
 
 
 
 
 
ad9d692
 
cbe51b9
ad9d692
 
6c538e5
765d86a
 
b3ba3b3
5d8559e
765d86a
 
 
 
 
 
 
 
b3ba3b3
765d86a
5d8559e
 
765d86a
b0918e1
765d86a
 
 
b3ba3b3
c472ab1
765d86a
 
 
 
b0918e1
cbe51b9
ad9d692
 
 
cbe51b9
 
b3ba3b3
c6565e2
 
 
d4ce55b
c6565e2
 
 
db21dc3
 
 
b3ba3b3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
import discord
import threading
import os
import gradio as gr
import time
from discord.ext import commands
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
import aiojobs
import asyncio
import re
from datetime import datetime, timedelta
from apscheduler.executors.pool import ThreadPoolExecutor
from apscheduler.schedulers.background import BackgroundScheduler


# Credentials come from the environment (None when the variable is unset).
DISCORD_TOKEN = os.environ.get('DISCORD_TOKEN')
SLACK_BOT_TOKEN = os.environ.get('BOT_USER_OAUTH_TOKEN_HF')

# Slack destinations: the real channel ID is read from SLACK_CHANNEL_ID_HF,
# the test channel ID is hard-coded.
SLACK_CHANNEL_ID_TEST = 'C07B4KNU5BQ'
SLACK_CHANNEL_ID = os.environ.get('SLACK_CHANNEL_ID_HF')

# Discord channel IDs.
#   1259415803879751700 = test forum
#   1019883044724822016 = ask for help
GIVE_HF_FEEDBACK_CHANNEL_ID = 897391062975385640
ASK_FOR_HELP_CHANNEL_ID = 1019883044724822016
GRADIO_CHANNEL_ID = 1025174734427656283
DATA_DISCUSSIONS_CHANNEL_ID = 1217179426002047076
ARGILLA_HELP_CHANNEL_ID = 1253640751481356330

# Keyword triggers for the daily Slack pings.
# Each key is a tuple of lowercase substrings; on_message fires a trigger when
# ALL of them occur somewhere in the lowercased message text (plain substring
# test, not whole-word). Each value is the list of Slack mention strings that
# receive a ping for that trigger.
TRIGGERS = {
    ("discord bot",): ["<@U051DB2754M>"],  # adam
    ("autotrain",): ["<@U01E3LEC2N7>"],  # abhishek
    ("auto train",): ["<@U01E3LEC2N7>"], # abhishek
    ("competition",): ["<@U01E3LEC2N7>"], # abhishek
    ("competitions",): ["<@U01E3LEC2N7>"], # abhishek
    ("text to speech",): ["<@U039C2GANMV>"],  # VB
    ("tts",): ["<@U039C2GANMV>"], # VB
    ("asr",): ["<@U039C2GANMV>"], # VB
    ("musicgen",): ["<@U039C2GANMV>"], # VB
    ("whisper",): ["<@U039C2GANMV>"], # VB
    ("speech recognition",): ["<@U039C2GANMV>"], # VB
    ("bark",): ["<@U039C2GANMV>"], # VB
    ("sentence-transformers",): ["<@U04E4DNPWG7>"],  # tom aarsen
    ("sentence_transformers",): ["<@U04E4DNPWG7>"], # tom aarsen
    ("setfit",): ["<@U04E4DNPWG7>"], # tom aarsen
    ("sentence transformers",): ["<@U04E4DNPWG7>"], # tom aarsen
    # NOTE(review): the next two entries each list five IDs but only four
    # comma-separated names in their comments -- confirm who the remaining ID is.
    ("argilla",): ["<@U076B8C7G3E>", "<@U0766H30T7F>", "<@U076MF65WEM>", "<@U0765RENPNZ>", "<@U0768QEN0LA>"],  # david berenstein, natalia elvira, sara han diaz lorenzo, Gabriel Martín Blázquez
    ("distilabel",): ["<@U076B8C7G3E>", "<@U076MF65WEM>", "<@U0765RENPNZ>", "<@U0768QEN0LA>", "<@U076271MBUN>"], # david berenstein, sara han diaz lorenzo, Gabriel Martín Blázquez, Agustín Piqueres
    ("docs",): ["<@U02DATT4C5B>"],  # steven liu
    ("documentation",): ["<@U02DATT4C5B>"], # steven liu
    ("gradio",): ["<@U02NMK75F1V>", "<@U04FLGQ26PQ>"],  # abubakar abid, yuvraj sharma
    ("dataset", "feedback"): ["<@U0768RCHCRY>"],  # ben burtenshaw (both words must appear)
    ("git ",): ["<@U07F1NP5U0K>"],  # ann huang (trailing space is part of the key; presumably to avoid matching e.g. "github" -- confirm)
    ("lfs",): ["<@U07F1NP5U0K>"],  # ann huang
    ("xet",): ["<@U07F1NP5U0K>"],  # ann huang
    ("upload",): ["<@U07F1NP5U0K>"],  # ann huang
    ("download",): ["<@U07F1NP5U0K>"],  # ann huang
    ("stream",): ["<@U07F1NP5U0K>"],  # ann huang
}

# Queue of pending keyword pings: on_message appends dicts here and
# send_daily_pings posts them to Slack and clears the queue.
daily_pings = []

# Request all gateway intents. NOTE(review): the explicit `messages = True`
# is presumably redundant after Intents.all() -- confirm before removing.
intents = discord.Intents.all()
intents.messages = True
bot = commands.Bot(command_prefix='!', intents=intents)

slack_client = WebClient(token=SLACK_BOT_TOKEN)

# Maps a Discord forum thread ID to the Slack `ts` of the message announcing
# it (filled by on_thread_create). Held in memory only.
thread_mapping = {}



@bot.event
async def on_ready():
    """Print the account the bot is logged in as once Discord reports ready."""
    logged_in_user = bot.user
    print(f'Logged in as {logged_in_user}')

@bot.event
async def on_message(message):
    """Queue keyword pings and mirror selected Discord messages to Slack.

    For every message not written by this bot:

    1. If the author is a guild member holding neither the "huggingfolks" nor
       the "bots" role, scan the text for TRIGGERS and append one entry per
       Slack mention to ``daily_pings``.
    2. If the message is in a thread that has an entry in ``thread_mapping``,
       post it as a reply in the matching Slack thread.
    3. If the message is in the give-hf-feedback channel, post it to Slack.
    4. Hand the message to the command processor.
    """
    if message.author == bot.user:
        return

    # notification bot
    print("on_message")

    # Direct messages have no guild, and webhook / non-member authors are
    # plain users without a ``roles`` attribute. Accessing either used to
    # raise AttributeError and abort the whole handler (including the Slack
    # relays and command processing below), so the keyword scan is now
    # limited to real guild members.
    author_roles = getattr(message.author, 'roles', None)
    if message.guild is not None and author_roles is not None:
        huggingfolks_role = discord.utils.get(message.guild.roles, id=897376942817419265)
        bots_role = discord.utils.get(message.guild.roles, id=1258328471609016341)
        # no need for ping if we're already discussing, and bots shouldn't
        # trigger pings for this
        if huggingfolks_role not in author_roles and bots_role not in author_roles:
            print(" not bot ")
            content = message.content.lower()

            for trigger, mentions in TRIGGERS.items():
                if all(word in content for word in trigger):
                    # The helper may find no excerpt (e.g. trigger words that
                    # are present but not adjacent); fall back to the start of
                    # the message so Slack never shows "None".
                    adjacent_words = (
                        extract_adjacent_words(message.content, trigger)
                        or message.content[:200]
                    )
                    for slack_mention in mentions:
                        daily_pings.append({
                            'author': str(message.author),
                            'content': adjacent_words,
                            'channel': message.channel.name,
                            'url': message.jump_url,
                            'mention': slack_mention,
                            'trigger': trigger
                        })
                    print(f"daily pings:{daily_pings}")

    # Check if the message is in a thread
    if isinstance(message.channel, discord.Thread):
        discord_thread_id = message.channel.id
        # Check if there's an existing Slack thread for this Discord thread
        # (the only Slack threads created should be for forum channel threads, not just any thread)
        if discord_thread_id in thread_mapping:
            slack_thread_ts = thread_mapping[discord_thread_id]
            # post to slack only if thread already exists
            post_to_slack_forum_version(message, SLACK_CHANNEL_ID, message.content, message.author, thread_ts=slack_thread_ts)

    if message.channel.id == GIVE_HF_FEEDBACK_CHANNEL_ID:
        post_to_slack_general(message, SLACK_CHANNEL_ID)

    await bot.process_commands(message)


def post_to_slack_general(message, channel):
    """Post a give-hf-feedback Discord message to a Slack channel.

    The Slack text names the author, includes the message content and adds one
    "Attachment: <url>" line per attachment.

    Returns the Slack timestamp (``ts``) of the posted message, or None when
    Slack rejects the call (the error is printed).
    """
    header = f"New post in `#give-hf-feedback` by {message.author}: {message.content}"
    # Handle attachments if any: each URL goes on its own line after the text.
    attachment_lines = "".join(
        f"\nAttachment: {item.url}" for item in message.attachments or ()
    )
    text = header + attachment_lines

    try:
        response = slack_client.chat_postMessage(channel=channel, text=text)
    except SlackApiError as e:
        print(f"Error posting to Slack: {e.response['error']}")
        return None
    return response['ts']


def extract_adjacent_words(content, trigger):
    """Return a short excerpt of *content* centred on the trigger match.

    The excerpt is ``'...' + up to 5 words before + match + up to 5 words
    after + '...'``. Matching is case-insensitive.

    Args:
        content: Original (non-lowercased) message text.
        trigger: Tuple of trigger words/phrases, as used for TRIGGERS keys.

    Returns:
        The excerpt string. Lookup order:
        1. all trigger words as one adjacent phrase;
        2. otherwise the first occurrence of any single trigger word
           (multi-word triggers fire when the words appear anywhere in the
           message, not only next to each other);
        3. otherwise the first 10 words of *content*.
        Previously cases 2 and 3 returned None, which was posted to Slack as
        the literal text "None".
    """
    phrase_pattern = r'\s*\b'.join(map(re.escape, trigger))
    match = re.search(phrase_pattern, content, re.IGNORECASE)
    if match is None:
        any_word_pattern = '|'.join(map(re.escape, trigger))
        match = re.search(any_word_pattern, content, re.IGNORECASE)

    if match is None:
        words = content.split()[:10]
    else:
        start, end = match.span()
        before = content[:start].split()[-5:]
        after = content[end:].split()[:5]
        words = before + [match.group()] + after

    excerpt = '...' + ' '.join(words) + '...'
    print("--------------------------------------------------------------")
    print(excerpt)
    return excerpt


@bot.event
async def on_thread_create(thread):
    """Announce a new forum thread on Slack and remember the Slack thread.

    Only threads whose parent is one of the watched forum channels are
    announced. When the Slack post succeeds, its timestamp is stored in
    ``thread_mapping`` under the Discord thread ID.
    """
    watched_forums = {
        ASK_FOR_HELP_CHANNEL_ID,
        GRADIO_CHANNEL_ID,
        ARGILLA_HELP_CHANNEL_ID,
        DATA_DISCUSSIONS_CHANNEL_ID,
    }
    # (discord) must be the child thread of the CORRECT forum channel(s)
    # (not just any thread, or any forum channel)
    if not isinstance(thread.parent, discord.ForumChannel):
        return
    if thread.parent.id not in watched_forums:
        return

    announcement = (
        f"New forum thread started in {thread.parent.name} by {thread.owner}: *{thread.name}*\n"
        f"{thread.jump_url}"
    )
    slack_thread_ts = post_to_slack_create_thread(SLACK_CHANNEL_ID, announcement)
    if slack_thread_ts:
        thread_mapping[thread.id] = slack_thread_ts


def post_to_slack_forum_version(message, channel, text, author, thread_ts=None):
    """Post a forum-thread message to Slack, prefixed with its author.

    The Slack text is "<author>: <text>" followed by one "Attachment: <url>"
    line per attachment on *message*. When *thread_ts* is given it is passed
    to Slack as the thread to reply in.

    Returns the Slack timestamp (``ts``) of the posted message, or None when
    Slack rejects the call (the error is printed).
    """
    attachment_lines = "".join(
        f"\nAttachment: {item.url}" for item in message.attachments or ()
    )
    text = f"{author}" + ": " + (text + attachment_lines)

    try:
        response = slack_client.chat_postMessage(
            channel=channel, text=text, thread_ts=thread_ts
        )
    except SlackApiError as e:
        print(f"Error posting to Slack: {e.response['error']}")
        return None
    # The Slack message timestamp doubles as the thread ID.
    return response['ts']


def post_to_slack_create_thread(channel, text, thread_ts=None):
    """Post *text* to Slack with link and media unfurling disabled.

    Returns the Slack timestamp (``ts``) of the posted message, which callers
    use as the thread ID, or None when Slack rejects the call (the error is
    printed).
    """
    payload = {
        'channel': channel,
        'text': text,
        'thread_ts': thread_ts,
        'unfurl_links': False,
        'unfurl_media': False,
    }
    try:
        response = slack_client.chat_postMessage(**payload)
    except SlackApiError as e:
        print(f"Error posting to Slack: {e.response['error']}")
        return None
    return response['ts']


@bot.command()
async def list_tags(ctx, forum_channel_id: int):
    """Reply with the names and IDs of a forum channel's available tags.

    Only the user with the hard-coded ID may use the command; any other caller,
    or a channel ID that is not a forum channel, produces no reply.
    """
    if ctx.author.id != 811235357663297546:
        return

    forum_channel = bot.get_channel(forum_channel_id)
    if not isinstance(forum_channel, discord.ForumChannel):
        return

    described_tags = ", ".join(
        f"{tag.name} (ID: {tag.id})" for tag in forum_channel.available_tags
    )
    await ctx.send(f'Available tags: {described_tags}')


# react with ✅ on slack if marked with solved tag on discord
SOLVED_TAG_IDS = {1026743978026094664, 1025179659215847575, 1263095032328753174, 1253641354312155208}
@bot.event
async def on_thread_update(before, after):
    """Mirror the "solved" tag of a watched forum thread as a Slack reaction.

    Adding a tag from SOLVED_TAG_IDS adds the white_check_mark reaction to the
    Slack message recorded for the thread; removing one removes the reaction.
    Threads outside the watched forums, or without an entry in
    ``thread_mapping``, are ignored.
    """
    watched_forums = {
        ASK_FOR_HELP_CHANNEL_ID,
        GRADIO_CHANNEL_ID,
        ARGILLA_HELP_CHANNEL_ID,
        DATA_DISCUSSIONS_CHANNEL_ID,
    }
    if not isinstance(after.parent, discord.ForumChannel):
        return
    if after.parent.id not in watched_forums:
        return

    tags_before = {tag.id for tag in before.applied_tags}
    tags_after = {tag.id for tag in after.applied_tags}

    if after.id not in thread_mapping:
        return
    slack_thread_ts = thread_mapping[after.id]

    newly_added = tags_after - tags_before
    newly_removed = tags_before - tags_after

    if newly_added & SOLVED_TAG_IDS:
        react_to_slack_message(slack_thread_ts, 'white_check_mark')

    if newly_removed & SOLVED_TAG_IDS:
        unreact_to_slack_message(slack_thread_ts, 'white_check_mark')


def react_to_slack_message(thread_ts, emoji):
    """Add the *emoji* reaction to the message at *thread_ts* in SLACK_CHANNEL_ID.

    A Slack API error is printed, not raised.
    """
    reaction = {
        'channel': SLACK_CHANNEL_ID,
        'name': emoji,
        'timestamp': thread_ts,
    }
    try:
        slack_client.reactions_add(**reaction)
    except SlackApiError as e:
        print(f"Error reacting to Slack message: {e.response['error']}")


def unreact_to_slack_message(thread_ts, emoji):
    """Remove the *emoji* reaction from the message at *thread_ts* in SLACK_CHANNEL_ID.

    A Slack API error is printed, not raised.
    """
    reaction = {
        'channel': SLACK_CHANNEL_ID,
        'name': emoji,
        'timestamp': thread_ts,
    }
    try:
        slack_client.reactions_remove(**reaction)
    except SlackApiError as e:
        print(f"Error removing reaction from Slack message: {e.response['error']}")

#----------------------------------------------------------------------------------------------

def send_daily_pings():
    """Post all queued keyword pings to Slack, one thread per mentioned user.

    For each distinct mention a header message is posted to SLACK_CHANNEL_ID
    and every ping for that mention is posted as a reply in its thread. Calls
    are spaced 2 seconds apart to respect Slack rate limits.

    Pings that Slack rejects are printed and put back on the queue for the
    next run; pings that were delivered are never re-sent.
    """
    # Take a snapshot and remove exactly those entries in place. The bot
    # thread keeps appending to ``daily_pings`` while this job sleeps between
    # Slack calls; rebinding the list to [] at the end (as before) silently
    # dropped anything that arrived during that window.
    pending = list(daily_pings)
    del daily_pings[:len(pending)]
    if not pending:
        return

    print(f"sending daily pings...{pending}")

    # group pings by who they are meant to notify
    pings_by_mention = {}
    for ping in pending:
        pings_by_mention.setdefault(ping['mention'], []).append(ping)

    undelivered = []

    # send each group of pings in a separate thread
    for mention, pings in pings_by_mention.items():
        try:
            main_message = slack_client.chat_postMessage(
                channel=SLACK_CHANNEL_ID,
                text=f"DAILY PINGS FOR {mention} ON {datetime.now().strftime('%d/%m/%Y')}",
                unfurl_links=False,
                unfurl_media=False
            )
        except SlackApiError as e:
            # Without a header there is no thread to reply in: retry the
            # whole group on the next run.
            print(f"Error posting to Slack: {e.response['error']}")
            undelivered.extend(pings)
            time.sleep(2) # https://api.slack.com/apis/rate-limits
            continue
        time.sleep(2) # https://api.slack.com/apis/rate-limits
        main_ts = main_message['ts']
        for ping in pings:
            try:
                slack_client.chat_postMessage(
                    channel=SLACK_CHANNEL_ID,
                    text=f"(for the keyword -> '{ping['trigger']}')\nFrom {ping['author']} in channel #{ping['channel']}: {ping['content']}\n{ping['url']}",
                    thread_ts=main_ts,
                    unfurl_links=False,
                    unfurl_media=False
                )
            except SlackApiError as e:
                print(f"Error posting to Slack: {e.response['error']}")
                undelivered.append(ping)
            time.sleep(2) # https://api.slack.com/apis/rate-limits

    if undelivered:
        daily_pings.extend(undelivered)

# pings -------------------------------------------------------------------------------------------
# Background scheduler that calls send_daily_pings on a one-day interval.
# The executor has a single worker, so at most one job runs at a time.
executor = ThreadPoolExecutor(max_workers=1)
scheduler = BackgroundScheduler(executors={'default': executor})
# NOTE(review): with an interval trigger and no start date, the first run is
# presumably one day after startup -- confirm against APScheduler docs.
scheduler.add_job(send_daily_pings, trigger='interval', days=1)
scheduler.start()


# runs discord bot in thread = helps avoid blocking calls
def run_bot():
    """Run the Discord bot with DISCORD_TOKEN (blocks the calling thread)."""
    bot.run(DISCORD_TOKEN)
bot_thread = threading.Thread(target=run_bot)
bot_thread.start()
def greet(name):
    """Return the greeting "Hello <name>!" used by the Gradio demo."""
    return "".join(("Hello ", name, "!"))
# Minimal Gradio text-in/text-out interface around greet, launched at import
# time in the main thread (the Discord bot runs in its own thread).
# NOTE(review): presumably exists only to give the hosting app a web UI -- confirm.
demo = gr.Interface(fn=greet, inputs="text", outputs="text")
demo.launch()