# rettttt / app.py
# godlyjkrjjjcope's picture
# Upload app.py
# e0b61f1
import gradio as gr
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
import requests, re, base64, string, random
from PIL import Image, ImageEnhance
from io import BytesIO
import os
# Load the OCR components once at import time; process_image() reuses these
# module-level objects on every call. The processor and the encoder-decoder
# weights are loaded from two different checkpoints.
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-small-printed")
model = VisionEncoderDecoderModel.from_pretrained("jonahgoldberg/bk_wht_8kun")
def random_string(string_length):
    """Return a random string of lowercase ASCII letters and digits.

    Args:
        string_length: Number of characters to generate. Zero or a negative
            value yields an empty string.

    Returns:
        A ``str`` of ``string_length`` characters, each picked independently
        from ``string.ascii_lowercase + string.digits``.

    Note:
        Characters come from the ``random`` module, so the result is not
        suitable for security-sensitive tokens.
    """
    # Named ``alphabet`` rather than ``input`` so the builtin is not shadowed.
    alphabet = string.ascii_lowercase + string.digits
    return ''.join(random.choice(alphabet) for _ in range(string_length))
# # load image examples
# urls = [
# 'https://storage.googleapis.com/trocr-captcha.appspot.com/captcha_images_v2/nfcb5.png',
# 'https://storage.googleapis.com/trocr-captcha.appspot.com/captcha_images_v2/p57fn.png',
# 'https://storage.googleapis.com/trocr-captcha.appspot.com/captcha_images_v2/w2yp7.png',
# 'https://storage.googleapis.com/trocr-captcha.appspot.com/captcha_images_v2/pme86.png',
# 'https://storage.googleapis.com/trocr-captcha.appspot.com/captcha_images_v2/w4nfx.png',
# 'https://storage.googleapis.com/trocr-captcha.appspot.com/captcha_images_v2/nf8b8.png'
# ]
# for idx, url in enumerate(urls):
# image = Image.open(requests.get(url, stream=True).raw)
# image.save(f"image_{idx}.png")
def execit(command):
    """Run ``command`` through the system shell and return its status.

    This is a thin wrapper around ``os.system``: the string is handed to the
    shell unmodified, so it must never be assembled from untrusted input.

    Args:
        command: Shell command line to execute.

    Returns:
        Whatever ``os.system`` returns for the command (the exact encoding of
        the status is platform dependent).
    """
    status = os.system(command)
    return status
###git add *.txt && git add *.py && git commit -m "lol" && git push
###git add *.txt && git add *.py && git commit -m "lol" && git push
###git add *.txt && git add *.py && git commit -m "lol" && git push
def process_image(image):
    """Recognise the text contained in a base64-encoded image.

    Args:
        image: Base64 image payload as a string. A leading
            ``data:image/<type>;base64,`` data-URL header is stripped when
            present.

    Returns:
        The text decoded from the first generated sequence, with special
        tokens removed.
    """
    # Strip an optional data-URL header so only the base64 payload remains.
    payload = re.sub(r'^data:image/.+;base64,', '', image)
    picture = Image.open(BytesIO(base64.b64decode(payload))).convert("RGB")

    # A colour-enhancement factor of 0 removes all colour, leaving a
    # black-and-white picture that is still in RGB mode.
    # Named ``desaturate`` rather than ``filter`` so the builtin is not shadowed.
    desaturate = ImageEnhance.Color(picture)
    picture = desaturate.enhance(0)

    # Preprocess the picture into PyTorch tensors for the model.
    pixel_values = processor(picture, return_tensors="pt").pixel_values

    # Generate token ids using the model's default generation settings.
    generated_ids = model.generate(pixel_values)

    # Only one image was submitted, so return the first decoded sequence.
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
# Heading and description text passed to the Gradio interface (title= / description=).
title = "8kun captcha solver 1 in 8"
description = "Due to events. in 8chan staff moderation. I am attacking it. The gamergate shitposting days are over. and so is 8chan."
# article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2109.10282'>TrOCR: Transformer-based Optical Character Recognition with Pre-trained Models</a> | <a href='https://github.com/microsoft/unilm/tree/master/trocr'>Github Repo</a></p>"
# examples =[["image_0.png"], ["image_1.png"], ["image_2.png"], ["image_3.png"], ["image_4.png"], ["image_5.png"]]
#css = """.output_image, .input_image {height: 600px !important}"""
# Build the web UI: one textbox in (the base64 image string) and one textbox
# out (the recognised text).
iface = gr.Interface(
    fn=process_image,
    # inputs=gr.inputs.Image(type="pil"),
    inputs=gr.Textbox(placeholder="base64 string (right-click => copy-link) ..."),
    # Use the top-level component, consistent with ``inputs``; the legacy
    # ``gr.outputs`` namespace is deprecated.
    outputs=gr.Textbox(),
    title=title,
    description=description,
    # article=article,
    # examples=examples
)
# Start serving the interface.
iface.launch(debug=True)