diff --git "a/app.py" "b/app.py" --- "a/app.py" +++ "b/app.py" @@ -1,4 +1,5 @@ from googletrans import Translator +from googletrans import LANGUAGES import spacy import gradio as gr import nltk @@ -14,6 +15,7 @@ from gtts import gTTS #from io import BytesIO from collections import Counter from PIL import Image, ImageDraw, ImageFont +import io import numpy as np from docx import Document import textwrap @@ -28,12 +30,25 @@ import math from langchain.document_loaders import YoutubeLoader #need youtube_transcpt_api and pytube installed from youtube_transcript_api import YouTubeTranscriptApi from spacy_syllables import SpacySyllables #https://spacy.io/universe/project/spacy_syllables/ +import torch +from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline +from flores200_codes import flores_codes #- figure this out after it works #When I forgot about the readme file ChatGPT suggested these - Leaving to remember the Readme.md must be updated as well #print(gr.__version__) #import subprocess #subprocess.run(["pip", "install", "--upgrade", "gradio==3.47.1"]) #For huggingface as they sometimes install specific versions on container build +#Direct and API based spaces +#Make new spaces to to debug inference +#https://huggingface.co/spaces/FM-1976/Starling7B-PlayGround - ctransformers +#https://huggingface.co/spaces/FM-1976/StableLM-Zepyhr-3B_Playground - llamacpp +#https://huggingface.co/spaces/julien-c/zero-gpu-demo/blob/main/app.py - switch the gpu on or off in the interface +#Custom gradio components - https://huggingface.co/freddyaboulton +#Florida map in gradio component - https://huggingface.co/spaces/freddyaboulton/gradio_folium (folium) +#https://huggingface.co/spaces/freddyaboulton/gradio_pdf/blob/main/Dockerfile - PDF component +#https://huggingface.co/spaces/jbilcke-hf/faster-image-server/blob/main/app.py - sdxl-turbo api server idea --> test on lambdalabs with share true + #Uncomment these for Huggingface nltk.download('maxent_ne_chunker') #Chunker nltk.download('stopwords') #Stop Words List (Mainly Roman Languages) @@ -67,6 +82,7 @@ nlp_ja = spacy.load("ja_core_news_sm") nlp_zh = spacy.load("zh_core_web_sm") nlp_en_syllable = spacy.load("en_core_web_sm") nlp_en_syllable.add_pipe("syllables", after="tagger") #https://spacy.io/universe/project/spacy_syllables/ +langdropdown_choices = [f"{code}: {name}" for code, name in LANGUAGES.items()] nlp = spacy.load('en_core_web_sm') translator = Translator() @@ -141,7 +157,7 @@ def BatchWordChunk(sentence): # Translate from English to French -langdest = gr.Dropdown(choices=["af", "de", "es", "ko", "ja", "zh-cn"], label="Choose Language", value="de") +langdest = gr.Dropdown(choices=langdropdown_choices, label="Choose Language", value="de: german") #["af", "de", "es", "ko", "ja", "zh-cn"] ChunkModeDrop = gr.Dropdown(choices=["Chunks", "Reverse", "Three Word Chunks", "Spelling Chunks"], label="Choose Chunk Type", value="Chunks") @@ -159,7 +175,7 @@ def FrontRevSentChunk (Chunkmode, Translate, Text, langdest): if Translate: TransFinalOutput = FinalOutput - translated = translator.translate(TransFinalOutput, dest=langdest) + translated = translator.translate(TransFinalOutput, dest=langdest[:2]) FinalOutput += "\n" + translated.text return FinalOutput @@ -229,13 +245,13 @@ def merge_lines(roman_file, w4w_file, full_mean_file, macaronic_file): return "\n".join(merged_lines) -TTSLangOptions = gr.Dropdown(choices=["en", "de", "es", "ja", "ko", "zh-cn"], value="en", label="choose the language of the srt/text accent") -TTSLangOptions2 = gr.Dropdown(choices=["en", "de", "es", "ja", "ko", "zh-cn"], value="en", label="choose the language of the srt/text accent") +TTSLangOptions = gr.Dropdown(choices=langdropdown_choices, value="en", label="choose the language of the srt/text accent") #["en", "de", "es", "ja", "ko", "zh-cn"] +TTSLangOptions2 = gr.Dropdown(choices=langdropdown_choices, value="en", label="choose the language of the srt/text accent") def TTSforListeningPractice(text, language = "en", Repeat10x = False): if Repeat10x: text = text * 10 - speech = gTTS(text=text, lang=language, slow="False") + speech = gTTS(text=text, lang=language[:2], slow="False") speech.save("CurrentTTSFile.mp3") #file = BytesIO() #speech.write_to_fp(file) @@ -335,7 +351,7 @@ def split_verbs_nouns(text): return other_words_text, pos_string_text, verbs_nouns_text -SRTLangOptions = gr.Dropdown(choices=["en", "ja", "ko", "zh-cn"], value="en", label="choose the language of the srt") +SRTLangOptions = gr.Dropdown(choices=["en", "ja", "ko", "zh-cn"], value="en", label="choose the language of the srt") #Left as these four as spacy function def save_string_to_file(string_to_save, file_name, srtdocx): with open(file_name, 'w', encoding='utf-8') as file: @@ -566,8 +582,8 @@ def UnknownTrackTexttoApp(text): #Copy of def OptimisedTtAppForUNWFWO(text): iWords += 1 allverbs = allverbs + token.text + " " elif token.pos_ == "NOUN": - HTMLMainbody = HTMLMainbody + "" - GradHTMLMainbody = GradHTMLMainbody + "" + HTMLMainbody = HTMLMainbody + " " + GradHTMLMainbody = GradHTMLMainbody + " " iNounCount += 1 iWords += 1 elif token.pos_ == "PUNCT": @@ -630,8 +646,8 @@ def UnknownTrackTexttoApp(text): #Copy of def OptimisedTtAppForUNWFWO(text): for sent in doc.sents: iSents += 1 - FinalHTML = FinalHTML + "\n
Picture on mouse hover = Visual
Speed = End Goal ==> App Timer Functions ||| \nSentences: " + str(iSents) + " | Words: " + str(iWords) + " | App elements: " + str(iNounCount + iVerbCount) + " | Verbs: " + str(iVerbCount) + "
" - FinalHTML = FinalHTML + "\n

" + FinalHTML += f"\n \n { FinalCSS } \n\n \n
\n Picture on mouse hover = Visual
Speed = End Goal ==> App Timer Functions ||| \n Sentences: " + str(iSents) + " | Words: " + str(iWords) + " | App elements: " + str(iNounCount + iVerbCount) + " | Verbs: " + str(iVerbCount) + "
" + FinalHTML += "\n

" FinalJS = FinalJS + '''\n + + + ''' FinalHTML = FinalHTML + '''


@@ -686,7 +704,7 @@ def UnknownTrackTexttoApp(text): #Copy of def OptimisedTtAppForUNWFWO(text): \n ''' - FinalOutput = FinalHTML + FinalCSS + FinalJS + FinalOutput = FinalHTML + FinalJS #FinalCSS + FinalJS FinalGradOutput = FinalGradHTML + FinalCSS + FinalJS @@ -999,7 +1017,7 @@ def LoadNLTKUDHRText(text): return CountNLTKText, NLTKtext -NLTKudhr = gr.Dropdown(choices=['English-Latin1', 'Akuapem_Twi-UTF8', 'Zulu-Latin1', 'Afrikaans-Latin1', 'German_Deutsch-Latin1', 'Japanese_Nihongo-EUC', 'Japanese_Nihongo-SJIS', 'Japanese_Nihongo-UTF8', 'Spanish-Latin1', 'Korean_Hankuko-UTF8', 'Chinese_Mandarin-GB2312', 'Abkhaz-Cyrillic+Abkh', 'Abkhaz-UTF8', 'Achehnese-Latin1', 'Achuar-Shiwiar-Latin1', 'Adja-UTF8', 'Afaan_Oromo_Oromiffa-Latin1', 'Afrikaans-Latin1', 'Aguaruna-Latin1', 'Akuapem_Twi-UTF8', 'Albanian_Shqip-Latin1', 'Amahuaca', 'Amahuaca-Latin1', 'Amarakaeri-Latin1', 'Amuesha-Yanesha-UTF8', 'Arabela-Latin1', 'Arabic_Alarabia-Arabic', 'Asante-UTF8', 'Ashaninca-Latin1', 'Asheninca-Latin1', 'Asturian_Bable-Latin1', 'Aymara-Latin1', 'Balinese-Latin1', 'Bambara-UTF8', 'Baoule-UTF8', 'Basque_Euskara-Latin1', 'Batonu_Bariba-UTF8', 'Belorus_Belaruski-Cyrillic', 'Belorus_Belaruski-UTF8', 'Bemba-Latin1', 'Bengali-UTF8', 'Beti-UTF8', 'Bichelamar-Latin1', 'Bikol_Bicolano-Latin1', 'Bora-Latin1', 'Bosnian_Bosanski-Cyrillic', 'Bosnian_Bosanski-Latin2', 'Bosnian_Bosanski-UTF8', 'Breton-Latin1', 'Bugisnese-Latin1', 'Bulgarian_Balgarski-Cyrillic', 'Bulgarian_Balgarski-UTF8', 'Cakchiquel-Latin1', 'Campa_Pajonalino-Latin1', 'Candoshi-Shapra-Latin1', 'Caquinte-Latin1', 'Cashibo-Cacataibo-Latin1', 'Cashinahua-Latin1', 'Catalan-Latin1', 'Catalan_Catala-Latin1', 'Cebuano-Latin1', 'Chamorro-Latin1', 'Chayahuita-Latin1', 'Chechewa_Nyanja-Latin1', 'Chickasaw-Latin1', 'Chinanteco-Ajitlan-Latin1', 'Chinanteco-UTF8', 'Chinese_Mandarin-GB2312', 'Chuuk_Trukese-Latin1', 'Cokwe-Latin1', 'Corsican-Latin1', 'Croatian_Hrvatski-Latin2', 'Czech-Latin2', 'Czech-UTF8', 'Czech_Cesky-Latin2', 'Czech_Cesky-UTF8', 'Dagaare-UTF8', 'Dagbani-UTF8', 'Dangme-UTF8', 'Danish_Dansk-Latin1', 'Dendi-UTF8', 'Ditammari-UTF8', 'Dutch_Nederlands-Latin1', 'Edo-Latin1', 'English-Latin1', 'Esperanto-UTF8', 'Estonian_Eesti-Latin1', 'Ewe_Eve-UTF8', 'Fante-UTF8', 'Faroese-Latin1', 'Farsi_Persian-UTF8', 'Farsi_Persian-v2-UTF8', 'Fijian-Latin1', 'Filipino_Tagalog-Latin1', 'Finnish_Suomi-Latin1', 'Fon-UTF8', 'French_Francais-Latin1', 'Frisian-Latin1', 'Friulian_Friulano-Latin1', 'Ga-UTF8', 'Gagauz_Gagauzi-UTF8', 'Galician_Galego-Latin1', 'Garifuna_Garifuna-Latin1', 'German_Deutsch-Latin1', 'Gonja-UTF8', 'Greek_Ellinika-Greek', 'Greek_Ellinika-UTF8', 'Greenlandic_Inuktikut-Latin1', 'Guarani-Latin1', 'Guen_Mina-UTF8', 'HaitianCreole_Kreyol-Latin1', 'HaitianCreole_Popular-Latin1', 'Hani-Latin1', 'Hausa_Haoussa-Latin1', 'Hawaiian-UTF8', 'Hebrew_Ivrit-Hebrew', 'Hebrew_Ivrit-UTF8', 'Hiligaynon-Latin1', 'Hindi-UTF8', 'Hindi_web-UTF8', 'Hmong_Miao-Sichuan-Guizhou-Yunnan-Latin1', 'Hmong_Miao-SouthernEast-Guizhou-Latin1', 'Hmong_Miao_Northern-East-Guizhou-Latin1', 'Hrvatski_Croatian-Latin2', 'Huasteco-Latin1', 'Huitoto_Murui-Latin1', 'Hungarian_Magyar-Latin1', 'Hungarian_Magyar-Latin2', 'Hungarian_Magyar-UTF8', 'Ibibio_Efik-Latin1', 'Icelandic_Yslenska-Latin1', 'Ido-Latin1', 'Igbo-UTF8', 'Iloko_Ilocano-Latin1', 'Indonesian-Latin1', 'Interlingua-Latin1', 'Inuktikut_Greenlandic-Latin1', 'IrishGaelic_Gaeilge-Latin1', 'Italian-Latin1', 'Italian_Italiano-Latin1', 'Japanese_Nihongo-EUC', 'Japanese_Nihongo-SJIS', 'Japanese_Nihongo-UTF8', 'Javanese-Latin1', 'Jola-Fogny_Diola-UTF8', 'Kabye-UTF8', 'Kannada-UTF8', 'Kaonde-Latin1', 'Kapampangan-Latin1', 'Kasem-UTF8', 'Kazakh-Cyrillic', 'Kazakh-UTF8', 'Kiche_Quiche-Latin1', 'Kicongo-Latin1', 'Kimbundu_Mbundu-Latin1', 'Kinyamwezi_Nyamwezi-Latin1', 'Kinyarwanda-Latin1', 'Kituba-Latin1', 'Korean_Hankuko-UTF8', 'Kpelewo-UTF8', 'Krio-UTF8', 'Kurdish-UTF8', 'Lamnso_Lam-nso-UTF8', 'Latin_Latina-Latin1', 'Latin_Latina-v2-Latin1', 'Latvian-Latin1', 'Limba-UTF8', 'Lingala-Latin1', 'Lithuanian_Lietuviskai-Baltic', 'Lozi-Latin1', 'Luba-Kasai_Tshiluba-Latin1', 'Luganda_Ganda-Latin1', 'Lunda_Chokwe-lunda-Latin1', 'Luvale-Latin1', 'Luxembourgish_Letzebuergeusch-Latin1', 'Macedonian-UTF8', 'Madurese-Latin1', 'Makonde-Latin1', 'Malagasy-Latin1', 'Malay_BahasaMelayu-Latin1', 'Maltese-UTF8', 'Mam-Latin1', 'Maninka-UTF8', 'Maori-Latin1', 'Mapudungun_Mapuzgun-Latin1', 'Mapudungun_Mapuzgun-UTF8', 'Marshallese-Latin1', 'Matses-Latin1', 'Mayan_Yucateco-Latin1', 'Mazahua_Jnatrjo-UTF8', 'Mazateco-Latin1', 'Mende-UTF8', 'Mikmaq_Micmac-Mikmaq-Latin1', 'Minangkabau-Latin1', 'Miskito_Miskito-Latin1', 'Mixteco-Latin1', 'Mongolian_Khalkha-Cyrillic', 'Mongolian_Khalkha-UTF8', 'Moore_More-UTF8', 'Nahuatl-Latin1', 'Ndebele-Latin1', 'Nepali-UTF8', 'Ngangela_Nyemba-Latin1', 'NigerianPidginEnglish-Latin1', 'Nomatsiguenga-Latin1', 'NorthernSotho_Pedi-Sepedi-Latin1', 'Norwegian-Latin1', 'Norwegian_Norsk-Bokmal-Latin1', 'Norwegian_Norsk-Nynorsk-Latin1', 'Nyanja_Chechewa-Latin1', 'Nyanja_Chinyanja-Latin1', 'Nzema-UTF8', 'OccitanAuvergnat-Latin1', 'OccitanLanguedocien-Latin1', 'Oromiffa_AfaanOromo-Latin1', 'Osetin_Ossetian-UTF8', 'Oshiwambo_Ndonga-Latin1', 'Otomi_Nahnu-Latin1', 'Paez-Latin1', 'Palauan-Latin1', 'Peuhl-UTF8', 'Picard-Latin1', 'Pipil-Latin1', 'Polish-Latin2', 'Polish_Polski-Latin2', 'Ponapean-Latin1', 'Portuguese_Portugues-Latin1', 'Pulaar-UTF8', 'Punjabi_Panjabi-UTF8', 'Purhepecha-UTF8', 'Qechi_Kekchi-Latin1', 'Quechua-Latin1', 'Quichua-Latin1', 'Rarotongan_MaoriCookIslands-Latin1', 'Rhaeto-Romance_Rumantsch-Latin1', 'Romani-Latin1', 'Romani-UTF8', 'Romanian-Latin2', 'Romanian_Romana-Latin2', 'Rukonzo_Konjo-Latin1', 'Rundi_Kirundi-Latin1', 'Runyankore-rukiga_Nkore-kiga-Latin1', 'Russian-Cyrillic', 'Russian-UTF8', 'Russian_Russky-Cyrillic', 'Russian_Russky-UTF8', 'Sami_Lappish-UTF8', 'Sammarinese-Latin1', 'Samoan-Latin1', 'Sango_Sangho-Latin1', 'Sanskrit-UTF8', 'Saraiki-UTF8', 'Sardinian-Latin1', 'ScottishGaelic_GaidhligAlbanach-Latin1', 'Seereer-UTF8', 'Serbian_Srpski-Cyrillic', 'Serbian_Srpski-Latin2', 'Serbian_Srpski-UTF8', 'Sharanahua-Latin1', 'Shipibo-Conibo-Latin1', 'Shona-Latin1', 'Sinhala-UTF8', 'Siswati-Latin1', 'Slovak-Latin2', 'Slovak_Slovencina-Latin2', 'Slovenian_Slovenscina-Latin2', 'SolomonsPidgin_Pijin-Latin1', 'Somali-Latin1', 'Soninke_Soninkanxaane-UTF8', 'Sorbian-Latin2', 'SouthernSotho_Sotho-Sesotho-Sutu-Sesutu-Latin1', 'Spanish-Latin1', 'Spanish_Espanol-Latin1', 'Sukuma-Latin1', 'Sundanese-Latin1', 'Sussu_Soussou-Sosso-Soso-Susu-UTF8', 'Swaheli-Latin1', 'Swahili_Kiswahili-Latin1', 'Swedish_Svenska-Latin1', 'Tahitian-UTF8', 'Tenek_Huasteco-Latin1', 'Tetum-Latin1', 'Themne_Temne-UTF8', 'Tiv-Latin1', 'Toba-UTF8', 'Tojol-abal-Latin1', 'TokPisin-Latin1', 'Tonga-Latin1', 'Tongan_Tonga-Latin1', 'Totonaco-Latin1', 'Trukese_Chuuk-Latin1', 'Turkish_Turkce-Turkish', 'Turkish_Turkce-UTF8', 'Tzeltal-Latin1', 'Tzotzil-Latin1', 'Uighur_Uyghur-Latin1', 'Uighur_Uyghur-UTF8', 'Ukrainian-Cyrillic', 'Ukrainian-UTF8', 'Umbundu-Latin1', 'Urarina-Latin1', 'Uzbek-Latin1', 'Vietnamese-ALRN-UTF8', 'Vietnamese-UTF8', 'Vlach-Latin1', 'Walloon_Wallon-Latin1', 'Wama-UTF8', 'Waray-Latin1', 'Wayuu-Latin1', 'Welsh_Cymraeg-Latin1', 'WesternSotho_Tswana-Setswana-Latin1', 'Wolof-Latin1', 'Xhosa-Latin1', 'Yagua-Latin1', 'Yao-Latin1', 'Yapese-Latin1', 'Yoruba-UTF8', 'Zapoteco-Latin1', 'Zapoteco-SanLucasQuiavini-Latin1', 'Zhuang-Latin1', 'Zulu-Latin1'], label="Choose one the below languages") +NLTKudhr = gr.Dropdown(choices=['English-Latin1', 'Akuapem_Twi-UTF8', 'Zulu-Latin1', 'Afrikaans-Latin1', 'German_Deutsch-Latin1', 'Japanese_Nihongo-EUC', 'Japanese_Nihongo-SJIS', 'Japanese_Nihongo-UTF8', 'Spanish-Latin1', 'Korean_Hankuko-UTF8', 'Chinese_Mandarin-GB2312', 'Abkhaz-Cyrillic+Abkh', 'Abkhaz-UTF8', 'Achehnese-Latin1', 'Achuar-Shiwiar-Latin1', 'Adja-UTF8', 'Afaan_Oromo_Oromiffa-Latin1', 'Afrikaans-Latin1', 'Aguaruna-Latin1', 'Akuapem_Twi-UTF8', 'Albanian_Shqip-Latin1', 'Amahuaca', 'Amahuaca-Latin1', 'Amarakaeri-Latin1', 'Amuesha-Yanesha-UTF8', 'Arabela-Latin1', 'Arabic_Alarabia-Arabic', 'Asante-UTF8', 'Ashaninca-Latin1', 'Asheninca-Latin1', 'Asturian_Bable-Latin1', 'Aymara-Latin1', 'Balinese-Latin1', 'Bambara-UTF8', 'Baoule-UTF8', 'Basque_Euskara-Latin1', 'Batonu_Bariba-UTF8', 'Belorus_Belaruski-Cyrillic', 'Belorus_Belaruski-UTF8', 'Bemba-Latin1', 'Bengali-UTF8', 'Beti-UTF8', 'Bichelamar-Latin1', 'Bikol_Bicolano-Latin1', 'Bora-Latin1', 'Bosnian_Bosanski-Cyrillic', 'Bosnian_Bosanski-Latin2', 'Bosnian_Bosanski-UTF8', 'Breton-Latin1', 'Bugisnese-Latin1', 'Bulgarian_Balgarski-Cyrillic', 'Bulgarian_Balgarski-UTF8', 'Cakchiquel-Latin1', 'Campa_Pajonalino-Latin1', 'Candoshi-Shapra-Latin1', 'Caquinte-Latin1', 'Cashibo-Cacataibo-Latin1', 'Cashinahua-Latin1', 'Catalan-Latin1', 'Catalan_Catala-Latin1', 'Cebuano-Latin1', 'Chamorro-Latin1', 'Chayahuita-Latin1', 'Chechewa_Nyanja-Latin1', 'Chickasaw-Latin1', 'Chinanteco-Ajitlan-Latin1', 'Chinanteco-UTF8', 'Chinese_Mandarin-GB2312', 'Chuuk_Trukese-Latin1', 'Cokwe-Latin1', 'Corsican-Latin1', 'Croatian_Hrvatski-Latin2', 'Czech-Latin2', 'Czech-UTF8', 'Czech_Cesky-Latin2', 'Czech_Cesky-UTF8', 'Dagaare-UTF8', 'Dagbani-UTF8', 'Dangme-UTF8', 'Danish_Dansk-Latin1', 'Dendi-UTF8', 'Ditammari-UTF8', 'Dutch_Nederlands-Latin1', 'Edo-Latin1', 'English-Latin1', 'Esperanto-UTF8', 'Estonian_Eesti-Latin1', 'Ewe_Eve-UTF8', 'Fante-UTF8', 'Faroese-Latin1', 'Farsi_Persian-UTF8', 'Farsi_Persian-v2-UTF8', 'Fijian-Latin1', 'Filipino_Tagalog-Latin1', 'Finnish_Suomi-Latin1', 'Fon-UTF8', 'French_Francais-Latin1', 'Frisian-Latin1', 'Friulian_Friulano-Latin1', 'Ga-UTF8', 'Gagauz_Gagauzi-UTF8', 'Galician_Galego-Latin1', 'Garifuna_Garifuna-Latin1', 'German_Deutsch-Latin1', 'Gonja-UTF8', 'Greek_Ellinika-Greek', 'Greek_Ellinika-UTF8', 'Greenlandic_Inuktikut-Latin1', 'Guarani-Latin1', 'Guen_Mina-UTF8', 'HaitianCreole_Kreyol-Latin1', 'HaitianCreole_Popular-Latin1', 'Hani-Latin1', 'Hausa_Haoussa-Latin1', 'Hawaiian-UTF8', 'Hebrew_Ivrit-Hebrew', 'Hebrew_Ivrit-UTF8', 'Hiligaynon-Latin1', 'Hindi-UTF8', 'Hindi_web-UTF8', 'Hmong_Miao-Sichuan-Guizhou-Yunnan-Latin1', 'Hmong_Miao-SouthernEast-Guizhou-Latin1', 'Hmong_Miao_Northern-East-Guizhou-Latin1', 'Hrvatski_Croatian-Latin2', 'Huasteco-Latin1', 'Huitoto_Murui-Latin1', 'Hungarian_Magyar-Latin1', 'Hungarian_Magyar-Latin2', 'Hungarian_Magyar-UTF8', 'Ibibio_Efik-Latin1', 'Icelandic_Yslenska-Latin1', 'Ido-Latin1', 'Igbo-UTF8', 'Iloko_Ilocano-Latin1', 'Indonesian-Latin1', 'Interlingua-Latin1', 'Inuktikut_Greenlandic-Latin1', 'IrishGaelic_Gaeilge-Latin1', 'Italian-Latin1', 'Italian_Italiano-Latin1', 'Japanese_Nihongo-EUC', 'Japanese_Nihongo-SJIS', 'Japanese_Nihongo-UTF8', 'Javanese-Latin1', 'Jola-Fogny_Diola-UTF8', 'Kabye-UTF8', 'Kannada-UTF8', 'Kaonde-Latin1', 'Kapampangan-Latin1', 'Kasem-UTF8', 'Kazakh-Cyrillic', 'Kazakh-UTF8', 'Kiche_Quiche-Latin1', 'Kicongo-Latin1', 'Kimbundu_Mbundu-Latin1', 'Kinyamwezi_Nyamwezi-Latin1', 'Kinyarwanda-Latin1', 'Kituba-Latin1', 'Korean_Hankuko-UTF8', 'Kpelewo-UTF8', 'Krio-UTF8', 'Kurdish-UTF8', 'Lamnso_Lam-nso-UTF8', 'Latin_Latina-Latin1', 'Latin_Latina-v2-Latin1', 'Latvian-Latin1', 'Limba-UTF8', 'Lingala-Latin1', 'Lithuanian_Lietuviskai-Baltic', 'Lozi-Latin1', 'Luba-Kasai_Tshiluba-Latin1', 'Luganda_Ganda-Latin1', 'Lunda_Chokwe-lunda-Latin1', 'Luvale-Latin1', 'Luxembourgish_Letzebuergeusch-Latin1', 'Macedonian-UTF8', 'Madurese-Latin1', 'Makonde-Latin1', 'Malagasy-Latin1', 'Malay_BahasaMelayu-Latin1', 'Maltese-UTF8', 'Mam-Latin1', 'Maninka-UTF8', 'Maori-Latin1', 'Mapudungun_Mapuzgun-Latin1', 'Mapudungun_Mapuzgun-UTF8', 'Marshallese-Latin1', 'Matses-Latin1', 'Mayan_Yucateco-Latin1', 'Mazahua_Jnatrjo-UTF8', 'Mazateco-Latin1', 'Mende-UTF8', 'Mikmaq_Micmac-Mikmaq-Latin1', 'Minangkabau-Latin1', 'Miskito_Miskito-Latin1', 'Mixteco-Latin1', 'Mongolian_Khalkha-Cyrillic', 'Mongolian_Khalkha-UTF8', 'Moore_More-UTF8', 'Nahuatl-Latin1', 'Ndebele-Latin1', 'Nepali-UTF8', 'Ngangela_Nyemba-Latin1', 'NigerianPidginEnglish-Latin1', 'Nomatsiguenga-Latin1', 'NorthernSotho_Pedi-Sepedi-Latin1', 'Norwegian-Latin1', 'Norwegian_Norsk-Bokmal-Latin1', 'Norwegian_Norsk-Nynorsk-Latin1', 'Nyanja_Chechewa-Latin1', 'Nyanja_Chinyanja-Latin1', 'Nzema-UTF8', 'OccitanAuvergnat-Latin1', 'OccitanLanguedocien-Latin1', 'Oromiffa_AfaanOromo-Latin1', 'Osetin_Ossetian-UTF8', 'Oshiwambo_Ndonga-Latin1', 'Otomi_Nahnu-Latin1', 'Paez-Latin1', 'Palauan-Latin1', 'Peuhl-UTF8', 'Picard-Latin1', 'Pipil-Latin1', 'Polish-Latin2', 'Polish_Polski-Latin2', 'Ponapean-Latin1', 'Portuguese_Portugues-Latin1', 'Pulaar-UTF8', 'Punjabi_Panjabi-UTF8', 'Purhepecha-UTF8', 'Qechi_Kekchi-Latin1', 'Quechua-Latin1', 'Quichua-Latin1', 'Rarotongan_MaoriCookIslands-Latin1', 'Rhaeto-Romance_Rumantsch-Latin1', 'Romani-Latin1', 'Romani-UTF8', 'Romanian-Latin2', 'Romanian_Romana-Latin2', 'Rukonzo_Konjo-Latin1', 'Rundi_Kirundi-Latin1', 'Runyankore-rukiga_Nkore-kiga-Latin1', 'Russian-Cyrillic', 'Russian-UTF8', 'Russian_Russky-Cyrillic', 'Russian_Russky-UTF8', 'Sami_Lappish-UTF8', 'Sammarinese-Latin1', 'Samoan-Latin1', 'Sango_Sangho-Latin1', 'Sanskrit-UTF8', 'Saraiki-UTF8', 'Sardinian-Latin1', 'ScottishGaelic_GaidhligAlbanach-Latin1', 'Seereer-UTF8', 'Serbian_Srpski-Cyrillic', 'Serbian_Srpski-Latin2', 'Serbian_Srpski-UTF8', 'Sharanahua-Latin1', 'Shipibo-Conibo-Latin1', 'Shona-Latin1', 'Sinhala-UTF8', 'Siswati-Latin1', 'Slovak-Latin2', 'Slovak_Slovencina-Latin2', 'Slovenian_Slovenscina-Latin2', 'SolomonsPidgin_Pijin-Latin1', 'Somali-Latin1', 'Soninke_Soninkanxaane-UTF8', 'Sorbian-Latin2', 'SouthernSotho_Sotho-Sesotho-Sutu-Sesutu-Latin1', 'Spanish-Latin1', 'Spanish_Espanol-Latin1', 'Sukuma-Latin1', 'Sundanese-Latin1', 'Sussu_Soussou-Sosso-Soso-Susu-UTF8', 'Swaheli-Latin1', 'Swahili_Kiswahili-Latin1', 'Swedish_Svenska-Latin1', 'Tahitian-UTF8', 'Tenek_Huasteco-Latin1', 'Tetum-Latin1', 'Themne_Temne-UTF8', 'Tiv-Latin1', 'Toba-UTF8', 'Tojol-abal-Latin1', 'TokPisin-Latin1', 'Tonga-Latin1', 'Tongan_Tonga-Latin1', 'Totonaco-Latin1', 'Trukese_Chuuk-Latin1', 'Turkish_Turkce-Turkish', 'Turkish_Turkce-UTF8', 'Tzeltal-Latin1', 'Tzotzil-Latin1', 'Uighur_Uyghur-Latin1', 'Uighur_Uyghur-UTF8', 'Ukrainian-Cyrillic', 'Ukrainian-UTF8', 'Umbundu-Latin1', 'Urarina-Latin1', 'Uzbek-Latin1', 'Vietnamese-ALRN-UTF8', 'Vietnamese-UTF8', 'Vlach-Latin1', 'Walloon_Wallon-Latin1', 'Wama-UTF8', 'Waray-Latin1', 'Wayuu-Latin1', 'Welsh_Cymraeg-Latin1', 'WesternSotho_Tswana-Setswana-Latin1', 'Wolof-Latin1', 'Xhosa-Latin1', 'Yagua-Latin1', 'Yao-Latin1', 'Yapese-Latin1', 'Yoruba-UTF8', 'Zapoteco-Latin1', 'Zapoteco-SanLucasQuiavini-Latin1', 'Zhuang-Latin1', 'Zulu-Latin1'], label="Choose one the below languages", value='English-Latin1') def SimultaneousSpellingPrac(text): TextToks = text.split() @@ -1206,6 +1224,164 @@ def imagebasedreading(inputtext): return filename, output_html, output_html + +def imagebasedreadingwordlevel(inputtext): + # Read the user input text file + #with open("inputtext.txt", "r", encoding="utf-8") as file: + # inputtext = file.read() + + inputtextlines = inputtext.split(" ") + + htmlpart1 = """ + + + + Image Placeholder with Text Background + + + + + + + + +
+ """ + + htmlpart2 = """ +
+ + + + + + """ + + #If you have a gpu and imagepipeline then src in img tag = filepath of generate image + def generate_html(textlines): + num_containers = len(textlines) + html_string = "" + for i in range(num_containers): + container = f''' +
+ + {textlines[i]} - Placeholder {i + 1} +
+ ''' + html_string += container + return html_string + + # Generate the HTML based on the number of lines + output_html = htmlpart1 + "\n" + generate_html(inputtextlines) + "\n" + htmlpart2 + + # Save the generated HTML + now = datetime.datetime.now() + filename = f"ImagePlaceholderwordlevel{now.strftime('%Y-%m-%d_%H-%M')}_{inputtext[:16]}.html" + with open(filename, "w", encoding="utf-8") as file: + file.write(output_html) + + return filename, output_html, output_html #-------- guidedreadingseperator = gr.Dropdown(choices=[".", ",", "lines", "Sentences"], value=".", interactive=True) @@ -1483,8 +1659,10 @@ def YTTransciptAnalysisandLoad(link): YTTtranscript = "No Transcript found" YTTtrancriptAnalysed = [] + YTTTextforTranslation = "" for subtitle in YTTtranscript: YTTtrancriptAnalysed.append({'text': keep_nouns(subtitle['text']), 'start': subtitle['start'], 'duration': subtitle['duration']}) + YTTTextforTranslation += "\n" + subtitle["text"] #YTTtrancriptAnalysed = str(len(YTTtranscript.page_content)) + "" + str(YTTtranscript.metadata) YTTtrancriptAnalysedSubtitleOutput = YTTtrancriptAnalysed @@ -1493,7 +1671,7 @@ def YTTransciptAnalysisandLoad(link): video_id = re.search(r'v=([^&]+)', link).group(1) link = 'https://www.youtube.com/embed/{}'.format(video_id) - return f'', YTTtranscript, YTTtrancriptAnalysed + return f'', YTTtranscript, YTTtrancriptAnalysed, YTTTextforTranslation def TimedList(list, duration, splits=0): if splits == 0: @@ -1514,7 +1692,7 @@ def subtitle_generator(): :param playback_start_time: The time when playback started, used to calculate the current playback time. """ if YTTtranscriptSubtitleOutput == "": - return ("No subtitle", "No subtitle") + return ("No subtitle", "No subtitle", "No subtitle") playback_start_time = time.time() while True: @@ -1523,10 +1701,10 @@ def subtitle_generator(): start_time = int(subtitle['start']) end_time = start_time + int(subtitle['duration']) if start_time <= current_time < end_time: - yield (YTTtrancriptAnalysedSubtitleOutput[index]['text'], subtitle['text']) + yield (YTTtrancriptAnalysedSubtitleOutput[index]['text'], subtitle['text'], subtitle['start']) break else: - yield ("", "") + yield ("", "", "") time.sleep(1) # Wait for 1 second before updating def word_to_k8s_format(word): @@ -1583,7 +1761,188 @@ def MultiOutputInterface(inputtext): Plurals = onlyplurals(inputtext) Output3 = TestSplitandUpdate(inputtext) Output4 = ForeignSyllableListenFormat(inputtext) - return Output3[0], FirstLetAccronyms, AcronymMap[0], AcronymMap[1], Output1, Output2, Plurals, k8sformat, Output4, Output3[1] + return Output3[1], Output3[0], FirstLetAccronyms, AcronymMap[0], AcronymMap[1], Output1, Output2, Plurals, k8sformat, Output4 + +def lingualinkassist(text, language): + words = text.split(" ") + FinalOutput = "Test \n" + for word in words: + FinalOutput += f"Next lets do '{ word }' in { language }\n" + return FinalOutput + +#Have to define inside blocks w4wsidebysidelangdest = gr.Dropdown(choices=langdropdown_choices, label="Choose Language", value="de: german") #["af", "de", "es", "ko", "ja", "zh-cn", "xh", "zu"] + +#---------------------------------------------------------------------------------------------------------------------- + +def w4wsidebysidereadergen(text, langdest): + #FrontRevSentChunk as reference + FinalOutput = "Side by Side Version: " + Translated = "FWNWO: " + words = text.split() + w4wsidebysidtranslator = Translator() + translatedFWO = w4wsidebysidtranslator.translate(text, dest=langdest[:2]) + translatedNWO = w4wsidebysidtranslator.translate(words, dest=langdest[:2]) #src or dest + #print(translated) + #print(dir(translatedNWO[0]), "\n") + #FinalOutput += "\n" + translated.text + for obj in translatedNWO: + # print(f"Original Text: {obj.origin}") + # print(f"Translated Text: {obj.text}") + # print(f"Source Language: {obj.src}") + # print(f"Destination Language: {obj.dest}") + # print(f"Pronunciation: {obj.pronunciation}\n") + FinalOutput += obj.origin + f" ({obj.text}) " + Translated += obj.text + " " + return FinalOutput, Translated, "FWFWO: " + translatedFWO.text + +#https://huggingface.co/spaces/Geonmo/nllb-translation-demo/blob/main/app.py +def nllbtranscload_models(): + # build model and tokenizer + #model_name_dict = {'nllb-distilled-600M': 'facebook/nllb-200-distilled-600M', + #'nllb-1.3B': 'facebook/nllb-200-1.3B', + #'nllb-distilled-1.3B': 'facebook/nllb-200-distilled-1.3B', + #'nllb-3.3B': 'facebook/nllb-200-3.3B', + # } + model_dict = {} + #for call_name, real_name in model_name_dict.items(): + print('\tLoading model: %s' % 'nllb-distilled-600M' ) #% call_name) + model = AutoModelForSeq2SeqLM.from_pretrained('facebook/nllb-200-distilled-600M') #real_name) + tokenizer = AutoTokenizer.from_pretrained('facebook/nllb-200-distilled-600M') #real_name) + model_dict['nllb-distilled-600M'+'_model'] = model #call_name+'_model'] = model + model_dict['nllb-distilled-600M'+'_tokenizer'] = tokenizer #call_name+'_tokenizer'] = tokenizer + return model_dict + +#global model_dict +model_dict = nllbtranscload_models() +nllb_lang_codes = list(flores_codes.keys()) + + +def nllbtransctranslation(source, target, text): + #if len(model_dict) == 2: + model_name = 'nllb-distilled-600M' + + start_time = time.time() + + source = flores_codes[source] + target = flores_codes[target] + #source = "eng_Latn" #colab test + #target = "aka_Latn" #colab test + + model = model_dict[model_name + '_model'] + tokenizer = model_dict[model_name + '_tokenizer'] + + translator = pipeline('translation', model=model, tokenizer=tokenizer, src_lang=source, tgt_lang=target) + output = translator(text, max_length=400) #Check how much this can be extended + + end_time = time.time() + + output = output[0]['translation_text'] + result = {'inference_time': end_time - start_time, + 'source': source, + 'target': target, + 'result': output} + return result['result'], result['inference_time'] + +#pip install tokenizers==0.13.3 protobuf==3.20.0 + +#---------------------------------------------------------------------------------------------------------------------------- + +physical_similarity = { + # Physical transformation similarity + 'a': ['e', 'd', 'q', 'o'], + 'b': ['d', 'p', 'q', 'h'], + 'c': ['e', 'o'], + 'd': ['a', 'b', 'p', 'q'], + 'e': ['c'], + 'g': ['q', 'o'], + 'h': ['b', 'n'], + 'i': ['l', 'j'], + 'j': ['i', 'l'], + 'k': ['x'], + 'l': ['i', 'j'], + 'm': ['n', 'u', 'w'], + 'n': ['m', 'u'], + 'o': ['a', 'c', 'g', 'q'], + 'p': ['b', 'd', 'q'], + 'q': ['a', 'b', 'd', 'g', 'o', 'p'], + 'r': ['n'], + 's': ['z'], + 't': ['l'], + 'u': ['m', 'n'], + 'v': ['w', 'u'], + 'w': ['m', 'v'], + 'x': ['k'], + 'y': ['v'], + 'z': ['s'], +} + +phonetic_similarity = { # Phonetic similarity + 'c': ['k', 's', 'q'], + 'f': ['ph'], + 'k': ['c', 'q'], + 'q': ['c', 'k'], + 's': ['c', 'z'], + 'x': ['z'], + 'z': ['s', 'x'], + # ... other letters +} + +def letterbased_guess_word(target, guess): + matching_letters = 0 + exact_letters = {} + closest_letters = {} + physical_similar_letters = {} + phonetic_similar_letters = {} + + # Alphabet dictionary for calculating distance + alphabet = {chr(i): i - 97 for i in range(97, 123)} + + # Count and list exact letter matches in both words + for letter in set(target): + count = min(target.count(letter), guess.count(letter)) + if count > 0: + exact_letters[letter] = count + + for letter in target: + # Skip if letter is not a lowercase letter + if letter not in alphabet: + continue + + # Check for matching letters + if letter in guess: + matching_letters += 1 + + # Find closest letter in guess based on alphabet distance + closest_letter = min(guess, key=lambda g: abs(alphabet.get(g, 0) - alphabet[letter])) + closest_letters[letter] = closest_letter + + # Find physical similar letters in guess + physical_similar_letters[letter] = [g for g in guess if g in physical_similarity.get(letter, [])] + + # Find phonetic similar letters in guess + phonetic_similar_letters[letter] = [g for g in guess if g in phonetic_similarity.get(letter, [])] + + return { + "matching_letters": matching_letters, + "exact_letters": exact_letters, + "closest_letters": closest_letters, + "physical_similar_letters": physical_similar_letters, + "phonetic_similar_letters": phonetic_similar_letters, + } + +#---------------------------------------------------------------------------------------------------------------------------- + +speedmastery_load_imagesinput = gr.Files(file_count="multiple", type="binary", file_types='image') + +def speedmastery_load_images(image_files): + #return [image_file for image_file in image_files] + images = [] + for image_file in image_files: + # Read the image file and convert it to a PIL.Image object + image = Image.open(io.BytesIO(image_file)) + images.append(image) + return images + # Define the Gradio interface inputs and outputs for video split spvvideo_file_input = gr.File(label='Video File') @@ -1607,16 +1966,16 @@ VideoSplitTestInput = gr.File(label="select a mp4 video file", file_types=[".mp4 SplitVideoOutput = gr.FileExplorer(root='./splitvideo') with gr.Blocks() as lliface: #theme=gr.themes.Glass(primary_hue='green', secondary_hue='red', neutral_hue='blue', ) - gr.HTML('
Reading comprehension speed through picture based compression (collage), Live Image Subtitles and Listening Comprehension Test
---- Under Construction: Very Slowly figuring out what AI intergrated interface means (Chat vs Forms vs Function calling vs Sensor + Trigger vs Agent) | How to end copy paste once and for all? ----
All the apis from the below space need to be treated like RAG as notes for the LLM to read before providing its answer
') + gr.HTML('
Reading comprehension speed through picture based compression (collage), Live Image Subtitles and Listening Comprehension Test - -- Lingua Link (Simple GPT for assistinng image creation) -- |
---- Under Construction: Very Slowly figuring out what AI intergrated interface means (Chat vs Forms vs Function calling vs Sensor + Trigger vs Agent) | How to end copy paste once and for all? ----
All the apis from the below space need to be treated like RAG as notes for the LLM to read before providing its answer
') with gr.Accordion("Translation HF Spaces/Sites (Click Here to Open) - Use to get rough translations", open=False): with gr.Row(): - linktotranslate = gr.Dropdown(choices=["https://kadirnar-multilingual-translation.hf.space", "https://geonmo-nllb-translation-demo.hf.space"], label="Choose/Cancel type any .hf.space link here (can also type a link)'", allow_custom_value=True) + linktotranslate = gr.Dropdown(choices=["https://facebook-seamless-m4t-v2-large.hf.space", "https://hf-audio-whisper-large-v3.hf.space", "https://pyf98-owsm-v3-demo.hf.space", "https://kadirnar-multilingual-translation.hf.space", "https://geonmo-nllb-translation-demo.hf.space"], label="Choose/Cancel type any .hf.space link here (can also type a link)'", allow_custom_value=True) translatespacebtn = gr.Button("Use the chosen URL to load interface with a translate model") translatespace = gr.HTML("Translate Space Chosen will load here") translatespacebtn.click(display_website, inputs=linktotranslate, outputs=translatespace) with gr.Accordion("LLM HF Spaces/Sites (Click Here to Open) - Use 'Acronym Map Creation Space' Tab with this - Ask for Translation of image tags made below, sentence to emojis, Wordlists, Test Conversations, Get Grammar Explanations etc., Can use GPT-4 or new SOTA to review the conversation", open=False): with gr.Row(): - linktochat = gr.Dropdown(choices=["https://sdk.vercel.ai/docs", "https://labs.perplexity.ai/", "https://chat.lmsys.org", "https://huggingfaceh4-zephyr-chat.hf.space", "https://osanseviero-mistral-super-fast.hf.space", "https://artificialguybr-qwen-14b-chat-demo.hf.space", "https://huggingface-projects-llama-2-7b-chat.hf.space", "https://ysharma-explore-llamav2-with-tgi.hf.space", "https://mosaicml-mpt-30b-chat.hf.space", "https://huggingfaceh4-falcon-chat.hf.space", "https://uwnlp-guanaco-playground-tgi.hf.space", "https://stabilityai-stablelm-tuned-alpha-chat.hf.space", "https://mosaicml-mpt-7b-storywriter.hf.space", "https://huggingfaceh4-starchat-playground.hf.space", "https://bigcode-bigcode-playground.hf.space", "https://mosaicml-mpt-7b-chat.hf.space", "https://huggingchat-chat-ui.hf.space", "https://togethercomputer-openchatkit.hf.space"], label="Choose/Cancel type any .hf.space link here (can also type a link)'", allow_custom_value=True) + linktochat = gr.Dropdown(choices=["https://sdk.vercel.ai/docs", "https://labs.perplexity.ai/", "https://chat.lmsys.org", "https://deepseek-ai-deepseek-coder-7b-instruct.hf.space", "https://01-ai-yi-34b-chat.hf.space", "https://ysharma-zephyr-playground.hf.space", "https://huggingfaceh4-zephyr-chat.hf.space", "https://osanseviero-mistral-super-fast.hf.space", "https://artificialguybr-qwen-14b-chat-demo.hf.space", "https://huggingface-projects-llama-2-7b-chat.hf.space", "https://ysharma-explore-llamav2-with-tgi.hf.space", "https://mosaicml-mpt-30b-chat.hf.space", "https://huggingfaceh4-falcon-chat.hf.space", "https://uwnlp-guanaco-playground-tgi.hf.space", "https://stabilityai-stablelm-tuned-alpha-chat.hf.space", "https://mosaicml-mpt-7b-storywriter.hf.space", "https://huggingfaceh4-starchat-playground.hf.space", "https://bigcode-bigcode-playground.hf.space", "https://mosaicml-mpt-7b-chat.hf.space", "https://huggingchat-chat-ui.hf.space", "https://togethercomputer-openchatkit.hf.space"], label="Choose/Cancel type any .hf.space link here (can also type a link)'", allow_custom_value=True) chatspacebtn = gr.Button("Use the chosen URL to load interface with a chat model. For sdk.vercel click the chat button on the top left. For lymsys / chat arena copy the link and use a new tab") with gr.Accordion("Some prompt ideas", open=False): with gr.Accordion("Prompts in text (Manual copy paste)", open=False): @@ -1631,17 +1990,18 @@ with gr.Blocks() as lliface: #theme=gr.themes.Glass(primary_hue='green', seconda chatspacebtn.click(display_website, inputs=linktochat, outputs=chatspace) with gr.Accordion("Image HF Spaces/Sites (Click Here to Open) - Use with the image placeholder in Workflows tab", open=False): with gr.Row(): - linktoimagegen = gr.Dropdown(choices=["https://segmind-segmind-stable-diffusion.hf.space", "https://simianluo-latent-consistency-model.hf.space", "https://google-sdxl.hf.space", "https://fffiloni-sdxl-control-loras.hf.space"], label="Choose/Cancel type any .hf.space link here (can also type a link)'", allow_custom_value=True) + linktoimagegen = gr.Dropdown(choices=["https://guoyww-animatediff.hf.space", "https://segmind-segmind-stable-diffusion.hf.space", "https://simianluo-latent-consistency-model.hf.space", "https://artificialguybr-studio-ghibli-lora-sdxl.hf.space", "https://artificialguybr-pixel-art-generator.hf.space", "https://google-sdxl.hf.space", "https://fffiloni-sdxl-control-loras.hf.space"], label="Choose/Cancel type any .hf.space link here (can also type a link)'", allow_custom_value=True) imagegenspacebtn = gr.Button("Use the chosen URL to load interface with a chat model") imagegenspace = gr.HTML("Chat Space Chosen will load here") imagegenspacebtn.click(display_website, inputs=linktoimagegen, outputs=imagegenspace) - gr.HTML("
Agents = Custom Software (Personalised UI and Mods, among other things) = Custom Environments (AR) -- Open Interpreter -- | -- Microsoft Autogen -- |
") + #----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + #----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- with gr.Row(): with gr.Column(scale=1): - gr.HTML(""" -- Opensource List -- | -- Open LLM Leaderboard -- | -- Whisper JAX -- | -- Google Translate -- | -- Modelscope Text to Video -- | -- stable-diffusion 2 -- | -- stable-diffusion 1 -- | -- karlo 1 -- | -- Bark (TTS) -- | -- Offline Text Model Demos -- | -- SAM with Clip -- | -- Eleven Labs -- | -- Animate an Image -- | -- Clone a voice -- | -- OpenAI pricing -- | -- Image Training Data Search -- | -- Huggingface Chat -- | -- 128x128 Stable Diffusion (Fast) -- | -- Search 95 million research abstracts -- | -- Tiny Stories Dataset -- | -- Visualglm6b - Discuss images -- | -- RAM and Tag2Text -- | -- Potat1 Text2vid -- | -- Alexandria Prohect (Will Deque) - Free Embeddings -- | -- Google Arts and Culture Portal -- | -- Word Level Timestamps -- | -- NLLB 600M Demo -- = -- NLLB Github -- | -- Zeroscope v2 Text to video -- | -- ComfyUI Text to Image -- | -- Deepfloyd IF - Text in image -- | -- ChatGPT Custom Plugins Test Space -- | -- r/LocalLlama -- | -- r/Singularity -- | -- SD-XL Test Space -- | -- Seamless M4T - Translation one stop shop -- | -- Code Llama playground -- | -- Text to sing -- | -- Stable Diffusion Webui (Camenduru Space) -- | -- Wizard Coder 34B -- | -- Cowrite with llama2 -- | -- Image to Story -- | -- Clip interrogator 2 -- | -- Agent Benchmarks -- | -- AI Town Live Demo -- = -- AI Town Repository (Deployment]) -- | -- Generative Agents: Interactive Simulacra of Human Behavior (Research paper Repository) -- | -- IDEFICS - open Multimodal model -- | -- Belebele (Meta Dataset) -- | -- AI Comic Factory -- | -- CAMENDURU REPOS -- | -- SQL Dataset - A list of simple questions -- | -- Open Interpreter (alt to ChatGPT Pro) -- | -- List - Easy with AI -- | -- Whisper Web (UI) -- | -- Roblox Assistant -- | -- Illusion Diffusion (Hide words or shapes in the image) -- | -- Background replacement - Shopify -- | -- Lora The Explorer (SDXL) -- | -- InstaFlow (Under 1 second Inference) -- | -- TinyStories on mojo (230+ tk/s) -- | -- Any Emoji you want - emojijs -- | -- SDXL on TPUv5 -- | -- LCM - SD1.5 at 7secs per 4 images (after coldstart) -- | -- SDXL Control Lora -- | -- Whisper WebUI -- | """) + gr.HTML("""
Some useful links
-- Opensource List -- | -- Open LLM Leaderboard -- | -- Openxlabs - Huggingface Alternative -- | -- Whisper JAX -- | -- Google Translate -- | -- Modelscope Text to Video -- | -- stable-diffusion 2 -- | -- stable-diffusion 1 -- | -- karlo 1 -- | -- Bark (TTS) -- | -- Offline Text Model Demos -- | -- SAM with Clip -- | -- Eleven Labs -- | -- Animate an Image -- | -- Clone a voice -- | -- OpenAI pricing -- | -- Image Training Data Search -- | -- Huggingface Chat -- | -- 128x128 Stable Diffusion (Fast) -- | -- Search 95 million research abstracts -- | -- Tiny Stories Dataset -- | -- Visualglm6b - Discuss images -- | -- RAM and Tag2Text -- | -- Potat1 Text2vid -- | -- Alexandria Prohect (Will Deque) - Free Embeddings -- | -- Google Arts and Culture Portal -- | -- Word Level Timestamps -- | -- NLLB 600M Demo -- = -- NLLB Github -- | -- Zeroscope v2 Text to video -- | -- ComfyUI Text to Image -- | -- Deepfloyd IF - Text in image -- | -- ChatGPT Custom Plugins Test Space -- | -- r/LocalLlama -- | -- r/Singularity -- | -- SD-XL Test Space -- | -- Seamless M4T - Translation one stop shop -- | -- Code Llama playground -- | -- Text to sing -- | -- Stable Diffusion Webui (Camenduru Space) -- | -- Wizard Coder 34B -- | -- Cowrite with llama2 -- | -- Image to Story -- | -- Clip interrogator 2 -- | -- Agent Benchmarks -- | -- AI Town Live Demo -- = -- AI Town Repository (Deployment]) -- | -- Generative Agents: Interactive Simulacra of Human Behavior (Research paper Repository) -- | -- IDEFICS - open Multimodal model -- | -- Belebele (Meta Dataset) -- | -- AI Comic Factory -- | -- CAMENDURU REPOS -- | -- SQL Dataset - A list of simple questions -- | -- Open Interpreter (alt to ChatGPT Pro) -- | -- List - Easy with AI -- | -- Whisper Web (UI) -- | -- Roblox Assistant -- | -- Illusion Diffusion (Hide words or shapes in the image) -- | -- Background replacement - Shopify -- | -- Lora The Explorer (SDXL) -- | -- InstaFlow (Under 1 second Inference) -- | -- TinyStories on mojo (230+ tk/s) -- | -- Any Emoji you want - emojijs -- | -- SDXL on TPUv5 -- | -- LCM - SD1.5 at 7secs per 4 images (after coldstart) -- | -- SDXL Control Lora -- | -- Whisper WebUI -- | -- AnimateDiff: Create an image make a video -- | -- Seamless m4t v2 -- | -- OtterHD: Multimodal model -- | -- Ego-exo4d Multimodal dataset -- | -- Meta Imagine images (Free) -- | -- Mage Space images (Free) -- | -- Bing Image Creator (Free) -- | -- Jay Alammar Blog - Illustrated Transformer, Stable Diffusion and More -- |
""") with gr.Tabs() as nav1: with gr.Tab("Rep - HTML"): - gr.HTML("UNNWFWO = Unknown Native Word Foreign Word Order i.e. during active listening practice you only need the words you dont know") + gr.HTML("UNWFWO = Unknown Native Word Foreign Word Order i.e. during active listening practice you only need the words you dont know") gr.HTML("""""") with gr.Tab("Rep - Gradio"): @@ -1672,15 +2032,115 @@ with gr.Blocks() as lliface: #theme=gr.themes.Glass(primary_hue='green', seconda gr.HTML("Advanced Repitition = Combinatorics --> to understand a sentence properly you need understanding of every word --> in language that means use with other words --> Combos within the unique words in a sentence, paragraph, page, etc. --> as close to 3 word sentences") with gr.Column(scale=1): gr.HTML("

Timing Practice - Repitition: Run from it, Dread it, Repitition is inevitable - Thanos --> Repitition of reaction - Foreign in eyes/ears native in mind (For beginners) | Repitition is a multitask activity like driving must be subconcious process to show mastery

") + #----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + #----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- with gr.Column(scale=3): - with gr.Tab("General"): - with gr.Row(): - gr.HTML("Start at Unkown Tracker if unseure
UNNWFWO = Unknown Native Word Foreign Word Order i.e. during active listening practice you only need the words you dont know

General Ideas in this space - Speed of Learning = Avoid Things you know like the plague -- How to track what you know -- Counter is easiest and How you feel is the hardest (The more you know, the more confusion on what you dont know as you probably werent keeping track)

Visulisation of long text - Bottom of this page
Wordlist - 1 new word at a time per minute in the space to the left
Youtube Video Watching - Subtitles Tab
Reading - Unknown Tracker Tabs
Longer Text Memorising - Acronym Map Creation Tab and Transition Tab
Brainstorming - Reading Assistant
Random Exposure
") - gr.Interface(fn=MultiOutputInterface, inputs=TestSplitandUpdateinput, outputs=["text", "text", "text", "text", "text", "text", "text", "text", "text", "button"]) + with gr.Tab("General Idea"): + gr.HTML("True mastery is from the relations between each item aka how every word relates to each other - Repitition in the form combinatorics ") + with gr.Accordion("Memorisation by string comparison idea", open=False): + gr.HTML('Result of prompt chain starting with: Lets say I have the strings "red" and "ppalgan" how can I guess the second from the first from just spelling (eg. similar words and distance in the alphabet, ...), how can I use python to do this i.e. output of no matching letters, closest letter to r, then e, then d, a dictionary of letters that look similar eg. d and p, l and I a and d etc.') + gr.Interface(fn=letterbased_guess_word, inputs=["text", "text"], outputs="text", description="letter based guess suggestions (one word to one word is the designed use case)") + gr.HTML("Side by side reading creator (Google Translate) TODO - Roman output of Non roman characters") + with gr.Group(): + with gr.Row(): + #gr.Interface(fn=w4wsidebysidereadergen, inputs=["text", w4wsidebysidelangdest], outputs=["text", "text", "text"], description="Side by side reading creator") + w4wsidebysideinput = gr.Text(label="Paste Text you want to learn here (wordlists and UDHR can be used as )", placeholder="Enter Text Here") + with gr.Row(): + w4wsidebysidelangdest = gr.Dropdown(choices=langdropdown_choices, label="Choose destination language", value="de: german") + w4wsidebysidebtn = gr.Button("Create Side by Side, FWNWO, and FWFWO (Just read start to finish of each till you can read the FWFWO without assistance)") + with gr.Row(): + w4wsidebysideOutput = gr.Text(label="Side by side", placeholder="Side by side will display here") + w4wsidebysideFWNWOOutput = gr.Text(label="Foreign Word Native Word Order", placeholder="FW NWO will display here") + w4wsidebysideFWFWOOutput = gr.Text(label="Foreign Word Foreign Word Order", placeholder="FW FWO will display here") + w4wsidebysidebtn.click(fn=w4wsidebysidereadergen, inputs=[w4wsidebysideinput, w4wsidebysidelangdest], outputs=[w4wsidebysideOutput, w4wsidebysideFWNWOOutput, w4wsidebysideFWFWOOutput]) + with gr.Row(): + gr.HTML("Coming Soon") + with gr.Row(): + w4wsidebysideWordCountOutput = gr.Text(label="Word Count", placeholder="Word Count will display here") + w4wsidebysideRandomisedOutput = gr.Text(label="Random Order As Test", placeholder="Randomised version (Transition Tab most left bottom) will display here") + w4wsidebysideQueGenOutput = gr.Text(label="Questions generation as comprehension test", placeholder="Questions generated will display here") + w4wsidebysideUNWFWOOutput = gr.Text(label="HTML as UNWFWO assistant", placeholder="HTML as UNWFWO assistant download will display here") + gr.HTML("Side by side reading creator (NLLB-600M (+-3gb / setting has 400 tokens? as max length for inference optimisation?)- 200 languages vs 107 in googletrans) - -- Fairseq Github -- | inspired by - -- Geonmo NLLB Demo (Runtime Error Dec 23) -- | -- Vutuka demo (Working) -- ") + with gr.Group(): + gr.HTML("Under Construction - generator and cpu based to beat gpu cost, cpu wait time and network dependency for local use") + nllbtranscpuinput = gr.Text(label="Paste Text you want to learn here (wordlists and UDHR can be used as )", placeholder="Enter Text Here") + with gr.Row(): + nllbtranscpulangsrc = gr.Dropdown(choices=nllb_lang_codes, label="Choose source language", value="English") + nllbtranscpulangdest = gr.Dropdown(choices=nllb_lang_codes, label="Choose target language", value="Akan") + nllbtranscpubtn = gr.Button("Create Side by Side, FWNWO, and FWFWO (Just read start to finish of each till you can read the FWFWO without assistance)") + with gr.Row(): + with gr.Column(scale=4): + nllbtranscpuOutput = gr.Text(label="Side by side", placeholder="Side by side will display here") + with gr.Column(scale=1): + nllbtranscpudetailsOutput = gr.Text(label="Inference Details", placeholder="Details will display here") + nllbtranscpubtn.click(fn=nllbtransctranslation, inputs=[nllbtranscpulangsrc, nllbtranscpulangdest, nllbtranscpuinput], outputs=[nllbtranscpuOutput, nllbtranscpudetailsOutput]) gr.Interface(fn=LoadNLTKUDHRText, inputs=NLTKudhr, outputs=["text", "textarea"], description="UDHR as some test texts") + with gr.Tab("Youtube Subs Listening Comprehension"): + gr.HTML("State Management Solution for Word --> Find LingQ Here --> https://www.lingq.com/en/") + with gr.Tab("New - Learning with Youtube"): + gr.HTML("TODO: 1st syllable subtitle. First Syllable of any word is all you need to listen - pair this with Youtube subtitle interface - Listening is ability to spell really fast (real time)") + gr.HTML(" -- artificialguybr's Video Translation/Transcription Space -- | ") + with gr.Group(): + gr.HTML("Custom Subtitles Ideas - Realtime Practice = Test (Pause to learn)") + YTTransciptOutDropdown = gr.Dropdown(choices=["https://www.youtube.com/watch?v=UYk43fncV68&pp=ygULcnVubmluZyBtYW4%3D", "https://youtu.be/dxVaP0-aFIE"], value="https://www.youtube.com/watch?v=UYk43fncV68&pp=ygULcnVubmluZyBtYW4%3D", allow_custom_value=True) + YTTransciptOutbtn = gr.Button("Transcript to text") + YTTransciptOutVid = gr.HTML('Video will load Here') + gr.Interface(fn=subtitle_generator, inputs=None, outputs=["textbox", "textbox", "textbox"], description='Modified Subtitles Test - will only work after video has been loaded') + with gr.Row(): + with gr.Column(scale=4): + with gr.Row(): + YTTransciptAnalysedOut = gr.Textbox(placeholder="Place the translated transcript here (Click on the youtube logo to open the video and copy)") + with gr.Row(): + YTTransciptOut = gr.Textbox(placeholder="Place the translated transcript here (Click on the youtube logo to open the video and copy)") + with gr.Column(scale=1): + YTTransciptTextOnlyOut = gr.Textbox(placeholder="Place the translated transcript here (Click on the youtube logo to open the video and copy)") + YTTransciptOutbtn.click(fn=YTTransciptAnalysisandLoad, inputs=YTTransciptOutDropdown, outputs=[YTTransciptOutVid, YTTransciptOut, YTTransciptAnalysedOut, YTTransciptTextOnlyOut]) + with gr.Tab("Old - Visual - Multiline Custom Video Subtitles"): + gr.HTML("------------------------ INCOMPLETE (Somewhere around the SRT merging and merging process) -------------------------------------") + gr.HTML("LingQ Companion Idea - i.e. Full Translation Read along, and eventually Videoplayer watch along like RAMUS whisper space

Extra functions needed - Persitent Sentence translation, UNWFWO, POS tagging and Word Count per user of words in their account. Macaronic Text is also another way to practice only the important information") + gr.HTML("""

For Transcripts to any video on youtube use the link below ⬇️

https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles | https://huggingface.co/spaces/vumichien/whisper-speaker-diarization""") + #gr.HTML("

If Space not loaded its because of offline devopment errors please message for edit


") + with gr.Tab("Merged Subtitles (Incomplete)"): + gr.HTML(""" Core Idea = Ability to follow one video from start to finish is more important than number of words (except for verbs)
+ Step 1 - Get foreign transcript - WHISPER (Need to download video though - booo) / Youtube / Youtube transcript api / SRT websites
+ Step 2 - Get Translation of foreign transcript
+ Step 3 - Word for Word Translation Creation in both Directions (Paste Google Translation here)
+ """) + gr.Interface(fn=split_srt_file, inputs=["text", SRTLangOptions] , outputs=["text", "file", "text", "text"], description="SRT Contents to W4W Split SRT for Google Translate") + gr.Interface(fn=chunk_srt_text, inputs=['text', Translationchuncksize], outputs=['dataframe','text'], description='Assitant for google translate character limit - aka where to expect cuts in the text') + gr.HTML("Step 4 - Pronounciation (Roman) to Subtitle Format --> GTranslate returns unformatted string") + gr.Interface(fn=splittext, inputs="text", outputs="text", description="Text for w4w creation in G Translate") + gr.HTML("Step 5 - Merge into one file") + with gr.Row(): + RomanFile = gr.File(label="Paste Roman") + W4WFile = gr.File(label="Paste Word 4 Word") + FullMeanFile = gr.File(label="Paste Full Meaning") + MacaronicFile = gr.File(label="Paste Macaronic Text") + SentGramFormula = gr.File(label="Paste Sentence Grammar Formula Text") + with gr.Row(): + MergeButton = gr.Button('Merge the seperate files into one interpolated file (Line by line merge)', ) + with gr.Row(): + MergeOutput = gr.TextArea(label="Output") + MergeButton.click(merge_lines, inputs=[RomanFile, W4WFile, FullMeanFile, MacaronicFile], outputs=[MergeOutput], ) + with gr.Row(): + gr.Text("Make sure there are 4 spaces after the last subtitle block (Otherwise its skipped)") + CleanedMergeButton = gr.Button('Create a Usable file for SRT') + with gr.Row(): + CleanedMergeOutput = gr.TextArea(label="Output") + CleanedMergeButton.click(fn=SRTLineSort, inputs=[MergeOutput], outputs=[CleanedMergeOutput]) + with gr.Tab("Split video to segments"): + gr.HTML("How to make screenshot in vlc - https://www.vlchelp.com/automated-screenshots-interval/
") + gr.Interface(VideotoSegment, inputs=[spvvideo_file_input, spvsubtitle_file_input], outputs=spvdownload_output) + gr.TextArea("Placeholder for ffmpeg command generator and ffmpeg-python code to split video") + gr.Text("Text to Closed Class + Adjectives + Punctuation or Noun Verb + Punctuation ") + with gr.Tab("Audio - Only English thoughts as practice"): + gr.HTML("For Audio Most productive is real time recall of native (where your full reasoning ability will always be)

Find Replace new lines of the foreign text with full stops or | to get per word translation") + gr.Interface(fn=TTSforListeningPractice, inputs=["text", TTSLangOptions2], outputs="audio", description="Paste only english words in foreign order and then keep removing the words from this to practice as effectively") with gr.Tab("Speed through Imagery"): gr.HTML("Categories for Image Creation
1. Extract only nouns
2. Extract Acronyms
3. Convert verbs to Careers
Dall-E 3 best for multi concept images - every thing else = one picture at a time
Ask the llm to create a prompt from the nouns extracted or to create english words ot sentences from the acronyms") ImageryInput = gr.Textbox(placeholder='Enter the text here and use in this tab') + gr.HTML(" -- Lingua Link (Simple GPT for assistinng image creation) --
Use with placeholder generator tab below ") + gr.Interface(fn=lingualinkassist, inputs=["text","text"], outputs="text", description="Lingua Link Prompt Assistance") with gr.Tab("Filter Text"): gr.Interface(fn=onlyplurals, inputs=["text"], outputs=["text"], description="Only plurals = optimal concepts to learn first as LT work = repitition") gr.Interface(fn=create_acronym_map, inputs="textbox", outputs="textbox", description="Acronyms") @@ -1700,24 +2160,42 @@ with gr.Blocks() as lliface: #theme=gr.themes.Glass(primary_hue='green', seconda with gr.Column(scale=2): imageplaceholdertextoutput = gr.Code("The code for the HTML created will come here") imageplaceholderbtn.click(fn=imagebasedreading, inputs=[imageplaceholderinput], outputs=[imageplaceholderdownload, imageplaceholderoutput, imageplaceholdertextoutput]) + with gr.Tab("Word level Placeholder Genration"): + gr.HTML("Placeholder for every images of each sentence - Good ChatGPT + Dall-E ") + with gr.Row(): + with gr.Column(scale=4): + imageplaceholderinputwordlevel = gr.TextArea() + with gr.Column(scale=1): + gr.Label("Enter Text and Get a line by line placeholder for image associated with the text") + imageplaceholderdownloadwordlevel = gr.File() + imageplaceholderbtnwordlevel = gr.Button("Create the image placeholder") + with gr.Row(): + with gr.Column(scale=3): + imageplaceholderoutputwordlevel = gr.HTML("Preview will load here") + with gr.Column(scale=2): + imageplaceholdertextoutputwordlevel = gr.Code("The code for the HTML created will come here") + imageplaceholderbtnwordlevel.click(fn=imagebasedreadingwordlevel, inputs=[imageplaceholderinputwordlevel], outputs=[imageplaceholderdownloadwordlevel, imageplaceholderoutputwordlevel, imageplaceholdertextoutputwordlevel]) with gr.Tab('Picture Subtitles / Image Slide Show'): + gr.HTML("----------------------------------INCOMPLETE------------------------------------ runs into filepath error") + gr.Interface(fn=speedmastery_load_images, inputs=speedmastery_load_imagesinput, outputs="gallery", description="Put the images you want to practice or work on") gr.Image() gr.HTML('placeholder for button to start generator for time based image recognition i.e. Picture Subtitles') - with gr.Tab("Repetition Injected Text"): - gr.Label("Optimal Study Reps is inbetween new information acquisition - i.e. any thing you havent read already") - gr.Interface(fn=RepititionInjectedReading, inputs=[Repsplitdropdown, "text", "text"], outputs="text") - with gr.Tab("Stateful Reading - Progress Tracking"): + gr.Gallery() + with gr.Tab("UNWFWO - Stateful Reading - Progress Tracking"): gr.HTML("The problem is to learn you need to repeat things you know that you didnt know thiss morning - The repitition space to the right is only half of solution
You need to read new stuff while revising the stuff you didnt know that you just learned aka the things you write as notes") gr.HTML("The sentences used as notes repitition interface then can count as recently known sentences that you can ignore in favour of the sentences you didnt interact with") gr.Label("Some Tests - click to hide - unknown word only list") gr.Interface(fn=hidingbuttontesttranslate, inputs="text", outputs=["code", "html"]) gr.Label("Missing is database integration for the counter and non-english - ALSO TODO - Parralell interface for the html and acronym creator") - gr.Interface(fn=UnknownTrackTexttoApp, inputs="text", outputs=["file", "html", "text"], description="HTML mini App - UNNWFWO (To track verbs you dont know for listening practice). Use the text from here to create lists you use for the TTS section") + gr.Interface(fn=UnknownTrackTexttoApp, inputs="text", outputs=["file", "html", "code"], description="HTML mini App - UNNWFWO (To track verbs you dont know for listening practice). Use the text from here to create lists you use for the TTS section") gr.Interface(create_acronym_map, inputs='text', outputs=['text', 'text']) gr.HTML("On the Acronyms you need to underline the verbs") gr.HTML("Aim for 1000 reps per item in your mind - the end goal for full sentences is to identify the SOV equivalent ASAP") gr.Interface(fill_lines, inputs=["text", RepSched_Num_lines], outputs="text") - + with gr.Tab("Kinesthetic / Mime Translation"): + gr.HTML("placeholder for prompt generator and tests for llama-cpp with slm") + gr.HTML("Keep nouns and verbs -- turn every noun into a verb -- turn every verb into a pose") + gr.HTML("Viseme Test with video recording screenshots judged LMM") with gr.Tab("Beginner - Listen + Read"): gr.Label("Closed Eye Recital per new word | 1 new word a minute while recycling the words from the previous minutes") with gr.Row(): @@ -1778,59 +2256,48 @@ with gr.Blocks() as lliface: #theme=gr.themes.Glass(primary_hue='green', seconda gr.TextArea("Make as many questions on the text as you can in native language and then translate", interactive=True) gr.Dropdown(["Placeholder chunk 1", "Placeholder chunk 2", "Placeholder chunk 3"]) gr.HTML("Load the current chunk here and Put a Dataframe where you have only one column for the questions") - with gr.Tab("Advanced - Youtube - Subtitles - LingQ Addon Ideas"): - gr.HTML("Find LingQ Here --> https://www.lingq.com/en/") - with gr.Tab("New - Learning with Youtube"): - gr.HTML("TODO: 1st syllable subtitle. First Syllable of any word is all you need to listen - pair this with Youtube subtitle interface - Listening is ability to spell really fast (real time)") - gr.HTML(" -- artificialguybr's Video Translation/Transcription Space -- | ") - YTTransciptOutDropdown = gr.Dropdown(choices=["https://www.youtube.com/watch?v=UYk43fncV68&pp=ygULcnVubmluZyBtYW4%3D", "https://youtu.be/dxVaP0-aFIE"], value="https://www.youtube.com/watch?v=UYk43fncV68&pp=ygULcnVubmluZyBtYW4%3D", allow_custom_value=True) - YTTransciptOutbtn = gr.Button("Transcript to text") - YTTransciptOutVid = gr.HTML('Video will load Here') - gr.Interface(fn=subtitle_generator, inputs=None, outputs=["textbox", "textbox"], description='Modified Subtitles Test - will only work after video has been loaded') - with gr.Row(): - YTTransciptAnalysedOut = gr.Textbox(placeholder="Place the translated transcript here (Click on the youtube logo to open the video and copy)") - YTTransciptOut = gr.Textbox(placeholder="Place the translated transcript here (Click on the youtube logo to open the video and copy)") - YTTransciptOutbtn.click(fn=YTTransciptAnalysisandLoad, inputs=YTTransciptOutDropdown, outputs=[YTTransciptOutVid, YTTransciptOut, YTTransciptAnalysedOut]) - with gr.Tab("Old - Visual - Multiline Custom Video Subtitles"): - gr.HTML("LingQ Companion Idea - i.e. Full Translation Read along, and eventually Videoplayer watch along like RAMUS whisper space

Extra functions needed - Persitent Sentence translation, UNWFWO, POS tagging and Word Count per user of words in their account. Macaronic Text is also another way to practice only the important information") - gr.HTML("""

For Transcripts to any video on youtube use the link below ⬇️

https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles | https://huggingface.co/spaces/vumichien/whisper-speaker-diarization""") - #gr.HTML("

If Space not loaded its because of offline devopment errors please message for edit


") - with gr.Tab("Merged Subtitles (Incomplete)"): - gr.HTML(""" Core Idea = Ability to follow one video from start to finish is more important than number of words (except for verbs)
- Step 1 - Get foreign transcript - WHISPER (Need to download video though - booo) / Youtube / Youtube transcript api / SRT websites
- Step 2 - Get Translation of foreign transcript
- Step 3 - Word for Word Translation Creation in both Directions (Paste Google Translation here)
- """) - gr.Interface(fn=split_srt_file, inputs=["text", SRTLangOptions] , outputs=["text", "file", "text", "text"], description="SRT Contents to W4W Split SRT for Google Translate") - gr.Interface(fn=chunk_srt_text, inputs=['text', Translationchuncksize], outputs=['dataframe','text'], description='Assitant for google translate character limit - aka where to expect cuts in the text') - gr.HTML("Step 4 - Pronounciation (Roman) to Subtitle Format --> GTranslate returns unformatted string") - gr.Interface(fn=splittext, inputs="text", outputs="text", description="Text for w4w creation in G Translate") - gr.HTML("Step 5 - Merge into one file") - with gr.Row(): - RomanFile = gr.File(label="Paste Roman") - W4WFile = gr.File(label="Paste Word 4 Word") - FullMeanFile = gr.File(label="Paste Full Meaning") - MacaronicFile = gr.File(label="Paste Macaronic Text") - SentGramFormula = gr.File(label="Paste Sentence Grammar Formula Text") - with gr.Row(): - MergeButton = gr.Button('Merge the seperate files into one interpolated file (Line by line merge)', ) - with gr.Row(): - MergeOutput = gr.TextArea(label="Output") - MergeButton.click(merge_lines, inputs=[RomanFile, W4WFile, FullMeanFile, MacaronicFile], outputs=[MergeOutput], ) - with gr.Row(): - gr.Text("Make sure there are 4 spaces after the last subtitle block (Otherwise its skipped)") - CleanedMergeButton = gr.Button('Create a Usable file for SRT') - with gr.Row(): - CleanedMergeOutput = gr.TextArea(label="Output") - CleanedMergeButton.click(fn=SRTLineSort, inputs=[MergeOutput], outputs=[CleanedMergeOutput]) - with gr.Tab("Split video to segments"): - gr.HTML("How to make screenshot in vlc - https://www.vlchelp.com/automated-screenshots-interval/
") - gr.Interface(VideotoSegment, inputs=[spvvideo_file_input, spvsubtitle_file_input], outputs=spvdownload_output) - gr.TextArea("Placeholder for ffmpeg command generator and ffmpeg-python code to split video") - gr.Text("Text to Closed Class + Adjectives + Punctuation or Noun Verb + Punctuation ") - with gr.Tab("Audio - Only English thoughts as practice"): - gr.HTML("For Audio Most productive is real time recall of native (where your full reasoning ability will always be)

Find Replace new lines of the foreign text with full stops or | to get per word translation") - gr.Interface(fn=TTSforListeningPractice, inputs=["text", TTSLangOptions2], outputs="audio", description="Paste only english words in foreign order and then keep removing the words from this to practice as effectively") + gr.HTML(""" Some considerations for questions to make +Please continue this sentence Types of knowing something = verbatim recall, differences, characteristics, definition of the item, definitions of the items related + +

Mistral-Medium: +Sure! Here are some additional types of knowing something: + +Causes and effects: Understanding the causes and effects of a concept or phenomenon can help deepen your knowledge and make it more applicable to real-world situations. +Examples and non-examples: Providing examples and non-examples of a concept can help clarify its meaning and distinguish it from similar concepts. +Analogies and metaphors: Using analogies and metaphors can help make abstract concepts more concrete and easier to understand. +Principles and theories: Understanding the principles and theories that underlie a concept can help you see how it fits into a broader framework of knowledge. +Applications and implications: Understanding the practical applications and implications of a concept can help you see its relevance and importance in real-world contexts. +Historical and cultural context: Understanding the historical and cultural context in which a concept developed can provide valuable insights into its meaning and significance. +Overall, there are many different types of knowing, and each one can contribute to a more complete and nuanced understanding of a topic. By using a variety of approaches and strategies, you can deepen your knowledge and improve your ability to apply it in meaningful ways. + +

GPT-4 Turbo: +Please continue this sentence Types of knowing something = verbatim recall, differences, characteristics, definition of the item, definitions of the items related + +Types of knowing something can include verbatim recall, understanding differences, recognizing characteristics, defining the item, and understanding definitions of related items. Beyond these aspects, knowing can also encompass: + +Application: The ability to use the information in new and concrete situations. This involves applying rules, methods, concepts, principles, laws, and theories. + +Analysis: The capability to break down material into its constituent parts and understand its organizational structure. This includes recognizing the relationships between the parts and the overall structure and purpose. + +Synthesis: The skill to put parts together to form a new whole. This could involve creating a new pattern, proposing alternative solutions, or deriving a new set of abstract relations. + +Evaluation: Making judgments about the value of ideas or materials. This requires forming opinions and making decisions based on criteria and standards through checking and critiquing. + +Procedural Knowledge: Knowing how to do something, which involves procedural steps or actions required to perform a task or solve a problem. + +Conditional Knowledge: Understanding when to use certain knowledge, which involves recognizing the applicability of a skill or concept in various contexts. + +Metacognitive Knowledge: This is a higher-order type of knowing, which involves awareness and understanding of one's own thought processes. It includes the ability to monitor, regulate, and direct oneself to a desired end, like learning or problem-solving. + +Tacit Knowledge: This is the kind of knowledge that is difficult to transfer to another person by writing it down or verbalizing it. It is acquired through personal experience and context, often without conscious awareness. + +Explicit Knowledge: In contrast to tacit knowledge, explicit knowledge is that which can be articulated, codified, accessed and verbalized. It can be readily transmitted to others. + +Intuitive Knowledge: This refers to understanding or believing something instinctively without the need for conscious reasoning. It's often experienced as a 'gut feeling' or an 'aha' moment that comes seemingly out of nowhere. + +Each type of knowing involves different cognitive processes and levels of understanding. The depth and breadth of one's knowledge often depend on the ability to combine these various types of knowing effectively. + """) + with gr.Tab("Transition is the end goal (SOV, SVO, VSO)"): gr.Interface(fn=FrontRevSentChunk, inputs=[ChunkModeDrop, "checkbox", "text", langdest], outputs="text", description="Chunks creator") with gr.Row(): @@ -1859,17 +2326,18 @@ with gr.Blocks() as lliface: #theme=gr.themes.Glass(primary_hue='green', seconda gr.Interface(fn=TextCompFormat, inputs=["textarea", HTMLCompMode], outputs="text", description="Convert Text to HTML Dropdown or Links which you paste in any html file") gr.Interface(fn=create_collapsiblebutton, inputs=["textbox", "textbox", "textarea"], outputs="textbox", description="Button and Div HTML Generator, Generate the HTML for a button and the corresponding div element.") with gr.Tab("Real-Time AI - Video/Audio/AR"): - gr.HTML("Pydroid and OpenCV and Tkinter = Frontend for OpenAI / OSS vision API as ChatGPT doesnt support video / real-time screenshot yet") + gr.HTML("
Agents = Custom Software (Personalised UI and Mods, among other things) = Custom Environments (AR) -- Open Interpreter -- | -- Microsoft Autogen -- |
") + gr.HTML("Raspberry Pi / Pydroid and OpenCV and Tkinter = Frontend for OpenAI / OSS vision API as ChatGPT doesnt support video / real-time screenshot yet") gr.HTML("HUD Experiment (Waiting for GPT4V API) - Full context of user situation + Ability to communicate in real-time to user using images (H100+ and low enough resolution and low enough steps - it/s = fps) - just like google maps but for real life") gr.HTML("Some sample Open Interpreter Code - https://huggingface.co/spaces/ysharma/open-interpreter/blob/main/app.py") gr.Interface(fn=ImageTranslationTest , inputs=[VideoTestInput, VideoTestSubtitleInput], outputs="video") - with gr.Accordion("Whisper Spaces (sanchit-gandhi spaces have microphone input)"): - Whisperspaceoptions = gr.Dropdown(choices=["https://sanchit-gandhi-whisper-jax-diarization.hf.space", "https://sanchit-gandhi-whisper-jax.hf.space", "https://sanchit-gandhi-whisper-large-v2.hf.space", "https://facebook-seamless-m4t.hf.space"], label="existing whisper spaces") + with gr.Accordion("STT Spaces (sanchit-gandhi spaces have microphone input)"): + Whisperspaceoptions = gr.Dropdown(choices=["https://facebook-seamless-m4t-v2-large.hf.space", "https://hf-audio-whisper-large-v3.hf.space", "https://sanchit-gandhi-whisper-jax-diarization.hf.space", "https://sanchit-gandhi-whisper-jax.hf.space", "https://sanchit-gandhi-whisper-large-v2.hf.space", "https://facebook-seamless-m4t.hf.space"], label="existing whisper spaces") Whisperspaceoptionsbtn = gr.Button("Load Whisper Space") WhisperspaceoptionsOut = gr.HTML() Whisperspaceoptionsbtn.click(fn=display_website, inputs=Whisperspaceoptions, outputs=WhisperspaceoptionsOut) with gr.Accordion("Image as prompt Spaces"): - Imagepromptspaceoptions = gr.Dropdown(choices=["https://adept-fuyu-8b-demo.hf.space", "https://badayvedat-llava.hf.space", "https://xinyu1205-recognize-anything.hf.space"], label="existing Image prompt spaces") + Imagepromptspaceoptions = gr.Dropdown(choices=["https://otter-ai-otterhd-demo.hf.space", "https://adept-fuyu-8b-demo.hf.space", "https://badayvedat-llava.hf.space", "https://xinyu1205-recognize-anything.hf.space"], label="existing Image prompt spaces") Imagepromptspaceoptionsbtn = gr.Button("Load a Image as prompt Space") ImagepromptspaceoptionsOut = gr.HTML() Imagepromptspaceoptionsbtn.click(fn=display_website, inputs=Imagepromptspaceoptions, outputs=ImagepromptspaceoptionsOut) @@ -1880,6 +2348,16 @@ with gr.Blocks() as lliface: #theme=gr.themes.Glass(primary_hue='green', seconda gr.HTML("Some Prompt ideas --> Prompt: Describe the place where these descriptions may be (You job is to be speculative for brainstorming purposes): A dog and a boy, the area is texas, the weather is sunny, the date is 01 May 2021
Prompt Content Ideas Ideas Clip Interrogator + Location Data aka tags for place, location and time + general news updates on the location + overview of the items in the location
Location based advise is most important but after that is information observed by appliances in the location eg. Times Computer turned on, times geyser inspected, amount of time keys havent been touched etc.
each location will have an ai personality that will relay more information ") gr.HTML(" -- RAM and Tag2Text -- | -- SAM with Clip -- ") with gr.Tab("Incomplete Tests and Experiments"): + with gr.Tab("Multi Output and other ideas"): + gr.HTML("Start at Unkown Tracker if unseure
UNNWFWO = Unknown Native Word Foreign Word Order i.e. during active listening practice you only need the words you dont know

General Ideas in this space - Speed of Learning = Avoid Things you know like the plague -- How to track what you know -- Counter is easiest and How you feel is the hardest (The more you know, the more confusion on what you dont know as you probably werent keeping track)

Visulisation of long text - Bottom of this page
Wordlist - 1 new word at a time per minute in the space to the left
Youtube Video Watching - Subtitles Tab
Reading - Unknown Tracker Tabs
Longer Text Memorising - Acronym Map Creation Tab and Transition Tab
Brainstorming - Reading Assistant
Random Exposure
") + gr.Interface(fn=MultiOutputInterface, inputs=TestSplitandUpdateinput, outputs=["button", "text", "text", "text", "text", "text", "text", "text", "text", "text"]) + with gr.Tab("Order based Forcing content into time (Time is the bottleneck)"): + gr.Label("Inspect the Transition Tab last item in left most column - RandomiseTextbyType - Incomplete, but this plus timer will work well achieve what this space aims to achieve - LLMs would choose the next set of words based on the last batch and its knowledge of your vocab level or even your current location (using phone as sensors) and can make image/audio prompts to make more engaging") + gr.Label("Placeholder for take in list and return 10 items in correct order for 20 seconds and 20 seconds to assign order to each word using numbers input - generator with blocks 20 seconds to understand what was wrong and move on") + gr.Label("Placeholder for long text version as well - Order of the sentence or sentence spans (spacy) is what is what most important in long text - you cant organise you dont know") + with gr.Tab("Repetition Injected Text"): + gr.Label("Optimal Study Reps is inbetween new information acquisition - i.e. any thing you havent read already") + gr.Interface(fn=RepititionInjectedReading, inputs=[Repsplitdropdown, "text", "text"], outputs="text") with gr.Tab("Graph Based Reading", id="1"): gr.Textbox('Parts of Speech based | Automating the Notetaking Tab either directly or using visual llm to use this interface efficiently') gr.HTML("Types of comprehension agent
Speed of Comprehension = Verb comprehension
From the following please extract the verbs
now explain each in context
Next, use picture descriptions for each word in the verb list
Create combinations using the verb list
") @@ -1888,6 +2366,7 @@ with gr.Blocks() as lliface: #theme=gr.themes.Glass(primary_hue='green', seconda gr.HTML("Sentence to PyDot graph") gr.HTML("Currently a bug that locks all buttons in the space when you use this above example - Reload to fix") with gr.Tab("Random Ideas"): + gr.HTML("
(Part to be formatted as System prompt for model used as the help admin for the app) Objective - Learn vocab from video/music and have record of how far you got using as overengineered solution (to keep upto date with SOTA AI models and use cases)
Current Workflow = One Youtube video as focus for a week or until learned (i.e. instant recognition recall of vocab in random order exposure)
Timeline Brainstorm = Know Nothing (Just need Exposure - Random 10% suggesed as starting point) --- Know a little (Remove known) --- Know half (UNNWFWO)
") gr.HTML("""

Spaces Test - Still Undercontruction --> Next Milestone is Turning this interface handsfree | Knowledge is a Language but productive knowledge is find replace as well | LingQ is good option for per word state management

Arrows app json creator for easy knowledge graphing and spacy POS graph? --> Questions? -->

ChatGPT Turns Learning into a read only what you dont know ask only what you dont know feedback loop --> All you have to do is keep track of what prompts you have asked in the past

""") gr.HTML("

Target 0: Mnemonics as title of images --> Comprehensible input
Target 1: Dual audio at word Level while using repitition to train random recall --> Word level Time
Target 2: Video --> Split by sentence --> each word repeated (60) + each phrase (10) + each sentence (10) --> TTS file for practice --> State Management/Known word Tracker
-----------------------
The trick is minimum one minute of focus on a new word --> Listening is hard because there are new word within seconds and you need repeated focus on each to learn

Audio = best long form attention mechanism AS it is ANTICIPATION (Awareness of something before it happens like knowing song Lyrics) FOCUSED - Attention (Focused Repitition) + Exposure (Random Repitition)

Listening is hard due to different word order and word combinations (collocations more important than single words)


") @@ -1946,4 +2425,4 @@ with gr.Blocks() as lliface: #theme=gr.themes.Glass(primary_hue='green', seconda gr.Textbox(label='Use this text to hold translations of the SQL rows in the above linked dataset (A kind of What I say vs what I want)') -lliface.queue().launch(share=True) #(inbrowser="true") \ No newline at end of file +lliface.queue().launch() #(inbrowser="true") \ No newline at end of file