Spaces:

koptelovmax
/

amrdemo

Sleeping

App Files Files Community

koptelovmax committed on May 4

Commit

b27de9c

•

1 Parent(s): 13d2655

Add application file

Browse files

Files changed (11) hide show

README.md +1 -12
app.py +204 -0
model_gtos/amrlib_meta.json +9 -0
model_gtos/config.json +36 -0
model_gtos/pytorch_model.bin +3 -0
model_gtos/training_args.bin +3 -0
model_stog/amrlib_meta.json +10 -0
model_stog/config.json +70 -0
model_stog/model_parse_xfm_bart_large.json +42 -0
model_stog/pytorch_model.bin +3 -0
requirements.txt +18 -0

README.md CHANGED Viewed

@@ -1,12 +1 @@
----
-title: Test2
-emoji: 🔥
-colorFrom: pink
-colorTo: pink
-sdk: streamlit
-sdk_version: 1.33.0
-app_file: app.py
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference


1	+ # amr_demo

app.py ADDED Viewed

	@@ -0,0 +1,204 @@

+import streamlit as st
+from easynmt import EasyNMT
+from nltk import word_tokenize
+from simalign import SentenceAligner
+import re
+import penman
+import amrlib
+from amrlib.graph_processing.annotator import add_lemmas
+from amrlib.alignments.rbw_aligner import RBWAligner
def getTargetWordNode(segmentTokens, aligner, alignments, target):
    """Find the graph node aligned with ``target`` and return its full name.

    The lookup goes through two alignments: first from the position of
    ``target`` in ``segmentTokens`` to a token index on the other side of the
    word alignment, then from that index to an entry of ``aligner.alignments``.

    Args:
        segmentTokens: List of tokens of the original segment; ``target`` is
            looked up in it by exact match (first occurrence).
        aligner: Object exposing an indexable ``alignments`` attribute whose
            entries are either ``None`` or something whose ``str()`` looks
            like a triple, e.g. ``('c', ':instance', 'fence')``.
        alignments: Mapping whose ``'mwmf'`` entry is an iterable of
            ``(segment token index, aligned token index)`` pairs.
        target: The word to locate.

    Returns:
        ``'<first field> / <third field>'`` of the aligned entry (for the
        example above: ``'c / fence'``), or the string ``'Error!'`` when any
        step of the lookup fails.
    """
    notFound = 'Error!'

    try:
        targetIndexFr = segmentTokens.index(target)
    except ValueError:
        # Target word does not occur in the tokenized segment.
        return notFound

    # Only the first aligned pair for the target token is used.
    targetIndexEn = next(
        (pair[1] for pair in alignments['mwmf'] if pair[0] == targetIndexFr),
        None,
    )
    if targetIndexEn is None:
        return notFound

    nodeAlignments = aligner.alignments
    if targetIndexEn >= len(nodeAlignments):
        # The word alignment points past the tokens known to the aligner.
        return notFound

    nodeAlignment = nodeAlignments[targetIndexEn]
    if nodeAlignment is None:
        return notFound

    # Break the string form of the entry on commas, opening parentheses and
    # quotes, keeping only the non-blank pieces.
    nodeConcepts = [
        piece
        for piece in re.split(r'[,("\']', str(nodeAlignment))
        if piece.strip()
    ]
    if len(nodeConcepts) < 3:
        # Not a triple-like entry: there is no third field to name the node.
        return notFound
    return f'{nodeConcepts[0]} / {nodeConcepts[2]}'
# Role labels together with their trailing whitespace, e.g. ":ARG0 ",
# ":ARG1-of " or ":prep-on-behalf-of ".
_ROLE_TOKEN_RE = re.compile(r'(:\w+(?:-\w+)*\s)')
# Single opening or closing parenthesis.
_PAREN_TOKEN_RE = re.compile(r'([()])')


def _tokenizeAmrGraph(amrGraph):
    """Split a graph string into role labels, parentheses and remaining text."""
    tokens = []
    for line in amrGraph.split('\n'):
        # Blank lines carry no tokens; lines starting with '#' are metadata.
        if not line.strip() or line[0] == '#':
            continue
        # The appended space lets a role label at the end of a line match.
        for chunk in _ROLE_TOKEN_RE.split(line + ' '):
            if chunk.strip():
                tokens.extend(
                    piece for piece in _PAREN_TOKEN_RE.split(chunk) if piece.strip()
                )
    return tokens


def _extractSubGraphText(tokens, target):
    """Keep the path from the root to the first node containing ``target``.

    Every branch that is closed before the target is reached gets discarded;
    the target node is kept with everything nested inside it.
    """
    kept = []          # tokens on the path from the root to the target
    openCount = 0      # parentheses opened on that path and not closed yet
    subGraph = ""      # text of the target node, collected while inside it
    nestedCount = 0    # parenthesis depth relative to the target node
    insideTarget = False
    found = False

    for token in tokens:
        if insideTarget:
            if token == '(':
                nestedCount += 1
            elif token == ')':
                nestedCount -= 1
                if nestedCount == 0:
                    # The target node is complete; later tokens belong to
                    # other branches and must not be scanned for another match.
                    kept.append(subGraph + ')')
                    found = True
                    break
            subGraph += token
        elif target in token.strip():
            # The '(' that opens this node is already in ``kept``.
            insideTarget = True
            subGraph += token
            nestedCount = 1
        elif token == '(':
            openCount += 1
            kept.append(token)
        elif token == ')':
            # A branch without the target just closed: drop its content,
            # its opening parenthesis and the role label that introduced it.
            openCount -= 1
            while kept[-1] != '(':
                kept.pop()
            kept.pop()
            if kept:
                kept.pop()
        else:
            kept.append(token)

    if not found:
        raise IndexError(f'target {target!r} not found in the graph')

    # The target node closed its own parenthesis, so one fewer is needed
    # than the number opened on the path.
    kept.extend(')' * (openCount - 1))
    return ''.join(kept)


def getTargetWordSubGraphFullPath(amrGraph, target):
    """Extract the subgraph holding ``target`` with the full path of nodes to it.

    Args:
        amrGraph: Graph in PENMAN notation as a single string; lines starting
            with ``#`` are skipped.
        target: Text to look for; the first graph token that contains it as a
            substring marks the node to keep.

    Returns:
        The reduced graph, re-encoded by ``penman`` to normalize formatting.

    Raises:
        IndexError: If no token of the graph contains ``target`` (or the graph
            is so malformed that the target node is never closed).
    """
    resultGraph = _extractSubGraphText(_tokenizeAmrGraph(amrGraph), target)
    # Fix the formatting:
    return penman.encode(penman.decode(resultGraph))
# Example shown in the form when the page is first opened.
_DEFAULT_TEXT = (
    "Article 2 : Occupations ou utilisations du sol soumises à des conditions particulières\n\n"
    "2) Dans les périmètres en bordure des cours d’eau définis dans les annexes sanitaires du PLU :\n\n"
    "− Seules les clôtures en grillage pourront être autorisées à condition qu'elles soient conçues de\n"
    "manière à ne pas faire obstacle au libre écoulement des eaux."
)
_DEFAULT_KEYWORD = 'clôtures'


def _resolveModelDir(name):
    """Return the directory of model ``name``.

    ``resources/<name>`` is preferred; ``<name>`` in the working directory is
    used only when the former is missing and the latter exists.
    """
    import os

    bundled = f'resources/{name}'
    if os.path.isdir(bundled) or not os.path.isdir(name):
        return bundled
    return name


# Streamlit re-runs the script on every interaction, so the heavy models are
# kept in the resource cache instead of being rebuilt on each button click.
@st.cache_resource
def _loadTranslator():
    """Load the translation model once."""
    return EasyNMT('opus-mt')


@st.cache_resource
def _loadStog():
    """Load the sentence-to-graph model once."""
    return amrlib.load_stog_model(model_dir=_resolveModelDir('model_stog'))


@st.cache_resource
def _loadGtos():
    """Load the graph-to-sentence model once."""
    return amrlib.load_gtos_model(model_dir=_resolveModelDir('model_gtos'))


@st.cache_resource
def _loadWordAligner():
    """Load the word aligner once."""
    return SentenceAligner(model="bert", token_type="bpe", matching_methods="mai")


def main():
    """Render the page and, on request, summarize the text around a keyword.

    Steps: translate the French text to English, parse it into a graph, align
    the French tokens with the lemmas known to the graph aligner, locate the
    node of the keyword, cut the graph down to the path leading to that node,
    generate English text from it and translate the result back to French.
    Results and errors are written to the page.
    """
    st.header('Abstract Meaning Representation based summary of French text', divider='blue')
    segmentFr = st.text_area("Text to summarize:", _DEFAULT_TEXT, height=170)
    targetWord = st.text_input('Keyword:', _DEFAULT_KEYWORD)

    if not st.button('Summarize'):
        return

    # Fix input formatting:
    segmentFr = segmentFr.replace('\n', ' ')
    if not segmentFr.strip() or not targetWord.strip():
        # An empty keyword would match every graph token further down.
        st.write('Error! Please provide both a text and a keyword')
        return

    # Translate segment into English:
    translator = _loadTranslator()
    segmentEn = translator.translate(segmentFr, source_lang='fr', target_lang='en')

    # Get an AMR graph:
    amrGraph = _loadStog().parse_sents([segmentEn])[0]
    if not amrGraph:
        # NOTE(review): the parser is expected to yield None for a sentence
        # it cannot turn into a valid graph - confirm against amrlib docs.
        st.write('Error! Cannot parse the text into an AMR graph')
        return

    # Get tokenized representation of segment in French:
    segmentFrTokens = word_tokenize(segmentFr, language='french')
    # Get tokenized representation of segment in English:
    penmanGraph = add_lemmas(amrGraph, snt_key='snt')
    aligner = RBWAligner.from_penman_w_json(penmanGraph)
    segmentEnTokens = aligner.lemmas

    # Get alignments between original version and translation:
    alignments = _loadWordAligner().get_word_aligns(segmentFrTokens, segmentEnTokens)

    # Find a node corresponding to targetWord in the graph:
    targetNode = getTargetWordNode(segmentFrTokens, aligner, alignments, targetWord)
    alignmentFailed = targetNode == 'Error!'
    if alignmentFailed or targetNode not in amrGraph:
        # Fall back to the keyword itself; the first line of the graph string
        # is skipped so that only the graph body is searched.
        if targetWord in ''.join(amrGraph.split('\n')[1:]):
            targetNode = targetWord
        elif alignmentFailed:
            st.write('Error! Alignment between target word in French and its English instance not found')
            return
        else:
            st.write('Error! Cannot find keyword in the graph')
            return

    # Extract a subgraph containing target word with full path (all the nodes) to it:
    targetSubGraph = getTargetWordSubGraphFullPath(amrGraph, targetNode)

    # Generate text from given AMR-graph:
    rulesEn, _ = _loadGtos().generate([targetSubGraph])
    # Remove list markers such as "1. " or "2) " from the text; the marker
    # character is matched literally so ordinary "digit + letter" text stays.
    summaryEn = re.sub(r'\d+[.)] ', '', rulesEn[0])

    # Translate it back to French
    rulesFr = translator.translate(summaryEn, source_lang='en', target_lang='fr')
    st.write("Summary: ", rulesFr)


if __name__ == "__main__":
    main()

model_gtos/amrlib_meta.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "model_type":"gtos",
+    "version":"0.1.0",
+    "date":"2020-12-30",
+    "inference_module":".generate_t5wtense.inference",
+    "inference_class":"Inference",
+    "model_fn":"pytorch_model.bin",
+    "kwargs":{}
+}

model_gtos/config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "_name_or_path": "t5-base",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "d_ff": 3072,
+  "d_kv": 64,
+  "d_model": 768,
+  "decoder_start_token_id": 0,
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 12,
+  "num_heads": 12,
+  "num_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "translation_amr_to_text": {
+      "corpus_dir": "data/tdata_generate_t5wtense/",
+      "max_in_len": 512,
+      "max_out_len": 90,
+      "model_name_or_path": "t5-base",
+      "train_fn": "train.txt.features.nowiki.tdata",
+      "valid_fn": "dev.txt.features.nowiki.tdata"
+    }
+  },
+  "use_cache": true,
+  "vocab_size": 32128
+}

model_gtos/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:df94c52acfeb55f655113784b6ccc324615dfb9e68b8e69c9c3a05d9afaa4954
+size 891734433

model_gtos/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8c00118711daf521f494ac5b629f8b170e607cc9741fbdcaa05fe4938dbf4432
+size 1775

model_stog/amrlib_meta.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+    "model_type":"stog",
+    "version":"0.1.0",
+    "date":"2022-02-16",
+    "inference_module":".parse_xfm.inference",
+    "inference_class":"Inference",
+    "model_fn":"pytorch_model.bin",
+    "base_model":"facebook/bart-large",
+    "kwargs":{}
+}

model_stog/config.json ADDED Viewed

	@@ -0,0 +1,70 @@

+{
+  "_name_or_path": "facebook/bart-large",
+  "activation_dropout": 0.1,
+  "activation_function": "gelu",
+  "add_bias_logits": false,
+  "add_final_layer_norm": false,
+  "architectures": [
+    "BartForConditionalGeneration"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 0,
+  "classif_dropout": 0.1,
+  "classifier_dropout": 0.0,
+  "d_model": 1024,
+  "decoder_attention_heads": 16,
+  "decoder_ffn_dim": 4096,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 12,
+  "decoder_start_token_id": 2,
+  "dropout": 0.1,
+  "early_stopping": true,
+  "encoder_attention_heads": 16,
+  "encoder_ffn_dim": 4096,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 12,
+  "eos_token_id": 2,
+  "forced_eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "max_position_embeddings": 1024,
+  "model_type": "bart",
+  "normalize_before": false,
+  "num_beams": 4,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "scale_embedding": false,
+  "task_specific_params": {
+    "parse_amr": {
+      "corpus_dir": "data/tdata_xfm/",
+      "custom_save_checkpoint": true,
+      "eval_batch_size": 32,
+      "eval_fn": "dev.txt.nowiki",
+      "eval_num_beams": 1,
+      "first_eval_epoch": 1,
+      "max_in_len": 1024,
+      "max_out_len": 1024,
+      "max_train_graph_len": 512,
+      "max_train_sent_len": 100,
+      "model_name_or_path": "facebook/bart-large",
+      "save_at_end": false,
+      "save_tokenizer": false,
+      "train_fn": "train.txt.nowiki"
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.16.2",
+  "use_cache": true,
+  "vocab_size": 50265
+}

model_stog/model_parse_xfm_bart_large.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{   "gen_args" :
+    {
+        "model_name_or_path"            : "facebook/bart-large",
+        "corpus_dir"                    : "data/tdata_xfm/",
+        "train_fn"                      : "train.txt.nowiki",
+        "eval_fn"                       : "dev.txt.nowiki",
+        "custom_save_checkpoint"        : true,
+        "save_tokenizer"                : false,
+        "save_at_end"                   : false,
+        "first_eval_epoch"              : 1,
+        "eval_batch_size"               : 32,
+        "eval_num_beams"                : 1,
+        "max_in_len"                    : 1024,
+        "max_out_len"                   : 1024,
+        "max_train_sent_len"            : 100,
+        "max_train_graph_len"           : 512
+    },
+    "model_args":
+    {
+        "no_repeat_ngram_size"          : 0
+    },
+    "hf_args" :
+    {
+        "output_dir"                    : "data/model_parse_xfm",
+        "save_strategy"                 : "epoch",
+        "evaluation_strategy"           : "epoch",
+        "fp16"                          : true,
+        "group_by_length"               : true,
+        "do_train"                      : true,
+        "do_eval"                       : true,
+        "save_total_limit"              : 1,
+        "logging_steps"                 : 300,
+        "num_train_epochs"              : 16,
+        "per_device_train_batch_size"   : 8,
+        "gradient_accumulation_steps"   : 4,
+        "weight_decay"                  : 0.004,
+        "learning_rate"                 : 5e-5,
+        "max_grad_norm"                 : 1.0,
+        "warmup_steps"                  : 5200,
+        "seed"                          : 42
+    }
+}

model_stog/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d9bdcde696e63224ba56853469689d2e22e64c324eedee6e48348e806eedd45b
+size 1625557313

requirements.txt ADDED Viewed

	@@ -0,0 +1,18 @@

+streamlit
+penman
+torch
+numpy
+spacy
+tqdm
+transformers
+smatch
+cached_property
+networkx
+nltk
+unidecode
+requests
+word2number
+amrlib
+easynmt
+simalign
+https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1.tar.gz#egg=en_core_web_sm