Spaces:

HibiscusMaximus
/

PaperClassification

Sleeping

Valeriy Sinyukov committed 22 days ago

Commit

82ec9f7

1 Parent(s): 5c5407c

Remove model wrappers, use dict and model input

Files changed (7) hide show

app.py CHANGED Viewed

@@ -2,7 +2,6 @@ import pandas as pd
 import streamlit as st
 from category_classification.models import models as class_models
-from common import Input
 from languages import *
 from results import process_results
@@ -33,7 +32,7 @@ authors = st.text_area(authors_label[lang], height=text_area_height(2))
 abstract = st.text_area(abstract_label[lang], height=text_area_height(5))
 if title:
-    input = Input(title=title, abstract=abstract, authors=authors)
     model = load_class_model(model_name)
     results = model(input)
     results = process_results(results, lang)

 import streamlit as st
 from category_classification.models import models as class_models
 from languages import *
 from results import process_results
 abstract = st.text_area(abstract_label[lang], height=text_area_height(5))
 if title:
+    input = {"title": title, "abstract": abstract, "authors": authors}
     model = load_class_model(model_name)
     results = model(input)
     results = process_results(results, lang)

category_classification/models/HibiscusMaximus__scibert_paper_classification/model.py CHANGED Viewed

@@ -2,16 +2,8 @@ from transformers import pipeline
 name = "HibiscusMaximus/scibert_paper_classification"
-class SciBertPaperClassifier:
-    def __init__(self):
-        self.pipeline = pipeline("paper-classification", model=name)
-    def __call__(self, input):
-        return self.pipeline(input)
 def get_model():
-    return SciBertPaperClassifier()
 supported_langs = ["en"]

 name = "HibiscusMaximus/scibert_paper_classification"
 def get_model():
+    """Return a "paper-classification" pipeline loaded with the checkpoint ``name``."""
+    return pipeline("paper-classification", model=name)
 supported_langs = ["en"]

category_classification/models/oracat__bert_paper_classifier/model.py CHANGED Viewed

@@ -2,15 +2,8 @@ from transformers import pipeline
 name = "oracat/bert-paper-classifier"
-class BertPaperClassifierModel:
-    def __init__(self):
-        self.pipeline = pipeline("text-classification", model=name)
-    def __call__(self, input):
-        return self.pipeline(input.title + ' ' + input.abstract)
 def get_model():
-    return BertPaperClassifierModel()
-supported_langs = ['en']

 name = "oracat/bert-paper-classifier"
 def get_model():
+    """Return a "paper-classification" pipeline loaded with the checkpoint ``name``."""
+    # NOTE(review): the wrapper class this replaces ran the "text-classification"
+    # task on title + ' ' + abstract; confirm this checkpoint is meant to go
+    # through the custom pipeline's input formatting instead.
+    return pipeline("paper-classification", model=name)
+supported_langs = ["en"]

category_classification/models/oracat__bert_paper_classifier_arxiv/model.py CHANGED Viewed

@@ -2,15 +2,8 @@ from transformers import pipeline
 name = "oracat/bert-paper-classifier-arxiv"
-class BertPaperClassifierArxivModel:
-    def __init__(self):
-        self.pipeline = pipeline("text-classification", model=name)
-    def __call__(self, input):
-        return self.pipeline(input.title + ' ' + input.abstract)
 def get_model():
-    return BertPaperClassifierArxivModel()
-supported_langs = ['en']

 name = "oracat/bert-paper-classifier-arxiv"
 def get_model():
+    """Return a "paper-classification" pipeline loaded with the checkpoint ``name``."""
+    # NOTE(review): the wrapper class this replaces ran the "text-classification"
+    # task on title + ' ' + abstract; confirm this checkpoint is meant to go
+    # through the custom pipeline's input formatting instead.
+    return pipeline("paper-classification", model=name)
+supported_langs = ["en"]

category_classification/models/pipeline.py CHANGED Viewed

@@ -6,16 +6,28 @@ import torch
 from transformers import Pipeline, AutoModelForSequenceClassification
 from transformers.pipelines import PIPELINE_REGISTRY
 class PapersClassificationPipeline(Pipeline):
     def _sanitize_parameters(self, **kwargs):
         return {}, {}, {}
     def preprocess(self, inputs):
-        if not isinstance(inputs, tp.Iterable):
             inputs = [inputs]
         texts = [
-            f"AUTHORS: {' '.join(paper.authors) if isinstance(paper.authors, list) else paper.authors} "
-            f"TITLE: {paper.title} ABSTRACT: {paper.abstract}"
             for paper in inputs
         ]
         inputs = self.tokenizer(

 from transformers import Pipeline, AutoModelForSequenceClassification
 from transformers.pipelines import PIPELINE_REGISTRY
 class PapersClassificationPipeline(Pipeline):
     def _sanitize_parameters(self, **kwargs):
+        """Ignore all call-time keyword arguments.
+
+        Always returns three empty dicts, so no extra parameters reach the
+        pipeline stages (presumably preprocess / forward / postprocess, per
+        the ``transformers.Pipeline`` contract -- confirm against its docs).
+        """
         return {}, {}, {}
     def preprocess(self, inputs):
+        if (
+            not isinstance(inputs, tp.Iterable)
+            or isinstance(inputs, tp.Dict)
+            or isinstance(inputs, str)
+        ):
             inputs = [inputs]
+        title = "title"
+        authors = "authors"
+        abstract = "abstract"
         texts = [
+            (
+                f"AUTHORS: {' '.join(paper[authors]) if isinstance(paper[authors], list) else paper[authors]} "
+                f"TITLE: {paper[title]} ABSTRACT: {paper[abstract]}"
+                if not isinstance(paper, str)
+                else paper
+            )
             for paper in inputs
         ]
         inputs = self.tokenizer(

category_classification/models/translation.py CHANGED Viewed

@@ -1,3 +1,4 @@
 from collections import namedtuple
 from functools import partial
@@ -13,28 +14,27 @@ def get_translator():
         torch_dtype="auto",
     )
-class Input:
-    def __init__(self, title, abstract, authors):
-        self.title = title
-        self.abstract = abstract
-        self.authors = authors
 class TranslationModel:
     def __init__(self, get_model):
         self.translator = get_translator()
         self.model = get_model()
-    def __call__(self, input):
-        def translate(text):
-            if text is None or text.strip() == "":
-                return ""
-            text = str(text).strip()
-            translated = self.translator(text)[0]['translation_text']
-            return translated
-        title = translate(input.title)
-        abstract = translate(input.abstract)
-        authors = translate(input.authors)
-        out = self.model(Input(title, abstract, authors))
         return out
@@ -43,4 +43,3 @@ def create_translation_models(models):
         f"{name} (С помощью перевода)": partial(TranslationModel, get_model=get_model)
         for name, get_model in models.items()
     }

+import typing as tp
 from collections import namedtuple
 from functools import partial
         torch_dtype="auto",
     )
 class TranslationModel:
+    """Classifier wrapper that translates its input before classifying it.
+
+    ``get_model`` is a zero-argument factory for the wrapped classifier; the
+    translator is obtained from ``get_translator()``.
+    """
     def __init__(self, get_model):
         self.translator = get_translator()
         self.model = get_model()
+    def __call__(self, input, **kwargs):
+        """Translate ``input`` and return the wrapped model's output for it.
+
+        ``input`` may be a paper dict (keys ``"authors"``, ``"abstract"``,
+        ``"title"``), a plain string, or an iterable of either. Extra keyword
+        arguments are accepted but not used. When the model returns exactly
+        one result, that result is returned unwrapped.
+        """
+        def transform_input_dict_to_str(input):
+            # Anything that is not a paper dict (e.g. a plain string) is
+            # already text: pass it through rather than returning None.
+            if not isinstance(input, dict):
+                return input
+            # Missing or None fields count as empty text.
+            authors = input.get("authors") or ""
+            if isinstance(authors, list):
+                # Authors may arrive as a list of names.
+                authors = " ".join(authors)
+            abstract = input.get("abstract") or ""
+            title = input.get("title") or ""
+            return authors + " " + abstract + " " + title
+        # A dict or a str is ONE paper, not a collection of papers; without
+        # the str check a bare string would be iterated character by character.
+        if not isinstance(input, tp.Iterable) or isinstance(input, (dict, str)):
+            input = [input]
+        input = [transform_input_dict_to_str(i) for i in input]
+        translated_input = self.translator(input)
+        translated = [
+            translated_i["translation_text"] for translated_i in translated_input
+        ]
+        out = self.model(translated)
+        if len(out) == 1:
+            return out[0]
         return out
         f"{name} (С помощью перевода)": partial(TranslationModel, get_model=get_model)
         for name, get_model in models.items()
     }

common.py DELETED Viewed

@@ -1,5 +0,0 @@
-class Input:
-    def __init__(self, title, abstract=None, authors=None):
-        self.title = title
-        self.abstract = abstract if abstract is not None else ''
-        self.authors = authors if authors is not None else ''