add text summary

2025-10-29 21:16:06 +02:00 · 2023-03-30 11:23:01 +02:00 · 2023-03-30 11:23:01 +02:00 · cf1e1b83d7
--- a/misinformation/test/test_text.py
+++ b/misinformation/test/test_text.py
@ -116,6 +116,18 @@ def test_sentiment_analysis():
    assert test_obj.subdict["subjectivity"] == 0.6
 def test_text_summary(get_path):
    """Check that ``text_summary`` produces the expected summary text.

    The English input is read from ``example_summary.txt`` located under the
    ``get_path`` fixture and injected directly into the detector's
    ``subdict["text_english"]`` entry before summarizing.
    """
    # Expected summary, split across adjacent literals purely for
    # readability; the odd spacing (" white .", " the way .") is part of the
    # expected value and must be kept as is.
    expected_summary = (
        " I’m sorry, but I don’t want to be an emperor."
        " That’s not my business."
        " I should like to help everyone - if possible - Jew, Gentile - black man - white ."
        " We all want to help one another."
        " In this world there is room for everyone."
        " The way of life can be free and beautiful, but we have lost the way ."
    )
    mydict = {}
    detector = tt.TextDetector(mydict, analyse_text=True)
    with open(get_path + "example_summary.txt", encoding="utf8") as handle:
        detector.subdict["text_english"] = handle.read()
    detector.text_summary()
    # NOTE(review): the result is read back through ``mydict`` rather than
    # ``detector.subdict`` - presumably both refer to the same dict; confirm
    # against TextDetector's constructor.
    assert mydict["summary_text"] == expected_summary
 def test_PostprocessText(set_testdict, get_path):
    reference_dict = "THE\nALGEBRAIC\nEIGENVALUE\nPROBLEM\nDOM\nNVS TIO\nMINA\nMonographs\non Numerical Analysis\nJ.. H. WILKINSON"
    reference_df = "Mathematische Formelsammlung\nfür Ingenieure und Naturwissenschaftler\nMit zahlreichen Abbildungen und Rechenbeispielen\nund einer ausführlichen Integraltafel\n3., verbesserte Auflage"
--- a/misinformation/text.py
+++ b/misinformation/text.py
@ -9,6 +9,7 @@ from misinformation import utils
 import grpc
 import pandas as pd
 from bertopic import BERTopic
 from transformers import pipeline
 # make widgets work again
 # clean text has weird spaces and separation of "do n't"
@ -119,6 +120,14 @@ class TextDetector(utils.AnalysisMethod):
        # where 0.0 is very objective and 1.0 is very subjective
        self.subdict["subjectivity"] = self.doc._.blob.subjectivity
    def text_summary(self):
        """Summarize the English text and store the result in ``self.subdict``.

        Reads ``self.subdict["text_english"]``, runs it through a transformers
        summarization pipeline and merges the first result dict into
        ``self.subdict``.

        Raises:
            KeyError: if ``self.subdict`` has no ``"text_english"`` entry.
        """
        # Pin the checkpoint instead of relying on the library default, so the
        # summary stays reproducible across transformers upgrades.
        # NOTE(review): model/revision are meant to match the PyTorch default
        # of the summarization task at the time of writing - confirm against
        # the installed transformers version.
        pipe = pipeline(
            "summarization",
            model="sshleifer/distilbart-cnn-12-6",
            revision="a4f8f3e",
        )
        # The pipeline returns a list with one dict per input; only a single
        # text is passed in, so the first entry is the one that is merged.
        results = pipe(self.subdict["text_english"])
        self.subdict.update(results[0])
    # def text_sentiment_transformers(self):
    # pipe = pipeline("text-classification")
 class PostprocessText:
    def __init__(
--- a/pyproject.toml
+++ b/pyproject.toml
@ -48,6 +48,7 @@ dependencies = [
    "tensorflow",
    "textblob",
    "torch",
    "transformers",
    "google-cloud-vision",
    "setuptools",
    "opencv-contrib-python",