From aa69c653ef7eda9a75bfd280089ecffcb0a871c1 Mon Sep 17 00:00:00 2001
From: Vlad Pronsky <v.pronsky@gmail.com>
Date: Sun, 30 Jul 2023 17:24:07 +0300
Subject: [PATCH] restore full tweet text in case of retweet #42

---
 examples/parallel_search_with_limit.py |  4 ++--
 tests/test_parser.py                   |  7 ++++---
 twscrape/models.py                     | 26 +++++++++++++++-----------
 3 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/examples/parallel_search_with_limit.py b/examples/parallel_search_with_limit.py
index cbddefb..64fefc7 100644
--- a/examples/parallel_search_with_limit.py
+++ b/examples/parallel_search_with_limit.py
@@ -24,8 +24,8 @@ async def worker(queue: asyncio.Queue, api: twscrape.API):
 async def main():
     api = twscrape.API()
     # add accounts here or before from cli (see README.md for examples)
-    # await api.pool.add_account("u1", "p1", "eu1", "ep1")
-    # await api.pool.login_all()
+    await api.pool.add_account("u1", "p1", "eu1", "ep1")
+    await api.pool.login_all()
 
     queries = ["elon musk", "tesla", "spacex", "neuralink", "boring company"]
 
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 9ec6303..74225ce 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -99,6 +99,9 @@ def check_tweet(doc: Tweet | None):
                     assert v.bitrate is not None
                     assert v.contentType is not None
 
+    if doc.retweetedTweet is not None:
+        assert doc.rawContent.endswith(doc.retweetedTweet.rawContent), "content should be full"
+
     check_user(doc.user)
 
 
@@ -306,6 +309,4 @@ async def test_issue_42():
     assert doc.rawContent is not None
     assert doc.retweetedTweet.rawContent is not None
 
-    msg = "Today marks the arrival of a traditional Chinese solar term called mangzhong, or Grain in Ear, signifying a busy farming period. https://t.co/SQMrX99bWr"
-    assert doc.retweetedTweet.rawContent == msg
-    # assert doc.rawContent == msg # todo: not sure should it be populated from rt
+    assert doc.rawContent.endswith(doc.retweetedTweet.rawContent)
diff --git a/twscrape/models.py b/twscrape/models.py
index 0ceaafe..239f6af 100644
--- a/twscrape/models.py
+++ b/twscrape/models.py
@@ -195,14 +195,7 @@ class Tweet(JSONTrait):
         qt_id = _first(obj, ["quoted_status_id_str", "quoted_status_result.result.rest_id"])
         qt_obj = get_or(res, f"tweets.{qt_id}")
 
-        # for development
-        # print()
-        # print("-" * 80)
-        # print(res["tweets"].keys())
-        # print(rt_id, rt_obj is not None)
-        # print(qt_id, qt_obj is not None)
-
-        return Tweet(
+        doc = Tweet(
             id=int(obj["id_str"]),
             id_str=obj["id_str"],
             url=f'https://twitter.com/{tw_usr.username}/status/{obj["id_str"]}',
@@ -232,6 +225,19 @@ class Tweet(JSONTrait):
             media=Media.parse(obj),
         )
 
+        # issue #42 – a retweet's own text may be cut off with a trailing "…"; rebuild it as "RT @user: " + the retweeted tweet's full text
+        rt = doc.retweetedTweet
+        if rt is not None and rt.user is not None and doc.rawContent.endswith("…"):
+            prefix = f"RT @{rt.user.username}: "
+            rt_msg = f"{prefix}{rt.rawContent}"
+            if doc.rawContent != rt_msg and doc.rawContent.startswith(prefix):
+                # was = doc.rawContent.replace("\n", "")
+                # now = rt_msg.replace("\n", "")
+                # print(f"\n{was}\n{now}\n")
+                doc.rawContent = rt_msg
+
+        return doc
+
 
 @dataclass
 class MediaPhoto(JSONTrait):
@@ -239,9 +245,7 @@ class MediaPhoto(JSONTrait):
 
     @staticmethod
     def parse(obj: dict):
-        return MediaPhoto(
-            url=obj["media_url_https"],
-        )
+        return MediaPhoto(url=obj["media_url_https"])
 
 
 @dataclass