restore full tweets text in case of retweet #42

Этот коммит содержится в:
Vlad Pronsky 2023-07-30 17:24:07 +03:00
родитель bce3bac8be
Коммит aa69c653ef
3 изменённых файлов: 21 добавлений и 16 удалений

Просмотреть файл

@@ -24,8 +24,8 @@ async def worker(queue: asyncio.Queue, api: twscrape.API):
async def main():
api = twscrape.API()
# add accounts here or before from cli (see README.md for examples)
# await api.pool.add_account("u1", "p1", "eu1", "ep1")
# await api.pool.login_all()
await api.pool.add_account("u1", "p1", "eu1", "ep1")
await api.pool.login_all()
queries = ["elon musk", "tesla", "spacex", "neuralink", "boring company"]

Просмотреть файл

@@ -99,6 +99,9 @@ def check_tweet(doc: Tweet | None):
assert v.bitrate is not None
assert v.contentType is not None
if doc.retweetedTweet is not None:
assert doc.rawContent.endswith(doc.retweetedTweet.rawContent), "content should be full"
check_user(doc.user)
@@ -306,6 +309,4 @@ async def test_issue_42():
assert doc.rawContent is not None
assert doc.retweetedTweet.rawContent is not None
msg = "Today marks the arrival of a traditional Chinese solar term called mangzhong, or Grain in Ear, signifying a busy farming period. https://t.co/SQMrX99bWr"
assert doc.retweetedTweet.rawContent == msg
# assert doc.rawContent == msg # todo: not sure should it be populated from rt
assert doc.rawContent.endswith(doc.retweetedTweet.rawContent)

Просмотреть файл

@@ -195,14 +195,7 @@ class Tweet(JSONTrait):
qt_id = _first(obj, ["quoted_status_id_str", "quoted_status_result.result.rest_id"])
qt_obj = get_or(res, f"tweets.{qt_id}")
# for development
# print()
# print("-" * 80)
# print(res["tweets"].keys())
# print(rt_id, rt_obj is not None)
# print(qt_id, qt_obj is not None)
return Tweet(
doc = Tweet(
id=int(obj["id_str"]),
id_str=obj["id_str"],
url=f'https://twitter.com/{tw_usr.username}/status/{obj["id_str"]}',
@@ -232,6 +225,19 @@ class Tweet(JSONTrait):
media=Media.parse(obj),
)
# issue #42 – restore full rt text
rt = doc.retweetedTweet
if rt is not None and rt.user is not None and doc.rawContent.endswith("…"):
prefix = f"RT @{rt.user.username}: "
rt_msg = f"{prefix}{rt.rawContent}"
if doc.rawContent != rt_msg and doc.rawContent.startswith(prefix):
# was = doc.rawContent.replace("\n", "")
# now = rt_msg.replace("\n", "")
# print(f"\n{was}\n{now}\n")
doc.rawContent = rt_msg
return doc
@dataclass
class MediaPhoto(JSONTrait):
@@ -239,9 +245,7 @@ class MediaPhoto(JSONTrait):
@staticmethod
def parse(obj: dict):
return MediaPhoto(
url=obj["media_url_https"],
)
return MediaPhoto(url=obj["media_url_https"])
@dataclass