зеркало из
https://github.com/viginum-datalab/twscrape.git
synced 2025-10-29 21:16:25 +02:00
Restore full tweet text in case of a retweet (#42)
Этот коммит содержится в:
родитель
bce3bac8be
Коммит
aa69c653ef
@ -24,8 +24,8 @@ async def worker(queue: asyncio.Queue, api: twscrape.API):
|
|||||||
async def main():
|
async def main():
|
||||||
api = twscrape.API()
|
api = twscrape.API()
|
||||||
# add accounts here or before from cli (see README.md for examples)
|
# add accounts here or before from cli (see README.md for examples)
|
||||||
# await api.pool.add_account("u1", "p1", "eu1", "ep1")
|
await api.pool.add_account("u1", "p1", "eu1", "ep1")
|
||||||
# await api.pool.login_all()
|
await api.pool.login_all()
|
||||||
|
|
||||||
queries = ["elon musk", "tesla", "spacex", "neuralink", "boring company"]
|
queries = ["elon musk", "tesla", "spacex", "neuralink", "boring company"]
|
||||||
|
|
||||||
|
|||||||
@ -99,6 +99,9 @@ def check_tweet(doc: Tweet | None):
|
|||||||
assert v.bitrate is not None
|
assert v.bitrate is not None
|
||||||
assert v.contentType is not None
|
assert v.contentType is not None
|
||||||
|
|
||||||
|
if doc.retweetedTweet is not None:
|
||||||
|
assert doc.rawContent.endswith(doc.retweetedTweet.rawContent), "content should be full"
|
||||||
|
|
||||||
check_user(doc.user)
|
check_user(doc.user)
|
||||||
|
|
||||||
|
|
||||||
@ -306,6 +309,4 @@ async def test_issue_42():
|
|||||||
assert doc.rawContent is not None
|
assert doc.rawContent is not None
|
||||||
assert doc.retweetedTweet.rawContent is not None
|
assert doc.retweetedTweet.rawContent is not None
|
||||||
|
|
||||||
msg = "Today marks the arrival of a traditional Chinese solar term called mangzhong, or Grain in Ear, signifying a busy farming period. https://t.co/SQMrX99bWr"
|
assert doc.rawContent.endswith(doc.retweetedTweet.rawContent)
|
||||||
assert doc.retweetedTweet.rawContent == msg
|
|
||||||
# assert doc.rawContent == msg # todo: not sure should it be populated from rt
|
|
||||||
|
|||||||
@ -195,14 +195,7 @@ class Tweet(JSONTrait):
|
|||||||
qt_id = _first(obj, ["quoted_status_id_str", "quoted_status_result.result.rest_id"])
|
qt_id = _first(obj, ["quoted_status_id_str", "quoted_status_result.result.rest_id"])
|
||||||
qt_obj = get_or(res, f"tweets.{qt_id}")
|
qt_obj = get_or(res, f"tweets.{qt_id}")
|
||||||
|
|
||||||
# for development
|
doc = Tweet(
|
||||||
# print()
|
|
||||||
# print("-" * 80)
|
|
||||||
# print(res["tweets"].keys())
|
|
||||||
# print(rt_id, rt_obj is not None)
|
|
||||||
# print(qt_id, qt_obj is not None)
|
|
||||||
|
|
||||||
return Tweet(
|
|
||||||
id=int(obj["id_str"]),
|
id=int(obj["id_str"]),
|
||||||
id_str=obj["id_str"],
|
id_str=obj["id_str"],
|
||||||
url=f'https://twitter.com/{tw_usr.username}/status/{obj["id_str"]}',
|
url=f'https://twitter.com/{tw_usr.username}/status/{obj["id_str"]}',
|
||||||
@ -232,6 +225,19 @@ class Tweet(JSONTrait):
|
|||||||
media=Media.parse(obj),
|
media=Media.parse(obj),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# issue #42 – restore full rt text
|
||||||
|
rt = doc.retweetedTweet
|
||||||
|
if rt is not None and rt.user is not None and doc.rawContent.endswith("…"):
|
||||||
|
prefix = f"RT @{rt.user.username}: "
|
||||||
|
rt_msg = f"{prefix}{rt.rawContent}"
|
||||||
|
if doc.rawContent != rt_msg and doc.rawContent.startswith(prefix):
|
||||||
|
# was = doc.rawContent.replace("\n", "")
|
||||||
|
# now = rt_msg.replace("\n", "")
|
||||||
|
# print(f"\n{was}\n{now}\n")
|
||||||
|
doc.rawContent = rt_msg
|
||||||
|
|
||||||
|
return doc
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class MediaPhoto(JSONTrait):
|
class MediaPhoto(JSONTrait):
|
||||||
@ -239,9 +245,7 @@ class MediaPhoto(JSONTrait):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def parse(obj: dict):
|
def parse(obj: dict):
|
||||||
return MediaPhoto(
|
return MediaPhoto(url=obj["media_url_https"])
|
||||||
url=obj["media_url_https"],
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|||||||
Загрузка…
x
Ссылка в новой задаче
Block a user