зеркало из
https://github.com/ssciwr/AMMICO.git
synced 2025-10-29 21:16:06 +02:00
Merge branch 'main' into summary_multi_docs
Этот коммит содержится в:
Коммит
ca3994c72f
2
.flake8
2
.flake8
@@ -1,5 +1,5 @@
|
||||
[flake8]
|
||||
ignore = E203, F401, E402, E501, W503
|
||||
extend-ignore = E203, F401, E402, E501, W503
|
||||
exclude = .git,__pycache__,.ipynb_checkpoints
|
||||
max-line-length = 90
|
||||
max-complexity = 18
|
||||
@@ -1,5 +1,5 @@
|
||||
[flake8_nb]
|
||||
ignore = F401, E402, E501
|
||||
extend-ignore = E203, F401, E402, E501
|
||||
exclude = .git,__pycache__,.ipynb_checkpoints
|
||||
max-line-length = 90
|
||||
max-complexity = 18
|
||||
1
.github/workflows/ci.yml
поставляемый
1
.github/workflows/ci.yml
поставляемый
@@ -40,3 +40,4 @@ jobs:
|
||||
fail_ci_if_error: true
|
||||
files: ammico/coverage.xml
|
||||
verbose: true
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
|
||||
@@ -13,6 +13,6 @@ repos:
|
||||
hooks:
|
||||
- id: flake8
|
||||
- repo: https://github.com/s-weigand/flake8-nb
|
||||
rev: v0.5.0
|
||||
rev: v0.5.3
|
||||
hooks:
|
||||
- id: flake8-nb
|
||||
|
||||
@@ -70,7 +70,7 @@ class AnalysisExplorer:
|
||||
[self._middle_picture_frame()],
|
||||
id="Div_middle",
|
||||
style={
|
||||
"width": "60%",
|
||||
"width": "50%",
|
||||
"display": "inline-block",
|
||||
"verticalAlign": "top",
|
||||
},
|
||||
@@ -80,13 +80,13 @@ class AnalysisExplorer:
|
||||
[self._right_output_json()],
|
||||
id="Div_right",
|
||||
style={
|
||||
"width": "30%",
|
||||
"width": "45%",
|
||||
"display": "inline-block",
|
||||
"verticalAlign": "top",
|
||||
},
|
||||
),
|
||||
],
|
||||
style={"width": "80%", "display": "inline-block"},
|
||||
style={"width": "95%", "display": "inline-block"},
|
||||
)
|
||||
self.app.layout = app_layout
|
||||
# add callbacks to app
|
||||
|
||||
@@ -11,7 +11,6 @@ import pandas as pd
|
||||
from bertopic import BERTopic
|
||||
from transformers import pipeline
|
||||
|
||||
# make widgets work again
|
||||
# clean text has weird spaces and separation of "do n't"
|
||||
# increase coverage for text
|
||||
|
||||
@@ -127,6 +126,7 @@ class TextDetector(utils.AnalysisMethod):
|
||||
# use the current default model - 03/2023
|
||||
model_name = "sshleifer/distilbart-cnn-12-6"
|
||||
model_revision = "a4f8f3e"
|
||||
max_number_of_characters = 3000
|
||||
pipe = pipeline(
|
||||
"summarization",
|
||||
model=model_name,
|
||||
@@ -134,7 +134,8 @@ class TextDetector(utils.AnalysisMethod):
|
||||
min_length=5,
|
||||
max_length=20,
|
||||
)
|
||||
summary = pipe(self.subdict["text_english"])
|
||||
print(self.subdict["text_english"])
|
||||
summary = pipe(self.subdict["text_english"][0:max_number_of_characters])
|
||||
self.subdict["text_summary"] = summary[0]["summary_text"]
|
||||
|
||||
def text_sentiment_transformers(self):
|
||||
@@ -143,7 +144,10 @@ class TextDetector(utils.AnalysisMethod):
|
||||
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
|
||||
model_revision = "af0f99b"
|
||||
pipe = pipeline(
|
||||
"text-classification", model=model_name, revision=model_revision
|
||||
"text-classification",
|
||||
model=model_name,
|
||||
revision=model_revision,
|
||||
truncation=True,
|
||||
)
|
||||
result = pipe(self.subdict["text_english"])
|
||||
self.subdict["sentiment"] = result[0]["label"]
|
||||
@@ -161,7 +165,6 @@ class TextDetector(utils.AnalysisMethod):
|
||||
aggregation_strategy="simple",
|
||||
)
|
||||
result = pipe(self.subdict["text_english"])
|
||||
# self.subdict["entity"] = result
|
||||
self.subdict["entity"] = []
|
||||
self.subdict["entity_type"] = []
|
||||
for entity in result:
|
||||
|
||||
Загрузка…
x
Ссылка в новой задаче
Block a user