зеркало из https://github.com/ssciwr/AMMICO.git (синхронизировано 2025-10-30 05:26:05 +02:00)
			
		
		
		
	fix long text pipeline issues (#82)
Этот коммит содержится в:
		
		родитель: a19ea3ea82
		коммит: 31aefc368b
					
				| @ -11,7 +11,6 @@ import pandas as pd | |||||||
| from bertopic import BERTopic | from bertopic import BERTopic | ||||||
| from transformers import pipeline | from transformers import pipeline | ||||||
| 
 | 
 | ||||||
| # make widgets work again |  | ||||||
| # clean text has weird spaces and separation of "do n't" | # clean text has weird spaces and separation of "do n't" | ||||||
| # increase coverage for text | # increase coverage for text | ||||||
| 
 | 
 | ||||||
| @ -127,6 +126,7 @@ class TextDetector(utils.AnalysisMethod): | |||||||
|         # use the current default model - 03/2023 |         # use the current default model - 03/2023 | ||||||
|         model_name = "sshleifer/distilbart-cnn-12-6" |         model_name = "sshleifer/distilbart-cnn-12-6" | ||||||
|         model_revision = "a4f8f3e" |         model_revision = "a4f8f3e" | ||||||
|  |         max_number_of_characters = 3000 | ||||||
|         pipe = pipeline( |         pipe = pipeline( | ||||||
|             "summarization", |             "summarization", | ||||||
|             model=model_name, |             model=model_name, | ||||||
| @ -134,7 +134,8 @@ class TextDetector(utils.AnalysisMethod): | |||||||
|             min_length=5, |             min_length=5, | ||||||
|             max_length=20, |             max_length=20, | ||||||
|         ) |         ) | ||||||
|         summary = pipe(self.subdict["text_english"]) |         print(self.subdict["text_english"]) | ||||||
|  |         summary = pipe(self.subdict["text_english"][0:max_number_of_characters]) | ||||||
|         self.subdict["text_summary"] = summary[0]["summary_text"] |         self.subdict["text_summary"] = summary[0]["summary_text"] | ||||||
| 
 | 
 | ||||||
|     def text_sentiment_transformers(self): |     def text_sentiment_transformers(self): | ||||||
| @ -143,7 +144,10 @@ class TextDetector(utils.AnalysisMethod): | |||||||
|         model_name = "distilbert-base-uncased-finetuned-sst-2-english" |         model_name = "distilbert-base-uncased-finetuned-sst-2-english" | ||||||
|         model_revision = "af0f99b" |         model_revision = "af0f99b" | ||||||
|         pipe = pipeline( |         pipe = pipeline( | ||||||
|             "text-classification", model=model_name, revision=model_revision |             "text-classification", | ||||||
|  |             model=model_name, | ||||||
|  |             revision=model_revision, | ||||||
|  |             truncation=True, | ||||||
|         ) |         ) | ||||||
|         result = pipe(self.subdict["text_english"]) |         result = pipe(self.subdict["text_english"]) | ||||||
|         self.subdict["sentiment"] = result[0]["label"] |         self.subdict["sentiment"] = result[0]["label"] | ||||||
| @ -161,7 +165,6 @@ class TextDetector(utils.AnalysisMethod): | |||||||
|             aggregation_strategy="simple", |             aggregation_strategy="simple", | ||||||
|         ) |         ) | ||||||
|         result = pipe(self.subdict["text_english"]) |         result = pipe(self.subdict["text_english"]) | ||||||
|         # self.subdict["entity"] = result |  | ||||||
|         self.subdict["entity"] = [] |         self.subdict["entity"] = [] | ||||||
|         self.subdict["entity_type"] = [] |         self.subdict["entity_type"] = [] | ||||||
|         for entity in result: |         for entity in result: | ||||||
|  | |||||||
		Загрузка…
	
	
			
			x
			
			
		
	
		Ссылка в новой задаче
	
	Block a user
	 Inga Ulusoy
						Inga Ulusoy