зеркало из
				https://github.com/ssciwr/AMMICO.git
				synced 2025-10-30 05:26:05 +02:00 
			
		
		
		
	keep original text length, skip analysis if not translated
Этот коммит содержится в:
		
							родитель
							
								
									09d009c464
								
							
						
					
					
						Коммит
						15e4f0a524
					
				| @ -159,9 +159,11 @@ def test_truncate_text(accepted): | ||||
|     test_obj.subdict["text"] = "I like cats and dogs." | ||||
|     test_obj._truncate_text() | ||||
|     assert test_obj.subdict["text"] == "I like cats and dogs." | ||||
|+    assert "text_truncated" not in test_obj.subdict | ||||
|     test_obj.subdict["text"] = 20000 * "m" | ||||
|     test_obj._truncate_text() | ||||
|-    assert test_obj.subdict["text"] == 5000 * "m" | ||||
|+    assert test_obj.subdict["text_truncated"] == 5000 * "m" | ||||
|+    assert test_obj.subdict["text"] == 20000 * "m" | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.gcv | ||||
| @ -173,6 +175,15 @@ def test_analyse_image(set_testdict, set_environ, accepted): | ||||
|             set_testdict[item], analyse_text=True, accept_privacy=accepted | ||||
|         ) | ||||
|         test_obj.analyse_image() | ||||
|+    testdict = {} | ||||
|+    testdict["text"] = 20000 * "m" | ||||
|+    test_obj = tt.TextDetector( | ||||
|+        testdict, skip_extraction=True, analyse_text=True, accept_privacy=accepted | ||||
|+    ) | ||||
|+    test_obj.analyse_image() | ||||
|+    assert test_obj.subdict["text_truncated"] == 5000 * "m" | ||||
|+    assert test_obj.subdict["text"] == 20000 * "m" | ||||
|+    print(test_obj.subdict["text_english"]) | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.gcv | ||||
|  | ||||
| @ -263,7 +263,7 @@ class TextDetector(AnalysisMethod): | ||||
|         """Truncate the text if it is too long for googletrans.""" | ||||
|         if self.subdict["text"] and len(self.subdict["text"]) > max_length: | ||||
|             print("Text is too long - truncating to {} characters.".format(max_length)) | ||||
|-            self.subdict["text"] = self.subdict["text"][:max_length] | ||||
|+            self.subdict["text_truncated"] = self.subdict["text"][:max_length] | ||||
| 
 | ||||
|     def analyse_image(self) -> dict: | ||||
|         """Perform text extraction and analysis of the text. | ||||
| @ -283,7 +283,7 @@ class TextDetector(AnalysisMethod): | ||||
|             self._truncate_text() | ||||
|             self.translate_text() | ||||
|             self.remove_linebreaks() | ||||
|-            if self.analyse_text: | ||||
|+            if self.analyse_text and self.subdict["text_english"]: | ||||
|                 self._run_spacy() | ||||
|                 self.clean_text() | ||||
|                 self.text_summary() | ||||
| @ -336,8 +336,13 @@ class TextDetector(AnalysisMethod): | ||||
|             raise ValueError( | ||||
|                 "Privacy disclosure not accepted - skipping text translation." | ||||
|             ) | ||||
|+        text_to_translate = ( | ||||
|+            self.subdict["text_truncated"] | ||||
|+            if "text_truncated" in self.subdict | ||||
|+            else self.subdict["text"] | ||||
|+        ) | ||||
|         try: | ||||
|-            translated = self.translator.translate(self.subdict["text"]) | ||||
|+            translated = self.translator.translate(text_to_translate) | ||||
|         except Exception: | ||||
|             print("Could not translate the text with error {}.".format(Exception)) | ||||
|             translated = None | ||||
|  | ||||
		Загрузка…
	
	
			
			x
			
			
		
	
		Ссылка в новой задаче
	
	Block a user
	 Inga Ulusoy
						Inga Ulusoy