Этот коммит содержится в:
Petr Andriushchenko 2023-04-27 11:06:19 +02:00
родитель d1f65d016d
Коммит ea297ea23d
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4C4A5DCF634115B6
2 изменённых файла: 7 добавлений и 7 удалений

Просмотреть файл

@ -350,7 +350,7 @@ class MultimodalSearch(AnalysisMethod):
def itm_text_precessing(self, search_query):
for query in search_query:
if not (len(query) == 1) and (query in ("image", "text_input")):
if (len(query) != 1) and (query in ("image", "text_input")):
raise SyntaxError(
'Each querry must contain either an "image" or a "text_input"'
)

Просмотреть файл

@ -58,9 +58,9 @@ class SummaryDetector(AnalysisMethod):
def analyse_questions(self, list_of_questions):
(
summary_VQA_model,
summary_VQA_vis_processors,
summary_VQA_txt_processors,
summary_vqa_model,
summary_vqa_vis_processors,
summary_vqa_txt_processors,
) = load_model_and_preprocess(
name="blip_vqa",
model_type="vqav2",
@ -71,18 +71,18 @@ class SummaryDetector(AnalysisMethod):
path = self.subdict["filename"]
raw_image = Image.open(path).convert("RGB")
image = (
summary_VQA_vis_processors["eval"](raw_image)
summary_vqa_vis_processors["eval"](raw_image)
.unsqueeze(0)
.to(self.summary_device)
)
question_batch = []
for quest in list_of_questions:
question_batch.append(summary_VQA_txt_processors["eval"](quest))
question_batch.append(summary_vqa_txt_processors["eval"](quest))
batch_size = len(list_of_questions)
image_batch = image.repeat(batch_size, 1, 1, 1)
with no_grad():
answers_batch = summary_VQA_model.predict_answers(
answers_batch = summary_vqa_model.predict_answers(
samples={"image": image_batch, "text_input": question_batch},
inference_method="generate",
)