Commit ebfc2add authored by Jerome Wuerf
Browse files

Improve logging

parent 21e345c6
......@@ -13,9 +13,12 @@ services:
volumes:
- ../data:/data
- ../python:/app
- /media:/media
- /tmp:/tmp
depends_on:
- elastic
elastic:
image: "docker.elastic.co/elasticsearch/elasticsearch:7.15.2"
restart: always
......
......@@ -8,7 +8,7 @@ import logging
import pickle
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s', level='INFO')
logging.Logger.manager.loggerDict["elastic_transport.transport"].disabled = True
class App:
......@@ -79,9 +79,12 @@ class App:
reranker = NoReranking(retrieved_results, self.config['RUN_NAME'])
reranker.rerank()
self.logger.info("Writing run.txt ...")
write_terc_file(reranker.generate_trec_rows(),
Path(self.config['OUTPUT_PATH'], 'run.txt')
)
self.logger.info("Wrote run.txt!")
self.logger.info("Finished retrieval!")
if __name__ == '__main__':
......
......@@ -33,14 +33,14 @@ class Reranking:
):
self.logger.info('Starting to calculate reranking scores...')
calculate_scores()
self.logger.info('Finished to calculate reranking socres!')
self.logger.info('Finished to calculate reranking scores!')
if min_max_normalizaion:
self._min_max_normalizaion()
self.logger.info('Starting reranking...')
self._sort(reranking_strategy_conclusions, reranking_strategy_premises)
self.logger.info('Finished reranking...')
self.logger.info('Finished reranking!')
def _sort(
self,
......@@ -126,8 +126,8 @@ class Reranking:
and premise['_id'] not in encountered_premise_ids:
trec_style_rows.append(
(topic_nrb, conclusion['_source']['sentence_stance'],
f'{"__".join(conclusion["_id"].split("_"))},' +
f'{"__".join(premise["_id"].split("_"))}',
f'{"__".join(premise["_id"].split("_"))},' +
f'{"__".join(conclusion["_id"].split("_"))}',
len(trec_style_rows),
f'{conclusion["final_reranking_score"]:.2f}', self.run_name))
encountered_premise_ids.add(premise['_id'])
......
......@@ -2,6 +2,7 @@ from concurrent.futures import ThreadPoolExecutor
import re
from operator import index, itemgetter
from typing import OrderedDict
import logging
import numpy as np
import pandas as pd
......@@ -26,6 +27,7 @@ class Retrieval:
"""
TODO
"""
self.logger = logging.getLogger(__name__)
self.topics = topics
self.run_name = run_name
self.length_factor = length_factor
......@@ -60,6 +62,7 @@ class Retrieval:
"""
conclusions_respons = OrderedDict()
for topic, topic_embedding in tqdm(topics_and_embeddings):
self.logger.info(f"Retrieve conclusion for topic {topic[1]}")
conclusions_respons[topic[0]] = self.es.search(
index='conc',
body=self._get_query_body(
......@@ -75,9 +78,10 @@ class Retrieval:
"""
premise_per_conclusion_per_topic = OrderedDict()
for topic_nrb, conclusions_response in tqdm(conclusion_per_topic.items()):
self.logger.info(f"Retrieve premises for topic number: {topic_nrb}")
premise_per_conclusion_per_topic[topic_nrb] = {}
conclusions = conclusions_response['hits']['hits']
with ThreadPoolExecutor(10) as executor:
with ThreadPoolExecutor(2) as executor:
premises_per_conclusion = executor.map(lambda x: (x, self._get_premises(x)),
conclusions)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment