Commit 7582599c authored by Jerome Wuerf
Browse files

Update sentence and embeddings path

parent 2ae9b99f
......@@ -7,6 +7,9 @@ from retrieval import (Retrieval, MaximalMarginalRelevanceReranking, StructuralD
from utils import (Configuration, SubCommands, RerankingOptions, parse_cli_args, read_data_to_index,
read_results, read_unranked, read_sentences, read_topics, write_output)
import logging
import pickle
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s', level='INFO')
class App:
......@@ -33,10 +36,9 @@ class App:
def _indexing(self) -> None:
self.logger.info('Reading sentences and embeddings form disk.')
sentences, embeddings = read_data_to_index(self.config['SENTENCES_PARQUET_PATH'],
self.config['EMBEDDINGS_PKL_PATH'])
Indexing(sentences,
embeddings,
input_generator = read_data_to_index(self.config['SENTENCES_PATH'],
self.config['EMBEDDINGS_PATH'])
Indexing(input_generator,
self.config['ELASTIC_HOST'],
self.config['CREATE_INDEX']
).index_to_es()
......@@ -47,7 +49,7 @@ class App:
results = None
if self.config['REUSE_UNRANKED']:
self.logger.info('Reading unranked...')
results = read_unranked(self.config['REUSE_UNRANKED'])
results = read_unranked(self.config['REUSE_UNRANKED'], self.config['TOPIC_NRB'])
self.logger.info('Read unranked!')
else:
results = Retrieval(topics,
......@@ -57,6 +59,7 @@ class App:
self.config['NRB_CONCLUSIONS_PER_TOPIC'],
self.config['NRB_PREMISES_PER_CONCLUSION']
).retrieve()
pickle.dump(results, open('/data/baseline_dump.pkl', 'wb'))
if self.config['RERANKING'] == RerankingOptions.MAXIMAL_MARGINAL_RELEVANCE.value:
results = MaximalMarginalRelevanceReranking(
results,
......@@ -76,7 +79,7 @@ class App:
)
def _assessment(self) -> None:
sentences = read_sentences(self.config['SENTENCES_PARQUET_PATH'])
sentences = read_sentences(self.config['SENTENCES_PATH'])
results = read_results(self.config['RESULT_PATH'])
topics = read_topics(self.config['TOPICS_PATH'])
Assessment(results, sentences, topics).cmdloop()
......
......@@ -12,8 +12,8 @@ class Configuration():
TODO Figure out how to inherit from dict
"""
keys = {
SubCommands.INDEXING: ['SENTENCES_PARQUET_PATH',
'EMBEDDINGS_PKL_PATH',
SubCommands.INDEXING: ['SENTENCES_PATH',
'EMBEDDINGS_PATH',
'CREATE_INDEX',
'ELASTIC_HOST'],
SubCommands.RETRIEVAL: ['TOPICS_PATH',
......@@ -28,7 +28,7 @@ class Configuration():
'LAMBDA_CONCLUSIONS',
'LAMBDA_PREMISES'],
SubCommands.ASSESSMENT: ['TOPICS_PATH',
'SENTENCES_PARQUET_PATH',
'SENTENCES_PATH',
'RESULT_PATH'],
SubCommands.DEDUPLICATION: ['ELASTIC_HOST',
'INDICES']
......@@ -48,8 +48,8 @@ class Configuration():
args_list = None
# Looks ugly but there is no pattern matching in python :(
if self.command == SubCommands.INDEXING:
args_list = [Path(args.sentences_parquet_path),
Path(args.embeddings_pkl_path),
args_list = [Path(args.sentences_path),
Path(args.embeddings_path),
args.create_index,
args.elastic_host]
......@@ -69,7 +69,7 @@ class Configuration():
elif self.command == SubCommands.ASSESSMENT:
args_list = [Path(args.topics_path),
Path(args.sentences_parquet_path),
Path(args.sentences_path),
args.result_path]
elif self.command == SubCommands.DEDUPLICATION:
......
......@@ -18,9 +18,8 @@ class Text:
elastic_host = 'The hostname of the server that runs elastic search.'
create = 'If flag is present two new indices are created, one for conclusions, one for ' \
'premises. If there is already an existing index it will be overridden.'
sentences_parquet_path = 'The file path to the parquet file containing the sentences.' \
'Overrides the SENTENCES_PARQUET_PATH environment variable.'
embeddings_pkl_path = 'The file path to the pickled embeddings of the argument units.' \
sentences_path = 'The file path to the csv file containing the sentences.'
embeddings_path = 'The file path to the embeddings of the argument units.' \
'Overrides the EMBEDDINGS_PKL_PATH environment variable.'
retrieval = 'This sub command is intended for a run on the TIRA evaluation system.'
topic_nrb = 'Lorem ipsum' # TODO
......@@ -50,8 +49,8 @@ def parse_cli_args() -> argparse.Namespace:
parser_index.add_argument('--create', dest='create_index',
action='store_true',
help=Text.create)
parser_index.add_argument('sentences_parquet_path', type=str, help=Text.sentences_parquet_path)
parser_index.add_argument('embeddings_pkl_path', type=str, help=Text.embeddings_pkl_path)
parser_index.add_argument('sentences_path', type=str, help=Text.sentences_path)
parser_index.add_argument('embeddings_path', type=str, help=Text.embeddings_path)
# Retrieval
......@@ -100,9 +99,9 @@ def parse_cli_args() -> argparse.Namespace:
# Assessment
parser_assessment = subparsers.add_parser(SubCommands.ASSESSMENT.value, help=Text.assessment)
parser_assessment.add_argument('sentences_parquet_path',
parser_assessment.add_argument('sentences_path',
type=str,
help=Text.sentences_parquet_path)
help=Text.sentences_path)
parser_assessment.add_argument('result_path', type=str, help=Text.result_path)
parser_assessment.add_argument('topics_path', type=str, help=Text.topics_path)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment