#!/usr/bin/python


import os, sys, time
from optparse import OptionParser
from os import remove, mkdir, access
from lxml import etree




# Module-level defaults used throughout the script.
# ENGINE-SPECIFIC CODE
engine_name = "lemur"
memory_limit = "512000000"

# File that receives the parsed query; it is deleted again at the end of the
# run.
parsed_query_file = "temp.%s.parsed-query.txt" % engine_name

# Placeholders that are filled in later in the script.
current_engine_element = None
model_options_string = None
model_parameters_string = None




def add_slash(path_string):
    """Return path_string, appending "/" unless it already ends with one.

    An empty value is returned unchanged: indexing its last character used to
    raise IndexError, and appending "/" to an empty path would silently turn
    "current directory" into the filesystem root.
    """
    if not path_string or path_string.endswith("/"):
        return path_string
    return path_string + "/"



def get_models_string(engine_element):
    """Build the "number: name" listing of all models of an engine.

    Args:
        engine_element: XML element whose direct children each describe one
            model through a <number> and a <name> child.

    Returns:
        One line per model (four spaces, the number, ": ", the name), joined
        with newlines. None when no child provides both a number and a name.
    """
    lines = []
    # Iterate the element directly; getchildren() is deprecated.
    for model_element in engine_element:
        number = model_element.findtext("number")
        name = model_element.findtext("name")
        # Skip children that are not model descriptions (for example XML
        # comments) instead of failing on a missing element.
        if number is None or name is None:
            continue
        lines.append("    " + number + ": " + name)
    if not lines:
        return None
    return "\n".join(lines)




def get_model_parameters_string(engine_element, model_number):
    """Describe the parameters of one retrieval model as printable text.

    Args:
        engine_element: XML element whose direct children are the models.
        model_number: Model number, compared as a string against the text of
            each model's <number> child.

    Returns:
        The model name on the first line, followed by a three-line entry
        (name, description, default value) for every parameter. None when no
        model carries that number. If several models share the number, the
        description of the last one is returned.
    """
    parameters_string = None
    # Iterate the element directly; getchildren() is deprecated.
    for model_element in engine_element:
        number = model_element.findtext("number")
        if number is None or number != model_number:
            continue
        parts = [model_element.findtext("name", default="") + "\n"]
        parameters = model_element.find("parameters")
        # A model without a <parameters> element has nothing to list. Compare
        # against None explicitly: an element without children is falsy.
        if parameters is not None:
            for parameter in parameters:
                param_name = parameter.findtext("name")
                if param_name is None:
                    continue  # not a parameter description
                # findtext() yields "" for empty or missing fields, so an
                # empty <default/> no longer breaks the concatenation.
                description = parameter.findtext("description", default="")
                default = parameter.findtext("default", default="")
                parts.append(
                    "  * Name:           " + param_name
                    + "\n    Description:    " + description
                    + "\n    Default value:  " + default
                    + "\n"
                )
        parameters_string = "".join(parts)
    return parameters_string




def get_model_parameters(engine_element, model_number):
    """Collect the parameters of one model as a {name: default} dictionary.

    Args:
        engine_element: XML element whose direct children are the models.
        model_number: Model number, compared as a string against the text of
            each model's <number> child.

    Returns:
        Dictionary mapping each parameter name to the text of its <default>
        element (None when that element is empty). Empty when the model is
        unknown or has no parameters.
    """
    param_dict = {}
    # Iterate the element directly; getchildren() is deprecated.
    for model_element in engine_element:
        number = model_element.findtext("number")
        if number is None or number != model_number:
            continue
        parameters = model_element.find("parameters")
        if parameters is None:
            continue  # model without tunable parameters
        for parameter in parameters:
            name_element = parameter.find("name")
            default_element = parameter.find("default")
            # Skip children that are not complete parameter descriptions.
            if name_element is None or default_element is None:
                continue
            param_dict[name_element.text] = default_element.text
    return param_dict




def setParameterValue(engine_element, model_number, param_name, param_value):
    """Overwrite the <default> text of one parameter of one model, in place.

    Args:
        engine_element: XML element whose direct children are the models.
        model_number: Model number, compared as a string against the text of
            each model's <number> child.
        param_name: Name of the parameter to change.
        param_value: New text for the parameter's <default> element.

    Nothing happens when the model or the parameter does not exist.
    """
    # Iterate the element directly; getchildren() is deprecated.
    for model_element in engine_element:
        number = model_element.findtext("number")
        if number is None or number != model_number:
            continue
        parameters = model_element.find("parameters")
        if parameters is None:
            continue  # model without tunable parameters
        for parameter in parameters:
            name_element = parameter.find("name")
            if name_element is None or name_element.text != param_name:
                continue
            default_element = parameter.find("default")
            if default_element is not None:
                default_element.text = param_value




# Parse the XML document containing the information about the retrieval models
# supported by each engine and extract the entry for the current engine. The
# file name is relative, so it is looked up in the current working directory.
retrieval_models = "retrieval-models.xml"
models_root = etree.parse(retrieval_models)
# iter() replaces the deprecated getiterator(), and get() tolerates <engine>
# elements without a name attribute. When several engines carry the same
# name, the last one is used.
for engine_element in models_root.iter("engine"):
    if engine_element.get("name") == engine_name:
        current_engine_element = engine_element
if current_engine_element is None:
    # Without this check the script died with an AttributeError traceback.
    print("ERROR: No engine named '" + engine_name + "' found in '" + retrieval_models + "'.")
    sys.exit(1)
model_options_string = get_models_string(current_engine_element)




# Command-line interface. Running the script without any argument behaves
# like running it with -h, i.e. the usage message is shown.
if len(sys.argv) == 1:
    sys.argv.append("-h")

# One row per option: short flag, long flag, destination attribute, help text
# and metavar. Options appear in the help output in this order.
# NOTE: an earlier comment mentioned a -f (pseudo-relevance feedback) option;
# no such option is defined here.
default_config_name = engine_name + "-config.xml"
option_table = [
    ("-c", "--config-file", "config_filename",
     "Specify an alternative configuration filename. If this is not specified, the default file '" + default_config_name + "' is used.",
     "C"),
    ("-m", "--retrieval-model", "model",
     "Retrieval model to be used, Select '?' as the model for a list of available options.",
     "M"),
    ("-n", "--result-count", "result_count",
     "Number of results to be returned. Default is 1000.",
     "N"),
    ("-o", "--output-file", "output_file",
     "Name of the file containing the search results.",
     "O"),
    ("-p", "--model-parameters", "model_parameters",
     "Model-specific parameters. Use the '-m ?' option to see a list of available model and their parameters. To set one or more model parameters, assign them as 'param1=0.5' and 'param2=350', where param1 and param2 would be the actual parameter names. You can set multiple model parameters at the same time by joining them with a ':' like '-p param1=0.5:param2=350'.",
     "M"),
    ("-q", "--query-to-match", "query",
     "Query (set) to retrieve documents for.",
     "Q"),
]

parser = OptionParser()
for short_flag, long_flag, destination, help_text, meta_name in option_table:
    parser.add_option(short_flag, long_flag,
                      dest=destination,
                      help=help_text,
                      metavar=meta_name)
options, args = parser.parse_args()

# Copy the parsed values into module-level names used by the rest of the
# script; every value is None when its option was not given.
config_filename = options.config_filename
model = options.model
model_parameters = options.model_parameters
output_file = options.output_file
query = options.query
result_count = options.result_count

# Validate the command-line arguments. Informational requests ('?') exit with
# status 0; every error exits with status 1 so that calling scripts can
# detect the failure.
available_models_text = model_options_string or ""
if model is None:
    # The previous message asked for "the format of the indexed documents",
    # which is not what the -m option selects.
    print("ERROR: Please specify a retrieval model (-m). Available retrieval models:\n" + available_models_text)
    sys.exit(1)
elif model == "?":
    print("Available retrieval models:\n" + available_models_text)
    sys.exit()
if model_parameters == "?":
    parameters_description = get_model_parameters_string(current_engine_element, model)
    if parameters_description is None:
        # No model carries this number; printing "None" would be misleading.
        print("ERROR: Unknown retrieval model '" + model + "'.")
        sys.exit(1)
    print(parameters_description)
    sys.exit()
if query is None:
    print("ERROR: Please specify the query file.")
    sys.exit(1)
if output_file is None:
    print("ERROR: Please specify the filename for the retrieval output.")
    sys.exit(1)
if result_count is None:
    result_count = "1000"
else:
    try:
        requested_count = int(result_count)
    except ValueError:
        print("ERROR: The number of results must be an integer.")
        sys.exit(1)
    if requested_count <= 0:
        print("ERROR: Cannot return less than 1 result.")
        sys.exit(1)
    # Keep result_count a string (it becomes XML text later), but in
    # normalized form.
    result_count = str(requested_count)




# If model parameters were given, parse the 'name=value' pairs (separated by
# ':'). Each value is stored in the retrieval models XML tree; the values are
# read back from that tree when the search engine parameters are generated.
if model_parameters is not None:
    # Names the selected model actually defines, used to catch typos that
    # would otherwise be ignored without any feedback.
    known_parameters = get_model_parameters(current_engine_element, model)
    for param in model_parameters.split(":"):
        if not param:
            continue  # tolerate stray separators such as 'a=1::b=2'
        # partition() splits on the first '=' only, so a value may itself
        # contain '=' and a pair without '=' is detected instead of raising
        # IndexError.
        param_name, separator, param_value = param.partition("=")
        if not separator or not param_name:
            print("ERROR: Malformed model parameter '" + param + "'; expected 'name=value'.")
            sys.exit(1)
        if param_name not in known_parameters:
            print("WARNING: Retrieval model " + model + " has no parameter named '" + param_name + "'; ignoring it.")
            continue
        setParameterValue(current_engine_element, model, param_name, param_value)




# Parse the XML config file.
if config_filename is None:
    config_filename = engine_name + "-config.xml"
root_element = etree.parse(config_filename)

# findtext() returns None for a missing element and "" for an empty one;
# "or None" folds both into None, which is what find(...).text produced for
# an empty element. Missing optional elements therefore no longer raise
# AttributeError.
document_format = root_element.findtext("document_format") or None
index_location  = root_element.findtext("index_location") or None
stemming        = root_element.findtext("stemming") or None
stopword_list   = root_element.findtext("stopword_list") or None
toolkit_path    = root_element.findtext("toolkit_path") or ""

# The index location is concatenated into file names later on, so it must be
# present.
if index_location is None:
    print("ERROR: '" + config_filename + "' does not define <index_location>.")
    sys.exit(1)

# The commands are built as toolkit_path + executable name, so the path needs
# its trailing slash. An empty toolkit path is left empty, in which case the
# shell looks the executables up on PATH.
if toolkit_path:
    toolkit_path = add_slash(toolkit_path)




# Build the <parameters> tree that configures Lemur query parsing.
# ENGINE-SPECIFIC CODE
root_element = etree.Element("parameters")
doc = etree.ElementTree(root_element)

# Settings that are always written.
for required_tag, required_text in (("docFormat", document_format),
                                    ("outputFile", parsed_query_file)):
    etree.SubElement(root_element, required_tag).text = required_text

# Settings that are only written when the configuration provides them.
for optional_tag, optional_text in (("stemmer", stemming),
                                    ("stopwords", stopword_list)):
    if optional_text is not None:
        etree.SubElement(root_element, optional_tag).text = optional_text




# Save the query-parsing parameter tree to file.
# ENGINE-SPECIFIC CODE
query_param_filename = "params." + engine_name + "-query-parsing.xml"
param_handle = open(query_param_filename, "w")
try:
    doc.write(param_handle, pretty_print=True)
finally:
    # Close the handle even when serialization fails, so it is not leaked.
    param_handle.close()




# Perform the actual query parsing.
# ENGINE-SPECIFIC CODE
# NOTE: the command is run through the shell and its parts are not quoted, so
# paths containing spaces or shell metacharacters will not work.
command = toolkit_path + "ParseToFile " + query_param_filename + " " + query
parse_status = os.system(command)
if parse_status != 0:
    # Retrieval cannot succeed without the parsed query, so stop here rather
    # than running it on missing input. Remove what this step left behind.
    print("ERROR: Query parsing failed (status " + str(parse_status) + "): " + command)
    for leftover_file in (query_param_filename, parsed_query_file):
        if os.path.exists(leftover_file):
            remove(leftover_file)
    sys.exit(1)




# Build the <parameters> tree that configures Lemur retrieval: first the
# general settings, then the fixed settings of the selected model, and
# finally the model parameters that can be changed from the command line.
# ENGINE-SPECIFIC CODE
root_element = etree.Element("parameters")
doc = etree.ElementTree(root_element)

# Index files are addressed as <index_location>/<index_location>.<extension>.
# NOTE(review): this presumably expects index_location to be a plain
# directory name rather than a path -- confirm against the indexing script.
index_prefix = add_slash(index_location) + index_location

general_settings = [
    ("index", index_prefix + ".key"),
    ("textQuery", parsed_query_file),
    ("resultFile", output_file),
    ("resultCount", result_count),
    ("resultFormat", "1"),  # TREC output format
]

# Per-model settings that should not be changed via the command line, keyed
# by model number. A model number without an entry gets none of them.
smoothing_base = [("retModel", "2"), ("smoothStrategy", "0")]
fixed_model_settings = {
    "1": [("retModel", "0")],
    "2": [("retModel", "1")],
    "3": smoothing_base + [("smoothMethod", "0")],
    "4": smoothing_base + [("smoothMethod", "1")],
    "5": smoothing_base + [("smoothMethod", "2"),
                           ("smoothSupportFile", index_prefix + ".supp")],
}

for setting_tag, setting_text in general_settings + fixed_model_settings.get(model, []):
    etree.SubElement(root_element, setting_tag).text = setting_text

# Set the model-specific parameters that can be changed.
param_dict = get_model_parameters(current_engine_element, model)
for param_name, param_value in param_dict.items():
    etree.SubElement(root_element, param_name).text = param_value




# Save the retrieval parameter tree to file.
# ENGINE-SPECIFIC CODE
retrieval_param_filename = "params." + engine_name + "-search.xml"
param_handle = open(retrieval_param_filename, "w")
try:
    doc.write(param_handle, pretty_print=True)
finally:
    # Close the handle even when serialization fails, so it is not leaked.
    param_handle.close()




# Perform the actual retrieval.
# ENGINE-SPECIFIC CODE
# NOTE: the command is run through the shell and its parts are not quoted, so
# paths containing spaces or shell metacharacters will not work.
command = toolkit_path + "RetEval " + retrieval_param_filename
retrieval_status = os.system(command)

# Remove the temporary (parameter) files, whether or not retrieval succeeded.
# Files that are already gone are skipped instead of raising OSError.
for temporary_file in (query_param_filename, parsed_query_file, retrieval_param_filename):
    if os.path.exists(temporary_file):
        remove(temporary_file)

# Report a failed retrieval through the exit status, after the cleanup.
if retrieval_status != 0:
    print("ERROR: Retrieval failed (status " + str(retrieval_status) + "): " + command)
    sys.exit(1)
