#!/usr/bin/python

from __mysql import * 
import libxml2

class tag_occurrence_c:
   """ - one occurrence of a tag: start, end and score - """

   def __init__(self):
      # - a fresh occurrence has a zeroed span and a zero score -
      self.start, self.end = 0.0, 0.0
      self.score = 0

   def get_string(self):
      """ - return the occurrence formatted as one human readable line - """
      fields = (self.start, self.end, self.score)
      return "Start: %8.3f, End: %8.3f, Score: %3.d" % fields

# - global variable, contains the search result -
# - layout: records[record name][tag] = list of tag_occurrence_c objects -
records = {}

# ------------------------------------------------------------------------------
def f_get_xml_file(a_tag):
   """ - retrieve xml for given tag -

   Runs ./8_search.sh from the indexer/scripts/ directory with the tag as
   its argument and returns the object produced by os.popen(); the standard
   error of the search script is redirected to /dev/null. The caller is
   responsible for closing the returned object.

   The tag is quoted for the shell, so a tag containing whitespace or shell
   metacharacters reaches the script as one literal argument instead of
   being interpreted by the shell.
   """
   # - quote() lives in shlex on Python 3 and in pipes on Python 2 -
   try:
      from shlex import quote
   except ImportError:
      from pipes import quote

   search_cmd = "cd indexer/scripts/;./8_search.sh %s 2> /dev/null;cd ../.." % quote(a_tag)
   return os.popen(search_cmd)

# ------------------------------------------------------------------------------
def f_insert_occurrence_in_records(a_rec_name,a_tag,a_tag_occ):
   """ - insert occurrence info in records dictionary -

   a_rec_name - name of the record the occurrence belongs to
   a_tag      - tag the occurrence was found for
   a_tag_occ  - occurrence object appended to the list kept for that pair

   Missing levels of the global records dictionary (the record entry and
   the tag list) are created on demand.
   """
   # - setdefault replaces dict.has_key(), which is deprecated in Python 2
   #   and does not exist in Python 3 -
   rec_tags = records.setdefault(a_rec_name,{})
   rec_tags.setdefault(a_tag,[]).append(a_tag_occ)

# ------------------------------------------------------------------------------
def f_process_xml_file(a_xml_file,a_tag):
   """ - process xml file - search for tag occurrences -

   a_xml_file - readable file-like object holding the whole xml document
   a_tag      - tag the document was produced for

   Every "record" element is stored through
   f_insert_occurrence_in_records(); its "document", "start", "end" and
   "score" properties fill the record name and the occurrence. Every "msg"
   element is reported through log_msg() or err_msg() according to its
   "type" property ("info" or "error").

   The parsed document is freed even when processing fails, e.g. when a
   property cannot be converted to a number.
   """
   xml_doc = libxml2.parseDoc(a_xml_file.read())

   # - free the parsed document even if the processing below raises -
   try:
      # - processed nodes: children of the node that follows the first
      #   child of the document's first node -
      xml_data = xml_doc.children.children.next.children

      while xml_data is not None:
         if xml_data.type == "element":
            if xml_data.name == "record":
               # - a record without "document" property keeps an empty name -
               rec_name = ""
               tag_occ = tag_occurrence_c()

               # - process record properties -
               xml_node_prop = xml_data.properties
               while xml_node_prop is not None:
                  if xml_node_prop.name == "document":
                     rec_name = xml_node_prop.content
                  elif xml_node_prop.name == "start":
                     tag_occ.start = float(xml_node_prop.content)
                  elif xml_node_prop.name == "end":
                     tag_occ.end = float(xml_node_prop.content)
                  elif xml_node_prop.name == "score":
                     tag_occ.score = int(xml_node_prop.content)

                  xml_node_prop = xml_node_prop.next

               f_insert_occurrence_in_records(rec_name,a_tag,tag_occ)

            elif xml_data.name == "msg":

               # - get message type -
               xml_node_prop = xml_data.properties
               while xml_node_prop is not None:
                  if xml_node_prop.name == "type":
                     if xml_node_prop.content == "info":
                        log_msg("Searching \"%s\": %s" % (a_tag,xml_data.children.content))
                     elif xml_node_prop.content == "error":
                        err_msg("Searching \"%s\": %s" % (a_tag,xml_data.children.content))
                  xml_node_prop = xml_node_prop.next

         xml_data = xml_data.next
   finally:
      xml_doc.freeDoc()

# ------------------------------------------------------------------------------
def f_sort_tag_occurrences():
   """ - sort occurrences of tags in records by start time -

   Every occurrence list stored in the global records dictionary is sorted
   in place, ascending by the start attribute of its items.
   """
   # - only the values are needed; values() exists on Python 2 and 3 alike,
   #   unlike iteritems() -
   for tags in records.values():
      for occurrences in tags.values():
         occurrences.sort(key=lambda occurrence: occurrence.start)

# ------------------------------------------------------------------------------
def f_search_for_tags(a_tag_list):
   """ - search for every tag in the given sequence -

   Each tag is converted to upper case, searched for through
   f_get_xml_file() and the resulting xml is processed by
   f_process_xml_file(). After all tags are processed the collected
   occurrences are sorted by f_sort_tag_occurrences().
   """
   for tag in a_tag_list:
      upp_tag = tag.upper()
      xml_f = f_get_xml_file(upp_tag)

      # - close the search output even if processing of the xml fails -
      try:
         f_process_xml_file(xml_f,upp_tag)
      finally:
         xml_f.close()

   f_sort_tag_occurrences()

# ------------------------------------------------------------------------------
def f_print_records():
   """ - DEBUG print occurrences of tags in human readable format -

   Writes one line per record, one per tag of that record and one per
   occurrence of that tag (as formatted by its get_string() method) to the
   standard output.
   """
   # - print with a single parenthesized argument gives identical output
   #   whether print is a statement (Python 2) or a function (Python 3) -
   for record,tags in records.items():
      print("* RECORD: %s" % record)
      for tag,occurrences in tags.items():
         print("** TAG: %s" % tag)
         for occurrence in occurrences:
            print("*** %s" % occurrence.get_string())

# ------------------------------------------------------------------------------
def f_score2probab(a_score):
   """ - convert score to probability: the score divided by 100.0 - """
   # - presumably scores are in range 0..100; the range is not checked here -
   score_scale = 100.0
   return a_score / score_scale

# ------------------------------------------------------------------------------
def f_insert_occurrences():
   """ - DEBUG insert occurrences of tags into DB -

   Looks up the id of every record and tag collected in the global records
   dictionary, builds one INSERT statement per occurrence and executes all
   of them through a single f_mysql_execute() call.

   Returns 0 on success. Returns 1, without inserting anything, when a
   record or a tag is not found in the DB or when the tag is already
   associated with the record.
   """
   # - NOTE(review): record and tag names are interpolated straight into the
   #   SQL text; use parameterized queries if f_mysql_execute supports them -

   # - statements are collected in a list and joined once at the end -
   sql_ins_cmds = []

   for record,tags in records.items():

      # - get record id from DB -
      f_mysql_execute("SELECT id FROM records WHERE name=\"%s\"" % record)
      data = db_cursor.fetchone()
      if data is None:
         err_msg("Record \"%s\" was not found in database" % record)
         return 1
      id_rec = data[0]

      for tag,occurrences in tags.items():

         # - get tag id from DB -
         f_mysql_execute("SELECT id FROM tags WHERE name=\"%s\"" % tag)
         data = db_cursor.fetchone()
         if data is None:
            err_msg("Tag \"%s\" was not found in database" % tag)
            return 1
         id_tag = data[0]

         # - test if tag is already associated to record YES = something is wrong -
         # - explicit check instead of assert, which is stripped under -O -
         f_mysql_execute("SELECT * FROM occurrences WHERE id_tag=%s AND id_rec=%s" % (id_tag,id_rec))
         if db_cursor.rowcount != 0:
            err_msg("Tag \"%s\" is already associated to record \"%s\"" % (tag,record))
            return 1

         for occurrence in occurrences:

            # - get occurrence probability -
            tag_probab = f_score2probab(occurrence.score)

            # - build query -
            sql_ins_cmds.append("\nINSERT INTO occurrences VALUES(%d, %d, %f, %f, %f);" % (id_tag, id_rec, tag_probab, occurrence.start, occurrence.end))

   # - insert occurrences into DB -
   if sql_ins_cmds:
      f_mysql_execute("".join(sql_ins_cmds))

   return 0

# ------------------------------------------------------------------------------
def f_exit(a_ret_val):
   """ call f_release_database(), then exit program with given value """
   f_release_database(a_ret_val)

   # - same exception that sys.exit(a_ret_val) raises -
   raise SystemExit(a_ret_val)

# - PROGRAM START - ============================================================
# - tags are all command line arguments after the script name -
tag_args = sys.argv[1:]
if not tag_args:
   wrn_msg("No tags were given as arguments")
   f_exit(0)

# - collect occurrences of the tags and dump them for debugging -
f_search_for_tags(tag_args)
f_print_records()

# - store collected occurrences, non-zero result means failure -
insert_status = f_insert_occurrences()
if insert_status != 0:
   err_msg("Cannot insert occurrences to database")
   f_exit(1)

f_exit(0)

