import mysql.connector
import numpy
import sys
import os
import datetime
from bs4 import BeautifulSoup

# Usage:
#     python data_insert.py

def parseProtocol(fileName, protocolContent, countSession):
    """Parse one HTML voting protocol into a plain dictionary.

    The document is processed table by table, using the position of each
    ``<table>`` in the document:

    * table 0        - voting number, session, date, time and subject,
    * table 1        - overall result (first ``<b>`` element),
    * table 2        - summary counts ("label: number" cells),
    * tables 5, 8, … - political party name (first ``<th>`` element),
    * tables 6, 9, … - voters of that party and their individual votes.

    Args:
        fileName: Stored unchanged under the ``'fileName'`` key.
        protocolContent: HTML text of the protocol.
        countSession: Session number, anything ``int()`` accepts. For
            sessions below 20 the first two cells of the summary table are
            skipped, otherwise only the first one (the protocol layout
            changed with zmb-z8-20).

    Returns:
        A tuple ``(voters, oneProtocol)``. ``voters`` is the list of voter
        names in document order. ``oneProtocol`` is a dict with the keys
        ``fileName``, ``isValid`` (always 1 here), ``number``, ``session``,
        ``date``, ``time``, ``subject``, ``result``, ``summaryVote``
        (label -> count, -1 when the count is blank) and ``votersVotes``
        (voter name -> ``(party, vote)``).
    """
    # Build the BeautifulSoup object (the document as a nested structure).
    soup = BeautifulSoup(protocolContent, 'html.parser')

    # Collect the voter names. Placeholder cells such as
    # <td nowrap class="votename">&nbsp;</td> are skipped.
    voters = []
    for cell in soup.find_all('td', class_='votename'):
        name = cell.contents[0]
        if not name.isspace():
            # The last character of the cell text is dropped
            # (presumably a trailing whitespace character - TODO confirm).
            voters.append(name[:-1])

    # {'Přítomno': cnt, 'Ano': cnt, 'Ne': cnt, 'Zdržel se': cnt, 'Nehlasoval': cnt}
    summaryVote = dict()
    # {voter name: (party, vote)}
    votersVotes = dict.fromkeys(voters, ('', ''))

    party = ''
    vote = ''
    session = ''

    tables = soup.find_all('table')
    tableCount = len(tables)
    # Indexes of the party tables (5, 8, 11, …) and vote tables (6, 9, 12, …).
    partyTables = range(5, tableCount, 3)
    voteTables = range(6, tableCount, 3)

    for idx, table in enumerate(tables):
        if idx == 0:
            # Voting number, session, date, time and subject of the vote.
            number = table.find_all('p')[1].contents[0].split()[2]
            try:
                session = table.p.contents[0].split()[4]
            except IndexError:
                print('Error: prazdne cislo zasedani')
            dateTimeParts = table.p.contents[2].split()
            date = dateTimeParts[0]
            time = dateTimeParts[2]
            subject = table.find_all('p', class_='subject')[0].contents[0]

        elif idx == 1:
            # Overall result of the vote.
            result = table.find_all('b')[0].contents[0]

        elif idx == 2:
            # Summary counts. The protocol layout changed with zmb-z8-20:
            # older protocols have two leading cells to skip, newer ones one.
            leadingCells = 2 if int(countSession) < 20 else 1
            for offset, cell in enumerate(table.find_all('td')):
                if offset < leadingCells:
                    continue
                item = cell.contents[0].split(':')
                if item[1] == ' ':
                    # A blank count is recorded as -1.
                    summaryVote[item[0]] = -1
                else:
                    summaryVote[item[0]] = int(item[1].strip())

        elif idx in partyTables:
            # Party name; applies to the vote table that follows.
            party = table.find_all('th')[0].contents[0]

        elif idx in voteTables:
            # Voters of the current party and their votes.
            for cell in table.find_all('td'):
                cellClass = cell.get('class')[0]
                if cellClass == 'votename':
                    name = cell.contents[0]
                if cellClass == 'votechoice':
                    if len(cell.contents) == 0:
                        vote = "nepřít."
                    else:
                        vote = cell.contents[0]
                    # Newer protocols wrap the answer in <span>...</span>.
                    if 'span' in str(vote):
                        vote = cell.text
                # Written on every cell; the entry made on a 'votename' cell
                # is overwritten once the matching 'votechoice' cell is read.
                if not name.isspace():
                    votersVotes[name[:-1]] = (party, vote)

    oneProtocol = {
        'fileName': fileName,
        'isValid': 1,
        'number': number,
        'session': session,
        'date': date,
        'time': time,
        'subject': subject,
        'result': result,
        'summaryVote': summaryVote,
        'votersVotes': votersVotes,
    }
    return voters, oneProtocol


def checkProtocols(protocols):
    """Flag invalid protocols and find the prevailing date and session.

    A protocol gets ``isValid = 0`` (in place) when

    * its subject is empty or whitespace only, or
    * the protocol right after it has the same ``number`` or the same
      ``subject`` (the newer protocol wins).

    Args:
        protocols: List of protocol dicts with at least the keys
            ``subject``, ``number``, ``date``, ``session`` and ``isValid``.

    Returns:
        A tuple ``(finalDate, finalSession, protocols)`` where ``finalDate``
        is the most frequent ``date`` value, ``finalSession`` is the most
        frequent ``session`` value split on ``'/'`` and ``protocols`` is the
        same list object that was passed in.

    Raises:
        ValueError: If ``protocols`` is empty.
    """
    if not protocols:
        raise ValueError('checkProtocols: no protocols to check')

    # Empty subject => it is unknown what the vote was about.
    for protocol in protocols:
        if str(protocol['subject']).strip() == '':
            protocol['isValid'] = 0

    # Compare every pair of neighbours, including the first two protocols.
    # The comparisons only read 'number' and 'subject', so the direction of
    # the walk does not matter.
    for older, newer in zip(protocols, protocols[1:]):
        if newer['number'] == older['number'] or newer['subject'] == older['subject']:
            older['isValid'] = 0

    # Count how often each 'date' and 'session' occurs.
    dateCounts = {}
    sessionCounts = {}
    for protocol in protocols:
        dateCounts[protocol['date']] = dateCounts.get(protocol['date'], 0) + 1
        sessionCounts[protocol['session']] = sessionCounts.get(protocol['session'], 0) + 1

    # On a tie max() returns the value that was seen first.
    finalDate = max(dateCounts, key=dateCounts.get)
    finalSession = max(sessionCounts, key=sessionCounts.get).split('/')

    return finalDate, finalSession, protocols


def insertIntoDb(finalSession, finalDate, finalVoters, finalProtocols):
    """Write one council session and all of its votes into the MySQL database.

    Existing rows are looked up first and only missing ones are inserted, so
    the function can be run repeatedly for the same session.

    Args:
        finalSession: Two-item sequence. The first item without its first
            character is used as the council number (``ck_poradi_ZMB``), the
            second item as the session number (``cislo_zasedani``).
        finalDate: Session date as a ``'day.month.year'`` string.
        finalVoters: List of voter names; the first two whitespace-separated
            words of each name are used as first name and surname.
        finalProtocols: List of protocol dicts as produced by
            ``parseProtocol``. Party membership is taken from the first one.

    Returns:
        None. A missing council number is only reported with ``print``.
    """
    # Open the database connection.
    mydb = mysql.connector.connect(
        host="localhost",
        user="root",
        password="",
        port=3308,
        database="zmb_hlasovani"
    )

    cursor = None
    try:
        cursor = mydb.cursor(buffered=True)
        zmbNumber = finalSession[0][1:]

        # Check that the council number exists in the DB (parameterized
        # instead of being concatenated into the SQL text).
        cursor.execute("SELECT * FROM zmb WHERE poradi_ZMB=%s", (zmbNumber,))
        row = cursor.fetchone()

        if row is None:
            print("Error: cislo zastupitelstva neni v DB")

        sessionNumber = finalSession[1]

        # Store the session date.
        dateParts = finalDate.split('.')    # [day, month, year]
        finalDate = datetime.date(int(dateParts[2]), int(dateParts[1]), int(dateParts[0]))
        sql = "SELECT * FROM zasedani_zmb WHERE ck_poradi_ZMB=%s AND cislo_zasedani=%s"
        cursor.execute(sql, (zmbNumber, sessionNumber))
        row = cursor.fetchone()

        if row is None:
            sql = "INSERT INTO zasedani_zmb (ck_poradi_ZMB, cislo_zasedani, datum_zasedani) VALUES (%s, %s, %s)"
            cursor.execute(sql, (zmbNumber, sessionNumber, finalDate))
            mydb.commit()

        # List of political parties (taken from the first protocol).
        finalParties = []
        for value in finalProtocols[0]['votersVotes'].values():
            if value[0] not in finalParties:
                finalParties.append(value[0])

        # Make sure every party exists in the DB; insert the missing ones.
        for party in finalParties:
            sql = "SELECT * FROM politicka_strana WHERE zkratka=%s"
            cursor.execute(sql, (str(party),))
            row = cursor.fetchone()

            if row is None:
                sql = "INSERT INTO politicka_strana (zkratka) VALUES (%s)"
                cursor.execute(sql, (str(party),))
                mydb.commit()

        insertMembershipSql = "INSERT INTO prislusi_k (ck_poradi_ZMB, ck_clen_ZMB, ck_politicka_strana, datum_od, datum_do) VALUES (%s, %s, %s, %s, %s)"

        # Make sure every member exists in the DB and has a party membership.
        for voter in finalVoters:
            nameParts = voter.split()
            firstName = str(nameParts[0])
            lastName = str(nameParts[1])

            sql = "SELECT id FROM clen_zmb WHERE jmeno_clena=%s AND prijmeni_clena=%s"
            cursor.execute(sql, (firstName, lastName))
            record = cursor.fetchone()

            # Party of the member (taken from the first protocol).
            voterParty = finalProtocols[0]['votersVotes'][firstName + ' ' + lastName][0]

            sql = "SELECT id FROM politicka_strana WHERE zkratka=%s"
            cursor.execute(sql, (str(voterParty),))
            row = cursor.fetchone()
            voterPartyId = row[0]

            if record is None:
                # Unknown member => insert 1) into the member table
                sql = "INSERT INTO clen_zmb (jmeno_clena, prijmeni_clena) VALUES (%s, %s)"
                cursor.execute(sql, (firstName, lastName))
                mydb.commit()

                sql = "SELECT id FROM clen_zmb WHERE jmeno_clena=%s AND prijmeni_clena=%s"
                cursor.execute(sql, (firstName, lastName))
                row = cursor.fetchone()
                voterId = row[0]

                # 2) into the membership table for the member's party
                cursor.execute(insertMembershipSql, (zmbNumber, voterId, voterPartyId, finalDate, finalDate))
                mydb.commit()

            else:
                voterId = record[0]

                # Latest membership record of the member for this council.
                sql = "SELECT * FROM prislusi_k WHERE ck_poradi_ZMB=%s AND ck_clen_ZMB=%s ORDER BY id DESC"
                cursor.execute(sql, (zmbNumber, voterId))
                row = cursor.fetchone()

                # NOTE(review): row[3] / row[5] are assumed to be
                # ck_politicka_strana / datum_do - confirm against the schema.
                if row is not None and voterPartyId == row[3]:
                    # Same party: extend the membership when this session is later.
                    if finalDate > row[5]:
                        sql = "UPDATE prislusi_k SET datum_do=%s WHERE id=%s"
                        cursor.execute(sql, (finalDate, str(row[0])))
                        mydb.commit()
                else:
                    # The member changed party, or is known but has no
                    # membership record for this council yet => new record.
                    cursor.execute(insertMembershipSql, (zmbNumber, voterId, voterPartyId, finalDate, finalDate))
                    mydb.commit()

        # ID lookup dictionaries for the inserts below.
        dictVoters = dict.fromkeys(finalVoters, 0)
        dictResultOptions = {}
        dictVoteOptions = {}

        sql = "SELECT id, jmeno_clena, prijmeni_clena FROM clen_zmb"
        cursor.execute(sql)
        for rowId, jmeno_clena, prijmeni_clena in cursor:
            dictVoters[jmeno_clena + ' ' + prijmeni_clena] = rowId

        sql = "SELECT id, text FROM vysledek_hlasovani"
        cursor.execute(sql)
        for rowId, text in cursor:
            dictResultOptions[text] = rowId

        sql = "SELECT id, text FROM vysledek_volice"
        cursor.execute(sql)
        for rowId, text in cursor:
            dictVoteOptions[text] = rowId

        # Process the protocols one by one.
        for protocol in finalProtocols:

            sql = "SELECT * FROM hlasovani WHERE ck_poradi_ZMB=%s AND cislo_zasedani=%s AND cislo_hlasovani=%s"
            cursor.execute(sql, (zmbNumber, sessionNumber, protocol['number']))
            row = cursor.fetchone()

            if row is None:
                # Convert the time to a datetime.time value.
                timeParts = protocol['time'].split(':')    # [hours, minutes, seconds]
                finalTime = datetime.time(int(timeParts[0]), int(timeParts[1]), int(timeParts[2]))

                # Insert into table 'hlasovani'.
                sql = "INSERT INTO hlasovani (jmeno_souboru, validni_hlasovani, ck_poradi_ZMB, cislo_zasedani, cislo_hlasovani, cas, predmet_hlasovani, ck_vysledek_hlasovani) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
                cursor.execute(sql, (protocol['fileName'], protocol['isValid'], zmbNumber, sessionNumber, protocol['number'], finalTime, str(protocol['subject']).strip(), dictResultOptions[protocol['result']]))
                mydb.commit()

                # Read back the id of the new vote.
                sql = "SELECT id FROM hlasovani WHERE ck_poradi_ZMB=%s AND cislo_zasedani=%s AND cislo_hlasovani=%s"
                cursor.execute(sql, (zmbNumber, sessionNumber, protocol['number']))
                row = cursor.fetchone()

            idForNumber = row[0]

            # Process the individual votes of the members.
            for voter in protocol['votersVotes']:
                voterChoice = protocol['votersVotes'][voter][1]

                # Skip members whose vote is already stored.
                sql = "SELECT * FROM hlasovani_clena WHERE ck_cislo_hlasovani=%s AND ck_clen_ZMB=%s"
                cursor.execute(sql, (idForNumber, dictVoters[voter]))
                row = cursor.fetchone()

                if row is None:
                    # The vote option is not in the DB yet.
                    if voterChoice not in dictVoteOptions:
                        # Store the new vote option.
                        sql = "INSERT INTO vysledek_volice (text) VALUES (%s)"
                        cursor.execute(sql, (str(voterChoice),))
                        mydb.commit()

                        # Read back the id of the new vote option.
                        sql = "SELECT id FROM vysledek_volice WHERE text=%s"
                        cursor.execute(sql, (str(voterChoice),))
                        row = cursor.fetchone()

                        dictVoteOptions[voterChoice] = row[0]

                    # Insert into table 'hlasovani_clena'.
                    sql = "INSERT INTO hlasovani_clena (ck_cislo_hlasovani, ck_clen_ZMB, ck_vysledek_volice) VALUES (%s, %s, %s)"
                    cursor.execute(sql, (idForNumber, dictVoters[voter], dictVoteOptions[voterChoice]))
                    mydb.commit()

    finally:
        # Release the cursor and the connection even when a query fails.
        if cursor is not None:
            cursor.close()
        mydb.close()

def mySort(e):
    """Sort key: the second dash-separated field of ``e`` as an integer."""
    fields = e.split('-')
    secondField = fields[1]
    return int(secondField)

def parseFolder(protocolsFolder):
    """Parse every protocol file of one session folder and store it in the DB.

    The folder is resolved relative to the current working directory. Hidden
    entries, sub-directories, ``log.html`` and ``znak.gif`` are ignored; the
    remaining files are processed in the order given by ``mySort``.

    Args:
        protocolsFolder: Folder path relative to the working directory. The
            third dash-separated part of it is passed to ``parseProtocol``
            as the session number.

    Returns:
        None. A missing folder or a folder without protocol files is reported
        with ``print`` and nothing is written to the database.
    """
    # Script location (working directory) + folder holding the protocols.
    route = os.path.join(os.getcwd(), protocolsFolder)

    # Only the directory listing is guarded: the error message below is
    # specifically about the folder path.
    try:
        files = [
            entry.name
            for entry in os.scandir(route)
            if not entry.name.startswith('.')
            and entry.is_file()
            and entry.name not in ('log.html', 'znak.gif')
        ]
    except FileNotFoundError:
        print("Error: neplatna cesta ke slozce s protokoly")
        return

    # Report which folder is being processed.
    print("\n\n----------\nZpracovani zasedani ", protocolsFolder, "\npocet protokolu: ", len(files), "\n")

    if not files:
        # Nothing to parse; the later steps need at least one protocol.
        print("Error: slozka neobsahuje zadne protokoly")
        return

    files.sort(key=mySort)

    # Sessions whose files have to be read as UTF-8 (encoding problem);
    # all other folders are read with the platform default encoding.
    utf8Folders = {
        'Do_db\\zmb-z8-' + number
        for number in ('01', '16', '30', '31', '32', '33', '34',
                       '35', '36', '37', '38', '39', '40')
    }
    encoding = 'utf8' if protocolsFolder in utf8Folders else None

    countSession = protocolsFolder.split("-")[2]

    # Open, read and parse the files.
    protocols = []
    finalVoters = []
    for file in files:
        path = os.path.join(protocolsFolder, file)

        # The context manager closes the file as soon as it has been read.
        with open(path, 'r', encoding=encoding) as f:
            protocolContent = f.read()

        previousVoters = finalVoters
        finalVoters, oneProtocol = parseProtocol(file, protocolContent, countSession)

        protocols.append(oneProtocol)

        # Every protocol of a session is expected to list the same voters.
        if previousVoters != [] and previousVoters != finalVoters:
            print("Error: nesrovnalost ve volicich")

    # Check the protocols for errors.
    finalDate, finalSession, finalProtocols = checkProtocols(protocols)

    # Write everything into the database.
    insertIntoDb(finalSession, finalDate, finalVoters, finalProtocols)

if __name__ == "__main__":

    # All sessions:
    #numbers = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39", "40"]
    # Only a single session:
    numbers = ["40"]

    for number in numbers:
        # The backslash is escaped explicitly; "\z" is not a valid escape
        # sequence, the resulting string is unchanged.
        parseFolder("Do_db\\zmb-z8-" + number)
