123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222 |
- import re, argparse
- import sqlite3 as sql
- import logging
- import logging.handlers
- from os import listdir, mkdir
- from os.path import isfile, exists
- from urllib.parse import unquote
- #------------------------------------------------+
- # get_list_from_server_txt
- #------------------------------------------------+
- # Rows in the game server database are
- # occasionally concatenated into one line.
- # To simplify, they are unmerged.
- #
- # The final result is every row in the game server
- # database with its own index in a list.
- #------------------------------------------------+
def get_list_from_server_txt(filename):
    """Read the game-server database file and return its rows as a list.

    Rows in the server database are occasionally concatenated onto a
    single line; those lines are un-merged so that every row ends up
    with its own index in the returned list.

    filename -- path to the server database text file.
    Returns a list of row strings (trailing newlines preserved where
    present in the file).
    """
    backslash = '\\'
    # The first and last column of a row are prefixed with a backslash,
    # so multiple rows merged onto one line must be split at the 3rd backslash.
    columns = 3

    def unmerge_rows(line):
        # Iteratively peel one row off the front at a time.
        # (The previous recursive version could exhaust the stack on a
        # pathologically long merged line.)
        rows = []
        rest = line
        while rest.count(backslash) > (columns - 1):
            chunks = rest.split(backslash)
            rows.append(backslash.join(chunks[:columns]))
            # split() consumed the separators, so re-prefix the remainder
            # with the backslash its first column lost.
            rest = backslash + backslash.join(chunks[columns:])
        rows.append(rest)
        return rows

    with open(filename, 'r') as f:
        # the server database contains many blank lines to ignore
        rows = [line for line in f if line != "\n"]

    output = []
    for row in rows:
        output.extend(unmerge_rows(row))
    return output
def init_logging():
    """Configure root logging to a fresh ``_logs/dbimport-N.log`` file.

    Picks the first unused numeric suffix N and returns the chosen
    log file name.
    """
    # BUG FIX: 'mkdir' was imported but never called — without this,
    # open() below fails whenever the _logs directory does not exist yet.
    if not exists("_logs"):
        mkdir("_logs")
    template = "_logs/dbimport-%s.log"
    n = 0
    while exists(template % n):
        n += 1
    filename = template % n
    # The stream is handed to the logging module for the lifetime of the
    # process, so it is deliberately not closed here.
    stream = open(filename, mode='a', encoding='utf-8')
    logging.basicConfig(stream=stream, level=logging.DEBUG)
    return filename
- #------------------------------------------------+
- # Functions: Clean up.
- #------------------------------------------------+
# Unlike other rows, the separator character '/' is part of the value
# of the second column, so an ordinary match for '/' or '\' cannot be
# done as it is for the other row types.
- # example from game server db:
- # \/uid2name/Mnumg2Yh/yxNFDTqGI+YyhlM7QDI0fpEmAaBJ8cI5dU=\Tuxxy
- # it should become:
- # ["uid2name", "Mnumg2Yh/yxNFDTqGI+YyhlM7QDI0fpEmAaBJ8cI5dU=", "Tuxxy"]
def uid2namefix(row):
    """Split a ``\\/uid2name/<crypto_idfp>\\<name>`` row into its three fields.

    Returns ["uid2name", <crypto_idfp value>, <player name value>].
    """
    # Replace the first and the final backslash with commas, giving
    # ',/uid2name/<crypto_idfp>,<name>'.
    swapped = re.sub(r'^([^\\]*)\\|\\(?=[^\\]*$)', ',', row)
    # Replace the first two forward slashes as well, giving
    # ',,uid2name,<crypto_idfp>,<name>'.
    swapped = swapped.replace('/', ',', 2)
    # Drop the two leftover leading commas, then split into the fields.
    fields = swapped[2:].split(',')
    # The player name is percent-encoded and still carries the row's newline.
    fields[2] = unquote(fields[2]).strip('\n')
    return fields
# Single O(n) pass organizing cts-related rows into lists.
def filters(db):
    """Sort raw server rows into (times, ranks, ids) row lists."""
    times = []
    ranks = []
    ids = []
    rank_col = 2  # index of the rank token after tokenizing a record row
    for row in db:
        if "uid2name" in row:
            ids.append(uid2namefix(row))
            continue
        # Tokenize: substrings containing no backslash, forward slash,
        # or newline.
        tokens = re.findall(r'[^\\/\n]+', row)
        if "cts100record/time" in row:
            tokens[rank_col] = int(tokens[rank_col].replace("time", ""))
            times.append(tokens)
        if "cts100record/crypto_idfp" in row:
            tokens[3] = unquote(tokens[3])
            tokens[rank_col] = int(tokens[rank_col].replace("crypto_idfp", ""))
            ranks.append(tokens)
        if "cts100record/speed" in row:
            # speed records - not implemented
            pass
    return times, ranks, ids
- #------------------------------------------------+
- # Functions: Database Creation
- #------------------------------------------------+
def inserttodb(c, q, d):
    """Execute query ``q`` once per row of ``d`` on cursor ``c``.

    executemany would be faster, but iterating row by row lets us
    catch the problematic rows individually and proceed with the OK ones.
    """
    for row in d:
        try:
            c.execute(q, row)
        except (sql.ProgrammingError, sql.InterfaceError) as err:
            # Log instead of print so rejected rows land in the dbimport
            # log file next to everything else this module records.
            # (InterfaceError covers values sqlite3 cannot bind.)
            logging.error("row rejected: %s (%s)", row, err)
#------------------------------------------------+
# insert new data directly into new database file
def i(d, s):
    """Parse server file ``s`` and insert its rows into sqlite database ``d``.

    Per-row problems are handled inside inserttodb(); an sql.Error at
    this level is logged and the transaction rolled back.
    """
    con = sql.connect(d)
    try:
        with con:  # commits on clean exit of this block
            csr = con.cursor()
            try:
                times, ranks, ids = filters(get_list_from_server_txt(s))
                if times:
                    inserttodb(csr, "INSERT OR REPLACE INTO Cts_times VALUES(?, ?, ?, ?)", times)
                    logging.info('\n'.join(str(x) for x in times))
                if ranks:
                    inserttodb(csr, "INSERT OR REPLACE INTO Cts_ranks VALUES(?, ?, ?, ?)", ranks)
                    logging.info('\n'.join(str(x) for x in ranks))
                if ids:
                    inserttodb(csr, "INSERT OR REPLACE INTO Id2alias VALUES(?, ?, ?)", ids)
                    logging.info('\n'.join(str(x) for x in ids))
            except sql.Error:
                logging.exception("sql error encountered in function 'i'")
                con.rollback()
    finally:
        # sqlite3's context manager commits/rolls back but does NOT
        # close the connection; close explicitly to avoid leaking it.
        con.close()
# 'insert' new data into a file i.e sql query file
def f(d, s):
    """Write the would-be INSERT statements for server file ``s`` into text file ``d``."""
    # NOTE(review): the generated statements interpolate values without
    # SQL quoting, so string values produce non-executable SQL — confirm
    # whether this output is meant for humans or for sqlite.
    with open(d, 'w', encoding='utf-8') as h:
        times, ranks, ids = filters(get_list_from_server_txt(s))
        for t in times:
            h.write("INSERT OR REPLACE INTO Cts_times VALUES(%s, %s, %s, %s)\n" % tuple(t))
        for r in ranks:
            h.write("INSERT OR REPLACE INTO Cts_ranks VALUES(%s, %s, %s, %s)\n" % tuple(r))
        # BUG FIX: table name was misspelled 'Id2aslias'; also renamed the
        # loop variable, which shadowed the sibling function i().
        for alias in ids:
            h.write("INSERT OR REPLACE INTO Id2alias VALUES(%s, %s, %s)\n" % tuple(alias))
# Test whether repeat rows are added.
def duplicatestest(d, s):
    """Import ``s`` into database ``d`` twice and verify row counts do not grow.

    Logs an error per table whose row count changed between the two
    imports; logs success when all three are stable.
    """
    # (Previously the connection variable 'c' was rebound to a fetchall
    # result mid-function; distinct names keep the handle visible.)
    con = sql.connect(d)
    ok = True
    with con:
        cs = con.cursor()
        try:
            logging.info("Inserting into database (1/2)")
            i(d, s)
            logging.info("Querying (1/2)")
            cs.execute("SELECT * FROM Cts_times")
            times_first = cs.fetchall()
            cs.execute("SELECT * FROM Cts_ranks")
            ranks_first = cs.fetchall()
            cs.execute("SELECT * FROM Id2alias")
            ids_first = cs.fetchall()
            logging.info("Inserting into database (2/2)")
            i(d, s)
            logging.info("Querying (2/2)")
            cs.execute("SELECT * FROM Cts_times")
            times_second = cs.fetchall()
            cs.execute("SELECT * FROM Cts_ranks")
            ranks_second = cs.fetchall()
            cs.execute("SELECT * FROM Id2alias")
            ids_second = cs.fetchall()
            if len(times_first) != len(times_second):
                logging.error("Issue with Cts_times")
                ok = False
            if len(ranks_first) != len(ranks_second):
                logging.error("Issue with Cts_ranks")
                ok = False
            if len(ids_first) != len(ids_second):
                logging.error("Issue with Id2alias")
                ok = False
            if ok:
                logging.info("Database ok - no repeat rows added.")
        except sql.Error:
            logging.exception("encountered sql error in function 'duplicate test'.")
if __name__ == "__main__":
    ap = argparse.ArgumentParser()
    ap.add_argument('db')
    ap.add_argument('src')
    ap.add_argument('-t', '--test', action='store_true')
    ap.add_argument('-q', '--sql', action='store_true')
    args = ap.parse_args()
    log_file = init_logging()
    print("Writing log to ", log_file)
    # BUG FIX: the modes are mutually exclusive. Previously '--test'
    # fell through to the trailing else and ran a third import via i().
    if args.test:
        duplicatestest(args.db, args.src)
    elif args.sql:
        f(args.db, args.src)
    else:
        i(args.db, args.src)
|