#!/usr/bin/python
# -*- coding: utf-8 -*-
### BEGIN LICENSE
# Copyright (C) 2012 Jobi Kea Carter keacarterdev@gmail.com
# This program is free software: you can redistribute it and/or modify it 
# under the terms of the GNU General Public License version 3, as published 
# by the Free Software Foundation.
# 
# This program is distributed in the hope that it will be useful, but 
# WITHOUT ANY WARRANTY; without even the implied warranties of 
# MERCHANTABILITY, SATISFACTORY QUALITY, or FITNESS FOR A PARTICULAR 
# PURPOSE.  See the GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License along 
# with this program.  If not, see <http://www.gnu.org/licenses/>.
### END LICENSE

##Future updates: consider making lookups case-insensitive (e.g. by lowercasing keys), so that searching for "ok" also returns "OK".
##Look into data structures that are more size-efficient than shelve.



import shelve
import os

class edict:
	'''make sure to use set_db_directory() to set the directory to your db, or this will not work.'''
	def __init__(self):
		'''Create the instance without doing any work.

		No attributes are set and no db is opened here; that happens in
		set_db_directory().
		'''

	def set_db_directory(self,eng_db_directory,kanji_db_directory):
		self.eng_file = eng_db_directory
		self.kanji_file = kanji_db_directory
		self.eng_dic = shelve.open(self.eng_file,writeback=False) #open dic file 
		self.kanji_dic = shelve.open(self.kanji_file,writeback=False) #open dic file
	
	#This method is used to create the dictionary dbs. ##Remember: you are still using the headless edict file.
	def make_dictionaries(self,edict_source,output_dir,eng_dic,kanji_dic): 
		'''Fill the two dictionary dbs from an edict source, then close them.

		edict_source: iterable of edict rows (lines).
		output_dir:   accepted for backward compatibility only; this method
		              never reads from or writes to it.
		eng_dic:      mapping with a close() method that receives the English
		              definition keys (see add_eng_to_dic).
		kanji_dic:    mapping with a close() method that receives the
		              kanji and reading keys (see add_kanji_and_readings_to_dic).

		Every row is stored as the value under each of its keys. Both dbs are
		closed when the method finishes, including when a row raises.
		'''
		try:
			for row in edict_source:
				self.add_kanji_and_readings_to_dic(kanji_dic,row,row)
				self.add_eng_to_dic(eng_dic,row,row) #english definitions as keys, returning the rows that contain them
		finally:
			try:
				eng_dic.close() #close the db file
			finally:
				kanji_dic.close() #close the db file
		
	#The following methods are used for searching the databases; if a key exists in a dictionary, they return all matches for that key.
	def search(self,key):  
		'''Look key up in the kanji/reading db and in the English db.

		Returns a sorted list, without duplicates, of everything stored under
		key in self.kanji_dic and self.eng_dic, or False when nothing was found.

		Lookups are best effort: any ordinary error raised by a db (missing
		key, db not opened yet, ...) counts as "no match" for that db.
		KeyboardInterrupt and SystemExit are no longer swallowed.
		'''
		def_list = []
		try: #try in kanji_dic
			def_list = self.kanji_dic[key]
		except Exception:
			pass
		try: #try in eng dic
			def_list = def_list + self.eng_dic[key]
		except Exception:
			pass
		if def_list:
			return sorted(set(def_list)) #sorted list with no duplicates
		return False

	def search_list(self,key_list):
		'''Run search() for every key in key_list and merge the results.

		Returns a sorted list, without duplicates, of all matches; the list is
		empty when no key matched.
		'''
		found = set()
		for key in key_list:
			matches = self.search(key)
			if matches: #search() returns False when the key matched nothing
				found.update(matches)
		return sorted(found)
	
	def write_output(self,def_list,output_dir,filename,file_format):
		import os.path
		if file_format == 'csv':
			seperator = ','
		elif file_format == 'tsv':
			seperator = '\t'
		'''requires a list of definitions, output directory and a filename to write the csv.
		   it will check to see if the file already exists, and add a numerator 0000 long,
		   to note the version. '''
		if output_dir[-1] != '/':
			output_dir = output_dir + '/'
		default_filename = filename + '.' + file_format
		mod = 1
		while os.path.isfile(output_dir+default_filename):
			default_filename = default_filename.replace(str(mod-1).zfill(4),'')
			default_filename = default_filename[:-4] + str(mod).zfill(4) + '.' + file_format
			mod += 1
		csv = open(output_dir + default_filename, "w")
		CSVString = ''
		for definition in def_list: #make your string for the csv file
			c1 = self.grab_first_section(definition)
			definition = definition[len(c1)+1:]
			if '[' in self.grab_first_section(definition):
				c2 = self.grab_first_section(definition)
				definition = definition[len(c2)+1:]
			else:
				c2 = c1
			c3 = definition
			r = c1 + seperator + c2.replace('[','').replace(']','') + seperator + c3.replace('/','',1).replace(',',';')# + seperator
			if r[-1] == seperator:
				r = r[:-1]
			CSVString += r + '\n'
		csv.write(CSVString)
		csv.close()
		return default_filename
	
	
	#The following helper methods are used by various other methods.
	def grab_first_section(self,string):
		'''Return the part of string that comes before its first space.

		The cut position comes from first_space(), which reports 0 when there
		is no space, so a string without any space yields ''.
		'''
		return string[:self.first_space(string)]
	
	def grab_rest(self,string):
		'''Return everything in string that follows its first space.

		first_space() reports 0 when there is no space, so in that case the
		result is string without its first character.
		'''
		start = self.first_space(string) + 1
		return string[start:]
	
	def first_space(self,string,alternate_character=' '):
		'''Return the index of the first occurrence of alternate_character in string.

		alternate_character defaults to a space. 0 is returned when it does not
		occur at all, so 0 can mean either "at the very start" or "not found".
		'''
		try:
			return string.index(alternate_character)
		except ValueError: #not present
			return 0
	
	def remove_between(self,string,opening='(',closing=')'):
		'''Remove every opening...closing group, delimiters included, from string.

		string:  the text to clean.
		opening: delimiter that starts a group (default '(').
		closing: delimiter that ends a group (default ')').

		Groups may be nested to any depth; an outer group is removed together with
		everything inside it. Delimiters that have no partner (a closing one with
		nothing open, or an opening one that is never closed) are left in the text
		as ordinary characters. Afterwards each '  ' is replaced by ' ' and each
		'//' by '/' (a single pass each).

		Returns the cleaned string.
		'''
		spans = [] #(start, end) of the outermost matched groups, end exclusive
		pending = [] #positions of opening delimiters still waiting for a partner
		if opening and closing: #an empty delimiter can never match anything
			position = 0
			length = len(string)
			while position < length:
				if string.startswith(opening,position):
					pending.append(position)
					position += len(opening)
				elif string.startswith(closing,position):
					position += len(closing)
					if pending:
						start = pending.pop()
						#groups recorded after this opening lie inside the new group
						while spans and spans[-1][0] > start:
							spans.pop()
						spans.append((start,position))
				else:
					position += 1
		kept = []
		cursor = 0
		for start,end in spans: #spans are in order and never overlap
			kept.append(string[cursor:start])
			cursor = end
		kept.append(string[cursor:])
		return ''.join(kept).replace('  ',' ').replace('//','/')

	def remove_blank_space_at_beginning(self,string):
		'''Return string with all leading spaces removed.

		Only the space character is stripped (tabs and newlines are kept). A
		string that is empty or made up only of spaces yields ''.
		'''
		return string.lstrip(' ')

	#the following methods are used only for creating the dictionary DBs	
	def make_eng_def_list(self,string):  
		'''Return the list of English definitions found in a dictionary row.

		string must be a dictionary row ("WORD [READING] /def 1/def 2/.../"; the
		[READING] part is optional). The definitions are taken from the first '/'
		onwards, so rows without a reading keep their first definition word.
		Parenthesised remarks are stripped with remove_between(), newlines are
		removed and the text is split on '/'. Entries that are empty or contain
		only spaces are dropped; leading spaces on the remaining entries are kept.

		A string without any '/' falls back to skipping its first two
		space-separated sections.
		'''
		slash = string.find('/')
		if slash != -1:
			definitions = string[slash:]
		else:
			definitions = self.grab_rest(self.grab_rest(string))
		cleaned = self.remove_between(definitions).replace('\n','')
		return [item for item in cleaned.split('/') if item.strip(' ')]
	
	def add_eng_to_dic(self,dictionary,string,key_return):
		'''Index a dictionary row under each of its English definitions.

		dictionary: the English db (a dict or a shelf).
		string:     the dictionary row to take the definitions from.
		key_return: the value to store under every key.

		Each definition, with leading spaces removed, becomes a key. A
		definition that starts with 'to ' is additionally stored without that
		prefix, so "to eat" is also found under "eat". key_return is added to a
		key's list only if it is not already there.
		'''
		for item in self.make_eng_def_list(string):
			key = self.remove_blank_space_at_beginning(item)
			keys = [key]
			if key.startswith('to '):
				keys.append(key[3:])
			for entry_key in keys:
				if entry_key not in dictionary: #new key: start its list with this value
					dictionary[entry_key] = [key_return]
					continue
				rows = dictionary[entry_key]
				if key_return not in rows: #avoid duplicates
					rows.append(key_return)
					#store the list back: a shelf opened without writeback hands out copies
					dictionary[entry_key] = rows

	def add_kanji_and_readings_to_dic(self,dictionary,row,key_return):
		'''Index a dictionary row under its headword and, if present, its reading.

		dictionary: the kanji/reading db (a dict or a shelf).
		row:        the dictionary row ("WORD [READING] /definitions/").
		key_return: the value to store under every key.

		The text before the first space of row is always used as a key. When the
		next space-separated section contains both '[' and ']', it is used as a
		second key with the brackets removed. key_return is added to a key's
		list only if it is not already there.
		'''
		keys = [self.grab_first_section(row)] #the kanji/word which begins the line
		possible_reading = self.grab_first_section(self.grab_rest(row)) #second section of the row
		if '[' in possible_reading and ']' in possible_reading: #it really is a reading
			keys.append(possible_reading.replace(']','').replace('[',''))
		for key in keys:
			if key not in dictionary: #new key: start its list with this value
				dictionary[key] = [key_return]
				continue
			rows = dictionary[key]
			if key_return not in rows: #avoid duplicates
				rows.append(key_return)
				#store the list back: a shelf opened without writeback hands out copies
				dictionary[key] = rows

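#example use (the instance is named "dic" so that it does not shadow the edict class):
#~ dic = edict()
#~ dic.make_dictionaries(edict_lines,output_dir,eng_dic,kanji_dic) #only needed to build the dbs
#~ dic.set_db_directory('path/to/eng_db','path/to/kanji_db')
#~ print(dic.search_list(['書く','test','話']))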














