jpParser.py (2315B)
# coding: utf8
"""Kanji keyword and word frequency lookups backed by tab-separated files.

Both lookup tables are read from the ``data`` directory next to this module
at import time; the file names come from ``config.Config()``.
"""
import os
import re
from aqt.qt import *
from anki.hooks import addHook
from aqt import mw

from shinmeikai_definitions import config
# NOTE: this deliberately replaces the builtin ``open`` for the whole module.
from codecs import open

source_directory = os.path.join(os.path.dirname(__file__), 'data')
wordFreqListFileName = os.path.join(source_directory, config.Config().wordFreqList)
kanjiListFileName = os.path.join(source_directory, config.Config().kanjiList)

# Frequency reported for words that are not in the frequency file at all.
veryLargeFreq = 999999
# Frequency stored for words that are in the frequency file but whose
# frequency column is missing or is not a valid integer.
emptyFrequency = 888888

# Code point range treated as kana by stringContainsKanji (inclusive):
# 0x3041 == 12353 and 0x30FF == 12543.
_KANA_FIRST = 0x3041
_KANA_LAST = 0x30FF


def readDict(file, parseFun):
    """Build a dict from the tab-containing lines of a UTF-8 text file.

    file     -- path of the file to read.
    parseFun -- called with each raw line that contains a tab character;
                it should return a tuple whose first item is the key and
                whose second item is the value.

    Lines without a tab are skipped.  When a key occurs more than once the
    last occurrence wins.
    """
    with open(file, encoding="utf-8") as f:
        return dict(parseFun(line) for line in f if "\t" in line)


def readKeywords(file):
    """Read ``file`` into a dict mapping each line's first field to its second.

    Fields are tab separated; anything after the second field is ignored.
    """

    def parse_line(line):
        # rstrip() drops the newline and any other trailing whitespace
        # (including trailing tabs) before the line is split.
        fields = line.rstrip().split("\t")[:2]
        assert len(fields) == 2, "expected two tab-separated fields: %r" % (line,)
        return (fields[0], fields[1])

    return readDict(file, parse_line)


def readFrequency(file):
    """Read ``file`` into a dict mapping (field 1, field 2) to a frequency.

    Fields are tab separated.  The third field is parsed as an integer; when
    it is missing or is not a valid integer the entry gets ``emptyFrequency``.
    """

    def parse_line(line):
        fields = line.rstrip().split("\t")[:3]
        assert len(fields) >= 2, "expected at least two tab-separated fields: %r" % (line,)
        try:
            freqInt = int(fields[2])
        except (IndexError, ValueError):
            # IndexError: no third field; ValueError: not a valid number.
            freqInt = emptyFrequency
        return ((fields[0], fields[1]), freqInt)

    return readDict(file, parse_line)


kanjiList = readKeywords(kanjiListFileName)

kanjiFreq = readFrequency(wordFreqListFileName)


def getRTKKeyword(str):
    """Return an HTML fragment listing the keyword of every known kanji.

    Each character of ``str`` that is a key of ``kanjiList`` contributes one
    ``<div>character: keyword</div>`` element, in order of appearance
    (repeated characters are listed repeatedly).  Returns ``""`` when no
    character of ``str`` is in ``kanjiList``.
    """
    entries = [c + ": " + kanjiList[c] for c in str if c in kanjiList]
    if not entries:
        return ""
    return "<div>" + "</div><div>".join(entries) + "</div>"


def stringContainsKanji(searchTerm):
    """Return True if ``searchTerm`` has any character outside the kana range.

    NOTE: despite the name, every character whose code point lies outside
    0x3041-0x30FF counts (ASCII letters, digits and punctuation included),
    not only kanji.  An empty string yields False.
    """
    return any(
        not (_KANA_FIRST <= ord(c) <= _KANA_LAST) for c in searchTerm
    )


def getWordFreq(strF, strK):
    """Return the frequency stored in ``kanjiFreq`` for a word.

    strF -- second element of the lookup key (second column of the
            frequency file; presumably the reading -- confirm with callers).
    strK -- first element of the lookup key (first column of the frequency
            file), or a list of such values.

    For a list, the smallest frequency among its elements is returned.
    Entries that are not in ``kanjiFreq`` count as ``veryLargeFreq``, which
    is also the result for an empty list.
    """
    if isinstance(strK, list):
        return min(
            (kanjiFreq.get((k, strF), veryLargeFreq) for k in strK),
            default=veryLargeFreq,
        )
    return kanjiFreq.get((strK, strF), veryLargeFreq)