dotfiles

My dotfiles
git clone https://git.neuralcrash.com/dotfiles.git
Log | Files | Refs

jpParser.py (2315B)


      1 # coding: utf8
      2 import re, os
      3 from aqt.qt import *
      4 from anki.hooks import addHook
      5 from aqt import mw
      6 
      7 from shinmeikai_definitions import config
      8 from codecs import open
      9 
     10 source_directory = os.path.join(os.path.dirname(__file__),'data')
     11 wordFreqListFileName = os.path.join(source_directory, config.Config().wordFreqList)
     12 kanjiListFileName = os.path.join(source_directory, config.Config().kanjiList)
     13 
     14 # for words not found
     15 veryLargeFreq  = 999999
     16 # for words in the frequency file, but without a frequency
     17 emptyFrequency = 888888
     18 
     19 
     20 
     21 def readDict(file, parseFun):
     22     # parseFun should a tuple, the first is the key, the second the value
     23     with open(file, encoding="utf-8") as f:
     24         lines = [parseFun(line) for line in f if "\t" in line]
     25     return dict(lines)
     26 
     27 def readKeywords(file):
     28 
     29     def parse_line(line):
     30         f = line.rstrip().split("\t")[:2]
     31         assert len(f) == 2
     32         return (f[0], f[1])
     33 
     34     return readDict(file, parse_line)
     35 
     36 def readFrequency(file):
     37 
     38     def parse_line(line):
     39         f = line.rstrip().split("\t")[:3]
     40         assert len(f) >= 2
     41         # if the second field does not exist, or
     42         # is not a valid number
     43         # simply set it to the largefreq
     44         try:
     45            freqInt = int(f[2])
     46         except:
     47             freqInt = emptyFrequency
     48         return ((f[0], f[1]), freqInt)
     49 
     50     return readDict(file, parse_line)
     51 
     52 
     53 kanjiList = readKeywords(kanjiListFileName)
     54 
     55 kanjiFreq = readFrequency(wordFreqListFileName)
     56 
     57 
     58 def getRTKKeyword(str):
     59     kanji = filter (lambda a: a in kanjiList, list(str))
     60     # filter returns an iterator
     61     mappedKanji = map(lambda a: a + ": " + kanjiList.get(a, "this should never happen"), kanji)
     62     kanjiField = "</div><div>".join(mappedKanji)
     63     return "" if  (kanjiField == "") else ( "<div>" + kanjiField + "</div>")
     64 
     65 def stringContainsKanji(searchTerm):
     66     for c in searchTerm:
     67         #Checks if codepoint of character is anything but hiragana/katakana
     68         if ord(c) < 12353 or ord(c) > 12543:
     69             return True
     70     return False
     71 
     72 def getWordFreq(strF, strK):
     73 
     74     if isinstance(strK, list):
     75         # iterate through each... find smallest
     76         listFreq = map(lambda k: kanjiFreq.get((k, strF), veryLargeFreq), strK)
     77         return min(listFreq)
     78     else:
     79         return kanjiFreq.get((strK, strF), veryLargeFreq)