dotfiles

My dotfiles
git clone https://git.neuralcrash.com/dotfiles.git
Log | Files | Refs

shinmeikai.py (3361B)


      1 # coding: utf8
      2 import re, os
      3 from . import jpParser
      4 from aqt import mw
      5 from aqt.utils import showInfo
      6 from aqt.qt import *
      7 
      8 #parserObj = parser.Parser()
      9 source_directory = os.path.join(os.path.dirname(__file__),'data')
     10 
     11 dictFileNamePrefix = os.path.join(source_directory, "Shinmeikai", "shinmeikai_")
     12 
     13 amountOfDictionaryFiles = 9
     14 
     15 def getDefsOfWord(word, conf):
     16     #return u"私はバカです"
     17     return searchForWordInShinmeikai(word, conf)
     18 
     19 def searchForWordInShinmeikai(searchTerm, conf):
     20     dictionaryCounter = 1
     21     defArray = []
     22     definitionFound = False
     23     rtkKeywordList = []
     24 
     25     if jpParser.stringContainsKanji(searchTerm):
     26         regex = "(\[\""+searchTerm+"\"(,.*?){7}\])" #With kanji
     27     else:
     28         regex = "(\[\".{0,8}\",\""+searchTerm+"\"(,.*?){6}\])" #Without kanji
     29 
     30     #Looks for matches in all shinmeikai dictionary files.
     31     while True:
     32         dict = open(dictFileNamePrefix +str(dictionaryCounter)+".txt", "rb")
     33         contents = dict.read().decode("UTF-8")
     34         dict.close()
     35 
     36         pattern = re.compile(regex, re.UNICODE)
     37         export = pattern.findall(contents)
     38         #return pattern.pattern
     39         #return str(export).decode('unicode-escape')
     40         if len(export) >= 1:
     41             definitionFound = True
     42             for x in export:
     43                 #after = x
     44                 #after.encode("UTF-8")
     45                 #return str(x).decode('unicode-escape')
     46 				########### innerDef = extractDefFromElement(str(x).decode('unicode-escape'))
     47                 innerDef = extractDefFromElement(str(x))
     48                 hira = extractHiraFromElement(innerDef)
     49                 kanji = extractKanjiFromElement(innerDef)
     50 
     51                 if kanji == None:#If word doesnt have kanji, search with hira for both fields
     52                     kanji = hira
     53                 freq = jpParser.getWordFreq(hira, kanji)
     54 
     55                 defArray.append((innerDef, int(freq)))
     56         if dictionaryCounter >= amountOfDictionaryFiles:
     57             if definitionFound == False:
     58                 defArray = ""
     59             break
     60         dictionaryCounter += 1
     61     return (defArray)
     62 
     63 def extractDefFromElement(org):
     64     regex = "(.*)\[.*\[\"(.*?)\"\]"
     65     matchObj = re.match(regex, org)
     66     if matchObj == None:
     67         returnValue = None
     68     else:
     69         returnValue = matchObj.group(2)
     70         returnValue = re.sub(r'\\\\', r'\\', returnValue)
     71     return returnValue
     72 
     73 def extractHiraFromElement(org):
     74     regex = "(.*?) "
     75     matchObj = re.match(regex, org)
     76     if matchObj == None:
     77         returnValue = None
     78     else:
     79         returnValue = matchObj.group(1)
     80     return returnValue
     81 
     82 
     83 def extractKanjiFromElement(org):
     84     regex = ".*?【(.*?)】"
     85     matchObj = re.match(regex, org)
     86     if matchObj == None:
     87         return None
     88     else:
     89         returnValue = matchObj.group(1)
     90 
     91     #If more than one kanji, ex(内・家) for うち
     92     #return an array of kanji, It seems like the "・" character doesnt work with regex?
     93     #Currently only handles two different words, like うち -> 家 and 内 works
     94     matchObj = re.search("・", returnValue)
     95     if matchObj != None: #Multiple kanji found
     96         matchObj = re.match("(.*?)・(.*?) ", returnValue+" ")
     97         if matchObj != None:
     98             returnValue = []
     99             returnValue.append(matchObj.group(1))
    100             returnValue.append(matchObj.group(2))
    101     return returnValue