Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions jamdict/data/setup_jmdict.sql
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,14 @@ CREATE TABLE SenseGloss (
,FOREIGN KEY (sid) REFERENCES Sense(id)
);

-- Example sentences attached to a sense.
--   sid          : id of the owning row in Sense
--   text         : form of the term as it appears in the Japanese sentence (ex_text)
--   sentence_jpn : the Japanese example sentence
--   sentence_eng : the English translation of that sentence
-- Both sentence columns are declared TEXT explicitly, like every other
-- textual column in this schema, instead of being left untyped.
CREATE TABLE SenseExample (
    sid INTEGER
    ,text TEXT
    ,sentence_jpn TEXT
    ,sentence_eng TEXT
    ,FOREIGN KEY (sid) REFERENCES Sense(id)
);

-------------------------------------------------------------------------------------
-- INDICES - JMDict
-------------------------------------------------------------------------------------
Expand Down Expand Up @@ -231,3 +239,7 @@ CREATE INDEX SenseGloss_sid ON SenseGloss(sid);
CREATE INDEX SenseGloss_lang ON SenseGloss(lang);
CREATE INDEX SenseGloss_gend ON SenseGloss(gend);
CREATE INDEX SenseGloss_text ON SenseGloss(text);
-- SenseExample: one index per column (owning sense id, matched term text,
-- Japanese sentence, English sentence).
CREATE INDEX SenseExample_sid ON SenseExample(sid);
CREATE INDEX SenseExample_text ON SenseExample(text);
CREATE INDEX SenseExample_sentence_jpn ON SenseExample(sentence_jpn);
CREATE INDEX SenseExample_sentence_eng ON SenseExample(sentence_eng);
68 changes: 61 additions & 7 deletions jamdict/jmdict.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,12 +406,7 @@ def __init__(self):

self.gloss: List[SenseGloss] = [] # <!ELEMENT gloss (#PCDATA | pri)*>

'''The example elements provide for pairs of short Japanese and
target-language phrases or sentences which exemplify the usage of the
Japanese head-word and the target-language gloss. Words in example
fields would typically not be indexed by a dictionary application.'''
# It seems that this field is not used anymore!
self.examples = [] # <!ELEMENT example (#PCDATA)>
self.examples: List[SenseExample] = [] # <!ELEMENT example (ex_srce,ex_text,ex_sent+)>

def __repr__(self):
return str(self)
Expand Down Expand Up @@ -455,6 +450,8 @@ def to_dict(self):
sd['dialect'] = self.dialect
if self.gloss:
sd['SenseGloss'] = [x.to_dict() for x in self.gloss]
if self.examples:
sd['examples'] = self.examples
return sd


Expand Down Expand Up @@ -541,6 +538,45 @@ def to_dict(self):
return gd


class SenseExample(object):
    '''The example elements contain a Japanese sentence using the term
    associated with the entry, and one or more translations of that sentence.
    Within the element, the ex_srce element will indicate the source of the
    sentences (typically the sequence number in the Tatoeba Project), the
    ex_text element will contain the form of the term in the Japanese
    sentence, and the ex_sent elements contain the example sentences.
    DTD: <!ELEMENT example (ex_srce,ex_text,ex_sent+)>

    <!ELEMENT ex_srce (#PCDATA)>
    <!ELEMENT ex_text (#PCDATA)>
    <!ELEMENT ex_sent (#PCDATA)>
    <!ATTLIST ex_sent xml:lang CDATA "eng">
    <!ATTLIST ex_srce exsrc_type CDATA #IMPLIED>

    Attributes:
        text: form of the term as used in the Japanese sentence (ex_text).
        sentences: mapping of language code (e.g. 'jpn', 'eng') to the
            example sentence in that language.
    '''

    def __init__(self, text, sentences):
        self.text = text
        self.sentences = sentences

    def __repr__(self):
        return str(self)

    def __str__(self):
        '''Render as "<text> - <jpn sentence> - (lang:xx) <sentence> ...".

        Missing pieces are tolerated: a None text is rendered as an empty
        string, and sentences that are absent or None are left out rather
        than raising KeyError/TypeError.
        '''
        # str.join() only accepts strings, so never let None through.
        parts = [self.text or '']
        sentences = self.sentences or {}

        # The Japanese sentence always comes first when it is available.
        jpn = sentences.get('jpn')
        if jpn is not None:
            parts.append(jpn)

        for lang, sent in sentences.items():
            if lang != 'jpn' and sent is not None:
                parts.append('(lang:%s) %s' % (lang, sent))

        return ' - '.join(parts)

    def to_dict(self):
        '''Return a dict holding only the non-empty fields of this example.

        The sentences mapping is copied so callers cannot mutate this
        object through the returned structure.
        '''
        gd = {}
        if self.text:
            gd['text'] = self.text
        if self.sentences:
            gd['sentences'] = dict(self.sentences)
        return gd


class LSource:
'''This element records the information about the source
language(s) of a loan-word/gairaigo. If the source language is other
Expand Down Expand Up @@ -780,7 +816,7 @@ def parse_sense(self, sense_tag, entry):
elif child.tag == 'dial':
sense.dialect.append(child.text)
elif child.tag == 'example':
sense.examples.append(child.text)
self.parse_example(child, sense)
elif child.tag == 'lsource':
self.parse_lsource(child, sense)
elif child.tag == 'gloss':
Expand All @@ -798,6 +834,24 @@ def get_attrib(self, a_tag, attr_name, default_value=''):
else:
return default_value

def parse_example(self, example_tag, sense):
    '''Parse an <example> element and attach the result to a sense.

    Arguments:
        example_tag: the <example> XML element; its ex_text and ex_sent
            children are read.
        sense: the sense object whose ``examples`` list receives the
            newly built SenseExample.

    Returns:
        The SenseExample that was appended to ``sense.examples``.
    '''
    text = ''
    sentences = {}

    for child in example_tag:
        if child.tag == 'ex_text':
            text = child.text
        elif child.tag == 'ex_sent':
            # The DTD declares xml:lang on ex_sent with a default of "eng",
            # so a sentence without the attribute is an English one. Using
            # that default keeps it reachable as sentences['eng'] instead
            # of being filed under an empty-string key.
            lang = self.get_attrib(child, 'xml:lang', 'eng')
            sentences[lang] = child.text
        # ex_srce children are skipped on purpose: SenseExample only
        # stores the term text and the sentences.

    example = SenseExample(text, sentences)
    sense.examples.append(example)
    return example

def parse_sensegloss(self, gloss_tag, sense):
lang = self.get_attrib(gloss_tag, 'xml:lang')
gend = self.get_attrib(gloss_tag, 'g_gend')
Expand Down
9 changes: 8 additions & 1 deletion jamdict/jmdict_sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from puchikarui import Schema
from . import __version__ as JAMDICT_VERSION, __url__ as JAMDICT_URL
from .jmdict import Meta, JMDEntry, EntryInfo, Link, BibInfo, Audit, KanjiForm, KanaForm, Sense, SenseGloss, LSource
from .jmdict import Meta, JMDEntry, EntryInfo, Link, BibInfo, Audit, KanjiForm, KanaForm, Sense, SenseGloss, SenseExample, LSource

# -------------------------------------------------------------------------------
# Configuration
Expand Down Expand Up @@ -82,6 +82,7 @@ def __init__(self, db_path, *args, **kwargs):
self.add_table('SenseSource', ['sid', 'text', 'lang', 'lstype', 'wasei'])
self.add_table('dialect', ['sid', 'text'])
self.add_table('SenseGloss', ['sid', 'lang', 'gend', 'text'])
self.add_table('SenseExample', ['sid', 'text', 'sentence_jpn', 'sentence_eng'])


class JMDictSQLite(JMDictSchema):
Expand Down Expand Up @@ -268,6 +269,10 @@ def get_entry(self, idseq, ctx=None):
gs = ctx.SenseGloss.select('sid=?', (dbs.ID,))
for g in gs:
s.gloss.append(SenseGloss(g.lang, g.gend, g.text))
# examples
exs = ctx.SenseExample.select('sid=?', (dbs.ID,))
for e in exs:
s.examples.append(SenseExample(e.text, {"jpn":e.sentence_jpn, "eng":e.sentence_eng}))
entry.senses.append(s)
return entry

Expand Down Expand Up @@ -359,3 +364,5 @@ def insert_entry(self, entry, ctx=None):
# SenseGloss
for g in s.gloss:
ctx.SenseGloss.insert(sid, g.lang, g.gend, g.text)
for e in s.examples:
ctx.SenseExample.insert(sid, e.text, e.sentences['jpn'], e.sentences['eng'])
3 changes: 3 additions & 0 deletions jamdict/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,9 @@ def dump_result(results, report=None):
report.print("-" * 20)
for idx, s in enumerate(e.senses):
report.print("{idx}. {s}".format(idx=idx + 1, s=s))
for idx, e in enumerate(s.examples):
report.print("\t{idx}. {e}".format(idx=idx + 1, e=e.sentences['jpn']))
report.print("\t {e:>{spaces}}".format(spaces=len(str(idx))+len(e.sentences['eng']), e=e.sentences['eng']))
report.print('')
else:
report.print("No dictionary entry was found.")
Expand Down