15if pyversion[:3]
in [
'2.6',
'2.7']:
16 import urllib
as urllib_request
26elif pyversion[:2] ==
'3.':
36 return [
unichr(int(i[2:7], 16))
for i
in args
if i[2:7]]
41SCIM_TABLES_VER =
'0.5.13'
42SCIM_PINYIN_VER =
'0.5.92'
49 print(
'File %s is up to date.' % dest)
57 print(
'Downloading from [%s] ...' % url)
59 print(
'Download complete.\n')
65 print(
'Extracting %s ...' % name)
70 if pyversion[:1]
in [
'2']:
71 fc = open(name,
'rb', encoding,
'ignore')
73 fc = open(name,
'r', encoding=encoding, errors=
'ignore')
76unzip =
lambda path, member, encoding =
'U8': \
79untargz =
lambda path, member, encoding =
'U8': \
84 if beginmark
and endmark:
99 elif len(elems[0]) > 1
and len(elems[pos]) > 1:
105 """ Read file from scim-tables and parse it. """
106 global SCIM_TABLES_VER
107 src =
'scim-tables-%s/tables/zh/%s' % (SCIM_TABLES_VER, name)
109 return parserCore(fp, 1,
'BEGIN_TABLE',
'END_TABLE')
117 """ Read phrase_lib.txt and parse it. """
118 global SCIM_PINYIN_VER
119 src =
'scim-pinyin-%s/data/phrase_lib.txt' % SCIM_PINYIN_VER
125 """ Read tsi.src and parse it. """
126 src =
'libtabe/tsi-src/tsi.src'
127 fp =
untargz(path, src,
'big5hkscs')
132 """ Read Unihan_Variants.txt and parse it. """
133 fp =
unzip(path,
'Unihan_Variants.txt',
'U8')
145 if type ==
'kTraditionalVariant':
146 s2t[elems[0]] = elems[1:]
147 elif type ==
'kSimplifiedVariant':
148 t2s[elems[0]] = elems[1:]
154 """ Apply exclude rules from path to mlist. """
155 if pyversion[:1]
in [
'2']:
156 excludes = open(path,
'rb',
'U8').read().split()
158 excludes = open(path,
'r', encoding=
'U8').read().split()
159 excludes = [
word.split(
'#')[0].strip()
for word
in excludes]
160 excludes =
'|'.join(excludes)
168 fp = open(path,
'r', encoding=
'U8')
173 yield elems[0], elems[1:]
178 if pyversion[:1]
in [
'2']:
180 for i
in range(1, len(t)):
184 for i
in range(1, len(t)):
190 fp = open(path,
'r', encoding=
'U8')
194 f = t = elems[0].strip()
206 texcptn =
re.compile(
'^(?:%s)$' %
'|'.join(texc))
207 if pyversion[:1]
in [
'2']:
208 for (tmp_f, tmp_t)
in table.copy().iteritems():
219 fp = open(path,
'r', encoding=
'U8')
227 ret[elems[0]] = elems[1]
238 for j
in range(len(text) - i, 0, -1):
242 text = text[:i] + t + text[i:][j:]
250 fp = open(path,
'r', encoding=
'U8')
253 wordlist = [
line.split(
'#')[0].strip()
for line
in fp]
254 wordlist =
list(set(wordlist))
261 reconv_table[word] = out_table[word] = word
262 reconv_table[new_word] = out_table[new_word] = word
268 wordlist =
list(src_wordlist)
271 word_reconv_table = {}
274 tomanyptn =
re.compile(
'(?:%s)' %
'|'.join(src_tomany))
279 new_word_len = word_len = len(word)
280 while new_word_len == word_len:
281 test_word =
translate(word, reconv_table)
284 (test_word != word
or
286 word !=
translate(new_word, reconv_table))):
287 word_conv_table[word] = new_word
288 word_reconv_table[new_word] = word
293 new_word_len = len(word)
294 return word_reconv_table
298 lines = [
'\'%s\' => \'%s\',' % (f, t)
for (f, t)
in table
if f
and t]
299 return '\n'.join(lines)
304 url =
'https://www.unicode.org/Public/%s/ucd/Unihan.zip' % UNIHAN_VER
305 han_dest =
'Unihan-%s.zip' % UNIHAN_VER
308 sfurlbase =
'http://%s.dl.sourceforge.net/sourceforge/' % SF_MIRROR
311 url = sfurlbase +
'scim/scim-tables-%s.tar.gz' % SCIM_TABLES_VER
312 tbe_dest =
'scim-tables-%s.tar.gz' % SCIM_TABLES_VER
316 url = sfurlbase +
'scim/scim-pinyin-%s.tar.gz' % SCIM_PINYIN_VER
317 pyn_dest =
'scim-pinyin-%s.tar.gz' % SCIM_PINYIN_VER
321 url = sfurlbase +
'libtabe/libtabe-%s.tgz' % LIBTABE_VER
322 lbt_dest =
'libtabe-%s.tgz' % LIBTABE_VER
333 if pyversion[:1]
in [
'2']:
344 t2s_1to1 =
removeRules(
'trad2simp_noconvert.manual', t2s_1to1)
345 s2t_1to1 =
removeRules(
'simp2trad_noconvert.manual', s2t_1to1)
355 s2t_1to1_supp, t2s_1to1_supp)
358 t2s_1to1_supp, s2t_1to1_supp)
371 s_wordlist =
applyExcludes(s_wordlist,
'simpphrases_exclude.manual')
372 t_wordlist =
applyExcludes(t_wordlist,
'tradphrases_exclude.manual')
381 s2t_1to1_supp, t2s_supp)
384 t2s_1to1_supp, s2t_supp)
389 if pyversion[:1]
in [
'2']:
392 t2s_1to1 = dict([(f, t)
for (f, t)
in t2s_1to1.items()
if f != t])
395 if pyversion[:1]
in [
'2']:
398 s2t_1to1 = dict([(f, t)
for (f, t)
in s2t_1to1.items()
if f != t])
410 * Simplified / Traditional Chinese conversion tables
412 * Automatically generated using code and data in maintenance/language/zhtable/
413 * Do not modify directly!
418namespace MediaWiki\Languages\Data;
421public static $zh2Hant = [\n'''
423 +
'\n];\n\npublic static $zh2Hans = [\n' \
425 +
'\n];\n\npublic static $zh2TW = [\n' \
427 +
'\n];\n\npublic static $zh2HK = [\n' \
429 +
'\n];\n\npublic static $zh2CN = [\n' \
433 if pyversion[:1]
in [
'2']:
434 f = open(
os.path.join(
'..',
'..',
'..',
'languages',
'data',
'ZhConversion.php'),
'wb', encoding=
'utf8')
436 f = open(
os.path.join(
'..',
'..',
'..',
'languages',
'data',
'ZhConversion.php'),
'w', buffering=4096, encoding=
'utf8')
437 print (
'Writing ZhConversion.php ... ')
442 print (
'Deleting temporary files ... ')
451if __name__ ==
'__main__':
and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy
deferred.txt: A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user. For example, updating the view counts, updating the linked-to tables after a save, etc. PHP does not yet have any way to tell the server to actually return and disconnect while still running these updates, but it might have such a feature in the future. We handle these by creating a deferred-update object and putting those objects on a global list
while(( $__line=Maintenance::readconsole()) !==false) print
translate(text, conv_table)
manualWordsTable(path, conv_table, reconv_table)
defaultWordsTable(src_wordlist, src_tomany, char_conv_table, char_reconv_table)
applyExcludes(mlist, path)
parserCore(fp, pos, beginmark=None, endmark=None)
uncompress(fp, member, encoding='U8')
dictToSortedList(src_table, pos)