Модул:ja — разлика између измена
Изглед
Садржај обрисан Садржај додат
Нема описа измене |
Нема описа измене |
||
(9 међуизмена истог корисника није приказано) | |||
Ред 1: | Ред 1: | ||
local |
local m_str_utils = require("Модул:string utilities") |
||
local export = {} |
local export = {} |
||
local codepoint = |
local codepoint = m_str_utils.codepoint |
||
local concat = table.concat |
local concat = table.concat |
||
local find = |
local find = string.find |
||
local get_by_code = require("Модул:languages").getByCode |
local get_by_code = require("Модул:languages").getByCode |
||
local gsub = mw_str_utils.gsub |
|||
local insert = table.insert |
local insert = table.insert |
||
local len = mw_str_utils.len |
|||
local load_data = mw.loadData |
local load_data = mw.loadData |
||
local sub = mw_str_utils.sub |
|||
local toNFC = mw.ustring.toNFC |
local toNFC = mw.ustring.toNFC |
||
local toNFD = mw.ustring.toNFD |
local toNFD = mw.ustring.toNFD |
||
local u = |
local u = m_str_utils.char |
||
local ugsub = m_str_utils.gsub |
|||
local ulen = m_str_utils.len |
|||
local ulower = m_str_utils.lower |
|||
local umatch = mw.ustring.match |
|||
local usub = m_str_utils.sub |
|||
-- note that arrays loaded by mw.loadData cannot be directly used by gsub |
-- note that arrays loaded by mw.loadData cannot be directly used by gsub |
||
local pagename -- generated when needed, to avoid an infinite loop with [[Module:Jpan-sortkey]] |
local pagename -- generated when needed, to avoid an infinite loop with [[Module:Jpan-sortkey]] |
||
local namespace = mw.title.getCurrentTitle().nsText |
local namespace = mw.title.getCurrentTitle().nsText |
||
local data = load_data("Модул:ja/data") |
local data = load_data("Модул:ja/data") |
||
local |
local long_vowels_hira = data.long_vowels_hira |
||
local |
local long_vowels_kata = data.long_vowels_kata |
||
local voice_marks = data.voice_marks |
local voice_marks = data.voice_marks |
||
local specials = data.specials |
|||
local range = load_data("Модул:ja/data/range") |
local range = load_data("Модул:ja/data/range") |
||
local r_hiragana = range.hiragana |
|||
local r_vowels = range.vowels |
|||
export.data = { |
|||
local r_kana_combining_characters = range.kana_combining_characters |
|||
joyo_kanji = data.joyo_kanji, |
|||
jinmeiyo_kanji = data.jinmeiyo_kanji, |
|||
grade1 = data.grade1, |
|||
grade2 = data.grade2, |
|||
grade3 = data.grade3, |
|||
grade4 = data.grade4, |
|||
grade5 = data.grade5, |
|||
grade6 = data.grade6 |
|||
} |
|||
local function change_codepoint(added_value) |
local function change_codepoint(added_value) |
||
Ред 43: | Ред 38: | ||
end |
end |
||
function export.hira_to_kata(text) |
|||
-- Normalizes long vowels, iteration marks and non-combining voice marks to the standard equivalents. |
|||
if type(text) == "table" then |
|||
-- Note: output text is normalized to NFD. |
|||
text = text.args[1] |
|||
function export.normalize_kana(text) |
|||
text = toNFD(text) |
|||
local chars, text_len = {}, #text |
|||
local i, c, end_c, from, b = 0 |
|||
while i < text_len do |
|||
i = i + 1 |
|||
c = text:sub(i, i) |
|||
if c == "<" then |
|||
from = i |
|||
repeat |
|||
i = i + 1 |
|||
end_c = text:sub(i, i) |
|||
if end_c == ">" then |
|||
insert(chars, text:sub(from, i)) |
|||
break |
|||
elseif i == text_len then |
|||
i = from |
|||
insert(chars, c) |
|||
break |
|||
end |
|||
until false |
|||
else |
|||
b = c:byte() |
|||
if b <= 127 then |
|||
insert(chars, c) |
|||
else |
|||
from = i |
|||
repeat |
|||
i = i + 1 |
|||
b = text:sub(i, i):byte() |
|||
until not b or b <= 127 or b >= 194 |
|||
i = i - 1 |
|||
insert(chars, text:sub(from, i)) |
|||
end |
|||
end |
|||
end |
|||
local pos = 0 |
|||
local function do_iter(start, from, to) |
|||
local prev = chars[start - 1] |
|||
while start > 1 and not long_vowel[prev] do |
|||
start = start - 1 |
|||
prev = chars[start - 1] |
|||
end |
|||
start = start - 1 |
|||
insert(from, 1, start) |
|||
insert(to, pos) |
|||
return start |
|||
end |
end |
||
text = ugsub(toNFD(text), "[ぁ-ゖゝゞ]", change_codepoint(96)) |
|||
text = ugsub(text, "[𛅐-𛅒]", change_codepoint(20)) |
|||
repeat |
|||
text = ugsub(text, "[𛀁𛀆𛄟𛄲]", data.hira_to_kata) |
|||
pos = pos + 1 |
|||
local char = chars[pos] |
|||
if char == "ー" then |
|||
local start = pos |
|||
local prev = chars[pos - 1] |
|||
while start > 1 and not long_vowel[prev] do |
|||
start = start - 1 |
|||
prev = chars[start - 1] |
|||
end |
|||
chars[pos] = long_vowel[prev] or chars[pos] |
|||
elseif voice_marks[char] then |
|||
chars[pos] = voice_marks[char] |
|||
elseif iter_marks[char] then |
|||
local from, to = {}, {} |
|||
local start = do_iter(pos, from, to) |
|||
local next = chars[pos + 1] |
|||
while next and (iter_marks[next] or voice_marks[next] or specials[next] or next:sub(1, 1) == "<") do |
|||
pos = pos + 1 |
|||
if iter_marks[next] then |
|||
start = do_iter(start, from, to) |
|||
end |
|||
next = chars[pos + 1] |
|||
end |
|||
for i, char_pos in ipairs(from) do |
|||
local iter_pos = to[i] |
|||
chars[iter_pos] = chars[char_pos] or chars[iter_pos] |
|||
end |
|||
end |
|||
until pos >= #chars |
|||
return concat(chars) |
|||
end |
|||
function export.hira_to_kata(text) |
|||
if type(text) == "table" then text = text.args[1] end |
|||
text = gsub(text, '[ぁ-ゖゝゞ]', change_codepoint(96)) |
|||
text = gsub(text, '[𛅐-𛅒]', change_codepoint(20)) |
|||
text = gsub(text, '[𛀆𛄟]', {["𛀆"] = "𛄠", ["𛄟"] = "𛄢"}) |
|||
return toNFC(text) |
return toNFC(text) |
||
end |
end |
||
function export.kata_to_hira(text) |
function export.kata_to_hira(text) |
||
if type(text) == "table" then |
if type(text) == "table" then |
||
text = text.args[1] |
|||
end |
|||
text = |
text = ugsub(toNFD(text), "[ァ-ヶヽヾ]", change_codepoint(-96)) |
||
text = |
text = ugsub(text, "[𛅤-𛅦]", change_codepoint(-20)) |
||
text = |
text = ugsub(text, "[𛀀𛄠-𛄢𛅕]", data.kata_to_hira) |
||
return toNFC(text) |
return toNFC(text) |
||
end |
|||
function export.fullwidth_to_halfwidth(text) |
|||
if type(text) == "table" then text = text.args[1] end |
|||
return (gsub(text:gsub(' ', ' '), '[!-~]', change_codepoint(-65248))) |
|||
end |
end |
||
Ред 157: | Ред 62: | ||
-- insertion of spaces or hyphens in manual romaji without appearing "wrong" |
-- insertion of spaces or hyphens in manual romaji without appearing "wrong" |
||
function export.rm_spaces_hyphens(f) |
function export.rm_spaces_hyphens(f) |
||
local text = type(f) == |
local text = type(f) == "table" and f.args[1] or f |
||
return (text:gsub("[ '%-.]+", "") |
|||
:gsub(" ", "")) |
|||
return text |
|||
end |
end |
||
do |
|||
function export.romaji_to_kata(f) |
|||
local function handle_macron(ch) |
|||
local text = type(f) == 'table' and f.args[1] or f |
|||
return ch == "o" and "ou" or ch .. ch |
|||
text = text:ulower() |
|||
end |
|||
text = text:gsub('[\1-\255][\128-\191]*', data.rd) |
|||
text = text:gsub('(.)%1', { |
|||
function export.romaji_to_kata(f) |
|||
k = 'ッk', s = 'ッs', t = 'ッt', p = 'ッp', |
|||
local text = type(f) == "table" and f.args[1] or f |
|||
b = 'ッb', d = 'ッd', g = 'ッg', j = 'ッj' |
|||
text = ulower(toNFD(text)) |
|||
}) |
|||
text = text:gsub("(.[\128-\191]*)\204\132", handle_macron) |
|||
:gsub("(.)%1", "ッ%1") |
|||
:gsub("tc", "ッc") |
|||
text = text:gsub('ts[uoiea]', {['tsu']='ツ',['tso']='ツォ',['tsi']='ツィ',['tse']='ツェ',['tsa']='ツァ'}) |
|||
:gsub("tsyu", "ツュ") |
|||
text = text:gsub('sh[uoiea]', {['shu']='シュ',['sho']='ショ',['shi']='シ',['she']='シェ',['sha']='シャ'}) |
|||
:gsub("ts[uoiea]", {["tsu"]="ツ",["tso"]="ツォ",["tsi"]="ツィ",["tse"]="ツェ",["tsa"]="ツァ"}) |
|||
:gsub("sh[uoiea]", {["shu"]="シュ",["sho"]="ショ",["shi"]="シ",["she"]="シェ",["sha"]="シャ"}) |
|||
:gsub("ch[uoiea]", {["chu"]="チュ",["cho"]="チョ",["chi"]="チ",["che"]="チェ",["cha"]="チャ"}) |
|||
text = text:gsub('[wvtrpsnmlkjhgfdbzy][yw]?[uoiea]', data.rk) |
|||
:gsub("n[uoiea']?", {["nu"]="ヌ",["no"]="ノ",["ni"]="ニ",["ne"]="ネ",["na"]="ナ"}) |
|||
text = text:gsub("n'?", 'ン') |
|||
:gsub("[wvtrpsnmlkjhgfdbzy][yw]?[uoiea]", data.rk) |
|||
text = text:gsub('[aeiou]', { |
|||
:gsub("n'?", "ン") |
|||
u = 'ウ', o = 'オ', i = 'イ', e = 'エ', a = 'ア' |
|||
:gsub("[aeiou]", { |
|||
}) |
|||
u = "ウ", o = "オ", i = "イ", e = "エ", a = "ア" |
|||
return text |
|||
}) |
|||
return text |
|||
end |
|||
end |
end |
||
Ред 189: | Ред 96: | ||
-- e.g. given イギリス人, it returns Kana+Hani |
-- e.g. given イギリス人, it returns Kana+Hani |
||
function export.script(f) |
function export.script(f) |
||
local text = type(f) == |
local text = type(f) == "table" and f.args[1] or f |
||
local script = {} |
local script = {} |
||
-- For Hira and Kana, we remove any characters which also feature in the other first, so that we don't get false positives for ー etc. |
-- For Hira and Kana, we remove any characters which also feature in the other first, so that we don't get false positives for ー etc. |
||
local no_overlap = ugsub(text, "[" .. range.kana_overlap .. "]+", "") |
|||
insert(script, 'Hira') |
|||
if umatch(no_overlap, "[" .. r_hiragana .. "ゟ]") then |
|||
insert(script, "Hira") |
|||
end |
end |
||
if umatch(no_overlap, "[" .. range.katakana .. "ヿ]") then |
|||
insert(script, "Kana") |
|||
end |
|||
if umatch(text, "[" .. range.kanji .. "]") then |
|||
insert(script, "Hani") |
|||
end |
|||
if umatch(text, "[" .. range.latin .. "]") then |
|||
insert(script, "Romaji") |
|||
end |
|||
if umatch(text, "[" .. range.numbers .. "]") then |
|||
insert(script, "Number") |
|||
end |
|||
if umatch(text, "[〆々]") then |
|||
insert(script, "Abbreviation") |
|||
end |
|||
return concat(script, "+") |
|||
end |
|||
do |
|||
local submoraic = range.submoraic_kana .. r_kana_combining_characters |
|||
local spacing_punc = "%s%p%$%+=>%^`|~" |
|||
local function handle_spacing_punc(ch, mora) |
|||
if find(gsub(text, "[" .. range.hiragana .. "]+", ""), "[" .. range.katakana .. "]") then |
|||
insert( |
insert(mora, ch) |
||
if ch:match("[^%^%%']") then |
|||
mora.sp = true |
|||
end |
|||
return ch, mora |
|||
end |
end |
||
local function iterate_mora(text, start, morae, mora) |
|||
if find(text, "[" .. range.kanji .. "]") then |
|||
mora = mora or {} |
|||
insert(script, 'Hani') |
|||
local ch = umatch(text, "^[" .. spacing_punc .. "]+", start) |
|||
if ch then |
|||
return handle_spacing_punc(ch, mora) |
|||
end |
|||
ch = usub(text, start, start) |
|||
if ch == "<" then |
|||
ch = umatch(text, "^<.->", start) or umatch(text, "^[<" .. spacing_punc .. "]+", start) |
|||
return handle_spacing_punc(ch, mora) |
|||
elseif ( |
|||
mora.sp or |
|||
mora.kana and umatch(ch, "[^" .. submoraic .. "]") |
|||
) then |
|||
insert(morae, concat(mora)) |
|||
mora = {} |
|||
end |
|||
mora.kana = true |
|||
insert(mora, ch) |
|||
return ch, mora |
|||
end |
end |
||
-- Returns an array of morae. |
|||
if find(text, "[" .. range.latin .. "]") then |
|||
-- Small vowel kana (and any combining dakuten/handakuten) are grouped with any prior word characters, which should be kana. Non-word characters (spaces, punctuation etc.) are accounted for, and grouped with surrounding morae wherever possible. |
|||
insert(script, 'Romaji') |
|||
function export.moraify(text) |
|||
local morae, start, text_len, mora = {}, 1, ulen(text) |
|||
while start <= text_len do |
|||
local ch |
|||
ch, mora = iterate_mora(text, start, morae, mora) |
|||
start = start + ulen(ch) |
|||
end |
|||
if mora then |
|||
insert(morae, concat(mora)) |
|||
end |
|||
return morae |
|||
end |
end |
||
if find(text, '[' .. range.numbers .. ']') then |
|||
local function remove_formatting(text) |
|||
insert(script, 'Number') |
|||
return ugsub(text:gsub("<.->", ""), "[<" .. spacing_punc .. "]+", "") |
|||
end |
end |
||
if find(text, '[〆々]') then |
|||
-- Counts the number of morae. |
|||
insert(script, 'Abbreviation') |
|||
function export.count_morae(text) |
|||
text = export.moraify(text) |
|||
local morae = #text |
|||
for i = 1, morae do |
|||
if #remove_formatting(text[i]) == 0 then |
|||
morae = morae - 1 |
|||
end |
|||
end |
|||
return morae |
|||
end |
|||
local function do_long_vowel(i, text) |
|||
if not text[i]:find("ー") then |
|||
return |
|||
end |
|||
local prev = text[i - 1] |
|||
if not prev then |
|||
return |
|||
end |
|||
prev = ugsub(remove_formatting(prev), "[" .. r_kana_combining_characters .. "]+", "") |
|||
:match("[^\128-\191][\128-\191]*$") |
|||
for vowel, kana in pairs(r_vowels) do |
|||
if kana:find(prev) then |
|||
local v = (umatch(prev, "[" .. r_hiragana .. "]") and long_vowels_hira or long_vowels_kata)[vowel] |
|||
text[i] = text[i]:gsub("ー", v, 1) |
|||
end |
|||
end |
|||
end |
end |
||
local function do_iteration_mark(i, n, text) |
|||
return concat(script, '+') |
|||
local mora = text[i] |
|||
end |
|||
if mora:find("ゝ") or mora:find("ヽ") then |
|||
return n + 1 |
|||
-- when counting morae, most small hiragana belong to the previous mora, |
|||
elseif n == 0 then |
|||
-- so for purposes of counting them, they can be removed and the characters |
|||
return |
|||
-- can be counted to get the number of morae. The exception is small tsu, |
|||
end |
|||
-- so data.nonmora_to_empty maps all small hiragana except small tsu. |
|||
-- Count backwards once for each iteration mark, but stop early if we find something which can't be iterated, as that marks the start of the set to be repeated. |
|||
function export.count_morae(text) |
|||
local anchor = i |
|||
if type(text) == "table" then |
|||
for j = 0, n - 1 do |
|||
local prev = text[anchor - j] |
|||
if not prev then |
|||
n = j |
|||
break |
|||
end |
|||
prev = remove_formatting(prev) |
|||
if prev:find("ゝ") or prev:find("ヽ") or umatch(prev, "[%s%p]") then |
|||
n = j |
|||
break |
|||
end |
|||
end |
|||
if n == 0 then |
|||
return |
|||
end |
|||
i = i - n + 1 |
|||
-- Replace iteration marks ahead with the relevant character. |
|||
for j = i, i + n - 1 do |
|||
mora = remove_formatting(text[j]):gsub("^(.[\128-\191]*)\227\130[\153\154]", "%1") |
|||
text[j + n] = ugsub(text[j + n], "([ゝヽ])([゙゚]?)", function(mark, voicing) |
|||
local repl = mora:gsub("^.[\128-\191]*", "%0" .. voicing) |
|||
return mark == "ゝ" and export.kata_to_hira(repl) or export.hira_to_kata(repl) |
|||
end) |
|||
end |
|||
return |
|||
end |
|||
-- Normalizes long vowels, iteration marks and non-combining voice marks to the standard equivalents. |
|||
-- Note: output text is normalized to NFD. |
|||
function export.normalize_kana(text) |
|||
text = export.moraify((toNFD(text):gsub("[\227\239][\130\190][\155\156\158\159]", voice_marks))) |
|||
local n, morae = 0, #text |
|||
for i = morae, 1, -1 do |
|||
n = do_iteration_mark(i, n, text) or 0 |
|||
end |
|||
for i = 1, morae do |
|||
do_long_vowel(i, text) |
|||
end |
|||
-- Normalize again to be safe. |
|||
return toNFD(concat(text)) |
|||
end |
end |
||
-- convert kata to hira (hira is untouched) |
|||
text = export.kata_to_hira(text) |
|||
-- remove all of the small hiragana such as ょ except small tsu |
|||
text = text:gsub('[\1-\255][\128-\191]*',data.nonmora_to_empty) |
|||
-- remove zero-width spaces |
|||
text = text:gsub('', '') |
|||
-- return number of characters, which should be the number of morae |
|||
return len(text) |
|||
end |
|||
-- returns a sort key with |sort= in front, e.g. |
|||
-- |sort=はつぐん' if given ばつぐん |
|||
function export.sort(f) |
|||
return "|sort=" .. (get_by_code("ja"):makeSortKey(f)) |
|||
end |
end |
||
-- returns the "stem" of a verb or -i adjective, that is the term minus the final character |
-- returns the "stem" of a verb or -i adjective, that is the term minus the final character |
||
function export.definal(f) |
function export.definal(f) |
||
return |
return usub(f.args[1], 1, -2) |
||
end |
end |
||
Ред 258: | Ред 278: | ||
if namespace == "" then |
if namespace == "" then |
||
local params = { |
local params = { |
||
grade = {}, |
grade = {}, -- To be removed. |
||
rs = {}, |
rs = {}, |
||
shin = {}, |
shin = {}, |
||
Ред 265: | Ред 285: | ||
} |
} |
||
local lang_code = frame.args[1] |
local lang_code = frame.args[1] |
||
local |
local lang = get_by_code(lang_code) |
||
local lang_name = lang:getCanonicalName() |
|||
local args = require("Модул:parameters").process(frame:getParent().args, params, nil, "ja", "kanji") |
local args = require("Модул:parameters").process(frame:getParent().args, params, nil, "ja", "kanji") |
||
local |
local sortkey = args.rs or require("Модул:Hani-sortkey").makeSortKey(pagename) or pagename -- radical sort |
||
local shin = args.shin |
local shin = args.shin |
||
local kyu = args.kyu |
local kyu = args.kyu |
||
local |
local wikitext, categories = {}, {} |
||
['c'] = 7, |
|||
['n'] = 8, |
|||
['uc'] = 9, |
|||
['r'] = 0, |
|||
} |
|||
local grade = args.grade |
|||
grade = tonumber(grade) or grade |
|||
grade = grade_replacements[grade] or grade |
|||
local wikitext = {} |
|||
local categories = {} |
|||
local catsort = rs or pagename |
|||
-- display the kanji itself at the top at 275% size |
-- display the kanji itself at the top at 275% size |
||
insert(wikitext, |
insert(wikitext, "<div><span lang=\"" .. lang_code .. "\" class=\"Jpan\" style=\"font-size:275%; line-height:1;\">" .. (args.head or pagename) .. "</span></div>") |
||
-- display information for the grade |
-- display information for the grade |
||
-- determine grade |
|||
-- if grade was not specified, determine it now |
|||
local grade, in_parenthesis = export.kanji_grade(pagename), {} |
|||
if not grade then |
|||
insert(in_parenthesis, data.grade_links[grade]) |
|||
grade = export.kanji_grade(pagename) |
|||
if args.grade then |
|||
require("Модул:debug/track")("ja/redundant grade parameter") |
|||
end |
end |
||
local in_parenthesis = {} |
|||
local grade_links = { |
|||
[1] = "[[w:Kyōiku kanji|grade 1 “Kyōiku” kanji]]", |
|||
[2] = "[[w:Kyōiku kanji|grade 2 “Kyōiku” kanji]]", |
|||
[3] = "[[w:Kyōiku kanji|grade 3 “Kyōiku” kanji]]", |
|||
[4] = "[[w:Kyōiku kanji|grade 4 “Kyōiku” kanji]]", |
|||
[5] = "[[w:Kyōiku kanji|grade 5 “Kyōiku” kanji]]", |
|||
[6] = "[[w:Kyōiku kanji|grade 6 “Kyōiku” kanji]]", |
|||
[7] = "[[w:Jōyō kanji|common “Jōyō” kanji]]", |
|||
[8] = "[[w:Jinmeiyō kanji|“Jinmeiyō” kanji used for names]]", |
|||
[9] = "[[w:Hyōgai kanji|uncommon “Hyōgai” kanji]]", |
|||
[0] = "[[w:Radical_(Chinese_character)|Radical]]", |
|||
} |
|||
if grade_links[grade] then |
|||
insert(in_parenthesis, grade_links[grade]) |
|||
else |
|||
insert(categories, "[[Категорија:" .. lang_name .. " kanji missing grade|" .. catsort .. "]]") |
|||
end |
|||
-- link to shinjitai if shinjitai was specified, and link to kyujitai if kyujitai was specified |
-- link to shinjitai if shinjitai was specified, and link to kyujitai if kyujitai was specified |
||
if kyu then |
if kyu then |
||
insert(in_parenthesis, |
insert(in_parenthesis, "[[shinjitai]] kanji, [[kyūjitai]] form <span lang=\"" .. lang_code .. "\" class=\"Jpan\">[[" .. kyu .. "#" .. lang_name .. "|" .. kyu .. "]]</span>") |
||
elseif shin then |
elseif shin then |
||
insert(in_parenthesis, |
insert(in_parenthesis, "[[kyūjitai]] kanji, [[shinjitai]] form <span lang=\"" .. lang_code .. "\" class=\"Jpan\">[[" .. shin .. "#" .. lang_name .. "|" .. shin .. "]]</span>") |
||
end |
end |
||
insert(wikitext, "''(" .. concat(in_parenthesis, ", ") .. "'')") |
insert(wikitext, "''(" .. concat(in_parenthesis, ", ") .. "'')") |
||
-- add categories |
-- add categories |
||
insert(categories, |
insert(categories, lang_name .. " Хан карактери") |
||
insert(categories, lang_name .. " " .. data.grades[grade]) |
|||
local grade_categories = { |
|||
[1] = "Grade 1 kanji", |
|||
if grade <= 6 then |
|||
insert(categories, lang_name .. " kyōiku kanji") |
|||
insert(categories, lang_name .. " jōyō kanji") -- Grade 7 get this from the data. |
|||
[4] = "Grade 4 kanji", |
|||
[5] = "Grade 5 kanji", |
|||
[6] = "Grade 6 kanji", |
|||
[7] = "Common kanji", |
|||
[8] = "Kanji used for names", |
|||
[9] = "Uncommon kanji", |
|||
[0] = "CJKV radicals", |
|||
} |
|||
insert(categories, "[[Категорија:" .. (grade_categories[grade] or error("The grade " .. grade .. " is invalid.")) .. "|" .. (grade == "0" and " " or catsort) .. "]]") |
|||
-- error category |
|||
if not rs then |
|||
insert(categories, "[[Категорија:" .. lang_name .. " kanji missing radical and strokes]]") |
|||
end |
end |
||
if mw.title.new(lang_name .. " terms spelled with " .. pagename, 14).exists then |
if mw.title.new(lang_name .. " terms spelled with " .. pagename, 14).exists then |
||
insert(wikitext, 1, |
insert(wikitext, 1, "<div class=\"noprint floatright catlinks\" style=\"font-size: 90%; width: 270px\"><div style=\"padding:0 5px\"><i>See also:</i><div style=\"margin-left: 10px;\">[[:Category:" .. lang_name .. " terms spelled with " .. pagename .. "]]</div></div></div>") |
||
end |
end |
||
return concat(wikitext) .. |
return concat(wikitext) .. require("Модул:utilities").format_categories(categories, lang, sortkey) |
||
end |
end |
||
end |
end |
||
local grade1_pattern = ('[' .. data.grade1 .. ']') |
|||
local grade2_pattern = ('[' .. data.grade2 .. ']') |
|||
local grade3_pattern = ('[' .. data.grade3 .. ']') |
|||
local grade4_pattern = ('[' .. data.grade4 .. ']') |
|||
local grade5_pattern = ('[' .. data.grade5 .. ']') |
|||
local grade6_pattern = ('[' .. data.grade6 .. ']') |
|||
local secondary_pattern = ('[' .. data.secondary .. ']') |
|||
local jinmeiyo_kanji_pattern = ('[' .. data.jinmeiyo_kanji .. ']') |
|||
local hyogaiji_pattern = ('[^' .. data.joyo_kanji .. data.jinmeiyo_kanji .. ']') |
|||
function export.kanji_grade(kanji) |
function export.kanji_grade(kanji) |
||
for i, set in ipairs(data.grade_kanji) do |
|||
if type(kanji) == "table" then |
|||
if find(set, kanji, 1, true) then |
|||
return i |
|||
end |
|||
end |
end |
||
return umatch(kanji, "[" .. range.kanji .. "]") and 9 or false |
|||
if find(kanji, hyogaiji_pattern) then return 9 |
|||
elseif find(kanji, jinmeiyo_kanji_pattern) then return 8 |
|||
elseif find(kanji, secondary_pattern) then return 7 |
|||
elseif find(kanji, grade6_pattern) then return 6 |
|||
elseif find(kanji, grade5_pattern) then return 5 |
|||
elseif find(kanji, grade4_pattern) then return 4 |
|||
elseif find(kanji, grade3_pattern) then return 3 |
|||
elseif find(kanji, grade2_pattern) then return 2 |
|||
elseif find(kanji, grade1_pattern) then return 1 |
|||
end |
|||
return false |
|||
end |
end |
||
Тренутна верзија на датум 19. јул 2024. у 16:52
Script error: The function "main" does not exist.
local m_str_utils = require("Модул:string utilities")
local export = {}
local codepoint = m_str_utils.codepoint
local concat = table.concat
local find = string.find
local get_by_code = require("Модул:languages").getByCode
local insert = table.insert
local load_data = mw.loadData
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local u = m_str_utils.char
local ugsub = m_str_utils.gsub
local ulen = m_str_utils.len
local ulower = m_str_utils.lower
local umatch = mw.ustring.match
local usub = m_str_utils.sub
-- note that arrays loaded by mw.loadData cannot be directly used by gsub
-- Page name used by export.kanji; left unset at load time and filled in there on first use.
local pagename -- generated when needed, to avoid an infinite loop with [[Module:Jpan-sortkey]]
-- Namespace text of the page being rendered; export.kanji only produces output when it is "".
local namespace = mw.title.getCurrentTitle().nsText
-- Main data module, loaded through mw.loadData.
local data = load_data("Модул:ja/data")
-- Lookup tables indexed by vowel key in do_long_vowel to pick the kana that replaces ー.
local long_vowels_hira = data.long_vowels_hira
local long_vowels_kata = data.long_vowels_kata
-- Replacement table passed to gsub in export.normalize_kana.
local voice_marks = data.voice_marks
-- Character-range data; its string fields are spliced into "[...]" pattern classes below.
local range = load_data("Модул:ja/data/range")
local r_hiragana = range.hiragana
-- Iterated with pairs() in do_long_vowel as vowel key -> string of kana.
local r_vowels = range.vowels
local r_kana_combining_characters = range.kana_combining_characters
-- Builds a replacement callback for gsub: the returned function takes a
-- character and returns the character whose codepoint is `added_value` higher
-- (or lower, for a negative `added_value`).
local function change_codepoint(added_value)
	local function shift(char)
		local cp = codepoint(char)
		return u(cp + added_value)
	end
	return shift
end
-- Converts hiragana in `text` to katakana.
-- `text` may be a string, or a table whose args[1] holds the string.
-- The input is decomposed with toNFD before substitution and the result is
-- recomposed with toNFC.
function export.hira_to_kata(text)
	if type(text) == "table" then text = text.args[1] end
	local decomposed = toNFD(text)
	-- These two ranges are converted by a fixed codepoint offset.
	local shifted = ugsub(decomposed, "[ぁ-ゖゝゞ]", change_codepoint(96))
	shifted = ugsub(shifted, "[𛅐-𛅒]", change_codepoint(20))
	-- The remaining characters are looked up in the data module's table.
	local converted = ugsub(shifted, "[𛀁𛀆𛄟𛄲]", data.hira_to_kata)
	return toNFC(converted)
end
-- Converts katakana in `text` to hiragana.
-- `text` may be a string, or a table whose args[1] holds the string.
-- The input is decomposed with toNFD before substitution and the result is
-- recomposed with toNFC.
function export.kata_to_hira(text)
	if type(text) == "table" then text = text.args[1] end
	local decomposed = toNFD(text)
	-- These two ranges are converted by a fixed codepoint offset.
	local shifted = ugsub(decomposed, "[ァ-ヶヽヾ]", change_codepoint(-96))
	shifted = ugsub(shifted, "[𛅤-𛅦]", change_codepoint(-20))
	-- The remaining characters are looked up in the data module's table.
	local converted = ugsub(shifted, "[𛀀𛄠-𛄢𛅕]", data.kata_to_hira)
	return toNFC(converted)
end
-- removes spaces and hyphens from input
-- intended to be used when checking manual romaji to allow the
-- insertion of spaces or hyphens in manual romaji without appearing "wrong"
--
-- `f` is either the string itself or a table whose args[1] holds it
-- (if args[1] is nil or false, `f` itself is used).
-- Returns the cleaned string only; the outer parentheses drop gsub's match count.
function export.rm_spaces_hyphens(f)
local text = type(f) == "table" and f.args[1] or f
-- First pass: delete every run of spaces, apostrophes, hyphens and full stops.
return (text:gsub("[ '%-.]+", "")
-- Second pass: delete the whitespace character in the pattern below.
-- NOTE(review): presumably a non-breaking space rather than a plain space — confirm against the stored source.
:gsub(" ", ""))
end
do
	-- Expands a letter that carried a combining macron: "o" becomes "ou",
	-- any other letter is simply doubled.
	local function handle_macron(ch)
		if ch == "o" then
			return "ou"
		end
		return ch .. ch
	end

	-- Replacement tables for the substitution steps below. They are only read,
	-- never modified.
	local ts_kana = {["tsu"]="ツ",["tso"]="ツォ",["tsi"]="ツィ",["tse"]="ツェ",["tsa"]="ツァ"}
	local sh_kana = {["shu"]="シュ",["sho"]="ショ",["shi"]="シ",["she"]="シェ",["sha"]="シャ"}
	local ch_kana = {["chu"]="チュ",["cho"]="チョ",["chi"]="チ",["che"]="チェ",["cha"]="チャ"}
	local n_kana = {["nu"]="ヌ",["no"]="ノ",["ni"]="ニ",["ne"]="ネ",["na"]="ナ"}
	local vowel_kana = {
		u = "ウ", o = "オ", i = "イ", e = "エ", a = "ア"
	}

	-- Converts romaji to katakana.
	-- `f` is either the romaji string or a table whose args[1] holds it.
	-- The substitutions are order-dependent and must run in the sequence below.
	function export.romaji_to_kata(f)
		local text = f
		if type(f) == "table" then
			text = f.args[1] or f
		end
		text = ulower(toNFD(text))
		-- A letter followed by a combining macron (bytes \204\132) is expanded.
		text = text:gsub("(.[\128-\191]*)\204\132", handle_macron)
		-- Any doubled byte becomes small tsu plus one copy of it.
		-- NOTE(review): this pattern is not restricted to consonants, so doubled
		-- vowels and "nn" are rewritten too — confirm that this is intended.
		text = text:gsub("(.)%1", "ッ%1")
		text = text:gsub("tc", "ッc")
		text = text:gsub("tsyu", "ツュ")
		text = text:gsub("ts[uoiea]", ts_kana)
		text = text:gsub("sh[uoiea]", sh_kana)
		text = text:gsub("ch[uoiea]", ch_kana)
		-- Matches with no entry in the table (bare "n", "n'") are left unchanged here.
		text = text:gsub("n[uoiea']?", n_kana)
		text = text:gsub("[wvtrpsnmlkjhgfdbzy][yw]?[uoiea]", data.rk)
		-- Any "n" still present becomes the moraic nasal.
		text = text:gsub("n'?", "ン")
		text = text:gsub("[aeiou]", vowel_kana)
		return text
	end
end
-- expects: any mix of kanji and kana
-- determines the script types used
-- e.g. given イギリス人, it returns Kana+Hani
--
-- `f` is either the text itself or a table whose args[1] holds it.
-- The detected labels are joined with "+" in the fixed order
-- Hira, Kana, Hani, Romaji, Number, Abbreviation.
function export.script(f)
	local text = f
	if type(f) == "table" then
		text = f.args[1] or f
	end
	-- For Hira and Kana, we remove any characters which also feature in the other first, so that we don't get false positives for ー etc.
	local no_overlap = ugsub(text, "[" .. range.kana_overlap .. "]+", "")
	-- Each entry: label, string to test, pattern that signals the label.
	local checks = {
		{ "Hira", no_overlap, "[" .. r_hiragana .. "ゟ]" },
		{ "Kana", no_overlap, "[" .. range.katakana .. "ヿ]" },
		{ "Hani", text, "[" .. range.kanji .. "]" },
		{ "Romaji", text, "[" .. range.latin .. "]" },
		{ "Number", text, "[" .. range.numbers .. "]" },
		{ "Abbreviation", text, "[〆々]" },
	}
	local script = {}
	for _, check in ipairs(checks) do
		if umatch(check[2], check[3]) then
			insert(script, check[1])
		end
	end
	return concat(script, "+")
end
do
-- Body of a character class: range.submoraic_kana plus the kana combining characters.
-- iterate_mora keeps a character in the current mora when it belongs to this class.
local submoraic = range.submoraic_kana .. r_kana_combining_characters
-- Body of a character class: whitespace, punctuation and the ASCII symbols $ + = > ^ ` | ~.
local spacing_punc = "%s%p%$%+=>%^`|~"
local function handle_spacing_punc(ch, mora)
insert(mora, ch)
if ch:match("[^%^%%']") then
mora.sp = true
end
return ch, mora
end
local function iterate_mora(text, start, morae, mora)
mora = mora or {}
local ch = umatch(text, "^[" .. spacing_punc .. "]+", start)
if ch then
return handle_spacing_punc(ch, mora)
end
ch = usub(text, start, start)
if ch == "<" then
ch = umatch(text, "^<.->", start) or umatch(text, "^[<" .. spacing_punc .. "]+", start)
return handle_spacing_punc(ch, mora)
elseif (
mora.sp or
mora.kana and umatch(ch, "[^" .. submoraic .. "]")
) then
insert(morae, concat(mora))
mora = {}
end
mora.kana = true
insert(mora, ch)
return ch, mora
end
-- Returns an array of morae.
-- Small vowel kana (and any combining dakuten/handakuten) are grouped with any prior word characters, which should be kana. Non-word characters (spaces, punctuation etc.) are accounted for, and grouped with surrounding morae wherever possible.
function export.moraify(text)
local morae, start, text_len, mora = {}, 1, ulen(text)
while start <= text_len do
local ch
ch, mora = iterate_mora(text, start, morae, mora)
start = start + ulen(ch)
end
if mora then
insert(morae, concat(mora))
end
return morae
end
local function remove_formatting(text)
return ugsub(text:gsub("<.->", ""), "[<" .. spacing_punc .. "]+", "")
end
-- Counts the number of morae.
function export.count_morae(text)
text = export.moraify(text)
local morae = #text
for i = 1, morae do
if #remove_formatting(text[i]) == 0 then
morae = morae - 1
end
end
return morae
end
local function do_long_vowel(i, text)
if not text[i]:find("ー") then
return
end
local prev = text[i - 1]
if not prev then
return
end
prev = ugsub(remove_formatting(prev), "[" .. r_kana_combining_characters .. "]+", "")
:match("[^\128-\191][\128-\191]*$")
for vowel, kana in pairs(r_vowels) do
if kana:find(prev) then
local v = (umatch(prev, "[" .. r_hiragana .. "]") and long_vowels_hira or long_vowels_kata)[vowel]
text[i] = text[i]:gsub("ー", v, 1)
end
end
end
-- Helper for export.normalize_kana, which calls it once per mora index `i`, walking the array `text` from last to first.
-- `n` is the value carried over from the previous call: the count of iteration-mark morae seen directly after position `i`.
-- Returns n + 1 when mora `i` itself contains ゝ or ヽ. Otherwise it returns nothing (the caller then resets its count to 0), after rewriting the marks that follow in place when that is possible.
local function do_iteration_mark(i, n, text)
local mora = text[i]
if mora:find("ゝ") or mora:find("ヽ") then
return n + 1
elseif n == 0 then
-- No iteration marks were counted after this mora, so there is nothing to expand.
return
end
-- Count backwards once for each iteration mark, but stop early if we find something which can't be iterated, as that marks the start of the set to be repeated.
local anchor = i
for j = 0, n - 1 do
local prev = text[anchor - j]
if not prev then
-- Ran past the start of the array: only j morae are available.
n = j
break
end
prev = remove_formatting(prev)
if prev:find("ゝ") or prev:find("ヽ") or umatch(prev, "[%s%p]") then
n = j
break
end
end
if n == 0 then
return
end
-- Move i back to the first of the n morae that will be repeated.
i = i - n + 1
-- Replace iteration marks ahead with the relevant character.
for j = i, i + n - 1 do
-- Source mora without formatting; a combining mark encoded as \227\130\153 or \227\130\154 directly after its first character is dropped.
mora = remove_formatting(text[j]):gsub("^(.[\128-\191]*)\227\130[\153\154]", "%1")
-- In the mora n positions ahead, each iteration mark (with the optional mark that follows it) is replaced by the source mora.
text[j + n] = ugsub(text[j + n], "([ゝヽ])([゙゚]?)", function(mark, voicing)
-- Re-attach the captured mark after the first character of the source mora.
local repl = mora:gsub("^.[\128-\191]*", "%0" .. voicing)
-- ゝ produces hiragana output, ヽ produces katakana output.
return mark == "ゝ" and export.kata_to_hira(repl) or export.hira_to_kata(repl)
end)
end
return
end
-- Normalizes long vowels, iteration marks and non-combining voice marks to the standard equivalents.
-- Note: output text is normalized to NFD.
function export.normalize_kana(text)
text = export.moraify((toNFD(text):gsub("[\227\239][\130\190][\155\156\158\159]", voice_marks)))
local n, morae = 0, #text
for i = morae, 1, -1 do
n = do_iteration_mark(i, n, text) or 0
end
for i = 1, morae do
do_long_vowel(i, text)
end
-- Normalize again to be safe.
return toNFD(concat(text))
end
end
-- returns the "stem" of a verb or -i adjective, that is the term minus the final character
-- `f` is a table whose args[1] holds the term.
function export.definal(f)
	local term = f.args[1]
	return usub(term, 1, -2)
end
-- Deletes every "^", "-", ".", space and "%" from `text` and returns the
-- resulting string only (the substitution count is dropped).
function export.remove_ruby_markup(text)
	local stripped = text:gsub("[%^%-%. %%]", "")
	return stripped
end
-- do the work of [[Template:ja-kanji]], [[Template:ryu-kanji]] etc.
-- should probably be folded into [[Module:Jpan-headword]]
--
-- frame.args[1] is the language code. The template parameters (grade, rs,
-- shin, kyu, head) are read from the parent frame.
-- Returns the headword wikitext followed by the formatted categories, or
-- nothing when the current page is outside the main namespace.
function export.kanji(frame)
	pagename = pagename or load_data("Модул:headword/data").pagename
	-- only do this if this entry is a kanji page and not some user's page
	if namespace ~= "" then
		return
	end
	local params = {
		grade = {}, -- To be removed.
		rs = {},
		shin = {},
		kyu = {},
		head = {},
	}
	local lang_code = frame.args[1]
	local lang = get_by_code(lang_code)
	local lang_name = lang:getCanonicalName()
	local args = require("Модул:parameters").process(frame:getParent().args, params, nil, "ja", "kanji")
	local sortkey = args.rs or require("Модул:Hani-sortkey").makeSortKey(pagename) or pagename -- radical sort
	local shin = args.shin
	local kyu = args.kyu
	local wikitext, categories = {}, {}
	-- display the kanji itself at the top at 275% size
	insert(wikitext, "<div><span lang=\"" .. lang_code .. "\" class=\"Jpan\" style=\"font-size:275%; line-height:1;\">" .. (args.head or pagename) .. "</span></div>")
	-- display information for the grade
	-- determine grade
	-- export.kanji_grade returns false when no grade applies, so everything
	-- that depends on the grade is guarded below.
	local grade, in_parenthesis = export.kanji_grade(pagename), {}
	if grade then
		insert(in_parenthesis, data.grade_links[grade])
	end
	if args.grade then
		require("Модул:debug/track")("ja/redundant grade parameter")
	end
	-- link to shinjitai if shinjitai was specified, and link to kyujitai if kyujitai was specified
	if kyu then
		insert(in_parenthesis, "[[shinjitai]] kanji, [[kyūjitai]] form <span lang=\"" .. lang_code .. "\" class=\"Jpan\">[[" .. kyu .. "#" .. lang_name .. "|" .. kyu .. "]]</span>")
	elseif shin then
		insert(in_parenthesis, "[[kyūjitai]] kanji, [[shinjitai]] form <span lang=\"" .. lang_code .. "\" class=\"Jpan\">[[" .. shin .. "#" .. lang_name .. "|" .. shin .. "]]</span>")
	end
	-- Skip the parenthetical entirely when there is nothing to put in it.
	if #in_parenthesis > 0 then
		insert(wikitext, "''(" .. concat(in_parenthesis, ", ") .. "'')")
	end
	-- add categories
	insert(categories, lang_name .. " Хан карактери")
	-- Without this guard a false grade would fail on the concatenation with
	-- data.grades[grade] (nil) and on the numeric comparison.
	if grade then
		insert(categories, lang_name .. " " .. data.grades[grade])
		if grade <= 6 then
			insert(categories, lang_name .. " kyōiku kanji")
			insert(categories, lang_name .. " jōyō kanji") -- Grade 7 get this from the data.
		end
	end
	if mw.title.new(lang_name .. " terms spelled with " .. pagename, 14).exists then
		insert(wikitext, 1, "<div class=\"noprint floatright catlinks\" style=\"font-size: 90%; width: 270px\"><div style=\"padding:0 5px\"><i>See also:</i><div style=\"margin-left: 10px;\">[[:Category:" .. lang_name .. " terms spelled with " .. pagename .. "]]</div></div></div>")
	end
	return concat(wikitext) .. require("Модул:utilities").format_categories(categories, lang, sortkey)
end
-- Returns the grade of `kanji`: the index of the first string in
-- data.grade_kanji that contains it, else 9 when it matches range.kanji,
-- else false.
function export.kanji_grade(kanji)
	-- A plain-mode find with an empty needle succeeds at position 1 of any
	-- string, which would report the empty string as grade 1.
	if kanji == "" then
		return false
	end
	for i, set in ipairs(data.grade_kanji) do
		-- Plain substring search; `kanji` is not treated as a pattern.
		if find(set, kanji, 1, true) then
			return i
		end
	end
	return umatch(kanji, "[" .. range.kanji .. "]") and 9 or false
end
return export