Пређи на садржај

Модул:ja — разлика између измена

Садржај обрисан Садржај додат
Нема описа измене
Нема описа измене
 
(9 међуизмена истог корисника није приказано)
Ред 1: Ред 1:
local mw_str_utils = require("Модул:string utilities")
local m_str_utils = require("Модул:string utilities")


local export = {}
local export = {}


local codepoint = mw_str_utils.codepoint
local codepoint = m_str_utils.codepoint
local concat = table.concat
local concat = table.concat
local find = mw_str_utils.find
local find = string.find
local get_by_code = require("Модул:languages").getByCode
local get_by_code = require("Модул:languages").getByCode
local gsub = mw_str_utils.gsub
local insert = table.insert
local insert = table.insert
local len = mw_str_utils.len
local load_data = mw.loadData
local load_data = mw.loadData
local sub = mw_str_utils.sub
local toNFC = mw.ustring.toNFC
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local toNFD = mw.ustring.toNFD
local u = mw_str_utils.char
local u = m_str_utils.char
local ugsub = m_str_utils.gsub
local ulen = m_str_utils.len
local ulower = m_str_utils.lower
local umatch = mw.ustring.match
local usub = m_str_utils.sub


-- note that arrays loaded by mw.loadData cannot be directly used by gsub
-- note that arrays loaded by mw.loadData cannot be directly used by gsub
local pagename -- generated when needed, to avoid an infinite loop with [[Module:Jpan-sortkey]]
local pagename -- generated when needed, to avoid an infinite loop with [[Module:Jpan-sortkey]]
local namespace = mw.title.getCurrentTitle().nsText
local namespace = mw.title.getCurrentTitle().nsText

local data = load_data("Модул:ja/data")
local data = load_data("Модул:ja/data")
local long_vowel = data.long_vowel
local long_vowels_hira = data.long_vowels_hira
local iter_marks = data.iter_marks
local long_vowels_kata = data.long_vowels_kata
local voice_marks = data.voice_marks
local voice_marks = data.voice_marks

local specials = data.specials
local range = load_data("Модул:ja/data/range")
local range = load_data("Модул:ja/data/range")
local r_hiragana = range.hiragana

local r_vowels = range.vowels
export.data = {
local r_kana_combining_characters = range.kana_combining_characters
joyo_kanji = data.joyo_kanji,
jinmeiyo_kanji = data.jinmeiyo_kanji,
grade1 = data.grade1,
grade2 = data.grade2,
grade3 = data.grade3,
grade4 = data.grade4,
grade5 = data.grade5,
grade6 = data.grade6
}


local function change_codepoint(added_value)
local function change_codepoint(added_value)
Ред 43: Ред 38:
end
end


function export.hira_to_kata(text)
-- Normalizes long vowels, iteration marks and non-combining voice marks to the standard equivalents.
if type(text) == "table" then
-- Note: output text is normalized to NFD.
text = text.args[1]
function export.normalize_kana(text)
text = toNFD(text)
local chars, text_len = {}, #text
local i, c, end_c, from, b = 0
while i < text_len do
i = i + 1
c = text:sub(i, i)
if c == "<" then
from = i
repeat
i = i + 1
end_c = text:sub(i, i)
if end_c == ">" then
insert(chars, text:sub(from, i))
break
elseif i == text_len then
i = from
insert(chars, c)
break
end
until false
else
b = c:byte()
if b <= 127 then
insert(chars, c)
else
from = i
repeat
i = i + 1
b = text:sub(i, i):byte()
until not b or b <= 127 or b >= 194
i = i - 1
insert(chars, text:sub(from, i))
end
end
end
local pos = 0
local function do_iter(start, from, to)
local prev = chars[start - 1]
while start > 1 and not long_vowel[prev] do
start = start - 1
prev = chars[start - 1]
end
start = start - 1
insert(from, 1, start)
insert(to, pos)
return start
end
end
text = ugsub(toNFD(text), "[ぁ-ゖゝゞ]", change_codepoint(96))
text = ugsub(text, "[𛅐-𛅒]", change_codepoint(20))
repeat
text = ugsub(text, "[𛀁𛀆𛄟𛄲]", data.hira_to_kata)
pos = pos + 1
local char = chars[pos]
if char == "ー" then
local start = pos
local prev = chars[pos - 1]
while start > 1 and not long_vowel[prev] do
start = start - 1
prev = chars[start - 1]
end
chars[pos] = long_vowel[prev] or chars[pos]
elseif voice_marks[char] then
chars[pos] = voice_marks[char]
elseif iter_marks[char] then
local from, to = {}, {}
local start = do_iter(pos, from, to)
local next = chars[pos + 1]
while next and (iter_marks[next] or voice_marks[next] or specials[next] or next:sub(1, 1) == "<") do
pos = pos + 1
if iter_marks[next] then
start = do_iter(start, from, to)
end
next = chars[pos + 1]
end
for i, char_pos in ipairs(from) do
local iter_pos = to[i]
chars[iter_pos] = chars[char_pos] or chars[iter_pos]
end
end
until pos >= #chars
return concat(chars)
end

function export.hira_to_kata(text)
if type(text) == "table" then text = text.args[1] end
text = gsub(text, '[ぁ-ゖゝゞ]', change_codepoint(96))
text = gsub(text, '[𛅐-𛅒]', change_codepoint(20))
text = gsub(text, '[𛀆𛄟]', {["𛀆"] = "𛄠", ["𛄟"] = "𛄢"})
return toNFC(text)
return toNFC(text)
end
end


function export.kata_to_hira(text)
function export.kata_to_hira(text)
if type(text) == "table" then text = text.args[1] end
if type(text) == "table" then
text = text.args[1]
end
text = gsub(toNFD(text), '[ァ-ヶヽヾ]', change_codepoint(-96))
text = ugsub(toNFD(text), "[ァ-ヶヽヾ]", change_codepoint(-96))
text = gsub(text, '[𛅤-𛅦]', change_codepoint(-20))
text = ugsub(text, "[𛅤-𛅦]", change_codepoint(-20))
text = gsub(text, '[𛄠𛄢]', {["𛄠"] = "𛀆", ["𛄢"] = "𛄟"})
text = ugsub(text, "[𛀀𛄠-𛄢𛅕]", data.kata_to_hira)
return toNFC(text)
return toNFC(text)
end

function export.fullwidth_to_halfwidth(text)
if type(text) == "table" then text = text.args[1] end

return (gsub(text:gsub(' ', ' '), '[!-~]', change_codepoint(-65248)))
end
end


Ред 157: Ред 62:
-- insertion of spaces or hyphens in manual romaji without appearing "wrong"
-- insertion of spaces or hyphens in manual romaji without appearing "wrong"
function export.rm_spaces_hyphens(f)
function export.rm_spaces_hyphens(f)
local text = type(f) == 'table' and f.args[1] or f
local text = type(f) == "table" and f.args[1] or f
text = text:gsub('.', { [' '] = '', ['-'] = '', ['.'] = '', ['\''] = '' })
return (text:gsub("[ '%-.]+", "")
text = text:gsub('&nbsp;', '')
:gsub("&nbsp;", ""))
return text
end
end


do
function export.romaji_to_kata(f)
local function handle_macron(ch)
local text = type(f) == 'table' and f.args[1] or f
return ch == "o" and "ou" or ch .. ch
text = text:ulower()
end
text = text:gsub('[\1-\255][\128-\191]*', data.rd)
text = text:gsub('(.)%1', {
function export.romaji_to_kata(f)
k = 'ッk', s = 'ッs', t = 'ッt', p = 'ッp',
local text = type(f) == "table" and f.args[1] or f
b = 'ッb', d = 'ッd', g = 'ッg', j = 'ッj'
text = ulower(toNFD(text))
})
text = text:gsub('tc', 'ッc')
text = text:gsub("(.[\128-\191]*)\204\132", handle_macron)
text = text:gsub('tsyu', 'ツュ')
:gsub("(.)%1", "ッ%1")
:gsub("tc", "ッc")
text = text:gsub('ts[uoiea]', {['tsu']='ツ',['tso']='ツォ',['tsi']='ツィ',['tse']='ツェ',['tsa']='ツァ'})
:gsub("tsyu", "ツュ")
text = text:gsub('sh[uoiea]', {['shu']='シュ',['sho']='ショ',['shi']='シ',['she']='シェ',['sha']='シャ'})
text = text:gsub('ch[uoiea]', {['chu']='チュ',['cho']='チョ',['chi']='チ',['che']='チ',['cha']='チャ'})
:gsub("ts[uoiea]", {["tsu"]="ツ",["tso"]="ツォ",["tsi"]="ツィ",["tse"]="ツ",["tsa"]="ツァ"})
text = text:gsub("n[uoiea']?", {['nu']='ヌ',['no']='ノ',['ni']='ニ',['ne']='ネ',['na']='ナ'})
:gsub("sh[uoiea]", {["shu"]="シュ",["sho"]="ショ",["shi"]="シ",["she"]="シェ",["sha"]="シャ"})
:gsub("ch[uoiea]", {["chu"]="チュ",["cho"]="チョ",["chi"]="チ",["che"]="チェ",["cha"]="チャ"})
text = text:gsub('[wvtrpsnmlkjhgfdbzy][yw]?[uoiea]', data.rk)
:gsub("n[uoiea']?", {["nu"]="ヌ",["no"]="ノ",["ni"]="ニ",["ne"]="ネ",["na"]="ナ"})
text = text:gsub("n'?", 'ン')
:gsub("[wvtrpsnmlkjhgfdbzy][yw]?[uoiea]", data.rk)
text = text:gsub('[aeiou]', {
:gsub("n'?", "ン")
u = 'ウ', o = 'オ', i = 'イ', e = 'エ', a = 'ア'
:gsub("[aeiou]", {
})
u = "ウ", o = "オ", i = "イ", e = "エ", a = "ア"
return text
})
return text
end
end
end


Ред 189: Ред 96:
-- e.g. given イギリス人, it returns Kana+Hani
-- e.g. given イギリス人, it returns Kana+Hani
function export.script(f)
function export.script(f)
local text = type(f) == 'table' and f.args[1] or f
local text = type(f) == "table" and f.args[1] or f
local script = {}
local script = {}
-- For Hira and Kana, we remove any characters which also feature in the other first, so that we don't get false positives for ー etc.
-- For Hira and Kana, we remove any characters which also feature in the other first, so that we don't get false positives for ー etc.
if find(gsub(text, "[" .. range.katakana .. "]+", ""), "[" .. range.hiragana .. "]") then
local no_overlap = ugsub(text, "[" .. range.kana_overlap .. "]+", "")
insert(script, 'Hira')
if umatch(no_overlap, "[" .. r_hiragana .. "ゟ]") then
insert(script, "Hira")
end
end
if umatch(no_overlap, "[" .. range.katakana .. "ヿ]") then
insert(script, "Kana")
end
if umatch(text, "[" .. range.kanji .. "]") then
insert(script, "Hani")
end
if umatch(text, "[" .. range.latin .. "]") then
insert(script, "Romaji")
end
if umatch(text, "[" .. range.numbers .. "]") then
insert(script, "Number")
end
if umatch(text, "[〆々]") then
insert(script, "Abbreviation")
end

return concat(script, "+")
end

do
local submoraic = range.submoraic_kana .. r_kana_combining_characters
local spacing_punc = "%s%p%$%+=>%^`|~"
local function handle_spacing_punc(ch, mora)
if find(gsub(text, "[" .. range.hiragana .. "]+", ""), "[" .. range.katakana .. "]") then
insert(script, 'Kana')
insert(mora, ch)
if ch:match("[^%^%%']") then
mora.sp = true
end
return ch, mora
end
end
local function iterate_mora(text, start, morae, mora)
if find(text, "[" .. range.kanji .. "]") then
mora = mora or {}
insert(script, 'Hani')
local ch = umatch(text, "^[" .. spacing_punc .. "]+", start)
if ch then
return handle_spacing_punc(ch, mora)
end
ch = usub(text, start, start)
if ch == "<" then
ch = umatch(text, "^<.->", start) or umatch(text, "^[<" .. spacing_punc .. "]+", start)
return handle_spacing_punc(ch, mora)
elseif (
mora.sp or
mora.kana and umatch(ch, "[^" .. submoraic .. "]")
) then
insert(morae, concat(mora))
mora = {}
end
mora.kana = true
insert(mora, ch)
return ch, mora
end
end
-- Returns an array of morae.
if find(text, "[" .. range.latin .. "]") then
-- Small vowel kana (and any combining dakuten/handakuten) are grouped with any prior word characters, which should be kana. Non-word characters (spaces, punctuation etc.) are accounted for, and grouped with surrounding morae wherever possible.
insert(script, 'Romaji')
function export.moraify(text)
local morae, start, text_len, mora = {}, 1, ulen(text)
while start <= text_len do
local ch
ch, mora = iterate_mora(text, start, morae, mora)
start = start + ulen(ch)
end
if mora then
insert(morae, concat(mora))
end
return morae
end
end
if find(text, '[' .. range.numbers .. ']') then
local function remove_formatting(text)
insert(script, 'Number')
return ugsub(text:gsub("<.->", ""), "[<" .. spacing_punc .. "]+", "")
end
end
if find(text, '[〆々]') then
-- Counts the number of morae.
insert(script, 'Abbreviation')
function export.count_morae(text)
text = export.moraify(text)
local morae = #text
for i = 1, morae do
if #remove_formatting(text[i]) == 0 then
morae = morae - 1
end
end
return morae
end
local function do_long_vowel(i, text)
if not text[i]:find("ー") then
return
end
local prev = text[i - 1]
if not prev then
return
end
prev = ugsub(remove_formatting(prev), "[" .. r_kana_combining_characters .. "]+", "")
:match("[^\128-\191][\128-\191]*$")
for vowel, kana in pairs(r_vowels) do
if kana:find(prev) then
local v = (umatch(prev, "[" .. r_hiragana .. "]") and long_vowels_hira or long_vowels_kata)[vowel]
text[i] = text[i]:gsub("ー", v, 1)
end
end
end
end


local function do_iteration_mark(i, n, text)
return concat(script, '+')
local mora = text[i]
end
if mora:find("ゝ") or mora:find("ヽ") then

return n + 1
-- when counting morae, most small hiragana belong to the previous mora,
elseif n == 0 then
-- so for purposes of counting them, they can be removed and the characters
return
-- can be counted to get the number of morae. The exception is small tsu,
end
-- so data.nonmora_to_empty maps all small hiragana except small tsu.
-- Count backwards once for each iteration mark, but stop early if we find something which can't be iterated, as that marks the start of the set to be repeated.
function export.count_morae(text)
local anchor = i
if type(text) == "table" then
text = text.args[1]
for j = 0, n - 1 do
local prev = text[anchor - j]
if not prev then
n = j
break
end
prev = remove_formatting(prev)
if prev:find("ゝ") or prev:find("ヽ") or umatch(prev, "[%s%p]") then
n = j
break
end
end
if n == 0 then
return
end
i = i - n + 1
-- Replace iteration marks ahead with the relevant character.
for j = i, i + n - 1 do
mora = remove_formatting(text[j]):gsub("^(.[\128-\191]*)\227\130[\153\154]", "%1")
text[j + n] = ugsub(text[j + n], "([ゝヽ])([゙゚]?)", function(mark, voicing)
local repl = mora:gsub("^.[\128-\191]*", "%0" .. voicing)
return mark == "ゝ" and export.kata_to_hira(repl) or export.hira_to_kata(repl)
end)
end
return
end
-- Normalizes long vowels, iteration marks and non-combining voice marks to the standard equivalents.
-- Note: output text is normalized to NFD.
function export.normalize_kana(text)
text = export.moraify((toNFD(text):gsub("[\227\239][\130\190][\155\156\158\159]", voice_marks)))
local n, morae = 0, #text
for i = morae, 1, -1 do
n = do_iteration_mark(i, n, text) or 0
end
for i = 1, morae do
do_long_vowel(i, text)
end
-- Normalize again to be safe.
return toNFD(concat(text))
end
end
-- convert kata to hira (hira is untouched)
text = export.kata_to_hira(text)
-- remove all of the small hiragana such as ょ except small tsu
text = text:gsub('[\1-\255][\128-\191]*',data.nonmora_to_empty)
-- remove zero-width spaces
text = text:gsub('‎', '')
-- return number of characters, which should be the number of morae
return len(text)
end

-- returns a sort key with |sort= in front, e.g.
-- |sort=はつぐん' if given ばつぐん
function export.sort(f)
return "|sort=" .. (get_by_code("ja"):makeSortKey(f))
end
end


-- returns the "stem" of a verb or -i adjective, that is the term minus the final character
-- returns the "stem" of a verb or -i adjective, that is the term minus the final character
function export.definal(f)
function export.definal(f)
return sub(f.args[1], 1, -2)
return usub(f.args[1], 1, -2)
end
end


Ред 258: Ред 278:
if namespace == "" then
if namespace == "" then
local params = {
local params = {
grade = {},
grade = {}, -- To be removed.
rs = {},
rs = {},
shin = {},
shin = {},
Ред 265: Ред 285:
}
}
local lang_code = frame.args[1]
local lang_code = frame.args[1]
local lang_name = get_by_code(lang_code):getCanonicalName()
local lang = get_by_code(lang_code)
local lang_name = lang:getCanonicalName()
local args = require("Модул:parameters").process(frame:getParent().args, params, nil, "ja", "kanji")
local args = require("Модул:parameters").process(frame:getParent().args, params, nil, "ja", "kanji")
local rs = args.rs or require("Модул:Hani-sortkey").makeSortKey(pagename) -- radical sort
local sortkey = args.rs or require("Модул:Hani-sortkey").makeSortKey(pagename) or pagename -- radical sort
local shin = args.shin
local shin = args.shin
local kyu = args.kyu
local kyu = args.kyu

local grade_replacements = {
local wikitext, categories = {}, {}
['c'] = 7,
['n'] = 8,
['uc'] = 9,
['r'] = 0,
}
local grade = args.grade
grade = tonumber(grade) or grade
grade = grade_replacements[grade] or grade

local wikitext = {}
local categories = {}

local catsort = rs or pagename

-- display the kanji itself at the top at 275% size
-- display the kanji itself at the top at 275% size
insert(wikitext, '<div><span lang="' .. lang_code .. '" class="Jpan" style="font-size:275%; line-height:1;">' .. (args.head or pagename) .. '</span></div>')
insert(wikitext, "<div><span lang=\"" .. lang_code .. "\" class=\"Jpan\" style=\"font-size:275%; line-height:1;\">" .. (args.head or pagename) .. "</span></div>")


-- display information for the grade
-- display information for the grade


-- determine grade
-- if grade was not specified, determine it now
local grade, in_parenthesis = export.kanji_grade(pagename), {}
if not grade then
insert(in_parenthesis, data.grade_links[grade])
grade = export.kanji_grade(pagename)
if args.grade then
require("Модул:debug/track")("ja/redundant grade parameter")
end
end

local in_parenthesis = {}
local grade_links = {
[1] = "[[w:Kyōiku kanji|grade 1 “Kyōiku” kanji]]",
[2] = "[[w:Kyōiku kanji|grade 2 “Kyōiku” kanji]]",
[3] = "[[w:Kyōiku kanji|grade 3 “Kyōiku” kanji]]",
[4] = "[[w:Kyōiku kanji|grade 4 “Kyōiku” kanji]]",
[5] = "[[w:Kyōiku kanji|grade 5 “Kyōiku” kanji]]",
[6] = "[[w:Kyōiku kanji|grade 6 “Kyōiku” kanji]]",
[7] = "[[w:Jōyō kanji|common “Jōyō” kanji]]",
[8] = "[[w:Jinmeiyō kanji|“Jinmeiyō” kanji used for names]]",
[9] = "[[w:Hyōgai kanji|uncommon “Hyōgai” kanji]]",
[0] = "[[w:Radical_(Chinese_character)|Radical]]",
}
if grade_links[grade] then
insert(in_parenthesis, grade_links[grade])
else
insert(categories, "[[Категорија:" .. lang_name .. " kanji missing grade|" .. catsort .. "]]")
end

-- link to shinjitai if shinjitai was specified, and link to kyujitai if kyujitai was specified
-- link to shinjitai if shinjitai was specified, and link to kyujitai if kyujitai was specified

if kyu then
if kyu then
insert(in_parenthesis, '[[shinjitai]] kanji, [[kyūjitai]] form <span lang="' .. lang_code .. '" class="Jpan">[[' .. kyu .. '#' .. lang_name .. '|' .. kyu .. ']]</span>')
insert(in_parenthesis, "[[shinjitai]] kanji, [[kyūjitai]] form <span lang=\"" .. lang_code .. "\" class=\"Jpan\">[[" .. kyu .. "#" .. lang_name .. "|" .. kyu .. "]]</span>")
elseif shin then
elseif shin then
insert(in_parenthesis, '[[kyūjitai]] kanji, [[shinjitai]] form <span lang="' .. lang_code .. '" class="Jpan">[[' .. shin .. '#' .. lang_name .. '|' .. shin .. ']]</span>')
insert(in_parenthesis, "[[kyūjitai]] kanji, [[shinjitai]] form <span lang=\"" .. lang_code .. "\" class=\"Jpan\">[[" .. shin .. "#" .. lang_name .. "|" .. shin .. "]]</span>")
end
end
insert(wikitext, "''(" .. concat(in_parenthesis, ",&nbsp;") .. "'')")
insert(wikitext, "''(" .. concat(in_parenthesis, ",&nbsp;") .. "'')")


-- add categories
-- add categories
insert(categories, "[[Категорија:" .. lang_name .. " Han characters|" .. catsort .. "]]")
insert(categories, lang_name .. " Хан карактери")
insert(categories, lang_name .. " " .. data.grades[grade])
local grade_categories = {
[1] = "Grade 1 kanji",
[2] = "Grade 2 kanji",
if grade <= 6 then
[3] = "Grade 3 kanji",
insert(categories, lang_name .. " kyōiku kanji")
insert(categories, lang_name .. " jōyō kanji") -- Grade 7 get this from the data.
[4] = "Grade 4 kanji",
[5] = "Grade 5 kanji",
[6] = "Grade 6 kanji",
[7] = "Common kanji",
[8] = "Kanji used for names",
[9] = "Uncommon kanji",
[0] = "CJKV radicals",
}
insert(categories, "[[Категорија:" .. (grade_categories[grade] or error("The grade " .. grade .. " is invalid.")) .. "|" .. (grade == "0" and " " or catsort) .. "]]")

-- error category
if not rs then
insert(categories, "[[Категорија:" .. lang_name .. " kanji missing radical and strokes]]")
end
end
if mw.title.new(lang_name .. " terms spelled with " .. pagename, 14).exists then
if mw.title.new(lang_name .. " terms spelled with " .. pagename, 14).exists then
insert(wikitext, 1, '<div class="noprint floatright catlinks" style="font-size: 90%; width: 270px"><div style="padding:0 5px"><i>See also:</i><div style="margin-left: 10px;">[[:Category:' .. lang_name .. ' terms spelled with ' .. pagename .. ']]</div></div></div>')
insert(wikitext, 1, "<div class=\"noprint floatright catlinks\" style=\"font-size: 90%; width: 270px\"><div style=\"padding:0 5px\"><i>See also:</i><div style=\"margin-left: 10px;\">[[:Category:" .. lang_name .. " terms spelled with " .. pagename .. "]]</div></div></div>")
end
end

return concat(wikitext) .. concat(categories, "\n")
return concat(wikitext) .. require("Модул:utilities").format_categories(categories, lang, sortkey)
end
end
end
end

local grade1_pattern = ('[' .. data.grade1 .. ']')
local grade2_pattern = ('[' .. data.grade2 .. ']')
local grade3_pattern = ('[' .. data.grade3 .. ']')
local grade4_pattern = ('[' .. data.grade4 .. ']')
local grade5_pattern = ('[' .. data.grade5 .. ']')
local grade6_pattern = ('[' .. data.grade6 .. ']')
local secondary_pattern = ('[' .. data.secondary .. ']')
local jinmeiyo_kanji_pattern = ('[' .. data.jinmeiyo_kanji .. ']')
local hyogaiji_pattern = ('[^' .. data.joyo_kanji .. data.jinmeiyo_kanji .. ']')


function export.kanji_grade(kanji)
function export.kanji_grade(kanji)
for i, set in ipairs(data.grade_kanji) do
if type(kanji) == "table" then
kanji = kanji.args[1]
if find(set, kanji, 1, true) then
return i
end
end
end
return umatch(kanji, "[" .. range.kanji .. "]") and 9 or false

if find(kanji, hyogaiji_pattern) then return 9
elseif find(kanji, jinmeiyo_kanji_pattern) then return 8
elseif find(kanji, secondary_pattern) then return 7
elseif find(kanji, grade6_pattern) then return 6
elseif find(kanji, grade5_pattern) then return 5
elseif find(kanji, grade4_pattern) then return 4
elseif find(kanji, grade3_pattern) then return 3
elseif find(kanji, grade2_pattern) then return 2
elseif find(kanji, grade1_pattern) then return 1
end

return false
end
end



Тренутна верзија на датум 19. јул 2024. у 16:52

Script error: The function "main" does not exist.

local m_str_utils = require("Модул:string utilities")

local export = {}

local codepoint = m_str_utils.codepoint
local concat = table.concat
local find = string.find
local get_by_code = require("Модул:languages").getByCode
local insert = table.insert
local load_data = mw.loadData
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local u = m_str_utils.char
local ugsub = m_str_utils.gsub
local ulen = m_str_utils.len
local ulower = m_str_utils.lower
local umatch = mw.ustring.match
local usub = m_str_utils.sub

-- note that arrays loaded by mw.loadData cannot be directly used by gsub
local pagename -- generated when needed, to avoid an infinite loop with [[Module:Jpan-sortkey]]
local namespace = mw.title.getCurrentTitle().nsText

local data = load_data("Модул:ja/data")
local long_vowels_hira = data.long_vowels_hira
local long_vowels_kata = data.long_vowels_kata
local voice_marks = data.voice_marks

local range = load_data("Модул:ja/data/range")
local r_hiragana = range.hiragana
local r_vowels = range.vowels
local r_kana_combining_characters = range.kana_combining_characters

-- Builds a replacement callback that shifts a character's codepoint by `added_value`.
-- The returned function takes a single character and returns the character whose
-- codepoint is `added_value` higher (or lower, when `added_value` is negative).
local function change_codepoint(added_value)
	local function shift(char)
		local shifted = codepoint(char) + added_value
		return u(shifted)
	end
	return shift
end

-- Converts hiragana in `text` to katakana.
-- `text` may be a string, or a frame-like table whose first argument (`args[1]`) is the string.
-- The text is decomposed (NFD) before conversion and the result is returned in NFC.
function export.hira_to_kata(text)
	local input = text
	if type(input) == "table" then
		input = input.args[1]
	end
	local decomposed = toNFD(input)
	-- Characters in these ranges are converted by a fixed codepoint offset.
	local converted = ugsub(decomposed, "[ぁ-ゖゝゞ]", change_codepoint(96))
	converted = ugsub(converted, "[𛅐-𛅒]", change_codepoint(20))
	-- The remaining characters are converted through the lookup table in the data module.
	converted = ugsub(converted, "[𛀁𛀆𛄟𛄲]", data.hira_to_kata)
	return toNFC(converted)
end

-- Converts katakana in `text` to hiragana.
-- `text` may be a string, or a frame-like table whose first argument (`args[1]`) is the string.
-- The text is decomposed (NFD) before conversion and the result is returned in NFC.
function export.kata_to_hira(text)
	local input = text
	if type(input) == "table" then
		input = input.args[1]
	end
	local decomposed = toNFD(input)
	-- Characters in these ranges are converted by a fixed codepoint offset.
	local converted = ugsub(decomposed, "[ァ-ヶヽヾ]", change_codepoint(-96))
	converted = ugsub(converted, "[𛅤-𛅦]", change_codepoint(-20))
	-- The remaining characters are converted through the lookup table in the data module.
	converted = ugsub(converted, "[𛀀𛄠-𛄢𛅕]", data.kata_to_hira)
	return toNFC(converted)
end

-- Strips separators from the input: spaces, apostrophes, hyphens and periods,
-- followed by any literal "&nbsp;" entities.
-- Intended to be used when checking manual romaji, so that such separators can be
-- inserted in manual romaji without it appearing "wrong".
-- `f` may be a string, or a frame-like table whose first argument (`args[1]`) is the string.
function export.rm_spaces_hyphens(f)
	local text = f
	if type(f) == "table" then
		text = f.args[1] or f
	end
	local stripped = text:gsub("[ '%-.]+", "")
	stripped = stripped:gsub("&nbsp;", "")
	return stripped
end

do
	-- Expands a character that was followed by a combining macron:
	-- "o" becomes "ou"; any other character is simply doubled.
	local function handle_macron(ch)
		return ch == "o" and "ou" or ch .. ch
	end

	-- Converts romaji to katakana.
	-- `f` may be a string, or a frame-like table whose first argument (`args[1]`) is the string.
	-- The input is decomposed (NFD) and lowercased, then rewritten step by step with byte-level gsub calls.
	function export.romaji_to_kata(f)
		local text = type(f) == "table" and f.args[1] or f
		text = ulower(toNFD(text))
		-- "\204\132" is the UTF-8 encoding of the combining macron (U+0304).
		text = text:gsub("(.[\128-\191]*)\204\132", handle_macron)
			-- Only doubled ASCII consonants other than "n" mark gemination (small tsu).
			-- Matching any doubled byte would also hit the doubled vowels produced by
			-- handle_macron above, plain long vowels such as "oo", "nn" (which must
			-- become ン + n-syllable), doubled spaces or hyphens, and identical adjacent
			-- bytes inside a multi-byte character.
			:gsub("([bcdfghjklmpqrstvwxyz])%1", "ッ%1")
			:gsub("tc", "ッc")
			:gsub("tsyu", "ツュ")
			:gsub("ts[uoiea]", {["tsu"]="ツ",["tso"]="ツォ",["tsi"]="ツィ",["tse"]="ツェ",["tsa"]="ツァ"})
			:gsub("sh[uoiea]", {["shu"]="シュ",["sho"]="ショ",["shi"]="シ",["she"]="シェ",["sha"]="シャ"})
			:gsub("ch[uoiea]", {["chu"]="チュ",["cho"]="チョ",["chi"]="チ",["che"]="チェ",["cha"]="チャ"})
			:gsub("n[uoiea']?", {["nu"]="ヌ",["no"]="ノ",["ni"]="ニ",["ne"]="ネ",["na"]="ナ"})
			-- Remaining consonant (+ optional y/w) + vowel syllables are looked up in the data module.
			:gsub("[wvtrpsnmlkjhgfdbzy][yw]?[uoiea]", data.rk)
			-- Any "n" still left (optionally followed by an apostrophe) is the moraic nasal.
			:gsub("n'?", "ン")
			:gsub("[aeiou]", {
			u = "ウ", o = "オ", i = "イ", e = "エ", a = "ア"
			})
		return text
	end
end

-- Expects any mix of kanji and kana, and reports which script types are used.
-- `f` may be a string, or a frame-like table whose first argument (`args[1]`) is the string.
-- Returns the detected script names joined with "+",
-- e.g. given イギリス人, it returns Kana+Hani.
function export.script(f)
	local text = type(f) == "table" and f.args[1] or f
	local found = {}

	-- Appends `name` to the result when `haystack` contains a character from `set`.
	local function detect(haystack, set, name)
		if umatch(haystack, "[" .. set .. "]") then
			found[#found + 1] = name
		end
	end

	-- For Hira and Kana, characters which feature in both are removed first, so that
	-- there are no false positives for ー etc.
	local no_overlap = ugsub(text, "[" .. range.kana_overlap .. "]+", "")

	detect(no_overlap, r_hiragana .. "ゟ", "Hira")
	detect(no_overlap, range.katakana .. "ヿ", "Kana")
	detect(text, range.kanji, "Hani")
	detect(text, range.latin, "Romaji")
	detect(text, range.numbers, "Number")
	detect(text, "〆々", "Abbreviation")

	return concat(found, "+")
end

do
	local submoraic = range.submoraic_kana .. r_kana_combining_characters
	local spacing_punc = "%s%p%$%+=>%^`|~"
	
	-- Appends the spacing/punctuation/markup chunk `ch` to the mora being built.
	-- If the chunk contains anything other than "^", "%" or "'", the mora is flagged
	-- with `sp`, which makes the next kana start a new mora.
	-- Returns the chunk and the mora table.
	local function handle_spacing_punc(ch, mora)
		mora[#mora + 1] = ch
		if ch:find("[^%^%%']") then
			mora.sp = true
		end
		return ch, mora
	end
	
	-- Consumes one chunk of `text` beginning at character index `start`.
	-- `morae` is the array of completed morae; `mora` is the mora currently being
	-- built (a new one is created when nil).
	-- Returns the consumed chunk and the (possibly new) mora under construction.
	local function iterate_mora(text, start, morae, mora)
		if not mora then
			mora = {}
		end
		-- A run of spacing/punctuation is attached to the current mora.
		local punc = umatch(text, "^[" .. spacing_punc .. "]+", start)
		if punc then
			return handle_spacing_punc(punc, mora)
		end
		local ch = usub(text, start, start)
		if ch == "<" then
			-- Either a complete <...> tag, or a stray "<" taken together with any following punctuation.
			local markup = umatch(text, "^<.->", start) or umatch(text, "^[<" .. spacing_punc .. "]+", start)
			return handle_spacing_punc(markup, mora)
		end
		-- Close the current mora if it ended in spacing/punctuation, or if it already
		-- holds a kana and this character is not a submoraic one.
		local starts_new = mora.sp or (mora.kana and umatch(ch, "[^" .. submoraic .. "]"))
		if starts_new then
			insert(morae, concat(mora))
			mora = {}
		end
		mora.kana = true
		insert(mora, ch)
		return ch, mora
	end
	
	-- Splits `text` into an array of morae.
	-- Submoraic kana (and any combining dakuten/handakuten) are grouped with the
	-- preceding character, which should be kana. Spaces, punctuation and markup are
	-- accounted for, and grouped with surrounding morae wherever possible.
	function export.moraify(text)
		local morae, current = {}
		local pos, last = 1, ulen(text)
		while pos <= last do
			local consumed
			consumed, current = iterate_mora(text, pos, morae, current)
			pos = pos + ulen(consumed)
		end
		-- Flush the mora that was still being built when the text ended.
		if current then
			insert(morae, concat(current))
		end
		return morae
	end
	
	-- Strips <...> tags first, then any remaining "<", spacing and punctuation characters.
	local function remove_formatting(text)
		local without_tags = text:gsub("<.->", "")
		return ugsub(without_tags, "[<" .. spacing_punc .. "]+", "")
	end
	
	-- Counts the number of morae in `text`.
	-- Entries produced by moraify that are empty once formatting is removed are not counted.
	function export.count_morae(text)
		local count = 0
		for _, mora in ipairs(export.moraify(text)) do
			if remove_formatting(mora) ~= "" then
				count = count + 1
			end
		end
		return count
	end
	
	-- Replaces the first long vowel mark "ー" in mora `i` of the array `text` with the
	-- vowel kana matching the last character of the previous mora.
	-- Does nothing if mora `i` has no "ー", if there is no previous mora, or if the
	-- previous mora contains no character to take the vowel from.
	local function do_long_vowel(i, text)
		if not text[i]:find("ー") then
			return
		end
		local prev = text[i - 1]
		if not prev then
			return
		end
		-- Take the last character of the previous mora, ignoring formatting and combining kana marks.
		prev = ugsub(remove_formatting(prev), "[" .. r_kana_combining_characters .. "]+", "")
			:match("[^\128-\191][\128-\191]*$")
		-- The previous mora may consist solely of formatting/punctuation, in which case
		-- nothing is left to match; leave the mark untouched instead of passing nil to find.
		if not prev then
			return
		end
		for vowel, kana in pairs(r_vowels) do
			-- Plain search: `prev` is a literal character, not a pattern.
			if kana:find(prev, 1, true) then
				-- Use the hiragana or katakana vowel depending on the script of the previous character.
				local v = (umatch(prev, "[" .. r_hiragana .. "]") and long_vowels_hira or long_vowels_kata)[vowel]
				text[i] = text[i]:gsub("ー", v, 1)
			end
		end
	end

	-- Handles iteration marks (ゝ/ヽ) for mora `i` of the array `text`.
	-- normalize_kana calls this while walking the array backwards; `n` is the running
	-- count that this function returned for the morae after `i` (reset to 0 by the
	-- caller whenever nil is returned).
	-- If mora `i` contains an iteration mark, returns n + 1. Otherwise returns nil,
	-- after replacing up to `n` iteration marks in the morae following `i` with copies
	-- of the morae ending at `i`.
	local function do_iteration_mark(i, n, text)
		local mora = text[i]
		if mora:find("ゝ") or mora:find("ヽ") then
			return n + 1
		elseif n == 0 then
			-- No pending iteration marks after this mora, so nothing to do.
			return
		end
		-- Count backwards once for each iteration mark, but stop early if we find something which can't be iterated, as that marks the start of the set to be repeated.
		local anchor = i
		for j = 0, n - 1 do
			local prev = text[anchor - j]
			if not prev then
				-- Ran off the start of the array.
				n = j
				break
			end
			prev = remove_formatting(prev)
			if prev:find("ゝ") or prev:find("ヽ") or umatch(prev, "[%s%p]") then
				n = j
				break
			end
		end
		if n == 0 then
			return
		end
		-- Move `i` to the first mora of the set to be repeated.
		i = i - n + 1
		-- Replace iteration marks ahead with the relevant character.
		for j = i, i + n - 1 do
			-- Source mora without formatting, and without a combining (han)dakuten
			-- ("\227\130\153" / "\227\130\154") directly after its first character.
			mora = remove_formatting(text[j]):gsub("^(.[\128-\191]*)\227\130[\153\154]", "%1")
			text[j + n] = ugsub(text[j + n], "([ゝヽ])([゙゚]?)", function(mark, voicing)
				-- Attach any voicing mark that followed the iteration mark to the first character of the copy.
				local repl = mora:gsub("^.[\128-\191]*", "%0" .. voicing)
				-- ゝ yields hiragana; ヽ yields katakana.
				return mark == "ゝ" and export.kata_to_hira(repl) or export.hira_to_kata(repl)
			end)
		end
		return
	end
	
	-- Normalizes long vowels, iteration marks and non-combining voice marks to the standard equivalents.
	-- Note: output text is normalized to NFD.
	function export.normalize_kana(text)
		-- Swap the non-combining voice marks via the data table, then split into morae.
		local decomposed = toNFD(text):gsub("[\227\239][\130\190][\155\156\158\159]", voice_marks)
		local morae = export.moraify(decomposed)
		local total = #morae

		-- Iteration marks are resolved walking backwards, carrying the count of marks seen so far.
		local pending = 0
		local i = total
		while i >= 1 do
			pending = do_iteration_mark(i, pending, morae) or 0
			i = i - 1
		end

		-- Long vowel marks are resolved walking forwards.
		for j = 1, total do
			do_long_vowel(j, morae)
		end

		-- Normalize again to be safe.
		return toNFD(concat(morae))
	end
end

-- Returns the "stem" of a verb or -i adjective, that is, the term (taken from
-- `f.args[1]`) minus its final character.
function export.definal(f)
	local term = f.args[1]
	return usub(term, 1, -2)
end

-- Removes the characters "^", "-", ".", " " and "%" from `text`.
function export.remove_ruby_markup(text)
	local stripped = text:gsub("[%^%-%. %%]", "")
	return stripped
end

-- Does the work of [[Template:ja-kanji]], [[Template:ryu-kanji]] etc.
-- Should probably be folded into [[Module:Jpan-headword]].
--
-- `frame.args[1]` is the language code. The template's own arguments are read from
-- the parent frame: `grade` (only tracked, otherwise ignored), `rs`, `shin`, `kyu`
-- and `head`.
-- Returns the headword wikitext followed by the formatted categories; returns
-- nothing when the current page is not in the main namespace.
function export.kanji(frame)
	pagename = pagename or load_data("Модул:headword/data").pagename
	-- only do this if this entry is a kanji page and not some user's page
	if namespace == "" then
		local params = {
			grade = {}, -- To be removed.
			rs = {},
			shin = {},
			kyu = {},
			head = {},
		}
		local lang_code = frame.args[1]
		local lang = get_by_code(lang_code)
		local lang_name = lang:getCanonicalName()
		local args = require("Модул:parameters").process(frame:getParent().args, params, nil, "ja", "kanji")
		local sortkey = args.rs or require("Модул:Hani-sortkey").makeSortKey(pagename) or pagename -- radical sort
		local shin = args.shin
		local kyu = args.kyu
		
		local wikitext, categories = {}, {}
		
		-- display the kanji itself at the top at 275% size
		insert(wikitext, "<div><span lang=\"" .. lang_code .. "\" class=\"Jpan\" style=\"font-size:275%; line-height:1;\">" .. (args.head or pagename) .. "</span></div>")

		-- display information for the grade

		-- determine grade
		-- kanji_grade returns false when the page name contains no kanji; in that case
		-- all grade-dependent output is skipped, since concatenating data.grades[false]
		-- or comparing false with a number would raise an error.
		local grade, in_parenthesis = export.kanji_grade(pagename), {}
		if grade then
			insert(in_parenthesis, data.grade_links[grade])
		end
		
		if args.grade then
			require("Модул:debug/track")("ja/redundant grade parameter")
		end
		
		-- link to shinjitai if shinjitai was specified, and link to kyujitai if kyujitai was specified
		if kyu then
			insert(in_parenthesis, "[[shinjitai]] kanji, [[kyūjitai]] form <span lang=\"" .. lang_code .. "\" class=\"Jpan\">[[" .. kyu .. "#" .. lang_name .. "|" .. kyu .. "]]</span>")
		elseif shin then
			insert(in_parenthesis, "[[kyūjitai]] kanji, [[shinjitai]] form <span lang=\"" .. lang_code .. "\" class=\"Jpan\">[[" .. shin .. "#" .. lang_name .. "|" .. shin .. "]]</span>")
		end
		-- Avoid emitting an empty "()" when there is nothing to show.
		if #in_parenthesis > 0 then
			insert(wikitext, "''(" .. concat(in_parenthesis, ",&nbsp;") .. "'')")
		end

		-- add categories
		insert(categories, lang_name .. " Хан карактери")
		if grade then
			insert(categories, lang_name .. " " .. data.grades[grade])
			
			if grade <= 6 then
				insert(categories, lang_name .. " kyōiku kanji")
				insert(categories, lang_name .. " jōyō kanji") -- Grade 7 get this from the data.
			end
		end
		
		-- Link to the "terms spelled with" category (namespace 14) when it exists.
		if mw.title.new(lang_name .. " terms spelled with " .. pagename, 14).exists then
			insert(wikitext, 1, "<div class=\"noprint floatright catlinks\" style=\"font-size: 90%; width: 270px\"><div style=\"padding:0 5px\"><i>See also:</i><div style=\"margin-left: 10px;\">[[:Category:" .. lang_name .. " terms spelled with " .. pagename .. "]]</div></div></div>")
		end
		
		return concat(wikitext) .. require("Модул:utilities").format_categories(categories, lang, sortkey)
	end
end

-- Determines the grade of `kanji`.
-- Returns the index of the first set in `data.grade_kanji` that contains `kanji`
-- as a plain substring. If no set contains it, returns 9 when `kanji` includes a
-- character from `range.kanji`, and false otherwise.
function export.kanji_grade(kanji)
	for grade, members in ipairs(data.grade_kanji) do
		local hit = find(members, kanji, 1, true)
		if hit then
			return grade
		end
	end
	if umatch(kanji, "[" .. range.kanji .. "]") then
		return 9
	end
	return false
end

return export