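-- Page title: 模組:Wuu-pron/sandbox ("Module:Wuu-pron/sandbox")
-- Stray wiki interface label carried over with the page text: 外观 ("Appearance")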
-- Table that collects this module's public functions; it is returned at the
-- end of the file.
local export = {}
--[=[ for future direct wugniu to ipa
TODO:
- do IPA for glottalised nasal initials (currently the glottal stop is dropped)
- add Suzhou (still experimental)
-- rom_check
-- RPS (does Suzhou have RPS?)
- add Wenzhou?
- add Hangzhou maybe?
]=]--
-- Romanised syllable initial -> IPA string.
-- Looked up by ipa_table_lookup_2 with the initial captured in ipa_syl_conv.
-- The apostrophe-prefixed keys ("'m", "'n", "'l", "'gn", "'ng") carry the same
-- IPA value as their plain counterparts, i.e. no glottal stop is written for
-- them (this matches the first TODO item at the top of the file).
-- NOTE(review): the initial pattern in ipa_syl_conv only starts with a letter,
-- so the apostrophe keys do not appear to be reachable from there - confirm.
local ipa_initial = {
["p"] = "p", ["ph"] = "pʰ", ["b"] = "b", ["m"] = "m", ["'m"] = "m", ["f"] = "f", ["v"] = "v",
["t"] = "t", ["th"] = "tʰ", ["d"] = "d", ["n"] = "n", ["'n"] = "n", ["l"] = "l", ["'l"] = "l",
["ts"] = "t͡s", ["tsh"] = "t͡sʰ", ["s"] = "s", ["z"] = "z", ["c"] = "t͡ɕ", ["ch"] = "t͡ɕʰ",
["j"] = "d͡ʑ", ["gn"] = "n̠ʲ", ["'gn"] = "n̠ʲ", ["sh"] = "ɕ", ["zh"] = "ʑ",
["k"] = "k", ["kh"] = "kʰ", ["g"] = "ɡ", ["ng"] = "ŋ", ["'ng"] = "ŋ", ["h"] = "h",
["'"] = "ʔ", ["gh"] = "ɦ",
}
-- Romanised syllable final (everything after the initial) -> IPA string.
-- Used for the final part of initial+final syllables, for syllables with no
-- initial, and (after an "i"/"u" prefix is ensured) for syllables spelled with
-- a leading "y"/"w".
-- Every key ending in "q" maps to a value ending in "ʔ".
local ipa_final = {
["a"] = "a", ["o"] = "o", ["au"] = "ɔ", ["eu"] = "ɤ", ["e"] = "e̞", ["oe"] = "ø",
["i"] = "i", ["ia"] = "ia", ["io"] = "io", ["iau"] = "iɔ", ["ieu"] = "iɤ",
["u"] = "u", ["ua"] = "ua", ["ue"] = "ue̞", ["uoe"] = "uø",
["iu"] = "y", ["ioe"] = "yø",
["an"] = "ã", ["aon"] = "ɑ̃", ["en"] = "ən", ["on"] = "oŋ",
["aq"] = "aʔ", ["oq"] = "oʔ", ["eq"] = "əʔ",
["ian"] = "iã", ["iaon"] = "iɑ̃", ["in"] = "in", ["ion"] = "ioŋ",
["iaq"] = "iaʔ", ["ioq"] = "iʊʔ", ["iq"] = "iɪʔ",
["uan"] = "uã", ["uaon"] = "uɑ̃", ["uen"] = "uən",
["uaq"] = "uaʔ", ["ueq"] = "uəʔ",
["iun"] = "yn", ["iuq"] = "ɥɪʔ",
["er"] = "əl", ["y"] = "z̩"
}
-- Whole-syllable spellings that consist of a syllabic nasal -> IPA string.
-- Consulted by ipa_syl_conv for syllables that end in "m" or "g".
-- The apostrophe-prefixed spellings map to the same IPA as the plain ones.
local ipa_syllabic = {
["m"] = "m̩", ["ng"] = "ŋ̍",
["'m"] = "m̩", ["'ng"] = "ŋ̍"
}
-- Tone contour strings.
-- Keys of the form "<syllable count>-<tone>" are built in wugniu_to_ipa from
-- the number of syllables in a word and its leading tone character; the value
-- holds one contour per syllable, separated by single spaces (it is split on
-- " " and applied syllable by syllable).
-- "1-0" is the empty contour and "1--" is used when the tone character is "-".
-- The "<tone>-single" and "multiple" keys are read only by RPS_tone_determ.
local tone_contours = {
["1-0"] = "", ["1--"] = "³³",
["1-1"] = "⁵³", ["2-1"] = "⁵⁵ ²¹", ["3-1"] = "⁵⁵ ³³ ²¹", ["4-1"] = "⁵⁵ ³³ ³³ ²¹", ["5-1"] = "⁵⁵ ³³ ³³ ³³ ²¹",
["1-5"] = "³⁴", ["2-5"] = "³³ ⁴⁴", ["3-5"] = "³³ ⁵⁵ ²¹", ["4-5"] = "³³ ⁵⁵ ³³ ²¹", ["5-5"] = "³³ ⁵⁵ ³³ ³³ ²¹",
["1-6"] = "²³", ["2-6"] = "²² ⁴⁴", ["3-6"] = "²² ⁵⁵ ²¹", ["4-6"] = "²² ⁵⁵ ³³ ²¹", ["5-6"] = "²² ⁵⁵ ³³ ³³ ²¹",
["1-7"] = "⁵⁵", ["2-7"] = "³³ ⁴⁴", ["3-7"] = "³³ ⁵⁵ ²¹", ["4-7"] = "³³ ⁵⁵ ³³ ²¹", ["5-7"] = "³³ ⁵⁵ ³³ ³³ ²¹",
["1-8"] = "¹²", ["2-8"] = "¹¹ ²³", ["3-8"] = "¹¹ ²² ²³", ["4-8"] = "²² ⁵⁵ ³³ ²¹", ["5-8"] = "²² ⁵⁵ ³³ ³³ ²¹",
--RPS tones
["1-single"] = "⁴⁴", ["5-single"] = "⁴⁴", ["6-single"] = "³³", ["7-single"] = "⁴⁴", ["8-single"] = "²²",
["multiple"] = "³³"
}
-- Select the replacement contour used by the "RPS" step of wugniu_to_ipa.
-- word_length: number of syllables in the word.
-- tone: the word's tone character (a string).
-- Returns tone_contours["<tone>-single"] for a one-syllable word and
-- tone_contours["multiple"] for anything longer; the result is nil when the
-- table has no matching "-single" entry.
local function RPS_tone_determ(word_length, tone)
	local key = "multiple"
	if word_length == 1 then
		key = tone .. "-single"
	end
	return tone_contours[key]
end
-- Validate one word written in Wugniu romanisation (tone character first,
-- then the space-separated syllables).
-- Raises an error describing the problem when the word is malformed; returns
-- nil when it is acceptable. The bare string "勿" is accepted without checks
-- (the caller deals with it separately).
-- Note: all string operations here are byte-based, so for a word starting with
-- "勿" the "tone" is the first byte of that character; the character class in
-- the tone check deliberately lists "勿" so that such a byte is accepted.
local function rom_check(text) --this checks wugniu
	if text == "勿" then
		return nil
	end
	local tone = text:sub(1, 1)
	local body = text:sub(2, -1)
	-- Every message is prefixed the same way so the offending input is always
	-- visible to the editor.
	local prefix = "Invalid syllable: " .. tone .. body .. ". "
	if body:find('[0-9]') then
		error(prefix .. "Multiple tone numbers.")
	end
	if tone:find('[^0-9-勿]') then
		error(prefix .. "Missing or incorrect tone number.")
	end
	-- Only the first syllable is inspected: it is "checked" when it ends in q.
	local checked = body:find('^[a-z]+q') ~= nil
	--[[ disable tone check for voicing
	(kept for reference; `voiced` is not computed anywhere at the moment)
	if voiced == 0 and tone:find("[157]") then
	error("Invalid syllable: "..tone..text..". Voiced initials only occur in tones 6 and 8.")
	elseif voiced == 1 and tone:find("[68]") then
	error("Invalid syllable: "..tone..text ..". Voiceless initials only occur in tones 1, 5 and 7.")
	end
	]]--
	if checked and tone:find("[156]") then
		error(prefix .. "Checked syllables only occur in tones 7 and 8.")
	elseif not checked and tone:find("[78]") then
		error(prefix .. "Unchecked syllables only occur in tones 1, 5 and 6.")
	end
	-- Spellings that only exist in the Wiktionary romanisation.
	if body:find("^['qx]") or body:find('ny') or body:find('hh') then
		error('Invalid syllable: ' .. body ..'. Wugniu expected, but Wiktionary romanisation is supplied.')
	end
	return nil
end
-- Lookup helpers for ipa_syl_conv. Each returns the IPA string, or nil when
-- the spelling is not present in the corresponding table, so that the caller
-- can report one uniform "Invalid syllable" error.

-- IPA for a syllable that is a syllabic nasal, or nil.
local function ipa_table_lookup_syllabic(text)
	return ipa_syllabic[text]
end

-- IPA for an initial + final pair, or nil when either half is unknown.
local function ipa_table_lookup_2(initial, final)
	local initial_ipa, final_ipa = ipa_initial[initial], ipa_final[final]
	if initial_ipa and final_ipa then
		return initial_ipa .. final_ipa
	end
	return nil
end

-- IPA for what follows a leading "y": "ɦ" plus the final, where the final is
-- given a leading "i" unless it already has one. nil when the final is unknown.
local function ipa_table_lookup_y(final)
	-- Extra parentheses drop gsub's second result (the match count).
	local final_ipa = ipa_final[(final:gsub("^i?", "i"))]
	return final_ipa and ("ɦ" .. final_ipa) or nil
end

-- IPA for what follows a leading "w": "ɦ" plus the final, where the final is
-- given a leading "u" unless it already has one. nil when the final is unknown.
local function ipa_table_lookup_w(final)
	local final_ipa = ipa_final[(final:gsub("^u?", "u"))]
	return final_ipa and ("ɦ" .. final_ipa) or nil
end

-- IPA for a syllable that consists of a final only, or nil.
local function ipa_table_lookup_final(final)
	return ipa_final[final]
end

-- Convert one romanised syllable (no tone character) to IPA.
-- text: the syllable as a string.
-- Returns the IPA string.
-- Raises 'Invalid syllable: "<text>"' when the syllable cannot be looked up.
-- Previously the lookups ran as gsub callbacks: a callback returning nil makes
-- gsub keep the matched text, so unknown syllables were returned unchanged,
-- and a missing initial/final raised a raw "concatenate a nil value" error.
function export.ipa_syl_conv(text)
	local result
	if text:find("[mg]$") then
		-- Anything ending in m/g is treated as a syllabic nasal.
		result = ipa_table_lookup_syllabic(text)
	elseif text:find("^[pbmfvtdnlszcjghk]") then
		-- Initial (one consonant letter, optional "s", optional h/n/g)
		-- followed by a non-empty final.
		local initial, final = text:match("^([pbmfvtdnlszcjgkh]s?[hng]?)(.+)$")
		if initial then
			result = ipa_table_lookup_2(initial, final)
		end
	elseif text:find("^y") then
		local final = text:match("^y(.+)$")
		if final then
			result = ipa_table_lookup_y(final)
		end
	elseif text:find("^w") then
		local final = text:match("^w(.+)$")
		if final then
			result = ipa_table_lookup_w(final)
		end
	else
		result = ipa_table_lookup_final(text)
	end
	if not result then
		return error(("Invalid syllable: \"%s\""):format(text))
	end
	return result
end
-- Convert a Wugniu-romanised entry to IPA with tone contours.
-- original_text is split in three levels: "," separates readings, "&"
-- separates components of a reading and "+" separates independent words of a
-- component. Each word is a tone character followed by space-separated
-- syllables (or starts with "勿", which is expanded to "veq" and given the
-- tone-6 contours).
-- Returns the readings joined with "/, /"; components, words and syllables
-- are joined with single spaces.
-- Raises on malformed input (see rom_check and export.ipa_syl_conv) and when
-- no contour exists for the word's syllable count and tone.
function export.wugniu_to_ipa(original_text)
	local readings = mw.text.split(original_text, ",", true)
	for reading_index = 1, #readings do
		local components = mw.text.split(readings[reading_index], "&", true)
		for component_index = 1, #components do
			local indep_words = mw.text.split(components[component_index], "+", true)
			for indep_index = 1, #indep_words do
				local text = indep_words[indep_index]
				-- Syllable count = number of spaces + 1 (gsub's second
				-- result is the number of replacements made).
				local word_length = select(2, text:gsub(" ", " ")) + 1
				rom_check(text)
				local tone_number
				if text:find("^勿") then
					if text == "勿" then
						error("勿 cannot be used alone.")
					end
					text = (text:gsub("勿", "veq"))
					tone_number = "6"
				else
					tone_number = text:sub(1, 1)
					text = text:sub(2, -1)
				end
				-- Checked on both branches: previously a "勿" word with no
				-- matching contour passed nil on to mw.text.split.
				local tone = tone_contours[word_length .. "-" .. tone_number]
					or error("Tone notation is incorrect. See [[WT:WUU]].")
				local syllable = mw.text.split(text, " ", true)
				local syl_tone = mw.text.split(tone, " ", true)
				local has_following_word = indep_words[indep_index + 1] ~= nil
				for i = 1, word_length do
					--RPS: the last syllable of a word that is followed by
					--another "+" word gets its contour replaced, unless the
					--word's contour is exactly "³³".
					if i == word_length and has_following_word and tone ~= "³³" then
						-- Keep the existing contour when there is no RPS
						-- entry for this tone (e.g. tone "0") instead of
						-- failing on a nil concatenation below.
						syl_tone[i] = RPS_tone_determ(word_length, tone_number) or syl_tone[i]
					end
					syllable[i] = export.ipa_syl_conv(syllable[i]) .. syl_tone[i]
				end
				indep_words[indep_index] = table.concat(syllable, " ")
			end
			components[component_index] = table.concat(indep_words, " ")
		end
		readings[reading_index] = table.concat(components, " ")
	end
	return table.concat(readings, "/, /")
end
-- Entry point: produce IPA for an entry in either romanisation.
-- text is either a string or a table whose args[1] holds the string (as when
-- invoked from wikitext). The string is lower-cased and split on ":".
--   no colon         -> treated as Wiktionary romanisation
--   "wt:<text>"      -> treated as Wiktionary romanisation
--   "<other>:<text>" -> <text> is taken to be Wugniu already
-- Anything after a second colon is ignored.
function export.wuu_ipa(text)
	if type(text) == "table" then
		text = text.args[1]
	end
	local parts = mw.text.split(mw.ustring.lower(text), ":", true)
	local wugniu
	if #parts == 1 then
		wugniu = export.wikt_to_wugniu(parts[1])
	elseif parts[1] == "wt" then
		wugniu = export.wikt_to_wugniu(parts[2])
	else
		wugniu = parts[2]
	end
	return export.wugniu_to_ipa(wugniu)
end
-- Ordered rewrite rules for export.wikt_to_wugniu. Each entry is
-- {pattern, replacement} as passed to string.gsub; the order matters because
-- later rules operate on the output of earlier ones.
local wikt_to_wugniu_rules = {
	--initials
	{"'''", "\1"}, --escape bold markup
	{"'", ""},
	{"\1", "'''"},
	{"j", "c"},
	{"cc", "j"}, --jj
	{"q(%a)", "ch%1"},
	{"x", "sh"},
	{"shsh", "zh"}, --xx
	{"ny", "gn"},
	{"hh", "gh"},
	--vowels
	{"un", "uen"},
	{"yoe", "ioe"},
	{"y", "iu"},
	{"aan", "aon"},
	{"([^e])r", "%1y"},
	{"mm", "m"},
	{"ngg", "ng"},
	--tones (5 is remapped first so that the new 5 made from 2 is not touched)
	{"5", "8"},
	{"4", "7"},
	{"3", "6"},
	{"2", "5"},
	--gh rules
	{"ghi", "yi"},
	{"yi([aeou])", "y%1"},
	{"ghu", "wu"},
	{"wu([aeo])", "w%1"},
	{"ghng", "ng"},
	{"ghm", "m"},
}

-- Convert text in the Wiktionary romanisation to Wugniu.
-- text is either a string or a table whose args[1] holds the string.
-- Returns the converted string only. The previous version returned the gsub
-- chain directly and therefore also returned the last gsub's match count as a
-- second value, which leaks into the output when the result is used as the
-- return value of an #invoke or as the last argument of another call.
function export.wikt_to_wugniu(text)
	if type(text) == "table" then text = text.args[1] end
	for _, rule in ipairs(wikt_to_wugniu_rules) do
		-- Assigning to a single variable keeps just the string result.
		text = text:gsub(rule[1], rule[2])
	end
	return text
end
-- Ordered rewrite rules for export.wugniu_to_wikt. Each entry is
-- {pattern, replacement} as passed to string.gsub; the order matters because
-- later rules operate on the output of earlier ones.
local wugniu_to_wikt_rules = {
	--initials
	--Glottal stops? text = text:gsub("", "'")
	{"j", "jj"},
	{"ch", "q"},
	{"c", "j"},
	-- "sh" becomes "x" except inside "tsh", which is left as it is
	-- (a plain "sh" -> "x" rule turned "tsh" into "tx").
	{"^sh", "x"},
	{"([^t])sh", "%1x"},
	{"zh", "xx"},
	-- A following "i" is absorbed, mirroring the "wu?" rule below, so that
	-- "yi..." gives "hhi..." rather than "hhii..." and round-trips with the
	-- "ghi" -> "yi" rule of export.wikt_to_wugniu.
	{"([0-9])yi?", "%1hhi"},
	{"wu?", "hhu"},
	{"gn", "ny"},
	{"gh", "hh"},
	--vowels
	{"y(%A)", "r%1"},
	{"y$", "r"},
	{"uen", "un"},
	{"ioe", "yoe"},
	{"iu", "y"},
	{"aon", "aan"},
	--syllabics
	{"(%d)m(%A)", "%1hhmm%2"},
	{"(%d)m$", "%1hhmm"},
	{"(%d)ng(%A)", "%1hhngg%2"},
	{"(%d)ng$", "%1hhngg"},
	--tones (5 is remapped first so that the new 5 made from 8 is not touched)
	{"5", "2"},
	{"6", "3"},
	{"7", "4"},
	{"8", "5"},
}

-- Convert text in Wugniu to the Wiktionary romanisation.
-- text is either a string or a table whose args[1] holds the string.
-- Returns the converted string only (the previous version also returned the
-- last gsub's match count as a second value).
function export.wugniu_to_wikt(text)
	if type(text) == "table" then text = text.args[1] end
	for _, rule in ipairs(wugniu_to_wikt_rules) do
		-- Assigning to a single variable keeps just the string result.
		text = text:gsub(rule[1], rule[2])
	end
	return text
end
-- Format an entry for display in Wugniu.
-- text is a string, optionally prefixed with "<scheme>:". With no prefix or
-- with the prefix "wt" the text is converted from the Wiktionary romanisation
-- first; with any other prefix the part after the colon is used as it is.
-- Display rewrites, in order: "勿" -> "8veq", spaces -> "-", "&"/"+" -> space,
-- "," -> "; ", and every run of digits is wrapped in <sup>...</sup>.
-- Returns the formatted string only.
function export.wugniu_format(text)
	local parts = mw.text.split(text, ":", true)
	local wugniu
	if #parts == 1 then
		wugniu = export.wikt_to_wugniu(parts[1])
	elseif parts[1] == 'wt' then
		wugniu = export.wikt_to_wugniu(parts[2])
	else
		wugniu = parts[2]
	end
	-- The outer parentheses discard gsub's second result (the match count),
	-- which was previously returned alongside the string.
	return (wugniu:gsub("勿", '8veq')
		:gsub(" ", "-")
		:gsub("[&+]", " ")
		:gsub(",", "; ")
		:gsub('([%d]+)', '<sup>%1</sup>'))
end
-- Format an entry for display in the Wiktionary romanisation.
-- text is a string, optionally prefixed with "<scheme>:". With no prefix the
-- text is used as it is; with the prefix "sh" the part after the colon is
-- converted with export.wugniu_to_wikt; with any other prefix the part after
-- the colon is used as it is.
-- Display rewrites, in order: "勿" -> "5veq", spaces -> "-", every run of
-- digits is wrapped in <sup>...</sup>, "&"/"+" -> space, "," -> "; ",
-- apostrophes are removed, and a "]" or ")" directly followed by a lower-case
-- letter becomes ") " plus that letter.
-- Returns the formatted string only.
function export.wikt_format(text)
	local parts = mw.text.split(text, ":", true)
	local wikt
	if #parts == 1 then
		wikt = parts[1]
	elseif parts[1] == 'sh' then
		wikt = export.wugniu_to_wikt(parts[2])
	else
		wikt = parts[2]
	end
	-- The outer parentheses discard gsub's second result (the match count),
	-- which was previously returned alongside the string.
	return (wikt:gsub("勿", '5veq')
		:gsub(" ", "-")
		:gsub("([%d]+)", '<sup>%1</sup>')
		:gsub("[&+]", " ")
		:gsub(",", "; ")
		:gsub("'", "")
		:gsub("[])]([a-z])", ") %1"))
end
-- Hand the table of public functions back to whoever loads this module.
return export