跳转到内容

模組:Wuu-pron/sandbox

維基詞典,自由的多語言詞典
-- Table collecting this module's public functions; it is returned at the end of the file.
local export = {}

--[=[ for future direct wugniu to ipa
TODO:
- do IPA for glottalised nasal initials (currently the glottal stop is dropped)
- add Suzhou (still experimental)
-- rom_check
-- RPS (does Suzhou have RPS?)
- add Wenzhou?
- add Hangzhou maybe?
]=]--

-- Wugniu initial spelling -> IPA.
-- Apostrophe-prefixed (glottalised) spellings currently map to the same IPA
-- as their plain counterparts.
local ipa_initial = {
	p = "p",
	ph = "pʰ",
	b = "b",
	m = "m",
	["'m"] = "m",
	f = "f",
	v = "v",

	t = "t",
	th = "tʰ",
	d = "d",
	n = "n",
	["'n"] = "n",
	l = "l",
	["'l"] = "l",

	ts = "t͡s",
	tsh = "t͡sʰ",
	s = "s",
	z = "z",
	c = "t͡ɕ",
	ch = "t͡ɕʰ",

	j = "d͡ʑ",
	gn = "n̠ʲ",
	["'gn"] = "n̠ʲ",
	sh = "ɕ",
	zh = "ʑ",

	k = "k",
	kh = "kʰ",
	g = "ɡ",
	ng = "ŋ",
	["'ng"] = "ŋ",
	h = "h",

	["'"] = "ʔ",
	gh = "ɦ",
}

-- Wugniu final (rime) spelling -> IPA.
local ipa_final = {
	-- plain finals
	a = "a",
	o = "o",
	au = "ɔ",
	eu = "ɤ",
	e = "e̞",
	oe = "ø",

	-- i- finals
	i = "i",
	ia = "ia",
	io = "io",
	iau = "iɔ",
	ieu = "iɤ",

	-- u- finals
	u = "u",
	ua = "ua",
	ue = "ue̞",
	uoe = "uø",

	-- iu- finals
	iu = "y",
	ioe = "yø",

	-- finals spelled with -n
	an = "ã",
	aon = "ɑ̃",
	en = "ən",
	on = "oŋ",

	-- finals spelled with -q
	aq = "aʔ",
	oq = "oʔ",
	eq = "əʔ",

	ian = "iã",
	iaon = "iɑ̃",
	["in"] = "in", -- `in` is a Lua keyword, so the key must stay bracketed
	ion = "ioŋ",

	iaq = "iaʔ",
	ioq = "iʊʔ",
	iq = "iɪʔ",

	uan = "uã",
	uaon = "uɑ̃",
	uen = "uən",

	uaq = "uaʔ",
	ueq = "uəʔ",

	iun = "yn",
	iuq = "ɥɪʔ",

	er = "əl",
	y = "z̩",
}

-- Syllabic nasal spelling -> IPA; the apostrophe-prefixed spellings share
-- the IPA of the plain ones.
local ipa_syllabic = {
	m = "m̩",
	["'m"] = "m̩",
	ng = "ŋ̍",
	["'ng"] = "ŋ̍",
}

-- Tone contour strings.
-- Keys of the form "<syllable count>-<tone mark>" hold one contour per
-- syllable, separated by spaces; the "-single" and "multiple" keys are the
-- RPS contours read by RPS_tone_determ.
local tone_contours = {
	-- tone marks "0" and "-"
	["1-0"] = "",
	["1--"] = "³³",

	-- tone 1
	["1-1"] = "⁵³",
	["2-1"] = "⁵⁵ ²¹",
	["3-1"] = "⁵⁵ ³³ ²¹",
	["4-1"] = "⁵⁵ ³³ ³³ ²¹",
	["5-1"] = "⁵⁵ ³³ ³³ ³³ ²¹",

	-- tone 5
	["1-5"] = "³⁴",
	["2-5"] = "³³ ⁴⁴",
	["3-5"] = "³³ ⁵⁵ ²¹",
	["4-5"] = "³³ ⁵⁵ ³³ ²¹",
	["5-5"] = "³³ ⁵⁵ ³³ ³³ ²¹",

	-- tone 6
	["1-6"] = "²³",
	["2-6"] = "²² ⁴⁴",
	["3-6"] = "²² ⁵⁵ ²¹",
	["4-6"] = "²² ⁵⁵ ³³ ²¹",
	["5-6"] = "²² ⁵⁵ ³³ ³³ ²¹",

	-- tone 7
	["1-7"] = "⁵⁵",
	["2-7"] = "³³ ⁴⁴",
	["3-7"] = "³³ ⁵⁵ ²¹",
	["4-7"] = "³³ ⁵⁵ ³³ ²¹",
	["5-7"] = "³³ ⁵⁵ ³³ ³³ ²¹",

	-- tone 8
	["1-8"] = "¹²",
	["2-8"] = "¹¹ ²³",
	["3-8"] = "¹¹ ²² ²³",
	["4-8"] = "²² ⁵⁵ ³³ ²¹",
	["5-8"] = "²² ⁵⁵ ³³ ³³ ²¹",

	--RPS tones
	["1-single"] = "⁴⁴",
	["5-single"] = "⁴⁴",
	["6-single"] = "³³",
	["7-single"] = "⁴⁴",
	["8-single"] = "²²",
	["multiple"] = "³³",
}

-- Pick the RPS contour for the last syllable of a word.
-- A one-syllable word uses the "<tone>-single" entry of tone_contours (nil
-- when the tone has no such entry); any longer word uses the "multiple" entry.
local function RPS_tone_determ(word_length, tone)
	-- `tone .. "-single"` is only evaluated for one-syllable words, and the
	-- resulting key is always a truthy string, so the and/or idiom is safe.
	local key = (word_length == 1) and (tone .. "-single") or "multiple"
	return tone_contours[key]
end


-- Validate one Wugniu word: a one-character tone mark followed by the
-- space-separated syllables.
--
-- The bare string "勿" is accepted without further checks. Otherwise an
-- error is raised when:
--   * the first character is not a digit, "-" or (a byte of) "勿";
--   * a digit appears after the tone mark;
--   * the first syllable ends in "q" but the tone is 1, 5 or 6, or it does
--     not end in "q" but the tone is 7 or 8;
--   * the text looks like Wiktionary romanisation (leading ', q or x, or
--     contains "ny" / "hh").
-- Always returns nil on success.
local function rom_check(text) --this checks wugniu
	if text == "勿" then
		return nil
	end
	local tone = text:sub(1, 1)
	local syllables = text:sub(2, -1)

	-- Check the tone mark before scanning for stray digits: a misplaced tone
	-- such as "a1" would otherwise be reported as "Multiple tone numbers"
	-- even though it only contains one.
	-- The set is byte-based, so it admits the first byte of "勿".
	if tone:find('[^0-9-勿]') then
		error("Invalid syllable: "..text..". Missing or incorrect tone number.")
	end

	if syllables:find('[0-9]') then
		error("Invalid syllable: "..text..". Multiple tone numbers.")
	end

	-- Only the first syllable is inspected: [a-z]+ cannot cross a space.
	local checked = syllables:find('^[a-z]+q') ~= nil

	--[[ disable tone check for voicing
	if voiced == 0 and tone:find("[157]") then
		error("Invalid syllable: "..tone..text..". Voiced initials only occur in tones 6 and 8.")
	elseif voiced == 1 and tone:find("[68]") then
		error("Invalid syllable: "..tone..text ..". Voiceless initials only occur in tones 1, 5 and 7.")
	end
	]]--

	if checked and tone:find("[156]") then
		error("Checked syllables only occur in tones 7 and 8.")
	elseif not checked and tone:find("[78]") then
		error("Unchecked syllables only occur in tones 1, 5 and 6.")
	end

	if syllables:find("^['qx]") or syllables:find('ny') or syllables:find('hh') then
		error('Invalid syllable: ' .. syllables ..'. Wugniu expected, but Wiktionary romanisation is supplied.')
	end
	return nil
end

-- Look up the IPA for a syllabic-nasal spelling in ipa_syllabic.
-- Unknown spellings yield "?" rather than nil: when used as a gsub
-- replacement callback, nil would make gsub keep the unconverted text, and
-- the "?" check in export.ipa_syl_conv would never report the bad syllable.
local function ipa_table_lookup_syllabic(text)
	return ipa_syllabic[text] or "?"
end

-- Join the IPA of an initial (ipa_initial) and of a final (ipa_final).
-- If either part is unknown, return "?" so that export.ipa_syl_conv raises
-- its "Invalid syllable" error instead of this function crashing on a nil
-- concatenation.
local function ipa_table_lookup_2(initial, final)
	local initial_ipa = ipa_initial[initial]
	local final_ipa = ipa_final[final]
	if not (initial_ipa and final_ipa) then
		return "?"
	end
	return initial_ipa .. final_ipa
end

-- IPA for a syllable spelled with initial "y": "ɦ" followed by the final,
-- which is looked up in ipa_final after making sure it starts with "i".
-- Unknown finals yield "?" so that export.ipa_syl_conv raises its
-- "Invalid syllable" error instead of this function crashing on a nil
-- concatenation.
local function ipa_table_lookup_y(final)
	-- Parentheses drop gsub's second return value (the match count).
	local glide_final = (final:gsub("^i?", "i"))
	local final_ipa = ipa_final[glide_final]
	if not final_ipa then
		return "?"
	end
	return "ɦ" .. final_ipa
end

-- IPA for a syllable spelled with initial "w": "ɦ" followed by the final,
-- which is looked up in ipa_final after making sure it starts with "u".
-- Unknown finals yield "?" so that export.ipa_syl_conv raises its
-- "Invalid syllable" error instead of this function crashing on a nil
-- concatenation.
local function ipa_table_lookup_w(final)
	-- Parentheses drop gsub's second return value (the match count).
	local glide_final = (final:gsub("^u?", "u"))
	local final_ipa = ipa_final[glide_final]
	if not final_ipa then
		return "?"
	end
	return "ɦ" .. final_ipa
end

-- Look up the IPA for a syllable that consists of a final only.
-- Unknown finals yield "?" rather than nil: when used as a gsub replacement
-- callback, nil would make gsub keep the unconverted text, and the "?" check
-- in export.ipa_syl_conv would never report the bad syllable.
local function ipa_table_lookup_final(final)
	return ipa_final[final] or "?"
end

-- Convert a single Wugniu syllable (tone mark already removed) to IPA.
--
-- The syllable is classified by shape and handed to the matching lookup:
--   * ends in "m" or "g"          -> syllabic nasal table
--   * starts with an initial      -> initial + final
--   * starts with "y" / "w"       -> "ɦ" + i-/u- final
--   * anything else               -> bare final
--
-- Raises "Invalid syllable" when the syllable is empty, does not split into
-- known parts, or a lookup fails. Previously the "?" sentinel was overwritten
-- in every branch, so such input either passed through unconverted or crashed
-- with a nil concatenation inside a lookup helper.
function export.ipa_syl_conv(text)
	local lookup, first, second
	if text:find("[mg]$") then
		lookup, first = ipa_table_lookup_syllabic, text
	elseif text:find("^([pbmfvtdnlszcjghk]s?[hng]?)") then
		lookup = ipa_table_lookup_2
		-- nil captures (e.g. an initial with nothing after it) are caught below.
		first, second = text:match("^([pbmfvtdnlszcjgkh]s?[hng]?)(.+)$")
	elseif text:find("^y") then
		lookup, first = ipa_table_lookup_y, text:match("^y(.+)$")
	elseif text:find("^w") then
		lookup, first = ipa_table_lookup_w, text:match("^w(.+)$")
	elseif text ~= "" then
		lookup, first = ipa_table_lookup_final, text
	end

	local ok, result = false, nil
	if first then
		-- pcall: a lookup helper may raise when it concatenates a missing
		-- table entry; treat that the same as "not found".
		ok, result = pcall(lookup, first, second)
	end

	-- "?" (plain search) covers lookups that signal failure with a "?" marker.
	if not ok or type(result) ~= "string" or result:find("?", 1, true) then
		return error(("Invalid syllable: \"%s\""):format(text))
	end

	return result
end

-- Convert a Wugniu transcription to IPA with tone contours.
--
-- Input structure, from outermost to innermost separator:
--   ","  separates alternative readings (joined with "/, /" in the output)
--   "&"  separates components          (joined with " ")
--   "+"  separates independent words   (joined with "  ")
--   " "  separates the syllables of one word
-- Each word starts with a one-character tone mark, or with "勿", which is
-- read as "veq" and given the tone-6 contour for the word's length.
--
-- Raises an error for invalid words (see rom_check), for a tone mark/word
-- length combination that has no entry in tone_contours, and for syllables
-- rejected by export.ipa_syl_conv.
function export.wugniu_to_ipa(original_text)
	local reading = mw.text.split(original_text, ",", true)
	for reading_index = 1, #reading do
		local components = mw.text.split(reading[reading_index], "&", true)
		for component_index = 1, #components do
			local indep_words = mw.text.split(components[component_index], "+", true)
			for indep_index = 1, #indep_words do
				local text = indep_words[indep_index]
				-- Syllable count = number of spaces + 1.
				local _, space_count = text:gsub(" ", " ")
				local word_length = space_count + 1
				rom_check(text)

				local tone_number
				if text:find("^勿") then
					if text == "勿" then
						error("勿 cannot be used alone.")
					end
					text = (text:gsub("勿", "veq"))
					tone_number = "6"
				else
					tone_number = text:sub(1, 1)
					text = text:sub(2, -1)
				end
				-- Guard both branches: without it a 勿-word longer than the
				-- table covers would pass nil to mw.text.split below.
				local tone = tone_contours[word_length.."-"..tone_number] or error("Tone notation is incorrect. See [[WT:WUU]].")

				local syllable = mw.text.split(text, " ", true)
				local syl_tone = mw.text.split(tone, " ", true)
				for i = 1, word_length do

					--RPS
					-- Applies to the last syllable of a word that is followed
					-- by another "+"-joined word, unless its contour is "³³".
					if i == word_length and indep_words[indep_index + 1] and tone ~= "³³" then
						-- Keep the existing contour when no RPS contour is
						-- defined (e.g. tone mark "0"), instead of failing on
						-- a nil concatenation.
						syl_tone[i] = RPS_tone_determ(word_length, tone_number) or syl_tone[i]
					end

					syllable[i] = export.ipa_syl_conv(syllable[i]) .. syl_tone[i]
				end
				indep_words[indep_index] = table.concat(syllable, " ")
			end
			components[component_index] = table.concat(indep_words, "  ")
		end
		reading[reading_index] = table.concat(components, " ")
	end
	return table.concat(reading, "/, /")
end

-- Entry point: lower-case the input and convert it to IPA.
-- `text` is either a string or a table whose `args[1]` holds the string.
-- The string may carry a "prefix:" part:
--   * no prefix, or prefix "wt": the text is converted with
--     export.wikt_to_wugniu before the IPA conversion;
--   * any other prefix: the part after the colon is converted directly.
function export.wuu_ipa(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	local parts = mw.text.split(mw.ustring.lower(text), ":", true)
	local has_prefix = #parts ~= 1

	if has_prefix and parts[1] ~= "wt" then
		return export.wugniu_to_ipa(parts[2])
	end

	local wikt_text = has_prefix and parts[2] or parts[1]
	return export.wugniu_to_ipa(export.wikt_to_wugniu(wikt_text))
end

-- Rewrite Wiktionary-style romanisation as Wugniu by a fixed sequence of
-- substitutions (initials, vowels, tone numbers, then the gh- rules).
-- `text` is either a string or a table whose `args[1]` holds the string.
-- The order of the substitutions matters: later rules rely on the output of
-- earlier ones (e.g. tones are renumbered from 5 down so that a freshly
-- written 5 is not renumbered again).
--
-- Returns exactly one value, the converted string. The chain used to be
-- returned directly, which leaked the final gsub's match count as a second
-- return value; Scribunto concatenates every value returned to #invoke, and a
-- trailing call in an argument list would forward the count as an extra
-- argument.
function export.wikt_to_wugniu(text)
	if type(text) == "table" then text = text.args[1] end

	local wugniu = text
	--initials
		:gsub("'''", "\1") --escape bold markup
		:gsub("'", "")
		:gsub("\1", "'''")
		:gsub("j", "c")
		:gsub("cc", "j") --jj
		:gsub("q(%a)", "ch%1")
		:gsub("x", "sh")
		:gsub("shsh", "zh") --xx
		:gsub("ny", "gn")
		:gsub("hh", "gh")

	--vowels
		:gsub("un", "uen")
		:gsub("yoe", "ioe")
		:gsub("y", "iu")
		:gsub("aan", "aon")
		:gsub("([^e])r", "%1y")
		:gsub("mm", "m")
		:gsub("ngg", "ng")

	--tones
		:gsub("5", "8")
		:gsub("4", "7")
		:gsub("3", "6")
		:gsub("2", "5")
	--gh rules
		:gsub("ghi", "yi")
		:gsub("yi([aeou])", "y%1")
		:gsub("ghu", "wu")
		:gsub("wu([aeo])", "w%1")
		:gsub("ghng", "ng")
		:gsub("ghm", "m")

	return wugniu
end

-- Rewrite Wugniu romanisation as Wiktionary-style romanisation by a fixed
-- sequence of substitutions (initials, vowels, syllabics, then tone numbers).
-- `text` is either a string or a table whose `args[1]` holds the string.
--
-- Returns exactly one value, the converted string. The chain used to be
-- returned directly, which leaked the final gsub's match count as a second
-- return value; Scribunto concatenates every value returned to #invoke, and a
-- trailing call in an argument list would forward the count as an extra
-- argument.
function export.wugniu_to_wikt(text)
	if type(text) == "table" then text = text.args[1] end
	--initials
	--Glottal stops? text = text:gsub("", "'")
	local wikt = text
		:gsub("j", "jj")
		:gsub("ch", "q")
		:gsub("c", "j")
		:gsub("sh", "x")
		:gsub("zh", "xx")
		:gsub("([0-9])y", "%1hhi")
		:gsub("wu?", "hhu")
		:gsub("gn", "ny")
		:gsub("gh", "hh")

	--vowels
		:gsub("y(%A)", "r%1")
		:gsub("y$", "r")
		:gsub("uen", "un")
		:gsub("ioe", "yoe")
		:gsub("iu", "y")
		:gsub("aon", "aan")

	--syllabics
		:gsub("(%d)m(%A)", "%1hhmm%2")
		:gsub("(%d)m$", "%1hhmm")
		:gsub("(%d)ng(%A)", "%1hhngg%2")
		:gsub("(%d)ng$", "%1hhngg")

	--tones
		:gsub("5", "2"):gsub("6", "3"):gsub("7", "4"):gsub("8", "5")

	return wikt
end

-- Format a transcription for display in Wugniu.
-- `text` may carry a "prefix:" part: with no prefix or with prefix "wt" the
-- text is first converted with export.wikt_to_wugniu; with any other prefix
-- the part after the colon is used as is.
-- Display rules: "勿" becomes "8veq", spaces become hyphens, "&" and "+"
-- become spaces, commas become "; ", and digit runs are wrapped in <sup>.
--
-- Returns exactly one value. Returning the gsub chain directly used to leak
-- the final gsub's match count as a second return value, which breaks callers
-- that place this call last in an argument list.
function export.wugniu_format(text)
	local parts = mw.text.split(text, ":", true)
	local wugniu
	if #parts == 1 then
		wugniu = export.wikt_to_wugniu(parts[1])
	elseif parts[1] == 'wt' then
		wugniu = export.wikt_to_wugniu(parts[2])
	else
		wugniu = parts[2]
	end

	local formatted = wugniu:gsub("勿", '8veq')
		:gsub(" ", "-")
		:gsub("[&+]", " ")
		:gsub(",", "; ")
		:gsub('([%d]+)', '<sup>%1</sup>')
	return formatted
end

-- Format a transcription for display in Wiktionary-style romanisation.
-- `text` may carry a "prefix:" part: with no prefix the text is used as is;
-- with prefix "sh" the part after the colon is converted with
-- export.wugniu_to_wikt; with any other prefix it is used as is.
-- Display rules: "勿" becomes "5veq", spaces become hyphens, digit runs are
-- wrapped in <sup>, "&" and "+" become spaces, commas become "; ",
-- apostrophes are removed, and a "]" or ")" directly followed by a lower-case
-- letter becomes ") " plus that letter.
--
-- Returns exactly one value. Returning the gsub chain directly used to leak
-- the final gsub's match count as a second return value, which breaks callers
-- that place this call last in an argument list.
function export.wikt_format(text)
	local parts = mw.text.split(text, ":", true)
	local wikt
	if #parts == 1 then
		wikt = parts[1]
	elseif parts[1] == 'sh' then
		wikt = export.wugniu_to_wikt(parts[2])
	else
		wikt = parts[2]
	end

	local formatted = wikt:gsub("勿", '5veq')
		:gsub(" ", "-")
		:gsub("([%d]+)", '<sup>%1</sup>')
		:gsub("[&+]", " ")
		:gsub(",", "; ")
		:gsub("'", "")
		:gsub("[])]([a-z])", ") %1")
	return formatted
end

-- Expose the public functions collected in `export`.
return export