跳转到内容

模組:Syllables

維基詞典,自由的多語言詞典

測試用例

{{#invoke:syllables|countVowels|/ɛː.e.lí.o͜i.o/}}
{{#invoke:syllables|countVowels|/ˈhaɪweɪ/}}
{{#invoke:syllables|countVowels|/ˈloʊɚ/}}
{{#invoke:syllables|countVowels|/ænˈdreɪə/}}
{{#invoke:syllables|countVowels|/ˈbaɪəʊ/}}
{{#invoke:syllables|countVowels|/ˈɑːmiɑi̯smɑjoi̯tus/}}
{{#invoke:syllables|countVowels|/vr̩x/}}
{{#invoke:syllables|countVowels|/vl̩k/}}
{{#invoke:syllables|countVowels|/ˈvr̩ːba/}}
{{#invoke:syllables|countVowels|/ˈɛi̯ərə(n)/}}
{{#invoke:syllables|countVowels|/ˈleːu̯ə(n)/}}
  • The text "/ɛː.e.lí.o͜i.o/" contains 6 vowels、​0 vowel sequences和5 vowels or vowels and diphthongs.
  • The text "/ˈhaɪweɪ/" contains 4 vowels、​0 vowel sequences和4 vowels or vowels and diphthongs.
  • The text "/ˈloʊɚ/" contains 3 vowels、​0 vowel sequences和3 vowels or vowels and diphthongs.
  • The text "/ænˈdreɪə/" contains 4 vowels、​0 vowel sequences和4 vowels or vowels and diphthongs.
  • The text "/ˈbaɪəʊ/" contains 4 vowels、​0 vowel sequences和4 vowels or vowels and diphthongs.
  • The text "/ˈɑːmiɑi̯smɑjoi̯tus/" contains 8 vowels、​0 vowel sequences和6 vowels or vowels and diphthongs.
  • The text "/vr̩x/" contains 0 vowels、​0 vowel sequences和0 vowels or vowels and diphthongs.
  • The text "/vl̩k/" contains 0 vowels、​0 vowel sequences和0 vowels or vowels and diphthongs.
  • The text "/ˈvr̩ːba/" contains 1 vowels、​0 vowel sequences和1 vowels or vowels and diphthongs.
  • The text "/ˈɛi̯ərə(n)/" contains 4 vowels、​0 vowel sequences和3 vowels or vowels and diphthongs.
  • The text "/ˈleːu̯ə(n)/" contains 3 vowels、​0 vowel sequences和2 vowels or vowels and diphthongs.
{{#invoke:syllables|countVowelsDiphthongs|en|/ˈhaɪweɪ/}}
{{#invoke:syllables|countVowelsDiphthongs|en|/ˈloʊɚ/}}
{{#invoke:syllables|countVowelsDiphthongs|en|/aɪˈdiə/}}
{{#invoke:syllables|countVowelsDiphthongs|en|/ænˈdreɪə/}}
{{#invoke:syllables|countVowelsDiphthongs|en|/ˈbaɪəʊ/}}
{{#invoke:syllables|countVowelsDiphthongs|fi|/ˈɑːmiɑi̯smɑjoi̯tus/}}
{{#invoke:syllables|countVowelsDiphthongs|sk|/vr̩x/}}
{{#invoke:syllables|countVowelsDiphthongs|sk|/vl̩k/}}
{{#invoke:syllables|countVowelsDiphthongs|sk|/ˈvr̩ːba/}}
{{#invoke:syllables|countVowelsDiphthongs|nl|/ˈɛi̯ərə(n)/}}
{{#invoke:syllables|countVowelsDiphthongs|nl|/ˈleːu̯ə(n)/}}
  • The text "/ˈhaɪweɪ/" contains 2 vowels or diphthongs.
  • The text "/ˈloʊɚ/" contains 2 vowels or diphthongs.
  • The text "/aɪˈdiə/" contains 2 vowels or diphthongs. – /iə/ is a disyllabic sequence in GA but a diphthong in NZ; unfortunately, there is no language code for the dialect.
  • The text "/ænˈdreɪə/" contains 2 vowels or diphthongs.
  • The text "/ˈbaɪəʊ/" contains 0 vowels or diphthongs.
  • The text "/ˈɑːmiɑi̯smɑjoi̯tus/" contains 8 vowels or diphthongs.
  • The text "/vr̩x/" contains 0 vowels or diphthongs.
  • The text "/vl̩k/" contains 0 vowels or diphthongs.
  • The text "/ˈvr̩ːba/" contains 1 vowel or diphthong.
  • The text "/ˈɛi̯ərə(n)/" contains 4 vowels or diphthongs.
  • The text "/ˈleːu̯ə(n)/" contains 3 vowels or diphthongs.
{{#invoke:syllables|countVowels2Test|en|/ˈhaɪweɪ/}}
{{#invoke:syllables|countVowels2Test|en|/ˈloʊɚ/}}
{{#invoke:syllables|countVowels2Test|en|/aɪˈdiə/}}
{{#invoke:syllables|countVowels2Test|en|/ænˈdreɪə/}}
{{#invoke:syllables|countVowels2Test|en|/ˈbaɪəʊ/}}
{{#invoke:syllables|countVowels2Test|en|/avə(ʊ)ˈkeɪʃən/}}
{{#invoke:syllables|countVowels2Test|en|/ˈflaʊə/}}
{{#invoke:syllables|countVowels2Test|en|/ˈfaɪ̯ə/}}
{{#invoke:syllables|countVowels2Test|fi|/ˈɑːmiɑi̯smɑjoi̯tus/}}
{{#invoke:syllables|countVowels2Test|sk|/vr̩x/}}
{{#invoke:syllables|countVowels2Test|sk|/vl̩k/}}
{{#invoke:syllables|countVowels2Test|sk|/ˈvr̩ːba/}}
{{#invoke:syllables|countVowels2Test|nl|/ˈɛi̯ərə(n)/}}
{{#invoke:syllables|countVowels2Test|nl|/ˈleːu̯ə(n)/}}
  • The text "/ˈhaɪweɪ/" contains 2 vowels.
  • The text "/ˈloʊɚ/" contains 2 vowels.
  • The text "/aɪˈdiə/" contains 2 vowels. – /iə/ is a disyllabic sequence in GA but a diphthong in NZ; unfortunately, there is no language code for the dialect.
  • The text "/ænˈdreɪə/" contains 3 vowels.
  • The text "/ˈbaɪəʊ/" contains 2 vowels.
  • The text "/avə(ʊ)ˈkeɪʃən/" contains 4 vowels.
  • The text "/ˈflaʊə/" contains 2 vowels.
  • The text "/ˈfaɪ̯ə/" contains 2 vowels.
  • The text "/ˈɑːmiɑi̯smɑjoi̯tus/" contains 6 vowels.
  • The text "/vr̩x/" contains 1 vowels.
  • The text "/vl̩k/" contains 1 vowels.
  • The text "/ˈvr̩ːba/" contains 2 vowels.
  • The text "/ˈɛi̯ərə(n)/" contains 3 vowels.
  • The text "/ˈleːu̯ə(n)/" contains 2 vowels.

local export = {}

local m_str_utils = require("Module:string utilities")

local gsub = m_str_utils.gsub
local match = m_str_utils.match
local toNFD = mw.ustring.toNFD
local U = m_str_utils.char

local diphthongs = mw.loadData("Module:IPA/data").diphthongs
local vowels = mw.loadData("Module:IPA/data/symbols").vowels .. "ᵻ" .. "ᵿ"

--[[ No use for this at the moment, though it is an interesting catalogue.
	It might be usable for phonetic transcriptions.
	(Nothing in the code shown here reads this variable.)

	Diacritics added to vowels, listed in the same order as the code points
	passed to U() below, two per line:
	inverted breve above (U+0311), inverted breve below (U+032F),
	up tack below (U+031D), down tack below (U+031E),
	left tack below (U+0318), right tack below (U+0319),
	diaeresis above (U+0308), diaeresis below (U+0324),
	right half ring below (U+0339), left half ring below (U+031C),
	plus sign below (U+031F), minus sign below (U+0320),
	combining x above (U+033D), rhotic hook (U+02DE),
	tilde above (U+0303), tilde below (U+0330),
	ligature tie above (combining double inverted breve, U+0361),
	ligature tie below (combining double breve below, U+035C)
	]]
local diacritics = U(
	0x311, 0x32F,
	0x31D, 0x31E,
	0x318, 0x319,
	0x308, 0x324,
	0x339, 0x31C,
	0x31F, 0x320,
	0x33D, 0x2DE,
	0x303, 0x330,
	0x361, 0x35C
)

--[[
Tone marks that may directly follow a vowel: acute, grave and circumflex.

Every function in this module normalises its input with toNFD() before
applying these patterns. U+0341 (combining acute tone mark) and U+0340
(combining grave tone mark) canonically decompose to U+0301 and U+0300, so
in normalised text only the plain combining acute/grave can occur. Both
spellings are listed: the decomposed ones so that toned vowels are actually
recognised, the tone-mark ones for backward compatibility.
]]--
local tone = "[" .. U(0x341, 0x340, 0x301, 0x300, 0x302) .. "]"
-- Inverted breve above / below: marks a vowel as non-syllabic.
local nonsyllabicDiacritics = U(0x311, 0x32F)
-- Vertical line below / above: marks a sound as syllabic.
local syllabicDiacritics = U(0x0329, 0x030D)
-- Tie bars above / below, joining two symbols into one unit.
local ties = U(0x361, 0x35C)

-- long, half-long, extra short
local lengthDiacritics = U(0x2D0, 0x2D1, 0x306)
-- One vowel symbol, optionally followed by a single tone mark.
local vowel = "[" .. vowels .. "]" .. tone .. "?"
-- Character-class patterns matching exactly one character of each kind.
local tie = "[" .. ties .. "]"
local nonsyllabicDiacritic = "[" .. nonsyllabicDiacritics .. "]"
local syllabicDiacritic = "[" .. syllabicDiacritics .. "]"

-- Byte-level pattern for one UTF-8 encoded character: a lead byte followed by
-- any continuation bytes. It starts at \1 because a Lua 5.1 pattern cannot
-- contain a NUL byte.
local UTF8Char = "[\1-\127\194-\244][\128-\191]*"


--- Count the syllable nuclei in an IPA transcription.
-- The transcription is consumed from left to right. At each position the
-- function tries, in order: a character carrying a syllabic diacritic, two
-- vowels joined by a tie bar, one of the language's diphthong patterns, and
-- finally a single vowel. Each hit counts as one nucleus. Vowels carrying a
-- non-syllabic diacritic are dropped without being counted, and anything else
-- is skipped one character at a time.
-- @param remainder string: the transcription to analyse.
-- @param lang language object; only lang:getCode() is called, to look up the
--        diphthong patterns for that language.
-- @return the number of nuclei found, or nil if none were found or if the
--         transcription starts or ends with a hyphen.
function export.getVowels(remainder, lang)
	-- If a hyphen is at the beginning or end of the transcription (optionally
	-- inside the "/" or "[" delimiter), do not count syllables.
	if string.find(remainder, "^[%[/]?%-") or string.find(remainder, "%-[%[/]?$") then
		return nil
	end

	local count = 0
	local diphs = diphthongs[lang:getCode()] or {}

	-- Same guard as the other counting functions in this module, in case
	-- normalisation fails and yields nil.
	remainder = toNFD(remainder) or error("Invalid UTF-8")

	-- Remove every parenthesis, keeping the optional sounds they enclose.
	-- A greedy "%((.*)%)" would only strip the first "(" and the last ")",
	-- leaving inner parentheses behind when there are several groups; those
	-- leftovers could then split a diphthong such as "ə(ʊ)".
	remainder = string.gsub(remainder, "[()]", "")

	while remainder ~= "" do
		-- Ignore nonsyllabic vowels
		remainder = gsub(remainder, "^" .. vowel .. nonsyllabicDiacritic, "")

		local m =
			match(remainder, "^." .. syllabicDiacritic) or  -- Syllabic consonant
			match(remainder, "^" .. vowel .. tie .. vowel)  -- Tie bar

		-- Starts with a recognised diphthong?
		if not m then
			for _, diph in ipairs(diphs) do
				m = match(remainder, "^" .. diph)
				if m then
					break
				end
			end
		end

		-- If we haven't found anything yet, just match on a single vowel
		m = m or match(remainder, "^" .. vowel)

		if m then
			-- Found a vowel, add it. m is a prefix of remainder, so its byte
			-- length is exactly the amount to cut off.
			count = count + 1
			remainder = string.sub(remainder, #m + 1)
		else
			-- Found a non-vowel, skip it
			local skipped
			remainder, skipped = string.gsub(remainder, "^" .. UTF8Char, "")
			if skipped == 0 then
				-- The leading byte is not a valid UTF-8 lead byte (NUL or a
				-- stray continuation byte). Drop it so that the loop always
				-- makes progress instead of spinning forever.
				remainder = string.sub(remainder, 2)
			end
		end
	end

	if count ~= 0 then return count end

	return nil

end


--- Template entry point that reports the result of export.getVowels.
-- Invoked as {{#invoke:syllables|countVowels2Test|<language code>|<text>}}.
-- @param frame the Scribunto frame; frame.args[1] is a language code
--        (required) and frame.args[2] the transcription (defaults to "").
-- @return a sentence stating how many vowels the text contains.
function export.countVowels2Test(frame)
	local params = {
		[1] = {required = true},
		[2] = {default = ""},
	}

	local args = require("Module:parameters").process(frame.args, params)

	local m_languages = require("Module:languages")
	local lang = m_languages.getByCode(args[1]) or m_languages.err(args[1], 1)

	-- getVowels returns nil when it finds nothing to count or when the
	-- transcription begins or ends with a hyphen. Concatenating nil would
	-- raise an error, so fall back to the same wording the other entry points
	-- of this module use.
	local count = export.getVowels(args[2], lang)

	return 'The text "' .. args[2] .. '" contains ' .. (count or 'an unknown number of') .. ' vowels.'
end


--- Count vowel symbols in a transcription, ignoring language-specific data.
-- @param text string: the transcription to analyse.
-- @return count          number of vowel symbols (each with its optional
--                        tone mark).
-- @return sequenceCount  number of runs of one or more adjacent vowels.
-- @return diphthongCount count minus one for every vowel that carries a
--                        non-syllabic diacritic and one for every pair of
--                        vowels joined by a tie bar.
local function countVowels(text)
	text = toNFD(text) or error("Invalid UTF-8")

	local _, count = gsub(text, vowel, "")
	local _, nonsyllabicCount = gsub(text, vowel .. nonsyllabicDiacritic, "")
	local _, tieCount = gsub(text, vowel .. tie .. vowel, "")

	-- Lua pattern quantifiers apply to a single character class only, so
	-- appending "+" to the two-item `vowel` pattern yields "[...][...]?+",
	-- where the "+" is a literal plus sign; that never matches and the
	-- sequence count was always 0. Instead, replace every vowel with a
	-- one-byte marker and count the runs of markers. Any marker byte already
	-- present in the input is blanked out first so it cannot be miscounted.
	local marker = "\1"
	local marked = gsub((string.gsub(text, marker, " ")), vowel, marker)
	local _, sequenceCount = string.gsub(marked, marker .. "+", "")

	local diphthongCount = count - (nonsyllabicCount + tieCount)

	return count, sequenceCount, diphthongCount
end


--- Count how often the diphthong patterns of a language occur in a text.
-- Each pattern is counted independently against the whole normalised text
-- and the individual counts are summed.
-- @param text string: the transcription to analyse.
-- @param lang language object; only lang:getCode() is called.
-- @return the summed number of matches (0 when the language has no entry in
--         the diphthong data).
local function countDiphthongs(text, lang)
	local normalized = toNFD(text) or error("Invalid UTF-8")

	-- Languages without an entry simply contribute no patterns.
	local patterns = diphthongs[lang:getCode()] or {}

	local total = 0
	for _, pattern in pairs(patterns) do
		local _, hits = gsub(normalized, pattern, "")
		total = total + hits
	end

	return total
end

--- Template entry point that reports the three counts produced by the local
-- countVowels helper.
-- Invoked as {{#invoke:syllables|countVowels|<text>}}.
-- @param frame the Scribunto frame; frame.args[1] is the transcription
--        (defaults to "").
-- @return a sentence listing the three counts.
function export.countVowels(frame)
	local args = require("Module:parameters").process(frame.args, {
		[1] = {default = ""},
	})

	local total, runs, nuclei = countVowels(args[1])

	-- Wording used in place of any count that is missing.
	local unknown = 'an unknown number of'
	local parts = {
		(total or unknown) .. ' vowels',
		(runs or unknown) .. ' vowel sequences',
		(nuclei or unknown) .. ' vowels or vowels and diphthongs',
	}

	return 'The text "' .. args[1] .. '" contains ' .. mw.text.listToText(parts) .. "."
end


--- Template entry point that reports the vowel count minus the number of
-- diphthong matches for the given language.
-- Invoked as {{#invoke:syllables|countVowelsDiphthongs|<language code>|<text>}}.
-- @param frame the Scribunto frame; frame.args[1] is a language code
--        (required) and frame.args[2] the transcription (defaults to "").
-- @return a sentence stating the resulting number, with singular wording
--         when it is exactly 1.
function export.countVowelsDiphthongs(frame)
	local args = require("Module:parameters").process(frame.args, {
		[1] = {required = true},
		[2] = {default = ""},
	})

	local m_languages = require("Module:languages")
	local lang = m_languages.getByCode(args[1]) or m_languages.err(args[1], 1)

	local text = args[2]

	-- Only the first value returned by countVowels (the plain vowel count) is
	-- kept. The subtraction always yields a number, so no fallback value is
	-- ever needed for it.
	local vowelTotal = countVowels(text)
	local count = vowelTotal - countDiphthongs(text, lang)

	local suffix = ' vowels or diphthongs.'
	if count == 1 then
		suffix = ' vowel or diphthong.'
	end

	return 'The text "' .. text .. '" contains ' .. count .. suffix
end

return export