模組:IPA
This module is used by the templates {{IPA}}
and {{IPAchar}}
to format IPA in entries, and it also converts X-SAMPA (an ASCII version of IPA) to IPA for the templates {{x2i}}
, {{x2ipa}}
, and {{x2ipachar}}
. The actual functions called by these templates are found in Module:IPA/templates
The function format_IPA_full
generates the content of the template {{IPA}}
. It should also be used by pronunciation modules for specific languages. (It is used, for instance, by Module:ru-pron.) It generates a label IPA (key), followed by a list of IPA transcriptions with the class attribute "IPA"
added to them, and a language-specific category (such as Category:English terms with IPA pronunciation).
The function format_IPA_multiple
generates the content of the template {{IPAchar}}
. It is similar to format_IPA_full
, but does not add a label or categories.
Data is in Module:IPA/data, Module:IPA/data/symbols, and Module:IPA/data/X-SAMPA.
Tracking
The submodule Module:IPA/tracking allows you to track specific symbols used in the IPA transcriptions of a given language: for instance, the trill symbol ⟨r⟩ in English transcriptions. Go there for the list of languages and tracking conditions.
Unit tests
- See also: Module:IPA/testcases
IPA to X-SAMPA back to IPA
Term | IPA | Generated X-SAMPA | Regenerated IPA | Matched? |
---|---|---|---|---|
dictionary | /ˈdɪkʃən(ə)ɹi/ | /"dIkS@n(@)r\i/ |
/ˈdɪkʃən(ə)ɹi/ | yes |
/ˈdɪkʃənɛɹi/ | /"dIkS@nEr\i/ |
/ˈdɪkʃənɛɹi/ | yes | |
Україна (Ukrajina) | /ukrɑˈjɪnɑ/ | /ukrA"jInA/ |
/ukrɑˈjɪnɑ/ | yes |
نوروز | [næu̯ˈɾoːz] | [n{u_^"4o:z] |
[næu̯ˈɾoːz] | yes |
[nou̯ˈɾuːz] | [nou_^"4u:z] |
[nou̯ˈɾuːz] | yes | |
[noːˈɾuːz] | [no:"4u:z] |
[noːˈɾuːz] | yes | |
[næu̯ˈɾɵːz] | [n{u_^"48:z] |
[næu̯ˈɾɵːz] | yes | |
新年 | [ɕɪn˥˥niɛn˧˥] | [s\In__T__TniEn__M__T] |
[ɕɪn˥˥niɛn˧˥] | yes |
battleship | [ˈbætl̩ʃɪp] | ["b{tl=SIp] |
[ˈbætl̩ʃɪp] | yes |
báid | [bˠɑːdʲ] | [b_GA:d_j] |
[bˠɑːdʲ] | yes |
Deutsch | [dɔʏ̯t͡ʃ] | [dOY_^t__S] |
[dɔʏ̯t͡ʃ] | yes |
dóigh | [d̪ˠoːɟ] | [d_d_Go:J\] |
[d̪ˠoːɟ] | yes |
murder | [ˈmɝdɚ] | ["m3`d@`] |
[ˈmɝdɚ] | yes |
local export = {}
-- [[Module:IPA/data]]
local m_data = mw.loadData('Module:IPA/data') -- [[Module:IPA/data]]
local m_symbols = mw.loadData('Module:IPA/data/symbols') -- [[Module:IPA/data/symbols]]
local m_syllables -- [[Module:syllables]]; loaded below if needed
local sub = mw.ustring.sub
local find = mw.ustring.find
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local gmatch = mw.ustring.gmatch
local U = mw.ustring.char
function export.format_IPA_full(lang, items, err, separator, sortKey, no_count)
local IPA_key, key_link, err_text, prefix, IPAs, category
local hasKey = m_data.langs_with_infopages
local namespace = mw.title.getCurrentTitle().nsText
if err then
err_text = '<span class="error">' .. err .. '</span>'
else
if hasKey[lang:getCode()] then
IPA_key = "Appendix:" .. lang:getCanonicalName() .. "發音"
else
IPA_key = ":w:" .. lang:getCanonicalName() .. "音系"
end
key_link = "[[" .. IPA_key .. "|幫助]]"
end
local prefix = "[[Wiktionary:國際音標|IPA]]<sup>(" .. ( key_link or err_text ) .. ")</sup>:"
IPAs = export.format_IPA_multiple(lang, items, separator, no_count)
if lang and (namespace == "" or namespace == "Reconstruction") then
sortKey = sortKey or lang:makeSortKey(mw.title.getCurrentTitle().text)
sortKey = sortKey and ("|" .. sortKey) or ""
category = "[[Category:有國際音標的" .. lang:getCanonicalName() .. "詞" .. sortKey .. "]]"
else
category = ""
end
return prefix .. IPAs .. category
end
local function determine_repr(pron)
local repr_mark = {}
local repr, reconstructed
-- remove initial asterisk before representation marks, used on some Reconstruction pages
if find(pron, "^%*") then
reconstructed = true
pron = sub(pron, 2)
end
local representation_types = {
['/'] = { right = '/', type = 'phonemic', },
['['] = { right = ']', type = 'phonetic', },
['⟨'] = { right = '⟩', type = 'orthographic', },
['-'] = { type = 'rhyme' },
}
repr_mark.i, repr_mark.f, repr_mark.left, repr_mark.right = find(pron, '^(.).-(.)$')
local representation_type = representation_types[repr_mark.left]
if representation_type then
if representation_type.right then
if repr_mark.right == representation_type.right then
repr = representation_type.type
end
else
repr = representation_type.type
end
else
repr = nil
end
return repr, reconstructed
end
local function hasInvalidSeparators(transcription)
if find(transcription, "%.[ˈˌ]") then
return true
else
return false
end
end
function export.format_IPA_multiple(lang, items, separator, no_count)
local categories = {}
separator = separator or ', '
-- Format
if not items[1] then
if mw.title.getCurrentTitle().nsText == "Template" then
table.insert(items, {pron = "/aɪ piː ˈeɪ/"})
else
table.insert(categories, "[[Category:使用發音模板而缺少發音的詞條]]")
end
end
local bits = {}
for _, item in ipairs(items) do
local bit = export.format_IPA(lang, item.pron)
if item.pretext then
bit = item.pretext .. bit
end
if item.posttext then
bit = bit .. item.posttext
end
local leftq = item.q or item.qualifiers
if leftq and leftq[1] then
bit = require("Module:qualifier").format_qualifier(leftq) .. " " .. bit
end
local rightq = item.qq
if rightq and rightq[1] then
bit = bit .. " " .. require("Module:qualifier").format_qualifier(rightq)
end
if item.refs or item.note then
local refspecs
if item.note then
-- FIXME: eliminate item.note in favor of item.refs. Use tracking to find places
-- that use item.note.
refspecs = {item.note}
require("Module:debug").track("IPA/note")
else
refspecs = item.refs
end
local refs = {}
if #refspecs > 0 then
for _, refspec in ipairs(refspecs) do
if type(refspec) ~= "table" then
refspec = {text = refspec}
end
local refargs
if refspec.name or refspec.group then
refargs = {name = refspec.name, group = refspec.group}
end
table.insert(refs, mw.getCurrentFrame():extensionTag("ref", refspec.text, refargs))
end
bit = bit .. table.concat(refs)
end
end
if item.separator then
bit = item.separator .. bit
end
table.insert(bits, bit)
--[=[ [[Special:WhatLinksHere/Template:tracking/IPA/syntax-error]]
The length or gemination symbol should not appear after a syllable break or stress symbol. ]=]
if find(item.pron, "[ˈˌ%.][ːˑ]") then
require("Module:debug").track("IPA/syntax-error")
end
if lang then
-- Add syllable count if the language's diphthongs are listed in [[Module:syllables]].
-- Don't do this if the term has spaces or a liaison mark (‿).
if not no_count and mw.title.getCurrentTitle().namespace == 0 then
m_syllables = m_syllables or require('Module:syllables')
local langcode = lang:getCode()
if m_data.langs_to_generate_syllable_count_categories[langcode] then
local repr = determine_repr(item.pron)
local use_it
if m_data.langs_to_use_phonetic_notation[langcode] then
use_it = repr == "phonetic"
else
use_it = repr == "phonemic"
end
if use_it and not find(item.pron, "[ ‿]") then
local syllable_count = m_syllables.getVowels(item.pron, lang)
if syllable_count then
table.insert(categories, "[[Category:" .. lang:getCanonicalName() .. syllable_count .. "音節詞]]")
end
end
end
end
if lang:getCode() == "en" and hasInvalidSeparators(item.pron) then
table.insert(categories, "[[Category:國際音標使用.ˈ或.ˌ的英語詞]]")
end
end
end
return table.concat(bits, separator) .. table.concat(categories)
end
-- Takes an IPA pronunciation and formats it and adds cleanup categories.
function export.format_IPA(lang, pron, split_output)
local err = {}
local categories = {}
-- Remove wikilinks, so that wikilink brackets are not misinterpreted as
-- indicating phonemic transcription
local str_gsub = string.gsub
local without_links = str_gsub(pron, '%[%[[^|%]]+|([^%]]+)%]%]', '%1')
without_links = str_gsub(without_links, '%[%[[^%]]+%]%]', '%1')
-- Detect whether this is a phonemic or phonetic transcription
local repr, reconstructed = determine_repr(without_links)
if reconstructed then
pron = sub(pron, 2)
end
-- If valid, strip the representation marks
if repr == "phonemic" then
pron = sub(pron, 2, -2)
without_links = sub(without_links, 2, -2)
elseif repr == "phonetic" then
pron = sub(pron, 2, -2)
without_links = sub(without_links, 2, -2)
elseif repr == "orthographic" then
pron = sub(pron, 2, -2)
without_links = sub(without_links, 2, -2)
elseif repr == "rhyme" then
pron = sub(pron, 2)
without_links = sub(without_links, 2)
else
table.insert(categories, "[[Category:使用無效表示符號的國際音標發音]]")
-- table.insert(err, "invalid representation marks")
-- Removed because it's annoying when previewing pronunciation pages.
end
if pron == "" then
table.insert(categories, "[[Category:沒有發音的國際音標發音]]")
end
-- Check for obsolete and nonstandard symbols
for i, symbol in ipairs(m_data.nonstandard) do
local result
for nonstandard in gmatch(pron, symbol) do
if not result then
result = {}
end
table.insert(result, nonstandard)
table.insert(categories, "[[Category:使用廢棄或非標準字符的國際音標發音|" .. nonstandard .. "]]")
end
if result then
table.insert(err, "obsolete or nonstandard characters (" .. table.concat(result) .. ")")
break
end
end
--[[ Check for invalid symbols after removing the following:
1. wikilinks (handled above)
2. paired HTML tags
3. bolding
4. italics
5. HTML entity for space
6. asterisk at beginning of transcription
7. comma followed by spacing characters
8. superscripts enclosed in superscript parentheses ]]
local found_HTML
local result = str_gsub(without_links, "<(%a+)[^>]*>([^<]+)</%1>",
function(tagName, content)
found_HTML = true
return content
end)
result = str_gsub(result, "'''([^']*)'''", "%1")
result = str_gsub(result, "''([^']*)''", "%1")
result = str_gsub(result, "&[^;]+;", "") -- This may catch things that are not valid character entities.
result = str_gsub(result, "^%*", "")
result = gsub(result, ",%s+", "")
result = gsub(result, "⁽[".. m_symbols.superscripts .. "]+⁾", "")
result = gsub(result, '[' .. m_symbols.valid .. ']', '')
-- VS15
local vs15_class = "[" .. m_symbols.add_vs15 .. "]"
if mw.ustring.find(pron, vs15_class) then
local vs15 = U(0xFE0E)
if mw.ustring.find(result, vs15) then
result = gsub(result, vs15, "")
pron = mw.ustring.gsub(pron, vs15, "")
end
pron = mw.ustring.gsub(pron, "(" .. vs15_class .. ")", "%1" .. vs15)
end
if result ~= '' then
mw.log(pron, result)
local namespace = mw.title.getCurrentTitle().namespace
local suggestions = {}
for k, v in pairs(m_symbols.invalid) do
if result:match(k) then
table.insert(suggestions, k .. " with " .. v)
end
end
if suggestions[1] then
if namespace == 0 or namespace == 118 then
error("Invalid IPA: replace " .. mw.text.listToText(suggestions))
else
table.insert(err, "replace " .. mw.text.listToText(suggestions))
end
end
result = gsub(result, "⁽[".. m_symbols.superscripts .. "]+⁾", "")
result = gsub(result, '[' .. m_symbols.valid .. ']', '')
if result ~= '' then
local category = "使用無效表示符號的國際音標發音"
if namespace ~= 0 and namespace ~= 118 then
category = category .. "/non_mainspace"
end
table.insert(categories, "[[Category:" .. category .. "]]")
table.insert(err, "invalid IPA characters (" .. result .. ")")
end
end
if found_HTML then
table.insert(categories, "[[Category:使用成對HTML標籤的國際音標發音]]")
end
-- Reference inside IPA template usage
-- FIXME: Doesn't work; you can't put HTML in module output.
--if mw.ustring.find(pron, '</ref>') then
-- table.insert(categories, "[[Category:IPA pronunciations with reference]]")
--end
if repr == "phonemic" or repr == "rhyme" then
if lang and m_data.phonemes[lang:getCode()] then
local valid_phonemes = m_data.phonemes[lang:getCode()]
local rest = pron
local phonemes = {}
while mw.ustring.len(rest) > 0 do
local longestmatch = ""
if sub(rest, 1, 1) == "(" or sub(rest, 1, 1) == ")" then
longestmatch = sub(rest, 1, 1)
else
for _, phoneme in ipairs(valid_phonemes) do
if mw.ustring.len(phoneme) > mw.ustring.len(longestmatch) and sub(rest, 1, mw.ustring.len(phoneme)) == phoneme then
longestmatch = phoneme
end
end
end
if mw.ustring.len(longestmatch) > 0 then
table.insert(phonemes, longestmatch)
rest = sub(rest, mw.ustring.len(longestmatch) + 1)
else
local phoneme = sub(rest, 1, 1)
table.insert(phonemes, "<span style=\"color: red\">" .. phoneme .. "</span>")
rest = sub(rest, 2)
table.insert(categories, "[[Category:含有無效音位的國際音標發音/" .. lang:getCode() .. "]]")
require("Module:debug").track("IPA/invalid phonemes/" .. phoneme)
end
end
pron = table.concat(phonemes)
end
if repr == "phonemic" then
pron = "/" .. pron .. "/"
else
pron = "-" .. pron
end
elseif repr == "phonetic" then
pron = "[" .. pron .. "]"
elseif repr == "orthographic" then
pron = "⟨" .. pron .. "⟩"
end
if reconstructed then
pron = "*" .. pron
end
if err[1] then
err = '<span class="previewonly error" style="font-size: small;> ' .. table.concat(err, ', ') .. '</span>'
else
err = ""
end
if split_output then -- for use of IPA in links
return '<span class="IPA">' .. pron .. '</span>', table.concat(categories), err
else
return '<span class="IPA">' .. pron .. '</span>' .. err .. table.concat(categories)
end
end
return export