-- ਮੌਡਿਊਲ:IPA
-- Documentation for this module may be created at ਮੌਡਿਊਲ:IPA/doc
local export = {}
-- [[Module:IPA/data]]
local m_data = mw.loadData('Module:IPA/data')
--- Builds the complete pronunciation line: the "IPA" label, a superscript
-- key link (or error text), the formatted pronunciations and, on entry pages,
-- the language's IPA tracking category.
-- @param lang  language object; must be non-nil when `err` is absent, because
--              its code and canonical name are used to build the key link
-- @param items list of pronunciation items, passed on to
--              `export.format_IPA_multiple`
-- @param err   optional error text; when given it is shown in red in place of
--              the key link
-- @return wikitext string
function export.format_IPA_full(lang, items, err)
	local key_link
	if err then
		key_link = '<span style="color:red">' .. err .. '</span>'
	else
		local target
		if m_data.langs_with_infopages[lang:getCode()] then
			target = "Appendix:" .. lang:getCanonicalName() .. " pronunciation"
		else
			target = "wikipedia:" .. lang:getCanonicalName() .. " phonology"
		end
		key_link = "[[" .. target .. "|key]]"
	end

	local parts = {
		"[[Wiktionary:International Phonetic Alphabet|IPA]]<sup>(",
		key_link,
		")</sup>: ",
		export.format_IPA_multiple(lang, items),
	}

	-- Only pages in the main and Reconstruction namespaces are categorised.
	if lang then
		local namespace = mw.title.getCurrentTitle().nsText
		if namespace == "" or namespace == "Reconstruction" then
			parts[#parts + 1] = "[[Category:" .. lang:getCanonicalName() .. " terms with IPA pronunciation]]"
		end
	end

	return table.concat(parts)
end
--- Reports whether a transcription puts a syllable dot directly in front of a
-- stress mark (".ˈ" or ".ˌ").
-- Declared local so that it does not leak into the global environment.
-- @param word transcription string to inspect
-- @return true if either sequence occurs, false otherwise
local function invalidSeparators(word)
	-- "%." escapes the dot, which would otherwise match any character.
	for _, pattern in ipairs({"%.ˈ", "%.ˌ"}) do
		if mw.ustring.find(word, pattern) then
			return true
		end
	end
	return false
end
--- Formats a list of pronunciations as one comma-separated string.
-- @param lang  language object or nil; passed to `export.format_IPA`, and its
--              code is checked to enable the English-only separator check
-- @param items sequence of tables, each with a `pron` string and optionally
--              `qualifiers` (a sequence handed to the "qualifier" template)
--              and `note` (text wrapped in a <ref> tag). Not modified.
-- @return the formatted pronunciations joined by ", ", followed by any
--         tracking categories
function export.format_IPA_multiple(lang, items)
	local categories = {}

	-- Use a separate list for the placeholder so the caller's table is never
	-- modified.
	local entries = items
	if #items == 0 then
		if mw.title.getCurrentTitle().nsText == "Template" then
			-- Sample pronunciation shown on template pages.
			entries = {{pron = "/aɪ piː ˈeɪ/"}}
		else
			table.insert(categories, "[[Category:Pronunciation templates without a pronunciation]]")
		end
	end

	-- Format
	local bits = {}
	for _, item in ipairs(entries) do
		local bit = export.format_IPA(lang, item.pron)

		if item.qualifiers and #item.qualifiers > 0 then
			bit = mw.getCurrentFrame():expandTemplate{title = "qualifier", args = item.qualifiers} .. " " .. bit
		end

		if item.note then
			bit = bit .. mw.getCurrentFrame():extensionTag("ref", item.note)
		end

		table.insert(bits, bit)
	end

	-- English transcriptions should not combine a syllable dot with a stress
	-- mark; one category link is enough however many items are affected.
	if lang and lang:getCode() == "en" then
		for _, item in ipairs(entries) do
			if invalidSeparators(item.pron) then
				table.insert(categories, "[[Category:IPA for English using .ˈ or .ˌ]]")
				break
			end
		end
	end

	return table.concat(bits, ", ") .. table.concat(categories)
end
-- TODO: Use data module for this
local diacritics = '̘̙̜̝̞̟̠̣̤̥̩̪̬̯̰̹̺̻̼͇͈͉͍͎͔͕̀́̂̃̄̆̈̋̌̏̽͆͊͋͌̊̌᷄᷅᷆᷇᷈᷉̚͢͡'
local tones = '˥˦˧˨˩¹²³⁴⁵'
local valid_symbols = ' %(%)%%{%|%}%-~.!abcdefhijklmnopqrstuvwxyz¡àáâãäæçèéêëìíîïðòóôõöøùúûüýÿāăēĕěħĩīĭŋōŏőœũūŭűŷǀǁǂǃǎǐǒǔǖǘǚǜǟǣǽǿȁȅȉȍȕȫȭȳɐɑɒɓɔɕɖɗɘəɚɛɜɝɞɟɠɡɢɣɤɥɦɧɨɪɫɬɭɮɯɰɱɲɳɴɵɶɸɹɺɻɽɾʀʁʂʃʄʈʉʊʋṽʌʍʎʏʐʑʒʔʕʘʙʛʜʝʟʡʢʬʭ⁻¹²³⁴⁵ᵝʰʱʲʳʴʵʶʷʸʼˀˁˈˌːˑ˞ˠˡˢˣˬ˭β͜θχᴙᵊᵐᵑᶑᶣᶬᶮᶯᶰᶹ᷽ḁḛḭḯṍṏṳṵṹṻạẹẽịọụỳỵỹ‖․‥…‿ⁿ↑↓ⱱꜛꜜꟸꟹ𝆏𝆑' .. diacritics .. tones
--- Takes an IPA pronunciation, formats it and adds cleanup categories.
-- @param lang language object or nil; when given and `m_data.phonemes` has an
--             entry for its code, phonemic transcriptions are checked against
--             that phoneme list
-- @param pron transcription string, expected to be wrapped in /.../ (phonemic)
--             or [...] (phonetic)
-- @return an IPA-classed <span> containing the transcription, followed by any
--         cleanup categories
function export.format_IPA(lang, pron)
	local categories = {}

	-- Detect whether this is a phonemic or phonetic transcription by looking
	-- at the first and last characters.
	local _, _, left, right = mw.ustring.find(pron, '^(.).-(.)$')
	local repr = nil

	-- If valid, strip the representation marks
	if left == '/' and right == '/' then
		repr = "phonemic"
		pron = mw.ustring.sub(pron, 2, -2)
	elseif left == '[' and right == ']' then
		repr = "phonetic"
		pron = mw.ustring.sub(pron, 2, -2)
	else
		table.insert(categories, "[[Category:IPA pronunciations with invalid representation marks]]")
	end

	-- Check for obsolete and nonstandard symbols. Only the first hit is
	-- reported. The sort key is the matched text itself (not its position), in
	-- line with the invalid-character category below.
	for _, symbol in ipairs(m_data.nonstandard) do
		local first, last = mw.ustring.find(pron, symbol)

		if first then
			local found = mw.ustring.sub(pron, first, last)
			table.insert(categories, "[[Category:IPA pronunciations with obsolete or nonstandard characters|" .. found .. "]]")
			break
		end
	end

	-- Check for invalid symbols: whatever remains after deleting every valid
	-- symbol is invalid.
	local leftover = mw.ustring.gsub(pron, '[' .. valid_symbols .. ']', '')

	if leftover ~= '' then
		mw.log(pron, leftover)
		table.insert(categories, "[[Category:IPA pronunciations with invalid IPA characters|" .. leftover .. "]]")
	end

	-- Reference inside IPA template usage
	-- FIXME: Doesn't work; you can't put HTML in module output.
	--if mw.ustring.find(pron, '</ref>') then
	--	table.insert(categories, "[[Category:IPA pronunciations with reference]]")
	--end

	if repr == "phonemic" then
		local valid_phonemes = lang and m_data.phonemes[lang:getCode()]

		if valid_phonemes then
			local lang_code = lang:getCode()
			local rest = pron
			local phonemes = {}

			-- Consume the transcription from the left, always taking the
			-- longest valid phoneme that matches at the current position.
			while mw.ustring.len(rest) > 0 do
				local first_char = mw.ustring.sub(rest, 1, 1)
				local longestmatch = ""
				local longest_len = 0

				if first_char == "(" or first_char == ")" then
					-- Parentheses are always accepted.
					longestmatch = first_char
					longest_len = 1
				else
					for _, phoneme in ipairs(valid_phonemes) do
						local len = mw.ustring.len(phoneme)

						if len > longest_len and mw.ustring.sub(rest, 1, len) == phoneme then
							longestmatch = phoneme
							longest_len = len
						end
					end
				end

				if longest_len > 0 then
					table.insert(phonemes, longestmatch)
					rest = mw.ustring.sub(rest, longest_len + 1)
				else
					-- No phoneme matches here: highlight the character in red,
					-- categorise the page and move on by one character.
					table.insert(phonemes, "<span style=\"color: red\">" .. first_char .. "</span>")
					rest = mw.ustring.sub(rest, 2)
					table.insert(categories, "[[Category:IPA pronunciations with invalid phonemes/" .. lang_code .. "]]")
					require("Module:debug").track("IPA/invalid phonemes/" .. first_char)
				end
			end

			pron = table.concat(phonemes)
		end

		pron = "/" .. pron .. "/"
	elseif repr == "phonetic" then
		pron = "[" .. pron .. "]"
	end

	return '<span class="IPA" lang="">' .. pron .. '</span>' .. table.concat(categories)
end
-- IPA <-> XSAMPA lookup tables, keyed by symbol string
local i2x_lookup, x2i_lookup = {}, {}

--- Fills the IPA <-> X-SAMPA lookup tables from Module:IPA/data/symbols.
-- Does nothing if the tables have already been filled.
-- Declared local so that it does not leak into the global environment.
local function Populate_IPA_XSAMPA_LookupTables()
	-- Both tables are keyed by strings, so the length operator always gives 0
	-- for them; next() is the reliable emptiness test.
	if next(i2x_lookup) ~= nil or next(x2i_lookup) ~= nil then
		return
	end

	local m = mw.loadData('Module:IPA/data/symbols') --[[Module:IPA/data/symbols]]

	for ipa_sym, data in pairs(m.symbols[1]) do
		if type(data.XSAMPA) == "table" then
			-- Several X-SAMPA spellings: the first is used for IPA -> X-SAMPA,
			-- and every spelling maps back to the IPA symbol.
			i2x_lookup[ipa_sym] = data.XSAMPA[1]
			for _, xsampa_sym in ipairs(data.XSAMPA) do
				x2i_lookup[xsampa_sym] = ipa_sym
			end
		else
			i2x_lookup[ipa_sym] = data.XSAMPA
			x2i_lookup[data.XSAMPA] = ipa_sym
		end
	end

	--exception cases where two IPA characters map to one XSAMPA character
	x2i_lookup["_T"] = "˥"
	x2i_lookup["_H"] = "˦"
	x2i_lookup["_M"] = "˧"
	x2i_lookup["_L"] = "˨"
	x2i_lookup["_B"] = "˩"
end
--- Converts IPA text to X-SAMPA using the lookup tables.
-- @param text either a string, or a frame table whose first argument holds
--             the text; in the frame case "{{=}}" and "{{!}}" are replaced by
--             "=" and "|", the text is run through `mw.text.decode`, and the
--             result is passed through `mw.text.nowiki`
-- @return the converted string
function export.IPA_to_XSAMPA(text)
	Populate_IPA_XSAMPA_LookupTables()

	local from_frame = type(text) == 'table'
	if from_frame then -- a frame, extract args
		local arg = text.args[1]
		arg = arg:gsub('{{=}}', '=')
		arg = arg:gsub('{{!}}', '|')
		text = mw.text.decode(arg) -- XXX
	end

	-- this basically sums up m_data.symbols[2].XSAMPA
	local converted = mw.ustring.gsub(text, 'ːː', ':')
	-- Replace each character that has an entry in the lookup table; characters
	-- without an entry are kept unchanged.
	converted = mw.ustring.gsub(converted, '.', i2x_lookup)

	if from_frame then
		converted = mw.text.nowiki(converted)
	end

	return converted
end
--- Converts X-SAMPA text to IPA using the lookup tables.
-- @param text either a string, or a frame table whose first argument holds
--             the text (run through `mw.text.decode` first)
-- @return the converted string; unlike `export.IPA_to_XSAMPA`, the result is
--         not passed through `mw.text.nowiki` (that step is disabled)
function export.XSAMPA_to_IPA(text)
	Populate_IPA_XSAMPA_LookupTables()

	if type(text) == 'table' then -- a frame, extract args
		text = mw.text.decode(text.args[1]) -- XXX
	end

	-- XXX: may not be the most efficient, but at least correct.
	-- At each position try the longest prefix first (4 characters down to 1);
	-- if none has an entry, copy one character through unchanged.
	local pieces = {}
	while #text > 0 do
		local replacement, consumed
		for len = 4, 1, -1 do
			local ipa = x2i_lookup[mw.ustring.sub(text, 1, len)]
			if ipa then
				replacement, consumed = ipa, len
				break
			end
		end

		if not replacement then -- no match
			replacement, consumed = mw.ustring.sub(text, 1, 1), 1
		end

		pieces[#pieces + 1] = replacement
		text = mw.ustring.sub(text, consumed + 1)
	end

	return table.concat(pieces)
end
return export