-- language.lua
-- Random word generator driven by phoneme classes, syllable templates and
-- orthography (romanization) rules.
--
-- Provenance: commit fab85f8d639c8b7792775d02a2d3fe22fe96befd
-- ("initial commit", sanine, 2022-12-19).
--
-- NOTE: this module targets Lua 5.1 (it relies on setfenv). After the
-- setfenv call below, every non-local assignment in this file lands in the
-- `language` table, which is how `uniform`, `quadratic` and `Language` are
-- exported. Reads of undefined names fall through to _G.

local language = {}
setmetatable(language, {__index=_G})
setfenv(1, language)


-- local utility functions

-- convert string to array of single-byte characters
local function string_to_array(str)
    local arr = {}
    for i = 1, #str do
        arr[i] = string.sub(str, i, i)
    end
    return arr
end


-- iterate over lines in string
--
-- Yields each "\n"-terminated line (without the newline), followed by any
-- trailing text after the last newline. A final "\n" does not produce an
-- extra empty line; an empty string produces no lines at all.
local function lines(str)
    local pos = 1
    local len = #str
    return function()
        if pos > len then
            return nil
        end
        -- plain (non-pattern) search for the next newline
        local nl = string.find(str, "\n", pos, true)
        local line
        if nl then
            line = string.sub(str, pos, nl - 1)
            pos = nl + 1
        else
            line = string.sub(str, pos)
            pos = len + 1
        end
        return line
    end
end


-- parse phonemes from string
--
-- Each meaningful line has the form `X=phones`, where X is a single
-- character naming the class and `phones` lists its members (whitespace is
-- ignored). Lines starting with `#` and blank lines are skipped.
-- Returns a table mapping class character -> array of phone characters.
-- Raises an error on a line that has no `=`.
local function parse_phonemes(str)
    local phonemes = {}
    for line in lines(str) do
        if string.match(line, "^%s*#") then
            -- this is a comment, ignore
        elseif string.match(line, "^%s*$") then
            -- ignore blank lines
        else
            -- locals: without `local` these would leak into the module table
            local class, phones = string.match(line, "(.)=(.*)")
            if class == nil then
                error("malformed phoneme line (expected 'X=phones'): " .. line)
            end
            -- remove whitespace from phones (parens drop gsub's count)
            phones = (string.gsub(phones, "%s", ""))
            phonemes[class] = string_to_array(phones)
        end
    end
    return phonemes
end


-- parse syllables from string
--
-- Each non-blank line is one syllable template; whitespace inside the line
-- is removed and the remainder is split into characters.
-- Returns an array of character arrays.
local function parse_syllables(str)
    local syllables = {}
    for line in lines(str) do
        local template = (string.gsub(line, "%s", ""))
        -- skip blank lines, matching the other parsers; an empty template
        -- would otherwise generate empty syllables
        if template ~= "" then
            syllables[#syllables + 1] = string_to_array(template)
        end
    end
    return syllables
end


-- parse orthography patterns from string
--
-- Each non-blank line has the form `pattern=replacement` (split at the last
-- `=`). In `pattern`, `%X` is expanded to a character set of the phones in
-- class X when X is a known phoneme class; otherwise it is left untouched so
-- ordinary Lua pattern escapes keep working.
-- Returns an array of { pattern=..., replace=... } in file order.
-- Raises an error on a non-blank line that has no `=`.
local function parse_orthography(str, phonemes)
    local rules = {}
    for line in lines(str) do
        -- test the current line, not the whole input
        if string.match(line, "^%s*$") then
            -- ignore blank lines
        else
            -- trim trailing whitespace
            line = (string.gsub(line, "%s*$", ""))
            local pattern, replace = string.match(line, "(.*)=(.*)")
            if pattern == nil then
                error("malformed orthography rule (expected 'pattern=replacement'): " .. line)
            end
            pattern = (string.gsub(pattern, "%%(.)", function(class)
                local members = phonemes[class]
                if members == nil then
                    return '%' .. class
                end
                return '[' .. table.concat(members, '') .. ']'
            end))
            rules[#rules + 1] = { pattern=pattern, replace=replace }
        end
    end
    return rules
end


-- randomly choose an element from an array with a given distribution
--
-- `distribution` maps a uniform random number in [0,1) to a position in
-- [0,1); that position is scaled to an index into `tbl`. The index is
-- clamped so a distribution that returns exactly 1 (or strays outside
-- [0,1]) still selects a valid element. Returns nil for an empty array.
local function random_choice(tbl, distribution)
    local count = #tbl
    if count == 0 then
        return nil
    end
    local x = distribution(math.random())
    local bin = math.floor(x * count) + 1
    if bin < 1 then
        bin = 1
    elseif bin > count then
        bin = count
    end
    return tbl[bin]
end


-- probability distributions

-- uniform (flat) distribution
function uniform(x)
    return x
end


-- quadratic distribution: squaring a value in [0,1) pushes it toward 0,
-- favouring earlier entries
function quadratic(x)
    return x*x
end


Language = {}

-- metatable shared by all Language instances
local instance_mt = {__index=Language}

-- construct a Language
--
-- Normally invoked as `Language(phonemes, syllables, orthography, ...)`;
-- the first parameter receives the Language table itself via __call and is
-- ignored.
--   phonemes     string parsed by parse_phonemes
--   syllables    string parsed by parse_syllables
--   orthography  string parsed by parse_orthography
--   len_min,
--   len_max      inclusive range of word lengths, in syllables
--   p_dist,
--   s_dist,
--   l_dist       optional distributions for phones, syllables and lengths
--                (each defaults to uniform)
function Language.new(_, phonemes, syllables, orthography, len_min, len_max, p_dist, s_dist, l_dist)
    local self = setmetatable({}, instance_mt)
    self.phonemes = parse_phonemes(phonemes)
    self.p_dist = p_dist or uniform
    self.syllables = parse_syllables(syllables)
    self.s_dist = s_dist or uniform
    self.orthography = parse_orthography(orthography, self.phonemes)
    self.length = {}
    for len = len_min, len_max do
        self.length[#self.length + 1] = len
    end
    self.l_dist = l_dist or uniform
    return self
end
setmetatable(Language, {__call=Language.new})


-- pick a random phone from the given phoneme class
function Language:random_phone(class)
    return random_choice(self.phonemes[class], self.p_dist)
end


-- generate one random syllable
--
-- Picks a syllable template, then replaces every character that names a
-- phoneme class with a random phone from that class; other characters are
-- copied through literally.
function Language:syllable()
    -- pick a random syllable type
    local syl = random_choice(self.syllables, self.s_dist)
    local parts = {}
    for i, ch in ipairs(syl) do
        if self.phonemes[ch] then
            parts[i] = self:random_phone(ch)
        else
            parts[i] = ch
        end
    end
    return table.concat(parts)
end


-- generate one random word: a random number of syllables drawn from the
-- configured length range
function Language:word()
    local len = random_choice(self.length, self.l_dist)
    local parts = {}
    for i = 1, len do
        parts[i] = self:syllable()
    end
    return table.concat(parts)
end


-- apply the orthography rules, in order, to `text` and return the result
function Language:romanize(text)
    local result = text
    for _, rule in ipairs(self.orthography) do
        -- parens drop gsub's second return value (the match count)
        result = (string.gsub(result, rule.pattern, rule.replace))
    end
    return result
end


return language