summary refs log tree commit diff
path: root/language.lua
diff options
context:
space:
mode:
author sanine <sanine.not@pm.me> 2022-12-19 02:38:48 -0600
committer sanine <sanine.not@pm.me> 2022-12-19 02:38:48 -0600
commit fab85f8d639c8b7792775d02a2d3fe22fe96befd (patch)
tree e04fb110a297be8d29b6f4f7562692096f80eb5f /language.lua
initial commit
Diffstat (limited to 'language.lua')
-rw-r--r-- language.lua 171
1 files changed, 171 insertions, 0 deletions
diff --git a/language.lua b/language.lua
new file mode 100644
index 0000000..cf6725f
--- /dev/null
+++ b/language.lua
@@ -0,0 +1,171 @@
+-- Module table. It is also installed as the environment of this chunk, so
+-- every non-local assignment below becomes a field of `language`, while reads
+-- of names it does not define fall through to the real globals via __index.
+-- NOTE: setfenv is only available in Lua 5.1 / LuaJIT.
+local language = setmetatable({}, { __index = _G })
+setfenv(1, language)
+
+
+-- local utility functions
+
+-- Split a string into an array of one-character strings.
+-- string.sub works on bytes, so each entry holds exactly one byte of `str`.
+-- @param str  the string to split
+-- @return     array with #str entries, in the original order
+local function string_to_array(str)
+  local chars = {}
+  local index = 1
+  while index <= #str do
+    chars[#chars + 1] = string.sub(str, index, index)
+    index = index + 1
+  end
+  return chars
+end
+
+
+-- Iterate over the lines of a string, with the newline characters removed.
+-- Every newline-terminated line is produced (empty ones included), followed
+-- by a final non-empty line when the string does not end in a newline.
+-- @param str  the text to split
+-- @return     an iterator function suitable for a generic `for`
+local function lines(str)
+  local function strip_newlines(text)
+    -- both gsub results (text, replacement count) are passed through
+    return string.gsub(text, "\n", "")
+  end
+
+  local function emit_all()
+    -- lines that end in a newline
+    for terminated in string.gmatch(str, "[^\n]*\n") do
+      coroutine.yield(strip_newlines(terminated))
+    end
+    -- trailing text after the last newline, if there is any
+    local tail = string.match(str, "\n?[^\n]+$")
+    if tail ~= nil then
+      coroutine.yield(strip_newlines(tail))
+    end
+  end
+
+  return coroutine.wrap(emit_all)
+end
+
+
+-- Parse phoneme class definitions from a string.
+-- Each meaningful line has the form `X=abc`: the single character directly
+-- before the `=` names the class, and every non-whitespace character after
+-- it becomes one phone of that class. Blank lines and lines whose first
+-- non-blank character is `#` are skipped.
+-- @param str  the definitions, one per line
+-- @return     table mapping class character -> array of phone characters
+-- Raises an error for a non-blank, non-comment line that has no `X=` part.
+local function parse_phonemes(str)
+  local phonemes = {}
+  for line in lines(str) do
+    local is_comment = string.match(line, "^%s*#")
+    local is_blank = string.match(line, "^%s*$")
+    if not (is_comment or is_blank) then
+      -- keep these local: as bare assignments they would be written into
+      -- the module environment on every parsed line
+      local class, phones = string.match(line, "(.)=(.*)")
+      if class == nil then
+        error("malformed phoneme definition: " .. line)
+      end
+      -- remove whitespace from phones (the parentheses drop gsub's count)
+      phones = (string.gsub(phones, "%s", ""))
+      phonemes[class] = string_to_array(phones)
+    end
+  end
+  return phonemes
+end
+
+
+-- Parse syllable templates from a string, one template per line.
+-- All whitespace is removed from a line and the remaining characters are
+-- stored as an array. Every line yields an entry, so a blank line produces
+-- an empty template.
+-- @param str  the templates, one per line
+-- @return     array of character arrays, in line order
+local function parse_syllables(str)
+  local templates = {}
+  for raw in lines(str) do
+    local compact = string.gsub(raw, "%s", "")
+    templates[#templates + 1] = string_to_array(compact)
+  end
+  return templates
+end
+
+
+-- Parse orthography rewrite rules from a string.
+-- Each non-blank line has the form `pattern=replacement`; the line is split
+-- at its last `=`. Inside the pattern, `%X` expands to a character set that
+-- contains every phone of class X when X is a known phoneme class, and is
+-- left untouched otherwise, so ordinary Lua pattern escapes keep working.
+-- @param str       the rules, one per line
+-- @param phonemes  table mapping class character -> array of phones
+-- @return          array of { pattern=..., replace=... } in line order
+-- Raises an error for a non-blank line that contains no `=`.
+local function parse_orthography(str, phonemes)
+  local rules = {}
+  for line in lines(str) do
+    -- test the current line (not the whole input) for blankness
+    if not string.match(line, "^%s*$") then
+      -- trim trailing whitespace
+      line = string.gsub(line, "%s*$", "")
+      local pattern, replace = string.match(line, "(.*)=(.*)")
+      if pattern == nil then
+        error("malformed orthography rule: " .. line)
+      end
+      pattern = string.gsub(pattern, "%%(.)", function(class)
+        local phones = phonemes[class]
+        if phones == nil then
+          return "%" .. class
+        end
+        -- escape punctuation so that phones such as `-`, `^`, `]` or `%`
+        -- are matched literally instead of being read as set syntax
+        local set = string.gsub(table.concat(phones), "%p", "%%%0")
+        return "[" .. set .. "]"
+      end)
+      rules[#rules + 1] = { pattern = pattern, replace = replace }
+    end
+  end
+  return rules
+end
+
+
+-- Randomly choose an element from an array with a given distribution.
+-- `distribution` receives a uniform random number from math.random() and
+-- returns a position; that position is scaled by the array length to select
+-- an index.
+-- @param tbl           array to choose from
+-- @param distribution  function mapping a number in [0, 1) to a position
+-- @return              the chosen element, or nil when `tbl` is empty
+local function random_choice(tbl, distribution)
+  local count = #tbl
+  local x = distribution(math.random())
+  local bin = math.floor(x * count) + 1
+  -- clamp: a distribution that returns exactly 1 (or strays outside [0, 1))
+  -- would otherwise index outside the array and silently yield nil
+  bin = math.max(1, math.min(count, bin))
+  return tbl[bin]
+end
+
+
+-- probability distributions
+
+-- Uniform (flat) distribution: hands its argument back unchanged.
+-- @param x  input value
+-- @return   x
+function uniform(x)
+  local same = x
+  return same
+end
+
+
+-- Quadratic distribution: returns the square of its argument. For inputs in
+-- [0, 1) the result is never larger than the input.
+-- @param x  input value
+-- @return   x squared
+function quadratic(x)
+  local squared = x * x
+  return squared
+end
+
+
+
+
+
+
+-- Language "class" table. It is deliberately not declared local, so the
+-- assignment is stored in the chunk's environment.
+Language = {}
+
+-- Build a new language generator.
+-- @param _            ignored (receives the Language table when invoked
+--                     through the __call metamethod set below)
+-- @param phonemes     phoneme class definitions, parsed by parse_phonemes
+-- @param syllables    syllable templates, parsed by parse_syllables
+-- @param orthography  rewrite rules, parsed by parse_orthography
+-- @param len_min      smallest word length in syllables (default 1)
+-- @param len_max      largest word length in syllables (default len_min)
+-- @param p_dist       distribution for picking phones (default uniform)
+-- @param s_dist       distribution for picking syllables (default uniform)
+-- @param l_dist       distribution for picking lengths (default uniform)
+-- @return             the new Language instance
+function Language.new(_, phonemes, syllables, orthography, len_min, len_max, p_dist, s_dist, l_dist)
+  -- default the length range instead of failing in the numeric loop below
+  len_min = len_min or 1
+  len_max = len_max or len_min
+
+  local self = setmetatable({}, {__index=Language})
+  self.phonemes = parse_phonemes(phonemes)
+  self.p_dist = p_dist or uniform
+  self.syllables = parse_syllables(syllables)
+  self.s_dist = s_dist or uniform
+  self.orthography = parse_orthography(orthography, self.phonemes)
+
+  -- list every allowed length so one can be drawn with random_choice
+  self.length = {}
+  for len = len_min, len_max do
+    self.length[#self.length + 1] = len
+  end
+  self.l_dist = l_dist or uniform
+  return self
+end
+-- allow `Language(...)` as shorthand for `Language.new(Language, ...)`
+setmetatable(Language, {__call=Language.new})
+
+
+-- Pick a random phone from the phoneme class `class`, using self.p_dist.
+-- @param class  key into self.phonemes
+-- @return       whatever random_choice selects from that class
+function Language:random_phone(class)
+  local candidates = self.phonemes[class]
+  return random_choice(candidates, self.p_dist)
+end
+
+
+-- Generate one syllable: draw a template with self.s_dist, then replace each
+-- template character that names a phoneme class with a random phone of that
+-- class; any other character is copied through unchanged.
+-- @return the generated syllable as a string
+function Language:syllable()
+  -- pick a random syllable type
+  local template = random_choice(self.syllables, self.s_dist)
+  local built = ''
+  for _, symbol in ipairs(template) do
+    local piece = symbol
+    if self.phonemes[symbol] then
+      piece = self:random_phone(symbol)
+    end
+    built = built .. piece
+  end
+  return built
+end
+
+
+-- Generate one word: draw a syllable count from self.length with
+-- self.l_dist, then concatenate that many generated syllables.
+-- @return the generated word as a string
+function Language:word()
+  local count = random_choice(self.length, self.l_dist)
+  local assembled = ''
+  for _ = 1, count do
+    local next_syllable = self:syllable()
+    assembled = assembled .. next_syllable
+  end
+  return assembled
+end
+
+
+-- Apply every orthography rule to `text`, in order; each rule is run with
+-- string.gsub on the output of the previous one.
+-- @param text  the string to rewrite
+-- @return      the rewritten string
+function Language:romanize(text)
+  local rules = self.orthography
+  local out = text
+  for index = 1, #rules do
+    local rule = rules[index]
+    out = string.gsub(out, rule.pattern, rule.replace)
+  end
+  return out
+end
+
+
+return language -- hand the module table (this chunk's environment) back to whoever loaded the chunk