1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
|
-- Module environment table. After setfenv, every non-local name assigned in
-- this chunk becomes a field of `language`, while reads of names that are not
-- defined here fall through to the real globals via __index.
local language = setmetatable({}, { __index = _G })
setfenv(1, language)
-- local utility functions
-- Split a string into an array of its single-character (one byte) pieces.
-- An empty string yields an empty table.
local function string_to_array(str)
  local chars = {}
  local count = #str
  for pos = 1, count do
    chars[pos] = string.sub(str, pos, pos)
  end
  return chars
end
-- Return an iterator over the lines of `str`, with newlines removed.
-- Every "\n"-terminated line is produced (including empty ones); a final
-- line without a trailing newline is produced as well.
local function lines(str)
  -- drop every newline character from a matched chunk
  local function strip_newlines(chunk)
    return string.gsub(chunk, "\n", "")
  end

  local function produce()
    for chunk in string.gmatch(str, "[^\n]*\n") do
      coroutine.yield(strip_newlines(chunk))
    end
    -- whatever follows the last newline (if non-empty) is the final line
    local tail = string.match(str, "\n?[^\n]+$")
    if tail ~= nil then
      coroutine.yield(strip_newlines(tail))
    end
  end

  return coroutine.wrap(produce)
end
-- Parse phoneme class definitions from a string.
-- Lines whose first non-space character is `#` are comments; blank lines are
-- ignored. Every other line must contain `X=phones`, where X is the single
-- character immediately before an `=` and `phones` is the rest of the line.
-- Whitespace inside `phones` is removed and each remaining character becomes
-- one phoneme.
-- Returns a table mapping class character -> array of phoneme characters.
-- Raises an error for a non-blank, non-comment line without a definition.
local function parse_phonemes(str)
  local phonemes = {}
  for line in lines(str) do
    local is_comment = string.match(line, "^%s*#")
    local is_blank = string.match(line, "^%s*$")
    if not is_comment and not is_blank then
      -- keep these local: as globals they would leak into the module table
      local class, phones = string.match(line, "(.)=(.*)")
      if not class then
        error("malformed phoneme definition: " .. line)
      end
      -- remove whitespace from phones
      phones = string.gsub(phones, "%s", "")
      phonemes[class] = string_to_array(phones)
    end
  end
  return phonemes
end
-- Parse syllable templates from a string, one template per line.
-- All whitespace is removed from a line; lines that are empty afterwards
-- (blank lines) are skipped. Each remaining line becomes an array of its
-- characters.
local function parse_syllables(str)
  local templates = {}
  for raw in lines(str) do
    -- stripping whitespace leaves "" exactly when the line was blank
    local compact = string.gsub(raw, "%s", "")
    if compact ~= "" then
      templates[#templates + 1] = string_to_array(compact)
    end
  end
  return templates
end
-- Parse orthography rewrite rules from a string, one `pattern=replace` rule
-- per line. Blank lines and lines without `=` are skipped. Trailing
-- whitespace is trimmed; the pattern is everything before the last `=` and
-- the replacement everything after it.
-- Inside the pattern, `%X` is expanded to a character set containing the
-- phonemes of class X when `phonemes[X]` exists; otherwise `%X` is left
-- untouched so ordinary Lua pattern escapes keep working.
-- Returns an array of { pattern = ..., replace = ... } tables in file order.
local function parse_orthography(str, phonemes)
  -- Escape characters that are special inside a `[...]` pattern set so a
  -- phoneme such as `-`, `^`, `]` or `%` is matched literally.
  local function escape_set_char(ch)
    return (string.gsub(ch, "[%^%]%-%%]", "%%%0"))
  end

  -- Expand one `%X` reference found in a rule pattern.
  local function expand_class(class)
    local phones = phonemes[class]
    if phones == nil then
      return '%' .. class
    end
    local escaped = {}
    for i = 1, #phones do
      escaped[i] = escape_set_char(phones[i])
    end
    return '[' .. table.concat(escaped, '') .. ']'
  end

  local rules = {}
  for line in lines(str) do
    -- test the current line (not the whole input) for blankness
    if not string.match(line, '^%s*$') then
      -- trim trailing whitespace
      local trimmed = string.gsub(line, "%s*$", "")
      local pattern, replace = string.match(trimmed, "(.*)=(.*)")
      if pattern then
        pattern = string.gsub(pattern, "%%(.)", expand_class)
        table.insert(rules, { pattern=pattern, replace=replace })
      end
    end
  end
  return rules
end
-- Randomly choose an element from an array with a given distribution.
-- `distribution` receives a value from math.random() (in [0, 1)) and returns
-- a number that is scaled by the array length to select a bin.
-- The bin is clamped to the valid index range, so a distribution that
-- returns 1.0 or more picks the last element and a negative result picks the
-- first, instead of indexing outside the array.
-- Returns nil when `tbl` is empty.
local function random_choice(tbl, distribution)
  local count = #tbl
  local x = distribution(math.random())
  local bin = math.floor(x * count) + 1
  if bin < 1 then
    bin = 1
  elseif bin > count then
    bin = count
  end
  return tbl[bin]
end
-- probability distributions
-- Uniform (flat) distribution: returns its input unchanged.
-- Deliberately not local, so it is stored in the module environment.
uniform = function(x)
  return x
end
-- Quadratic distribution: returns the square of its input, which maps
-- values in [0, 1) toward 0.
-- Deliberately not local, so it is stored in the module environment.
quadratic = function(x)
  local squared = x * x
  return squared
end
-- Language "class" table. It is not local, so it is stored in the module
-- environment.
Language = {}

-- Construct a Language object.
--   _            ignored (receives the Language table when invoked through
--                the __call metamethod)
--   phonemes     phoneme class definitions, parsed by parse_phonemes
--   syllables    syllable templates, parsed by parse_syllables
--   orthography  rewrite rules, parsed by parse_orthography
--   len_min, len_max  inclusive range of word lengths in syllables
--   p_dist, s_dist, l_dist  optional distribution functions for choosing
--                phonemes, syllable templates and word lengths; each
--                defaults to `uniform`
-- Raises an error if len_min or len_max is not a number.
function Language.new(_, phonemes, syllables, orthography, len_min, len_max, p_dist, s_dist, l_dist)
  -- validate up front so a missing length gives a readable message rather
  -- than "'for' initial value must be a number"
  local lo, hi = tonumber(len_min), tonumber(len_max)
  if not lo or not hi then
    error("Language.new: len_min and len_max must be numbers", 2)
  end

  local self = {}
  setmetatable(self, {__index=Language})
  self.phonemes = parse_phonemes(phonemes)
  self.p_dist = p_dist or uniform
  self.syllables = parse_syllables(syllables)
  self.s_dist = s_dist or uniform
  self.orthography = parse_orthography(orthography, self.phonemes)
  -- enumerate every permitted length so one can be drawn with random_choice
  self.length = {}
  for len=lo,hi do
    table.insert(self.length, len)
  end
  self.l_dist = l_dist or uniform
  return self
end

-- Allow `Language(...)` as shorthand for the constructor.
setmetatable(Language, {__call=Language.new})
-- Pick a random phoneme from the given class, using this language's phoneme
-- distribution.
function Language:random_phone(class)
  local candidates = self.phonemes[class]
  return random_choice(candidates, self.p_dist)
end
-- Generate one random syllable.
-- A syllable template is chosen with the syllable distribution; each
-- character that names a phoneme class is replaced by a random phoneme of
-- that class, and any other character is copied through unchanged.
function Language:syllable()
  local template = random_choice(self.syllables, self.s_dist)
  local out = ''
  for _, symbol in ipairs(template) do
    local piece
    if self.phonemes[symbol] then
      piece = self:random_phone(symbol)
    else
      piece = symbol
    end
    out = out .. piece
  end
  return out
end
-- Generate one random word: choose a syllable count from self.length with
-- the length distribution, then concatenate that many random syllables.
function Language:word()
  local syllable_count = random_choice(self.length, self.l_dist)
  local built = ''
  for _ = 1, syllable_count do
    built = built .. self:syllable()
  end
  return built
end
-- Apply every orthography rule to `text`, in order, and return the result.
-- Each rule's pattern and replacement are passed straight to string.gsub,
-- so later rules see the output of earlier ones.
function Language:romanize(text)
  local rules = self.orthography
  local out = text
  for _, rule in ipairs(rules) do
    out = string.gsub(out, rule.pattern, rule.replace)
  end
  return out
end
-- Return the module environment table; the non-local names assigned in this
-- file (uniform, quadratic, Language) are its fields.
return language
|