summaryrefslogtreecommitdiff
path: root/language.lua
blob: b6a2f77b6b6ed2348a8382ae95e593efd27863ac (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
-- Module table. It also becomes the environment of this chunk: after the
-- setfenv call, global assignments made below are stored in `language`
-- (which is how they end up as members of the returned module), while
-- reads of names it does not define fall through to the real globals
-- via __index.
-- NOTE(review): setfenv exists only in Lua 5.1 / LuaJIT; later versions
-- replaced it with _ENV.
local language = {}
setmetatable(language, {__index=_G})
setfenv(1, language)


-- local utility functions

-- split a string into an array holding one single-character (single-byte)
-- string per position, in order; an empty string gives an empty array
local function string_to_array(str)
	local chars = {}
	for pos = 1, #str do
		chars[pos] = string.sub(str, pos, pos)
	end
	return chars
end


-- iterate over lines in string
--
-- Returns an iterator for use in a generic `for`. Each step produces exactly
-- one value: the next line with its "\n" terminator removed. Empty lines are
-- produced as "", a final line without a terminating "\n" is still produced,
-- and an empty string produces no lines at all.
local function lines(str)
	-- Terminate an unterminated last line so that a single pattern can
	-- treat every line the same way.
	if str ~= "" and string.sub(str, -1) ~= "\n" then
		str = str .. "\n"
	end
	-- The capture excludes the newline, so no separate cleanup pass is
	-- needed and only the line text is handed to the caller.
	return string.gmatch(str, "([^\n]*)\n")
end


-- parse phonemes from string
--
-- Input is read line by line:
--   * lines whose first non-whitespace character is "#" are comments
--   * blank / whitespace-only lines are ignored
--   * other lines are expected to look like `X=phones`; the single
--     character just before the "=" is the class name and the text after
--     it, with all whitespace removed, is split into one phone per character
--   * lines without such a `X=` part are skipped instead of raising an error
--
-- Returns a table mapping class character -> array of phones. A class that
-- is defined more than once keeps its last definition.
local function parse_phonemes(str)
	local phonemes = {}
	for line in lines(str) do
		local is_comment = string.match(line, "^%s*#")
		local is_blank = string.match(line, "^%s*$")
		if not is_comment and not is_blank then
			-- These must be locals: as bare assignments they would be
			-- stored in the chunk's environment table and leak out of
			-- this function.
			local class, phones = string.match(line, "(.)=(.*)")
			if class then
				-- remove whitespace from phones
				phones = string.gsub(phones, "%s", "")
				phonemes[class] = string_to_array(phones)
			end
		end
	end
	return phonemes
end


-- build the list of syllable templates from a string
--
-- Every line that is not empty or whitespace-only becomes one template:
-- its whitespace is stripped and the remaining characters are stored as an
-- array. Templates are returned in the order the lines appear.
local function parse_syllables(str)
	local templates = {}
	for row in lines(str) do
		local is_blank = string.match(row, "^%s*$") ~= nil
		if not is_blank then
			local compact = string.gsub(row, "%s", "")
			table.insert(templates, string_to_array(compact))
		end
	end
	return templates
end


-- parse orthography patterns from string
--
-- Each rule line has the form `pattern=replacement`, split at the last "="
-- on the line; trailing whitespace is trimmed first. Blank lines and lines
-- without "=" are ignored.
--
-- Inside the pattern part, `%X` is expanded to a character set matching any
-- phone of class X when X is a key of `phonemes`; any other `%X` is left
-- untouched so ordinary Lua pattern escapes keep working.
--
-- Returns an array of { pattern=..., replace=... } tables in input order.
local function parse_orthography(str, phonemes)
	-- Phones are literal characters, so the ones that have a special meaning
	-- inside a [...] set (^ ] - %) are escaped before being placed in one.
	local function set_literal(chars)
		return (string.gsub(chars, "[%^%]%-%%]", "%%%0"))
	end

	local rules = {}
	for line in lines(str) do
		-- check the current line for blankness, not the whole input
		if not string.match(line, "^%s*$") then
			-- trim trailing whitespace
			line = string.gsub(line, "%s*$", "")
			local pattern, replace = string.match(line, "(.*)=(.*)")
			-- a line without "=" is not a rule; skip it
			if pattern then
				pattern = string.gsub(pattern, "%%(.)", function(class)
					local phones = phonemes[class]
					if phones == nil then
						-- not a phoneme class: keep the escape as written
						return '%' .. class
					end
					return '[' .. set_literal(table.concat(phones)) .. ']'
				end)
				rules[#rules + 1] = { pattern=pattern, replace=replace }
			end
		end
	end
	return rules
end


-- randomly choose an element from an array with a given distribution
--
-- tbl:          array (sequence) to pick from
-- distribution: function that receives a sample from math.random() and
--               returns the value that is scaled onto the indices of tbl
--
-- A result in [0,1) selects index floor(x * #tbl) + 1. Results outside that
-- range (for example exactly 1) are clamped to the first / last element
-- instead of indexing outside the array. Returns nil only if tbl is empty.
local function random_choice(tbl, distribution)
	local count = #tbl
	local x = distribution(math.random())
	local bin = math.floor(x * count) + 1
	if bin > count then
		bin = count
	end
	if bin < 1 then
		bin = 1
	end
	return tbl[bin]
end


-- probability distributions

-- flat distribution: the sample is passed through unchanged
uniform = function(x)
	return x
end


-- quadratic distribution: the sample is squared, so for samples in [0,1)
-- the result is never larger than the sample itself
function quadratic(x)
	local squared = x * x
	return squared
end






-- Language "class": holds the parsed description of a language and
-- generates words from it.
Language = {}

-- construct a Language
--
-- Normally invoked by calling the table itself, `Language(phonemes, ...)`;
-- the __call metamethod set up below then passes the table as the ignored
-- first argument. When calling Language.new directly, pass any placeholder
-- in that position.
--
-- phonemes, syllables, orthography:
--     definition strings, parsed with parse_phonemes, parse_syllables and
--     parse_orthography respectively
-- len_min, len_max:
--     inclusive range of possible word lengths, counted in syllables
-- p_dist, s_dist, l_dist:
--     optional distribution functions used when picking a phone, a syllable
--     template and a word length; each defaults to `uniform`
function Language.new(_, phonemes, syllables, orthography, len_min, len_max, p_dist, s_dist, l_dist)
	local self = setmetatable({}, {__index=Language})
	self.phonemes = parse_phonemes(phonemes)
	self.p_dist = p_dist or uniform
	self.syllables = parse_syllables(syllables)
	self.s_dist = s_dist or uniform
	-- orthography rules refer to phoneme classes, so they are parsed after
	-- the phonemes
	self.orthography = parse_orthography(orthography, self.phonemes)
	-- every permitted word length, so one can be drawn with random_choice
	self.length = {}
	for len=len_min,len_max do
		self.length[#self.length + 1] = len
	end
	self.l_dist = l_dist or uniform
	return self
end
setmetatable(Language, {__call=Language.new})


-- draw one phone of the given phoneme class, using the phone distribution
-- (self.p_dist)
function Language:random_phone(class)
	local candidates = self.phonemes[class]
	return random_choice(candidates, self.p_dist)
end


-- generate one syllable
--
-- A template is drawn from self.syllables using self.s_dist. Template
-- characters that name a phoneme class are replaced by a random phone of
-- that class; every other character is copied into the result as is.
function Language:syllable()
	local template = random_choice(self.syllables, self.s_dist)
	local built = ''
	for _, symbol in ipairs(template) do
		local piece = symbol
		if self.phonemes[symbol] then
			piece = self:random_phone(symbol)
		end
		built = built .. piece
	end
	return built
end


-- generate one word: a syllable count is drawn from self.length using
-- self.l_dist, then that many random syllables are joined together
function Language:word()
	local syllable_count = random_choice(self.length, self.l_dist)
	local joined = ''
	for _ = 1, syllable_count do
		joined = joined .. self:syllable()
	end
	return joined
end


-- apply the orthography rules to text
--
-- Rules are applied one after another in the order they were parsed, each
-- one working on the output of the previous one. Returns the rewritten text.
function Language:romanize(text)
	for _, rule in ipairs(self.orthography) do
		-- parentheses keep only the string, dropping gsub's match count
		text = (string.gsub(text, rule.pattern, rule.replace))
	end
	return text
end


-- Export the module table. Because it served as this chunk's environment,
-- it contains the names assigned as globals above (Language, uniform,
-- quadratic).
return language