Purpose: make up fake words using a Markov chain.
Usage: markov_wordgen depth minlen [prefixes ...]
Reads a newline-separated list of "real" words from standard input and uses it to train a character-by-character Markov chain. It then generates one word for each argument after the first two, using that argument as the prefix the word starts with. `depth` is the number of preceding characters the Markov chain uses to infer the next character. `minlen` is the minimum length of the words produced.
Changelog:
2021-172 and before: initial development
2023-235 reformat/redocument a tad
Source code (perhaps slightly corrupted) is as follows.
-- Update markov chain `c` (in place) with the statistics of word `w`.
--   c: table mapping a prefix string to a table of {next-char -> count}
--   w: the training word
--   d: depth, the longest prefix length (in characters) to record
-- Every position from the second character up to one past the end of `w`
-- is visited. The character at that position (the empty string for the
-- past-the-end position, which marks end-of-word) is counted as a successor
-- of each prefix that immediately precedes it, from `d` characters long
-- (clipped at the start of the word) down to the empty prefix.
local function trainword(c, w, d)
  local endpos = #w + 1
  for pos = 2, endpos do
    -- successor character; "" once `pos` is past the end of the word
    local nextch = w:sub(pos, pos)
    -- earliest prefix start, never before the first character
    local first = math.max(pos - d, 1)
    for start = first, pos do
      -- when start == pos this is the empty prefix
      local prefix = w:sub(start, pos - 1)
      local counts = c[prefix] or {}
      c[prefix] = counts
      counts[nextch] = (counts[nextch] or 0) + 1
    end
  end
end
-- Weighted random choice.
--   t: table whose values are numeric choice-weights
-- Returns one key of `t`, picked with probability proportional to its
-- weight. Returns nothing if no key is selected (e.g. `t` is empty).
local function weightsel(t)
  -- first pass: total weight
  local total = 0
  for _, weight in pairs(t) do
    total = total + weight
  end
  -- second pass: walk the entries, consuming the random offset until it
  -- drops below zero; the entry that crosses zero is the pick
  local remaining = math.random() * total
  for key, weight in pairs(t) do
    remaining = remaining - weight
    if remaining < 0 then
      return key
    end
  end
end
-- Pick a character to append to `w` (returning it, not appending it).
--   c: markov chain (prefix string -> table of {next-char -> count})
--   d: depth, the longest prefix length to condition on
--   w: the word generated so far
-- Returns a string; the empty string means that `w` should end.
-- The longest trailing piece of `w` (at most `d` characters) that exists in
-- the chain is used. If no non-empty piece is known, the empty prefix is
-- used as a last resort, so the result is never nil for a trained chain.
-- Raises an error if the chain has no entry at all to choose from.
local function genchar(c, d, w)
  -- never ask for more context than `w` has: a start index below 0 would
  -- be interpreted by string.sub as counting from the end of the string
  local maxlen = math.max(0, math.min(d, #w))
  for gpi = maxlen, 0, -1 do
    -- last `gpi` characters of `w`; "" when gpi == 0
    local pre = w:sub(#w + 1 - gpi)
    local choices = c[pre]
    if choices then
      return weightsel(choices)
    end
  end
  error("markov chain has no training data to choose from", 2)
end
-- Generate a word from markov chain `c` with depth `d`.
--   w: optional starting text; defaults to the empty string
-- Characters obtained from `genchar` are appended one at a time until it
-- returns the empty string, which signals the end of the word.
-- Returns the finished word (starting text included).
local function genword(c, d, w)
  local word = w or ""
  repeat
    local nextch = genchar(c, d, word)
    word = word .. nextch
  until nextch == ""
  return word
end
-- handle first arguments: both `depth` and `minlen` must be present
if not arg[1] or not arg[2] then
  -- the line break is written as an escape: a quoted string may not
  -- contain a raw newline
  io.stderr:write(arg[0] .. ": expected at least 2 arguments\n")
  return
end
-- values that do not parse as base-10 numbers fall back to the defaults
local depth = tonumber(arg[1], 10) or 3
local minlen = tonumber(arg[2], 10) or 8
-- read training data: one word per line of standard input
local wordlist = {}
for line in io.lines() do
  table.insert(wordlist, line)
end
-- prepare model: feed every word that was read into a fresh chain
local chain = {}
for i = 1, #wordlist do
  trainword(chain, wordlist[i], depth)
end
-- generate words: one output line per prefix argument (arg[3] onward)
local argind = 3
while arg[argind] do
  local outword
  -- Always generate at least once, then retry until the word is at least
  -- `minlen` long. (Starting from an empty string and testing the length
  -- first would skip generation entirely whenever minlen <= 0.)
  -- There is no retry limit.
  repeat
    outword = genword(chain, depth, arg[argind])
  until #outword >= minlen
  -- the line break is written as an escape: a quoted string may not
  -- contain a raw newline
  io.stdout:write(outword .. "\n")
  argind = argind + 1
end