markov_wordgen

Purpose: make up fake words using a Markov chain.
Usage: markov_wordgen depth minlen [prefixes ...] < wordlist

Reads a newline-separated list of "real" words from standard input and uses it to train a character-by-character Markov chain. It then generates one word for each argument after the first two, using that argument as the prefix the word starts with. depth is the maximum number of preceding characters the Markov chain uses to infer the next character. minlen is the minimum length of the words produced.

Download markov_wordgen

Changelog:
2021-172 and before: initial development
2023-235 reformat/redocument a tad

Source code (perhaps slightly corrupted) is as follows.

-- Feed word `w` into markov chain `c` (modified in place) using depth `d`.
-- `c` maps a context string to a table of `next character -> count`.
-- For every position from the 2nd character up to one past the end of `w`
-- (where the "next character" is the empty string, i.e. end-of-word),
-- the count is bumped for each context made of the 0 to `d` characters
-- immediately before that position.
local function trainword(c, w, d)
    local stop = #w + 1
    for pos = 2, stop do
        local nextch = w:sub(pos, pos)
        -- earliest context start, clamped to the beginning of the word
        local first = (pos - d < 1) and 1 or (pos - d)
        for start = first, pos do
            -- start == pos yields the empty (order-0) context
            local ctx = w:sub(start, pos - 1)
            local counts = c[ctx]
            if not counts then
                counts = {}
                c[ctx] = counts
            end
            counts[nextch] = (counts[nextch] or 0) + 1
        end
    end
end

-- `t` should be a table, with values being non-negative numbers
-- that act as choice-weights.
-- Returns a randomly-selected key from `t`, where each key is chosen with
-- probability proportional to its weight.
-- Returns nil only when `t` has no key with a positive weight
-- (e.g. `t` is empty).
local function weightsel(t)
    local total = 0
    for _, weight in pairs(t) do
        total = total + weight
    end
    local remaining = math.random() * total
    local fallback
    for key, weight in pairs(t) do
        remaining = remaining - weight
        if remaining < 0 then
            return key
        end
        if weight > 0 then
            fallback = key
        end
    end
    -- Reached only if the subtractions never went negative, which can
    -- happen through floating-point rounding with non-integer weights.
    -- Fall back to the last selectable key instead of returning nil.
    return fallback
end

-- pick a character to append to `w` (returning it, not appending it)
-- from the markov chain `c` with depth `d`
-- Contexts are tried from the longest available (the last `d` characters
-- of `w`, or all of `w` if it is shorter) down to the empty context, and
-- the first one present in `c` decides the character.
-- may return the empty string, meaning that `w` should end; this is also
-- the result when no context of `w` is known to the chain
local function genchar(c, d, w)
    -- Clamp to #w: a start index below 1 would make string.sub count from
    -- the end of `w` and yield a shorter context than intended.
    local longest = math.min(d, #w)
    for gpi = longest, 0, -1 do
        -- gpi == 0 gives the empty (order-0) context
        local pre = w:sub(#w + 1 - gpi)
        local choices = c[pre]
        if choices then
            return weightsel(choices) or ""
        end
    end
    return ""
end

-- generate a word from markov chain `c` with depth `d`
-- and starting text `w` (optional, defaults to the empty string)
-- Characters from `genchar` are appended until it signals end-of-word
-- with the empty string; the finished word (including `w`) is returned.
local function genword(c, d, w)
    local gw = w or ""
    repeat
        -- a nil result (no usable context) is treated as end-of-word
        -- rather than raising a concatenation error
        local ac = genchar(c, d, gw) or ""
        gw = gw .. ac
    until ac == ""
    return gw
end

-- handle first arguments
if not arg[1] or not arg[2] then
    io.stderr:write(
        arg[0] .. ": expected at least 2 arguments\n"
    )
    -- non-zero status so callers can detect the usage error
    os.exit(1)
end
-- non-numeric arguments fall back to the defaults
local depth = tonumber(arg[1], 10) or 3
local minlen = tonumber(arg[2], 10) or 8

-- Seed the generator: without this, Lua versions before 5.4 produce the
-- same "random" words on every run.
math.randomseed(os.time())

-- read training data from standard input and prepare the model
local chain = {}
for line in io.lines() do
    trainword(chain, line, depth)
end

-- Upper bound on generation attempts per prefix, so that training data
-- which cannot yield a long enough word (e.g. empty input) does not make
-- the program loop forever.
local MAX_ATTEMPTS = 100000

-- generate one word per remaining argument, using it as the prefix
local argind = 3
while arg[argind] do
    local prefix = arg[argind]
    local outword
    -- always generate at least once, so minlen <= 0 still yields a word
    for _ = 1, MAX_ATTEMPTS do
        local candidate = genword(chain, depth, prefix)
        if #candidate >= minlen then
            outword = candidate
            break
        end
    end
    if outword then
        io.stdout:write(outword .. "\n")
    else
        io.stderr:write(
            arg[0] .. ": could not generate a word of length >= "
                .. tostring(minlen) .. " from prefix \"" .. prefix .. "\"\n"
        )
    end
    argind = argind + 1
end

All scripts | dkl9 home