123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325 |
- local MAX_SENTENCE_LENGTH = 30
- local START = 1
- local END = 2
- local node_prototype = {}
- local function link(node, to_link)
- node.link_count = node.link_count + 1
- node.links[to_link] = (node.links[to_link] or 0) + 1
- end
- local function MarkovNode()
- local prototype = {}
- prototype.links = {}
- prototype.linked_by = {}
- prototype.link_count = 0
-
- for k, v in pairs(node_prototype)
- do
- prototype[k] = v
- end
- return prototype
- end
- local graph_prototype = {}
- function graph_prototype:link(word1, word2)
- link(self.nodes[word1], word2)
- local reverse = self.nodes[word2].linked_by
- for _, name in ipairs(reverse)
- do
- if name == word1
- then
- return
- end
- end
- table.insert(reverse, word1)
- end
- local function cutup(str)
- local pieces = {}
- pieces[0] = START
- local i = 1
- for k in string.gmatch(str, "([^%s]+)")
- do
- pieces[i] = k
- i = i + 1
- end
- pieces[i] = END
-
-
- if pieces[1] == END
- then
- return
- else
- return pieces
- end
- end
- function graph_prototype:learn_line(line)
- local input = cutup(line)
- if not input
- then
- return
- end
- for i, v in ipairs(input)
- do
- self:link(input[i - 1], v)
- end
- end
- function graph_prototype:learn_from_file(filename)
- for line in io.lines(filename)
- do
- self:learn_line(line)
- end
- end
- function graph_prototype:randomwalk()
- local path = {}
- local current = START
- local sentence_length = 0
- while current ~= END and sentence_length < MAX_SENTENCE_LENGTH
- do
- sentence_length = sentence_length + 1
- local node = self.nodes[current]
- local rand = math.random(node.link_count + 1)
-
- for k, v in pairs(node.links)
- do
- rand = rand - v
- if rand <= 0
- then
- current = k
- table.insert(path, k)
- break
- end
- end
- end
- path[#path] = nil
- return path
- end
- function graph_prototype:get_sentence()
- local path = self:randomwalk()
- return table.concat(path, " ")
- end
- function graph_prototype:count_known_words()
- local count = 0
- for name, _ in pairs(self.nodes)
- do
- if not(name == START or name == END)
- then
- count = count + 1
- end
- end
- return count
- end
- local function contains(table, value)
- for k, v in pairs(table)
- do
- if v == value
- then
- return true
- end
- end
- return false
- end
- local function get_dead_nodes(graph)
-
- local sdistances = {}
- sdistances[START] = 0
- local to_visit = {START}
- while #to_visit > 0
- do
- local current = table.remove(to_visit)
- local current_distance = sdistances[current] + 1
- for node, _ in pairs(graph.nodes[current].links)
- do
- if current_distance < (sdistances[node] or math.huge)
- then
- sdistances[node] = current_distance
- if not contains(to_visit, node)
- then
- table.insert(to_visit, node)
- end
- end
- end
- end
-
- local edistances = {}
- edistances[END] = 0
- table.insert(to_visit, END)
- while #to_visit > 0
- do
- local current = table.remove(to_visit)
- local current_distance = edistances[current] + 1
- for _, node in pairs(graph.nodes[current].linked_by)
- do
- if current_distance < (edistances[node] or math.huge)
- then
- edistances[node] = current_distance
- if not contains(to_visit, node)
- then
- table.insert(to_visit, node)
- end
- end
- end
- end
-
-
- local dead = {}
- for name, node in pairs(graph.nodes)
- do
- if not (edistances[name] and sdistances[name])
- then
-
- table.insert(dead, name)
- end
- end
- return dead
- end
- local function remove(graph, to_remove)
-
- if to_remove == START or to_remove == END
- then
- return
- end
-
- for i, name in ipairs(graph.nodes[to_remove].linked_by)
- do
- local node = graph.nodes[name]
- node.link_count = node.link_count - node.links[to_remove]
- node.links[to_remove] = nil
- end
-
- for name, _ in pairs(graph.nodes[to_remove].links)
- do
- local node = graph.nodes[name]
- for i, v in ipairs(node.linked_by)
- do
- if v == to_remove
- then
- table.remove(node.linked_by, i)
- break
- end
- end
- end
-
- graph.nodes[to_remove] = nil
- end
- function graph_prototype:remove_node(to_remove)
- remove(self, to_remove)
- local dead = get_dead_nodes(self)
- for i, name in ipairs(dead)
- do
- remove(self, name)
- end
- end
- local function get_least_used(nodes)
- local least_uses = math.huge
- local least_used = nil
-
- for name, node in pairs(nodes)
- do
- if node.link_count < least_uses and
- not(name == END or name == START)
- then
- least_uses = node.link_count
- least_used = name
- end
- end
- return least_used
- end
- function graph_prototype:cull(to)
- local count = self:count_known_words()
- while count > to
- do
- local least_used = get_least_used(self.nodes)
- if least_used
- then
- self:remove_node(least_used)
- end
- count = self:count_known_words()
- end
- end
- local nodes_meta = {}
- function nodes_meta.__index(self, key)
- self[key] = MarkovNode()
- return self[key]
- end
- local function MarkovGraph()
- local prototype = {}
- prototype.nodes = {}
- setmetatable(prototype.nodes, nodes_meta)
-
- for k, v in pairs(graph_prototype)
- do
- prototype[k] = v
- end
-
- return prototype
- end
- return MarkovGraph
|