Better indexing of help suggestions with lunr, better display

This commit is contained in:
Griatch 2020-04-11 09:42:22 +02:00
parent a8a5453a97
commit 2a18cb0a2d
8 changed files with 287 additions and 72 deletions

View File

@ -3,7 +3,7 @@
BLACK_FORMAT_CONFIGS = --target-version py37 --line-length 100 BLACK_FORMAT_CONFIGS = --target-version py37 --line-length 100
TEST_GAME_DIR = .test_game_dir TEST_GAME_DIR = .test_game_dir
tests?=evennia TESTS ?= evennia
default: default:
@echo " Usage: " @echo " Usage: "
@ -29,10 +29,10 @@ test:
evennia --init $(TEST_GAME_DIR);\ evennia --init $(TEST_GAME_DIR);\
cd $(TEST_GAME_DIR);\ cd $(TEST_GAME_DIR);\
evennia migrate;\ evennia migrate;\
evennia test --keepdb $(tests);\ evennia test --keepdb $(TESTS);\
testp: testp:
evennia --init $(TEST_GAME_DIR);\ evennia --init $(TEST_GAME_DIR);\
cd $(TEST_GAME_DIR);\ cd $(TEST_GAME_DIR);\
evennia migrate;\ evennia migrate;\
evennia test --keepdb --parallel 4 $(tests);\ evennia test --keepdb --parallel 4 $(TESTS);\

View File

@ -414,7 +414,7 @@ def _init():
GLOBAL_SCRIPTS.start() GLOBAL_SCRIPTS.start()
def set_trace(term_size=(140, 40), debugger="auto"): def set_trace(term_size=(140, 80), debugger="auto"):
""" """
Helper function for running a debugger inside the Evennia event loop. Helper function for running a debugger inside the Evennia event loop.

View File

@ -84,6 +84,15 @@ def _init_command(cls, **kwargs):
break break
cls.help_category = cls.help_category.lower() cls.help_category = cls.help_category.lower()
# pre-prepare a help index entry for quicker lookup
cls.search_index_entry = {
"key": cls.key,
"aliases": " ".join(cls.aliases),
"category": cls.help_category,
"text": cls.__doc__,
"tags": ""
}
class CommandMeta(type): class CommandMeta(type):
""" """

View File

@ -6,6 +6,8 @@ set. The normal, database-tied help system is used for collaborative
creation of other help topics such as RP help or game-world aides. creation of other help topics such as RP help or game-world aides.
""" """
from lunr import lunr
from lunr.exceptions import QueryParseError
from django.conf import settings from django.conf import settings
from collections import defaultdict from collections import defaultdict
from evennia.utils.utils import fill, dedent from evennia.utils.utils import fill, dedent
@ -13,7 +15,7 @@ from evennia.commands.command import Command
from evennia.help.models import HelpEntry from evennia.help.models import HelpEntry
from evennia.utils import create, evmore from evennia.utils import create, evmore
from evennia.utils.eveditor import EvEditor from evennia.utils.eveditor import EvEditor
from evennia.utils.utils import string_suggestions, class_from_module from evennia.utils.utils import string_suggestions, class_from_module, inherits_from, format_grid
COMMAND_DEFAULT_CLASS = class_from_module(settings.COMMAND_DEFAULT_CLASS) COMMAND_DEFAULT_CLASS = class_from_module(settings.COMMAND_DEFAULT_CLASS)
HELP_MORE = settings.HELP_MORE HELP_MORE = settings.HELP_MORE
@ -25,6 +27,71 @@ _DEFAULT_WIDTH = settings.CLIENT_DEFAULT_WIDTH
_SEP = "|C" + "-" * _DEFAULT_WIDTH + "|n" _SEP = "|C" + "-" * _DEFAULT_WIDTH + "|n"
class HelpCategory:
    """
    Lightweight wrapper around a help-category name.

    It exposes the same `search_index_entry` mapping that
    `help_search_with_index` reads from every candidate, so a bare
    category can be indexed and matched alongside other help candidates.

    Two categories compare equal when their string forms match
    case-insensitively, and equal categories hash identically so that they
    collapse to a single member in sets and dict keys.
    """

    def __init__(self, key):
        """
        Args:
            key (str): The name of the help category.
        """
        self.key = key

    @property
    def search_index_entry(self):
        """
        The search-index document for this category.

        Returns:
            dict: Mapping with the keys "key", "aliases", "category", "tags"
                and "text". Only "key" (the string form of the category) and
                "category" (the raw category name) carry content.
        """
        return {
            "key": str(self),
            "aliases": "",
            "category": self.key,
            "tags": "",
            "text": "",
        }

    def __str__(self):
        return f"Category: {self.key}"

    def __eq__(self, other):
        # Case-insensitive comparison of the string forms.
        return str(self).lower() == str(other).lower()

    def __hash__(self):
        # The hash must be derived from the same value __eq__ compares;
        # hashing on identity would let equal categories coexist in a set,
        # defeating de-duplication.
        return hash(str(self).lower())
def help_search_with_index(query, candidate_entries, suggestion_maxnum=5):
    """
    Search a set of help candidates using a freshly built lunr index.

    Args:
        query (str): The lunr query string to search for.
        candidate_entries (list): Objects to search among. Each must expose a
            `search_index_entry` mapping with the keys "key", "aliases",
            "category", "tags" and "text".
        suggestion_maxnum (int, optional): Maximum number of matches to
            return.

    Returns:
        tuple: A tuple `(matches, suggestions)`. `matches` holds the matched
            candidate objects and `suggestions` the corresponding index keys
            as strings, both in the order returned by the search. Both are
            empty if the query could not be parsed.
    """
    documents = [entry.search_index_entry for entry in candidate_entries]
    # the "key" of each document is the index reference; map it back to its object
    by_key = {doc["key"]: entry for doc, entry in zip(documents, candidate_entries)}

    # field name -> relevance boost
    field_boosts = (
        ("key", 10),
        ("aliases", 9),
        ("category", 8),
        ("tags", 5),
        ("text", 1),
    )
    search_index = lunr(
        ref="key",
        fields=[{"field_name": name, "boost": boost} for name, boost in field_boosts],
        documents=documents,
    )

    try:
        hits = search_index.search(query)[:suggestion_maxnum]
    except QueryParseError:
        # malformed user input; treat it as "nothing found"
        hits = []

    found = [by_key[hit["ref"]] for hit in hits]
    names = [str(hit["ref"]) for hit in hits]
    return found, names
class CmdHelp(Command): class CmdHelp(Command):
""" """
View help or a list of topics View help or a list of topics
@ -119,12 +186,20 @@ class CmdHelp(Command):
respectively. You can override this method to return a respectively. You can override this method to return a
custom display of the list of commands and topics. custom display of the list of commands and topics.
""" """
output = []
for category in sorted(set(list(hdict_cmds.keys()) + list(hdict_db.keys()))):
output.append(f"|w{category.title()}|G")
entries = sorted(set(hdict_cmds.get(category, []) + hdict_db.get(category, [])))
output.append(format_grid(entries, width=78)) # self.client_width()))
return "\n".join(output)
string = "" string = ""
if hdict_cmds and any(hdict_cmds.values()): if hdict_cmds and any(hdict_cmds.values()):
string += "\n" + _SEP + "\n |CCommand help entries|n\n" + _SEP string += "\n" + _SEP + "\n |CCommand help entries|n\n" + _SEP
for category in sorted(hdict_cmds.keys()): for category in sorted(hdict_cmds.keys()):
string += "\n |w%s|n:\n" % (str(category).title()) string += "\n |w%s|n:\n" % (str(category).title())
string += "|G" + fill("|C, |G".join(sorted(hdict_cmds[category]))) + "|n" string += "|G" + fill("|C, |G".join(sorted(hdict_cmds[category]))) + "|n"
if hdict_db and any(hdict_db.values()): if hdict_db and any(hdict_db.values()):
string += "\n\n" + _SEP + "\n\r |COther help entries|n\n" + _SEP string += "\n\n" + _SEP + "\n\r |COther help entries|n\n" + _SEP
for category in sorted(hdict_db.keys()): for category in sorted(hdict_db.keys()):
@ -134,6 +209,7 @@ class CmdHelp(Command):
+ fill(", ".join(sorted([str(topic) for topic in hdict_db[category]]))) + fill(", ".join(sorted([str(topic) for topic in hdict_db[category]])))
+ "|n" + "|n"
) )
return string return string
def check_show_help(self, cmd, caller): def check_show_help(self, cmd, caller):
@ -208,8 +284,8 @@ class CmdHelp(Command):
] ]
all_categories = list( all_categories = list(
set( set(
[cmd.help_category.lower() for cmd in all_cmds] [HelpCategory(cmd.help_category) for cmd in all_cmds] +
+ [topic.help_category.lower() for topic in all_topics] [HelpCategory(topic.help_category) for topic in all_topics]
) )
) )
@ -228,74 +304,43 @@ class CmdHelp(Command):
self.msg_help(self.format_help_list(hdict_cmd, hdict_topic)) self.msg_help(self.format_help_list(hdict_cmd, hdict_topic))
return return
# Try to access a particular command # Try to access a particular help entry or category
entries = ([cmd for cmd in all_cmds if cmd] +
list(HelpEntry.objects.all()) +
all_categories)
# build vocabulary of suggestions and rate them by string similarity. for match_query in [f"{query}~1", f"{query}*"]:
suggestions = None # We first do an exact word-match followed by a start-by query
if suggestion_maxnum > 0:
vocabulary = (
[cmd.key for cmd in all_cmds if cmd]
+ [topic.key for topic in all_topics]
+ all_categories
)
[vocabulary.extend(cmd.aliases) for cmd in all_cmds]
suggestions = [
sugg
for sugg in string_suggestions(
query, set(vocabulary), cutoff=suggestion_cutoff, maxnum=suggestion_maxnum
)
if sugg != query
]
if not suggestions:
suggestions = [
sugg for sugg in vocabulary if sugg != query and sugg.startswith(query)
]
# try an exact command auto-help match matches, suggestions = help_search_with_index(
match = [cmd for cmd in all_cmds if cmd == query] match_query, entries, suggestion_maxnum=self.suggestion_maxnum)
if not match: if matches:
# try an inexact match with prefixes stripped from query and cmds match = matches[0]
_query = query[1:] if query[0] in CMD_IGNORE_PREFIXES else query if isinstance(match, HelpCategory):
formatted = self.format_help_list(
{match.key: [cmd.key for cmd in all_cmds
if match.key.lower() == cmd.help_category]},
{match.key: [topic.key for topic in all_topics
if match.key.lower() == topic.help_category]}
)
elif inherits_from(match, "evennia.commands.command.Command"):
formatted = self.format_help_entry(
match.key,
match.get_help(caller, cmdset),
aliases=match.aliases,
suggested=suggestions[1:]
)
else:
formatted = self.format_help_entry(
match.key,
match.entrytext,
aliases=match.aliases.all(),
suggested=suggestions[1:]
)
match = [ self.msg_help(formatted)
cmd return
for cmd in all_cmds
for m in cmd._matchset
if m == _query or m[0] in CMD_IGNORE_PREFIXES and m[1:] == _query
]
if len(match) == 1:
formatted = self.format_help_entry(
match[0].key,
match[0].get_help(caller, cmdset),
aliases=match[0].aliases,
suggested=suggestions,
)
self.msg_help(formatted)
return
# try an exact database help entry match
match = list(HelpEntry.objects.find_topicmatch(query, exact=True))
if len(match) == 1:
formatted = self.format_help_entry(
match[0].key,
match[0].entrytext,
aliases=match[0].aliases.all(),
suggested=suggestions,
)
self.msg_help(formatted)
return
# try to see if a category name was entered
if query in all_categories:
self.msg_help(
self.format_help_list(
{query: [cmd.key for cmd in all_cmds if cmd.help_category == query]},
{query: [topic.key for topic in all_topics if topic.help_category == query]},
)
)
return
# no exact matches found. Just give suggestions. # no exact matches found. Just give suggestions.
self.msg( self.msg(

View File

@ -73,7 +73,8 @@ class HelpEntry(SharedMemoryModel):
db_tags = models.ManyToManyField( db_tags = models.ManyToManyField(
Tag, Tag,
blank=True, blank=True,
help_text="tags on this object. Tags are simple string markers to identify, group and alias objects.", help_text="tags on this object. Tags are simple string markers to "
"identify, group and alias objects.",
) )
# (deprecated, only here to allow MUX helpfile load (don't use otherwise)). # (deprecated, only here to allow MUX helpfile load (don't use otherwise)).
# TODO: remove this when not needed anymore. # TODO: remove this when not needed anymore.
@ -123,6 +124,19 @@ class HelpEntry(SharedMemoryModel):
""" """
return self.locks.check(accessing_obj, access_type=access_type, default=default) return self.locks.check(accessing_obj, access_type=access_type, default=default)
@property
def search_index_entry(self):
    """
    Build the search-index document describing this help entry.

    Returns:
        dict: Mapping with the keys "key", "aliases", "category", "text"
            and "tags". Aliases and tags are each flattened into a single
            space-separated string.
    """
    key = self.db_key
    alias_text = " ".join(self.aliases.all())
    category = self.db_help_category
    text = self.db_entrytext
    tag_text = " ".join(str(tag) for tag in self.tags.all())
    return {
        "key": key,
        "aliases": alias_text,
        "category": category,
        "text": text,
        "tags": tag_text,
    }
# #
# Web/Django methods # Web/Django methods
# #

View File

@ -6,6 +6,7 @@ TODO: Not nearly all utilities are covered yet.
""" """
import os.path import os.path
import random
import mock import mock
from django.test import TestCase from django.test import TestCase
@ -264,3 +265,61 @@ class LatinifyTest(TestCase):
byte_str = utils.to_bytes(self.example_str) byte_str = utils.to_bytes(self.example_str)
result = utils.latinify(byte_str) result = utils.latinify(byte_str)
self.assertEqual(result, self.expected_output) self.assertEqual(result, self.expected_output)
class TestFormatGrid(TestCase):
    """
    Tests for `utils.format_grid`, checking the number of rows produced and
    that every row is padded out to the requested width.
    """

    maxDiff = None

    def setUp(self):
        # make the random only semi-random with a fixed seed
        random.seed(1)

    def tearDown(self):
        # restore normal randomness
        random.seed(None)

    def _generate_elements(self, basewidth, variation, amount):
        """
        Create `amount` strings of "X" characters, each of length
        `basewidth` plus or minus up to `variation` (never shorter than 1).
        """
        return [
            "X" * max(1, basewidth + random.randint(-variation, variation))
            for _ in range(amount)
        ]

    def test_even_grid(self):
        """Grid with small variations"""
        elements = self._generate_elements(3, 1, 30)
        result = utils.format_grid(elements, width=78)
        rows = result.split("\n")
        self.assertEqual(len(rows), 3)
        self.assertTrue(all(len(row) == 78 for row in rows))

    def test_disparate_grid(self):
        """Grid with big variations"""
        elements = self._generate_elements(3, 15, 30)
        result = utils.format_grid(elements, width=82, sep=" ")
        rows = result.split("\n")
        self.assertEqual(len(rows), 8)
        self.assertTrue(all(len(row) == 82 for row in rows))

    def test_huge_grid(self):
        """Grid with very long strings"""
        elements = self._generate_elements(70, 20, 30)
        result = utils.format_grid(elements, width=78)
        rows = result.split("\n")
        self.assertEqual(len(rows), 30)
        self.assertTrue(all(len(row) == 78 for row in rows))

    def test_overlap(self):
        """Grid of real command names; no element may be lost"""
        elements = ("alias", "batchcode", "batchcommands", "cmdsets",
                    "copy", "cpattr", "desc", "destroy", "dig",
                    "examine", "find", "force", "lock")
        # No interactive debugger or print output here: the test must be
        # able to run unattended.
        result = utils.format_grid(elements, width=78)
        rows = result.split("\n")
        self.assertEqual(len(rows), 2)
        for element in elements:
            self.assertTrue(element in result, f"element {element} is missing.")

View File

@ -1686,6 +1686,93 @@ def format_table(table, extra_space=1):
) )
return ftable return ftable
import functools
def percentile(iterable, percent, key=lambda x: x):
    """
    Find the percentile of a sequence of values, interpolating linearly
    between the two nearest elements when the percentile falls between them.

    Args:
        iterable (list): The values. This MUST already be sorted, and must
            support `len()` and integer indexing.
        percent (float): A value from 0.0 to 1.0.
        key (callable, optional): Function to compute the value from each
            element of `iterable`.

    Returns:
        The percentile of the values, or `None` if `iterable` is empty.
    """
    if not iterable:
        return None

    # fractional index of the requested percentile
    position = (len(iterable) - 1) * percent
    lower, upper = math.floor(position), math.ceil(position)

    if lower == upper:
        # landed exactly on an element; no interpolation needed
        return key(iterable[int(position)])

    # weight each neighbour by its closeness to the fractional index
    below = key(iterable[int(lower)]) * (upper - position)
    above = key(iterable[int(upper)]) * (position - lower)
    return below + above
def format_grid(elements, width=78, sep=" "):
    """
    This helper function makes a 'grid' output, where it distributes the given
    string-elements as evenly as possible to fill out the given width.
    It will not work well if the variation of element length is very big!

    Args:
        elements (list or tuple): A 1D sequence of string elements to put in
            the grid. It must support `len()` and indexing, and must not be
            empty (the last element is accessed with `elements[-1]`).
        width (int, optional): The width of the grid area to fill.
        sep (str, optional): The extra separator to put between words. If
            set to the empty string, words may run into each other.

    Returns:
        gridstr (str): The grid as a finished rendered multi-line string,
            with rows joined by newlines.

    """
    nelements = len(elements)
    # append the separator to every element except the last one
    elements = [elements[ie] + sep for ie in range(nelements - 1)] + [elements[-1]]
    # element widths, including the appended separator
    wls = [len(elem) for elem in elements]

    # get the nth percentile as a good representation of average width
    averlen = int(percentile(sorted(wls), 0.9)) + 2  # include extra space
    aver_per_row = width // averlen + 1

    # start offset of each column; there are aver_per_row - 1 columns
    indices = [averlen * ind for ind in range(aver_per_row - 1)]

    rows = []
    ic = 0  # index of the column the next element would go into
    row = ""
    for ie, element in enumerate(elements):

        wl = wls[ie]

        if ic >= aver_per_row - 1 or ie >= nelements - 1:
            # all columns are used up, or this is the last element:
            # pad the row out to full width and store it
            # NOTE(review): when ic != 0 the current element is never added
            # to the row in this branch - looks like it is dropped; confirm.
            if ic == 0:
                # NOTE(review): crop() is called without a width here, unlike
                # the other calls below - confirm this is intended.
                row = crop(element)
            row += " " * max(0, (width - len(row)))
            rows.append(row)
            row = ""
            ic = 0
        elif indices[ic] + wl > width:
            # element would extend past the width if placed in this column:
            # store the padded current row and start a new one with it
            row += " " * (width - len(row))
            rows.append(row)
            row = crop(element, width)
            # ic restarts at 0 although the new row already holds an element;
            # the while-loop in the branch below skips columns that the row
            # text already covers
            ic = 0
        else:
            try:
                # advance to the first column starting at or after the
                # current end of the row
                while len(row) > indices[ic] - 1:
                    ic += 1
            except IndexError:
                # ran past the last column while looking for a free one
                if ic == 0:
                    row = crop(element, width)
                else:
                    row += " " * max(0, width - len(row))
                rows.append(row)
                ic = 0
            else:
                # a free column was found; add the element padded to the
                # column width
                row += element + " " * max(0, averlen - wl)
                ic += 1

    return "\n".join(rows)
def get_evennia_pids(): def get_evennia_pids():
""" """

View File

@ -10,6 +10,7 @@ django-filter >= 2.2.0, < 2.3
django-sekizai django-sekizai
inflect inflect
autobahn >= 17.9.3 autobahn >= 17.9.3
lunr == 0.5.6
# try to resolve dependency issue in py3.7 # try to resolve dependency issue in py3.7
attrs >= 19.2.0 attrs >= 19.2.0