Better indexing of help suggestions with lunr, better display

This commit is contained in:
Griatch 2020-04-11 09:42:22 +02:00
parent a8a5453a97
commit 2a18cb0a2d
8 changed files with 287 additions and 72 deletions

View File

@ -3,7 +3,7 @@
BLACK_FORMAT_CONFIGS = --target-version py37 --line-length 100
TEST_GAME_DIR = .test_game_dir
tests?=evennia
TESTS ?= evennia
default:
@echo " Usage: "
@ -29,10 +29,10 @@ test:
evennia --init $(TEST_GAME_DIR);\
cd $(TEST_GAME_DIR);\
evennia migrate;\
evennia test --keepdb $(tests);\
evennia test --keepdb $(TESTS);\
testp:
evennia --init $(TEST_GAME_DIR);\
cd $(TEST_GAME_DIR);\
evennia migrate;\
evennia test --keepdb --parallel 4 $(tests);\
evennia test --keepdb --parallel 4 $(TESTS);\

View File

@ -414,7 +414,7 @@ def _init():
GLOBAL_SCRIPTS.start()
def set_trace(term_size=(140, 40), debugger="auto"):
def set_trace(term_size=(140, 80), debugger="auto"):
"""
Helper function for running a debugger inside the Evennia event loop.

View File

@ -84,6 +84,15 @@ def _init_command(cls, **kwargs):
break
cls.help_category = cls.help_category.lower()
# pre-prepare a help index entry for quicker lookup
cls.search_index_entry = {
"key": cls.key,
"aliases": " ".join(cls.aliases),
"category": cls.help_category,
"text": cls.__doc__,
"tags": ""
}
class CommandMeta(type):
"""

View File

@ -6,6 +6,8 @@ set. The normal, database-tied help system is used for collaborative
creation of other help topics such as RP help or game-world aides.
"""
from lunr import lunr
from lunr.exceptions import QueryParseError
from django.conf import settings
from collections import defaultdict
from evennia.utils.utils import fill, dedent
@ -13,7 +15,7 @@ from evennia.commands.command import Command
from evennia.help.models import HelpEntry
from evennia.utils import create, evmore
from evennia.utils.eveditor import EvEditor
from evennia.utils.utils import string_suggestions, class_from_module
from evennia.utils.utils import string_suggestions, class_from_module, inherits_from, format_grid
COMMAND_DEFAULT_CLASS = class_from_module(settings.COMMAND_DEFAULT_CLASS)
HELP_MORE = settings.HELP_MORE
@ -25,6 +27,71 @@ _DEFAULT_WIDTH = settings.CLIENT_DEFAULT_WIDTH
_SEP = "|C" + "-" * _DEFAULT_WIDTH + "|n"
class HelpCategory:
    """
    Lightweight wrapper around a help-category name.

    It exposes the same `search_index_entry` property as other searchable
    help objects, so a category can be put into the same search index.

    Two categories compare equal when their string representations match
    case-insensitively. The hash follows the same rule, so that equal
    categories collapse into one when stored in a set or used as dict keys.
    """

    def __init__(self, key):
        """
        Args:
            key (str): The name of the help category.
        """
        self.key = key

    @property
    def search_index_entry(self):
        """
        The search-index document for this category. Only the `key` and
        `category` fields carry content; the rest are empty strings.
        """
        return {
            "key": str(self),
            "aliases": "",
            "category": self.key,
            "tags": "",
            "text": "",
        }

    def __str__(self):
        return f"Category: {self.key}"

    def __eq__(self, other):
        return str(self).lower() == str(other).lower()

    def __hash__(self):
        # Must agree with __eq__: objects that compare equal are required to
        # have the same hash, otherwise set()/dict de-duplication silently
        # fails. Hash the exact same normalized string __eq__ compares.
        return hash(str(self).lower())
def help_search_with_index(query, candidate_entries, suggestion_maxnum=5):
    """
    Search a collection of help candidates through a freshly built lunr index.

    Args:
        query (str): The lunr query string to search with.
        candidate_entries (list): Objects to search among. Each must expose a
            `search_index_entry` property returning a dict with the keys
            `key`, `aliases`, `category`, `tags` and `text`.
        suggestion_maxnum (int, optional): The maximum number of hits to
            return.

    Returns:
        tuple: A tuple `(matches, suggestions)`. `matches` is a list of the
            candidate objects that were hit and `suggestions` is a list with
            the index key of each hit as a string. Both lists keep the order
            in which lunr returned the hits. Both are empty if the query
            could not be parsed.

    """
    documents = [candidate.search_index_entry for candidate in candidate_entries]
    # look-up from index key (used as the lunr document ref) back to the object
    candidate_by_key = {
        document["key"]: candidate
        for document, candidate in zip(documents, candidate_entries)
    }

    # (field name, boost) - a higher boost makes a hit in that field count more
    field_boosts = (
        ("key", 10),
        ("aliases", 9),
        ("category", 8),
        ("tags", 5),
        ("text", 1),
    )
    index_fields = [
        {"field_name": field_name, "boost": boost} for field_name, boost in field_boosts
    ]
    search_index = lunr(ref="key", fields=index_fields, documents=documents)

    try:
        hits = search_index.search(query)[:suggestion_maxnum]
    except QueryParseError:
        # this is a user-input problem
        hits = []

    # matches (objs), suggestions (strs). For debugging, each hit also
    # carries a "score" entry that can be appended to the suggestion.
    matched_objects = [candidate_by_key[hit["ref"]] for hit in hits]
    suggestion_strings = [str(hit["ref"]) for hit in hits]
    return matched_objects, suggestion_strings
class CmdHelp(Command):
"""
View help or a list of topics
@ -119,12 +186,20 @@ class CmdHelp(Command):
respectively. You can override this method to return a
custom display of the list of commands and topics.
"""
output = []
for category in sorted(set(list(hdict_cmds.keys()) + list(hdict_db.keys()))):
output.append(f"|w{category.title()}|G")
entries = sorted(set(hdict_cmds.get(category, []) + hdict_db.get(category, [])))
output.append(format_grid(entries, width=78)) # self.client_width()))
return "\n".join(output)
string = ""
if hdict_cmds and any(hdict_cmds.values()):
string += "\n" + _SEP + "\n |CCommand help entries|n\n" + _SEP
for category in sorted(hdict_cmds.keys()):
string += "\n |w%s|n:\n" % (str(category).title())
string += "|G" + fill("|C, |G".join(sorted(hdict_cmds[category]))) + "|n"
if hdict_db and any(hdict_db.values()):
string += "\n\n" + _SEP + "\n\r |COther help entries|n\n" + _SEP
for category in sorted(hdict_db.keys()):
@ -134,6 +209,7 @@ class CmdHelp(Command):
+ fill(", ".join(sorted([str(topic) for topic in hdict_db[category]])))
+ "|n"
)
return string
def check_show_help(self, cmd, caller):
@ -208,8 +284,8 @@ class CmdHelp(Command):
]
all_categories = list(
set(
[cmd.help_category.lower() for cmd in all_cmds]
+ [topic.help_category.lower() for topic in all_topics]
[HelpCategory(cmd.help_category) for cmd in all_cmds] +
[HelpCategory(topic.help_category) for topic in all_topics]
)
)
@ -228,73 +304,42 @@ class CmdHelp(Command):
self.msg_help(self.format_help_list(hdict_cmd, hdict_topic))
return
# Try to access a particular command
# Try to access a particular help entry or category
entries = ([cmd for cmd in all_cmds if cmd] +
list(HelpEntry.objects.all()) +
all_categories)
# build vocabulary of suggestions and rate them by string similarity.
suggestions = None
if suggestion_maxnum > 0:
vocabulary = (
[cmd.key for cmd in all_cmds if cmd]
+ [topic.key for topic in all_topics]
+ all_categories
for match_query in [f"{query}~1", f"{query}*"]:
# We first do an exact word-match followed by a start-by query
matches, suggestions = help_search_with_index(
match_query, entries, suggestion_maxnum=self.suggestion_maxnum)
if matches:
match = matches[0]
if isinstance(match, HelpCategory):
formatted = self.format_help_list(
{match.key: [cmd.key for cmd in all_cmds
if match.key.lower() == cmd.help_category]},
{match.key: [topic.key for topic in all_topics
if match.key.lower() == topic.help_category]}
)
[vocabulary.extend(cmd.aliases) for cmd in all_cmds]
suggestions = [
sugg
for sugg in string_suggestions(
query, set(vocabulary), cutoff=suggestion_cutoff, maxnum=suggestion_maxnum
)
if sugg != query
]
if not suggestions:
suggestions = [
sugg for sugg in vocabulary if sugg != query and sugg.startswith(query)
]
# try an exact command auto-help match
match = [cmd for cmd in all_cmds if cmd == query]
if not match:
# try an inexact match with prefixes stripped from query and cmds
_query = query[1:] if query[0] in CMD_IGNORE_PREFIXES else query
match = [
cmd
for cmd in all_cmds
for m in cmd._matchset
if m == _query or m[0] in CMD_IGNORE_PREFIXES and m[1:] == _query
]
if len(match) == 1:
elif inherits_from(match, "evennia.commands.command.Command"):
formatted = self.format_help_entry(
match[0].key,
match[0].get_help(caller, cmdset),
aliases=match[0].aliases,
suggested=suggestions,
match.key,
match.get_help(caller, cmdset),
aliases=match.aliases,
suggested=suggestions[1:]
)
self.msg_help(formatted)
return
# try an exact database help entry match
match = list(HelpEntry.objects.find_topicmatch(query, exact=True))
if len(match) == 1:
else:
formatted = self.format_help_entry(
match[0].key,
match[0].entrytext,
aliases=match[0].aliases.all(),
suggested=suggestions,
match.key,
match.entrytext,
aliases=match.aliases.all(),
suggested=suggestions[1:]
)
self.msg_help(formatted)
return
# try to see if a category name was entered
if query in all_categories:
self.msg_help(
self.format_help_list(
{query: [cmd.key for cmd in all_cmds if cmd.help_category == query]},
{query: [topic.key for topic in all_topics if topic.help_category == query]},
)
)
self.msg_help(formatted)
return
# no exact matches found. Just give suggestions.

View File

@ -73,7 +73,8 @@ class HelpEntry(SharedMemoryModel):
db_tags = models.ManyToManyField(
Tag,
blank=True,
help_text="tags on this object. Tags are simple string markers to identify, group and alias objects.",
help_text="tags on this object. Tags are simple string markers to "
"identify, group and alias objects.",
)
# (deprecated, only here to allow MUX helpfile load (don't use otherwise)).
# TODO: remove this when not needed anymore.
@ -123,6 +124,19 @@ class HelpEntry(SharedMemoryModel):
"""
return self.locks.check(accessing_obj, access_type=access_type, default=default)
@property
def search_index_entry(self):
    """
    Build the search-index document describing this help entry.

    Returns:
        dict: A dict with the keys `key`, `aliases`, `category`, `text`
            and `tags`. Aliases and tags are each collapsed into a single
            space-separated string.

    """
    alias_string = " ".join(self.aliases.all())
    tag_string = " ".join(str(tag) for tag in self.tags.all())
    return {
        "key": self.db_key,
        "aliases": alias_string,
        "category": self.db_help_category,
        "text": self.db_entrytext,
        "tags": tag_string,
    }
#
# Web/Django methods
#

View File

@ -6,6 +6,7 @@ TODO: Not nearly all utilities are covered yet.
"""
import os.path
import random
import mock
from django.test import TestCase
@ -264,3 +265,61 @@ class LatinifyTest(TestCase):
byte_str = utils.to_bytes(self.example_str)
result = utils.latinify(byte_str)
self.assertEqual(result, self.expected_output)
class TestFormatGrid(TestCase):
    """
    Tests of `utils.format_grid`, using pseudo-random element widths that
    are made reproducible with a fixed seed.
    """

    maxDiff = None

    def setUp(self):
        # make the random only semi-random with a fixed seed
        random.seed(1)

    def tearDown(self):
        # restore normal randomness
        random.seed(None)

    def _generate_elements(self, basewidth, variation, amount):
        """
        Create `amount` strings of "X" characters, each `basewidth` wide
        give or take up to `variation` characters (but never shorter than 1).
        """
        return [
            "X" * max(1, basewidth + random.randint(-variation, variation))
            for _ in range(amount)
        ]

    def test_even_grid(self):
        """Grid with small variations"""
        elements = self._generate_elements(3, 1, 30)
        result = utils.format_grid(elements, width=78)
        rows = result.split("\n")
        self.assertEqual(len(rows), 3)
        self.assertTrue(all(len(row) == 78 for row in rows))

    def test_disparate_grid(self):
        """Grid with big variations"""
        elements = self._generate_elements(3, 15, 30)
        result = utils.format_grid(elements, width=82, sep=" ")
        rows = result.split("\n")
        self.assertEqual(len(rows), 8)
        self.assertTrue(all(len(row) == 82 for row in rows))

    def test_huge_grid(self):
        """Grid with very long strings"""
        elements = self._generate_elements(70, 20, 30)
        result = utils.format_grid(elements, width=78)
        rows = result.split("\n")
        self.assertEqual(len(rows), 30)
        self.assertTrue(all(len(row) == 78 for row in rows))

    def test_overlap(self):
        """Grid of real-world words; every element must survive into the output"""
        elements = (
            "alias",
            "batchcode",
            "batchcommands",
            "cmdsets",
            "copy",
            "cpattr",
            "desc",
            "destroy",
            "dig",
            "examine",
            "find",
            "force",
            "lock",
        )
        # No interactive debugger or prints here: a breakpoint would block
        # any unattended test run and pudb is not a project dependency.
        result = utils.format_grid(elements, width=78)
        rows = result.split("\n")
        self.assertEqual(len(rows), 2)
        for element in elements:
            self.assertIn(element, result, f"element {element} is missing.")

View File

@ -1686,6 +1686,93 @@ def format_table(table, extra_space=1):
)
return ftable
import functools
def percentile(iterable, percent, key=lambda x: x):
    """
    Find a percentile of a pre-sorted sequence of values, interpolating
    linearly between the two nearest elements when the percentile falls
    between them.

    Args:
        iterable (list): The values to examine. Note that these MUST
            already be sorted.
        percent (float): The percentile to find, as a value from 0.0 to 1.0.
        key (callable, optional): Function to compute the numeric value from
            each element of `iterable`.

    Returns:
        value (number or None): The percentile of the values, or `None` if
            `iterable` is empty.

    """
    if not iterable:
        return None

    position = (len(iterable) - 1) * percent
    below = math.floor(position)
    above = math.ceil(position)

    if below == above:
        # the position lands exactly on one element, no interpolation needed
        return key(iterable[int(position)])

    # weigh each neighbour by how close the position is to it
    lower_part = key(iterable[int(below)]) * (above - position)
    upper_part = key(iterable[int(above)]) * (position - below)
    return lower_part + upper_part
def format_grid(elements, width=78, sep=" "):
"""
This helper function makes a 'grid' output, where it distributes the given
string-elements as evenly as possible to fill out the given width.
The column width is derived from the 90th percentile of the element
lengths, so this will not work well if the variation of length is very big!

Args:
elements (iterable): A 1D list of string elements to put in the grid.
Must support `len()` and indexing.
width (int, optional): The width of the grid area to fill.
sep (str, optional): The extra separator to put between words. If
set to the empty string, words may run into each other.

Returns:
gridstr (str): The grid as a finished rendered multi-line string,
with rows joined by newlines.

Notes:
NOTE(review): an empty `elements` raises IndexError from the
`elements[-1]` lookup below - confirm whether callers can pass an
empty sequence.
"""
nelements = len(elements)
# append the separator to every element except the last one
elements = [elements[ie] + sep for ie in range(nelements - 1)] + [elements[-1]]
# lengths of the elements, including the appended separator
wls = [len(elem) for elem in elements]
# use the 90th percentile of the lengths as a good representation of the
# average width; this (plus two extra spaces) becomes the column width
averlen = int(percentile(sorted(wls), 0.9)) + 2 # include extra space
aver_per_row = width // averlen + 1
# character offsets at which each column slot of a row starts
indices = [averlen * ind for ind in range(aver_per_row - 1)]
rows = []
# ic is the index of the current column slot, row the row being built
ic = 0
row = ""
for ie, element in enumerate(elements):
wl = wls[ie]
# close the current row when the column slots are used up or when the
# last element is reached.
# NOTE(review): this branch appears to add the current element to the row
# only when ic == 0 - verify that no element is dropped when a row is
# closed here. crop() is also called without a width here, unlike the
# other call sites - confirm this is intended.
if ic >= aver_per_row - 1 or ie >= nelements - 1:
if ic == 0:
row = crop(element)
row += " " * max(0, (width - len(row)))
rows.append(row)
row = ""
ic = 0
# the element would stick out past the grid width if put in this slot:
# pad and store the current row and start a new one with the element
elif indices[ic] + wl > width:
row += " " * (width - len(row))
rows.append(row)
row = crop(element, width)
ic = 0
else:
try:
# skip column slots that the row's current text already covers
while len(row) > indices[ic] - 1:
ic += 1
except IndexError:
# ran past the last column slot while skipping
if ic == 0:
row = crop(element, width)
else:
row += " " * max(0, width - len(row))
rows.append(row)
ic = 0
else:
# add the element, padded out to the column width
row += element + " " * max(0, averlen - wl)
ic += 1
return "\n".join(rows)
def get_evennia_pids():
"""

View File

@ -10,6 +10,7 @@ django-filter >= 2.2.0, < 2.3
django-sekizai
inflect
autobahn >= 17.9.3
lunr == 0.5.6
# try to resolve dependency issue in py3.7
attrs >= 19.2.0