Better indexing of help suggestions with lunr, better display

2025-10-29 11:26:10 +00:00 · 2020-04-11 09:42:22 +02:00 · 2020-04-11 09:42:22 +02:00 · 2a18cb0a2d
commit 2a18cb0a2d
parent a8a5453a97
8 changed files with 287 additions and 72 deletions
--- a/6
+++ b/6
@ -3,7 +3,7 @@

 BLACK_FORMAT_CONFIGS = --target-version py37 --line-length 100
 TEST_GAME_DIR = .test_game_dir
-tests?=evennia
+TESTS ?= evennia

 default:
 	@echo " Usage: "
@ -29,10 +29,10 @@ test:
 	evennia --init $(TEST_GAME_DIR);\
 	cd $(TEST_GAME_DIR);\
 	evennia migrate;\
-	evennia test --keepdb $(tests);\
+	evennia test --keepdb $(TESTS);\

 testp:
 	evennia --init $(TEST_GAME_DIR);\
 	cd $(TEST_GAME_DIR);\
 	evennia migrate;\
-	evennia test --keepdb --parallel 4 $(tests);\
+	evennia test --keepdb --parallel 4 $(TESTS);\
--- a/evennia/init.py
+++ b/evennia/init.py
@ -414,7 +414,7 @@ def _init():
    GLOBAL_SCRIPTS.start()


-def set_trace(term_size=(140, 40), debugger="auto"):
+def set_trace(term_size=(140, 80), debugger="auto"):
    """
    Helper function for running a debugger inside the Evennia event loop.

--- a/evennia/commands/command.py
+++ b/evennia/commands/command.py
@ -84,6 +84,15 @@ def _init_command(cls, **kwargs):
                break
    cls.help_category = cls.help_category.lower()

+    # pre-prepare a help index entry for quicker lookup
+    cls.search_index_entry = {
+        "key": cls.key,
+        "aliases": " ".join(cls.aliases),
+        "category": cls.help_category,
+        "text": cls.__doc__,
+        "tags": ""
+    }
+

 class CommandMeta(type):
    """
--- a/evennia/commands/default/help.py
+++ b/evennia/commands/default/help.py
@ -6,6 +6,8 @@ set. The normal, database-tied help system is used for collaborative
 creation of other help topics such as RP help or game-world aides.
 """

+from lunr import lunr
+from lunr.exceptions import QueryParseError
 from django.conf import settings
 from collections import defaultdict
 from evennia.utils.utils import fill, dedent
@ -13,7 +15,7 @@ from evennia.commands.command import Command
 from evennia.help.models import HelpEntry
 from evennia.utils import create, evmore
 from evennia.utils.eveditor import EvEditor
-from evennia.utils.utils import string_suggestions, class_from_module
+from evennia.utils.utils import string_suggestions, class_from_module, inherits_from, format_grid

 COMMAND_DEFAULT_CLASS = class_from_module(settings.COMMAND_DEFAULT_CLASS)
 HELP_MORE = settings.HELP_MORE
@ -25,6 +27,71 @@ _DEFAULT_WIDTH = settings.CLIENT_DEFAULT_WIDTH
 _SEP = "|C" + "-" * _DEFAULT_WIDTH + "|n"


+class HelpCategory:
+    """
+    Lightweight wrapper around a help-category name, exposing the same
+    `search_index_entry` interface as the other help candidates.
+
+    Two categories compare equal, and hash the same, when their string
+    representations match case-insensitively. This lets a `set()` of
+    categories collapse duplicates.
+
+    """
+
+    def __init__(self, key):
+        """
+        Args:
+            key (str): The name of the help category.
+
+        """
+        self.key = key
+
+    @property
+    def search_index_entry(self):
+        """
+        The search-index document for this category. Only the key and
+        category fields carry data; the rest are empty strings.
+
+        """
+        return {
+            "key": str(self),
+            "aliases": "",
+            "category": self.key,
+            "tags": "",
+            "text": "",
+        }
+
+    def __str__(self):
+        return f"Category: {self.key}"
+
+    def __eq__(self, other):
+        # compares against the string form of anything, case-insensitively
+        return str(self).lower() == str(other).lower()
+
+    def __hash__(self):
+        # Must be derived from the same value __eq__ compares. Hashing on
+        # id(self) made every instance unique inside sets and dict keys,
+        # even when the instances compared equal.
+        return hash(str(self).lower())
+
+
+def help_search_with_index(query, candidate_entries, suggestion_maxnum=5):
+    """
+    Search a collection of help candidates through a lunr search index.
+
+    Args:
+        query (str): The lunr query string to search with.
+        candidate_entries (list): Objects to search among. Each must expose
+            a `search_index_entry` mapping holding the fields "key",
+            "aliases", "category", "tags" and "text".
+        suggestion_maxnum (int, optional): The maximum number of matches
+            to return.
+
+    Returns:
+        tuple: A tuple `(matches, suggestions)`, where `matches` is a list
+            of the matching candidate objects and `suggestions` is a list of
+            their index keys as strings, both in the order the search
+            returned them. Both are empty if the query could not be parsed.
+
+    """
+    documents = [candidate.search_index_entry for candidate in candidate_entries]
+    # index key -> candidate; a later candidate with the same key replaces an earlier one
+    by_key = {doc["key"]: candidate for doc, candidate in zip(documents, candidate_entries)}
+
+    # (field name, boost) for each indexed field
+    field_boosts = (
+        ("key", 10),
+        ("aliases", 9),
+        ("category", 8),
+        ("tags", 5),
+        ("text", 1),
+    )
+    search_index = lunr(
+        ref="key",
+        fields=[{"field_name": name, "boost": boost} for name, boost in field_boosts],
+        documents=documents,
+    )
+
+    try:
+        hits = search_index.search(query)[:suggestion_maxnum]
+    except QueryParseError:
+        # this is a user-input problem
+        hits = []
+
+    found = []
+    suggestions = []
+    for hit in hits:
+        ref = hit["ref"]
+        found.append(by_key[ref])
+        suggestions.append(str(ref))  # add f" (score {hit['score']})" when debugging
+    return found, suggestions
+
+
 class CmdHelp(Command):
    """
    View help or a list of topics
@ -119,12 +186,20 @@ class CmdHelp(Command):
        respectively.  You can override this method to return a
        custom display of the list of commands and topics.
        """
+        output = []
+        for category in sorted(set(list(hdict_cmds.keys()) + list(hdict_db.keys()))):
+            output.append(f"|w{category.title()}|G")
+            entries = sorted(set(hdict_cmds.get(category, []) + hdict_db.get(category, [])))
+            output.append(format_grid(entries, width=78))  # self.client_width()))
+        return "\n".join(output)
+
        string = ""
        if hdict_cmds and any(hdict_cmds.values()):
            string += "\n" + _SEP + "\n   |CCommand help entries|n\n" + _SEP
            for category in sorted(hdict_cmds.keys()):
                string += "\n  |w%s|n:\n" % (str(category).title())
                string += "|G" + fill("|C, |G".join(sorted(hdict_cmds[category]))) + "|n"
+
        if hdict_db and any(hdict_db.values()):
            string += "\n\n" + _SEP + "\n\r  |COther help entries|n\n" + _SEP
            for category in sorted(hdict_db.keys()):
@ -134,6 +209,7 @@ class CmdHelp(Command):
                    + fill(", ".join(sorted([str(topic) for topic in hdict_db[category]])))
                    + "|n"
                )
+
        return string

    def check_show_help(self, cmd, caller):
@ -208,8 +284,8 @@ class CmdHelp(Command):
        ]
        all_categories = list(
            set(
-                [cmd.help_category.lower() for cmd in all_cmds]
-                + [topic.help_category.lower() for topic in all_topics]
+                [HelpCategory(cmd.help_category) for cmd in all_cmds] +
+                [HelpCategory(topic.help_category) for topic in all_topics]
            )
        )

@ -228,73 +304,42 @@ class CmdHelp(Command):
            self.msg_help(self.format_help_list(hdict_cmd, hdict_topic))
            return

-        # Try to access a particular command
+        # Try to access a particular help entry or category
+        entries = ([cmd for cmd in all_cmds if cmd] +
+                   list(HelpEntry.objects.all()) +
+                   all_categories)

-        # build vocabulary of suggestions and rate them by string similarity.
-        suggestions = None
-        if suggestion_maxnum > 0:
-            vocabulary = (
-                [cmd.key for cmd in all_cmds if cmd]
-                + [topic.key for topic in all_topics]
-                + all_categories
+        for match_query in [f"{query}~1", f"{query}*"]:
+            # We first do a fuzzy word-match (edit distance 1) followed by a starts-with query
+
+            matches, suggestions = help_search_with_index(
+                match_query, entries, suggestion_maxnum=self.suggestion_maxnum)
+
+            if matches:
+                match = matches[0]
+                if isinstance(match, HelpCategory):
+                    formatted = self.format_help_list(
+                        {match.key: [cmd.key for cmd in all_cmds
+                                 if match.key.lower() == cmd.help_category]},
+                        {match.key: [topic.key for topic in all_topics
+                                 if match.key.lower() == topic.help_category]}
                    )
-            [vocabulary.extend(cmd.aliases) for cmd in all_cmds]
-            suggestions = [
-                sugg
-                for sugg in string_suggestions(
-                    query, set(vocabulary), cutoff=suggestion_cutoff, maxnum=suggestion_maxnum
-                )
-                if sugg != query
-            ]
-            if not suggestions:
-                suggestions = [
-                    sugg for sugg in vocabulary if sugg != query and sugg.startswith(query)
-                ]
-
-        # try an exact command auto-help match
-        match = [cmd for cmd in all_cmds if cmd == query]
-
-        if not match:
-            # try an inexact match with prefixes stripped from query and cmds
-            _query = query[1:] if query[0] in CMD_IGNORE_PREFIXES else query
-
-            match = [
-                cmd
-                for cmd in all_cmds
-                for m in cmd._matchset
-                if m == _query or m[0] in CMD_IGNORE_PREFIXES and m[1:] == _query
-            ]
-
-        if len(match) == 1:
+                elif inherits_from(match, "evennia.commands.command.Command"):
                    formatted = self.format_help_entry(
-                match[0].key,
-                match[0].get_help(caller, cmdset),
-                aliases=match[0].aliases,
-                suggested=suggestions,
+                        match.key,
+                        match.get_help(caller, cmdset),
+                        aliases=match.aliases,
+                        suggested=suggestions[1:]
                    )
-            self.msg_help(formatted)
-            return
-
-        # try an exact database help entry match
-        match = list(HelpEntry.objects.find_topicmatch(query, exact=True))
-        if len(match) == 1:
+                else:
                    formatted = self.format_help_entry(
-                match[0].key,
-                match[0].entrytext,
-                aliases=match[0].aliases.all(),
-                suggested=suggestions,
+                        match.key,
+                        match.entrytext,
+                        aliases=match.aliases.all(),
+                        suggested=suggestions[1:]
                    )
-            self.msg_help(formatted)
-            return

-        # try to see if a category name was entered
-        if query in all_categories:
-            self.msg_help(
-                self.format_help_list(
-                    {query: [cmd.key for cmd in all_cmds if cmd.help_category == query]},
-                    {query: [topic.key for topic in all_topics if topic.help_category == query]},
-                )
-            )
+                self.msg_help(formatted)
                return

        # no exact matches found. Just give suggestions.
--- a/evennia/help/models.py
+++ b/evennia/help/models.py
@ -73,7 +73,8 @@ class HelpEntry(SharedMemoryModel):
    db_tags = models.ManyToManyField(
        Tag,
        blank=True,
-        help_text="tags on this object. Tags are simple string markers to identify, group and alias objects.",
+        help_text="tags on this object. Tags are simple string markers to "
+                  "identify, group and alias objects.",
    )
    # (deprecated, only here to allow MUX helpfile load (don't use otherwise)).
    # TODO: remove this when not needed anymore.
@ -123,6 +124,19 @@ class HelpEntry(SharedMemoryModel):
        """
        return self.locks.check(accessing_obj, access_type=access_type, default=default)

+    @property
+    def search_index_entry(self):
+        """
+        Build the search-index document describing this help entry.
+
+        Returns:
+            dict: The entry's key, its aliases joined by spaces, its help
+                category, its entry text and its tags (as strings) joined
+                by spaces.
+
+        """
+        entry = {"key": self.db_key}
+        entry["aliases"] = " ".join(self.aliases.all())
+        entry["category"] = self.db_help_category
+        entry["text"] = self.db_entrytext
+        entry["tags"] = " ".join(map(str, self.tags.all()))
+        return entry
+
    #
    # Web/Django methods
    #
--- a/evennia/utils/tests/test_utils.py
+++ b/evennia/utils/tests/test_utils.py
@ -6,6 +6,7 @@ TODO: Not nearly all utilities are covered yet.
 """

 import os.path
+import random

 import mock
 from django.test import TestCase
@ -264,3 +265,61 @@ class LatinifyTest(TestCase):
        byte_str = utils.to_bytes(self.example_str)
        result = utils.latinify(byte_str)
        self.assertEqual(result, self.expected_output)
+
+
+class TestFormatGrid(TestCase):
+    """
+    Test the `utils.format_grid` helper with element lists of varying
+    length distributions.
+
+    """
+
+    maxDiff = None
+
+    def setUp(self):
+        # make the random only semi-random with a fixed seed
+        random.seed(1)
+
+    def tearDown(self):
+        # restore normal randomness
+        random.seed(None)
+
+    def _generate_elements(self, basewidth, variation, amount):
+        """
+        Make `amount` strings of "X" characters, each with a length of
+        `basewidth` plus a random offset within +/- `variation` (minimum 1).
+
+        """
+        return [
+            "X" * max(1, basewidth + random.randint(-variation, variation))
+            for _ in range(amount)
+        ]
+
+    def test_even_grid(self):
+        """Grid with small variations"""
+        elements = self._generate_elements(3, 1, 30)
+        result = utils.format_grid(elements, width=78)
+        rows = result.split("\n")
+        self.assertEqual(len(rows), 3)
+        self.assertTrue(all(len(row) == 78 for row in rows))
+
+    def test_disparate_grid(self):
+        """Grid with big variations"""
+        elements = self._generate_elements(3, 15, 30)
+        result = utils.format_grid(elements, width=82, sep="  ")
+        rows = result.split("\n")
+        self.assertEqual(len(rows), 8)
+        self.assertTrue(all(len(row) == 82 for row in rows))
+
+    def test_huge_grid(self):
+        """Grid with very long strings"""
+        elements = self._generate_elements(70, 20, 30)
+        result = utils.format_grid(elements, width=78)
+        rows = result.split("\n")
+        self.assertEqual(len(rows), 30)
+        self.assertTrue(all(len(row) == 78 for row in rows))
+
+    def test_overlap(self):
+        """Grid of real command names; no element may go missing"""
+        elements = ("alias", "batchcode", "batchcommands", "cmdsets",
+                    "copy", "cpattr", "desc", "destroy", "dig",
+                    "examine", "find", "force", "lock")
+        # No interactive debugger or print calls here: a breakpoint stalls
+        # an unattended test run and needs a dev-only package installed.
+        result = utils.format_grid(elements, width=78)
+        rows = result.split("\n")
+        self.assertEqual(len(rows), 2)
+        for element in elements:
+            self.assertIn(element, result, f"element {element} is missing.")
+
--- a/evennia/utils/utils.py
+++ b/evennia/utils/utils.py
@ -1686,6 +1686,93 @@ def format_table(table, extra_space=1):
        )
    return ftable

+import functools
+
+
+def percentile(iterable, percent, key=lambda x: x):
+    """
+    Find a percentile of a sequence of pre-sorted values, interpolating
+    linearly when the percentile falls between two elements.
+
+    Args:
+        iterable (list or tuple): The values. These MUST already be sorted,
+            and the sequence must support `len()` and integer indexing.
+        percent (float): The percentile to find, as a value from 0.0 to 1.0.
+        key (callable, optional): Function to compute the numeric value
+            from each element of `iterable`.
+
+    Returns:
+        percentile (number or None): The value at the requested percentile,
+            or `None` if `iterable` is empty.
+
+    """
+    if not iterable:
+        return None
+    position = (len(iterable) - 1) * percent
+    lower, upper = math.floor(position), math.ceil(position)
+    if lower == upper:
+        # the position lands exactly on one element
+        return key(iterable[int(position)])
+    # weigh each neighbor by how close the position is to it
+    lower_part = key(iterable[int(lower)]) * (upper - position)
+    upper_part = key(iterable[int(upper)]) * (position - lower)
+    return lower_part + upper_part
+
+
+def format_grid(elements, width=78, sep=" "):
+    """
+    This helper function makes a 'grid' output, where it distributes the given
+    string-elements as evenly as possible to fill out the given width.
+    It will not work well if the variation of length is very big!
+
+    Args:
+        elements (iterable): A 1D list of string elements to put in the grid.
+            It is indexed and measured with `len()`, so it must be a
+            sequence; `elements[-1]` is read unconditionally.
+        width (int, optional): The width of the grid area to fill.
+        sep (str, optional): The extra separator to put between words. If
+            set to the empty string, words may run into each other.
+
+    Returns:
+        gridstr (str): The grid as a finished rendered multi-line string.
+
+    """
+    nelements = len(elements)
+    # append the separator to every element except the last one
+    elements = [elements[ie] + sep for ie in range(nelements - 1)] + [elements[-1]]
+
+    # widths of the elements, separator included
+    wls = [len(elem) for elem in elements]
+    # get the nth percentile as a good representation of average width
+    averlen = int(percentile(sorted(wls), 0.9)) + 2   # include extra space
+    aver_per_row = width // averlen + 1
+
+    # starting column of each slot in a row
+    indices = [averlen * ind for ind in range(aver_per_row - 1)]
+
+    rows = []
+    ic = 0  # index of the current slot within the row
+    row = ""
+    for ie, element in enumerate(elements):
+        wl = wls[ie]
+        if ic >= aver_per_row - 1 or ie >= nelements - 1:
+            # the row is full, or this is the last element: pad and flush the row.
+            # NOTE(review): the current element is only placed when ic == 0;
+            # otherwise the row is flushed without it - looks like the
+            # element is dropped in that case. Confirm.
+            if ic == 0:
+                # NOTE(review): no width is passed here, unlike the other
+                # crop() calls below - relies on crop's default. Confirm.
+                row = crop(element)
+            row += " " * max(0, (width - len(row)))
+            rows.append(row)
+            row = ""
+            ic = 0
+        elif indices[ic] + wl > width:
+            # the element would pass the right edge: flush the row and
+            # start a new one with this element
+            row += " " * (width - len(row))
+            rows.append(row)
+            row = crop(element, width)
+            ic = 0
+        else:
+            try:
+                # skip slots already covered by the text in the row
+                while len(row) > indices[ic] - 1:
+                    ic += 1
+            except IndexError:
+                # ran out of slots on this row
+                # NOTE(review): `row` is not reset after being appended here,
+                # and the element is not placed when ic != 0. Confirm intent.
+                if ic == 0:
+                    row = crop(element, width)
+                else:
+                    row += " " * max(0, width - len(row))
+                rows.append(row)
+                ic = 0
+            else:
+                # place the element and pad it out to the average width
+                row += element + " " * max(0, averlen - wl)
+                ic += 1
+
+    return "\n".join(rows)
+

 def get_evennia_pids():
    """
--- a/requirements.txt
+++ b/requirements.txt
@ -10,6 +10,7 @@ django-filter >= 2.2.0, < 2.3
 django-sekizai
 inflect
 autobahn >= 17.9.3
+lunr == 0.5.6

 # try to resolve dependency issue in py3.7
 attrs >= 19.2.0