Source code for evennia.utils.ansi

"""
ANSI - Gives colour to text.

Use the codes defined in the *ANSIParser* class to apply colour to text. The
`parse_ansi` function in this module parses text for markup and `strip_ansi`
removes it.

You should usually not need to call `parse_ansi` explicitly; it is run by
Evennia just before returning data to/from the user. Alternative markup is
possible by overriding the parser class (see also contrib/ for deprecated
markup schemes).


Supported standards:

- ANSI 8 bright and 8 dark fg (foreground) colors
- ANSI 8 dark bg (background) colors
- 'ANSI' 8 bright bg colors 'faked' with xterm256 (bright bg not included in ANSI standard)
- Xterm256 - 255 fg/bg colors + 26 greyscale fg/bg colors

## Markup

ANSI colors: `r` ed, `g` reen, `y` ellow, `b` lue, `m` agenta, `c` yan, `n` ormal (no color).
Capital letters indicate the 'dark' variant.

- `|r` fg bright red
- `|R` fg dark red
- `|[r` bg bright red
- `|[R` bg dark red
- `|[R|g` bg dark red, fg bright green

```python
"This is |rRed text|n and this is normal again."

```

Xterm256 colors are given as RGB (Red-Green-Blue), with values 0-5:

- `|500` fg bright red
- `|050` fg bright green
- `|005` fg bright blue
- `|110` fg dark brown
- `|425` fg pink
- `|[431` bg orange

Xterm256 greyscale:

- `|=a` fg black
- `|=g` fg dark grey
- `|=o` fg middle grey
- `|=v` fg bright grey
- `|=z` fg white
- `|[=r` bg middle grey

```python
"This is |500Red text|n and this is normal again."
"This is |[=jText on dark grey background"

```

----

"""

import functools
import re
from collections import OrderedDict

from django.conf import settings
from evennia.utils import logger, utils
from evennia.utils.hex_colors import HexColors
from evennia.utils.utils import to_str

hex2truecolor = HexColors()
hex_sub = HexColors.hex_sub

MXP_ENABLED = settings.MXP_ENABLED

# ANSI definitions

ANSI_BEEP = "\07"
ANSI_ESCAPE = "\033"
ANSI_NORMAL = "\033[0m"

ANSI_UNDERLINE = "\033[4m"
ANSI_UNDERLINE_RESET = "\033[24m"
ANSI_ITALIC = "\033[3m"
ANSI_ITALIC_RESET = "\033[23m"
ANSI_STRIKE = "\033[9m"
ANSI_STRIKE_RESET = "\033[29m"
ANSI_HILITE = "\033[1m"
ANSI_UNHILITE = "\033[22m"
ANSI_BLINK = "\033[5m"
ANSI_INVERSE = "\033[7m"
ANSI_INV_HILITE = "\033[1;7m"
ANSI_INV_BLINK = "\033[7;5m"
ANSI_BLINK_HILITE = "\033[1;5m"
ANSI_INV_BLINK_HILITE = "\033[1;5;7m"

# Foreground colors
ANSI_BLACK = "\033[30m"
ANSI_RED = "\033[31m"
ANSI_GREEN = "\033[32m"
ANSI_YELLOW = "\033[33m"
ANSI_BLUE = "\033[34m"
ANSI_MAGENTA = "\033[35m"
ANSI_CYAN = "\033[36m"
ANSI_WHITE = "\033[37m"

# Background colors
ANSI_BACK_BLACK = "\033[40m"
ANSI_BACK_RED = "\033[41m"
ANSI_BACK_GREEN = "\033[42m"
ANSI_BACK_YELLOW = "\033[43m"
ANSI_BACK_BLUE = "\033[44m"
ANSI_BACK_MAGENTA = "\033[45m"
ANSI_BACK_CYAN = "\033[46m"
ANSI_BACK_WHITE = "\033[47m"

# Formatting Characters
ANSI_RETURN = "\r\n"
ANSI_TAB = "\t"
ANSI_SPACE = " "

# Escapes
ANSI_ESCAPES = ("{{", r"\\", r"\|\|")

_PARSE_CACHE = OrderedDict()
_PARSE_CACHE_SIZE = 10000

_COLOR_NO_DEFAULT = settings.COLOR_NO_DEFAULT


[docs]class ANSIParser(object):
    """
    A class that parses ANSI markup
    to ANSI command sequences

    We also allow to escape colour codes
    by prepending with an extra `|`.

    """

    # Mapping using {r {n etc

    ansi_map = [
        # alternative |-format
        (r"|n", ANSI_NORMAL),  # reset
        (r"|/", ANSI_RETURN),  # line break
        (r"|-", ANSI_TAB),  # tab
        (r"|>", ANSI_SPACE * 4),  # indent (4 spaces)
        (r"|_", ANSI_SPACE),  # space
        (r"|*", ANSI_INVERSE),  # invert
        (r"|^", ANSI_BLINK),  # blinking text (very annoying and not supported by all clients)
        (r"|u", ANSI_UNDERLINE),  # underline
        (r"|U", ANSI_UNDERLINE_RESET),  # underline reset
        (r"|i", ANSI_ITALIC),  # italic
        (r"|I", ANSI_ITALIC_RESET),  # italic reset
        (r"|s", ANSI_STRIKE),  # strikethrough
        (r"|S", ANSI_STRIKE_RESET),  # strikethrough reset
        (r"|r", ANSI_HILITE + ANSI_RED),
        (r"|g", ANSI_HILITE + ANSI_GREEN),
        (r"|y", ANSI_HILITE + ANSI_YELLOW),
        (r"|b", ANSI_HILITE + ANSI_BLUE),
        (r"|m", ANSI_HILITE + ANSI_MAGENTA),
        (r"|c", ANSI_HILITE + ANSI_CYAN),
        (r"|w", ANSI_HILITE + ANSI_WHITE),  # pure white
        (r"|x", ANSI_HILITE + ANSI_BLACK),  # dark grey
        (r"|R", ANSI_UNHILITE + ANSI_RED),
        (r"|G", ANSI_UNHILITE + ANSI_GREEN),
        (r"|Y", ANSI_UNHILITE + ANSI_YELLOW),
        (r"|B", ANSI_UNHILITE + ANSI_BLUE),
        (r"|M", ANSI_UNHILITE + ANSI_MAGENTA),
        (r"|C", ANSI_UNHILITE + ANSI_CYAN),
        (r"|W", ANSI_UNHILITE + ANSI_WHITE),  # light grey
        (r"|X", ANSI_UNHILITE + ANSI_BLACK),  # pure black
        # hilight-able colors
        (r"|h", ANSI_HILITE),
        (r"|H", ANSI_UNHILITE),
        (r"|!R", ANSI_RED),
        (r"|!G", ANSI_GREEN),
        (r"|!Y", ANSI_YELLOW),
        (r"|!B", ANSI_BLUE),
        (r"|!M", ANSI_MAGENTA),
        (r"|!C", ANSI_CYAN),
        (r"|!W", ANSI_WHITE),  # light grey
        (r"|!X", ANSI_BLACK),  # pure black
        # normal ANSI backgrounds
        (r"|[R", ANSI_BACK_RED),
        (r"|[G", ANSI_BACK_GREEN),
        (r"|[Y", ANSI_BACK_YELLOW),
        (r"|[B", ANSI_BACK_BLUE),
        (r"|[M", ANSI_BACK_MAGENTA),
        (r"|[C", ANSI_BACK_CYAN),
        (r"|[W", ANSI_BACK_WHITE),  # light grey background
        (r"|[X", ANSI_BACK_BLACK),  # pure black background
    ]

    ansi_xterm256_bright_bg_map = [
        # "bright" ANSI backgrounds using xterm256 since ANSI
        # standard does not support it (will
        # fallback to dark ANSI background colors if xterm256
        # is not supported by client)
        # |-style variations
        (r"|[r", r"|[500"),
        (r"|[g", r"|[050"),
        (r"|[y", r"|[550"),
        (r"|[b", r"|[005"),
        (r"|[m", r"|[505"),
        (r"|[c", r"|[055"),
        (r"|[w", r"|[555"),  # white background
        (r"|[x", r"|[222"),
    ]  # dark grey background

    # xterm256. These are replaced directly by
    # the sub_xterm256 method

    if settings.COLOR_NO_DEFAULT:
        ansi_map = settings.COLOR_ANSI_EXTRA_MAP
        xterm256_fg = settings.COLOR_XTERM256_EXTRA_FG
        xterm256_bg = settings.COLOR_XTERM256_EXTRA_BG
        xterm256_gfg = settings.COLOR_XTERM256_EXTRA_GFG
        xterm256_gbg = settings.COLOR_XTERM256_EXTRA_GBG
        ansi_xterm256_bright_bg_map = settings.COLOR_ANSI_XTERM256_BRIGHT_BG_EXTRA_MAP
    else:
        xterm256_fg = [r"\|([0-5])([0-5])([0-5])"]  # |123 - foreground colour
        xterm256_bg = [r"\|\[([0-5])([0-5])([0-5])"]  # |[123 - background colour
        xterm256_gfg = [r"\|=([a-z])"]  # |=a - greyscale foreground
        xterm256_gbg = [r"\|\[=([a-z])"]  # |[=a - greyscale background
        ansi_map += settings.COLOR_ANSI_EXTRA_MAP
        xterm256_fg += settings.COLOR_XTERM256_EXTRA_FG
        xterm256_bg += settings.COLOR_XTERM256_EXTRA_BG
        xterm256_gfg += settings.COLOR_XTERM256_EXTRA_GFG
        xterm256_gbg += settings.COLOR_XTERM256_EXTRA_GBG
        ansi_xterm256_bright_bg_map += settings.COLOR_ANSI_XTERM256_BRIGHT_BG_EXTRA_MAP

    mxp_re = r"\|lc(.*?)\|lt(.*?)\|le"
    mxp_url_re = r"\|lu(.*?)\|lt(.*?)\|le"

    # prepare regex matching
    brightbg_sub = re.compile(
        r"|".join([r"(?<!\|)%s" % re.escape(tup[0]) for tup in ansi_xterm256_bright_bg_map]),
        re.DOTALL,
    )
    xterm256_fg_sub = re.compile(r"|".join(xterm256_fg), re.DOTALL)
    xterm256_bg_sub = re.compile(r"|".join(xterm256_bg), re.DOTALL)
    xterm256_gfg_sub = re.compile(r"|".join(xterm256_gfg), re.DOTALL)
    xterm256_gbg_sub = re.compile(r"|".join(xterm256_gbg), re.DOTALL)

    # xterm256_sub = re.compile(r"|".join([tup[0] for tup in xterm256_map]), re.DOTALL)
    ansi_sub = re.compile(r"|".join([re.escape(tup[0]) for tup in ansi_map]), re.DOTALL)
    mxp_sub = re.compile(mxp_re, re.DOTALL)
    mxp_url_sub = re.compile(mxp_url_re, re.DOTALL)

    # used by regex replacer to correctly map ansi sequences
    ansi_map_dict = dict(ansi_map)
    ansi_xterm256_bright_bg_map_dict = dict(ansi_xterm256_bright_bg_map)

    # prepare matching ansi codes overall
    ansi_re = r"\033\[[0-9;]+m"
    ansi_regex = re.compile(ansi_re)

    # escapes - these double-chars will be replaced with a single
    # instance of each
    ansi_escapes = re.compile(r"(%s)" % "|".join(ANSI_ESCAPES), re.DOTALL)

    # tabs/linebreaks |/ and |- should be able to be cleaned
    unsafe_tokens = re.compile(r"\|\/|\|-", re.DOTALL)

[docs]    def sub_ansi(self, ansimatch):
        """
        Replacer used by `re.sub` to replace ANSI
        markers with correct ANSI sequences

        Args:
            ansimatch (re.matchobject): The match.

        Returns:
            processed (str): The processed match string.

        """
        return self.ansi_map_dict.get(ansimatch.group(), "")

[docs]    def sub_brightbg(self, ansimatch):
        """
        Replacer used by `re.sub` to replace ANSI
        bright background markers with Xterm256 replacement

        Args:
            ansimatch (re.matchobject): The match.

        Returns:
            processed (str): The processed match string.

        """
        return self.ansi_xterm256_bright_bg_map_dict.get(ansimatch.group(), "")

[docs]    def sub_xterm256(self, rgbmatch, use_xterm256=False, color_type="fg"):
        """
        This is a replacer method called by `re.sub` with the matched
        tag. It must return the correct ansi sequence.

        It checks `self.do_xterm256` to determine if conversion
        to standard ANSI should be done or not.

        Args:
            rgbmatch (re.matchobject): The match.
            use_xterm256 (bool, optional): Don't convert 256-colors to 16.
            color_type (str): One of 'fg', 'bg', 'gfg', 'gbg'.

        Returns:
            processed (str): The processed match string.

        """
        if not rgbmatch:
            return ""

        # get tag, stripping the initial marker
        # rgbtag = rgbmatch.group()[1:]

        background = color_type in ("bg", "gbg")
        grayscale = color_type in ("gfg", "gbg")

        if not grayscale:
            # 6x6x6 color-cube (xterm indexes 16-231)
            try:
                red, green, blue = [int(val) for val in rgbmatch.groups() if val is not None]
            except (IndexError, ValueError):
                logger.log_trace()
                return rgbmatch.group(0)
        else:
            # grayscale values (xterm indexes 0, 232-255, 15) for full spectrum
            try:
                letter = [val for val in rgbmatch.groups() if val is not None][0]
            except IndexError:
                logger.log_trace()
                return rgbmatch.group(0)

            if letter == "a":
                colval = 16  # pure black @ index 16 (first color cube entry)
            elif letter == "z":
                colval = 231  # pure white @ index 231 (last color cube entry)
            else:
                # letter in range [b..y] (exactly 24 values!)
                colval = 134 + ord(letter)

            # ansi fallback logic expects r,g,b values in [0..5] range
            gray = round((ord(letter) - 97) / 5.0)
            red, green, blue = gray, gray, gray

        if use_xterm256:
            if not grayscale:
                colval = 16 + (red * 36) + (green * 6) + blue

            return "\033[%s8;5;%sm" % (3 + int(background), colval)
            # replaced since some clients (like Potato) does not accept codes with leading zeroes,
            # see issue #1024.
            # return "\033[%s8;5;%s%s%sm" % (3 + int(background), colval // 100, (colval % 100) // 10, colval%10)  # noqa

        else:
            # xterm256 not supported, convert the rgb value to ansi instead
            rgb = (red, green, blue)

            def _convert_for_ansi(val):
                return int((val + 1) // 2)

            # greys
            if (max(rgb) - min(rgb)) <= 1:
                match rgb:
                    case (0, 0, 0):
                        return ANSI_BACK_BLACK if background else ANSI_NORMAL + ANSI_BLACK
                    case ((1 | 2), (1 | 2), (1 | 2)):
                        return ANSI_BACK_BLACK if background else ANSI_HILITE + ANSI_BLACK
                    case ((2 | 3), (2 | 3), (2 | 3)):
                        return ANSI_BACK_WHITE if background else ANSI_NORMAL + ANSI_WHITE
                    case ((3 | 4), (3 | 4), (3 | 4)):
                        return ANSI_BACK_WHITE if background else ANSI_NORMAL + ANSI_WHITE
                    case ((4 | 5), (4 | 5), (4 | 5)):
                        return ANSI_BACK_WHITE if background else ANSI_HILITE + ANSI_WHITE

            match tuple(_convert_for_ansi(c) for c in rgb):
                # red
                case ((2 | 3), (0 | 1), (0 | 1)):
                    return ANSI_BACK_RED if background else ANSI_HILITE + ANSI_RED
                case ((1 | 2), 0, 0):
                    return ANSI_BACK_RED if background else ANSI_NORMAL + ANSI_RED
                # green
                case ((0 | 1), (2 | 3), (0 | 1)):
                    return ANSI_BACK_GREEN if background else ANSI_HILITE + ANSI_GREEN
                case ((0 | 1), 1, 0) if green > red:
                    return ANSI_BACK_GREEN if background else ANSI_NORMAL + ANSI_GREEN
                # blue
                case ((0 | 1), (0 | 1), (2 | 3)):
                    return ANSI_BACK_BLUE if background else ANSI_HILITE + ANSI_BLUE
                case (0, 0, 1):
                    return ANSI_BACK_BLUE if background else ANSI_NORMAL + ANSI_BLUE
                # cyan
                case ((0 | 1 | 2), (2 | 3), (2 | 3)) if red == min(rgb):
                    return ANSI_BACK_CYAN if background else ANSI_HILITE + ANSI_CYAN
                case (0, (1 | 2), (1 | 2)):
                    return ANSI_BACK_CYAN if background else ANSI_NORMAL + ANSI_CYAN
                # yellow
                case ((2 | 3), (2 | 3), (0 | 1 | 2)) if blue == min(rgb):
                    return ANSI_BACK_YELLOW if background else ANSI_HILITE + ANSI_YELLOW
                case ((2 | 1), (2 | 1), (0 | 1)):
                    return ANSI_BACK_YELLOW if background else ANSI_NORMAL + ANSI_YELLOW
                # magenta
                case ((2 | 3), (0 | 1 | 2), (2 | 3)) if green == min(rgb):
                    return ANSI_BACK_MAGENTA if background else ANSI_HILITE + ANSI_MAGENTA
                case ((1 | 2), 0, (1 | 2)):
                    return ANSI_BACK_MAGENTA if background else ANSI_NORMAL + ANSI_MAGENTA

[docs]    def strip_raw_codes(self, string):
        """
        Strips raw ANSI codes from a string.

        Args:
            string (str): The string to strip.

        Returns:
            string (str): The processed string.

        """
        return self.ansi_regex.sub("", string)

[docs]    def strip_mxp(self, string):
        """
        Strips all MXP codes from a string.

        Args:
            string (str): The string to strip.

        Returns:
            string (str): The processed string.

        """
        string = self.mxp_sub.sub(r"\2", string)
        string = self.mxp_url_sub.sub(r"\1", string)  # replace with url verbatim
        return string

[docs]    def strip_unsafe_tokens(self, string):
        """
        Strip explicitly ansi line breaks and tabs.

        """
        return self.unsafe_tokens.sub("", string)

[docs]    def parse_ansi(self, string, strip_ansi=False, xterm256=False, mxp=False, truecolor=False):
        """
        Parses a string, subbing color codes according to the stored
        mapping.

        Args:
            string (str): The string to parse.
            strip_ansi (boolean, optional): Strip all found ansi markup.
            xterm256 (boolean, optional): If actually using xterm256 or if
                these values should be converted to 16-color ANSI.
            mxp (boolean, optional): Parse MXP commands in string.

        Returns:
            string (str): The parsed string.

        """
        if hasattr(string, "_raw_string"):
            if strip_ansi:
                return string.clean()
            else:
                return string.raw()

        if not string:
            return ""

        # check cached parsings
        global _PARSE_CACHE
        cachekey = f"{string}-{strip_ansi}-{xterm256}-{mxp}-{truecolor}"

        if cachekey in _PARSE_CACHE:
            return _PARSE_CACHE[cachekey]

        # pre-convert bright colors to xterm256 color tags
        string = self.brightbg_sub.sub(self.sub_brightbg, string)

        def do_truecolor(part: re.Match, truecolor=truecolor):
            return hex2truecolor.sub_truecolor(part, truecolor)

        def do_xterm256_fg(part):
            return self.sub_xterm256(part, xterm256, "fg")

        def do_xterm256_bg(part):
            return self.sub_xterm256(part, xterm256, "bg")

        def do_xterm256_gfg(part):
            return self.sub_xterm256(part, xterm256, "gfg")

        def do_xterm256_gbg(part):
            return self.sub_xterm256(part, xterm256, "gbg")

        in_string = utils.to_str(string)

        # do string replacement
        parsed_string = []
        parts = self.ansi_escapes.split(in_string) + [" "]
        for part, sep in zip(parts[::2], parts[1::2]):
            pstring = hex_sub.sub(do_truecolor, part)
            pstring = self.xterm256_fg_sub.sub(do_xterm256_fg, pstring)
            pstring = self.xterm256_bg_sub.sub(do_xterm256_bg, pstring)
            pstring = self.xterm256_gfg_sub.sub(do_xterm256_gfg, pstring)
            pstring = self.xterm256_gbg_sub.sub(do_xterm256_gbg, pstring)
            pstring = self.ansi_sub.sub(self.sub_ansi, pstring)
            parsed_string.append("%s%s" % (pstring, sep[0].strip()))
        parsed_string = "".join(parsed_string)

        if not mxp:
            parsed_string = self.strip_mxp(parsed_string)

        if strip_ansi:
            # remove all ansi codes (including those manually
            # inserted in string)
            return self.strip_raw_codes(parsed_string)

        # cache and crop old cache
        _PARSE_CACHE[cachekey] = parsed_string
        if len(_PARSE_CACHE) > _PARSE_CACHE_SIZE:
            _PARSE_CACHE.popitem(last=False)

        return parsed_string


ANSI_PARSER = ANSIParser()


#
# Access function
#


[docs]def parse_ansi(
    string, strip_ansi=False, parser=ANSI_PARSER, xterm256=False, mxp=False, truecolor=False
):
    """
    Parses a string, subbing color codes as needed.

    Args:
        string (str): The string to parse.
        strip_ansi (bool, optional): Strip all ANSI sequences.
        parser (ansi.AnsiParser, optional): A parser instance to use.
        xterm256 (bool, optional): Support xterm256 or not.
        mxp (bool, optional): Support MXP markup or not.
        truecolor (bool, optional): Support for truecolor or not.

    Returns:
        string (str): The parsed string.

    """
    string = string or ""
    return parser.parse_ansi(
        string, strip_ansi=strip_ansi, xterm256=xterm256, mxp=mxp, truecolor=truecolor
    )


[docs]def strip_ansi(string, parser=ANSI_PARSER):
    """
    Strip all ansi from the string. This handles the Evennia-specific
    markup.

    Args:
        string (str): The string to strip.
        parser (ansi.AnsiParser, optional): The parser to use.

    Returns:
        string (str): The stripped string.

    """
    string = string or ""
    return parser.parse_ansi(string, strip_ansi=True)


[docs]def strip_raw_ansi(string, parser=ANSI_PARSER):
    """
    Remove raw ansi codes from string. This assumes pure
    ANSI-bytecodes in the string.

    Args:
        string (str): The string to parse.
        parser (bool, optional): The parser to use.

    Returns:
        string (str): the stripped string.

    """
    string = string or ""
    return parser.strip_raw_codes(string)


[docs]def strip_unsafe_tokens(string, parser=ANSI_PARSER):
    """
    Strip markup that can be used to create visual exploits
    (notably linebreaks and tags)

    """
    return parser.strip_unsafe_tokens(string)


[docs]def strip_mxp(string, parser=ANSI_PARSER):
    """
    Strip MXP markup.

    """
    string = string or ""
    return parser.strip_mxp(string)


[docs]def raw(string):
    """
    Escapes a string into a form which won't be colorized by the ansi
    parser.

    Returns:
        string (str): The raw, escaped string.

    """
    string = string or ""
    return string.replace("{", "{{").replace("|", "||")


# ------------------------------------------------------------
#
# ANSIString - ANSI-aware string class
#
# ------------------------------------------------------------


def _spacing_preflight(func):
    """
    This wrapper function is used to do some preflight checks on
    functions used for padding ANSIStrings.

    """

    @functools.wraps(func)
    def wrapped(self, width=78, fillchar=None):
        if fillchar is None:
            fillchar = " "
        if (len(fillchar) != 1) or (not isinstance(fillchar, str)):
            raise TypeError("must be char, not %s" % type(fillchar))
        if not isinstance(width, int):
            raise TypeError("integer argument expected, got %s" % type(width))
        _difference = width - len(self)
        if _difference <= 0:
            return self
        return func(self, width, fillchar, _difference)

    return wrapped


def _query_super(func_name):
    """
    Have the string class handle this with the cleaned string instead
    of ANSIString.

    """

    def wrapped(self, *args, **kwargs):
        return getattr(self.clean(), func_name)(*args, **kwargs)

    return wrapped


def _on_raw(func_name):
    """
    Like query_super, but makes the operation run on the raw string.

    """

    def wrapped(self, *args, **kwargs):
        args = list(args)
        try:
            string = args.pop(0)
            if hasattr(string, "_raw_string"):
                args.insert(0, string.raw())
            else:
                args.insert(0, string)
        except IndexError:
            # just skip out if there are no more strings
            pass
        result = getattr(self._raw_string, func_name)(*args, **kwargs)
        if isinstance(result, str):
            return ANSIString(result, decoded=True)
        return result

    return wrapped


def _transform(func_name):
    """
    Some string functions, like those manipulating capital letters,
    return a string the same length as the original. This function
    allows us to do the same, replacing all the non-coded characters
    with the resulting string.

    """

    def wrapped(self, *args, **kwargs):
        replacement_string = _query_super(func_name)(self, *args, **kwargs)
        to_string = []
        char_counter = 0
        for index in range(0, len(self._raw_string)):
            if index in self._code_indexes:
                to_string.append(self._raw_string[index])
            elif index in self._char_indexes:
                to_string.append(replacement_string[char_counter])
                char_counter += 1
        return ANSIString(
            "".join(to_string),
            decoded=True,
            code_indexes=self._code_indexes,
            char_indexes=self._char_indexes,
            clean_string=replacement_string,
        )

    return wrapped


[docs]class ANSIMeta(type):
    """
    Many functions on ANSIString are just light wrappers around the string
    base class. We apply them here, as part of the classes construction.

    """

[docs]    def __init__(cls, *args, **kwargs):
        for func_name in [
            "count",
            "startswith",
            "endswith",
            "find",
            "index",
            "isalnum",
            "isalpha",
            "isdigit",
            "islower",
            "isspace",
            "istitle",
            "isupper",
            "rfind",
            "rindex",
            "__len__",
        ]:
            setattr(cls, func_name, _query_super(func_name))
        for func_name in ["__mod__", "expandtabs", "decode", "replace", "format", "encode"]:
            setattr(cls, func_name, _on_raw(func_name))
        for func_name in ["capitalize", "translate", "lower", "upper", "swapcase"]:
            setattr(cls, func_name, _transform(func_name))
        super().__init__(*args, **kwargs)


[docs]class ANSIString(str, metaclass=ANSIMeta):
    """
    Unicode-like object that is aware of ANSI codes.

    This class can be used nearly identically to strings, in that it will
    report string length, handle slices, etc, much like a string object
    would. The methods should be used identically as string methods are.

    There is at least one exception to this (and there may be more, though
    they have not come up yet). When using ''.join() or u''.join() on an
    ANSIString, color information will get lost. You must use
    ANSIString('').join() to preserve color information.

    This implementation isn't perfectly clean, as it doesn't really have an
    understanding of what the codes mean in order to eliminate
    redundant characters-- though cleaning up the strings might end up being
    inefficient and slow without some C code when dealing with larger values.
    Such enhancements could be made as an enhancement to ANSI_PARSER
    if needed, however.

    If one is going to use ANSIString, one should generally avoid converting
    away from it until one is about to send information on the wire. This is
    because escape sequences in the string may otherwise already be decoded,
    and taken literally the second time around.

    """

    # A compiled Regex for the format mini-language:
    # https://docs.python.org/3/library/string.html#formatspec
    re_format = re.compile(
        r"(?i)(?P<just>(?P<fill>.)?(?P<align>\<|\>|\=|\^))?(?P<sign>\+|\-| )?(?P<alt>\#)?"
        r"(?P<zero>0)?(?P<width>\d+)?(?P<grouping>\_|\,)?(?:\.(?P<precision>\d+))?"
        r"(?P<type>b|c|d|e|E|f|F|g|G|n|o|s|x|X|%)?"
    )

    def __new__(cls, *args, **kwargs):
        """
        When creating a new ANSIString, you may use a custom parser that has
        the same attributes as the standard one, and you may declare the
        string to be handled as already decoded. It is important not to double
        decode strings, as escapes can only be respected once.

        Internally, ANSIString can also passes itself precached code/character
        indexes and clean strings to avoid doing extra work when combining
        ANSIStrings.

        """
        string = args[0]
        if not isinstance(string, str):
            string = to_str(string)
        parser = kwargs.get("parser", ANSI_PARSER)
        decoded = kwargs.get("decoded", False) or hasattr(string, "_raw_string")
        code_indexes = kwargs.pop("code_indexes", None)
        char_indexes = kwargs.pop("char_indexes", None)
        clean_string = kwargs.pop("clean_string", None)
        # All True, or All False, not just one.
        checks = [x is None for x in [code_indexes, char_indexes, clean_string]]
        if not len(set(checks)) == 1:
            raise ValueError(
                "You must specify code_indexes, char_indexes, "
                "and clean_string together, or not at all."
            )
        if not all(checks):
            decoded = True
        if not decoded:
            # Completely new ANSI String
            clean_string = parser.parse_ansi(string, strip_ansi=True, mxp=MXP_ENABLED)
            string = parser.parse_ansi(string, xterm256=True, mxp=MXP_ENABLED, truecolor=True)
        elif clean_string is not None:
            # We have an explicit clean string.
            pass
        elif hasattr(string, "_clean_string"):
            # It's already an ANSIString
            clean_string = string._clean_string
            code_indexes = string._code_indexes
            char_indexes = string._char_indexes
            string = string._raw_string
        else:
            # It's a string that has been pre-ansi decoded.
            clean_string = parser.strip_raw_codes(string)

        if not isinstance(string, str):
            string = string.decode("utf-8")

        ansi_string = super().__new__(ANSIString, to_str(clean_string))
        ansi_string._raw_string = string
        ansi_string._clean_string = clean_string
        ansi_string._code_indexes = code_indexes
        ansi_string._char_indexes = char_indexes
        return ansi_string

    def __str__(self):
        return self._raw_string

    def __format__(self, format_spec):
        """
        This magic method covers ANSIString's behavior within a str.format() or f-string.

        Current features supported: fill, align, width.

        Args:
            format_spec (str): The format specification passed by f-string or str.format(). This is
            a string such as "0<30" which would mean "left justify to 30, filling with zeros".
            The full specification can be found at
            https://docs.python.org/3/library/string.html#formatspec

        Returns:
            ansi_str (str): The formatted ANSIString's .raw() form, for display.

        """
        # This calls the compiled regex stored on ANSIString's class to analyze the format spec.
        # It returns a dictionary.
        format_data = self.re_format.match(format_spec).groupdict()
        clean = self.clean()
        base_output = ANSIString(self.raw())
        align = format_data.get("align", "<")
        fill = format_data.get("fill", " ")

        # Need to coerce width into an integer. We can be certain that it's numeric thanks to regex.
        width = format_data.get("width", None)
        if width is None:
            width = len(clean)
        else:
            width = int(width)

        if align == "<":
            base_output = self.ljust(width, fill)
        elif align == ">":
            base_output = self.rjust(width, fill)
        elif align == "^":
            base_output = self.center(width, fill)
        elif align == "=":
            pass

        # Return the raw string with ANSI markup, ready to be displayed.
        return base_output.raw()

    def __repr__(self):
        """
        Let's make the repr the command that would actually be used to
        construct this object, for convenience and reference.

        """
        return "ANSIString(%s, decoded=True)" % repr(self._raw_string)

[docs]    def __init__(self, *_, **kwargs):
        """
        When the ANSIString is first initialized, a few internal variables
        have to be set.

        The first is the parser. It is possible to replace Evennia's standard
        ANSI parser with one of your own syntax if you wish, so long as it
        implements the same interface.

        The second is the _raw_string. This is the original "dumb" string
        with ansi escapes that ANSIString represents.

        The third thing to set is the _clean_string. This is a string that is
        devoid of all ANSI Escapes.

        Finally, _code_indexes and _char_indexes are defined. These are lookup
        tables for which characters in the raw string are related to ANSI
        escapes, and which are for the readable text.

        """
        self.parser = kwargs.pop("parser", ANSI_PARSER)
        super().__init__()
        if self._code_indexes is None:
            self._code_indexes, self._char_indexes = self._get_indexes()

    @staticmethod
    def _shifter(iterable, offset):
        """
        Takes a list of integers, and produces a new one incrementing all
        by a number.

        """
        if not offset:
            return iterable
        return [i + offset for i in iterable]

    @classmethod
    def _adder(cls, first, second):
        """
        Joins two ANSIStrings, preserving calculated info.

        """

        raw_string = first._raw_string + second._raw_string
        clean_string = first._clean_string + second._clean_string
        code_indexes = first._code_indexes[:]
        char_indexes = first._char_indexes[:]
        code_indexes.extend(cls._shifter(second._code_indexes, len(first._raw_string)))
        char_indexes.extend(cls._shifter(second._char_indexes, len(first._raw_string)))
        return ANSIString(
            raw_string,
            code_indexes=code_indexes,
            char_indexes=char_indexes,
            clean_string=clean_string,
        )

    def __add__(self, other):
        """
        We have to be careful when adding two strings not to reprocess things
        that don't need to be reprocessed, lest we end up with escapes being
        interpreted literally.

        """
        if not isinstance(other, str):
            return NotImplemented
        if not isinstance(other, ANSIString):
            other = ANSIString(other)
        return self._adder(self, other)

    def __radd__(self, other):
        """
        Likewise, if we're on the other end.

        """
        if not isinstance(other, str):
            return NotImplemented
        if not isinstance(other, ANSIString):
            other = ANSIString(other)
        return self._adder(other, self)

    def __getslice__(self, i, j):
        """
        This function is deprecated, so we just make it call the proper
        function.

        """
        return self.__getitem__(slice(i, j))

    def _slice(self, slc):
        """
        This function takes a slice() object.

        Slices have to be handled specially. Not only are they able to specify
        a start and end with [x:y], but many forget that they can also specify
        an interval with [x:y:z]. As a result, not only do we have to track
        the ANSI Escapes that have played before the start of the slice, we
        must also replay any in these intervals, should they exist.

        Thankfully, slicing the _char_indexes table gives us the actual
        indexes that need slicing in the raw string. We can check between
        those indexes to figure out what escape characters need to be
        replayed.

        """
        char_indexes = self._char_indexes
        slice_indexes = char_indexes[slc]
        # If it's the end of the string, we need to append final color codes.
        if not slice_indexes:
            # if we find no characters it may be because we are just outside
            # of the interval, using an open-ended slice. We must replay all
            # of the escape characters until/after this point.
            if char_indexes:
                if slc.start is None and slc.stop is None:
                    # a [:] slice of only escape characters
                    return ANSIString(self._raw_string[slc])
                if slc.start is None:
                    # this is a [:x] slice
                    return ANSIString(self._raw_string[: char_indexes[0]])
                if slc.stop is None:
                    # a [x:] slice
                    return ANSIString(self._raw_string[char_indexes[-1] + 1 :])
            return ANSIString("")
        try:
            string = self[slc.start or 0]._raw_string
        except IndexError:
            return ANSIString("")
        last_mark = slice_indexes[0]
        # Check between the slice intervals for escape sequences.
        i = None
        for i in slice_indexes[1:]:
            for index in range(last_mark, i):
                if index in self._code_indexes:
                    string += self._raw_string[index]
            last_mark = i
            try:
                string += self._raw_string[i]
            except IndexError:
                # raw_string not long enough
                pass
        if i is not None:
            append_tail = self._get_interleving(char_indexes.index(i) + 1)
        else:
            append_tail = ""
        return ANSIString(string + append_tail, decoded=True)

    def __getitem__(self, item):
        """
        Gateway for slices and getting specific indexes in the ANSIString. If
        this is a regexable ANSIString, it will get the data from the raw
        string instead, bypassing ANSIString's intelligent escape skipping,
        for reasons explained in the __new__ method's docstring.

        """
        if isinstance(item, slice):
            # Slices must be handled specially.
            return self._slice(item)
        try:
            self._char_indexes[item]
        except IndexError:
            raise IndexError("ANSIString Index out of range")
        # Get character codes after the index as well.
        if self._char_indexes[-1] == self._char_indexes[item]:
            append_tail = self._get_interleving(item + 1)
        else:
            append_tail = ""
        item = self._char_indexes[item]

        clean = self._raw_string[item]
        result = ""
        # Get the character they're after, and replay all escape sequences
        # previous to it.
        for index in range(0, item + 1):
            if index in self._code_indexes:
                result += self._raw_string[index]
        return ANSIString(result + clean + append_tail, decoded=True)

[docs]    def clean(self):
        """
        Return a string object *without* the ANSI escapes.

        Returns:
            clean_string (str): A unicode object with no ANSI escapes.

        """
        return self._clean_string

[docs]    def raw(self):
        """
        Return a string object with the ANSI escapes.

        Returns:
            raw (str): A unicode object *with* the raw ANSI escape sequences.

        """
        return self._raw_string

[docs]    def partition(self, sep, reverse=False):
        """
        Splits once into three sections (with the separator being the middle section)

        We use the same techniques we used in split() to make sure each are
        colored.

        Args:
            sep (str): The separator to split the string on.
            reverse (boolean): Whether to split the string on the last
                occurrence of the separator rather than the first.

        Returns:
            ANSIString: The part of the string before the separator
            ANSIString: The separator itself
            ANSIString: The part of the string after the separator.

        """
        if hasattr(sep, "_clean_string"):
            sep = sep.clean()
        if reverse:
            parent_result = self._clean_string.rpartition(sep)
        else:
            parent_result = self._clean_string.partition(sep)
        current_index = 0
        result = tuple()
        for section in parent_result:
            result += (self[current_index : current_index + len(section)],)
            current_index += len(section)
        return result

    def _get_indexes(self):
        """
        Two tables need to be made, one which contains the indexes of all
        readable characters, and one which contains the indexes of all ANSI
        escapes. It's important to remember that ANSI escapes require more
        that one character at a time, though no readable character needs more
        than one character, since the string base class abstracts that away
        from us. However, several readable characters can be placed in a row.

        We must use regexes here to figure out where all the escape sequences
        are hiding in the string. Then we use the ranges of their starts and
        ends to create a final, comprehensive list of all indexes which are
        dedicated to code, and all dedicated to text.

        It's possible that only one of these tables is actually needed, the
        other assumed to be what isn't in the first.

        """

        code_indexes = []
        for match in self.parser.ansi_regex.finditer(self._raw_string):
            code_indexes.extend(list(range(match.start(), match.end())))
        if not code_indexes:
            # Plain string, no ANSI codes.
            return code_indexes, list(range(0, len(self._raw_string)))
        # all indexes not occupied by ansi codes are normal characters
        char_indexes = [i for i in range(len(self._raw_string)) if i not in code_indexes]
        return code_indexes, char_indexes

    def _get_interleving(self, index):
        """
        Get the code characters from the given slice end to the next
        character.

        """
        try:
            index = self._char_indexes[index - 1]
        except IndexError:
            return ""
        s = ""
        while True:
            index += 1
            if index in self._char_indexes:
                break
            elif index in self._code_indexes:
                s += self._raw_string[index]
            else:
                break
        return s

    def __mul__(self, other):
        """
        Multiplication method. Implemented for performance reasons.

        """
        if not isinstance(other, int):
            return NotImplemented
        raw_string = self._raw_string * other
        clean_string = self._clean_string * other
        code_indexes = self._code_indexes[:]
        char_indexes = self._char_indexes[:]
        for i in range(other):
            code_indexes.extend(self._shifter(self._code_indexes, i * len(self._raw_string)))
            char_indexes.extend(self._shifter(self._char_indexes, i * len(self._raw_string)))
        return ANSIString(
            raw_string,
            code_indexes=code_indexes,
            char_indexes=char_indexes,
            clean_string=clean_string,
        )

    def __rmul__(self, other):
        return self.__mul__(other)

[docs]    def split(self, by=None, maxsplit=-1):
        """
        Splits a string based on a separator.

        Stolen from PyPy's pure Python string implementation, tweaked for
        ANSIString.

        PyPy is distributed under the MIT licence.
        http://opensource.org/licenses/MIT

        Args:
            by (str): A string to search for which will be used to split
                the string. For instance, ',' for 'Hello,world' would
                result in ['Hello', 'world']
            maxsplit (int): The maximum number of times to split the string.
                For example, a maxsplit of 2 with a by of ',' on the string
                'Hello,world,test,string' would result in
                ['Hello', 'world', 'test,string']
        Returns:
            result (list of ANSIStrings): A list of ANSIStrings derived from
                this string.

        """
        drop_spaces = by is None
        if drop_spaces:
            by = " "

        bylen = len(by)
        if bylen == 0:
            raise ValueError("empty separator")

        res = []
        start = 0
        while maxsplit != 0:
            next = self._clean_string.find(by, start)
            if next < 0:
                break
            # Get character codes after the index as well.
            res.append(self[start:next])
            start = next + bylen
            maxsplit -= 1  # NB. if it's already < 0, it stays < 0

        res.append(self[start : len(self)])
        if drop_spaces:
            return [part for part in res if part != ""]
        return res

[docs]    def rsplit(self, by=None, maxsplit=-1):
        """
        Like split, but starts from the end of the string rather than the
        beginning.

        Stolen from PyPy's pure Python string implementation, tweaked for
        ANSIString.

        PyPy is distributed under the MIT licence.
        http://opensource.org/licenses/MIT

        Args:
            by (str): A string to search for which will be used to split
                the string. For instance, ',' for 'Hello,world' would
                result in ['Hello', 'world']
            maxsplit (int): The maximum number of times to split the string.
                For example, a maxsplit of 2 with a by of ',' on the string
                'Hello,world,test,string' would result in
                ['Hello,world', 'test', 'string']
        Returns:
            result (list of ANSIStrings): A list of ANSIStrings derived from
                this string.

        """
        res = []
        end = len(self)
        drop_spaces = by is None
        if drop_spaces:
            by = " "
        bylen = len(by)
        if bylen == 0:
            raise ValueError("empty separator")

        while maxsplit != 0:
            next = self._clean_string.rfind(by, 0, end)
            if next < 0:
                break
            # Get character codes after the index as well.
            res.append(self[next + bylen : end])
            end = next
            maxsplit -= 1  # NB. if it's already < 0, it stays < 0

        res.append(self[:end])
        res.reverse()
        if drop_spaces:
            return [part for part in res if part != ""]
        return res

[docs]    def strip(self, chars=None):
        """
        Strip from both ends, taking ANSI markers into account.

        Args:
            chars (str, optional): A string containing individual characters
                to strip off of both ends of the string. By default, any blank
                spaces are trimmed.
        Returns:
            result (ANSIString): A new ANSIString with the ends trimmed of the
                relevant characters.

        """
        clean = self._clean_string
        raw = self._raw_string

        # count continuous sequence of chars from left and right
        nlen = len(clean)
        nlstripped = nlen - len(clean.lstrip(chars))
        nrstripped = nlen - len(clean.rstrip(chars))

        # within the stripped regions, only retain parts of the raw
        # string *not* matching the clean string (these are ansi/mxp tags)
        lstripped = ""
        ic, ir1 = 0, 0
        while nlstripped:
            if ic >= nlstripped:
                break
            elif raw[ir1] != clean[ic]:
                lstripped += raw[ir1]
            else:
                ic += 1
            ir1 += 1
        rstripped = ""
        ic, ir2 = nlen - 1, len(raw) - 1
        while nrstripped:
            if nlen - ic > nrstripped:
                break
            elif raw[ir2] != clean[ic]:
                rstripped += raw[ir2]
            else:
                ic -= 1
            ir2 -= 1
        rstripped = rstripped[::-1]
        return ANSIString(lstripped + raw[ir1 : ir2 + 1] + rstripped)

[docs]    def lstrip(self, chars=None):
        """
        Strip from the left, taking ANSI markers into account.

        Args:
            chars (str, optional): A string containing individual characters
                to strip off of the left end of the string. By default, any
                blank spaces are trimmed.
        Returns:
            result (ANSIString): A new ANSIString with the left end trimmed of
                the relevant characters.

        """
        clean = self._clean_string
        raw = self._raw_string

        # count continuous sequence of chars from left and right
        nlen = len(clean)
        nlstripped = nlen - len(clean.lstrip(chars))
        # within the stripped regions, only retain parts of the raw
        # string *not* matching the clean string (these are ansi/mxp tags)
        lstripped = ""
        ic, ir1 = 0, 0
        while nlstripped:
            if ic >= nlstripped:
                break
            elif raw[ir1] != clean[ic]:
                lstripped += raw[ir1]
            else:
                ic += 1
            ir1 += 1
        return ANSIString(lstripped + raw[ir1:])

[docs]    def rstrip(self, chars=None):
        """
        Strip from the right, taking ANSI markers into account.

        Args:
            chars (str, optional): A string containing individual characters
                to strip off of the right end of the string. By default, any
                blank spaces are trimmed.
        Returns:
            result (ANSIString): A new ANSIString with the right end trimmed of
                the relevant characters.

        """
        clean = self._clean_string
        raw = self._raw_string
        nlen = len(clean)
        nrstripped = nlen - len(clean.rstrip(chars))
        rstripped = ""
        ic, ir2 = nlen - 1, len(raw) - 1
        while nrstripped:
            if nlen - ic > nrstripped:
                break
            elif raw[ir2] != clean[ic]:
                rstripped += raw[ir2]
            else:
                ic -= 1
            ir2 -= 1
        rstripped = rstripped[::-1]
        return ANSIString(raw[: ir2 + 1] + rstripped)

[docs]    def join(self, iterable):
        """
        Joins together strings in an iterable, using this string between each
        one.

        NOTE: This should always be used for joining strings when ANSIStrings
        are involved. Otherwise color information will be discarded by python,
        due to details in the C implementation of strings.

        Args:
            iterable (list of strings): A list of strings to join together

        Returns:
            ANSIString: A single string with all of the iterable's
                contents concatenated, with this string between each.

        Examples:
            ::

                >>> ANSIString(', ').join(['up', 'right', 'left', 'down'])
                ANSIString('up, right, left, down')

        """
        result = ANSIString("")
        last_item = None
        for item in iterable:
            if last_item is not None:
                result += self._raw_string
            if not isinstance(item, ANSIString):
                item = ANSIString(item)
            result += item
            last_item = item
        return result

    def _filler(self, char, amount):
        """
        Generate a line of characters in a more efficient way than just adding
        ANSIStrings.

        """
        if not isinstance(char, ANSIString):
            line = char * amount
            return ANSIString(
                char * amount,
                code_indexes=[],
                char_indexes=list(range(0, len(line))),
                clean_string=char,
            )
        try:
            start = char._code_indexes[0]
        except IndexError:
            start = None
        end = char._char_indexes[0]
        prefix = char._raw_string[start:end]
        postfix = char._raw_string[end + 1 :]
        line = char._clean_string * amount
        code_indexes = [i for i in range(0, len(prefix))]
        length = len(prefix) + len(line)
        code_indexes.extend([i for i in range(length, length + len(postfix))])
        char_indexes = self._shifter(list(range(0, len(line))), len(prefix))
        raw_string = prefix + line + postfix
        return ANSIString(
            raw_string, clean_string=line, char_indexes=char_indexes, code_indexes=code_indexes
        )

    # The following methods should not be called with the '_difference' argument explicitly. This is
    # data provided by the wrapper _spacing_preflight.
[docs]    @_spacing_preflight
    def center(self, width, fillchar, _difference):
        """
        Center some text with some spaces padding both sides.

        Args:
            width (int): The target width of the output string.
            fillchar (str): A single character string to pad the output string
                with.
        Returns:
            result (ANSIString): A string padded on both ends with fillchar.

        """
        remainder = _difference % 2
        _difference //= 2
        spacing = self._filler(fillchar, _difference)
        result = spacing + self + spacing + self._filler(fillchar, remainder)
        return result

[docs]    @_spacing_preflight
    def ljust(self, width, fillchar, _difference):
        """
        Left justify some text.

        Args:
            width (int): The target width of the output string.
            fillchar (str): A single character string to pad the output string
                with.
        Returns:
            result (ANSIString): A string padded on the right with fillchar.

        """
        return self + self._filler(fillchar, _difference)

[docs]    @_spacing_preflight
    def rjust(self, width, fillchar, _difference):
        """
        Right justify some text.

        Args:
            width (int): The target width of the output string.
            fillchar (str): A single character string to pad the output string
                with.
        Returns:
            result (ANSIString): A string padded on the left with fillchar.

        """
        return self._filler(fillchar, _difference) + self
Links

Doc Versions

Source code for evennia.utils.ansi