Source code for evennia.utils.ansi

"""
ANSI - Gives colour to text.

Use the codes defined in the *ANSIParser* class to apply colour to text. The
`parse_ansi` function in this module parses text for markup and `strip_ansi`
removes it.

You should usually not need to call `parse_ansi` explicitly; it is run by
Evennia just before returning data to/from the user. Alternative markup is
possible by overriding the parser class (see also contrib/ for deprecated
markup schemes).


Supported standards:

- ANSI 8 bright and 8 dark fg (foreground) colors
- ANSI 8 dark bg (background) colors
- 'ANSI' 8 bright bg colors 'faked' with xterm256 (bright bg not included in ANSI standard)
- Xterm256 - 216 fg/bg colors (6x6x6 RGB cube, values 0-5 per channel) + 26 greyscale fg/bg colors

## Markup

ANSI colors: `r` ed, `g` reen, `y` ellow, `b` lue, `m` agenta, `c` yan, `n` ormal (no color).
Capital letters indicate the 'dark' variant.

- `|r` fg bright red
- `|R` fg dark red
- `|[r` bg bright red
- `|[R` bg dark red
- `|[R|g` bg dark red, fg bright green

```python
"This is |rRed text|n and this is normal again."

```

Xterm256 colors are given as RGB (Red-Green-Blue), with values 0-5:

- `|500` fg bright red
- `|050` fg bright green
- `|005` fg bright blue
- `|110` fg dark brown
- `|425` fg pink
- `|[431` bg orange

Xterm256 greyscale:

- `|=a` fg black
- `|=g` fg dark grey
- `|=o` fg middle grey
- `|=v` fg bright grey
- `|=z` fg white
- `|[=r` bg middle grey

```python
"This is |500Red text|n and this is normal again."
"This is |[=jText on dark grey background"

```

----

"""

import functools
import re
from collections import OrderedDict

from django.conf import settings
from evennia.utils import logger, utils
from evennia.utils.hex_colors import HexColors
from evennia.utils.utils import to_str

# Truecolor helper instance and the pattern object it exposes; both are used
# by ANSIParser.parse_ansi (via `hex_sub.sub(...)` and
# `hex2truecolor.sub_truecolor(...)`).
hex2truecolor = HexColors()
hex_sub = HexColors.hex_sub

# Passed as the `mxp` flag when ANSIString parses a not-yet-decoded string.
MXP_ENABLED = settings.MXP_ENABLED

# ANSI definitions

ANSI_BEEP = "\07"
ANSI_ESCAPE = "\033"
ANSI_NORMAL = "\033[0m"

# Text attributes and their resets
ANSI_UNDERLINE = "\033[4m"
ANSI_UNDERLINE_RESET = "\033[24m"
ANSI_ITALIC = "\033[3m"
ANSI_ITALIC_RESET = "\033[23m"
ANSI_STRIKE = "\033[9m"
ANSI_STRIKE_RESET = "\033[29m"
ANSI_HILITE = "\033[1m"
ANSI_UNHILITE = "\033[22m"
ANSI_BLINK = "\033[5m"
ANSI_INVERSE = "\033[7m"
# Combined attribute sequences (several SGR parameters in one escape)
ANSI_INV_HILITE = "\033[1;7m"
ANSI_INV_BLINK = "\033[7;5m"
ANSI_BLINK_HILITE = "\033[1;5m"
ANSI_INV_BLINK_HILITE = "\033[1;5;7m"

# Foreground colors
ANSI_BLACK = "\033[30m"
ANSI_RED = "\033[31m"
ANSI_GREEN = "\033[32m"
ANSI_YELLOW = "\033[33m"
ANSI_BLUE = "\033[34m"
ANSI_MAGENTA = "\033[35m"
ANSI_CYAN = "\033[36m"
ANSI_WHITE = "\033[37m"

# Background colors
ANSI_BACK_BLACK = "\033[40m"
ANSI_BACK_RED = "\033[41m"
ANSI_BACK_GREEN = "\033[42m"
ANSI_BACK_YELLOW = "\033[43m"
ANSI_BACK_BLUE = "\033[44m"
ANSI_BACK_MAGENTA = "\033[45m"
ANSI_BACK_CYAN = "\033[46m"
ANSI_BACK_WHITE = "\033[47m"

# Formatting Characters
ANSI_RETURN = "\r\n"
ANSI_TAB = "\t"
ANSI_SPACE = " "

# Escapes
# These are regex alternatives (note the escaped backslash and pipes); they
# are joined with "|" to build ANSIParser.ansi_escapes.
ANSI_ESCAPES = ("{{", r"\\", r"\|\|")

# Cache of parse results, in insertion order. ANSIParser.parse_ansi drops the
# oldest entry once the cache grows beyond _PARSE_CACHE_SIZE.
_PARSE_CACHE = OrderedDict()
_PARSE_CACHE_SIZE = 10000

# NOTE(review): not referenced in the visible part of this module -
# ANSIParser reads settings.COLOR_NO_DEFAULT directly. Confirm whether other
# code imports this name before removing it.
_COLOR_NO_DEFAULT = settings.COLOR_NO_DEFAULT


[docs]class ANSIParser(object): """ A class that parses ANSI markup to ANSI command sequences We also allow to escape colour codes by prepending with an extra `|`. """ # Mapping using {r {n etc ansi_map = [ # alternative |-format (r"|n", ANSI_NORMAL), # reset (r"|/", ANSI_RETURN), # line break (r"|-", ANSI_TAB), # tab (r"|>", ANSI_SPACE * 4), # indent (4 spaces) (r"|_", ANSI_SPACE), # space (r"|*", ANSI_INVERSE), # invert (r"|^", ANSI_BLINK), # blinking text (very annoying and not supported by all clients) (r"|u", ANSI_UNDERLINE), # underline (r"|U", ANSI_UNDERLINE_RESET), # underline reset (r"|i", ANSI_ITALIC), # italic (r"|I", ANSI_ITALIC_RESET), # italic reset (r"|s", ANSI_STRIKE), # strikethrough (r"|S", ANSI_STRIKE_RESET), # strikethrough reset (r"|r", ANSI_HILITE + ANSI_RED), (r"|g", ANSI_HILITE + ANSI_GREEN), (r"|y", ANSI_HILITE + ANSI_YELLOW), (r"|b", ANSI_HILITE + ANSI_BLUE), (r"|m", ANSI_HILITE + ANSI_MAGENTA), (r"|c", ANSI_HILITE + ANSI_CYAN), (r"|w", ANSI_HILITE + ANSI_WHITE), # pure white (r"|x", ANSI_HILITE + ANSI_BLACK), # dark grey (r"|R", ANSI_UNHILITE + ANSI_RED), (r"|G", ANSI_UNHILITE + ANSI_GREEN), (r"|Y", ANSI_UNHILITE + ANSI_YELLOW), (r"|B", ANSI_UNHILITE + ANSI_BLUE), (r"|M", ANSI_UNHILITE + ANSI_MAGENTA), (r"|C", ANSI_UNHILITE + ANSI_CYAN), (r"|W", ANSI_UNHILITE + ANSI_WHITE), # light grey (r"|X", ANSI_UNHILITE + ANSI_BLACK), # pure black # hilight-able colors (r"|h", ANSI_HILITE), (r"|H", ANSI_UNHILITE), (r"|!R", ANSI_RED), (r"|!G", ANSI_GREEN), (r"|!Y", ANSI_YELLOW), (r"|!B", ANSI_BLUE), (r"|!M", ANSI_MAGENTA), (r"|!C", ANSI_CYAN), (r"|!W", ANSI_WHITE), # light grey (r"|!X", ANSI_BLACK), # pure black # normal ANSI backgrounds (r"|[R", ANSI_BACK_RED), (r"|[G", ANSI_BACK_GREEN), (r"|[Y", ANSI_BACK_YELLOW), (r"|[B", ANSI_BACK_BLUE), (r"|[M", ANSI_BACK_MAGENTA), (r"|[C", ANSI_BACK_CYAN), (r"|[W", ANSI_BACK_WHITE), # light grey background (r"|[X", ANSI_BACK_BLACK), # pure black background ] ansi_xterm256_bright_bg_map = [ # "bright" ANSI 
backgrounds using xterm256 since ANSI # standard does not support it (will # fallback to dark ANSI background colors if xterm256 # is not supported by client) # |-style variations (r"|[r", r"|[500"), (r"|[g", r"|[050"), (r"|[y", r"|[550"), (r"|[b", r"|[005"), (r"|[m", r"|[505"), (r"|[c", r"|[055"), (r"|[w", r"|[555"), # white background (r"|[x", r"|[222"), ] # dark grey background # xterm256. These are replaced directly by # the sub_xterm256 method if settings.COLOR_NO_DEFAULT: ansi_map = settings.COLOR_ANSI_EXTRA_MAP xterm256_fg = settings.COLOR_XTERM256_EXTRA_FG xterm256_bg = settings.COLOR_XTERM256_EXTRA_BG xterm256_gfg = settings.COLOR_XTERM256_EXTRA_GFG xterm256_gbg = settings.COLOR_XTERM256_EXTRA_GBG ansi_xterm256_bright_bg_map = settings.COLOR_ANSI_XTERM256_BRIGHT_BG_EXTRA_MAP else: xterm256_fg = [r"\|([0-5])([0-5])([0-5])"] # |123 - foreground colour xterm256_bg = [r"\|\[([0-5])([0-5])([0-5])"] # |[123 - background colour xterm256_gfg = [r"\|=([a-z])"] # |=a - greyscale foreground xterm256_gbg = [r"\|\[=([a-z])"] # |[=a - greyscale background ansi_map += settings.COLOR_ANSI_EXTRA_MAP xterm256_fg += settings.COLOR_XTERM256_EXTRA_FG xterm256_bg += settings.COLOR_XTERM256_EXTRA_BG xterm256_gfg += settings.COLOR_XTERM256_EXTRA_GFG xterm256_gbg += settings.COLOR_XTERM256_EXTRA_GBG ansi_xterm256_bright_bg_map += settings.COLOR_ANSI_XTERM256_BRIGHT_BG_EXTRA_MAP mxp_re = r"\|lc(.*?)\|lt(.*?)\|le" mxp_url_re = r"\|lu(.*?)\|lt(.*?)\|le" # prepare regex matching brightbg_sub = re.compile( r"|".join([r"(?<!\|)%s" % re.escape(tup[0]) for tup in ansi_xterm256_bright_bg_map]), re.DOTALL, ) xterm256_fg_sub = re.compile(r"|".join(xterm256_fg), re.DOTALL) xterm256_bg_sub = re.compile(r"|".join(xterm256_bg), re.DOTALL) xterm256_gfg_sub = re.compile(r"|".join(xterm256_gfg), re.DOTALL) xterm256_gbg_sub = re.compile(r"|".join(xterm256_gbg), re.DOTALL) # xterm256_sub = re.compile(r"|".join([tup[0] for tup in xterm256_map]), re.DOTALL) ansi_sub = 
re.compile(r"|".join([re.escape(tup[0]) for tup in ansi_map]), re.DOTALL) mxp_sub = re.compile(mxp_re, re.DOTALL) mxp_url_sub = re.compile(mxp_url_re, re.DOTALL) # used by regex replacer to correctly map ansi sequences ansi_map_dict = dict(ansi_map) ansi_xterm256_bright_bg_map_dict = dict(ansi_xterm256_bright_bg_map) # prepare matching ansi codes overall ansi_re = r"\033\[[0-9;]+m" ansi_regex = re.compile(ansi_re) # escapes - these double-chars will be replaced with a single # instance of each ansi_escapes = re.compile(r"(%s)" % "|".join(ANSI_ESCAPES), re.DOTALL) # tabs/linebreaks |/ and |- should be able to be cleaned unsafe_tokens = re.compile(r"\|\/|\|-", re.DOTALL)
[docs] def sub_ansi(self, ansimatch): """ Replacer used by `re.sub` to replace ANSI markers with correct ANSI sequences Args: ansimatch (re.matchobject): The match. Returns: processed (str): The processed match string. """ return self.ansi_map_dict.get(ansimatch.group(), "")
[docs] def sub_brightbg(self, ansimatch): """ Replacer used by `re.sub` to replace ANSI bright background markers with Xterm256 replacement Args: ansimatch (re.matchobject): The match. Returns: processed (str): The processed match string. """ return self.ansi_xterm256_bright_bg_map_dict.get(ansimatch.group(), "")
[docs] def sub_xterm256(self, rgbmatch, use_xterm256=False, color_type="fg"): """ This is a replacer method called by `re.sub` with the matched tag. It must return the correct ansi sequence. It checks `self.do_xterm256` to determine if conversion to standard ANSI should be done or not. Args: rgbmatch (re.matchobject): The match. use_xterm256 (bool, optional): Don't convert 256-colors to 16. color_type (str): One of 'fg', 'bg', 'gfg', 'gbg'. Returns: processed (str): The processed match string. """ if not rgbmatch: return "" # get tag, stripping the initial marker # rgbtag = rgbmatch.group()[1:] background = color_type in ("bg", "gbg") grayscale = color_type in ("gfg", "gbg") if not grayscale: # 6x6x6 color-cube (xterm indexes 16-231) try: red, green, blue = [int(val) for val in rgbmatch.groups() if val is not None] except (IndexError, ValueError): logger.log_trace() return rgbmatch.group(0) else: # grayscale values (xterm indexes 0, 232-255, 15) for full spectrum try: letter = [val for val in rgbmatch.groups() if val is not None][0] except IndexError: logger.log_trace() return rgbmatch.group(0) if letter == "a": colval = 16 # pure black @ index 16 (first color cube entry) elif letter == "z": colval = 231 # pure white @ index 231 (last color cube entry) else: # letter in range [b..y] (exactly 24 values!) colval = 134 + ord(letter) # ansi fallback logic expects r,g,b values in [0..5] range gray = round((ord(letter) - 97) / 5.0) red, green, blue = gray, gray, gray if use_xterm256: if not grayscale: colval = 16 + (red * 36) + (green * 6) + blue return "\033[%s8;5;%sm" % (3 + int(background), colval) # replaced since some clients (like Potato) does not accept codes with leading zeroes, # see issue #1024. 
# return "\033[%s8;5;%s%s%sm" % (3 + int(background), colval // 100, (colval % 100) // 10, colval%10) # noqa else: # xterm256 not supported, convert the rgb value to ansi instead rgb = (red, green, blue) def _convert_for_ansi(val): return int((val + 1) // 2) # greys if (max(rgb) - min(rgb)) <= 1: match rgb: case (0, 0, 0): return ANSI_BACK_BLACK if background else ANSI_NORMAL + ANSI_BLACK case ((1 | 2), (1 | 2), (1 | 2)): return ANSI_BACK_BLACK if background else ANSI_HILITE + ANSI_BLACK case ((2 | 3), (2 | 3), (2 | 3)): return ANSI_BACK_WHITE if background else ANSI_NORMAL + ANSI_WHITE case ((3 | 4), (3 | 4), (3 | 4)): return ANSI_BACK_WHITE if background else ANSI_NORMAL + ANSI_WHITE case ((4 | 5), (4 | 5), (4 | 5)): return ANSI_BACK_WHITE if background else ANSI_HILITE + ANSI_WHITE match tuple(_convert_for_ansi(c) for c in rgb): # red case ((2 | 3), (0 | 1), (0 | 1)): return ANSI_BACK_RED if background else ANSI_HILITE + ANSI_RED case ((1 | 2), 0, 0): return ANSI_BACK_RED if background else ANSI_NORMAL + ANSI_RED # green case ((0 | 1), (2 | 3), (0 | 1)): return ANSI_BACK_GREEN if background else ANSI_HILITE + ANSI_GREEN case ((0 | 1), 1, 0) if green > red: return ANSI_BACK_GREEN if background else ANSI_NORMAL + ANSI_GREEN # blue case ((0 | 1), (0 | 1), (2 | 3)): return ANSI_BACK_BLUE if background else ANSI_HILITE + ANSI_BLUE case (0, 0, 1): return ANSI_BACK_BLUE if background else ANSI_NORMAL + ANSI_BLUE # cyan case ((0 | 1 | 2), (2 | 3), (2 | 3)) if red == min(rgb): return ANSI_BACK_CYAN if background else ANSI_HILITE + ANSI_CYAN case (0, (1 | 2), (1 | 2)): return ANSI_BACK_CYAN if background else ANSI_NORMAL + ANSI_CYAN # yellow case ((2 | 3), (2 | 3), (0 | 1 | 2)) if blue == min(rgb): return ANSI_BACK_YELLOW if background else ANSI_HILITE + ANSI_YELLOW case ((2 | 1), (2 | 1), (0 | 1)): return ANSI_BACK_YELLOW if background else ANSI_NORMAL + ANSI_YELLOW # magenta case ((2 | 3), (0 | 1 | 2), (2 | 3)) if green == min(rgb): return ANSI_BACK_MAGENTA if 
background else ANSI_HILITE + ANSI_MAGENTA case ((1 | 2), 0, (1 | 2)): return ANSI_BACK_MAGENTA if background else ANSI_NORMAL + ANSI_MAGENTA
[docs] def strip_raw_codes(self, string): """ Strips raw ANSI codes from a string. Args: string (str): The string to strip. Returns: string (str): The processed string. """ return self.ansi_regex.sub("", string)
[docs] def strip_mxp(self, string): """ Strips all MXP codes from a string. Args: string (str): The string to strip. Returns: string (str): The processed string. """ string = self.mxp_sub.sub(r"\2", string) string = self.mxp_url_sub.sub(r"\1", string) # replace with url verbatim return string
[docs] def strip_unsafe_tokens(self, string): """ Strip explicitly ansi line breaks and tabs. """ return self.unsafe_tokens.sub("", string)
[docs] def parse_ansi(self, string, strip_ansi=False, xterm256=False, mxp=False, truecolor=False): """ Parses a string, subbing color codes according to the stored mapping. Args: string (str): The string to parse. strip_ansi (boolean, optional): Strip all found ansi markup. xterm256 (boolean, optional): If actually using xterm256 or if these values should be converted to 16-color ANSI. mxp (boolean, optional): Parse MXP commands in string. Returns: string (str): The parsed string. """ if hasattr(string, "_raw_string"): if strip_ansi: return string.clean() else: return string.raw() if not string: return "" # check cached parsings global _PARSE_CACHE cachekey = f"{string}-{strip_ansi}-{xterm256}-{mxp}-{truecolor}" if cachekey in _PARSE_CACHE: return _PARSE_CACHE[cachekey] # pre-convert bright colors to xterm256 color tags string = self.brightbg_sub.sub(self.sub_brightbg, string) def do_truecolor(part: re.Match, truecolor=truecolor): return hex2truecolor.sub_truecolor(part, truecolor) def do_xterm256_fg(part): return self.sub_xterm256(part, xterm256, "fg") def do_xterm256_bg(part): return self.sub_xterm256(part, xterm256, "bg") def do_xterm256_gfg(part): return self.sub_xterm256(part, xterm256, "gfg") def do_xterm256_gbg(part): return self.sub_xterm256(part, xterm256, "gbg") in_string = utils.to_str(string) # do string replacement parsed_string = [] parts = self.ansi_escapes.split(in_string) + [" "] for part, sep in zip(parts[::2], parts[1::2]): pstring = hex_sub.sub(do_truecolor, part) pstring = self.xterm256_fg_sub.sub(do_xterm256_fg, pstring) pstring = self.xterm256_bg_sub.sub(do_xterm256_bg, pstring) pstring = self.xterm256_gfg_sub.sub(do_xterm256_gfg, pstring) pstring = self.xterm256_gbg_sub.sub(do_xterm256_gbg, pstring) pstring = self.ansi_sub.sub(self.sub_ansi, pstring) parsed_string.append("%s%s" % (pstring, sep[0].strip())) parsed_string = "".join(parsed_string) if not mxp: parsed_string = self.strip_mxp(parsed_string) if strip_ansi: # remove all ansi codes 
(including those manually # inserted in string) return self.strip_raw_codes(parsed_string) # cache and crop old cache _PARSE_CACHE[cachekey] = parsed_string if len(_PARSE_CACHE) > _PARSE_CACHE_SIZE: _PARSE_CACHE.popitem(last=False) return parsed_string
# Shared parser instance; it is the default `parser` argument of the access
# functions in this module.
ANSI_PARSER = ANSIParser()


#
# Access function
#
def parse_ansi(
    string, strip_ansi=False, parser=ANSI_PARSER, xterm256=False, mxp=False, truecolor=False
):
    """
    Convert colour markup in a string by delegating to a parser.

    Args:
        string (str): The string to parse. Any falsy value is treated as
            an empty string.
        strip_ansi (bool, optional): Strip all ANSI sequences.
        parser (ansi.AnsiParser, optional): A parser instance to use.
        xterm256 (bool, optional): Support xterm256 or not.
        mxp (bool, optional): Support MXP markup or not.
        truecolor (bool, optional): Support for truecolor or not.

    Returns:
        string (str): The parsed string.

    """
    if not string:
        string = ""
    parse_options = {
        "strip_ansi": strip_ansi,
        "xterm256": xterm256,
        "mxp": mxp,
        "truecolor": truecolor,
    }
    return parser.parse_ansi(string, **parse_options)
def strip_ansi(string, parser=ANSI_PARSER):
    """
    Remove all colour information from a string, including the
    Evennia-specific markup.

    Args:
        string (str): The string to strip. Any falsy value is treated as
            an empty string.
        parser (ansi.AnsiParser, optional): The parser to use.

    Returns:
        string (str): The stripped string.

    """
    if not string:
        string = ""
    stripped = parser.parse_ansi(string, strip_ansi=True)
    return stripped
def strip_raw_ansi(string, parser=ANSI_PARSER):
    """
    Remove raw ansi escape sequences from a string. Markup is not
    interpreted; only already-encoded ANSI sequences are removed.

    Args:
        string (str): The string to process. Any falsy value is treated
            as an empty string.
        parser (ansi.AnsiParser, optional): The parser to use.

    Returns:
        string (str): The stripped string.

    """
    if not string:
        string = ""
    stripped = parser.strip_raw_codes(string)
    return stripped
def strip_unsafe_tokens(string, parser=ANSI_PARSER):
    """
    Strip markup that can be used to create visual exploits
    (notably the `|/` linebreak and `|-` tab markers).

    Args:
        string (str): The string to strip. Any falsy value (such as
            `None`) is treated as an empty string, like the other access
            functions in this module do.
        parser (ansi.AnsiParser, optional): The parser to use.

    Returns:
        string (str): The string without the unsafe markers.

    """
    string = string or ""
    return parser.strip_unsafe_tokens(string)
def strip_mxp(string, parser=ANSI_PARSER):
    """
    Remove MXP markup from a string.

    Args:
        string (str): The string to strip. Any falsy value is treated as
            an empty string.
        parser (ansi.AnsiParser, optional): The parser to use.

    Returns:
        string (str): The string without MXP markup.

    """
    if not string:
        string = ""
    stripped = parser.strip_mxp(string)
    return stripped
def raw(string):
    """
    Escape a string so that the ansi parser will not colourize it.

    Every `{` is doubled to `{{` and every `|` is doubled to `||`.

    Args:
        string (str): The string to escape. Any falsy value is treated as
            an empty string.

    Returns:
        string (str): The raw, escaped string.

    """
    if not string:
        string = ""
    brace_escaped = string.replace("{", "{{")
    return brace_escaped.replace("|", "||")
# ------------------------------------------------------------
#
# ANSIString - ANSI-aware string class
#
# ------------------------------------------------------------


def _spacing_preflight(func):
    """
    This wrapper function is used to do some preflight checks on
    functions used for padding ANSIStrings.

    Args:
        func (callable): The padding implementation. It is called as
            `func(self, width, fillchar, difference)`, where `difference`
            is the number of characters missing to reach `width`.

    Returns:
        wrapped (callable): A method `wrapped(self, width=78, fillchar=None)`.
            It returns `self` unchanged when no padding is needed.

    Raises:
        TypeError: (from the wrapper) if `fillchar` is not a single
            character string or `width` is not an integer.

    """

    @functools.wraps(func)
    def wrapped(self, width=78, fillchar=None):
        if fillchar is None:
            fillchar = " "
        # check the type first, so that len() is never called on something
        # that is not a string (which would raise an unrelated TypeError)
        if not isinstance(fillchar, str) or len(fillchar) != 1:
            raise TypeError("must be char, not %s" % type(fillchar))
        if not isinstance(width, int):
            raise TypeError("integer argument expected, got %s" % type(width))
        _difference = width - len(self)
        if _difference <= 0:
            return self
        return func(self, width, fillchar, _difference)

    return wrapped


def _query_super(func_name):
    """
    Have the string class handle this with the cleaned string instead
    of ANSIString.

    Args:
        func_name (str): Name of the string method to call on the result
            of `self.clean()`.

    Returns:
        wrapped (callable): A method forwarding all arguments to that
            string method and returning its result unchanged.

    """

    def wrapped(self, *args, **kwargs):
        return getattr(self.clean(), func_name)(*args, **kwargs)

    return wrapped


def _on_raw(func_name):
    """
    Like query_super, but makes the operation run on the raw string.

    Args:
        func_name (str): Name of the string method to call on
            `self._raw_string`.

    Returns:
        wrapped (callable): A method that calls the string method. If the
            first positional argument has a `_raw_string` attribute, its
            `raw()` form is passed instead. String results are wrapped in
            an already-decoded ANSIString; other results are returned
            as they are.

    """

    def wrapped(self, *args, **kwargs):
        args = list(args)
        # only the first positional argument is converted to its raw form
        if args and hasattr(args[0], "_raw_string"):
            args[0] = args[0].raw()
        result = getattr(self._raw_string, func_name)(*args, **kwargs)
        if isinstance(result, str):
            return ANSIString(result, decoded=True)
        return result

    return wrapped


def _transform(func_name):
    """
    Some string functions, like those manipulating capital letters,
    return a string the same length as the original. This function
    allows us to do the same, replacing all the non-coded characters
    with the resulting string.

    Args:
        func_name (str): Name of the string method to apply to the clean
            string.

    Returns:
        wrapped (callable): A method returning a new ANSIString in which
            the escape codes are kept in place and the readable characters
            are taken, in order, from the transformed clean string.

    """

    def wrapped(self, *args, **kwargs):
        replacement_string = _query_super(func_name)(self, *args, **kwargs)
        # sets give constant-time membership tests inside the loop
        code_positions = set(self._code_indexes)
        char_positions = set(self._char_indexes)
        to_string = []
        char_counter = 0
        for index, raw_char in enumerate(self._raw_string):
            if index in code_positions:
                to_string.append(raw_char)
            elif index in char_positions:
                to_string.append(replacement_string[char_counter])
                char_counter += 1
        return ANSIString(
            "".join(to_string),
            decoded=True,
            code_indexes=self._code_indexes,
            char_indexes=self._char_indexes,
            clean_string=replacement_string,
        )

    return wrapped
class ANSIMeta(type):
    """
    Metaclass that installs the light-weight string wrappers on the class
    being constructed. Many methods of ANSIString only forward to the
    string base class, so they are generated here instead of being
    written out by hand.

    """

    def __init__(cls, *args, **kwargs):
        """
        Attach the generated wrapper methods to the new class.

        Each wrapper factory is paired with the names of the string
        methods it should produce.

        """
        wrapper_table = (
            # answered from the clean (escape-free) string
            (
                _query_super,
                (
                    "count",
                    "startswith",
                    "endswith",
                    "find",
                    "index",
                    "isalnum",
                    "isalpha",
                    "isdigit",
                    "islower",
                    "isspace",
                    "istitle",
                    "isupper",
                    "rfind",
                    "rindex",
                    "__len__",
                ),
            ),
            # executed on the raw string
            (_on_raw, ("__mod__", "expandtabs", "decode", "replace", "format", "encode")),
            # same-length transformations of the readable characters
            (_transform, ("capitalize", "translate", "lower", "upper", "swapcase")),
        )
        for make_wrapper, method_names in wrapper_table:
            for method_name in method_names:
                setattr(cls, method_name, make_wrapper(method_name))
        super().__init__(*args, **kwargs)
[docs]class ANSIString(str, metaclass=ANSIMeta): """ Unicode-like object that is aware of ANSI codes. This class can be used nearly identically to strings, in that it will report string length, handle slices, etc, much like a string object would. The methods should be used identically as string methods are. There is at least one exception to this (and there may be more, though they have not come up yet). When using ''.join() or u''.join() on an ANSIString, color information will get lost. You must use ANSIString('').join() to preserve color information. This implementation isn't perfectly clean, as it doesn't really have an understanding of what the codes mean in order to eliminate redundant characters-- though cleaning up the strings might end up being inefficient and slow without some C code when dealing with larger values. Such enhancements could be made as an enhancement to ANSI_PARSER if needed, however. If one is going to use ANSIString, one should generally avoid converting away from it until one is about to send information on the wire. This is because escape sequences in the string may otherwise already be decoded, and taken literally the second time around. """ # A compiled Regex for the format mini-language: # https://docs.python.org/3/library/string.html#formatspec re_format = re.compile( r"(?i)(?P<just>(?P<fill>.)?(?P<align>\<|\>|\=|\^))?(?P<sign>\+|\-| )?(?P<alt>\#)?" r"(?P<zero>0)?(?P<width>\d+)?(?P<grouping>\_|\,)?(?:\.(?P<precision>\d+))?" r"(?P<type>b|c|d|e|E|f|F|g|G|n|o|s|x|X|%)?" ) def __new__(cls, *args, **kwargs): """ When creating a new ANSIString, you may use a custom parser that has the same attributes as the standard one, and you may declare the string to be handled as already decoded. It is important not to double decode strings, as escapes can only be respected once. Internally, ANSIString can also passes itself precached code/character indexes and clean strings to avoid doing extra work when combining ANSIStrings. 
""" string = args[0] if not isinstance(string, str): string = to_str(string) parser = kwargs.get("parser", ANSI_PARSER) decoded = kwargs.get("decoded", False) or hasattr(string, "_raw_string") code_indexes = kwargs.pop("code_indexes", None) char_indexes = kwargs.pop("char_indexes", None) clean_string = kwargs.pop("clean_string", None) # All True, or All False, not just one. checks = [x is None for x in [code_indexes, char_indexes, clean_string]] if not len(set(checks)) == 1: raise ValueError( "You must specify code_indexes, char_indexes, " "and clean_string together, or not at all." ) if not all(checks): decoded = True if not decoded: # Completely new ANSI String clean_string = parser.parse_ansi(string, strip_ansi=True, mxp=MXP_ENABLED) string = parser.parse_ansi(string, xterm256=True, mxp=MXP_ENABLED, truecolor=True) elif clean_string is not None: # We have an explicit clean string. pass elif hasattr(string, "_clean_string"): # It's already an ANSIString clean_string = string._clean_string code_indexes = string._code_indexes char_indexes = string._char_indexes string = string._raw_string else: # It's a string that has been pre-ansi decoded. clean_string = parser.strip_raw_codes(string) if not isinstance(string, str): string = string.decode("utf-8") ansi_string = super().__new__(ANSIString, to_str(clean_string)) ansi_string._raw_string = string ansi_string._clean_string = clean_string ansi_string._code_indexes = code_indexes ansi_string._char_indexes = char_indexes return ansi_string def __str__(self): return self._raw_string def __format__(self, format_spec): """ This magic method covers ANSIString's behavior within a str.format() or f-string. Current features supported: fill, align, width. Args: format_spec (str): The format specification passed by f-string or str.format(). This is a string such as "0<30" which would mean "left justify to 30, filling with zeros". 
The full specification can be found at https://docs.python.org/3/library/string.html#formatspec Returns: ansi_str (str): The formatted ANSIString's .raw() form, for display. """ # This calls the compiled regex stored on ANSIString's class to analyze the format spec. # It returns a dictionary. format_data = self.re_format.match(format_spec).groupdict() clean = self.clean() base_output = ANSIString(self.raw()) align = format_data.get("align", "<") fill = format_data.get("fill", " ") # Need to coerce width into an integer. We can be certain that it's numeric thanks to regex. width = format_data.get("width", None) if width is None: width = len(clean) else: width = int(width) if align == "<": base_output = self.ljust(width, fill) elif align == ">": base_output = self.rjust(width, fill) elif align == "^": base_output = self.center(width, fill) elif align == "=": pass # Return the raw string with ANSI markup, ready to be displayed. return base_output.raw() def __repr__(self): """ Let's make the repr the command that would actually be used to construct this object, for convenience and reference. """ return "ANSIString(%s, decoded=True)" % repr(self._raw_string)
[docs] def __init__(self, *_, **kwargs): """ When the ANSIString is first initialized, a few internal variables have to be set. The first is the parser. It is possible to replace Evennia's standard ANSI parser with one of your own syntax if you wish, so long as it implements the same interface. The second is the _raw_string. This is the original "dumb" string with ansi escapes that ANSIString represents. The third thing to set is the _clean_string. This is a string that is devoid of all ANSI Escapes. Finally, _code_indexes and _char_indexes are defined. These are lookup tables for which characters in the raw string are related to ANSI escapes, and which are for the readable text. """ self.parser = kwargs.pop("parser", ANSI_PARSER) super().__init__() if self._code_indexes is None: self._code_indexes, self._char_indexes = self._get_indexes()
@staticmethod def _shifter(iterable, offset): """ Takes a list of integers, and produces a new one incrementing all by a number. """ if not offset: return iterable return [i + offset for i in iterable] @classmethod def _adder(cls, first, second): """ Joins two ANSIStrings, preserving calculated info. """ raw_string = first._raw_string + second._raw_string clean_string = first._clean_string + second._clean_string code_indexes = first._code_indexes[:] char_indexes = first._char_indexes[:] code_indexes.extend(cls._shifter(second._code_indexes, len(first._raw_string))) char_indexes.extend(cls._shifter(second._char_indexes, len(first._raw_string))) return ANSIString( raw_string, code_indexes=code_indexes, char_indexes=char_indexes, clean_string=clean_string, ) def __add__(self, other): """ We have to be careful when adding two strings not to reprocess things that don't need to be reprocessed, lest we end up with escapes being interpreted literally. """ if not isinstance(other, str): return NotImplemented if not isinstance(other, ANSIString): other = ANSIString(other) return self._adder(self, other) def __radd__(self, other): """ Likewise, if we're on the other end. """ if not isinstance(other, str): return NotImplemented if not isinstance(other, ANSIString): other = ANSIString(other) return self._adder(other, self) def __getslice__(self, i, j): """ This function is deprecated, so we just make it call the proper function. """ return self.__getitem__(slice(i, j)) def _slice(self, slc): """ This function takes a slice() object. Slices have to be handled specially. Not only are they able to specify a start and end with [x:y], but many forget that they can also specify an interval with [x:y:z]. As a result, not only do we have to track the ANSI Escapes that have played before the start of the slice, we must also replay any in these intervals, should they exist. Thankfully, slicing the _char_indexes table gives us the actual indexes that need slicing in the raw string. 
We can check between those indexes to figure out what escape characters need to be replayed. """ char_indexes = self._char_indexes slice_indexes = char_indexes[slc] # If it's the end of the string, we need to append final color codes. if not slice_indexes: # if we find no characters it may be because we are just outside # of the interval, using an open-ended slice. We must replay all # of the escape characters until/after this point. if char_indexes: if slc.start is None and slc.stop is None: # a [:] slice of only escape characters return ANSIString(self._raw_string[slc]) if slc.start is None: # this is a [:x] slice return ANSIString(self._raw_string[: char_indexes[0]]) if slc.stop is None: # a [x:] slice return ANSIString(self._raw_string[char_indexes[-1] + 1 :]) return ANSIString("") try: string = self[slc.start or 0]._raw_string except IndexError: return ANSIString("") last_mark = slice_indexes[0] # Check between the slice intervals for escape sequences. i = None for i in slice_indexes[1:]: for index in range(last_mark, i): if index in self._code_indexes: string += self._raw_string[index] last_mark = i try: string += self._raw_string[i] except IndexError: # raw_string not long enough pass if i is not None: append_tail = self._get_interleving(char_indexes.index(i) + 1) else: append_tail = "" return ANSIString(string + append_tail, decoded=True) def __getitem__(self, item): """ Gateway for slices and getting specific indexes in the ANSIString. If this is a regexable ANSIString, it will get the data from the raw string instead, bypassing ANSIString's intelligent escape skipping, for reasons explained in the __new__ method's docstring. """ if isinstance(item, slice): # Slices must be handled specially. return self._slice(item) try: self._char_indexes[item] except IndexError: raise IndexError("ANSIString Index out of range") # Get character codes after the index as well. 
if self._char_indexes[-1] == self._char_indexes[item]: append_tail = self._get_interleving(item + 1) else: append_tail = "" item = self._char_indexes[item] clean = self._raw_string[item] result = "" # Get the character they're after, and replay all escape sequences # previous to it. for index in range(0, item + 1): if index in self._code_indexes: result += self._raw_string[index] return ANSIString(result + clean + append_tail, decoded=True)
[docs] def clean(self): """ Return a string object *without* the ANSI escapes. Returns: clean_string (str): A unicode object with no ANSI escapes. """ return self._clean_string
[docs] def raw(self): """ Return a string object with the ANSI escapes. Returns: raw (str): A unicode object *with* the raw ANSI escape sequences. """ return self._raw_string
[docs] def partition(self, sep, reverse=False): """ Splits once into three sections (with the separator being the middle section) We use the same techniques we used in split() to make sure each are colored. Args: sep (str): The separator to split the string on. reverse (boolean): Whether to split the string on the last occurrence of the separator rather than the first. Returns: ANSIString: The part of the string before the separator ANSIString: The separator itself ANSIString: The part of the string after the separator. """ if hasattr(sep, "_clean_string"): sep = sep.clean() if reverse: parent_result = self._clean_string.rpartition(sep) else: parent_result = self._clean_string.partition(sep) current_index = 0 result = tuple() for section in parent_result: result += (self[current_index : current_index + len(section)],) current_index += len(section) return result
def _get_indexes(self):
    """
    Build the two index tables describing the raw string: one listing the
    positions occupied by ANSI escape codes and one listing the positions
    of readable characters.

    An escape sequence spans several raw characters while a readable
    character always occupies exactly one position. The parser's
    `ansi_regex` is used to locate every escape sequence; every position
    covered by a match is a code index and every remaining position is a
    character index.

    Returns:
        tuple: `(code_indexes, char_indexes)`, two lists of integer
            positions into the raw string.

    """
    raw_length = len(self._raw_string)
    code_indexes = []
    for match in self.parser.ansi_regex.finditer(self._raw_string):
        code_indexes.extend(range(match.start(), match.end()))
    if not code_indexes:
        # Plain string, no ANSI codes.
        return code_indexes, list(range(raw_length))
    # all indexes not occupied by ansi codes are normal characters; test
    # membership against a set so this stays linear in the raw length
    code_lookup = set(code_indexes)
    char_indexes = [i for i in range(raw_length) if i not in code_lookup]
    return code_indexes, char_indexes


def _get_interleving(self, index):
    """
    Get the code characters from the given slice end to the next
    character.

    Args:
        index (int): Slice end, counted in readable characters. The scan
            starts right after readable character number `index - 1`
            (so an index of 0 wraps around to the last readable character).

    Returns:
        str: The raw escape-code characters found between that character
            and the next readable character (or the end of the raw string).
            Empty if `index - 1` is outside the character table.

    """
    try:
        index = self._char_indexes[index - 1]
    except IndexError:
        return ""
    s = ""
    while True:
        index += 1
        if index in self._char_indexes:
            # reached the next readable character
            break
        elif index in self._code_indexes:
            s += self._raw_string[index]
        else:
            # neither table knows this position: past the end of the string
            break
    return s


def __mul__(self, other):
    """
    Multiplication method. Implemented for performance reasons.

    The index tables of the result are assembled from this string's own
    tables, one offset copy per repetition, instead of re-parsing the
    repeated raw string.

    Args:
        other (int): Number of repetitions.

    Returns:
        ANSIString: This string repeated `other` times, or `NotImplemented`
            if `other` is not an integer.

    """
    if not isinstance(other, int):
        return NotImplemented
    raw_length = len(self._raw_string)
    # Start from empty tables: the first repetition (offset 0) already
    # contributes the original indexes, so seeding the tables with a copy
    # would list them twice and would leave stale entries for other <= 0.
    code_indexes = []
    char_indexes = []
    for repetition in range(other):
        offset = repetition * raw_length
        code_indexes.extend(self._shifter(self._code_indexes, offset))
        char_indexes.extend(self._shifter(self._char_indexes, offset))
    return ANSIString(
        self._raw_string * other,
        code_indexes=code_indexes,
        char_indexes=char_indexes,
        clean_string=self._clean_string * other,
    )


def __rmul__(self, other):
    """
    Reflected multiplication (`3 * ansistring`); same result as `__mul__`.

    """
    return self.__mul__(other)
def split(self, by=None, maxsplit=-1):
    """
    Break the string into pieces around a separator, scanning from the left.

    Adapted from PyPy's pure Python string implementation, tweaked for
    ANSIString. PyPy is distributed under the MIT licence.
    http://opensource.org/licenses/MIT

    Args:
        by (str): The separator to look for. For instance, ',' for
            'Hello,world' would result in ['Hello', 'world']. When left as
            `None`, a single space is used as separator and empty pieces
            are dropped from the result.
        maxsplit (int): The maximum number of splits to perform. For
            example, a maxsplit of 2 with a by of ',' on the string
            'Hello,world,test,string' would result in
            ['Hello', 'world', 'test,string']. A negative value means
            no limit.

    Returns:
        result (list of ANSIStrings): The pieces, each obtained by slicing
            this string.

    Raises:
        ValueError: If the separator is empty.

    """
    whitespace_mode = by is None
    separator = " " if whitespace_mode else by
    separator_length = len(separator)
    if separator_length == 0:
        raise ValueError("empty separator")

    pieces = []
    cursor = 0
    remaining = maxsplit
    while remaining != 0:
        hit = self._clean_string.find(separator, cursor)
        if hit < 0:
            break
        pieces.append(self[cursor:hit])
        cursor = hit + separator_length
        # a negative budget never reaches zero, i.e. unlimited splits
        remaining -= 1

    # whatever is left after the last separator is the final piece
    pieces.append(self[cursor : len(self)])
    if whitespace_mode:
        pieces = [piece for piece in pieces if piece != ""]
    return pieces
def rsplit(self, by=None, maxsplit=-1):
    """
    Break the string into pieces around a separator, scanning from the right.

    Adapted from PyPy's pure Python string implementation, tweaked for
    ANSIString. PyPy is distributed under the MIT licence.
    http://opensource.org/licenses/MIT

    Args:
        by (str): The separator to look for. For instance, ',' for
            'Hello,world' would result in ['Hello', 'world']. When left as
            `None`, a single space is used as separator and empty pieces
            are dropped from the result.
        maxsplit (int): The maximum number of splits to perform. For
            example, a maxsplit of 2 with a by of ',' on the string
            'Hello,world,test,string' would result in
            ['Hello,world', 'test', 'string']. A negative value means
            no limit.

    Returns:
        result (list of ANSIStrings): The pieces in left-to-right order,
            each obtained by slicing this string.

    Raises:
        ValueError: If the separator is empty.

    """
    pieces = []
    boundary = len(self)
    whitespace_mode = by is None
    separator = " " if whitespace_mode else by
    separator_length = len(separator)
    if separator_length == 0:
        raise ValueError("empty separator")

    remaining = maxsplit
    while remaining != 0:
        hit = self._clean_string.rfind(separator, 0, boundary)
        if hit < 0:
            break
        pieces.append(self[hit + separator_length : boundary])
        boundary = hit
        # a negative budget never reaches zero, i.e. unlimited splits
        remaining -= 1

    # whatever remains in front of the first separator found is the last
    # piece collected; the list was built right-to-left, so flip it
    pieces.append(self[:boundary])
    pieces.reverse()
    if whitespace_mode:
        pieces = [piece for piece in pieces if piece != ""]
    return pieces
def strip(self, chars=None):
    """
    Trim both ends of the string while keeping the ANSI markers found
    inside the trimmed regions.

    Args:
        chars (str, optional): A string containing individual characters
            to strip off of both ends of the string. By default, any blank
            spaces are trimmed.

    Returns:
        result (ANSIString): A new ANSIString with the ends trimmed of the
            relevant characters.

    """
    plain = self._clean_string
    raw = self._raw_string

    # how many readable characters go away on each side
    plain_length = len(plain)
    left_count = plain_length - len(plain.lstrip(chars))
    right_count = plain_length - len(plain.rstrip(chars))

    # Walk the raw string inwards from the left. A raw character equal to
    # the current readable character is consumed as stripped text; anything
    # else is taken to be part of an ansi/mxp tag and is kept.
    kept_left = []
    plain_pos = 0
    raw_start = 0
    while plain_pos < left_count:
        if raw[raw_start] == plain[plain_pos]:
            plain_pos += 1
        else:
            kept_left.append(raw[raw_start])
        raw_start += 1

    # Same walk from the right end; kept characters are collected in
    # reverse order and flipped afterwards.
    kept_right = []
    plain_pos = plain_length - 1
    raw_end = len(raw) - 1
    while plain_length - plain_pos <= right_count:
        if raw[raw_end] == plain[plain_pos]:
            plain_pos -= 1
        else:
            kept_right.append(raw[raw_end])
        raw_end -= 1
    kept_right.reverse()

    middle = raw[raw_start : raw_end + 1]
    return ANSIString("".join(kept_left) + middle + "".join(kept_right))
def lstrip(self, chars=None):
    """
    Trim the left end of the string while keeping the ANSI markers found
    inside the trimmed region.

    Args:
        chars (str, optional): A string containing individual characters
            to strip off of the left end of the string. By default, any
            blank spaces are trimmed.

    Returns:
        result (ANSIString): A new ANSIString with the left end trimmed of
            the relevant characters.

    """
    plain = self._clean_string
    raw = self._raw_string

    # how many readable characters go away on the left
    left_count = len(plain) - len(plain.lstrip(chars))

    # Walk the raw string inwards from the left. A raw character equal to
    # the current readable character is consumed as stripped text; anything
    # else is taken to be part of an ansi/mxp tag and is kept.
    kept = []
    plain_pos = 0
    raw_start = 0
    while plain_pos < left_count:
        if raw[raw_start] == plain[plain_pos]:
            plain_pos += 1
        else:
            kept.append(raw[raw_start])
        raw_start += 1

    return ANSIString("".join(kept) + raw[raw_start:])
def rstrip(self, chars=None):
    """
    Trim the right end of the string while keeping the ANSI markers found
    inside the trimmed region.

    Args:
        chars (str, optional): A string containing individual characters
            to strip off of the right end of the string. By default, any
            blank spaces are trimmed.

    Returns:
        result (ANSIString): A new ANSIString with the right end trimmed of
            the relevant characters.

    """
    plain = self._clean_string
    raw = self._raw_string

    # how many readable characters go away on the right
    plain_length = len(plain)
    right_count = plain_length - len(plain.rstrip(chars))

    # Walk the raw string inwards from the right. A raw character equal to
    # the current readable character is consumed as stripped text; anything
    # else is taken to be part of an ansi/mxp tag and is kept. Kept
    # characters are collected in reverse order and flipped afterwards.
    kept = []
    plain_pos = plain_length - 1
    raw_end = len(raw) - 1
    while plain_length - plain_pos <= right_count:
        if raw[raw_end] == plain[plain_pos]:
            plain_pos -= 1
        else:
            kept.append(raw[raw_end])
        raw_end -= 1
    kept.reverse()

    return ANSIString(raw[: raw_end + 1] + "".join(kept))
def join(self, iterable):
    """
    Concatenate the strings of an iterable, placing this string between
    each pair of neighbours.

    NOTE: This should always be used for joining strings when ANSIStrings
    are involved. Otherwise color information will be discarded by
    python, due to details in the C implementation of strings.

    Args:
        iterable (list of strings): The strings to join together. Items
            that are not ANSIStrings are converted to ANSIString first.

    Returns:
        ANSIString: A single string with all of the iterable's contents
            concatenated, with this string between each.

    Examples:
        ::

            >>> ANSIString(', ').join(['up', 'right', 'left', 'down'])
            ANSIString('up, right, left, down')

    """
    joined = ANSIString("")
    for position, piece in enumerate(iterable):
        if position:
            # every item except the first is preceded by the separator
            joined += self._raw_string
        if not isinstance(piece, ANSIString):
            piece = ANSIString(piece)
        joined += piece
    return joined
def _filler(self, char, amount):
    """
    Generate a line of characters in a more efficient way than just adding
    ANSIStrings.

    The index tables of the result are computed directly instead of being
    derived by parsing or by repeated concatenation.

    Args:
        char (str or ANSIString): The fill character. For an ANSIString,
            the raw text in front of its first readable character is used
            as a prefix and everything after that character as a postfix
            around the repeated clean text.
        amount (int): Number of repetitions.

    Returns:
        ANSIString: The fill line.

    """
    if not isinstance(char, ANSIString):
        line = char * amount
        # The clean string must be the whole line, matching the raw string
        # and the character index table built for it.
        return ANSIString(
            line,
            code_indexes=[],
            char_indexes=list(range(len(line))),
            clean_string=line,
        )
    try:
        start = char._code_indexes[0]
    except IndexError:
        # no escape codes at all: slice from the beginning
        start = None
    end = char._char_indexes[0]
    prefix = char._raw_string[start:end]
    postfix = char._raw_string[end + 1 :]
    line = char._clean_string * amount
    # raw layout is prefix + line + postfix: the prefix and postfix
    # positions are codes, the positions in between are characters
    code_indexes = list(range(len(prefix)))
    length = len(prefix) + len(line)
    code_indexes.extend(range(length, length + len(postfix)))
    char_indexes = self._shifter(list(range(len(line))), len(prefix))
    raw_string = prefix + line + postfix
    return ANSIString(
        raw_string, clean_string=line, char_indexes=char_indexes, code_indexes=code_indexes
    )


# The following methods should not be called with the '_difference' argument explicitly. This is
# data provided by the wrapper _spacing_preflight.
@_spacing_preflight
def center(self, width, fillchar, _difference):
    """
    Pad the text on both sides so that it sits in the middle.

    When the padding cannot be divided evenly, the one leftover fill
    character is placed on the right.

    Args:
        width (int): The target width of the output string.
        fillchar (str): A single character string to pad the output string
            with.

    Returns:
        result (ANSIString): A string padded on both ends with fillchar.

    """
    half, leftover = divmod(_difference, 2)
    side = self._filler(fillchar, half)
    return side + self + side + self._filler(fillchar, leftover)
@_spacing_preflight
def ljust(self, width, fillchar, _difference):
    """
    Keep the text at the left edge and pad it out on the right.

    Args:
        width (int): The target width of the output string.
        fillchar (str): A single character string to pad the output string
            with.

    Returns:
        result (ANSIString): A string padded on the right with fillchar.

    """
    padded = self + self._filler(fillchar, _difference)
    return padded
@_spacing_preflight
def rjust(self, width, fillchar, _difference):
    """
    Keep the text at the right edge and pad it out on the left.

    Args:
        width (int): The target width of the output string.
        fillchar (str): A single character string to pad the output string
            with.

    Returns:
        result (ANSIString): A string padded on the left with fillchar.

    """
    padded = self._filler(fillchar, _difference) + self
    return padded