245 lines
6.8 KiB
Python
245 lines
6.8 KiB
Python
|
# encoding: utf-8
|
||
|
"""
|
||
|
Utilities for working with strings and text.
|
||
|
|
||
|
Inheritance diagram:
|
||
|
|
||
|
.. inheritance-diagram:: IPython.utils.text
|
||
|
:parts: 3
|
||
|
"""
|
||
|
|
||
|
import os
|
||
|
import re
|
||
|
import sys
|
||
|
import textwrap
|
||
|
from string import Formatter
|
||
|
|
||
|
# datetime.strftime date format for ipython
|
||
|
if sys.platform == 'win32':
|
||
|
date_format = "%B %d, %Y"
|
||
|
else:
|
||
|
date_format = "%B %-d, %Y"
|
||
|
|
||
|
|
||
|
def indent(instr,nspaces=4, ntabs=0, flatten=False):
|
||
|
"""Indent a string a given number of spaces or tabstops.
|
||
|
|
||
|
indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
|
||
|
instr : basestring
|
||
|
The string to be indented.
|
||
|
nspaces : int (default: 4)
|
||
|
The number of spaces to be indented.
|
||
|
ntabs : int (default: 0)
|
||
|
The number of tabs to be indented.
|
||
|
flatten : bool (default: False)
|
||
|
Whether to scrub existing indentation. If True, all lines will be
|
||
|
aligned to the same indentation. If False, existing indentation will
|
||
|
be strictly increased.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
|
||
|
str|unicode : string indented by ntabs and nspaces.
|
||
|
|
||
|
"""
|
||
|
if instr is None:
|
||
|
return
|
||
|
ind = '\t'*ntabs+' '*nspaces
|
||
|
if flatten:
|
||
|
pat = re.compile(r'^\s*', re.MULTILINE)
|
||
|
else:
|
||
|
pat = re.compile(r'^', re.MULTILINE)
|
||
|
outstr = re.sub(pat, ind, instr)
|
||
|
if outstr.endswith(os.linesep+ind):
|
||
|
return outstr[:-len(ind)]
|
||
|
else:
|
||
|
return outstr
|
||
|
|
||
|
|
||
|
def dedent(text):
|
||
|
"""Equivalent of textwrap.dedent that ignores unindented first line.
|
||
|
|
||
|
This means it will still dedent strings like:
|
||
|
'''foo
|
||
|
is a bar
|
||
|
'''
|
||
|
|
||
|
For use in wrap_paragraphs.
|
||
|
"""
|
||
|
|
||
|
if text.startswith('\n'):
|
||
|
# text starts with blank line, don't ignore the first line
|
||
|
return textwrap.dedent(text)
|
||
|
|
||
|
# split first line
|
||
|
splits = text.split('\n',1)
|
||
|
if len(splits) == 1:
|
||
|
# only one line
|
||
|
return textwrap.dedent(text)
|
||
|
|
||
|
first, rest = splits
|
||
|
# dedent everything but the first line
|
||
|
rest = textwrap.dedent(rest)
|
||
|
return '\n'.join([first, rest])
|
||
|
|
||
|
|
||
|
def wrap_paragraphs(text, ncols=80):
|
||
|
"""Wrap multiple paragraphs to fit a specified width.
|
||
|
|
||
|
This is equivalent to textwrap.wrap, but with support for multiple
|
||
|
paragraphs, as separated by empty lines.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
|
||
|
list of complete paragraphs, wrapped to fill `ncols` columns.
|
||
|
"""
|
||
|
paragraph_re = re.compile(r'\n(\s*\n)+', re.MULTILINE)
|
||
|
text = dedent(text).strip()
|
||
|
paragraphs = paragraph_re.split(text)[::2] # every other entry is space
|
||
|
out_ps = []
|
||
|
indent_re = re.compile(r'\n\s+', re.MULTILINE)
|
||
|
for p in paragraphs:
|
||
|
# presume indentation that survives dedent is meaningful formatting,
|
||
|
# so don't fill unless text is flush.
|
||
|
if indent_re.search(p) is None:
|
||
|
# wrap paragraph
|
||
|
p = textwrap.fill(p, ncols)
|
||
|
out_ps.append(p)
|
||
|
return out_ps
|
||
|
|
||
|
|
||
|
def strip_ansi(source):
|
||
|
"""
|
||
|
Remove ansi escape codes from text.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
source : str
|
||
|
Source to remove the ansi from
|
||
|
"""
|
||
|
return re.sub(r'\033\[(\d|;)+?m', '', source)
|
||
|
|
||
|
|
||
|
#-----------------------------------------------------------------------------
|
||
|
# Utils to columnize a list of string
|
||
|
#-----------------------------------------------------------------------------
|
||
|
|
||
|
def _chunks(l, n):
|
||
|
"""Yield successive n-sized chunks from l."""
|
||
|
for i in range(0, len(l), n):
|
||
|
yield l[i:i+n]
|
||
|
|
||
|
|
||
|
def _find_optimal(rlist , separator_size=2 , displaywidth=80):
|
||
|
"""Calculate optimal info to columnize a list of string"""
|
||
|
for nrow in range(1, len(rlist)+1) :
|
||
|
chk = list(map(max,_chunks(rlist, nrow)))
|
||
|
sumlength = sum(chk)
|
||
|
ncols = len(chk)
|
||
|
if sumlength+separator_size*(ncols-1) <= displaywidth :
|
||
|
break;
|
||
|
return {'columns_numbers' : ncols,
|
||
|
'optimal_separator_width':(displaywidth - sumlength)/(ncols-1) if (ncols -1) else 0,
|
||
|
'rows_numbers' : nrow,
|
||
|
'columns_width' : chk
|
||
|
}
|
||
|
|
||
|
|
||
|
def _get_or_default(mylist, i, default=None):
|
||
|
"""return list item number, or default if don't exist"""
|
||
|
if i >= len(mylist):
|
||
|
return default
|
||
|
else :
|
||
|
return mylist[i]
|
||
|
|
||
|
|
||
|
def compute_item_matrix(items, empty=None, *args, **kwargs) :
|
||
|
"""Returns a nested list, and info to columnize items
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
|
||
|
items
|
||
|
list of strings to columize
|
||
|
empty : (default None)
|
||
|
default value to fill list if needed
|
||
|
separator_size : int (default=2)
|
||
|
How much caracters will be used as a separation between each columns.
|
||
|
displaywidth : int (default=80)
|
||
|
The width of the area onto wich the columns should enter
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
|
||
|
strings_matrix
|
||
|
|
||
|
nested list of string, the outer most list contains as many list as
|
||
|
rows, the innermost lists have each as many element as colums. If the
|
||
|
total number of elements in `items` does not equal the product of
|
||
|
rows*columns, the last element of some lists are filled with `None`.
|
||
|
|
||
|
dict_info
|
||
|
some info to make columnize easier:
|
||
|
|
||
|
columns_numbers
|
||
|
number of columns
|
||
|
rows_numbers
|
||
|
number of rows
|
||
|
columns_width
|
||
|
list of with of each columns
|
||
|
optimal_separator_width
|
||
|
best separator width between columns
|
||
|
|
||
|
Examples
|
||
|
--------
|
||
|
::
|
||
|
|
||
|
In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
|
||
|
...: compute_item_matrix(l,displaywidth=12)
|
||
|
Out[1]:
|
||
|
([['aaa', 'f', 'k'],
|
||
|
['b', 'g', 'l'],
|
||
|
['cc', 'h', None],
|
||
|
['d', 'i', None],
|
||
|
['eeeee', 'j', None]],
|
||
|
{'columns_numbers': 3,
|
||
|
'columns_width': [5, 1, 1],
|
||
|
'optimal_separator_width': 2,
|
||
|
'rows_numbers': 5})
|
||
|
"""
|
||
|
info = _find_optimal(list(map(len, items)), *args, **kwargs)
|
||
|
nrow, ncol = info['rows_numbers'], info['columns_numbers']
|
||
|
return ([[ _get_or_default(items, c*nrow+i, default=empty) for c in range(ncol) ] for i in range(nrow) ], info)
|
||
|
|
||
|
|
||
|
def columnize(items, separator=' ', displaywidth=80):
|
||
|
""" Transform a list of strings into a single string with columns.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
items : sequence of strings
|
||
|
The strings to process.
|
||
|
|
||
|
separator : str, optional [default is two spaces]
|
||
|
The string that separates columns.
|
||
|
|
||
|
displaywidth : int, optional [default is 80]
|
||
|
Width of the display in number of characters.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
The formatted string.
|
||
|
"""
|
||
|
if not items :
|
||
|
return '\n'
|
||
|
matrix, info = compute_item_matrix(items, separator_size=len(separator), displaywidth=displaywidth)
|
||
|
fmatrix = [filter(None, x) for x in matrix]
|
||
|
sjoin = lambda x : separator.join([ y.ljust(w, ' ') for y, w in zip(x, info['columns_width'])])
|
||
|
return '\n'.join(map(sjoin, fmatrix))+'\n'
|
||
|
|