126 lines
3.6 KiB
Python
126 lines
3.6 KiB
Python
|
from bleach.linkifier import (
|
||
|
DEFAULT_CALLBACKS,
|
||
|
Linker,
|
||
|
)
|
||
|
from bleach.sanitizer import (
|
||
|
ALLOWED_ATTRIBUTES,
|
||
|
ALLOWED_PROTOCOLS,
|
||
|
ALLOWED_TAGS,
|
||
|
Cleaner,
|
||
|
)
|
||
|
|
||
|
|
||
|
# Release date in yyyymmdd format
|
||
|
__releasedate__ = "20220627"
|
||
|
# Version string in semver form: x.y.z or x.y.z.dev0
|
||
|
__version__ = "5.0.1"
|
||
|
|
||
|
|
||
|
# Public names of this module; these are what ``from <module> import *`` exports.
__all__ = ["clean", "linkify"]
|
||
|
|
||
|
|
||
|
def clean(
    text,
    tags=ALLOWED_TAGS,
    attributes=ALLOWED_ATTRIBUTES,
    protocols=ALLOWED_PROTOCOLS,
    strip=False,
    strip_comments=True,
    css_sanitizer=None,
):
    """Sanitize an HTML fragment and return the cleaned result

    This is a security-focused helper: its only job is to remove malicious
    content from a string so that the result can be displayed as content in
    a web page.

    It is not intended for transforming content that will be used in
    non-web-page contexts.

    Example::

        import bleach

        better_text = bleach.clean(yucky_text)


    .. Note::

       Every call builds a new :py:class:`bleach.sanitizer.Cleaner`. If you
       clean a lot of text with the same argument values, or you need more
       configurability, create a :py:class:`bleach.sanitizer.Cleaner`
       instance yourself and reuse it.

    :arg str text: the text to clean

    :arg list tags: allowed list of tags; defaults to
        ``bleach.sanitizer.ALLOWED_TAGS``

    :arg dict attributes: allowed attributes; can be a callable, list or dict;
        defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

    :arg list protocols: allowed list of protocols for links; defaults
        to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

    :arg bool strip: whether or not to strip disallowed elements

    :arg bool strip_comments: whether or not to strip HTML comments

    :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
        sanitizing style attribute values and style text; defaults to None

    :returns: cleaned text as unicode

    """
    # Every option is forwarded to the Cleaner unchanged, by keyword.
    cleaner_options = {
        "tags": tags,
        "attributes": attributes,
        "protocols": protocols,
        "strip": strip,
        "strip_comments": strip_comments,
        "css_sanitizer": css_sanitizer,
    }
    return Cleaner(**cleaner_options).clean(text)
|
||
|
|
||
|
|
||
|
def linkify(text, callbacks=DEFAULT_CALLBACKS, skip_tags=None, parse_email=False):
    """Turn URL-like strings in an HTML fragment into links

    Strings that look like URLs, domain names and email addresses are
    converted to links. The input text may itself be an HTML fragment; the
    following are preserved:

    1. links already in the string
    2. urls found in attributes
    3. email addresses

    linkify works on a best-effort basis and tries to recover from bad
    situations caused by crazy text.

    .. Note::

       Every call builds a new :py:class:`bleach.linkifier.Linker`. If you
       linkify a lot of text with the same argument values, or you need more
       configurability, create a :py:class:`bleach.linkifier.Linker`
       instance yourself and reuse it.

    .. Note::

       If you have text that you want to clean and then linkify, consider using
       the :py:class:`bleach.linkifier.LinkifyFilter` as a filter in the clean
       pass. That way you're not parsing the HTML twice.

    :arg str text: the text to linkify

    :arg list callbacks: list of callbacks to run when adjusting tag attributes;
        defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``

    :arg list skip_tags: list of tags that you don't want to linkify the
        contents of; for example, you could set this to ``['pre']`` to skip
        linkifying contents of ``pre`` tags

    :arg bool parse_email: whether or not to linkify email addresses

    :returns: linkified text as unicode

    """
    # All options are handed to the Linker by keyword, unchanged.
    return Linker(
        callbacks=callbacks,
        skip_tags=skip_tags,
        parse_email=parse_email,
    ).linkify(text)
|