From 56d128279162d3a5696cfba767891c843393e372 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Sun, 17 May 2020 13:49:43 -0400 Subject: Documented some recently added customization features. --- CHANGELOG | 2 +- bs4/builder/_htmlparser.py | 8 +++ bs4/tests/test_htmlparser.py | 4 +- doc/source/index.rst | 138 +++++++++++++++++++++++++++++++++++++++---- 4 files changed, 137 insertions(+), 15 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 270a771..e66a4eb 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -4,7 +4,7 @@ BeautifulSoupHTMLParser constructor (used by the html.parser tree builder) which lets you customize the handling of markup that contains the same attribute more than once, as in: - [bug=1878209] TODO: This needs documentation. + [bug=1878209] * Added a distinct subclass, GuessedAtParserWarning, for the warning issued when BeautifulSoup is instantiated without a parser being diff --git a/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py index 476fd79..1d96a66 100644 --- a/bs4/builder/_htmlparser.py +++ b/bs4/builder/_htmlparser.py @@ -315,9 +315,17 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder): invoked. :param kwargs: Keyword arguments for the superclass constructor. """ + # Some keyword arguments will be pulled out of kwargs and placed + # into parser_kwargs. + extra_parser_kwargs = dict() + for arg in ('on_duplicate_attribute',): + if arg in kwargs: + value = kwargs.pop(arg) + extra_parser_kwargs[arg] = value super(HTMLParserTreeBuilder, self).__init__(**kwargs) parser_args = parser_args or [] parser_kwargs = parser_kwargs or {} + parser_kwargs.update(extra_parser_kwargs) if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED: parser_kwargs['strict'] = False if CONSTRUCTOR_TAKES_CONVERT_CHARREFS: diff --git a/bs4/tests/test_htmlparser.py b/bs4/tests/test_htmlparser.py index 7b06f89..7ee91aa 100644 --- a/bs4/tests/test_htmlparser.py +++ b/bs4/tests/test_htmlparser.py @@ -67,9 +67,7 @@ class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): # You can also get this behavior explicitly. def assert_attribute(on_duplicate_attribute, expected): soup = self.soup( - markup, parser_kwargs=dict( - on_duplicate_attribute=on_duplicate_attribute - ) + markup, on_duplicate_attribute=on_duplicate_attribute ) self.assertEquals(expected, soup.a['href']) diff --git a/doc/source/index.rst b/doc/source/index.rst index 87c04d9..987ffdd 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -568,8 +568,8 @@ found in a ``