summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- CHANGELOG 7
-rw-r--r-- bs4/element.py 26
-rw-r--r-- bs4/tests/test_tree.py 25
3 files changed, 48 insertions, 10 deletions
diff --git a/CHANGELOG b/CHANGELOG
index a7d8260..767b74b 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -6,8 +6,11 @@
and position within a line (Tag.sourcepos). Based on code by Chris
Mayo. [bug=1742921]
-* Fixed a bug that made it impossible to pretty-print tags that were not
- created during initial parsing. [bug=1838903]
+* Fixed a crash when pretty-printing tags that were not created
+ during initial parsing. [bug=1838903]
+
+* Copying a Tag preserves information that was originally obtained from
+ the TreeBuilder used to build the original Tag. [bug=1838903]
= 4.8.0 (20190720, "One Small Soup")
diff --git a/bs4/element.py b/bs4/element.py
index 658115d..24d504b 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -724,7 +724,10 @@ class Tag(PageElement):
def __init__(self, parser=None, builder=None, name=None, namespace=None,
prefix=None, attrs=None, parent=None, previous=None,
- is_xml=None, sourceline=None, sourcepos=None):
+ is_xml=None, sourceline=None, sourcepos=None,
+ can_be_empty_element=None, cdata_list_attributes=None,
+ preserve_whitespace_tags=None
+ ):
"Basic constructor."
if parser is None:
@@ -765,11 +768,12 @@ class Tag(PageElement):
self.hidden = False
if builder is None:
- # In the absence of a TreeBuilder, assume this tag is nothing
- # special.
- self.can_be_empty_element = False
- self.cdata_list_attributes = None
- self.preserve_whitespace_tags = None
+ # In the absence of a TreeBuilder, use whatever values were
+ # passed in here. They're probably None, unless this is a copy of some
+ # other tag.
+ self.can_be_empty_element = can_be_empty_element
+ self.cdata_list_attributes = cdata_list_attributes
+ self.preserve_whitespace_tags = preserve_whitespace_tags
else:
# Set up any substitutions for this tag, such as the charset in a META tag.
builder.set_up_substitutions(self)
@@ -797,8 +801,14 @@ class Tag(PageElement):
"""A copy of a Tag is a new Tag, unconnected to the parse tree.
Its contents are a copy of the old Tag's contents.
"""
- clone = type(self)(None, self.builder, self.name, self.namespace,
- self.prefix, self.attrs, is_xml=self._is_xml)
+ clone = type(self)(
+ None, self.builder, self.name, self.namespace,
+ self.prefix, self.attrs, is_xml=self._is_xml,
+ sourceline=self.sourceline, sourcepos=self.sourcepos,
+ can_be_empty_element=self.can_be_empty_element,
+ cdata_list_attributes=self.cdata_list_attributes,
+ preserve_whitespace_tags=self.preserve_whitespace_tags
+ )
for attr in ('can_be_empty_element', 'hidden'):
setattr(clone, attr, getattr(self, attr))
for child in self.contents:
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
index c995c46..7d8da01 100644
--- a/bs4/tests/test_tree.py
+++ b/bs4/tests/test_tree.py
@@ -1490,6 +1490,31 @@ class TestPersistence(SoupTest):
self.assertEqual(u"<p> </p>", unicode(copy))
self.assertEqual(encoding, copy.original_encoding)
+ def test_copy_preserves_builder_information(self):
+
+ tag = self.soup('<p></p>').p
+
+ # Simulate a tag obtained from a source file.
+ tag.sourceline = 10
+ tag.sourcepos = 33
+
+ copied = tag.__copy__()
+
+ # The TreeBuilder object is no longer available, but information
+ # obtained from it gets copied over to the new Tag object.
+ self.assertEqual(tag.sourceline, copied.sourceline)
+ self.assertEqual(tag.sourcepos, copied.sourcepos)
+ self.assertEqual(
+ tag.can_be_empty_element, copied.can_be_empty_element
+ )
+ self.assertEqual(
+ tag.cdata_list_attributes, copied.cdata_list_attributes
+ )
+ self.assertEqual(
+ tag.preserve_whitespace_tags, copied.preserve_whitespace_tags
+ )
+
+
def test_unicode_pickle(self):
# A tree containing Unicode characters can be pickled.
html = u"<b>\N{SNOWMAN}</b>"