php-ext-xlswriter/library/libxlsxwriter/test/functional/helper_functions.py

###############################################################################
#
# Helper functions for testing libxlsxwriter.
#
# Copyright 2014-2021, John McNamara, jmcnamara@cpan.org
#

import re
import sys
import os.path
from zipfile import ZipFile
from zipfile import BadZipfile
from zipfile import LargeZipFile


def _xml_to_list(xml_str):
    # Convert test generated XML strings into lists for comparison testing.
    #
    # The string is split wherever one tag ends and the next one begins
    # (a '>' followed by optional whitespace and a '<'), so each list item
    # is one tag, or an opening tag plus its text and closing tag, e.g.
    # '<a><b>text</b></a>' gives ['<a>', '<b>text</b>', '</a>'].
    #
    # Returns a list of strings. Empty or whitespace-only input gives an
    # empty list.
    xml_str = xml_str.strip()

    # Nothing to split: return early instead of indexing into an empty
    # string below.
    if not xml_str:
        return []

    elements = []

    # Split the XML string at tag boundaries.
    for element in re.split(r'>\s*<', xml_str):
        element = element.replace("\r", "")

        # Add back the brackets that were consumed by the split. The
        # startswith/endswith tests are also safe for empty fragments.
        if not element.startswith('<'):
            element = '<' + element
        if not element.endswith('>'):
            element = element + '>'

        elements.append(element)

    return elements


def _vml_to_list(vml_str):
    # Turn a VML string taken from an Excel file into a list of element
    # strings that can be compared with the output of _xml_to_list().
    #
    # The Excel VML spreads attributes over several lines and uses single
    # quotes, so each input line is normalised and the pieces are glued
    # back together before the result is split into one element per item.
    pieces = []

    for raw_line in vml_str.replace("\r", "").split("\n"):
        # Blank lines carry no data.
        if raw_line == '':
            continue

        # Drop the indentation and switch to double quoted attributes.
        text = raw_line.strip().replace("'", '"')

        if text.endswith('"'):
            # The line ends inside a tag: keep the following attribute
            # separated by a space.
            text = text + " "
        elif text.endswith('>'):
            # The line ends with a complete tag: start a new element.
            text = text + "\n"

        # Elements that share a line are put on separate lines.
        text = text.replace('><', ">\n<")

        # The Anchor value follows on the next line: keep it with its tag.
        if text == "<x:Anchor>\n":
            text = "<x:Anchor>"

        pieces.append(text)

    # Trailing whitespace, including the final newline, is discarded
    # before splitting into elements.
    return ''.join(pieces).rstrip().split("\n")


def _sort_rel_file_data(xml_elements):
    # Re-order the relationship elements in a list of XLSX XML rel
    # (relationship) data. This is necessary for comparison since
    # Excel can produce the elements in a semi-random order.
    #
    # The first and last elements are kept in place and only the elements
    # between them are sorted. The list is modified in place and the same
    # list object is returned.
    #
    # A list with fewer than three elements has nothing to sort: the slice
    # below is empty, so the list is returned unchanged rather than
    # raising an IndexError.
    xml_elements[1:-1] = sorted(xml_elements[1:-1])

    return xml_elements


def _compare_xlsx_files(got_file, exp_file, ignore_files, ignore_elements):
    # Compare two XLSX files by extracting the XML files from each
    # zip archive and comparing them.
    #
    # This is used to compare an "expected" file produced by Excel
    # with a "got" file produced by XlsxWriter.
    #
    # In order to compare the XLSX files we convert the data in each
    # XML file into a list of XML elements.
    #
    # Parameters:
    #   got_file:        path of the file under test.
    #   exp_file:        path of the reference file.
    #   ignore_files:    container of archive member names to skip.
    #   ignore_elements: mapping of archive member name to a list of regex
    #                    patterns; elements matching a pattern (re.match)
    #                    are removed from both sides before comparing.
    #
    # Returns a (got, exp) pair:
    #   ('Ok', 'Ok')             the files are the same.
    #   (error message, '')      one of the files could not be opened.
    #   (name list, name list)   the archives contain different members.
    #   ('got: x', 'exp: x')     binary member x differs.
    #   (elements, elements)     an XML member differs; both lists start
    #                            with the member name.
    try:
        # Open the XlsxWriter file as a zip file for testing.
        got_zip = ZipFile(got_file, 'r')
    except IOError:
        # For Python 2.5+ compatibility.
        e = sys.exc_info()[1]
        error = "XlsxWriter file error: " + str(e)
        return error, ''
    except (BadZipfile, LargeZipFile):
        e = sys.exc_info()[1]
        # Report the file that actually failed to open, i.e. got_file.
        error = "XlsxWriter zipfile error, '" + got_file + "': " + str(e)
        return error, ''

    # From here on got_zip is open and must be closed on every exit path.
    try:
        try:
            # Open the Excel file as a zip file for testing.
            exp_zip = ZipFile(exp_file, 'r')
        except IOError:
            e = sys.exc_info()[1]
            error = "Excel file error: " + str(e)
            return error, ''
        except (BadZipfile, LargeZipFile):
            e = sys.exc_info()[1]
            error = "Excel zipfile error, '" + exp_file + "': " + str(e)
            return error, ''

        try:
            return _compare_zip_members(got_zip, exp_zip,
                                        ignore_files, ignore_elements)
        finally:
            exp_zip.close()
    finally:
        got_zip.close()


def _compare_zip_members(got_zip, exp_zip, ignore_files, ignore_elements):
    # Compare the members of two open XLSX zip archives. Returns the
    # (got, exp) pair described in _compare_xlsx_files().

    # Get the filenames from the zip files, without the test specific
    # filenames that should be ignored.
    got_files = [name for name in sorted(got_zip.namelist())
                 if name not in ignore_files]
    exp_files = [name for name in sorted(exp_zip.namelist())
                 if name not in ignore_files]

    # Check that each XLSX container has the same files.
    if got_files != exp_files:
        return got_files, exp_files

    # Compare each file in the XLSX containers.
    for filename in exp_files:

        got_xml_str = got_zip.read(filename)
        exp_xml_str = exp_zip.read(filename)

        # Compare binary files with string comparison based on extension.
        extension = os.path.splitext(filename)[1]
        if extension in ('.png', '.jpeg', '.gif', '.bmp', '.bin'):
            if got_xml_str != exp_xml_str:
                return 'got: %s' % filename, 'exp: %s' % filename
            continue

        # ZipFile.read() returns bytes in Python 3.
        if sys.version_info >= (3, 0, 0):
            got_xml_str = got_xml_str.decode('utf-8')
            exp_xml_str = exp_xml_str.decode('utf-8')

        got_xml_str, exp_xml_str = _strip_volatile_xml(filename,
                                                       got_xml_str,
                                                       exp_xml_str)

        # Convert the XML strings to lists for comparison. The expected
        # VML comes from Excel and needs the dedicated VML conversion.
        got_xml = _xml_to_list(got_xml_str)
        if filename.endswith('.vml'):
            exp_xml = _vml_to_list(exp_xml_str)
        else:
            exp_xml = _xml_to_list(exp_xml_str)

        # Ignore test specific XML elements for defined filenames.
        if filename in ignore_elements:
            for pattern in ignore_elements[filename]:
                exp_xml = [tag for tag in exp_xml
                           if not re.match(pattern, tag)]
                got_xml = [tag for tag in got_xml
                           if not re.match(pattern, tag)]

        # Reorder the XML elements in the XLSX relationship files.
        if filename == '[Content_Types].xml' or filename.endswith('.rels'):
            got_xml = _sort_rel_file_data(got_xml)
            exp_xml = _sort_rel_file_data(exp_xml)

        # Compare the XML elements in each file.
        if got_xml != exp_xml:
            got_xml.insert(0, filename)
            exp_xml.insert(0, filename)
            return got_xml, exp_xml

    # If we got here the files are the same.
    return 'Ok', 'Ok'


def _strip_volatile_xml(filename, got_xml_str, exp_xml_str):
    # Remove data that is expected to differ between the two files (dates,
    # user name, window and printer settings) from the XML strings of one
    # archive member. Returns the (got_xml_str, exp_xml_str) pair.

    # Remove dates and user specific data from the core.xml data.
    if filename == 'docProps/core.xml':
        date_time = r'\d\d\d\d-\d\d-\d\dT\d\d\:\d\d:\d\dZ'
        exp_xml_str = re.sub(r' ?John', '', exp_xml_str)
        exp_xml_str = re.sub(date_time, '', exp_xml_str)
        got_xml_str = re.sub(date_time, '', got_xml_str)

    # Remove workbookView dimensions which are almost always different
    # and calcPr which can have different Excel version ids.
    if filename == 'xl/workbook.xml':
        exp_xml_str = re.sub(r'<workbookView[^>]*>',
                             '<workbookView/>', exp_xml_str)
        got_xml_str = re.sub(r'<workbookView[^>]*>',
                             '<workbookView/>', got_xml_str)
        exp_xml_str = re.sub(r'<calcPr[^>]*>',
                             '<calcPr/>', exp_xml_str)
        got_xml_str = re.sub(r'<calcPr[^>]*>',
                             '<calcPr/>', got_xml_str)

    # Remove printer specific settings from Worksheet pageSetup elements.
    # NOTE(review): the pattern only matches single digit sheet numbers
    # (sheet1.xml to sheet9.xml) - confirm whether that is intended.
    if re.match(r'xl/worksheets/sheet\d.xml', filename):
        exp_xml_str = re.sub(r'horizontalDpi="200" ', '', exp_xml_str)
        exp_xml_str = re.sub(r'verticalDpi="200" ', '', exp_xml_str)
        exp_xml_str = re.sub(r'(<pageSetup[^>]*) r:id="rId1"',
                             r'\1', exp_xml_str)

    # Remove Chart pageMargin dimensions which are almost always different.
    # NOTE(review): as above, only single digit chart numbers match.
    if re.match(r'xl/charts/chart\d.xml', filename):
        exp_xml_str = re.sub(r'<c:pageMargins[^>]*>',
                             '<c:pageMargins/>', exp_xml_str)
        got_xml_str = re.sub(r'<c:pageMargins[^>]*>',
                             '<c:pageMargins/>', got_xml_str)

    return got_xml_str, exp_xml_str
初始化PHP-Xlswrite扩展 2024-03-05 10:01:08 +08:00			`###############################################################################`
			`#`
			`# Helper functions for testing libxlsxwriter.`
			`#`
			`# Copyright 2014-2021, John McNamara, jmcnamara@cpan.org`
			`#`

			`import re`
			`import sys`
			`import os.path`
			`from zipfile import ZipFile`
			`from zipfile import BadZipfile`
			`from zipfile import LargeZipFile`


			`def _xml_to_list(xml_str):`
			`# Convert test generated XML strings into lists for comparison testing.`

			`# Split the XML string at tag boundaries.`
			`parser = re.compile(r'>\s*<')`
			`elements = parser.split(xml_str.strip())`

			`elements = [s.replace("\r", "") for s in elements]`

			`# Add back the removed brackets.`
			`for index, element in enumerate(elements):`
			`if not element[0] == '<':`
			`elements[index] = '<' + elements[index]`
			`if not element[-1] == '>':`
			`elements[index] = elements[index] + '>'`

			`return elements`


			`def _vml_to_list(vml_str):`
			`# Convert an Excel generated VML string into a list for comparison testing.`
			`#`
			`# The VML data in the testcases is taken from Excel 2007 files. The data`
			`# has to be massaged significantly to make it suitable for comparison.`
			`#`
			`# The VML produced by XlsxWriter can be parsed as ordinary XML.`
			`vml_str = vml_str.replace("\r", "")`

			`vml = vml_str.split("\n")`
			`vml_str = ''`

			`for line in vml:`
			`# Skip blank lines.`
			`if not line:`
			`continue`

			`# Strip leading and trailing whitespace.`
			`line = line.strip()`

			`# Convert VMLs attribute quotes.`
			`line = line.replace("'", '"')`

			`# Add space between attributes.`
			`if re.search('"$', line):`
			`line += " "`

			`# Add newline after element end.`
			`if re.search('>$', line):`
			`line += "\n"`

			`# Split multiple elements.`
			`line = line.replace('><', ">\n<")`

			`# Put all of Anchor on one line.`
			`if line == "<x:Anchor>\n":`
			`line = line.strip()`

			`vml_str += line`

			`# Remove the final newline.`
			`vml_str = vml_str.rstrip()`

			`return vml_str.split("\n")`


			`def _sort_rel_file_data(xml_elements):`
			`# Re-order the relationship elements in an array of XLSX XML rel`
			`# (relationship) data. This is necessary for comparison since`
			`# Excel can produce the elements in a semi-random order.`

			`# We don't want to sort the first or last elements.`
			`first = xml_elements.pop(0)`
			`last = xml_elements.pop()`

			`# Sort the relationship elements.`
			`xml_elements.sort()`

			`# Add back the first and last elements.`
			`xml_elements.insert(0, first)`
			`xml_elements.append(last)`

			`return xml_elements`


			`def _compare_xlsx_files(got_file, exp_file, ignore_files, ignore_elements):`
			`# Compare two XLSX files by extracting the XML files from each`
			`# zip archive and comparing them.`
			`#`
			`# This is used to compare an "expected" file produced by Excel`
			`# with a "got" file produced by XlsxWriter.`
			`#`
			`# In order to compare the XLSX files we convert the data in each`
			`# XML file into a list of XML elements.`
			`try:`
			`# Open the XlsxWriter as a zip file for testing.`
			`got_zip = ZipFile(got_file, 'r')`
			`except IOError:`
			`# For Python 2.5+ compatibility.`
			`e = sys.exc_info()[1]`
			`error = "XlsxWriter file error: " + str(e)`
			`return error, ''`
			`except (BadZipfile, LargeZipFile):`
			`e = sys.exc_info()[1]`
			`error = "XlsxWriter zipfile error, '" + exp_file + "': " + str(e)`
			`return error, ''`

			`try:`
			`# Open the Excel as a zip file for testing.`
			`exp_zip = ZipFile(exp_file, 'r')`
			`except IOError:`
			`e = sys.exc_info()[1]`
			`error = "Excel file error: " + str(e)`
			`return error, ''`
			`except (BadZipfile, LargeZipFile):`
			`e = sys.exc_info()[1]`
			`error = "Excel zipfile error, '" + exp_file + "': " + str(e)`
			`return error, ''`

			`# Get the filenames from the zip files.`
			`got_files = sorted(got_zip.namelist())`
			`exp_files = sorted(exp_zip.namelist())`

			`# Ignore some test specific filenames.`
			`got_files = [name for name in got_files if name not in ignore_files]`
			`exp_files = [name for name in exp_files if name not in ignore_files]`

			`# Check that each XLSX container has the same files.`
			`if got_files != exp_files:`
			`return got_files, exp_files`

			`# Compare each file in the XLSX containers.`
			`for filename in exp_files:`

			`got_xml_str = got_zip.read(filename)`
			`exp_xml_str = exp_zip.read(filename)`

			`# Compare binary files with string comparison based on extension.`
			`extension = os.path.splitext(filename)[1]`
			`if extension in ('.png', '.jpeg', '.gif','.bmp', '.bin'):`
			`if got_xml_str != exp_xml_str:`
			`return 'got: %s' % filename, 'exp: %s' % filename`
			`continue`

			`if sys.version_info >= (3, 0, 0):`
			`got_xml_str = got_xml_str.decode('utf-8')`
			`exp_xml_str = exp_xml_str.decode('utf-8')`

			`# Remove dates and user specific data from the core.xml data.`
			`if filename == 'docProps/core.xml':`
			`exp_xml_str = re.sub(r' ?John', '', exp_xml_str)`
			`exp_xml_str = re.sub(r'\d\d\d\d-\d\d-\d\dT\d\d\:\d\d:\d\dZ',`
			`'', exp_xml_str)`
			`got_xml_str = re.sub(r'\d\d\d\d-\d\d-\d\dT\d\d\:\d\d:\d\dZ',`
			`'', got_xml_str)`

			`# Remove workbookView dimensions which are almost always different`
			`# and calcPr which can have different Excel version ids.`
			`if filename == 'xl/workbook.xml':`
			`exp_xml_str = re.sub(r'<workbookView[^>]*>',`
			`'<workbookView/>', exp_xml_str)`
			`got_xml_str = re.sub(r'<workbookView[^>]*>',`
			`'<workbookView/>', got_xml_str)`
			`exp_xml_str = re.sub(r'<calcPr[^>]*>',`
			`'<calcPr/>', exp_xml_str)`
			`got_xml_str = re.sub(r'<calcPr[^>]*>',`
			`'<calcPr/>', got_xml_str)`

			`# Remove printer specific settings from Worksheet pageSetup elements.`
			`if re.match(r'xl/worksheets/sheet\d.xml', filename):`
			`exp_xml_str = re.sub(r'horizontalDpi="200" ', '', exp_xml_str)`
			`exp_xml_str = re.sub(r'verticalDpi="200" ', '', exp_xml_str)`
			`exp_xml_str = re.sub(r'(<pageSetup[^>]*) r:id="rId1"',`
			`r'\1', exp_xml_str)`

			`# Remove Chart pageMargin dimensions which are almost always different.`
			`if re.match(r'xl/charts/chart\d.xml', filename):`
			`exp_xml_str = re.sub(r'<c:pageMargins[^>]*>',`
			`'<c:pageMargins/>', exp_xml_str)`
			`got_xml_str = re.sub(r'<c:pageMargins[^>]*>',`
			`'<c:pageMargins/>', got_xml_str)`

			`# Convert the XML string to lists for comparison.`
			`if re.search('.vml$', filename):`
			`got_xml = _xml_to_list(got_xml_str)`
			`exp_xml = _vml_to_list(exp_xml_str)`
			`else:`
			`got_xml = _xml_to_list(got_xml_str)`
			`exp_xml = _xml_to_list(exp_xml_str)`

			`# Ignore test specific XML elements for defined filenames.`
			`if filename in ignore_elements:`
			`patterns = ignore_elements[filename]`

			`for pattern in patterns:`
			`exp_xml = [tag for tag in exp_xml if not re.match(pattern, tag)]`
			`got_xml = [tag for tag in got_xml if not re.match(pattern, tag)]`

			`# Reorder the XML elements in the XLSX relationship files.`
			`if filename == '[Content_Types].xml' or re.search('.rels$', filename):`
			`got_xml = _sort_rel_file_data(got_xml)`
			`exp_xml = _sort_rel_file_data(exp_xml)`

			`# Compare the XML elements in each file.`
			`if got_xml != exp_xml:`
			`got_xml.insert(0, filename)`
			`exp_xml.insert(0, filename)`
			`return got_xml, exp_xml`

			`# If we got here the files are the same.`
			`return 'Ok', 'Ok'`