404

[ Avaa Bypassed ]




Upload:

Command:

botdev@3.145.82.96: ~ $
import calendar
import codecs
import collections
import mmap
import os
import re
import time
import zlib


# see 7.9.2.2 Text String Type on page 86 and D.3 PDFDocEncoding Character Set
# on page 656
def encode_text(s):
    return codecs.BOM_UTF16_BE + s.encode("utf_16_be")


PDFDocEncoding = {
    0x16: "\u0017",
    0x18: "\u02D8",
    0x19: "\u02C7",
    0x1A: "\u02C6",
    0x1B: "\u02D9",
    0x1C: "\u02DD",
    0x1D: "\u02DB",
    0x1E: "\u02DA",
    0x1F: "\u02DC",
    0x80: "\u2022",
    0x81: "\u2020",
    0x82: "\u2021",
    0x83: "\u2026",
    0x84: "\u2014",
    0x85: "\u2013",
    0x86: "\u0192",
    0x87: "\u2044",
    0x88: "\u2039",
    0x89: "\u203A",
    0x8A: "\u2212",
    0x8B: "\u2030",
    0x8C: "\u201E",
    0x8D: "\u201C",
    0x8E: "\u201D",
    0x8F: "\u2018",
    0x90: "\u2019",
    0x91: "\u201A",
    0x92: "\u2122",
    0x93: "\uFB01",
    0x94: "\uFB02",
    0x95: "\u0141",
    0x96: "\u0152",
    0x97: "\u0160",
    0x98: "\u0178",
    0x99: "\u017D",
    0x9A: "\u0131",
    0x9B: "\u0142",
    0x9C: "\u0153",
    0x9D: "\u0161",
    0x9E: "\u017E",
    0xA0: "\u20AC",
}


def decode_text(b):
    if b[: len(codecs.BOM_UTF16_BE)] == codecs.BOM_UTF16_BE:
        return b[len(codecs.BOM_UTF16_BE) :].decode("utf_16_be")
    else:
        return "".join(PDFDocEncoding.get(byte, chr(byte)) for byte in b)


class PdfFormatError(RuntimeError):
    """An error that probably indicates a syntactic or semantic error in the
    PDF file structure"""

    pass


def check_format_condition(condition, error_message):
    if not condition:
        raise PdfFormatError(error_message)


class IndirectReference(
    collections.namedtuple("IndirectReferenceTuple", ["object_id", "generation"])
):
    def __str__(self):
        return "%s %s R" % self

    def __bytes__(self):
        return self.__str__().encode("us-ascii")

    def __eq__(self, other):
        return (
            other.__class__ is self.__class__
            and other.object_id == self.object_id
            and other.generation == self.generation
        )

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        return hash((self.object_id, self.generation))


class IndirectObjectDef(IndirectReference):
    def __str__(self):
        return "%s %s obj" % self


class XrefTable:
    def __init__(self):
        self.existing_entries = {}  # object ID => (offset, generation)
        self.new_entries = {}  # object ID => (offset, generation)
        self.deleted_entries = {0: 65536}  # object ID => generation
        self.reading_finished = False

    def __setitem__(self, key, value):
        if self.reading_finished:
            self.new_entries[key] = value
        else:
            self.existing_entries[key] = value
        if key in self.deleted_entries:
            del self.deleted_entries[key]

    def __getitem__(self, key):
        try:
            return self.new_entries[key]
        except KeyError:
            return self.existing_entries[key]

    def __delitem__(self, key):
        if key in self.new_entries:
            generation = self.new_entries[key][1] + 1
            del self.new_entries[key]
            self.deleted_entries[key] = generation
        elif key in self.existing_entries:
            generation = self.existing_entries[key][1] + 1
            self.deleted_entries[key] = generation
        elif key in self.deleted_entries:
            generation = self.deleted_entries[key]
        else:
            raise IndexError(
                "object ID " + str(key) + " cannot be deleted because it doesn't exist"
            )

    def __contains__(self, key):
        return key in self.existing_entries or key in self.new_entries

    def __len__(self):
        return len(
            set(self.existing_entries.keys())
            | set(self.new_entries.keys())
            | set(self.deleted_entries.keys())
        )

    def keys(self):
        return (
            set(self.existing_entries.keys()) - set(self.deleted_entries.keys())
        ) | set(self.new_entries.keys())

    def write(self, f):
        keys = sorted(set(self.new_entries.keys()) | set(self.deleted_entries.keys()))
        deleted_keys = sorted(set(self.deleted_entries.keys()))
        startxref = f.tell()
        f.write(b"xref\n")
        while keys:
            # find a contiguous sequence of object IDs
            prev = None
            for index, key in enumerate(keys):
                if prev is None or prev + 1 == key:
                    prev = key
                else:
                    contiguous_keys = keys[:index]
                    keys = keys[index:]
                    break
            else:
                contiguous_keys = keys
                keys = None
            f.write(b"%d %d\n" % (contiguous_keys[0], len(contiguous_keys)))
            for object_id in contiguous_keys:
                if object_id in self.new_entries:
                    f.write(b"%010d %05d n \n" % self.new_entries[object_id])
                else:
                    this_deleted_object_id = deleted_keys.pop(0)
                    check_format_condition(
                        object_id == this_deleted_object_id,
                        f"expected the next deleted object ID to be {object_id}, "
                        f"instead found {this_deleted_object_id}",
                    )
                    try:
                        next_in_linked_list = deleted_keys[0]
                    except IndexError:
                        next_in_linked_list = 0
                    f.write(
                        b"%010d %05d f \n"
                        % (next_in_linked_list, self.deleted_entries[object_id])
                    )
        return startxref


class PdfName:
    def __init__(self, name):
        if isinstance(name, PdfName):
            self.name = name.name
        elif isinstance(name, bytes):
            self.name = name
        else:
            self.name = name.encode("us-ascii")

    def name_as_str(self):
        return self.name.decode("us-ascii")

    def __eq__(self, other):
        return (
            isinstance(other, PdfName) and other.name == self.name
        ) or other == self.name

    def __hash__(self):
        return hash(self.name)

    def __repr__(self):
        return f"PdfName({repr(self.name)})"

    @classmethod
    def from_pdf_stream(cls, data):
        return cls(PdfParser.interpret_name(data))

    allowed_chars = set(range(33, 127)) - {ord(c) for c in "#%/()<>[]{}"}

    def __bytes__(self):
        result = bytearray(b"/")
        for b in self.name:
            if b in self.allowed_chars:
                result.append(b)
            else:
                result.extend(b"#%02X" % b)
        return bytes(result)


class PdfArray(list):
    def __bytes__(self):
        return b"[ " + b" ".join(pdf_repr(x) for x in self) + b" ]"


class PdfDict(collections.UserDict):
    def __setattr__(self, key, value):
        if key == "data":
            collections.UserDict.__setattr__(self, key, value)
        else:
            self[key.encode("us-ascii")] = value

    def __getattr__(self, key):
        try:
            value = self[key.encode("us-ascii")]
        except KeyError as e:
            raise AttributeError(key) from e
        if isinstance(value, bytes):
            value = decode_text(value)
        if key.endswith("Date"):
            if value.startswith("D:"):
                value = value[2:]

            relationship = "Z"
            if len(value) > 17:
                relationship = value[14]
                offset = int(value[15:17]) * 60
                if len(value) > 20:
                    offset += int(value[18:20])

            format = "%Y%m%d%H%M%S"[: len(value) - 2]
            value = time.strptime(value[: len(format) + 2], format)
            if relationship in ["+", "-"]:
                offset *= 60
                if relationship == "+":
                    offset *= -1
                value = time.gmtime(calendar.timegm(value) + offset)
        return value

    def __bytes__(self):
        out = bytearray(b"<<")
        for key, value in self.items():
            if value is None:
                continue
            value = pdf_repr(value)
            out.extend(b"\n")
            out.extend(bytes(PdfName(key)))
            out.extend(b" ")
            out.extend(value)
        out.extend(b"\n>>")
        return bytes(out)


class PdfBinary:
    def __init__(self, data):
        self.data = data

    def __bytes__(self):
        return b"<%s>" % b"".join(b"%02X" % b for b in self.data)


class PdfStream:
    def __init__(self, dictionary, buf):
        self.dictionary = dictionary
        self.buf = buf

    def decode(self):
        try:
            filter = self.dictionary.Filter
        except AttributeError:
            return self.buf
        if filter == b"FlateDecode":
            try:
                expected_length = self.dictionary.DL
            except AttributeError:
                expected_length = self.dictionary.Length
            return zlib.decompress(self.buf, bufsize=int(expected_length))
        else:
            raise NotImplementedError(
                f"stream filter {repr(self.dictionary.Filter)} unknown/unsupported"
            )


def pdf_repr(x):
    if x is True:
        return b"true"
    elif x is False:
        return b"false"
    elif x is None:
        return b"null"
    elif isinstance(x, (PdfName, PdfDict, PdfArray, PdfBinary)):
        return bytes(x)
    elif isinstance(x, int):
        return str(x).encode("us-ascii")
    elif isinstance(x, time.struct_time):
        return b"(D:" + time.strftime("%Y%m%d%H%M%SZ", x).encode("us-ascii") + b")"
    elif isinstance(x, dict):
        return bytes(PdfDict(x))
    elif isinstance(x, list):
        return bytes(PdfArray(x))
    elif isinstance(x, str):
        return pdf_repr(encode_text(x))
    elif isinstance(x, bytes):
        # XXX escape more chars? handle binary garbage
        x = x.replace(b"\\", b"\\\\")
        x = x.replace(b"(", b"\\(")
        x = x.replace(b")", b"\\)")
        return b"(" + x + b")"
    else:
        return bytes(x)


class PdfParser:
    """Based on
    https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf
    Supports PDF up to 1.4
    """

    def __init__(self, filename=None, f=None, buf=None, start_offset=0, mode="rb"):
        if buf and f:
            raise RuntimeError("specify buf or f or filename, but not both buf and f")
        self.filename = filename
        self.buf = buf
        self.f = f
        self.start_offset = start_offset
        self.should_close_buf = False
        self.should_close_file = False
        if filename is not None and f is None:
            self.f = f = open(filename, mode)
            self.should_close_file = True
        if f is not None:
            self.buf = buf = self.get_buf_from_file(f)
            self.should_close_buf = True
            if not filename and hasattr(f, "name"):
                self.filename = f.name
        self.cached_objects = {}
        if buf:
            self.read_pdf_info()
        else:
            self.file_size_total = self.file_size_this = 0
            self.root = PdfDict()
            self.root_ref = None
            self.info = PdfDict()
            self.info_ref = None
            self.page_tree_root = {}
            self.pages = []
            self.orig_pages = []
            self.pages_ref = None
            self.last_xref_section_offset = None
            self.trailer_dict = {}
            self.xref_table = XrefTable()
        self.xref_table.reading_finished = True
        if f:
            self.seek_end()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False  # do not suppress exceptions

    def start_writing(self):
        self.close_buf()
        self.seek_end()

    def close_buf(self):
        try:
            self.buf.close()
        except AttributeError:
            pass
        self.buf = None

    def close(self):
        if self.should_close_buf:
            self.close_buf()
        if self.f is not None and self.should_close_file:
            self.f.close()
            self.f = None

    def seek_end(self):
        self.f.seek(0, os.SEEK_END)

    def write_header(self):
        self.f.write(b"%PDF-1.4\n")

    def write_comment(self, s):
        self.f.write(f"% {s}\n".encode("utf-8"))

    def write_catalog(self):
        self.del_root()
        self.root_ref = self.next_object_id(self.f.tell())
        self.pages_ref = self.next_object_id(0)
        self.rewrite_pages()
        self.write_obj(self.root_ref, Type=PdfName(b"Catalog"), Pages=self.pages_ref)
        self.write_obj(
            self.pages_ref,
            Type=PdfName(b"Pages"),
            Count=len(self.pages),
            Kids=self.pages,
        )
        return self.root_ref

    def rewrite_pages(self):
        pages_tree_nodes_to_delete = []
        for i, page_ref in enumerate(self.orig_pages):
            page_info = self.cached_objects[page_ref]
            del self.xref_table[page_ref.object_id]
            pages_tree_nodes_to_delete.append(page_info[PdfName(b"Parent")])
            if page_ref not in self.pages:
                # the page has been deleted
                continue
            # make dict keys into strings for passing to write_page
            stringified_page_info = {}
            for key, value in page_info.items():
                # key should be a PdfName
                stringified_page_info[key.name_as_str()] = value
            stringified_page_info["Parent"] = self.pages_ref
            new_page_ref = self.write_page(None, **stringified_page_info)
            for j, cur_page_ref in enumerate(self.pages):
                if cur_page_ref == page_ref:
                    # replace the page reference with the new one
                    self.pages[j] = new_page_ref
        # delete redundant Pages tree nodes from xref table
        for pages_tree_node_ref in pages_tree_nodes_to_delete:
            while pages_tree_node_ref:
                pages_tree_node = self.cached_objects[pages_tree_node_ref]
                if pages_tree_node_ref.object_id in self.xref_table:
                    del self.xref_table[pages_tree_node_ref.object_id]
                pages_tree_node_ref = pages_tree_node.get(b"Parent", None)
        self.orig_pages = []

    def write_xref_and_trailer(self, new_root_ref=None):
        if new_root_ref:
            self.del_root()
            self.root_ref = new_root_ref
        if self.info:
            self.info_ref = self.write_obj(None, self.info)
        start_xref = self.xref_table.write(self.f)
        num_entries = len(self.xref_table)
        trailer_dict = {b"Root": self.root_ref, b"Size": num_entries}
        if self.last_xref_section_offset is not None:
            trailer_dict[b"Prev"] = self.last_xref_section_offset
        if self.info:
            trailer_dict[b"Info"] = self.info_ref
        self.last_xref_section_offset = start_xref
        self.f.write(
            b"trailer\n"
            + bytes(PdfDict(trailer_dict))
            + b"\nstartxref\n%d\n%%%%EOF" % start_xref
        )

    def write_page(self, ref, *objs, **dict_obj):
        if isinstance(ref, int):
            ref = self.pages[ref]
        if "Type" not in dict_obj:
            dict_obj["Type"] = PdfName(b"Page")
        if "Parent" not in dict_obj:
            dict_obj["Parent"] = self.pages_ref
        return self.write_obj(ref, *objs, **dict_obj)

    def write_obj(self, ref, *objs, **dict_obj):
        f = self.f
        if ref is None:
            ref = self.next_object_id(f.tell())
        else:
            self.xref_table[ref.object_id] = (f.tell(), ref.generation)
        f.write(bytes(IndirectObjectDef(*ref)))
        stream = dict_obj.pop("stream", None)
        if stream is not None:
            dict_obj["Length"] = len(stream)
        if dict_obj:
            f.write(pdf_repr(dict_obj))
        for obj in objs:
            f.write(pdf_repr(obj))
        if stream is not None:
            f.write(b"stream\n")
            f.write(stream)
            f.write(b"\nendstream\n")
        f.write(b"endobj\n")
        return ref

    def del_root(self):
        if self.root_ref is None:
            return
        del self.xref_table[self.root_ref.object_id]
        del self.xref_table[self.root[b"Pages"].object_id]

    @staticmethod
    def get_buf_from_file(f):
        if hasattr(f, "getbuffer"):
            return f.getbuffer()
        elif hasattr(f, "getvalue"):
            return f.getvalue()
        else:
            try:
                return mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
            except ValueError:  # cannot mmap an empty file
                return b""

    def read_pdf_info(self):
        self.file_size_total = len(self.buf)
        self.file_size_this = self.file_size_total - self.start_offset
        self.read_trailer()
        self.root_ref = self.trailer_dict[b"Root"]
        self.info_ref = self.trailer_dict.get(b"Info", None)
        self.root = PdfDict(self.read_indirect(self.root_ref))
        if self.info_ref is None:
            self.info = PdfDict()
        else:
            self.info = PdfDict(self.read_indirect(self.info_ref))
        check_format_condition(b"Type" in self.root, "/Type missing in Root")
        check_format_condition(
            self.root[b"Type"] == b"Catalog", "/Type in Root is not /Catalog"
        )
        check_format_condition(b"Pages" in self.root, "/Pages missing in Root")
        check_format_condition(
            isinstance(self.root[b"Pages"], IndirectReference),
            "/Pages in Root is not an indirect reference",
        )
        self.pages_ref = self.root[b"Pages"]
        self.page_tree_root = self.read_indirect(self.pages_ref)
        self.pages = self.linearize_page_tree(self.page_tree_root)
        # save the original list of page references
        # in case the user modifies, adds or deletes some pages
        # and we need to rewrite the pages and their list
        self.orig_pages = self.pages[:]

    def next_object_id(self, offset=None):
        try:
            # TODO: support reuse of deleted objects
            reference = IndirectReference(max(self.xref_table.keys()) + 1, 0)
        except ValueError:
            reference = IndirectReference(1, 0)
        if offset is not None:
            self.xref_table[reference.object_id] = (offset, 0)
        return reference

    delimiter = br"[][()<>{}/%]"
    delimiter_or_ws = br"[][()<>{}/%\000\011\012\014\015\040]"
    whitespace = br"[\000\011\012\014\015\040]"
    whitespace_or_hex = br"[\000\011\012\014\015\0400-9a-fA-F]"
    whitespace_optional = whitespace + b"*"
    whitespace_mandatory = whitespace + b"+"
    newline_only = br"[\r\n]+"
    newline = whitespace_optional + newline_only + whitespace_optional
    re_trailer_end = re.compile(
        whitespace_mandatory
        + br"trailer"
        + whitespace_optional
        + br"\<\<(.*\>\>)"
        + newline
        + br"startxref"
        + newline
        + br"([0-9]+)"
        + newline
        + br"%%EOF"
        + whitespace_optional
        + br"$",
        re.DOTALL,
    )
    re_trailer_prev = re.compile(
        whitespace_optional
        + br"trailer"
        + whitespace_optional
        + br"\<\<(.*?\>\>)"
        + newline
        + br"startxref"
        + newline
        + br"([0-9]+)"
        + newline
        + br"%%EOF"
        + whitespace_optional,
        re.DOTALL,
    )

    def read_trailer(self):
        search_start_offset = len(self.buf) - 16384
        if search_start_offset < self.start_offset:
            search_start_offset = self.start_offset
        m = self.re_trailer_end.search(self.buf, search_start_offset)
        check_format_condition(m, "trailer end not found")
        # make sure we found the LAST trailer
        last_match = m
        while m:
            last_match = m
            m = self.re_trailer_end.search(self.buf, m.start() + 16)
        if not m:
            m = last_match
        trailer_data = m.group(1)
        self.last_xref_section_offset = int(m.group(2))
        self.trailer_dict = self.interpret_trailer(trailer_data)
        self.xref_table = XrefTable()
        self.read_xref_table(xref_section_offset=self.last_xref_section_offset)
        if b"Prev" in self.trailer_dict:
            self.read_prev_trailer(self.trailer_dict[b"Prev"])

    def read_prev_trailer(self, xref_section_offset):
        trailer_offset = self.read_xref_table(xref_section_offset=xref_section_offset)
        m = self.re_trailer_prev.search(
            self.buf[trailer_offset : trailer_offset + 16384]
        )
        check_format_condition(m, "previous trailer not found")
        trailer_data = m.group(1)
        check_format_condition(
            int(m.group(2)) == xref_section_offset,
            "xref section offset in previous trailer doesn't match what was expected",
        )
        trailer_dict = self.interpret_trailer(trailer_data)
        if b"Prev" in trailer_dict:
            self.read_prev_trailer(trailer_dict[b"Prev"])

    re_whitespace_optional = re.compile(whitespace_optional)
    re_name = re.compile(
        whitespace_optional
        + br"/([!-$&'*-.0-;=?-Z\\^-z|~]+)(?="
        + delimiter_or_ws
        + br")"
    )
    re_dict_start = re.compile(whitespace_optional + br"\<\<")
    re_dict_end = re.compile(whitespace_optional + br"\>\>" + whitespace_optional)

    @classmethod
    def interpret_trailer(cls, trailer_data):
        trailer = {}
        offset = 0
        while True:
            m = cls.re_name.match(trailer_data, offset)
            if not m:
                m = cls.re_dict_end.match(trailer_data, offset)
                check_format_condition(
                    m and m.end() == len(trailer_data),
                    "name not found in trailer, remaining data: "
                    + repr(trailer_data[offset:]),
                )
                break
            key = cls.interpret_name(m.group(1))
            value, offset = cls.get_value(trailer_data, m.end())
            trailer[key] = value
        check_format_condition(
            b"Size" in trailer and isinstance(trailer[b"Size"], int),
            "/Size not in trailer or not an integer",
        )
        check_format_condition(
            b"Root" in trailer and isinstance(trailer[b"Root"], IndirectReference),
            "/Root not in trailer or not an indirect reference",
        )
        return trailer

    re_hashes_in_name = re.compile(br"([^#]*)(#([0-9a-fA-F]{2}))?")

    @classmethod
    def interpret_name(cls, raw, as_text=False):
        name = b""
        for m in cls.re_hashes_in_name.finditer(raw):
            if m.group(3):
                name += m.group(1) + bytearray.fromhex(m.group(3).decode("us-ascii"))
            else:
                name += m.group(1)
        if as_text:
            return name.decode("utf-8")
        else:
            return bytes(name)

    re_null = re.compile(whitespace_optional + br"null(?=" + delimiter_or_ws + br")")
    re_true = re.compile(whitespace_optional + br"true(?=" + delimiter_or_ws + br")")
    re_false = re.compile(whitespace_optional + br"false(?=" + delimiter_or_ws + br")")
    re_int = re.compile(
        whitespace_optional + br"([-+]?[0-9]+)(?=" + delimiter_or_ws + br")"
    )
    re_real = re.compile(
        whitespace_optional
        + br"([-+]?([0-9]+\.[0-9]*|[0-9]*\.[0-9]+))(?="
        + delimiter_or_ws
        + br")"
    )
    re_array_start = re.compile(whitespace_optional + br"\[")
    re_array_end = re.compile(whitespace_optional + br"]")
    re_string_hex = re.compile(
        whitespace_optional + br"\<(" + whitespace_or_hex + br"*)\>"
    )
    re_string_lit = re.compile(whitespace_optional + br"\(")
    re_indirect_reference = re.compile(
        whitespace_optional
        + br"([-+]?[0-9]+)"
        + whitespace_mandatory
        + br"([-+]?[0-9]+)"
        + whitespace_mandatory
        + br"R(?="
        + delimiter_or_ws
        + br")"
    )
    re_indirect_def_start = re.compile(
        whitespace_optional
        + br"([-+]?[0-9]+)"
        + whitespace_mandatory
        + br"([-+]?[0-9]+)"
        + whitespace_mandatory
        + br"obj(?="
        + delimiter_or_ws
        + br")"
    )
    re_indirect_def_end = re.compile(
        whitespace_optional + br"endobj(?=" + delimiter_or_ws + br")"
    )
    re_comment = re.compile(
        br"(" + whitespace_optional + br"%[^\r\n]*" + newline + br")*"
    )
    re_stream_start = re.compile(whitespace_optional + br"stream\r?\n")
    re_stream_end = re.compile(
        whitespace_optional + br"endstream(?=" + delimiter_or_ws + br")"
    )

    @classmethod
    def get_value(cls, data, offset, expect_indirect=None, max_nesting=-1):
        if max_nesting == 0:
            return None, None
        m = cls.re_comment.match(data, offset)
        if m:
            offset = m.end()
        m = cls.re_indirect_def_start.match(data, offset)
        if m:
            check_format_condition(
                int(m.group(1)) > 0,
                "indirect object definition: object ID must be greater than 0",
            )
            check_format_condition(
                int(m.group(2)) >= 0,
                "indirect object definition: generation must be non-negative",
            )
            check_format_condition(
                expect_indirect is None
                or expect_indirect
                == IndirectReference(int(m.group(1)), int(m.group(2))),
                "indirect object definition different than expected",
            )
            object, offset = cls.get_value(data, m.end(), max_nesting=max_nesting - 1)
            if offset is None:
                return object, None
            m = cls.re_indirect_def_end.match(data, offset)
            check_format_condition(m, "indirect object definition end not found")
            return object, m.end()
        check_format_condition(
            not expect_indirect, "indirect object definition not found"
        )
        m = cls.re_indirect_reference.match(data, offset)
        if m:
            check_format_condition(
                int(m.group(1)) > 0,
                "indirect object reference: object ID must be greater than 0",
            )
            check_format_condition(
                int(m.group(2)) >= 0,
                "indirect object reference: generation must be non-negative",
            )
            return IndirectReference(int(m.group(1)), int(m.group(2))), m.end()
        m = cls.re_dict_start.match(data, offset)
        if m:
            offset = m.end()
            result = {}
            m = cls.re_dict_end.match(data, offset)
            while not m:
                key, offset = cls.get_value(data, offset, max_nesting=max_nesting - 1)
                if offset is None:
                    return result, None
                value, offset = cls.get_value(data, offset, max_nesting=max_nesting - 1)
                result[key] = value
                if offset is None:
                    return result, None
                m = cls.re_dict_end.match(data, offset)
            offset = m.end()
            m = cls.re_stream_start.match(data, offset)
            if m:
                try:
                    stream_len = int(result[b"Length"])
                except (TypeError, KeyError, ValueError) as e:
                    raise PdfFormatError(
                        "bad or missing Length in stream dict (%r)"
                        % result.get(b"Length", None)
                    ) from e
                stream_data = data[m.end() : m.end() + stream_len]
                m = cls.re_stream_end.match(data, m.end() + stream_len)
                check_format_condition(m, "stream end not found")
                offset = m.end()
                result = PdfStream(PdfDict(result), stream_data)
            else:
                result = PdfDict(result)
            return result, offset
        m = cls.re_array_start.match(data, offset)
        if m:
            offset = m.end()
            result = []
            m = cls.re_array_end.match(data, offset)
            while not m:
                value, offset = cls.get_value(data, offset, max_nesting=max_nesting - 1)
                result.append(value)
                if offset is None:
                    return result, None
                m = cls.re_array_end.match(data, offset)
            return result, m.end()
        m = cls.re_null.match(data, offset)
        if m:
            return None, m.end()
        m = cls.re_true.match(data, offset)
        if m:
            return True, m.end()
        m = cls.re_false.match(data, offset)
        if m:
            return False, m.end()
        m = cls.re_name.match(data, offset)
        if m:
            return PdfName(cls.interpret_name(m.group(1))), m.end()
        m = cls.re_int.match(data, offset)
        if m:
            return int(m.group(1)), m.end()
        m = cls.re_real.match(data, offset)
        if m:
            # XXX Decimal instead of float???
            return float(m.group(1)), m.end()
        m = cls.re_string_hex.match(data, offset)
        if m:
            # filter out whitespace
            hex_string = bytearray(
                [b for b in m.group(1) if b in b"0123456789abcdefABCDEF"]
            )
            if len(hex_string) % 2 == 1:
                # append a 0 if the length is not even - yes, at the end
                hex_string.append(ord(b"0"))
            return bytearray.fromhex(hex_string.decode("us-ascii")), m.end()
        m = cls.re_string_lit.match(data, offset)
        if m:
            return cls.get_literal_string(data, m.end())
        # return None, offset  # fallback (only for debugging)
        raise PdfFormatError("unrecognized object: " + repr(data[offset : offset + 32]))

    re_lit_str_token = re.compile(
        br"(\\[nrtbf()\\])|(\\[0-9]{1,3})|(\\(\r\n|\r|\n))|(\r\n|\r|\n)|(\()|(\))"
    )
    escaped_chars = {
        b"n": b"\n",
        b"r": b"\r",
        b"t": b"\t",
        b"b": b"\b",
        b"f": b"\f",
        b"(": b"(",
        b")": b")",
        b"\\": b"\\",
        ord(b"n"): b"\n",
        ord(b"r"): b"\r",
        ord(b"t"): b"\t",
        ord(b"b"): b"\b",
        ord(b"f"): b"\f",
        ord(b"("): b"(",
        ord(b")"): b")",
        ord(b"\\"): b"\\",
    }

    @classmethod
    def get_literal_string(cls, data, offset):
        nesting_depth = 0
        result = bytearray()
        for m in cls.re_lit_str_token.finditer(data, offset):
            result.extend(data[offset : m.start()])
            if m.group(1):
                result.extend(cls.escaped_chars[m.group(1)[1]])
            elif m.group(2):
                result.append(int(m.group(2)[1:], 8))
            elif m.group(3):
                pass
            elif m.group(5):
                result.extend(b"\n")
            elif m.group(6):
                result.extend(b"(")
                nesting_depth += 1
            elif m.group(7):
                if nesting_depth == 0:
                    return bytes(result), m.end()
                result.extend(b")")
                nesting_depth -= 1
            offset = m.end()
        raise PdfFormatError("unfinished literal string")

    re_xref_section_start = re.compile(whitespace_optional + br"xref" + newline)
    re_xref_subsection_start = re.compile(
        whitespace_optional
        + br"([0-9]+)"
        + whitespace_mandatory
        + br"([0-9]+)"
        + whitespace_optional
        + newline_only
    )
    re_xref_entry = re.compile(br"([0-9]{10}) ([0-9]{5}) ([fn])( \r| \n|\r\n)")

    def read_xref_table(self, xref_section_offset):
        subsection_found = False
        m = self.re_xref_section_start.match(
            self.buf, xref_section_offset + self.start_offset
        )
        check_format_condition(m, "xref section start not found")
        offset = m.end()
        while True:
            m = self.re_xref_subsection_start.match(self.buf, offset)
            if not m:
                check_format_condition(
                    subsection_found, "xref subsection start not found"
                )
                break
            subsection_found = True
            offset = m.end()
            first_object = int(m.group(1))
            num_objects = int(m.group(2))
            for i in range(first_object, first_object + num_objects):
                m = self.re_xref_entry.match(self.buf, offset)
                check_format_condition(m, "xref entry not found")
                offset = m.end()
                is_free = m.group(3) == b"f"
                generation = int(m.group(2))
                if not is_free:
                    new_entry = (int(m.group(1)), generation)
                    check_format_condition(
                        i not in self.xref_table or self.xref_table[i] == new_entry,
                        "xref entry duplicated (and not identical)",
                    )
                    self.xref_table[i] = new_entry
        return offset

    def read_indirect(self, ref, max_nesting=-1):
        offset, generation = self.xref_table[ref[0]]
        check_format_condition(
            generation == ref[1],
            f"expected to find generation {ref[1]} for object ID {ref[0]} in xref "
            f"table, instead found generation {generation} at offset {offset}",
        )
        value = self.get_value(
            self.buf,
            offset + self.start_offset,
            expect_indirect=IndirectReference(*ref),
            max_nesting=max_nesting,
        )[0]
        self.cached_objects[ref] = value
        return value

    def linearize_page_tree(self, node=None):
        if node is None:
            node = self.page_tree_root
        check_format_condition(
            node[b"Type"] == b"Pages", "/Type of page tree node is not /Pages"
        )
        pages = []
        for kid in node[b"Kids"]:
            kid_object = self.read_indirect(kid)
            if kid_object[b"Type"] == b"Page":
                pages.append(kid)
            else:
                pages.extend(self.linearize_page_tree(node=kid_object))
        return pages

Filemanager

Name Type Size Permission Actions
__pycache__ Folder 2755
BdfFontFile.py File 2.75 KB 0644
BlpImagePlugin.py File 14 KB 0644
BmpImagePlugin.py File 13.92 KB 0644
BufrStubImagePlugin.py File 1.48 KB 0644
ContainerIO.py File 2.82 KB 0644
CurImagePlugin.py File 1.68 KB 0644
DcxImagePlugin.py File 2.09 KB 0644
DdsImagePlugin.py File 5.34 KB 0644
EpsImagePlugin.py File 11.82 KB 0644
ExifTags.py File 8.8 KB 0644
FitsStubImagePlugin.py File 1.59 KB 0644
FliImagePlugin.py File 4.23 KB 0644
FontFile.py File 2.7 KB 0644
FpxImagePlugin.py File 6.53 KB 0644
FtexImagePlugin.py File 3.23 KB 0644
GbrImagePlugin.py File 2.73 KB 0644
GdImageFile.py File 2.47 KB 0644
GifImagePlugin.py File 28.25 KB 0644
GimpGradientFile.py File 3.27 KB 0644
GimpPaletteFile.py File 1.24 KB 0644
GribStubImagePlugin.py File 1.51 KB 0644
Hdf5StubImagePlugin.py File 1.48 KB 0644
IcnsImagePlugin.py File 11.44 KB 0644
IcoImagePlugin.py File 9.94 KB 0644
ImImagePlugin.py File 10.53 KB 0644
Image.py File 113.4 KB 0644
ImageChops.py File 7.13 KB 0644
ImageCms.py File 36.22 KB 0644
ImageColor.py File 8.44 KB 0644
ImageDraw.py File 29.94 KB 0644
ImageDraw2.py File 4.9 KB 0644
ImageEnhance.py File 3.12 KB 0644
ImageFile.py File 20.74 KB 0644
ImageFilter.py File 15.46 KB 0644
ImageFont.py File 43.49 KB 0644
ImageGrab.py File 3.54 KB 0644
ImageMath.py File 6.88 KB 0644
ImageMode.py File 1.6 KB 0644
ImageMorph.py File 7.67 KB 0644
ImageOps.py File 18.03 KB 0644
ImagePalette.py File 6.2 KB 0644
ImagePath.py File 336 B 0644
ImageQt.py File 5.67 KB 0644
ImageSequence.py File 1.81 KB 0644
ImageShow.py File 6.15 KB 0644
ImageStat.py File 3.81 KB 0644
ImageTk.py File 9.11 KB 0644
ImageTransform.py File 2.78 KB 0644
ImageWin.py File 7.02 KB 0644
ImtImagePlugin.py File 2.15 KB 0644
IptcImagePlugin.py File 5.6 KB 0644
Jpeg2KImagePlugin.py File 8.52 KB 0644
JpegImagePlugin.py File 27.16 KB 0644
JpegPresets.py File 12.41 KB 0644
McIdasImagePlugin.py File 1.71 KB 0644
MicImagePlugin.py File 2.54 KB 0644
MpegImagePlugin.py File 1.76 KB 0644
MpoImagePlugin.py File 4.14 KB 0644
MspImagePlugin.py File 5.43 KB 0644
PSDraw.py File 6.51 KB 0644
PaletteFile.py File 1.08 KB 0644
PalmImagePlugin.py File 8.89 KB 0644
PcdImagePlugin.py File 1.47 KB 0644
PcfFontFile.py File 6.2 KB 0644
PcxImagePlugin.py File 5.41 KB 0644
PdfImagePlugin.py File 7.49 KB 0644
PdfParser.py File 33.58 KB 0644
PixarImagePlugin.py File 1.61 KB 0644
PngImagePlugin.py File 42.79 KB 0644
PpmImagePlugin.py File 4.34 KB 0644
PsdImagePlugin.py File 7.56 KB 0644
PyAccess.py File 9.37 KB 0644
SgiImagePlugin.py File 5.96 KB 0644
SpiderImagePlugin.py File 9.31 KB 0644
SunImagePlugin.py File 4.2 KB 0644
TarIO.py File 1.41 KB 0644
TgaImagePlugin.py File 6.18 KB 0644
TiffImagePlugin.py File 66.86 KB 0644
TiffTags.py File 14.22 KB 0644
WalImageFile.py File 5.4 KB 0644
WebPImagePlugin.py File 10.54 KB 0644
WmfImagePlugin.py File 4.56 KB 0644
XVThumbImagePlugin.py File 1.9 KB 0644
XbmImagePlugin.py File 2.37 KB 0644
XpmImagePlugin.py File 3 KB 0644
__init__.py File 3.18 KB 0644
__main__.py File 41 B 0644
_binary.py File 1.75 KB 0644
_imaging.cpython-36m-x86_64-linux-gnu.so File 650.13 KB 0755
_imagingcms.cpython-36m-x86_64-linux-gnu.so File 38.05 KB 0755
_imagingft.cpython-36m-x86_64-linux-gnu.so File 41.98 KB 0755
_imagingmath.cpython-36m-x86_64-linux-gnu.so File 24.43 KB 0755
_imagingmorph.cpython-36m-x86_64-linux-gnu.so File 8.12 KB 0755
_imagingtk.cpython-36m-x86_64-linux-gnu.so File 9.37 KB 0755
_tkinter_finder.py File 224 B 0644
_util.py File 359 B 0644
_version.py File 50 B 0644
_webp.cpython-36m-x86_64-linux-gnu.so File 40.82 KB 0755
features.py File 8.8 KB 0644